FARRUGIA Nicolas / Audio Tagging Silent Cities / Commits / 5414024f

Commit 5414024f, authored 4 years ago by EL KAOUI Imad-Eddine

Does the audio tagging per batch.

Parent: 30a46927
Merge request: !1 (Imad branch)

Showing 1 changed file: batched_tag_silentcities.py (new file, mode 100644), 215 additions, 0 deletions.

batched_tag_silentcities.py:
## Author : Nicolas Farrugia, March 2020
## Silent City Project

import torch
import torchvision.transforms as transforms
import utils
import torch.nn as nn
from torch.nn import functional as F
from importlib import reload
from tqdm import tqdm
import os
import sys
import numpy as np
from audioset_tagging_cnn.inference import audio_tagging_batched
from audioset_tagging_cnn.inference import chunks
from datetime import time
import pandas as pd
from librosa.core import get_duration, load
import soundfile as sf
import time as time_
from audioset_tagging_cnn.resample import down_sample
import datetime
import argparse
from ecoacoustics import compute_NDSI, compute_NB_peaks, compute_ACI
parser = argparse.ArgumentParser(description='Silent City Audio Tagging with pretrained ResNet22 on Audioset')

parser.add_argument('--length', default=10, type=int, help='Segment length')
parser.add_argument('--nbcat', default=3, type=int, help='Maximum number of categories for writing annotated csv')
parser.add_argument('--folder', default=None, type=str, help='Path to folder with wavefiles, will walk through subfolders')
parser.add_argument('--file', default=None, type=str, help='Path to file to process')
parser.add_argument('--verbose', action='store_true', help='Verbose (default False = nothing printed)')
parser.add_argument('--overwrite', action='store_true', help='Overwrite files (default False)')
parser.add_argument('--out', default='output.xz', type=str, help='Output file (pandas pickle), default is output.xz')
parser.add_argument('--nocuda', action='store_false', help='Do not use the GPU for acceleration')

args = parser.parse_args()
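
# Example invocation (a sketch; the folder path is hypothetical):
#   python batched_tag_silentcities.py --folder /data/silentcities --length 10 --out output.xz --verbose
# Note that --nocuda uses action='store_false', so args.nocuda is True by
# default and passing the flag disables GPU inference.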
if args.folder is None:
    if args.file is None:
        raise AttributeError("Must provide either a file or a folder")

verbose = args.verbose
Overwrite = args.overwrite

all_files = []

if args.folder is None:
    filelist = [args.file]
else:
    filelist = []
    for root, dirs, files in os.walk(args.folder, topdown=False):
        for name in files:
            if name[-3:].casefold() == 'wav':
                filelist.append(os.path.join(root, name))
    #print(currentvid)

if verbose:
    print(filelist)

nbcat = args.nbcat

#checkpoint_path='./LeeNet11_mAP=0.266.pth'
checkpoint_path = 'ResNet22_mAP=0.430.pth'

if not os.path.isfile(checkpoint_path):
    raise FileNotFoundError("Pretrained model {} wasn't found, did you download it ?".format(checkpoint_path))
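
# The pretrained checkpoint (ResNet22_mAP=0.430.pth) comes from the PANNs
# "audioset_tagging_cnn" project and has to be downloaded separately; the exact
# download location is not part of this repository.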

if not Overwrite:
    # Skip any wav file that already has a saved dataframe next to it
    remaining_filelist = []
    for wavfile in filelist:
        print("wavfile : {}".format(wavfile))
        pdfile = wavfile[:-3] + 'xz'
        if os.path.isfile(pdfile):
            print("File {} has already been processed ; loading".format(wavfile))
            Df = pd.read_pickle(pdfile)
            all_files.append(Df)
            continue
        else:
            remaining_filelist.append(wavfile)
    filelist = remaining_filelist

if len(filelist) == 0:
    print("All files have already been processed")
else:
    nbsec = args.length
    batch_size = 36  # Optimum batch size for our memory, global sample rate and segment duration.
    # Split the flat file list into batches of at most batch_size files
    chunk = [chunks(filelist, batch_size)]
    filelist = list(chunk[0])
    print("filelist : {}".format(filelist))
    nb_batch = len(filelist)
    print("nb_batch : {}".format(nb_batch))
    t_2 = time_.time()
    for batch in tqdm(filelist):
        len_batch = len(batch)
        print("len batch : {}".format(len_batch))
        try:
            # Segment boundaries are taken from the first file of the batch
            wavfile = batch[0]
            print("wavfile : {}".format(wavfile))
            _, meta = utils.read_audio_hdr(wavfile, verbose)
            beg_seg = 0
            end_seg = np.floor(get_duration(filename=wavfile))
            allpreds = []
            onsets = []
            audioset_proba = []
            n = 0
            all_seg = []
            with torch.no_grad():
                for curstart in np.arange(beg_seg, end_seg, nbsec):
                    start = curstart
                    onsets.append(curstart)
                    # Make predictions for audioset
                    t_0 = time_.time()
                    clipwise_output, labels, sorted_indexes, embedding = audio_tagging_batched(
                        batch, checkpoint_path, offset=curstart, duration=nbsec, usecuda=args.nocuda)
                    t_1 = time_.time()
                    print("batched audio_tagging_time : {}".format(t_1 - t_0))
                    for j, wavfile in enumerate(batch):
                        ### Calculate Eco acoustic indices
                        (waveform, sr) = load(wavfile, sr=None, mono=True, offset=curstart, duration=nbsec)
                        ndsi = compute_NDSI(waveform, sr)
                        nbpeaks = compute_NB_peaks(waveform, sr)
                        aci, _ = compute_ACI(waveform, sr)
                        # Print audio tagging top probabilities
                        #print("labels sorted : {}".format(np.array(labels)[sorted_indexes[j][0]]))
                        #break
                        # Build a "label (probability%), ..." string from the nbcat top categories
                        texttagging = ''
                        for k in range(nbcat):
                            texttagging += np.array(labels)[sorted_indexes[j][k]]
                            proba = 100 * clipwise_output[j][sorted_indexes[j][k]]
                            texttagging += ' ({0:2.1f}%)'.format(proba)
                            texttagging += ', '
                        texttagging = texttagging[:-2]
                        print("text tagging : {}".format(texttagging))
                        # AudioSet
                        audioset_proba.append(clipwise_output[j])
                        print("audioset proba : {}".format(audioset_proba))
                        #audioset_fm.append(embedding)
                        annotation_str = "{tagging}".format(tagging=texttagging)
                        print("annotation_str : {}".format(annotation_str))
                        if verbose:
                            print(annotation_str)
                        _, meta = utils.read_audio_hdr(wavfile, verbose)
                        current_dt = meta['datetime']
                        delta = datetime.timedelta(seconds=int(curstart))
                        onset_dt = current_dt + delta
                        curdict = dict(datetime=onset_dt, time=onset_dt.time(), file=wavfile, id=meta['id'],
                                       onsets=curstart, label=annotation_str, date=onset_dt.date(),
                                       probas=clipwise_output[j], embedding=embedding[j],
                                       ndsi=ndsi, nbpeaks=nbpeaks, aci=aci)
                        print("curdict : {}".format(curdict))
                        all_seg.append(curdict)
            df_allseg = pd.DataFrame(all_seg)
            # Save one dataframe per wav file, next to the audio
            for j, wavfile in enumerate(batch):
                pdfile = wavfile[:-3] + 'xz'
                df_forannot = df_allseg[df_allseg['file'] == wavfile]
                df_forannot.to_pickle(pdfile)
                print("df_forannot : {}".format(df_forannot))
                all_files.append(df_forannot)
                print("all_files : {}".format(all_files))
        except Exception as e:
            print('Error with file {}'.format(wavfile))
            raise e

df = pd.concat(all_files)
df = df.sort_values(by='datetime')
df.to_pickle(args.out)
t_3 = time_.time()
print("batched_tag_silentcities time : {}".format(t_3 - t_2))
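
chunks is imported from audioset_tagging_cnn.inference and is not part of this diff; the batching code above only assumes it yields fixed-size slices of the file list. A minimal sketch of that assumed behaviour:

def chunks(lst, n):
    # Yield successive n-sized slices of lst; the last slice may be shorter.
    for i in range(0, len(lst), n):
        yield lst[i:i + n]

Under this reading, list(chunks(filelist, 36)) turns a flat list of wav paths into a list of batches of at most 36 files, which is what the main loop iterates over.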
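compute_NDSI, compute_NB_peaks and compute_ACI come from the project's own ecoacoustics module, also outside this diff. For orientation, a sketch of the standard NDSI definition (normalized difference of biophony and anthrophony spectral power); the band limits here are common defaults and the project's implementation may differ:

import numpy as np
from scipy import signal

def ndsi_sketch(waveform, sr, anthro=(1000, 2000), bio=(2000, 8000)):
    # Power spectral density of the segment
    freqs, psd = signal.welch(waveform, fs=sr, nperseg=1024)
    # Total power in the anthrophony and biophony bands
    a = psd[(freqs >= anthro[0]) & (freqs < anthro[1])].sum()
    b = psd[(freqs >= bio[0]) & (freqs < bio[1])].sum()
    # NDSI lies in [-1, 1]; positive values mean biophony dominates
    return (b - a) / (b + a)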
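Each processed wav file gets a sibling .xz pickle, and --out collects everything into one dataframe. Reading the results back (a sketch; the column names follow the curdict built above):

import pandas as pd

# pandas infers xz compression from the file extension
df = pd.read_pickle('output.xz')
print(df[['datetime', 'file', 'label', 'ndsi', 'nbpeaks', 'aci']].head())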