Commit 5414024f authored by EL KAOUI Imad-Eddine

Does the audio tagging per batch.

parent 30a46927
## Author : Nicolas Farrugia, March 2020
## Silent City Project
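##
## Example invocation (script name and paths are placeholders, to be adapted
## to your setup):
##   python batched_tag_silentcities.py --folder /data/silentcities --length 10 --out output.xz
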
import torch
import torchvision.transforms as transforms
import utils
import torch.nn as nn
from torch.nn import functional as F
from importlib import reload
from tqdm import tqdm
import os
import sys
import numpy as np
from audioset_tagging_cnn.inference import audio_tagging_batched
from audioset_tagging_cnn.inference import chunks
import pandas as pd
from librosa.core import get_duration, load
import soundfile as sf
import time as time_
from audioset_tagging_cnn.resample import down_sample
import datetime
import argparse
from ecoacoustics import compute_NDSI, compute_NB_peaks, compute_ACI
parser = argparse.ArgumentParser(description='Silent City Audio Tagging with pretrained ResNet22 on Audioset')
parser.add_argument('--length', default=10, type=int, help='Segment length in seconds')
parser.add_argument('--nbcat', default=3, type=int, help='Maximum number of categories written in the annotation string')
parser.add_argument('--folder', default=None, type=str, help='Path to folder with wav files; subfolders are walked recursively')
parser.add_argument('--file', default=None, type=str, help='Path to a single file to process')
parser.add_argument('--verbose', action='store_true', help='Verbose (default False: nothing printed)')
parser.add_argument('--overwrite', action='store_true', help='Overwrite already processed files (default False)')
parser.add_argument('--out', default='output.xz', type=str, help='Output file (pandas pickle), default is output.xz')
parser.add_argument('--nocuda', action='store_false', help='Do not use the GPU for acceleration')
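# Note: with action='store_false', args.nocuda defaults to True (GPU enabled)
# and becomes False when --nocuda is passed; it is forwarded below as the
# usecuda argument of audio_tagging_batched.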
args = parser.parse_args()

if args.folder is None:
    if args.file is None:
        raise AttributeError("Must provide either a file or a folder")

verbose = args.verbose
Overwrite = args.overwrite
all_files = []

if args.folder is None:
    filelist = [args.file]
else:
    filelist = []
    # Recursively collect all wav files (case-insensitive extension match)
    for root, dirs, files in os.walk(args.folder, topdown=False):
        for name in files:
            if name[-3:].casefold() == 'wav':
                filelist.append(os.path.join(root, name))

if verbose:
    print(filelist)
nbcat = args.nbcat

#checkpoint_path = './LeeNet11_mAP=0.266.pth'
checkpoint_path = 'ResNet22_mAP=0.430.pth'

if not os.path.isfile(checkpoint_path):
    raise FileNotFoundError("Pretrained model {} wasn't found, did you download it?".format(checkpoint_path))
if not Overwrite:
    # Skip files whose companion .xz pickle already exists and reload those
    # results instead of recomputing them.
    remaining_filelist = []
    for wavfile in filelist:
        print("wavfile : {}".format(wavfile))
        pdfile = wavfile[:-3] + 'xz'
        if os.path.isfile(pdfile):
            print("File {} has already been processed; loading".format(wavfile))
            Df = pd.read_pickle(pdfile)
            all_files.append(Df)
            continue
        else:
            remaining_filelist.append(wavfile)
    filelist = remaining_filelist
t_2 = time_.time()

if len(filelist) == 0:
    print("All files have already been processed")
else:
    nbsec = args.length
    batch_size = 36  # Optimal batch size for our GPU memory, global sample rate and segment duration.
    # Split the flat file list into batches of batch_size files.
    filelist = list(chunks(filelist, batch_size))
    print("filelist : {}".format(filelist))
    nb_batch = len(filelist)
    print("nb_batch : {}".format(nb_batch))
    for batch in tqdm(filelist):
        len_batch = len(batch)
        print("len batch : {}".format(len_batch))
        try:
            # The segment grid is derived from the duration of the first file
            # of the batch.
            wavfile = batch[0]
            print("wavfile : {}".format(wavfile))
            _, meta = utils.read_audio_hdr(wavfile, verbose)

            beg_seg = 0
            end_seg = np.floor(get_duration(filename=wavfile))

            allpreds = []
            onsets = []
            audioset_proba = []
            n = 0
            all_seg = []
            with torch.no_grad():
                for curstart in np.arange(beg_seg, end_seg, nbsec):
                    start = curstart
                    onsets.append(curstart)

                    # Make predictions for Audioset on the whole batch at once
                    t_0 = time_.time()
                    clipwise_output, labels, sorted_indexes, embedding = audio_tagging_batched(
                        batch, checkpoint_path, offset=curstart, duration=nbsec, usecuda=args.nocuda)
                    t_1 = time_.time()
                    print("batched audio_tagging_time : {}".format(t_1 - t_0))
                    for j, wavfile in enumerate(batch):
                        ### Calculate ecoacoustic indices on the current segment
                        (waveform, sr) = load(wavfile, sr=None, mono=True, offset=curstart, duration=nbsec)
                        ndsi = compute_NDSI(waveform, sr)
                        nbpeaks = compute_NB_peaks(waveform, sr)
                        aci, _ = compute_ACI(waveform, sr)
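                        # Ecoacoustic descriptors from the local ecoacoustics
                        # module: NDSI (normalized difference soundscape index,
                        # contrasting biophony and anthropophony bands),
                        # NB_peaks (number of spectral peaks) and ACI (acoustic
                        # complexity index); exact conventions are those of the
                        # module.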
                        # Build the annotation string from the top nbcat
                        # audio tagging categories and their probabilities
                        texttagging = ''
                        for k in range(nbcat):
                            texttagging += np.array(labels)[sorted_indexes[j][k]]
                            proba = 100 * clipwise_output[j][sorted_indexes[j][k]]
                            texttagging += ' ({0:2.1f}%)'.format(proba)
                            texttagging += ', '
                        texttagging = texttagging[:-2]  # drop trailing ', '
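                        # Illustrative output (made-up values), with nbcat=3:
                        # "Speech (54.2%), Wind (12.3%), Bird vocalization, bird call, bird song (3.1%)"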
print("text tagging : {}".format(texttagging))
# AudioSet
audioset_proba.append(clipwise_output[j])
print("audioset proba : {}".format(audioset_proba))
#audioset_fm.append(embedding)
annotation_str = "{tagging}".format(tagging=texttagging)
print("annotation_str : {}".format(annotation_str))
                        if verbose:
                            print(annotation_str)

                        # Timestamp the segment: onset datetime = recording
                        # start (from the wav header) + segment offset
                        _, meta = utils.read_audio_hdr(wavfile, verbose)
                        current_dt = meta['datetime']
                        delta = datetime.timedelta(seconds=int(curstart))
                        onset_dt = current_dt + delta
                        curdict = dict(datetime=onset_dt, time=onset_dt.time(), file=wavfile, id=meta['id'],
                                       onsets=curstart, label=annotation_str, date=onset_dt.date(),
                                       probas=clipwise_output[j], embedding=embedding[j],
                                       ndsi=ndsi, nbpeaks=nbpeaks, aci=aci)
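                        # Each curdict becomes one dataframe row: absolute
                        # timestamps, the readable label string, the full
                        # AudioSet probability vector and embedding, and the
                        # three ecoacoustic indices.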
print("curdict : {}".format(curdict))
all_seg.append(curdict)
            df_allseg = pd.DataFrame(all_seg)

            # Write one result pickle per input file (same path with the wav
            # extension replaced by xz) and keep each per-file dataframe for
            # the final concatenation.
            for j, wavfile in enumerate(batch):
                pdfile = wavfile[:-3] + 'xz'
                df_forannot = df_allseg[df_allseg['file'] == wavfile]
                df_forannot.to_pickle(pdfile)
                all_files.append(df_forannot)
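            # These per-file pickles are what the Overwrite check at the top
            # of the script looks for and reloads with pd.read_pickle.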
print("all_files : {}".format(all_files))
except Exception as e:
print('Error with file {}'.format(wavfile))
raise(e)

df = pd.concat(all_files)
df = df.sort_values(by='datetime')
df.to_pickle(args.out)

t_3 = time_.time()
print("batched_tag_silentcities time : {}".format(t_3 - t_2))