From 5414024f1225bf8d858046c709f0b35fa743274f Mon Sep 17 00:00:00 2001
From: imad-eddine <imad-eddine.el-kaoui@imt-atlantique.net>
Date: Tue, 11 Aug 2020 15:20:24 +0200
Subject: [PATCH] Perform audio tagging per batch.

---
 batched_tag_silentcities.py | 215 ++++++++++++++++++++++++++++++++++++
 1 file changed, 215 insertions(+)
 create mode 100644 batched_tag_silentcities.py

diff --git a/batched_tag_silentcities.py b/batched_tag_silentcities.py
new file mode 100644
index 0000000..deca2f7
--- /dev/null
+++ b/batched_tag_silentcities.py
@@ -0,0 +1,215 @@
+## Author : Nicolas Farrugia, March 2020
+## Silent City Project
+
+import torch
+import utils
+from tqdm import tqdm
+import os
+import numpy as np
+from audioset_tagging_cnn.inference import audio_tagging_batched
+from audioset_tagging_cnn.inference import chunks
+import pandas as pd
+from librosa.core import get_duration, load
+import time as time_
+import datetime
+
+import argparse
+from ecoacoustics import compute_NDSI, compute_NB_peaks, compute_ACI
+
+
+parser = argparse.ArgumentParser(description='Silent City Audio Tagging with pretrained ResNet22 on AudioSet')
+parser.add_argument('--length', default=10, type=int, help='Segment length in seconds')
+parser.add_argument('--nbcat', default=3, type=int, help='Maximum number of categories to write per segment in the annotated csv')
+parser.add_argument('--folder', default=None, type=str, help='Path to folder with wav files; subfolders are walked recursively')
+parser.add_argument('--file', default=None, type=str, help='Path to a single file to process')
+parser.add_argument('--verbose', action='store_true', help='Verbose (default False = nothing printed)')
+parser.add_argument('--overwrite', action='store_true', help='Overwrite already processed files (default False)')
+parser.add_argument('--out', default='output.xz', type=str, help='Output file (pandas pickle), default is output.xz')
+parser.add_argument('--nocuda', action='store_false', help='Do not use the GPU for acceleration')
+
+args = parser.parse_args()
+
+if args.folder is None:
+    if args.file is None:
+        raise AttributeError("Must provide either a file or a folder")
+
+verbose = args.verbose
+Overwrite = args.overwrite
+
+all_files = []
+
+if args.folder is None:
+    filelist = [args.file]
+else:
+    filelist = []
+    for root, dirs, files in os.walk(args.folder, topdown=False):
+        for name in files:
+            if name[-3:].casefold() == 'wav':
+                filelist.append(os.path.join(root, name))
+
+if verbose:
+    print(filelist)
+
+nbcat = args.nbcat
+
+#checkpoint_path = './LeeNet11_mAP=0.266.pth'
+checkpoint_path = 'ResNet22_mAP=0.430.pth'
+
+if not os.path.isfile(checkpoint_path):
+    raise FileNotFoundError("Pretrained model {} wasn't found, did you download it?".format(checkpoint_path))
+
+if not Overwrite:
+    # Skip files that already have a pickle next to them; load those results instead.
+    remaining_filelist = []
+    for wavfile in filelist:
+        print("wavfile : {}".format(wavfile))
+        pdfile = wavfile[:-3] + 'xz'
+        if os.path.isfile(pdfile):
+            print("File {} has already been processed; loading".format(wavfile))
+            Df = pd.read_pickle(pdfile)
+            all_files.append(Df)
+            continue
+        else:
+            remaining_filelist.append(wavfile)
+
+    filelist = remaining_filelist
+
+if len(filelist) == 0:
+    print("All files have already been processed")
+else:
+    nbsec = args.length
+    batch_size = 36  # Optimal batch size for our memory, global sample rate and segment duration.
+    batches = list(chunks(filelist, batch_size))
+    print("batches : {}".format(batches))
+    nb_batch = len(batches)
+    print("nb_batch : {}".format(nb_batch))
+
+    t_2 = time_.time()
+    for batch in tqdm(batches):
+
+        len_batch = len(batch)
+        print("len batch : {}".format(len_batch))
+
+        try:
+            wavfile = batch[0]
+            print("wavfile : {}".format(wavfile))
+            _, meta = utils.read_audio_hdr(wavfile, verbose)
+
+            # Duration is read from the first file; all files in a batch are
+            # assumed to have the same length.
+            beg_seg = 0
+            end_seg = np.floor(get_duration(filename=wavfile))
+
+            onsets = []
+            audioset_proba = []
+
+            all_seg = []
+            with torch.no_grad():
+                for curstart in np.arange(beg_seg, end_seg, nbsec):
+                    onsets.append(curstart)
+
+                    # Make predictions for AudioSet on the whole batch at once
+                    t_0 = time_.time()
+                    clipwise_output, labels, sorted_indexes, embedding = audio_tagging_batched(batch, checkpoint_path, offset=curstart, duration=nbsec, usecuda=args.nocuda)  # args.nocuda is True unless --nocuda is passed
+                    t_1 = time_.time()
+                    print("batched audio_tagging time : {}".format(t_1 - t_0))
+
+                    for j, wavfile in enumerate(batch):
+
+                        ### Calculate ecoacoustic indices
+                        (waveform, sr) = load(wavfile, sr=None, mono=True, offset=curstart, duration=nbsec)
+
+                        ndsi = compute_NDSI(waveform, sr)
+                        nbpeaks = compute_NB_peaks(waveform, sr)
+                        aci, _ = compute_ACI(waveform, sr)
+
+                        # Format the top audio tagging probabilities
+                        texttagging = ''
+                        for k in range(nbcat):
+                            texttagging += np.array(labels)[sorted_indexes[j][k]]
+                            proba = 100 * clipwise_output[j][sorted_indexes[j][k]]
+                            texttagging += ' ({0:2.1f}%)'.format(proba)
+                            texttagging += ', '
+                        texttagging = texttagging[:-2]
+                        print("text tagging : {}".format(texttagging))
+
+                        # AudioSet
+                        audioset_proba.append(clipwise_output[j])
+                        print("audioset proba : {}".format(audioset_proba))
+
+                        annotation_str = texttagging
+                        print("annotation_str : {}".format(annotation_str))
+
+                        if verbose:
+                            print(annotation_str)
+
+                        _, meta = utils.read_audio_hdr(wavfile, verbose)
+                        current_dt = meta['datetime']
+
+                        delta = datetime.timedelta(seconds=int(curstart))
+                        onset_dt = current_dt + delta
+
+                        curdict = dict(datetime=onset_dt, time=onset_dt.time(), file=wavfile, id=meta['id'], onsets=curstart, label=annotation_str, date=onset_dt.date(), probas=clipwise_output[j], embedding=embedding[j], ndsi=ndsi, nbpeaks=nbpeaks, aci=aci)
+                        print("curdict : {}".format(curdict))
+                        all_seg.append(curdict)
+
+            # Write one pickle per file of the batch
+            df_allseg = pd.DataFrame(all_seg)
+            for j, wavfile in enumerate(batch):
+                pdfile = wavfile[:-3] + 'xz'
+                df_forannot = df_allseg[df_allseg['file'] == wavfile]
+                df_forannot.to_pickle(pdfile)
+                print("df_forannot : {}".format(df_forannot))
+                all_files.append(df_forannot)
+                print("all_files : {}".format(all_files))
+
+        except Exception as e:
+            print('Error with file {}'.format(wavfile))
+            raise e
+
+    df = pd.concat(all_files)
+    df = df.sort_values(by='datetime')
+
+    df.to_pickle(args.out)
+    t_3 = time_.time()
+    print("batched_tag_silentcities time : {}".format(t_3 - t_2))
\ No newline at end of file
--
GitLab
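
Note: the script imports a chunks() helper from audioset_tagging_cnn.inference, whose implementation is not part of this diff. A minimal sketch of the batching behaviour the script relies on (fixed-size batches of file paths, the last one possibly shorter) could look like the following; the implementation shown is an assumption for illustration, not the library's actual code:

# Assumed behaviour of the chunks() helper imported by the script: split the
# list of wav paths into fixed-size batches so audio_tagging_batched()
# can run one forward pass per batch.
def chunks(lst, n):
    """Yield successive n-sized chunks from lst; the last may be shorter."""
    for i in range(0, len(lst), n):
        yield lst[i:i + n]

# Example: 10 files with a batch size of 4 -> batches of 4, 4 and 2 files.
files = ['rec_{}.wav'.format(i) for i in range(10)]
for batch in chunks(files, 4):
    print(batch)

With batch_size = 36 as in the patch, a folder of 100 recordings would thus be processed as two full batches of 36 files plus one final batch of 28.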