From d1152e6159e33ec75b9477cb9e4edfb464659f92 Mon Sep 17 00:00:00 2001
From: ax-le <axel.marmoret@imt-atlantique.fr>
Date: Tue, 25 Jun 2024 18:40:43 +0200
Subject: [PATCH] Adding comments and a gitignore

---
 .gitignore                                    |  21 ++++
 README.md                                     |   2 +
 .../__pycache__/anuraset.cpython-310.pyc      | Bin 1201 -> 0 bytes
 .../__pycache__/base_dataset.cpython-310.pyc  | Bin 1662 -> 0 bytes
 .../__pycache__/bouffaut.cpython-310.pyc      | Bin 1664 -> 0 bytes
 .../datasets/__pycache__/narw.cpython-310.pyc | Bin 985 -> 0 bytes
 .../__pycache__/whalesing.cpython-310.pyc     | Bin 751 -> 0 bytes
 nmf_bioacoustic/datasets/anuraset.py          |  27 +++++
 nmf_bioacoustic/datasets/base_dataset.py      |  67 ++++++++++-
 nmf_bioacoustic/datasets/bouffaut.py          |  73 +++++++++++-
 nmf_bioacoustic/datasets/narw.py              |  35 ++++++
 nmf_bioacoustic/datasets/whalesing.py         |  17 +++
 .../experiments/anuraset_nmf_count.py         | 110 +++++++++++++-----
 .../source_separation.cpython-310.pyc         | Bin 1607 -> 0 bytes
 nmf_bioacoustic/tasks/source_count.py         | 104 ++++++++++++++++-
 nmf_bioacoustic/tasks/source_separation.py    |  42 +++++++
 nmf_bioacoustic/utils/audio_helper.py         |  22 +++-
 nmf_bioacoustic/utils/data_manipulation.py    |  29 ++++-
 .../utils/signal_to_spectrogram.py            |  94 ++++++++++-----
 .../utils/spectrogram_to_signal.py            |  33 ++++++
 20 files changed, 603 insertions(+), 73 deletions(-)
 create mode 100755 .gitignore
 delete mode 100644 nmf_bioacoustic/datasets/__pycache__/anuraset.cpython-310.pyc
 delete mode 100644 nmf_bioacoustic/datasets/__pycache__/base_dataset.cpython-310.pyc
 delete mode 100644 nmf_bioacoustic/datasets/__pycache__/bouffaut.cpython-310.pyc
 delete mode 100644 nmf_bioacoustic/datasets/__pycache__/narw.cpython-310.pyc
 delete mode 100644 nmf_bioacoustic/datasets/__pycache__/whalesing.cpython-310.pyc
 delete mode 100644 nmf_bioacoustic/tasks/__pycache__/source_separation.cpython-310.pyc

diff --git a/.gitignore b/.gitignore
new file mode 100755
index 0000000..3d4ac08
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,21 @@
+__pycache__/
+.ipynb_checkpoints/
+
+# Distribution / packaging
+.Python
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+wheels/
+*.egg-info/
+.installed.cfg
+*.egg
+MANIFEST
\ No newline at end of file
diff --git a/README.md b/README.md
index 61fd9ff..0eb921f 100644
--- a/README.md
+++ b/README.md
@@ -15,6 +15,7 @@ For now, this repository is mainly a Proof-of-Concept that NMF-like methods can
 This repository does not include the NMF code, which is instead maintained in the GitLab project `nn_fac` of the current corresponding author (https://gitlab.imt-atlantique.fr/a23marmo/nonnegative-factorization/).
 
 ## Installation
+
 You should first clone/fork/download this repository.
 
 You should then install the requirements using the command
@@ -26,4 +27,5 @@ And finally install the code using
 
 This is the recommended way to install the code. You may use another method, but note that the project was probably neither designed nor tested in any other way.
 ## Contact
+
 Don't hesitate to contact me at the following email address if you have any questions: axel.marmoret@imt-atlantique.fr
\ No newline at end of file
diff --git a/nmf_bioacoustic/datasets/__pycache__/anuraset.cpython-310.pyc b/nmf_bioacoustic/datasets/__pycache__/anuraset.cpython-310.pyc
deleted file mode 100644
index 41719a3b7fa8f86f5ea63ed6e51c6074e022ff84..0000000000000000000000000000000000000000
diff --git a/nmf_bioacoustic/datasets/__pycache__/base_dataset.cpython-310.pyc b/nmf_bioacoustic/datasets/__pycache__/base_dataset.cpython-310.pyc
deleted file mode 100644
index ee858bbde7d3b09af4dd536cfe553b56c0f35dc6..0000000000000000000000000000000000000000
diff --git a/nmf_bioacoustic/datasets/__pycache__/bouffaut.cpython-310.pyc b/nmf_bioacoustic/datasets/__pycache__/bouffaut.cpython-310.pyc
deleted file mode 100644
index 19955528679cb839831855135a58e3f3ba30744b..0000000000000000000000000000000000000000
diff --git a/nmf_bioacoustic/datasets/__pycache__/narw.cpython-310.pyc b/nmf_bioacoustic/datasets/__pycache__/narw.cpython-310.pyc
deleted file mode 100644
index 7e4271b2655b983cd64822e26073903069b038e0..0000000000000000000000000000000000000000
zC5a`O`FX{f?6+8oQ*+X8aV3_fWah^gB$i~{5<pTApO%@EdW)&J=oWigYGO%gQR*$O zjQoQ5oYcJZl8jrddGTp!B`X<kamL3d=Oh*v$H%W^_!Xp|k)NBYpJ-&9n^=^aukVst zoL!P%pdV0_pOu<iQmh}GSeB}go>`)hnOBlpl$V&J@0_2Ks-KjZpO_q<m6epJp8^S~ zV*R|tqH?{0$|6n%1_n^b7lQ~fMghhu5u^~+gZVX?2^xYR3ZxW-ok5|f#lXN&!;l5W zDU6aJnz4pak|CQ(lA)HVgsFxpg-Md3nX#6+hB=r)liBYU2gvVviMgq_I7>2fQ;SOy za|<+?i+C6q7;bSV=H$c|XQt;R<`myz$;m7((PSy&1<5dHrc~VG2M1w%Nk(QdBm_mk z0*NK5De)<(C8^2a&@U2WU|@g{;vlX3>8U00P=%n_0i}RqHU<U;DMlVf7RD-RWFMoc z)nq9Gd9{cYWF$yW5hx95vVoET$Pe-Hx47ctbMsS5b5i5uZ}G&(7nUaGKxDvyTO<#1 z0xvWklT&kY;^QH11G`cLWG={+MW8eZ!eEziFz_)7uz<z<G?{OS!s1OYDX};;9?HAL n=boCET9lXrP5)r!NM?gA0GWM@!v^9)J4R5Hfjq>)Ai@LyJnR0K diff --git a/nmf_bioacoustic/datasets/__pycache__/whalesing.cpython-310.pyc b/nmf_bioacoustic/datasets/__pycache__/whalesing.cpython-310.pyc deleted file mode 100644 index 141c8b9c3f9b5dec4c5ce2dcc4d678e3eb9d9152..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 751 zcmd1j<>g{vU|?9fu{7-;0|Ucj5C<8vFfcGUFfcF_%P=r7q%fo~<}idXxHF_Mr7*WJ zq%fy4HZw;trLY7uXtKTp>C<Gq#i*4GVj*K@D5qG3fq@~FA&N1DA&RM;A&oJGF@>px zBZ@hNIfbQ#A&RA)frTN86=d2i&hU)HoYdgVy!2$4c91C`HY)=I1BfmbVPIe=VQ6Mp zz*xhW!cfDskeQJog&~+>C8M7v(=FEG(t^~YTO9H6nR%Hd@$s6>w^)i(bJA{cC6=aS z=EoN#mSo)GOG`~GDJ@Ek&rixqO)go<c#AVWJ~=0`xHvw3CBv^!{fzwFRQ*IF<J`oe z+<bkP)Z*-t`~v-eqI{6@V*R|_wD_dV{KVw^(&CcLWDI^vVo73gYDuwvImkQ3nR)4Y z1(ii?3=9mQpe<%%U|<kotP+BIRuAUdWRMid#~=)1GcqtRID?`Bq!HxHA`qj9hk=1X zlc|WEfq~%`b7o2fSO7xsg2edKQ%m9#^YZdb5=%1k^NK+V!Pc@cR!JhPMN_581`4;l z#N5>Q_*-1@@wxdar8yurPkek~X<`mU2JDX_evmc1&`?WG&B=+6hu9BtS`jFH6@gq* z1d1{cE(ZCYgMo)pfEg_2r^$Rv6)ha~VBw~hlvtb^59QtBb5G4nElSLBfrKhpJCa>s Y3qf|>;;@0Z*bWqG#h`%VU=U#f0H)%$ZvX%Q diff --git a/nmf_bioacoustic/datasets/anuraset.py b/nmf_bioacoustic/datasets/anuraset.py index 819a669..4ba2ad5 100644 --- a/nmf_bioacoustic/datasets/anuraset.py +++ b/nmf_bioacoustic/datasets/anuraset.py @@ -1,6 +1,17 @@ +""" +Created on June 2024 + +@author: a23marmo + +Defines the dataset class for the AnuraSet dataset. Inherits from the base dataset class. +Annotations are simplified to the number of species present in the signal. +Species are counted as individual species, hence the number of species may be different from the number of calls. +""" + from nmf_bioacoustic.datasets.base_dataset import * class AnuraSet(GeneralDataset): + def __init__(self, audio_path, annotations_file, feature_object): super().__init__(audio_path, annotations_file, feature_object) self.annotations = pd.read_csv(annotations_file) @@ -8,5 +19,21 @@ class AnuraSet(GeneralDataset): self.annotations['nonzero_species_count'] = self.annotations.filter(regex='^SPECIES').apply(lambda row: (row != 0).sum(), axis=1) def get_annotations(self, idx): + """ + Returns the annotations of the idx-th signal in the dataset. + Annotations consist here of the number of species present in the signal. + This is a simplification of the original dataset, where the annotations are more complex. + Species are counted as individual species, hence the number of species may be different from the number of calls. + + Parameters + ---------- + idx : int + Index of the signal in the dataset. + + Returns + ------- + annot_this_file : int + Number of species present in the signal. 
+ """ number_of_species = self.annotations['nonzero_species_count'][self.annotations["AUDIO_FILE_ID"] == self.all_signals[idx].split(".")[0]].values[0] return number_of_species \ No newline at end of file diff --git a/nmf_bioacoustic/datasets/base_dataset.py b/nmf_bioacoustic/datasets/base_dataset.py index 32888b7..1614460 100644 --- a/nmf_bioacoustic/datasets/base_dataset.py +++ b/nmf_bioacoustic/datasets/base_dataset.py @@ -1,18 +1,57 @@ -import pandas as pd +""" +Created on June 2024 + +@author: a23marmo + +Defines a base dataset class, from which will inherit the individual and specialized dataset classes. +""" + +# Imports import librosa import os -import nmf_bioacoustic.utils.signal_to_spectrogram as sig_to_spec +import pandas as pd class GeneralDataset(): + # This class is a general dataset class, from which will inherit the specialized dataset classes. def __init__(self, audio_path, annotations_file, feature_object): + """ + Initializes the dataset object. + + Parameters + ---------- + audio_path : str + Path to the audio files. + annotations_file : str + Path to the annotations file. + feature_object : FeatureObject + Feature object, defining the important parameters to compute spectrograms. + """ self.audio_path = audio_path self.all_signals = list(os.listdir(self.audio_path)) self.feature_object = feature_object def __len__(self): + """ + Returns the number of signals in the dataset. + """ return len(self.all_signals) def __getitem__(self, idx): + """ + Returns the spectrogram and the annotations of the idx-th signal in the dataset. + + Parameters + ---------- + idx : int + Index of the signal in the dataset. + + Returns + ------- + spec : numpy array + Spectrogram of the signal. + annot_this_file : not defined + Annotations of the signal, the type of which will depend on the dataset. + """ signal, orig_sr = librosa.load(os.path.join(self.audio_path, self.all_signals[idx]), sr = self.feature_object.sr) if self.feature_object.sr != orig_sr: signal = librosa.resample(signal, orig_sr, self.feature_object.sr) @@ -22,9 +61,33 @@ class GeneralDataset(): return spec, annot_this_file def get_annotations(self, idx): + """ + Returns the annotations of the idx-th signal in the dataset. + Base function, which should be implemented in the child class. + + Parameters + ---------- + idx : int + Index of the signal in the dataset. + """ raise NotImplementedError("This method should be implemented in the child class") def get_item_of_id(self, audio_id): + """ + Returns the spectrogram and the annotations of the signal with the given id. + + Parameters + ---------- + audio_id : str + Id of the signal in the dataset. + + Returns + ------- + spec : numpy array + Spectrogram of the signal. + annot_this_file : not defined + Annotations of the signal, the type of which will depend on the dataset. + """ index = self.all_signals.index(audio_id) return self.__getitem__(index) diff --git a/nmf_bioacoustic/datasets/bouffaut.py b/nmf_bioacoustic/datasets/bouffaut.py index 6cf4f6e..280b002 100644 --- a/nmf_bioacoustic/datasets/bouffaut.py +++ b/nmf_bioacoustic/datasets/bouffaut.py @@ -1,12 +1,51 @@ +""" +Created on June 2024 + +@author: a23marmo + +Defines the dataset class for the Bouffaut2020 dataset. Inherits from the base dataset class. +Corresponds to calls from Whales at very low frequencies. 
+""" + from nmf_bioacoustic.datasets.base_dataset import * -import numpy as np class Bouffaut(GeneralDataset): + def __init__(self, audio_path, annotations_file, feature_object): + """ + Initializes the dataset object. + + Parameters + ---------- + audio_path : str + Path to the audio files. + annotations_file : str + Path to the annotations file. + feature_object : FeatureObject + Feature object, defining the important parameters to compute spectrograms. + """ super().__init__(audio_path, annotations_file, feature_object) self.annotations = pd.read_csv(annotations_file, delimiter='\t') def get_annotations(self, idx): + """ + Returns the annotations of the idx-th signal in the dataset. + Annotations consist here of the type of call, the beginning and the end of the call. + + Parameters + ---------- + idx : int + Index of the signal in the dataset. + + Returns + ------- + type : pandas Series + Type of the calls in the signal. + begin : pandas Series + Beginning of the calls in the signal. + end : pandas Series + End of the calls in the signal. + """ annot_this_file = self.annotations[self.annotations["File"] == self.all_signals[idx].split(".")[0]] type = annot_this_file['Type'] begin = annot_this_file['Begin Time (s)'] @@ -14,6 +53,26 @@ class Bouffaut(GeneralDataset): return type, begin, end def crop_annotations(self, annotations, time_limit_s): + """ + Crops the annotations to the time limit. + Useful to focus experiments on a part of the audio signal. + + Parameters + ---------- + annotations : tuple + Tuple of pandas Series, containing the type of the calls, the beginning and the end of the calls. + time_limit_s : float + Time limit in seconds. + + Returns + ------- + type : pandas Series + Type of the calls in the signal. + begin : pandas Series + Beginning of the calls in the signal. + end : pandas Series + End of the calls in the signal. + """ type, begin, end = annotations indices_ok = begin[begin < time_limit_s].keys() type = type[indices_ok] @@ -22,6 +81,18 @@ class Bouffaut(GeneralDataset): return type, begin, end def create_one_annotation_file(dataset_path, annotations_1="Annotation.txt", annotations_2="Annotation_RR48_2013_D151.txt"): + """ + Merges two annotation files into one. + + Parameters + ---------- + dataset_path : str + Path to the dataset. + annotations_1 : str + Name of the first annotation file. + annotations_2 : str + Name of the second annotation file. + """ annot_1 = pd.read_csv(f"{dataset_path}/{annotations_1}", delimiter='\t') annot_2 = pd.read_csv(f"{dataset_path}/{annotations_2}", delimiter='\t') pd.concat([annot_1, annot_2]).to_csv(f"{dataset_path}/merged_annotations.txt", sep='\t', index=False) \ No newline at end of file diff --git a/nmf_bioacoustic/datasets/narw.py b/nmf_bioacoustic/datasets/narw.py index 9482a61..861df0b 100644 --- a/nmf_bioacoustic/datasets/narw.py +++ b/nmf_bioacoustic/datasets/narw.py @@ -1,11 +1,46 @@ +""" +Created on June 2024 + +@author: a23marmo + +Defines the dataset class for the NARW (North Atlantic Right Whale) dataset. Inherits from the base dataset class. +""" + from nmf_bioacoustic.datasets.base_dataset import * class NARWDataset(GeneralDataset): + def __init__(self, audio_path, annotations_file, feature_object): + """ + Initializes the dataset object. + + Parameters + ---------- + audio_path : str + Path to the audio files. + annotations_file : str + Path to the annotations file. + feature_object : FeatureObject + Feature object, defining the important parameters to compute spectrograms. 
+ """ super().__init__(audio_path, annotations_file, feature_object) self.annotations = pd.read_csv(annotations_file) def get_annotations(self, idx): + """ + Returns the annotations of the idx-th signal in the dataset. + Annotations consist here of the time and types of the calls. + + Parameters + ---------- + idx : int + Index of the signal in the dataset. + + Returns + ------- + annot_this_file : pandas Series + Time and type of the calls. + """ annot_this_file = self.annotations[self.annotations["filename"] == self.all_signals[idx]] annotated_detections = list(annot_this_file["timestamp"]) return annotated_detections \ No newline at end of file diff --git a/nmf_bioacoustic/datasets/whalesing.py b/nmf_bioacoustic/datasets/whalesing.py index 815f13c..c8c9d6a 100644 --- a/nmf_bioacoustic/datasets/whalesing.py +++ b/nmf_bioacoustic/datasets/whalesing.py @@ -1,8 +1,25 @@ +""" +Created on June 2024 + +@author: a23marmo + +Defines the dataset class for the dataset of humpback whale. Inherits from the base dataset class. + +TODO: add reference for the dataset, ask Dorian Cazau. +""" + from nmf_bioacoustic.datasets.base_dataset import * class WhaleSing(GeneralDataset): + def __init__(self, audio_path, feature_object): + """ + Initializes the dataset object, exactly following tehe general dataset. + """ super().__init__(audio_path, None, feature_object) def get_annotations(self, idx): + """ + Void function, as the dataset does not have annotations. + """ return None diff --git a/nmf_bioacoustic/experiments/anuraset_nmf_count.py b/nmf_bioacoustic/experiments/anuraset_nmf_count.py index 4049440..17b0aad 100644 --- a/nmf_bioacoustic/experiments/anuraset_nmf_count.py +++ b/nmf_bioacoustic/experiments/anuraset_nmf_count.py @@ -1,52 +1,69 @@ +""" +Created on June 2024 + +@author: a23marmo + +Defines the experiment for counting the number of sources in the AnuraSet dataset, based on the NMF outputs. +It uses the package SACRED to log the results and the parameters of the experiment. +See the docs of SACRED for more details https://sacred.readthedocs.io/en/stable/ +""" + import matplotlib.pyplot as plt import numpy as np import tqdm -from nn_fac.nmf import nmf as nn_fac_nmf from sacred import Experiment from sacred.observers import FileStorageObserver from scipy.special import kl_div from sklearn.metrics import ConfusionMatrixDisplay from sklearn.model_selection import GridSearchCV +from nn_fac.nmf import nmf as nn_fac_nmf + import nmf_bioacoustic.tasks.source_count as species_count from nmf_bioacoustic.datasets.anuraset import AnuraSet from nmf_bioacoustic.utils.signal_to_spectrogram import FeatureObject -anuraset_path = "/home/a23marmo/datasets/anuraset" +anuraset_path = "/home/a23marmo/datasets/anuraset" # TODO: change this path to the correct one ex = Experiment("NMF on Anuraset for source count estimation") save_log_folder = "experiments/jjba/source_count/running" ex.observers.append(FileStorageObserver(save_log_folder)) - @ex.config def default_config(): - sr = 16000 - hop_length = 512 - n_fft = hop_length * 4 - feature = "nn_log_mel" - fmin = 0 - fmax = None - n_mels = 80 + # Base configuration + sr = 16000 # Sampling rate + hop_length = 512 # Hop length + n_fft = hop_length * 4 # FFT window size + feature = "nn_log_mel" # Feature to use + fmin = 0 # Minimum frequency + fmax = None # Maximum frequency + n_mels = 80 # Number of mel bands - n_nmf = 10 - beta = 2 + n_nmf = 10 # Number of components in the NMF + beta = 2 # Beta parameter of the NMF, defining the loss function. 
diff --git a/nmf_bioacoustic/experiments/anuraset_nmf_count.py b/nmf_bioacoustic/experiments/anuraset_nmf_count.py
index 4049440..17b0aad 100644
--- a/nmf_bioacoustic/experiments/anuraset_nmf_count.py
+++ b/nmf_bioacoustic/experiments/anuraset_nmf_count.py
@@ -1,52 +1,69 @@
+"""
+Created on June 2024
+
+@author: a23marmo
+
+Defines the experiment for counting the number of sources in the AnuraSet dataset, based on the NMF outputs.
+It uses the package SACRED to log the results and the parameters of the experiment.
+See the docs of SACRED for more details: https://sacred.readthedocs.io/en/stable/
+"""
+
 import matplotlib.pyplot as plt
 import numpy as np
 import tqdm
-from nn_fac.nmf import nmf as nn_fac_nmf
 from sacred import Experiment
 from sacred.observers import FileStorageObserver
 from scipy.special import kl_div
 from sklearn.metrics import ConfusionMatrixDisplay
 from sklearn.model_selection import GridSearchCV
 
+from nn_fac.nmf import nmf as nn_fac_nmf
+
 import nmf_bioacoustic.tasks.source_count as species_count
 from nmf_bioacoustic.datasets.anuraset import AnuraSet
 from nmf_bioacoustic.utils.signal_to_spectrogram import FeatureObject
 
-anuraset_path = "/home/a23marmo/datasets/anuraset"
+anuraset_path = "/home/a23marmo/datasets/anuraset"  # TODO: change this path to the correct one
 
 ex = Experiment("NMF on Anuraset for source count estimation")
 
 save_log_folder = "experiments/jjba/source_count/running"
 ex.observers.append(FileStorageObserver(save_log_folder))
 
-
 @ex.config
 def default_config():
-    sr = 16000
-    hop_length = 512
-    n_fft = hop_length * 4
-    feature = "nn_log_mel"
-    fmin = 0
-    fmax = None
-    n_mels = 80
+    # Base configuration
+    sr = 16000  # Sampling rate
+    hop_length = 512  # Hop length
+    n_fft = hop_length * 4  # FFT window size
+    feature = "nn_log_mel"  # Feature to use
+    fmin = 0  # Minimum frequency
+    fmax = None  # Maximum frequency
+    n_mels = 80  # Number of mel bands
 
-    n_nmf = 10
-    beta = 2
+    n_nmf = 10  # Number of components in the NMF
+    beta = 2  # Beta parameter of the NMF, defining the loss function:
+              # 2 for Euclidean, 1 for Kullback-Leibler, 0 for Itakura-Saito
 
-    default_var_divide = 10
-    default_eps = 0.8
-    default_metric = "correlation"
+    default_var_divide = 10  # Variance divide parameter for considering some components as noise
+    default_eps = 0.8  # Epsilon parameter for the DBSCAN clustering
+    default_metric = "correlation"  # Metric for the DBSCAN clustering
 
-    scoring = "accuracy"
-    cv = 4
+    scoring = "accuracy"  # Scoring for the GridSearchCV
+    cv = 4  # Number of folds for the GridSearchCV
 
-    param_grid = {"eps": np.linspace(0.3, 1.3, 11)}
-
-    subset = "INCT17"
+    param_grid = {"eps": np.linspace(0.3, 1.3, 11)}  # Grid for the DBSCAN clustering
 
+    subset = "INCT17"  # Subset of the dataset to use
 
 @ex.capture
 def compute_all_nmf_on_this_dataset(n_nmf, beta, subset, sr, feature, hop_length, n_fft, fmin, fmax, n_mels):
+    """
+    Computes the NMF on all the signals of the dataset, and returns the H matrices of the NMF (used for clustering) and the annotations.
+
+    Parameters
+    ----------
+    See the configuration of the experiment (default_config).
+
+    Returns
+    -------
+    all_H : list of np.array
+        H matrices of the NMF, one per signal.
+    all_annot : list of int
+        Annotated number of species, one per signal.
+    """
     feature_object = FeatureObject(sr, feature, hop_length, n_fft=n_fft, fmin = fmin, fmax=fmax, mel_grill = False, n_mels=n_mels)
 
     dataset = AnuraSet(
@@ -74,7 +91,7 @@
             tol=1e-8,
             update_rule=nmf_algo,
             beta=beta,
-            normalize=[False, True],  # sparsity_coefficients=[0, 10],
+            normalize=[False, True],
             verbose=False,
             return_costs=False,
             deterministic=True,
@@ -96,6 +113,30 @@
     scoring,
     cv,
 ):
+    """
+    Grid search for the DBSCAN clustering, based on the NMF outputs.
+
+    In general, the parameter grid is used to find the best epsilon parameter for the DBSCAN clustering.
+
+    Parameters
+    ----------
+    all_H : list of np.array
+        List of the H matrices of the NMF.
+    all_annot : list of int
+        List of the annotations of the number of species in the signals.
+    param_grid : dict
+        Dictionary of the parameter grid for the DBSCAN clustering.
+    default_var_divide : float
+        Variance divide parameter for considering some components as noise.
+    default_metric : str
+        Metric for the DBSCAN clustering.
+    default_eps : float
+        Epsilon parameter for the DBSCAN clustering.
+    scoring : str
+        Scoring for the GridSearchCV.
+    cv : int
+        Number of folds for the GridSearchCV.
+
+    Returns
+    -------
+    fitted_estimation : list of int
+        Estimated number of sources for each signal, under the best parameters.
+    accuracy : float
+        Best accuracy obtained in the grid search.
+    """
     default_counter = species_count.SourceCountEstimator(
         var_divide=default_var_divide, eps=default_eps, metric=default_metric
     )
+ """ + exp_id = _run.meta_info['options']['--id'] + + # Compute the NMF on the dataset all_H, all_annot = compute_all_nmf_on_this_dataset() + + # Grid search for the DBSCAN clustering fitted_estimation, accuracy = grid_search_DBSCAN(all_H=all_H, all_annot=all_annot) + + # Log the results + # Log the number of samples first _run.log_scalar("Number samples", len(all_annot)) + + # Log the accuracy print(f"Accuracy: {accuracy}") _run.log_scalar("Accuracy", accuracy) + # Log the average difference between the annotations and the estimation difference = np.mean(np.abs(np.subtract(fitted_estimation, all_annot))) print(f"Difference: {difference}") _run.log_scalar("Difference", difference) + # Log the KL divergence between the annotations and the estimation kl = np.mean(kl_div(all_annot, fitted_estimation)) print(f"KL div: {kl}") _run.log_scalar("KL div", kl) - _run.log_scalar("Estimation", fitted_estimation) - # ConfusionMatrixDisplay.from_predictions(all_annot, fitted_estimation).plot() - # plt.savefig(f'jjba_experiments/confusion_matrix_beta{_config['beta']}.png') - # plt.show() + # Log all the fitted estimation, if needed + _run.log_scalar("Estimation", fitted_estimation) + # Plots the confusion matrix, and save it cm_display = ConfusionMatrixDisplay.from_predictions(all_annot, fitted_estimation) - # cm_display.plot() plt.gca().invert_yaxis() # Get the current axes and invert the y-axis plt.xlabel("Predicted number of sources") plt.ylabel("Annotated number of species") @@ -141,6 +195,7 @@ def expe_nmf_main(_run, _config): ) plt.close() + # Plots the histogram of the annotations and the estimation, and save it fig, ax = plt.subplots() ax.hist( fitted_estimation, bins=np.arange(0.5, 11.5, 1), alpha=0.5, label="estimation" @@ -155,12 +210,15 @@ def expe_nmf_main(_run, _config): if __name__ == "__main__": + ## Run all the experiments + # for subset in ["INCT4", "INCT17", "INCT41", "INCT20955"]: # for feature in ["mel", "nn_log_mel"]: # for beta in [2,1,0]: # id_experiment = f"beta{beta}_nmf10_feature{feature}_subset{subset}" # r = ex.run(config_updates={'beta':beta, 'feature':feature, 'subset':subset}, options={'--id':id_experiment}) + ## Run one case beta = 1 feature = "nn_log_mel" subset="INCT17" diff --git a/nmf_bioacoustic/tasks/__pycache__/source_separation.cpython-310.pyc b/nmf_bioacoustic/tasks/__pycache__/source_separation.cpython-310.pyc deleted file mode 100644 index 1aa63dd6b195bb5eedefe7b942aedfff9c868b83..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 1607 zcmd1j<>g{vU|`sjQkmAn&cN^(#6iZ)3=9ko3=9m#AR>h!g)xUAiV;jR<uWrdFf!z_ zM6ob3q%fo~<*??mMX`a|j5+MN98sK%3@Pj>tSM|Qj8R-EjKK_=94|p;`)M-X;wUai zO)k#JFTcf?UzC}inU|OoUyzYloa&a$4pIZfJPZsBoD2*M&LAf|V`N~cVM$@kW-8*T zVNGEuWi00LVn|_XW^7`tVNPKvWhfG@Va#GkVU}d5Wlm?PWiMe&VQFS$WGG?EVy<DR z0XdwZl_`axhPjm~jY*P0fT4z|h6Q9+3W|6NYcC5ULk(L|N(svX)`bi)Otl=foF#0y z%*bM|Vano2VM}38;Yi`^Wvk_?;atF(!nF`&H%k^*3U>-m3U4nHBSQ(x0`7$jj0`0_ zDSTkJ@<}o@Gr2Itw$yT$FsAU=aL;C#%Y<-q3IoVxHQXTALf9p|S$rT~4Py;A!Y3e6 zh&)J+fWO!m@YjH%jG>i@bdy1Ts$nbofEF_BDFTuVHB2}{LJ;CFklqqdcnE>`DZ+D^ z(Zi)Mr-nV4K~u!92$b@wLL>7_74lM3Qxr<_6$*;-%Q90^6-qKv6~JjlAwLbwfu$gY z#L|?^e2~)Avc#Oy#1e?w;{4L0<Wz;?)PlsK#FEVXyqAm&3=CC#;TicksS3H76(yxb zsaBfIxA@Xh6G80w{G_bZ<dR$bU|ZsgQcH?5K_-ITbc;DBKm8U<NoH>9OHkg?WWB{y zTy%>oBflU%Cp9m<B;yuaWqe{qX7Mey3JA?omYY~n#fxHJl}2W6da*vlZSe@V>6e3C z6%TclUO`@Zh$iDL?vkR!yyAkyqSU;SBCzW@g2BGBQh53Q|NsA0>=3R(l^hOZp%zuK zfQ-}>xW!tWkywy=i?yIAGq2<p4>;uFGg5O3Qj2a$<zyC@q~^t!<j0p}WERJR@@h#@ zetJ=2?k(nmoRV7{#idCFIr$~UMNA9~3`L+|DB@;dV7SFuoLUlJl39|IdW*d{u`D$$ 
zGyN6|IGeK;CFZ54-eSrtxW%5AS`G=H#Dap<ycA8ITa4ki7(I$uK}K+aLN-1pGdHuO zhz%sb3XX+aTp$a<PAR^{4fPGkowt}lmM|q&++xnmO~1vMxsstsgMoqJSCoE6er~FM zqLFcKVo`3szDsIxc1eDLen3$^D0YkW^K#STlQQ!Ylk-c9OEQx&_$7(O*;ryoub}dl z6eu}>5?DOcC5Q;*02Nh63=9k$i~@`tOk9jCOdL#Rj4X^ij5>^5jC_n7j3SJDj8TkE zj4Vt%j53S@jAD!&j7$wIOhsA@3=EnqkhJgTrpbMaHLo<cpt49B6b?#g?$s+T$;>I% zLy0+7uvn2WNIic}W>QgpaiU&IW^qAIVkJ1PZVBclmViPsCo@T}pc2Ho#Rj2^L_u*0 masoK~kO)nXaU3?e`6;D2sdk{`R}6}B4hA7c9%dd!J|+N*m&l9& diff --git a/nmf_bioacoustic/tasks/source_count.py b/nmf_bioacoustic/tasks/source_count.py index 86ee9f6..471fb37 100644 --- a/nmf_bioacoustic/tasks/source_count.py +++ b/nmf_bioacoustic/tasks/source_count.py @@ -1,19 +1,55 @@ -import sklearn.feature_selection +""" +Created on June 2024 + +@author: a23marmo + +Base code for the task of counting the number of species in a signal, using NMF. +""" + import numpy as np +import sklearn.feature_selection from sklearn.cluster import DBSCAN # HDBSCAN or OPTICS also possible, to test from sklearn.base import BaseEstimator from sklearn.metrics import accuracy_score - class SourceCountEstimator(BaseEstimator): - + # This class is used in the sklearn GridSearchCV. Hence, it follows sklearn's conventions. def __init__(self, *, var_divide=10, eps=0.8, metric='correlation'): + """ + Initializes the estimator object. + + Parameters + ---------- + var_divide : float + Variance divide parameter for considering some components as noise. + If the variance of a component is lower than the mean variance divided by var_divide, it is considered noise. + eps : float + Epsilon parameter for the DBSCAN clustering algorithm. + metric : str + Metric for the DBSCAN clustering algorithm. + """ self.var_divide = var_divide self.eps = eps self.metric = metric def fit(self, all_H, annotations): + """ + Fits the estimator to the data. + In this context, it means that it estimates the number of species in the signals. + + Parameters + ---------- + all_H : list of np.array + List of the H matrices of the NMF. + annotations : list of int + List of the annotations of the number of species in the signals. + + Returns + ------- + all_estimations : list of int + List of the estimations of the number of species in the signals. + """ all_estimations = [] for H in all_H: estimation = estimate_number_sources(H, var_divide=self.var_divide, eps=self.eps, metric=self.metric) @@ -22,6 +58,19 @@ class SourceCountEstimator(BaseEstimator): return all_estimations def predict(self, all_H): + """ + Predicts the number of species in the signals. + + Parameters + ---------- + all_H : list of np.array + List of the H matrices of the NMF. + + Returns + ------- + all_estimations : list of int + List of the estimations of the number of species in the signals. + """ all_estimations = [] for H in all_H: estimation = estimate_number_sources(H, var_divide=self.var_divide, eps=self.eps, metric=self.metric) @@ -30,12 +79,46 @@ class SourceCountEstimator(BaseEstimator): return all_estimations def score(self, estimations, annotations): - + """ + Computes the accuracy of the estimator. + + Parameters + ---------- + estimations : list of int + List of the estimations of the number of species in the signals. + annotations : list of int + List of the annotations of the number of species in the signals. + + Returns + ------- + accuracy : float + Accuracy of the estimator. 
+ """ return accuracy_score(annotations, estimations) def estimate_number_sources(H, var_divide = 10, eps = 0.7, metric = 'correlation'): - # This function is used to return the number of species in the current audio file + """ + Estimates the number of species in the signal, based on the H matrix of NMF. + This function is used to return the number of species in the current audio file + + Parameters + ---------- + H : np.array + H matrix of the NMF. + var_divide : float + Variance divide parameter for considering some components as noise. + If the variance of a component is lower than the mean variance divided by var_divide, it is considered noise. + eps : float + Epsilon parameter for the DBSCAN clustering algorithm. + metric : str + Metric for the DBSCAN clustering algorithm. + + Returns + ------- + number_species : int + Estimated number of species in the signal. + """ # First, remove all features with a variance lower than the mean variance divided by 10 H_cropped = threshold_H(H, var_divide=var_divide) @@ -46,6 +129,9 @@ def estimate_number_sources(H, var_divide = 10, eps = 0.7, metric = 'correlation return number_species def threshold_H(H, var_divide=10): + """ + Thresholds the H matrix of NMF, removing the components with a variance lower than the mean variance divided by var_divide. + """ var = np.var(H, axis=1) threshold = np.mean(var)/var_divide @@ -55,7 +141,10 @@ def threshold_H(H, var_divide=10): return H_cropped -def DBSCAN_count(H, eps = 0.7, metric = 'correlation'): # jensenshannon, mahalanobis, cityblock +def DBSCAN_count(H, eps = 0.7, metric = 'correlation'): + """ + Counts the number of clusters in the H matrix of NMF, using DBSCAN. + """ db = DBSCAN(eps=eps, min_samples=1, metric=metric) db.fit(H) labels = db.labels_ @@ -63,4 +152,7 @@ def DBSCAN_count(H, eps = 0.7, metric = 'correlation'): # jensenshannon, mahalan return n_clusters_ def compute_difference(estimation, annotation): + """ + Computes the difference between the estimation and the annotation. + """ return np.abs(estimation - annotation) diff --git a/nmf_bioacoustic/tasks/source_separation.py b/nmf_bioacoustic/tasks/source_separation.py index 7a5516c..b338f46 100644 --- a/nmf_bioacoustic/tasks/source_separation.py +++ b/nmf_bioacoustic/tasks/source_separation.py @@ -1,3 +1,11 @@ +""" +Created on June 2024 + +@author: a23marmo + +Base code for the task of source separation in a signal, using NMF. +This is only qualitative, as the annotations of independent sources are not provided. +""" import numpy as np import nmf_bioacoustic.utils.audio_helper as audio_helper @@ -6,6 +14,34 @@ from librosa.display import specshow import matplotlib.pyplot as plt def evaluate_source_separation(W, H, feature_object, time_limit=None, phase_retrieval="original_phase", phase=None, plot_specs=False): + """ + Evaluates the source separation of the NMF. + Plots the audio of the mixture and the separated sources. + It can also plots the spectrograms of the mixture and the separated sources. + + Parameters + ---------- + W : np.array + W matrix of the NMF. + H : np.array + H matrix of the NMF. + feature_object : FeatureObject + Feature object, defining the important parameters to compute spectrograms. + time_limit : int + Time limit to evaluate the source separation, and limit the size of the audio and spectrograms. + phase_retrieval : str + Method to retrieve the phase of the audio. It can be 'original_phase' or 'griffin_lim'. + phase : np.array + Phase of the original audio, to be used in the phase retrieval. 
diff --git a/nmf_bioacoustic/tasks/source_separation.py b/nmf_bioacoustic/tasks/source_separation.py
index 7a5516c..b338f46 100644
--- a/nmf_bioacoustic/tasks/source_separation.py
+++ b/nmf_bioacoustic/tasks/source_separation.py
@@ -1,3 +1,11 @@
+"""
+Created on June 2024
+
+@author: a23marmo
+
+Base code for the task of source separation in a signal, using NMF.
+This evaluation is only qualitative, as annotations of the independent sources are not provided.
+"""
 import numpy as np
 
 import nmf_bioacoustic.utils.audio_helper as audio_helper
@@ -6,6 +14,34 @@
 from librosa.display import specshow
 import matplotlib.pyplot as plt
 
 def evaluate_source_separation(W, H, feature_object, time_limit=None, phase_retrieval="original_phase", phase=None, plot_specs=False):
+    """
+    Evaluates the source separation of the NMF.
+    Plots the audio of the mixture and of the separated sources.
+    It can also plot the spectrograms of the mixture and of the separated sources.
+
+    Parameters
+    ----------
+    W : np.array
+        W matrix of the NMF.
+    H : np.array
+        H matrix of the NMF.
+    feature_object : FeatureObject
+        Feature object, defining the important parameters to compute spectrograms.
+    time_limit : int
+        Time limit used to evaluate the source separation, limiting the size of the audio and spectrograms.
+    phase_retrieval : str
+        Method to retrieve the phase of the audio. It can be 'original_phase' or 'griffin_lim'.
+    phase : np.array
+        Phase of the original audio, to be used in the phase retrieval.
+        Only used if phase_retrieval is 'original_phase'.
+    plot_specs : bool
+        If True, plots the spectrograms of the mixture and the separated sources.
+
+    Returns
+    -------
+    source_list : list of np.array
+        List of the separated sources, as spectrograms.
+    """
     if phase_retrieval == "original_phase":
         assert phase is not None, "You need to provide the phase of the original audio to evaluate the source separation"
 
     source_list = []
+    # Guard for the Griffin-Lim case, where no original phase is provided
+    cropped_phase = phase[:, :time_limit] if phase is not None else None
 
+    # Listen to the whole mixture
     print("Whole mixture:")
     audio_helper.listen_to_this_spectrogram(W@H[:,:time_limit], feature_object=feature_object, phase_retrieval = phase_retrieval, original_phase = cropped_phase)
+
+    # Plots the spectrogram of the whole mixture
     if plot_specs:
         fig, ax = plt.subplots()
         img = specshow(W@H[:,:time_limit], sr=feature_object.sr, hop_length=feature_object.hop_length, y_axis="log", x_axis="time", vmax=10)  # specshow(W@H, sr=sr, hop_length=hop_length, y_axis="log")
         ax.set_title("Whole mixture")
         plt.savefig(f"imgs/source_separation/whole_mixture.png", transparent = True)
         plt.show()
+
+    # Listen to the separated sources
     for i in range(0, H.shape[0]):
         print(f"Source: {i}")
         audio_helper.listen_to_this_spectrogram(W[:,i][:,np.newaxis]@H[i,:time_limit][np.newaxis,:], feature_object=feature_object, phase_retrieval = phase_retrieval, original_phase = cropped_phase)
         source_list.append(W[:,i][:,np.newaxis]@H[i,:time_limit][np.newaxis,:])
+        # Plots the spectrogram of the separated source
         if plot_specs:
             fig, ax = plt.subplots()
             img = specshow(source_list[-1], sr=feature_object.sr, hop_length=feature_object.hop_length, y_axis="log", x_axis="time", vmax=10)  # specshow(W@H, sr=sr, hop_length=hop_length, y_axis="log")
diff --git a/nmf_bioacoustic/utils/audio_helper.py b/nmf_bioacoustic/utils/audio_helper.py
index 46d6920..52030fa 100755
--- a/nmf_bioacoustic/utils/audio_helper.py
+++ b/nmf_bioacoustic/utils/audio_helper.py
@@ -1,3 +1,11 @@
+"""
+Created on April 2024
+
+@author: a23marmo
+
+Code to listen to audio based on spectrograms, and to compute the SDR between two audio signals.
+"""
+
 import IPython.display as ipd
 import mir_eval
 import numpy as np
@@ -14,11 +22,15 @@ def listen_to_this_spectrogram(spectrogram, feature_object, phase_retrieval = "g
     ----------
     spectrogram : numpy array
         The spectrogram to be inverted.
-    hop_length : integer
-        The hop length, in number of samples.
-    sampling_rate : integer, optional
-        The sampling rate of the signal, in Hz.
-        The default is 44100.
+    feature_object : FeatureObject
+        Feature object, defining the important parameters to compute spectrograms.
+    phase_retrieval : str
+        Method to retrieve the phase of the audio. It can be 'original_phase' or 'griffin_lim'.
+        If set to 'original_phase', the original_phase parameter is used as an estimation of the phase.
+        If set to 'griffin_lim', the phase is estimated using the Griffin-Lim algorithm.
+    original_phase : numpy array
+        Phase of the original audio, to be used in the phase retrieval.
+        Only used if phase_retrieval is 'original_phase'.
 
     Returns
     -------
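A sketch of how the helper above is typically called from the separation code: rebuild one rank-1 source from the NMF factors and listen to it in a notebook. `W`, `H` and `phase` are assumed to come from an NMF of an STFT-like spectrogram, as in the source separation task above.

```python
import numpy as np
from nmf_bioacoustic.utils.audio_helper import listen_to_this_spectrogram

# Rank-1 reconstruction of component i from the NMF factors.
i = 0
source_spec = W[:, i][:, np.newaxis] @ H[i, :][np.newaxis, :]
listen_to_this_spectrogram(source_spec, feature_object,
                           phase_retrieval="original_phase",
                           original_phase=phase)
```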
+""" + def time_to_frame(time_seconds, sr, hl): """ Compute the index of the frame given a time in seconds. - Parameters: + Parameters + ---------- - time_seconds: Time in seconds. - sr: Sampling rate in Hz. - hl: Hop length in samples. - Returns: + Returns + ------- - frame_index: Index of the frame. """ # Convert time to samples @@ -20,12 +30,14 @@ def frame_to_time(frame_index, sr, hl): """ Compute the time in seconds given the frame index. - Parameters: + Parameters + ---------- - frame_index: Index of the frame. - sr: Sampling rate in Hz. - hl: Hop length in samples. - Returns: + Returns + ------- - time_seconds: Time in seconds. """ # Compute time in samples @@ -36,6 +48,15 @@ def frame_to_time(frame_index, sr, hl): def crop_time(spec, time_limit_s, sr, hl): """ + Crop the spectrogram to a certain time limit. + Automatically converts the time_limit in seconds to the frame index. + + Parameters + ---------- + - spec: Spectrogram to be cropped. + - time_limit_s: Time limit in seconds. + - sr: Sampling rate in Hz. + - hl: Hop length in samples. """ # Compute the number of frames to keep limit_frame = time_to_frame(time_limit_s, sr, hl) diff --git a/nmf_bioacoustic/utils/signal_to_spectrogram.py b/nmf_bioacoustic/utils/signal_to_spectrogram.py index 94b55ef..dcd1227 100755 --- a/nmf_bioacoustic/utils/signal_to_spectrogram.py +++ b/nmf_bioacoustic/utils/signal_to_spectrogram.py @@ -4,11 +4,18 @@ Created on Wed Mar 25 16:54:59 2020 @author: amarmore -Computing spectrogram in different feature description. +Computing spectrogram in different feature description, using the library librosa [1]. -Note that Mel (and variants of Mel) spectrograms follow the particular definition of [1]. +Note that Mel (and variants of Mel) spectrograms follow the particular definition of [2]. -[1] Grill, T., & Schlüter, J. (2015, October). + +References +---------- +[1] McFee, B., Raffel, C., Liang, D., Ellis, D. P., McVicar, M., Battenberg, E., & Nieto, O. (2015, July). +librosa: Audio and music signal analysis in python. +In Proceedings of the 14th python in science conference (Vol. 8). + +[2] Grill, T., & Schlüter, J. (2015, October). Music Boundary Detection Using Neural Networks on Combined Features and Two-Level Annotations. In ISMIR (pp. 531-537). """ @@ -24,7 +31,46 @@ import IPython.display as ipd mel_power = 2 class FeatureObject(): + # An object to store the parameters of the spectrogram def __init__(self, sr, feature, hop_length, n_fft=2048, fmin = 0, fmax=None, mel_grill = True, n_mels=80): + """ + Initializes the object with the parameters of the spectrogram. + + Parameters + ---------- + sr : float + Sampling rate of the signal, (typically 44100Hz). + feature : String + The types of spectrograms to compute. + - "pcp" : Pitch class profile + - "cqt" : Constant-Q transform + - "mel" : Mel spectrogram + - "log_mel" : Log Mel spectrogram + - "nn_log_mel" : Nonnegative Log Mel spectrogram + - "padded_log_mel" : Padded Log Mel spectrogram + - "minmax_log_mel" : Min-max normalized Log Mel spectrogram + - "stft" : Short-time Fourier transform + - "stft_complex" : Complex Short-time Fourier transform + hop_length : integer + The desired hop_length, which is the step between two frames (ie the time "discretization" step) + It is expressed in terms of number of samples, which are defined by the sampling rate. + n_fft : integer, optional + The number of samples to use in the FFT. + The default is 2048. + fmin : integer, optional + The minimal frequence to consider, used for denoising. + The default is 0. 
diff --git a/nmf_bioacoustic/utils/signal_to_spectrogram.py b/nmf_bioacoustic/utils/signal_to_spectrogram.py
index 94b55ef..dcd1227 100755
--- a/nmf_bioacoustic/utils/signal_to_spectrogram.py
+++ b/nmf_bioacoustic/utils/signal_to_spectrogram.py
@@ -4,11 +4,18 @@ Created on Wed Mar 25 16:54:59 2020
 
 @author: amarmore
 
-Computing spectrogram in different feature description.
+Computing spectrograms with different feature descriptions, using the library librosa [1].
 
-Note that Mel (and variants of Mel) spectrograms follow the particular definition of [1].
+Note that Mel (and variants of Mel) spectrograms follow the particular definition of [2].
 
-[1] Grill, T., & Schlüter, J. (2015, October).
+
+References
+----------
+[1] McFee, B., Raffel, C., Liang, D., Ellis, D. P., McVicar, M., Battenberg, E., & Nieto, O. (2015, July).
+librosa: Audio and music signal analysis in python.
+In Proceedings of the 14th python in science conference (Vol. 8).
+
+[2] Grill, T., & Schlüter, J. (2015, October).
 Music Boundary Detection Using Neural Networks on Combined Features and Two-Level Annotations.
 In ISMIR (pp. 531-537).
 """
@@ -24,7 +31,46 @@
 mel_power = 2
 
 class FeatureObject():
+    # An object to store the parameters of the spectrogram
     def __init__(self, sr, feature, hop_length, n_fft=2048, fmin = 0, fmax=None, mel_grill = True, n_mels=80):
+        """
+        Initializes the object with the parameters of the spectrogram.
+
+        Parameters
+        ----------
+        sr : float
+            Sampling rate of the signal (typically 44100 Hz).
+        feature : String
+            The type of spectrogram to compute.
+            - "pcp" : Pitch class profile
+            - "cqt" : Constant-Q transform
+            - "mel" : Mel spectrogram
+            - "log_mel" : Log Mel spectrogram
+            - "nn_log_mel" : Nonnegative Log Mel spectrogram
+            - "padded_log_mel" : Padded Log Mel spectrogram
+            - "minmax_log_mel" : Min-max normalized Log Mel spectrogram
+            - "stft" : Short-time Fourier transform
+            - "stft_complex" : Complex Short-time Fourier transform
+        hop_length : integer
+            The desired hop_length, which is the step between two frames (i.e. the time "discretization" step).
+            It is expressed as a number of samples, which are defined by the sampling rate.
+        n_fft : integer, optional
+            The number of samples to use in the FFT.
+            The default is 2048.
+        fmin : integer, optional
+            The minimal frequency to consider, used for denoising.
+            The default is 0.
+        fmax : integer, optional
+            The maximal frequency to consider, used for denoising.
+            The default is None.
+        mel_grill : boolean, optional
+            If True, the Mel spectrogram is computed with the parameters of [2].
+            Only used if feature is "mel" or a derivative (like "log_mel" or "nn_log_mel").
+            The default is True.
+        n_mels : integer, optional
+            Number of mel bands to consider.
+            The default is 80.
+        """
         self.sr = sr
         self.feature = feature.lower()
         self.hop_length = hop_length
@@ -51,28 +97,13 @@
     def get_spectrogram(self, signal):
         """
         Returns a spectrogram, from the signal of a song.
-        Different types of spectrogram can be computed, which are specified by the argument "feature".
+        Different types of spectrograms can be computed, see the docstring of the object constructor.
         All these spectrograms are computed with the toolbox librosa [1].
 
         Parameters
         ----------
         signal : numpy array
             Signal of the song.
-        sr : float
-            Sampling rate of the signal, (typically 44100Hz).
-        feature : String
-            The types of spectrograms to compute.
-            TODO
-
-        hop_length : integer
-            The desired hop_length, which is the step between two frames (ie the time "discretization" step)
-            It is expressed in terms of number of samples, which are defined by the sampling rate.
-        fmin : integer, optional
-            The minimal frequence to consider, used for denoising.
-            The default is 98.
-        n_mfcc : integer, optional
-            Number of mfcc features.
-            The default is 20 (as in librosa).
 
         Raises
         ------
@@ -83,16 +114,6 @@
         Returns
         -------
         numpy array
             Spectrogram of the signal.
-
-        References
-        ----------
-        [1] McFee, B., Raffel, C., Liang, D., Ellis, D. P., McVicar, M., Battenberg, E., & Nieto, O. (2015, July).
-        librosa: Audio and music signal analysis in python.
-        In Proceedings of the 14th python in science conference (Vol. 8).
-
-        [2] Grill, T., & Schlüter, J. (2015, October).
-        Music Boundary Detection Using Neural Networks on Combined Features and Two-Level Annotations.
-        In ISMIR (pp. 531-537).
         """
         match self.feature:
             case "pcp":
@@ -119,6 +140,9 @@
                 raise err.InvalidArgumentValueException(f"Unknown signal representation: {self.feature}.")
 
     def _compute_pcp(self, signal):
+        """
+        Computes the Pitch Class Profile of a signal.
+        """
         norm=inf  # Columns normalization
         win_len_smooth=82  # Size of the smoothing window
         n_octaves=6
             norm=norm, win_len_smooth=win_len_smooth)
 
     def _compute_cqt(self, signal):
+        """
+        Computes the Constant-Q Transform of a signal.
+        """
         constant_q_transf = librosa.cqt(y=signal, sr = self.sr, hop_length = self.hop_length)
         return np.abs(constant_q_transf)
 
     def _compute_mel_spectrogram(self, signal):
+        """
+        Computes the Mel spectrogram of a signal.
+        """
         if self.mel_grill:
             mel = librosa.feature.melspectrogram(y=signal, sr = self.sr, n_fft=2048, hop_length = self.hop_length, n_mels=80, fmin=80.0, fmax=16000, power=mel_power)
         else:
         return np.abs(mel)
 
     def _compute_stft(self, signal, complex):
+        """
+        Computes the Short-time Fourier Transform of a signal.
+        """
         stft = librosa.stft(y=signal, hop_length=self.hop_length, n_fft=self.n_fft)
         if complex:
             mag, phase = librosa.magphase(stft, power = 1)
             return np.abs(stft)
+ """ if feature is None: # Recursive function, so it takes the feature as an argument feature = self.feature # Default case takes the object feature as the feature to compute diff --git a/nmf_bioacoustic/utils/spectrogram_to_signal.py b/nmf_bioacoustic/utils/spectrogram_to_signal.py index f0c3e54..39e72b7 100755 --- a/nmf_bioacoustic/utils/spectrogram_to_signal.py +++ b/nmf_bioacoustic/utils/spectrogram_to_signal.py @@ -1,3 +1,11 @@ +""" +Created on April 2024 + +@author: a23marmo + +Code to transform spectrograms to audio signals. +""" + import nmf_bioacoustic.utils.errors as err import nmf_bioacoustic.utils.signal_to_spectrogram as signal_to_spectrogram @@ -9,6 +17,28 @@ import librosa # %% Audio to signal conversion def spectrogram_to_audio_signal(spectrogram, feature_object, phase_retrieval = "griffin_lim", original_phase=None): + """ + Inverts the spectrogram using the istft method, and plots the audio using IPython.diplay.audio. + + Parameters + ---------- + spectrogram : numpy array + The spectrogram to be inverted. + feature_object : FeatureObject + Feature object, defining the important parameters to compute spectrograms. + phase_retrieval : str + Method to retrieve the phase of the audio. It can be 'original_phase' or 'griffin_lim'. + If set to 'original_phase', the original_phase parameter is used as an estimation of the phase. + If set to 'griffin_lim', the phase is estimated using the Griffin-Lim algorithm. + original_phase : numpy array + Phase of the original audio, to be used in the phase retrieval. + Only used if phase_retrieval is 'original_phase'. + + Returns + ------- + IPython.display audio + The audio signal of the song, reconstructed from NTD. + """ if feature_object.feature in ["stft", "stft_complex"]: spectrogram_stft = spectrogram elif feature_object.feature in ["mel", "log_mel", "nn_log_mel"]: @@ -29,5 +59,8 @@ def spectrogram_to_audio_signal(spectrogram, feature_object, phase_retrieval = " raise err.InvalidArgumentValueException(f"Phase retrieval method not understood: {phase_retrieval}.") def complex_stft_to_audio(stft_to_inverse, hop_length): + """ + Inverts the complex spectrogram using the istft method. + """ return librosa.istft(stft_to_inverse, hop_length = hop_length) -- GitLab