From d1152e6159e33ec75b9477cb9e4edfb464659f92 Mon Sep 17 00:00:00 2001
From: ax-le <axel.marmoret@imt-atlantique.fr>
Date: Tue, 25 Jun 2024 18:40:43 +0200
Subject: [PATCH] Adding comments and a gitignore

---
 .gitignore                                    |  21 ++++
 README.md                                     |   2 +
 .../__pycache__/anuraset.cpython-310.pyc      | Bin 1201 -> 0 bytes
 .../__pycache__/base_dataset.cpython-310.pyc  | Bin 1662 -> 0 bytes
 .../__pycache__/bouffaut.cpython-310.pyc      | Bin 1664 -> 0 bytes
 .../datasets/__pycache__/narw.cpython-310.pyc | Bin 985 -> 0 bytes
 .../__pycache__/whalesing.cpython-310.pyc     | Bin 751 -> 0 bytes
 nmf_bioacoustic/datasets/anuraset.py          |  27 +++++
 nmf_bioacoustic/datasets/base_dataset.py      |  67 ++++++++++-
 nmf_bioacoustic/datasets/bouffaut.py          |  73 +++++++++++-
 nmf_bioacoustic/datasets/narw.py              |  35 ++++++
 nmf_bioacoustic/datasets/whalesing.py         |  17 +++
 .../experiments/anuraset_nmf_count.py         | 110 +++++++++++++-----
 .../source_separation.cpython-310.pyc         | Bin 1607 -> 0 bytes
 nmf_bioacoustic/tasks/source_count.py         | 104 ++++++++++++++++-
 nmf_bioacoustic/tasks/source_separation.py    |  42 +++++++
 nmf_bioacoustic/utils/audio_helper.py         |  22 +++-
 nmf_bioacoustic/utils/data_manipulation.py    |  29 ++++-
 .../utils/signal_to_spectrogram.py            |  94 ++++++++++-----
 .../utils/spectrogram_to_signal.py            |  33 ++++++
 20 files changed, 603 insertions(+), 73 deletions(-)
 create mode 100755 .gitignore
 delete mode 100644 nmf_bioacoustic/datasets/__pycache__/anuraset.cpython-310.pyc
 delete mode 100644 nmf_bioacoustic/datasets/__pycache__/base_dataset.cpython-310.pyc
 delete mode 100644 nmf_bioacoustic/datasets/__pycache__/bouffaut.cpython-310.pyc
 delete mode 100644 nmf_bioacoustic/datasets/__pycache__/narw.cpython-310.pyc
 delete mode 100644 nmf_bioacoustic/datasets/__pycache__/whalesing.cpython-310.pyc
 delete mode 100644 nmf_bioacoustic/tasks/__pycache__/source_separation.cpython-310.pyc

diff --git a/.gitignore b/.gitignore
new file mode 100755
index 0000000..3d4ac08
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,21 @@
+__pycache__/
+.ipynb_checkpoints/
+
+# Distribution / packaging
+.Python
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+wheels/
+*.egg-info/
+.installed.cfg
+*.egg
+MANIFEST
\ No newline at end of file
diff --git a/README.md b/README.md
index 61fd9ff..0eb921f 100644
--- a/README.md
+++ b/README.md
@@ -15,6 +15,7 @@ For now, this repository is mainly a Proof-of-Concept that NMF-like methods can
 This repository does not include the NMF code, which is instead maintained in the GitLab project `nn_fac` of the current corresponding author (https://gitlab.imt-atlantique.fr/a23marmo/nonnegative-factorization/).
 
 ## Installation
+
 You should first clone/fork/download this repository.
 
 You should then install the requirements using the command
@@ -26,4 +27,5 @@ And finally install the code using
 
 This is the recommended way to install the code. You may use another method, but note that the project was probably neither designed nor tested in any other way.
 ## Contact
+
 Don't hesitate to contact me at the following email address if you have any questions: axel.marmoret@imt-atlantique.fr
\ No newline at end of file
diff --git a/nmf_bioacoustic/datasets/__pycache__/anuraset.cpython-310.pyc b/nmf_bioacoustic/datasets/__pycache__/anuraset.cpython-310.pyc
deleted file mode 100644
index 41719a3b7fa8f86f5ea63ed6e51c6074e022ff84..0000000000000000000000000000000000000000
diff --git a/nmf_bioacoustic/datasets/__pycache__/base_dataset.cpython-310.pyc b/nmf_bioacoustic/datasets/__pycache__/base_dataset.cpython-310.pyc
deleted file mode 100644
index ee858bbde7d3b09af4dd536cfe553b56c0f35dc6..0000000000000000000000000000000000000000
diff --git a/nmf_bioacoustic/datasets/__pycache__/bouffaut.cpython-310.pyc b/nmf_bioacoustic/datasets/__pycache__/bouffaut.cpython-310.pyc
deleted file mode 100644
index 19955528679cb839831855135a58e3f3ba30744b..0000000000000000000000000000000000000000
diff --git a/nmf_bioacoustic/datasets/__pycache__/narw.cpython-310.pyc b/nmf_bioacoustic/datasets/__pycache__/narw.cpython-310.pyc
deleted file mode 100644
index 7e4271b2655b983cd64822e26073903069b038e0..0000000000000000000000000000000000000000
zC5a`O`FX{f?6+8oQ*+X8aV3_fWah^gB$i~{5<pTApO%@EdW)&J=oWigYGO%gQR*$O zjQoQ5oYcJZl8jrddGTp!B`X<kamL3d=Oh*v$H%W^_!Xp|k)NBYpJ-&9n^=^aukVst zoL!P%pdV0_pOu<iQmh}GSeB}go>`)hnOBlpl$V&J@0_2Ks-KjZpO_q<m6epJp8^S~ zV*R|tqH?{0$|6n%1_n^b7lQ~fMghhu5u^~+gZVX?2^xYR3ZxW-ok5|f#lXN&!;l5W zDU6aJnz4pak|CQ(lA)HVgsFxpg-Md3nX#6+hB=r)liBYU2gvVviMgq_I7>2fQ;SOy za|<+?i+C6q7;bSV=H$c|XQt;R<`myz$;m7((PSy&1<5dHrc~VG2M1w%Nk(QdBm_mk z0*NK5De)<(C8^2a&@U2WU|@g{;vlX3>8U00P=%n_0i}RqHU<U;DMlVf7RD-RWFMoc z)nq9Gd9{cYWF$yW5hx95vVoET$Pe-Hx47ctbMsS5b5i5uZ}G&(7nUaGKxDvyTO<#1 z0xvWklT&kY;^QH11G`cLWG={+MW8eZ!eEziFz_)7uz<z<G?{OS!s1OYDX};;9?HAL n=boCET9lXrP5)r!NM?gA0GWM@!v^9)J4R5Hfjq>)Ai@LyJnR0K diff --git a/nmf_bioacoustic/datasets/__pycache__/whalesing.cpython-310.pyc b/nmf_bioacoustic/datasets/__pycache__/whalesing.cpython-310.pyc deleted file mode 100644 index 141c8b9c3f9b5dec4c5ce2dcc4d678e3eb9d9152..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 751 zcmd1j<>g{vU|?9fu{7-;0|Ucj5C<8vFfcGUFfcF_%P=r7q%fo~<}idXxHF_Mr7*WJ zq%fy4HZw;trLY7uXtKTp>C<Gq#i*4GVj*K@D5qG3fq@~FA&N1DA&RM;A&oJGF@>px zBZ@hNIfbQ#A&RA)frTN86=d2i&hU)HoYdgVy!2$4c91C`HY)=I1BfmbVPIe=VQ6Mp zz*xhW!cfDskeQJog&~+>C8M7v(=FEG(t^~YTO9H6nR%Hd@$s6>w^)i(bJA{cC6=aS z=EoN#mSo)GOG`~GDJ@Ek&rixqO)go<c#AVWJ~=0`xHvw3CBv^!{fzwFRQ*IF<J`oe z+<bkP)Z*-t`~v-eqI{6@V*R|_wD_dV{KVw^(&CcLWDI^vVo73gYDuwvImkQ3nR)4Y z1(ii?3=9mQpe<%%U|<kotP+BIRuAUdWRMid#~=)1GcqtRID?`Bq!HxHA`qj9hk=1X zlc|WEfq~%`b7o2fSO7xsg2edKQ%m9#^YZdb5=%1k^NK+V!Pc@cR!JhPMN_581`4;l z#N5>Q_*-1@@wxdar8yurPkek~X<`mU2JDX_evmc1&`?WG&B=+6hu9BtS`jFH6@gq* z1d1{cE(ZCYgMo)pfEg_2r^$Rv6)ha~VBw~hlvtb^59QtBb5G4nElSLBfrKhpJCa>s Y3qf|>;;@0Z*bWqG#h`%VU=U#f0H)%$ZvX%Q diff --git a/nmf_bioacoustic/datasets/anuraset.py b/nmf_bioacoustic/datasets/anuraset.py index 819a669..4ba2ad5 100644 --- a/nmf_bioacoustic/datasets/anuraset.py +++ b/nmf_bioacoustic/datasets/anuraset.py @@ -1,6 +1,17 @@ +""" +Created on June 2024 + +@author: a23marmo + +Defines the dataset class for the AnuraSet dataset. Inherits from the base dataset class. +Annotations are simplified to the number of species present in the signal. +Species are counted as individual species, hence the number of species may be different from the number of calls. +""" + from nmf_bioacoustic.datasets.base_dataset import * class AnuraSet(GeneralDataset): + def __init__(self, audio_path, annotations_file, feature_object): super().__init__(audio_path, annotations_file, feature_object) self.annotations = pd.read_csv(annotations_file) @@ -8,5 +19,21 @@ class AnuraSet(GeneralDataset): self.annotations['nonzero_species_count'] = self.annotations.filter(regex='^SPECIES').apply(lambda row: (row != 0).sum(), axis=1) def get_annotations(self, idx): + """ + Returns the annotations of the idx-th signal in the dataset. + Annotations consist here of the number of species present in the signal. + This is a simplification of the original dataset, where the annotations are more complex. + Species are counted as individual species, hence the number of species may be different from the number of calls. + + Parameters + ---------- + idx : int + Index of the signal in the dataset. + + Returns + ------- + annot_this_file : int + Number of species present in the signal. 
+ """ number_of_species = self.annotations['nonzero_species_count'][self.annotations["AUDIO_FILE_ID"] == self.all_signals[idx].split(".")[0]].values[0] return number_of_species \ No newline at end of file diff --git a/nmf_bioacoustic/datasets/base_dataset.py b/nmf_bioacoustic/datasets/base_dataset.py index 32888b7..1614460 100644 --- a/nmf_bioacoustic/datasets/base_dataset.py +++ b/nmf_bioacoustic/datasets/base_dataset.py @@ -1,18 +1,57 @@ -import pandas as pd +""" +Created on June 2024 + +@author: a23marmo + +Defines a base dataset class, from which will inherit the individual and specialized dataset classes. +""" + +# Imports import librosa import os -import nmf_bioacoustic.utils.signal_to_spectrogram as sig_to_spec +import pandas as pd class GeneralDataset(): + # This class is a general dataset class, from which will inherit the specialized dataset classes. def __init__(self, audio_path, annotations_file, feature_object): + """ + Initializes the dataset object. + + Parameters + ---------- + audio_path : str + Path to the audio files. + annotations_file : str + Path to the annotations file. + feature_object : FeatureObject + Feature object, defining the important parameters to compute spectrograms. + """ self.audio_path = audio_path self.all_signals = list(os.listdir(self.audio_path)) self.feature_object = feature_object def __len__(self): + """ + Returns the number of signals in the dataset. + """ return len(self.all_signals) def __getitem__(self, idx): + """ + Returns the spectrogram and the annotations of the idx-th signal in the dataset. + + Parameters + ---------- + idx : int + Index of the signal in the dataset. + + Returns + ------- + spec : numpy array + Spectrogram of the signal. + annot_this_file : not defined + Annotations of the signal, the type of which will depend on the dataset. + """ signal, orig_sr = librosa.load(os.path.join(self.audio_path, self.all_signals[idx]), sr = self.feature_object.sr) if self.feature_object.sr != orig_sr: signal = librosa.resample(signal, orig_sr, self.feature_object.sr) @@ -22,9 +61,33 @@ class GeneralDataset(): return spec, annot_this_file def get_annotations(self, idx): + """ + Returns the annotations of the idx-th signal in the dataset. + Base function, which should be implemented in the child class. + + Parameters + ---------- + idx : int + Index of the signal in the dataset. + """ raise NotImplementedError("This method should be implemented in the child class") def get_item_of_id(self, audio_id): + """ + Returns the spectrogram and the annotations of the signal with the given id. + + Parameters + ---------- + audio_id : str + Id of the signal in the dataset. + + Returns + ------- + spec : numpy array + Spectrogram of the signal. + annot_this_file : not defined + Annotations of the signal, the type of which will depend on the dataset. + """ index = self.all_signals.index(audio_id) return self.__getitem__(index) diff --git a/nmf_bioacoustic/datasets/bouffaut.py b/nmf_bioacoustic/datasets/bouffaut.py index 6cf4f6e..280b002 100644 --- a/nmf_bioacoustic/datasets/bouffaut.py +++ b/nmf_bioacoustic/datasets/bouffaut.py @@ -1,12 +1,51 @@ +""" +Created on June 2024 + +@author: a23marmo + +Defines the dataset class for the Bouffaut2020 dataset. Inherits from the base dataset class. +Corresponds to calls from Whales at very low frequencies. 
+""" + from nmf_bioacoustic.datasets.base_dataset import * -import numpy as np class Bouffaut(GeneralDataset): + def __init__(self, audio_path, annotations_file, feature_object): + """ + Initializes the dataset object. + + Parameters + ---------- + audio_path : str + Path to the audio files. + annotations_file : str + Path to the annotations file. + feature_object : FeatureObject + Feature object, defining the important parameters to compute spectrograms. + """ super().__init__(audio_path, annotations_file, feature_object) self.annotations = pd.read_csv(annotations_file, delimiter='\t') def get_annotations(self, idx): + """ + Returns the annotations of the idx-th signal in the dataset. + Annotations consist here of the type of call, the beginning and the end of the call. + + Parameters + ---------- + idx : int + Index of the signal in the dataset. + + Returns + ------- + type : pandas Series + Type of the calls in the signal. + begin : pandas Series + Beginning of the calls in the signal. + end : pandas Series + End of the calls in the signal. + """ annot_this_file = self.annotations[self.annotations["File"] == self.all_signals[idx].split(".")[0]] type = annot_this_file['Type'] begin = annot_this_file['Begin Time (s)'] @@ -14,6 +53,26 @@ class Bouffaut(GeneralDataset): return type, begin, end def crop_annotations(self, annotations, time_limit_s): + """ + Crops the annotations to the time limit. + Useful to focus experiments on a part of the audio signal. + + Parameters + ---------- + annotations : tuple + Tuple of pandas Series, containing the type of the calls, the beginning and the end of the calls. + time_limit_s : float + Time limit in seconds. + + Returns + ------- + type : pandas Series + Type of the calls in the signal. + begin : pandas Series + Beginning of the calls in the signal. + end : pandas Series + End of the calls in the signal. + """ type, begin, end = annotations indices_ok = begin[begin < time_limit_s].keys() type = type[indices_ok] @@ -22,6 +81,18 @@ class Bouffaut(GeneralDataset): return type, begin, end def create_one_annotation_file(dataset_path, annotations_1="Annotation.txt", annotations_2="Annotation_RR48_2013_D151.txt"): + """ + Merges two annotation files into one. + + Parameters + ---------- + dataset_path : str + Path to the dataset. + annotations_1 : str + Name of the first annotation file. + annotations_2 : str + Name of the second annotation file. + """ annot_1 = pd.read_csv(f"{dataset_path}/{annotations_1}", delimiter='\t') annot_2 = pd.read_csv(f"{dataset_path}/{annotations_2}", delimiter='\t') pd.concat([annot_1, annot_2]).to_csv(f"{dataset_path}/merged_annotations.txt", sep='\t', index=False) \ No newline at end of file diff --git a/nmf_bioacoustic/datasets/narw.py b/nmf_bioacoustic/datasets/narw.py index 9482a61..861df0b 100644 --- a/nmf_bioacoustic/datasets/narw.py +++ b/nmf_bioacoustic/datasets/narw.py @@ -1,11 +1,46 @@ +""" +Created on June 2024 + +@author: a23marmo + +Defines the dataset class for the NARW (North Atlantic Right Whale) dataset. Inherits from the base dataset class. +""" + from nmf_bioacoustic.datasets.base_dataset import * class NARWDataset(GeneralDataset): + def __init__(self, audio_path, annotations_file, feature_object): + """ + Initializes the dataset object. + + Parameters + ---------- + audio_path : str + Path to the audio files. + annotations_file : str + Path to the annotations file. + feature_object : FeatureObject + Feature object, defining the important parameters to compute spectrograms. 
+ """ super().__init__(audio_path, annotations_file, feature_object) self.annotations = pd.read_csv(annotations_file) def get_annotations(self, idx): + """ + Returns the annotations of the idx-th signal in the dataset. + Annotations consist here of the time and types of the calls. + + Parameters + ---------- + idx : int + Index of the signal in the dataset. + + Returns + ------- + annot_this_file : pandas Series + Time and type of the calls. + """ annot_this_file = self.annotations[self.annotations["filename"] == self.all_signals[idx]] annotated_detections = list(annot_this_file["timestamp"]) return annotated_detections \ No newline at end of file diff --git a/nmf_bioacoustic/datasets/whalesing.py b/nmf_bioacoustic/datasets/whalesing.py index 815f13c..c8c9d6a 100644 --- a/nmf_bioacoustic/datasets/whalesing.py +++ b/nmf_bioacoustic/datasets/whalesing.py @@ -1,8 +1,25 @@ +""" +Created on June 2024 + +@author: a23marmo + +Defines the dataset class for the dataset of humpback whale. Inherits from the base dataset class. + +TODO: add reference for the dataset, ask Dorian Cazau. +""" + from nmf_bioacoustic.datasets.base_dataset import * class WhaleSing(GeneralDataset): + def __init__(self, audio_path, feature_object): + """ + Initializes the dataset object, exactly following tehe general dataset. + """ super().__init__(audio_path, None, feature_object) def get_annotations(self, idx): + """ + Void function, as the dataset does not have annotations. + """ return None diff --git a/nmf_bioacoustic/experiments/anuraset_nmf_count.py b/nmf_bioacoustic/experiments/anuraset_nmf_count.py index 4049440..17b0aad 100644 --- a/nmf_bioacoustic/experiments/anuraset_nmf_count.py +++ b/nmf_bioacoustic/experiments/anuraset_nmf_count.py @@ -1,52 +1,69 @@ +""" +Created on June 2024 + +@author: a23marmo + +Defines the experiment for counting the number of sources in the AnuraSet dataset, based on the NMF outputs. +It uses the package SACRED to log the results and the parameters of the experiment. +See the docs of SACRED for more details https://sacred.readthedocs.io/en/stable/ +""" + import matplotlib.pyplot as plt import numpy as np import tqdm -from nn_fac.nmf import nmf as nn_fac_nmf from sacred import Experiment from sacred.observers import FileStorageObserver from scipy.special import kl_div from sklearn.metrics import ConfusionMatrixDisplay from sklearn.model_selection import GridSearchCV +from nn_fac.nmf import nmf as nn_fac_nmf + import nmf_bioacoustic.tasks.source_count as species_count from nmf_bioacoustic.datasets.anuraset import AnuraSet from nmf_bioacoustic.utils.signal_to_spectrogram import FeatureObject -anuraset_path = "/home/a23marmo/datasets/anuraset" +anuraset_path = "/home/a23marmo/datasets/anuraset" # TODO: change this path to the correct one ex = Experiment("NMF on Anuraset for source count estimation") save_log_folder = "experiments/jjba/source_count/running" ex.observers.append(FileStorageObserver(save_log_folder)) - @ex.config def default_config(): - sr = 16000 - hop_length = 512 - n_fft = hop_length * 4 - feature = "nn_log_mel" - fmin = 0 - fmax = None - n_mels = 80 + # Base configuration + sr = 16000 # Sampling rate + hop_length = 512 # Hop length + n_fft = hop_length * 4 # FFT window size + feature = "nn_log_mel" # Feature to use + fmin = 0 # Minimum frequency + fmax = None # Maximum frequency + n_mels = 80 # Number of mel bands - n_nmf = 10 - beta = 2 + n_nmf = 10 # Number of components in the NMF + beta = 2 # Beta parameter of the NMF, defining the loss function. 
diff --git a/nmf_bioacoustic/experiments/anuraset_nmf_count.py b/nmf_bioacoustic/experiments/anuraset_nmf_count.py
index 4049440..17b0aad 100644
--- a/nmf_bioacoustic/experiments/anuraset_nmf_count.py
+++ b/nmf_bioacoustic/experiments/anuraset_nmf_count.py
@@ -1,52 +1,69 @@
+"""
+Created on June 2024
+
+@author: a23marmo
+
+Defines the experiment for counting the number of sources in the AnuraSet dataset, based on the NMF outputs.
+It uses the package SACRED to log the results and the parameters of the experiment.
+See the docs of SACRED for more details: https://sacred.readthedocs.io/en/stable/
+"""
+
 import matplotlib.pyplot as plt
 import numpy as np
 import tqdm
-from nn_fac.nmf import nmf as nn_fac_nmf
 from sacred import Experiment
 from sacred.observers import FileStorageObserver
 from scipy.special import kl_div
 from sklearn.metrics import ConfusionMatrixDisplay
 from sklearn.model_selection import GridSearchCV
 
+from nn_fac.nmf import nmf as nn_fac_nmf
+
 import nmf_bioacoustic.tasks.source_count as species_count
 from nmf_bioacoustic.datasets.anuraset import AnuraSet
 from nmf_bioacoustic.utils.signal_to_spectrogram import FeatureObject
 
-anuraset_path = "/home/a23marmo/datasets/anuraset"
+anuraset_path = "/home/a23marmo/datasets/anuraset"  # TODO: change this path to the correct one
 
 ex = Experiment("NMF on Anuraset for source count estimation")
 
 save_log_folder = "experiments/jjba/source_count/running"
 ex.observers.append(FileStorageObserver(save_log_folder))
 
-
 @ex.config
 def default_config():
-    sr = 16000
-    hop_length = 512
-    n_fft = hop_length * 4
-    feature = "nn_log_mel"
-    fmin = 0
-    fmax = None
-    n_mels = 80
+    # Base configuration
+    sr = 16000  # Sampling rate
+    hop_length = 512  # Hop length
+    n_fft = hop_length * 4  # FFT window size
+    feature = "nn_log_mel"  # Feature to use
+    fmin = 0  # Minimum frequency
+    fmax = None  # Maximum frequency
+    n_mels = 80  # Number of mel bands
 
-    n_nmf = 10
-    beta = 2
+    n_nmf = 10  # Number of components in the NMF
+    beta = 2  # Beta parameter of the NMF, defining the loss function:
+              # 2 for Euclidean, 1 for Kullback-Leibler, 0 for Itakura-Saito
 
-    default_var_divide = 10
-    default_eps = 0.8
-    default_metric = "correlation"
+    default_var_divide = 10  # Variance divide parameter for considering some components as noise
+    default_eps = 0.8  # Epsilon parameter for the DBSCAN clustering
+    default_metric = "correlation"  # Metric for the DBSCAN clustering
 
-    scoring = "accuracy"
-    cv = 4
+    scoring = "accuracy"  # Scoring for the GridSearchCV
+    cv = 4  # Number of folds for the GridSearchCV
 
-    param_grid = {"eps": np.linspace(0.3, 1.3, 11)}
-
-    subset = "INCT17"
+    param_grid = {"eps": np.linspace(0.3, 1.3, 11)}  # Grid for the DBSCAN clustering
 
+    subset = "INCT17"  # Subset of the dataset to use
 
 @ex.capture
 def compute_all_nmf_on_this_dataset(n_nmf, beta, subset, sr, feature, hop_length, n_fft, fmin, fmax, n_mels):
+    """
+    Computes the NMF on all the signals of the dataset, and returns the H matrices of the NMF (used for clustering) and the annotations.
+
+    Parameters
+    ----------
+    See the configuration of the experiment (default_config).
+
+    Returns
+    -------
+    all_H : list of np.array
+        H matrices of the NMF, one per signal.
+    all_annot : list of int
+        Annotated number of species, one per signal.
+    """
     feature_object = FeatureObject(sr, feature, hop_length, n_fft=n_fft, fmin = fmin, fmax=fmax, mel_grill = False, n_mels=n_mels)
 
     dataset = AnuraSet(
@@ -74,7 +91,7 @@
             tol=1e-8,
             update_rule=nmf_algo,
             beta=beta,
-            normalize=[False, True],  # sparsity_coefficients=[0, 10],
+            normalize=[False, True],
             verbose=False,
             return_costs=False,
             deterministic=True,
@@ -96,6 +113,30 @@
     scoring,
     cv,
 ):
+    """
+    Grid search for the DBSCAN clustering, based on the NMF outputs.
+
+    In general, the parameter grid is used to find the best epsilon parameter for the DBSCAN clustering.
+
+    Parameters
+    ----------
+    all_H : list of np.array
+        List of the H matrices of the NMF.
+    all_annot : list of int
+        List of the annotations of the number of species in the signals.
+    param_grid : dict
+        Dictionary of the parameter grid for the DBSCAN clustering.
+    default_var_divide : float
+        Variance divide parameter for considering some components as noise.
+    default_metric : str
+        Metric for the DBSCAN clustering.
+    default_eps : float
+        Epsilon parameter for the DBSCAN clustering.
+    scoring : str
+        Scoring for the GridSearchCV.
+    cv : int
+        Number of folds for the GridSearchCV.
+
+    Returns
+    -------
+    fitted_estimation : list of int
+        Estimated number of sources for each signal, under the best parameters.
+    accuracy : float
+        Best accuracy obtained in the grid search.
+    """
     default_counter = species_count.SourceCountEstimator(
         var_divide=default_var_divide, eps=default_eps, metric=default_metric
     )
+ """ + exp_id = _run.meta_info['options']['--id'] + + # Compute the NMF on the dataset all_H, all_annot = compute_all_nmf_on_this_dataset() + + # Grid search for the DBSCAN clustering fitted_estimation, accuracy = grid_search_DBSCAN(all_H=all_H, all_annot=all_annot) + + # Log the results + # Log the number of samples first _run.log_scalar("Number samples", len(all_annot)) + + # Log the accuracy print(f"Accuracy: {accuracy}") _run.log_scalar("Accuracy", accuracy) + # Log the average difference between the annotations and the estimation difference = np.mean(np.abs(np.subtract(fitted_estimation, all_annot))) print(f"Difference: {difference}") _run.log_scalar("Difference", difference) + # Log the KL divergence between the annotations and the estimation kl = np.mean(kl_div(all_annot, fitted_estimation)) print(f"KL div: {kl}") _run.log_scalar("KL div", kl) - _run.log_scalar("Estimation", fitted_estimation) - # ConfusionMatrixDisplay.from_predictions(all_annot, fitted_estimation).plot() - # plt.savefig(f'jjba_experiments/confusion_matrix_beta{_config['beta']}.png') - # plt.show() + # Log all the fitted estimation, if needed + _run.log_scalar("Estimation", fitted_estimation) + # Plots the confusion matrix, and save it cm_display = ConfusionMatrixDisplay.from_predictions(all_annot, fitted_estimation) - # cm_display.plot() plt.gca().invert_yaxis() # Get the current axes and invert the y-axis plt.xlabel("Predicted number of sources") plt.ylabel("Annotated number of species") @@ -141,6 +195,7 @@ def expe_nmf_main(_run, _config): ) plt.close() + # Plots the histogram of the annotations and the estimation, and save it fig, ax = plt.subplots() ax.hist( fitted_estimation, bins=np.arange(0.5, 11.5, 1), alpha=0.5, label="estimation" @@ -155,12 +210,15 @@ def expe_nmf_main(_run, _config): if __name__ == "__main__": + ## Run all the experiments + # for subset in ["INCT4", "INCT17", "INCT41", "INCT20955"]: # for feature in ["mel", "nn_log_mel"]: # for beta in [2,1,0]: # id_experiment = f"beta{beta}_nmf10_feature{feature}_subset{subset}" # r = ex.run(config_updates={'beta':beta, 'feature':feature, 'subset':subset}, options={'--id':id_experiment}) + ## Run one case beta = 1 feature = "nn_log_mel" subset="INCT17" diff --git a/nmf_bioacoustic/tasks/__pycache__/source_separation.cpython-310.pyc b/nmf_bioacoustic/tasks/__pycache__/source_separation.cpython-310.pyc deleted file mode 100644 index 1aa63dd6b195bb5eedefe7b942aedfff9c868b83..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 1607 zcmd1j<>g{vU|`sjQkmAn&cN^(#6iZ)3=9ko3=9m#AR>h!g)xUAiV;jR<uWrdFf!z_ zM6ob3q%fo~<*??mMX`a|j5+MN98sK%3@Pj>tSM|Qj8R-EjKK_=94|p;`)M-X;wUai zO)k#JFTcf?UzC}inU|OoUyzYloa&a$4pIZfJPZsBoD2*M&LAf|V`N~cVM$@kW-8*T zVNGEuWi00LVn|_XW^7`tVNPKvWhfG@Va#GkVU}d5Wlm?PWiMe&VQFS$WGG?EVy<DR z0XdwZl_`axhPjm~jY*P0fT4z|h6Q9+3W|6NYcC5ULk(L|N(svX)`bi)Otl=foF#0y z%*bM|Vano2VM}38;Yi`^Wvk_?;atF(!nF`&H%k^*3U>-m3U4nHBSQ(x0`7$jj0`0_ zDSTkJ@<}o@Gr2Itw$yT$FsAU=aL;C#%Y<-q3IoVxHQXTALf9p|S$rT~4Py;A!Y3e6 zh&)J+fWO!m@YjH%jG>i@bdy1Ts$nbofEF_BDFTuVHB2}{LJ;CFklqqdcnE>`DZ+D^ z(Zi)Mr-nV4K~u!92$b@wLL>7_74lM3Qxr<_6$*;-%Q90^6-qKv6~JjlAwLbwfu$gY z#L|?^e2~)Avc#Oy#1e?w;{4L0<Wz;?)PlsK#FEVXyqAm&3=CC#;TicksS3H76(yxb zsaBfIxA@Xh6G80w{G_bZ<dR$bU|ZsgQcH?5K_-ITbc;DBKm8U<NoH>9OHkg?WWB{y zTy%>oBflU%Cp9m<B;yuaWqe{qX7Mey3JA?omYY~n#fxHJl}2W6da*vlZSe@V>6e3C z6%TclUO`@Zh$iDL?vkR!yyAkyqSU;SBCzW@g2BGBQh53Q|NsA0>=3R(l^hOZp%zuK zfQ-}>xW!tWkywy=i?yIAGq2<p4>;uFGg5O3Qj2a$<zyC@q~^t!<j0p}WERJR@@h#@ zetJ=2?k(nmoRV7{#idCFIr$~UMNA9~3`L+|DB@;dV7SFuoLUlJl39|IdW*d{u`D$$ 
zGyN6|IGeK;CFZ54-eSrtxW%5AS`G=H#Dap<ycA8ITa4ki7(I$uK}K+aLN-1pGdHuO zhz%sb3XX+aTp$a<PAR^{4fPGkowt}lmM|q&++xnmO~1vMxsstsgMoqJSCoE6er~FM zqLFcKVo`3szDsIxc1eDLen3$^D0YkW^K#STlQQ!Ylk-c9OEQx&_$7(O*;ryoub}dl z6eu}>5?DOcC5Q;*02Nh63=9k$i~@`tOk9jCOdL#Rj4X^ij5>^5jC_n7j3SJDj8TkE zj4Vt%j53S@jAD!&j7$wIOhsA@3=EnqkhJgTrpbMaHLo<cpt49B6b?#g?$s+T$;>I% zLy0+7uvn2WNIic}W>QgpaiU&IW^qAIVkJ1PZVBclmViPsCo@T}pc2Ho#Rj2^L_u*0 masoK~kO)nXaU3?e`6;D2sdk{`R}6}B4hA7c9%dd!J|+N*m&l9& diff --git a/nmf_bioacoustic/tasks/source_count.py b/nmf_bioacoustic/tasks/source_count.py index 86ee9f6..471fb37 100644 --- a/nmf_bioacoustic/tasks/source_count.py +++ b/nmf_bioacoustic/tasks/source_count.py @@ -1,19 +1,55 @@ -import sklearn.feature_selection +""" +Created on June 2024 + +@author: a23marmo + +Base code for the task of counting the number of species in a signal, using NMF. +""" + import numpy as np +import sklearn.feature_selection from sklearn.cluster import DBSCAN # HDBSCAN or OPTICS also possible, to test from sklearn.base import BaseEstimator from sklearn.metrics import accuracy_score - class SourceCountEstimator(BaseEstimator): - + # This class is used in the sklearn GridSearchCV. Hence, it follows sklearn's conventions. def __init__(self, *, var_divide=10, eps=0.8, metric='correlation'): + """ + Initializes the estimator object. + + Parameters + ---------- + var_divide : float + Variance divide parameter for considering some components as noise. + If the variance of a component is lower than the mean variance divided by var_divide, it is considered noise. + eps : float + Epsilon parameter for the DBSCAN clustering algorithm. + metric : str + Metric for the DBSCAN clustering algorithm. + """ self.var_divide = var_divide self.eps = eps self.metric = metric def fit(self, all_H, annotations): + """ + Fits the estimator to the data. + In this context, it means that it estimates the number of species in the signals. + + Parameters + ---------- + all_H : list of np.array + List of the H matrices of the NMF. + annotations : list of int + List of the annotations of the number of species in the signals. + + Returns + ------- + all_estimations : list of int + List of the estimations of the number of species in the signals. + """ all_estimations = [] for H in all_H: estimation = estimate_number_sources(H, var_divide=self.var_divide, eps=self.eps, metric=self.metric) @@ -22,6 +58,19 @@ class SourceCountEstimator(BaseEstimator): return all_estimations def predict(self, all_H): + """ + Predicts the number of species in the signals. + + Parameters + ---------- + all_H : list of np.array + List of the H matrices of the NMF. + + Returns + ------- + all_estimations : list of int + List of the estimations of the number of species in the signals. + """ all_estimations = [] for H in all_H: estimation = estimate_number_sources(H, var_divide=self.var_divide, eps=self.eps, metric=self.metric) @@ -30,12 +79,46 @@ class SourceCountEstimator(BaseEstimator): return all_estimations def score(self, estimations, annotations): - + """ + Computes the accuracy of the estimator. + + Parameters + ---------- + estimations : list of int + List of the estimations of the number of species in the signals. + annotations : list of int + List of the annotations of the number of species in the signals. + + Returns + ------- + accuracy : float + Accuracy of the estimator. 
+ """ return accuracy_score(annotations, estimations) def estimate_number_sources(H, var_divide = 10, eps = 0.7, metric = 'correlation'): - # This function is used to return the number of species in the current audio file + """ + Estimates the number of species in the signal, based on the H matrix of NMF. + This function is used to return the number of species in the current audio file + + Parameters + ---------- + H : np.array + H matrix of the NMF. + var_divide : float + Variance divide parameter for considering some components as noise. + If the variance of a component is lower than the mean variance divided by var_divide, it is considered noise. + eps : float + Epsilon parameter for the DBSCAN clustering algorithm. + metric : str + Metric for the DBSCAN clustering algorithm. + + Returns + ------- + number_species : int + Estimated number of species in the signal. + """ # First, remove all features with a variance lower than the mean variance divided by 10 H_cropped = threshold_H(H, var_divide=var_divide) @@ -46,6 +129,9 @@ def estimate_number_sources(H, var_divide = 10, eps = 0.7, metric = 'correlation return number_species def threshold_H(H, var_divide=10): + """ + Thresholds the H matrix of NMF, removing the components with a variance lower than the mean variance divided by var_divide. + """ var = np.var(H, axis=1) threshold = np.mean(var)/var_divide @@ -55,7 +141,10 @@ def threshold_H(H, var_divide=10): return H_cropped -def DBSCAN_count(H, eps = 0.7, metric = 'correlation'): # jensenshannon, mahalanobis, cityblock +def DBSCAN_count(H, eps = 0.7, metric = 'correlation'): + """ + Counts the number of clusters in the H matrix of NMF, using DBSCAN. + """ db = DBSCAN(eps=eps, min_samples=1, metric=metric) db.fit(H) labels = db.labels_ @@ -63,4 +152,7 @@ def DBSCAN_count(H, eps = 0.7, metric = 'correlation'): # jensenshannon, mahalan return n_clusters_ def compute_difference(estimation, annotation): + """ + Computes the difference between the estimation and the annotation. + """ return np.abs(estimation - annotation) diff --git a/nmf_bioacoustic/tasks/source_separation.py b/nmf_bioacoustic/tasks/source_separation.py index 7a5516c..b338f46 100644 --- a/nmf_bioacoustic/tasks/source_separation.py +++ b/nmf_bioacoustic/tasks/source_separation.py @@ -1,3 +1,11 @@ +""" +Created on June 2024 + +@author: a23marmo + +Base code for the task of source separation in a signal, using NMF. +This is only qualitative, as the annotations of independent sources are not provided. +""" import numpy as np import nmf_bioacoustic.utils.audio_helper as audio_helper @@ -6,6 +14,34 @@ from librosa.display import specshow import matplotlib.pyplot as plt def evaluate_source_separation(W, H, feature_object, time_limit=None, phase_retrieval="original_phase", phase=None, plot_specs=False): + """ + Evaluates the source separation of the NMF. + Plots the audio of the mixture and the separated sources. + It can also plots the spectrograms of the mixture and the separated sources. + + Parameters + ---------- + W : np.array + W matrix of the NMF. + H : np.array + H matrix of the NMF. + feature_object : FeatureObject + Feature object, defining the important parameters to compute spectrograms. + time_limit : int + Time limit to evaluate the source separation, and limit the size of the audio and spectrograms. + phase_retrieval : str + Method to retrieve the phase of the audio. It can be 'original_phase' or 'griffin_lim'. + phase : np.array + Phase of the original audio, to be used in the phase retrieval. 
diff --git a/nmf_bioacoustic/tasks/source_separation.py b/nmf_bioacoustic/tasks/source_separation.py
index 7a5516c..b338f46 100644
--- a/nmf_bioacoustic/tasks/source_separation.py
+++ b/nmf_bioacoustic/tasks/source_separation.py
@@ -1,3 +1,11 @@
+"""
+Created on June 2024
+
+@author: a23marmo
+
+Base code for the task of source separation in a signal, using NMF.
+This evaluation is only qualitative, as annotations of the independent sources are not provided.
+"""
 import numpy as np
 
 import nmf_bioacoustic.utils.audio_helper as audio_helper
@@ -6,6 +14,34 @@
 from librosa.display import specshow
 import matplotlib.pyplot as plt
 
 def evaluate_source_separation(W, H, feature_object, time_limit=None, phase_retrieval="original_phase", phase=None, plot_specs=False):
+    """
+    Evaluates the source separation of the NMF.
+    Plots the audio of the mixture and of the separated sources.
+    It can also plot the spectrograms of the mixture and of the separated sources.
+
+    Parameters
+    ----------
+    W : np.array
+        W matrix of the NMF.
+    H : np.array
+        H matrix of the NMF.
+    feature_object : FeatureObject
+        Feature object, defining the important parameters to compute spectrograms.
+    time_limit : int
+        Time limit used to evaluate the source separation, limiting the size of the audio and spectrograms.
+    phase_retrieval : str
+        Method to retrieve the phase of the audio. It can be 'original_phase' or 'griffin_lim'.
+    phase : np.array
+        Phase of the original audio, to be used in the phase retrieval.
+        Only used if phase_retrieval is 'original_phase'.
+    plot_specs : bool
+        If True, plots the spectrograms of the mixture and the separated sources.
+
+    Returns
+    -------
+    source_list : list of np.array
+        List of the separated sources, as spectrograms.
+    """
     if phase_retrieval == "original_phase":
         assert phase is not None, "You need to provide the phase of the original audio to evaluate the source separation"
 
     source_list = []
+    # Guard for the Griffin-Lim case, where no original phase is provided
+    cropped_phase = phase[:, :time_limit] if phase is not None else None
 
+    # Listen to the whole mixture
     print("Whole mixture:")
     audio_helper.listen_to_this_spectrogram(W@H[:,:time_limit], feature_object=feature_object, phase_retrieval = phase_retrieval, original_phase = cropped_phase)
+
+    # Plots the spectrogram of the whole mixture
     if plot_specs:
         fig, ax = plt.subplots()
         img = specshow(W@H[:,:time_limit], sr=feature_object.sr, hop_length=feature_object.hop_length, y_axis="log", x_axis="time", vmax=10)  # specshow(W@H, sr=sr, hop_length=hop_length, y_axis="log")
         ax.set_title("Whole mixture")
         plt.savefig(f"imgs/source_separation/whole_mixture.png", transparent = True)
         plt.show()
+
+    # Listen to the separated sources
     for i in range(0, H.shape[0]):
         print(f"Source: {i}")
         audio_helper.listen_to_this_spectrogram(W[:,i][:,np.newaxis]@H[i,:time_limit][np.newaxis,:], feature_object=feature_object, phase_retrieval = phase_retrieval, original_phase = cropped_phase)
         source_list.append(W[:,i][:,np.newaxis]@H[i,:time_limit][np.newaxis,:])
+        # Plots the spectrogram of the separated source
         if plot_specs:
             fig, ax = plt.subplots()
             img = specshow(source_list[-1], sr=feature_object.sr, hop_length=feature_object.hop_length, y_axis="log", x_axis="time", vmax=10)  # specshow(W@H, sr=sr, hop_length=hop_length, y_axis="log")
diff --git a/nmf_bioacoustic/utils/audio_helper.py b/nmf_bioacoustic/utils/audio_helper.py
index 46d6920..52030fa 100755
--- a/nmf_bioacoustic/utils/audio_helper.py
+++ b/nmf_bioacoustic/utils/audio_helper.py
@@ -1,3 +1,11 @@
+"""
+Created on April 2024
+
+@author: a23marmo
+
+Code to listen to audio based on spectrograms, and to compute the SDR between two audio signals.
+"""
+
 import IPython.display as ipd
 import mir_eval
 import numpy as np
@@ -14,11 +22,15 @@ def listen_to_this_spectrogram(spectrogram, feature_object, phase_retrieval = "g
     ----------
     spectrogram : numpy array
         The spectrogram to be inverted.
-    hop_length : integer
-        The hop length, in number of samples.
-    sampling_rate : integer, optional
-        The sampling rate of the signal, in Hz.
-        The default is 44100.
+    feature_object : FeatureObject
+        Feature object, defining the important parameters to compute spectrograms.
+    phase_retrieval : str
+        Method to retrieve the phase of the audio. It can be 'original_phase' or 'griffin_lim'.
+        If set to 'original_phase', the original_phase parameter is used as an estimation of the phase.
+        If set to 'griffin_lim', the phase is estimated using the Griffin-Lim algorithm.
+    original_phase : numpy array
+        Phase of the original audio, to be used in the phase retrieval.
+        Only used if phase_retrieval is 'original_phase'.
 
     Returns
     -------
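A sketch of how the helper above is typically called from the separation code: rebuild one rank-1 source from the NMF factors and listen to it in a notebook. `W`, `H` and `phase` are assumed to come from an NMF of an STFT-like spectrogram, as in the source separation task above.

```python
import numpy as np
from nmf_bioacoustic.utils.audio_helper import listen_to_this_spectrogram

# Rank-1 reconstruction of component i from the NMF factors.
i = 0
source_spec = W[:, i][:, np.newaxis] @ H[i, :][np.newaxis, :]
listen_to_this_spectrogram(source_spec, feature_object,
                           phase_retrieval="original_phase",
                           original_phase=phase)
```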
+""" + def time_to_frame(time_seconds, sr, hl): """ Compute the index of the frame given a time in seconds. - Parameters: + Parameters + ---------- - time_seconds: Time in seconds. - sr: Sampling rate in Hz. - hl: Hop length in samples. - Returns: + Returns + ------- - frame_index: Index of the frame. """ # Convert time to samples @@ -20,12 +30,14 @@ def frame_to_time(frame_index, sr, hl): """ Compute the time in seconds given the frame index. - Parameters: + Parameters + ---------- - frame_index: Index of the frame. - sr: Sampling rate in Hz. - hl: Hop length in samples. - Returns: + Returns + ------- - time_seconds: Time in seconds. """ # Compute time in samples @@ -36,6 +48,15 @@ def frame_to_time(frame_index, sr, hl): def crop_time(spec, time_limit_s, sr, hl): """ + Crop the spectrogram to a certain time limit. + Automatically converts the time_limit in seconds to the frame index. + + Parameters + ---------- + - spec: Spectrogram to be cropped. + - time_limit_s: Time limit in seconds. + - sr: Sampling rate in Hz. + - hl: Hop length in samples. """ # Compute the number of frames to keep limit_frame = time_to_frame(time_limit_s, sr, hl) diff --git a/nmf_bioacoustic/utils/signal_to_spectrogram.py b/nmf_bioacoustic/utils/signal_to_spectrogram.py index 94b55ef..dcd1227 100755 --- a/nmf_bioacoustic/utils/signal_to_spectrogram.py +++ b/nmf_bioacoustic/utils/signal_to_spectrogram.py @@ -4,11 +4,18 @@ Created on Wed Mar 25 16:54:59 2020 @author: amarmore -Computing spectrogram in different feature description. +Computing spectrogram in different feature description, using the library librosa [1]. -Note that Mel (and variants of Mel) spectrograms follow the particular definition of [1]. +Note that Mel (and variants of Mel) spectrograms follow the particular definition of [2]. -[1] Grill, T., & Schlüter, J. (2015, October). + +References +---------- +[1] McFee, B., Raffel, C., Liang, D., Ellis, D. P., McVicar, M., Battenberg, E., & Nieto, O. (2015, July). +librosa: Audio and music signal analysis in python. +In Proceedings of the 14th python in science conference (Vol. 8). + +[2] Grill, T., & Schlüter, J. (2015, October). Music Boundary Detection Using Neural Networks on Combined Features and Two-Level Annotations. In ISMIR (pp. 531-537). """ @@ -24,7 +31,46 @@ import IPython.display as ipd mel_power = 2 class FeatureObject(): + # An object to store the parameters of the spectrogram def __init__(self, sr, feature, hop_length, n_fft=2048, fmin = 0, fmax=None, mel_grill = True, n_mels=80): + """ + Initializes the object with the parameters of the spectrogram. + + Parameters + ---------- + sr : float + Sampling rate of the signal, (typically 44100Hz). + feature : String + The types of spectrograms to compute. + - "pcp" : Pitch class profile + - "cqt" : Constant-Q transform + - "mel" : Mel spectrogram + - "log_mel" : Log Mel spectrogram + - "nn_log_mel" : Nonnegative Log Mel spectrogram + - "padded_log_mel" : Padded Log Mel spectrogram + - "minmax_log_mel" : Min-max normalized Log Mel spectrogram + - "stft" : Short-time Fourier transform + - "stft_complex" : Complex Short-time Fourier transform + hop_length : integer + The desired hop_length, which is the step between two frames (ie the time "discretization" step) + It is expressed in terms of number of samples, which are defined by the sampling rate. + n_fft : integer, optional + The number of samples to use in the FFT. + The default is 2048. + fmin : integer, optional + The minimal frequence to consider, used for denoising. + The default is 0. 
diff --git a/nmf_bioacoustic/utils/signal_to_spectrogram.py b/nmf_bioacoustic/utils/signal_to_spectrogram.py
index 94b55ef..dcd1227 100755
--- a/nmf_bioacoustic/utils/signal_to_spectrogram.py
+++ b/nmf_bioacoustic/utils/signal_to_spectrogram.py
@@ -4,11 +4,18 @@ Created on Wed Mar 25 16:54:59 2020
 
 @author: amarmore
 
-Computing spectrogram in different feature description.
+Computing spectrograms with different feature descriptions, using the library librosa [1].
 
-Note that Mel (and variants of Mel) spectrograms follow the particular definition of [1].
+Note that Mel (and variants of Mel) spectrograms follow the particular definition of [2].
 
-[1] Grill, T., & Schlüter, J. (2015, October).
+
+References
+----------
+[1] McFee, B., Raffel, C., Liang, D., Ellis, D. P., McVicar, M., Battenberg, E., & Nieto, O. (2015, July).
+librosa: Audio and music signal analysis in python.
+In Proceedings of the 14th python in science conference (Vol. 8).
+
+[2] Grill, T., & Schlüter, J. (2015, October).
 Music Boundary Detection Using Neural Networks on Combined Features and Two-Level Annotations.
 In ISMIR (pp. 531-537).
 """
@@ -24,7 +31,46 @@
 mel_power = 2
 
 class FeatureObject():
+    # An object to store the parameters of the spectrogram
     def __init__(self, sr, feature, hop_length, n_fft=2048, fmin = 0, fmax=None, mel_grill = True, n_mels=80):
+        """
+        Initializes the object with the parameters of the spectrogram.
+
+        Parameters
+        ----------
+        sr : float
+            Sampling rate of the signal (typically 44100 Hz).
+        feature : String
+            The type of spectrogram to compute.
+            - "pcp" : Pitch class profile
+            - "cqt" : Constant-Q transform
+            - "mel" : Mel spectrogram
+            - "log_mel" : Log Mel spectrogram
+            - "nn_log_mel" : Nonnegative Log Mel spectrogram
+            - "padded_log_mel" : Padded Log Mel spectrogram
+            - "minmax_log_mel" : Min-max normalized Log Mel spectrogram
+            - "stft" : Short-time Fourier transform
+            - "stft_complex" : Complex Short-time Fourier transform
+        hop_length : integer
+            The desired hop_length, which is the step between two frames (i.e. the time "discretization" step).
+            It is expressed as a number of samples, which are defined by the sampling rate.
+        n_fft : integer, optional
+            The number of samples to use in the FFT.
+            The default is 2048.
+        fmin : integer, optional
+            The minimal frequency to consider, used for denoising.
+            The default is 0.
+        fmax : integer, optional
+            The maximal frequency to consider, used for denoising.
+            The default is None.
+        mel_grill : boolean, optional
+            If True, the Mel spectrogram is computed with the parameters of [2].
+            Only used if feature is "mel" or a derivative (like "log_mel" or "nn_log_mel").
+            The default is True.
+        n_mels : integer, optional
+            Number of mel bands to consider.
+            The default is 80.
+        """
         self.sr = sr
         self.feature = feature.lower()
         self.hop_length = hop_length
@@ -51,28 +97,13 @@
     def get_spectrogram(self, signal):
         """
         Returns a spectrogram, from the signal of a song.
-        Different types of spectrogram can be computed, which are specified by the argument "feature".
+        Different types of spectrograms can be computed, see the docstring of the object constructor.
         All these spectrograms are computed with the toolbox librosa [1].
 
         Parameters
         ----------
         signal : numpy array
             Signal of the song.
-        sr : float
-            Sampling rate of the signal, (typically 44100Hz).
-        feature : String
-            The types of spectrograms to compute.
-            TODO
-
-        hop_length : integer
-            The desired hop_length, which is the step between two frames (ie the time "discretization" step)
-            It is expressed in terms of number of samples, which are defined by the sampling rate.
-        fmin : integer, optional
-            The minimal frequence to consider, used for denoising.
-            The default is 98.
-        n_mfcc : integer, optional
-            Number of mfcc features.
-            The default is 20 (as in librosa).
 
         Raises
         ------
@@ -83,16 +114,6 @@
         Returns
         -------
         numpy array
             Spectrogram of the signal.
-
-        References
-        ----------
-        [1] McFee, B., Raffel, C., Liang, D., Ellis, D. P., McVicar, M., Battenberg, E., & Nieto, O. (2015, July).
-        librosa: Audio and music signal analysis in python.
-        In Proceedings of the 14th python in science conference (Vol. 8).
-
-        [2] Grill, T., & Schlüter, J. (2015, October).
-        Music Boundary Detection Using Neural Networks on Combined Features and Two-Level Annotations.
-        In ISMIR (pp. 531-537).
         """
         match self.feature:
             case "pcp":
@@ -119,6 +140,9 @@
                 raise err.InvalidArgumentValueException(f"Unknown signal representation: {self.feature}.")
 
     def _compute_pcp(self, signal):
+        """
+        Computes the Pitch Class Profile of a signal.
+        """
         norm=inf  # Columns normalization
         win_len_smooth=82  # Size of the smoothing window
         n_octaves=6
             norm=norm, win_len_smooth=win_len_smooth)
 
     def _compute_cqt(self, signal):
+        """
+        Computes the Constant-Q Transform of a signal.
+        """
         constant_q_transf = librosa.cqt(y=signal, sr = self.sr, hop_length = self.hop_length)
         return np.abs(constant_q_transf)
 
     def _compute_mel_spectrogram(self, signal):
+        """
+        Computes the Mel spectrogram of a signal.
+        """
         if self.mel_grill:
             mel = librosa.feature.melspectrogram(y=signal, sr = self.sr, n_fft=2048, hop_length = self.hop_length, n_mels=80, fmin=80.0, fmax=16000, power=mel_power)
         else:
         return np.abs(mel)
 
     def _compute_stft(self, signal, complex):
+        """
+        Computes the Short-time Fourier Transform of a signal.
+        """
         stft = librosa.stft(y=signal, hop_length=self.hop_length, n_fft=self.n_fft)
         if complex:
             mag, phase = librosa.magphase(stft, power = 1)
             return np.abs(stft)
+ """ if feature is None: # Recursive function, so it takes the feature as an argument feature = self.feature # Default case takes the object feature as the feature to compute diff --git a/nmf_bioacoustic/utils/spectrogram_to_signal.py b/nmf_bioacoustic/utils/spectrogram_to_signal.py index f0c3e54..39e72b7 100755 --- a/nmf_bioacoustic/utils/spectrogram_to_signal.py +++ b/nmf_bioacoustic/utils/spectrogram_to_signal.py @@ -1,3 +1,11 @@ +""" +Created on April 2024 + +@author: a23marmo + +Code to transform spectrograms to audio signals. +""" + import nmf_bioacoustic.utils.errors as err import nmf_bioacoustic.utils.signal_to_spectrogram as signal_to_spectrogram @@ -9,6 +17,28 @@ import librosa # %% Audio to signal conversion def spectrogram_to_audio_signal(spectrogram, feature_object, phase_retrieval = "griffin_lim", original_phase=None): + """ + Inverts the spectrogram using the istft method, and plots the audio using IPython.diplay.audio. + + Parameters + ---------- + spectrogram : numpy array + The spectrogram to be inverted. + feature_object : FeatureObject + Feature object, defining the important parameters to compute spectrograms. + phase_retrieval : str + Method to retrieve the phase of the audio. It can be 'original_phase' or 'griffin_lim'. + If set to 'original_phase', the original_phase parameter is used as an estimation of the phase. + If set to 'griffin_lim', the phase is estimated using the Griffin-Lim algorithm. + original_phase : numpy array + Phase of the original audio, to be used in the phase retrieval. + Only used if phase_retrieval is 'original_phase'. + + Returns + ------- + IPython.display audio + The audio signal of the song, reconstructed from NTD. + """ if feature_object.feature in ["stft", "stft_complex"]: spectrogram_stft = spectrogram elif feature_object.feature in ["mel", "log_mel", "nn_log_mel"]: @@ -29,5 +59,8 @@ def spectrogram_to_audio_signal(spectrogram, feature_object, phase_retrieval = " raise err.InvalidArgumentValueException(f"Phase retrieval method not understood: {phase_retrieval}.") def complex_stft_to_audio(stft_to_inverse, hop_length): + """ + Inverts the complex spectrogram using the istft method. + """ return librosa.istft(stft_to_inverse, hop_length = hop_length) -- GitLab