Compare revisions

MARMORET Axel · MARMORET Axel · MARMORET Axel · MARMORET Axel · MARMORET Axel · MARMORET Axel
--- a/as_seg/__init__.py
+++ b/as_seg/__init__.py
@@ -2,9 +2,9 @@ from . import autosimilarity_computation
 from . import barwise_input
 from . import data_manipulation
 from . import CBM_algorithm
-from . import foote_novelty
+#from . import foote_novelty

 from .model import current_plot
 from .model import errors
-from .model import features
+from .model import signal_to_spectrogram
 from .model import display_results
--- a/as_seg/autosimilarity_computation.py
+++ b/as_seg/autosimilarity_computation.py
-# -*- coding: utf-8 -*-
-"""
-Created on Mon Mar 14 16:30:31 2022
-
-@author: amarmore
-
-Module used to compute autosimilarity matrices.
-"""
-import as_seg.model.errors as err
-
-import numpy as np
-import sklearn.metrics.pairwise as pairwise_distances
-import warnings
-
-def switch_autosimilarity(an_array, similarity_type, gamma = None, normalise = True):
-    """
-    High-level function to compute the autosimilarity of this matrix.
-    
-    Expects a matrix of shape (Bars, Feature representation).
-    
-    Computes it with different possible similarity function s_{x_i,x_j} (given two bars denoted as x_i and x_j):
-        - "cosine" for the cosine similarity, i.e. the normalised dot product:
-        .. math::
-            s_{x_i,x_j} = \\frac{\langle x_i, x_j \rangle}{||x_i|| ||x_j||}
-        -"covariance" for a covariance similarity, 
-        i.e. the dot product of centered features:
-        .. math::
-            s_{x_i,x_j} = \langle x_i - \hat{x}, x_j - \hat{x} \rangle
-        -"rbf" for the Radial Basis Function similarity, 
-        i.e. the exponent of the opposite of the euclidean distance between features:
-        .. math::
-            s_{x_i,x_j} = \\exp^{-\\gamma ||x_i - x_j||_2}
-        The euclidean distance can be the distance between the normalised features.
-        Gamma is a parameter.
-        See rbf_kernel from scikit-learn for more details.
-    
-    Parameters
-    ----------
-    an_array : numpy array
-        The array/matrix seen as array which autosimilarity will be computed.
-        Expected to be of shape (Bars, Feature representation).
-    similarity_type : string
-        Either "cosine", "covariance" or "rbf".
-        It represents the similarity function used for computing the autosimilarity.
-    gamma : positive float, optional
-        The gamma parameter in the rbf function, only used for the "rbf" similarity.
-        The default is None, meaning that it is computed as function of the standard deviation,
-        see get_gamma_std() for more details.
-    normalise : boolean, optional
-        Whether features should be normalised or not. 
-        Normalisation depends on the similarity function.
-        The default is True.
-
-    Returns
-    -------
-    numpy array
-        Autosimilarity matrix of the input an_array.
-
-    """
-    if similarity_type.lower() == "cosine":
-        return get_cosine_autosimilarity(an_array)#, normalise = normalise)
-    elif similarity_type.lower() == "covariance":
-        return get_covariance_autosimilarity(an_array, normalise = normalise)
-    elif similarity_type.lower() == "rbf":
-        return get_rbf_autosimilarity(an_array, gamma, normalise = normalise)
-    elif similarity_type.lower() == "centered_rbf":
-        return get_centered_rbf_autosimilarity(an_array, gamma, normalise = normalise)
-    else:
-        raise err.InvalidArgumentValueException(f"Incorrect similarity type: {similarity_type}. Should be cosine, covariance or rbf.")
-        
-def l2_normalise_barwise(an_array):
-    """
-    Normalises the array barwise (i.e., in its first dimension) by the l_2 norm.
-    
-    Null values are replaced by the small positive value of 10^{-10}.
-
-    Parameters
-    ----------
-    an_array : numpy array
-        The array which needs to be normalised.
-
-    Returns
-    -------
-    numpy array
-        The normalised array.
-
-    """
-    with warnings.catch_warnings():
-        warnings.filterwarnings("ignore", message="invalid value encountered in true_divide") # Avoiding to show the warning, as it's handled, not te confuse the user.
-        an_array_T = an_array.T/np.linalg.norm(an_array, axis = 1)
-        an_array_T = np.where(np.isnan(an_array_T), 1e-10, an_array_T) # Replace null lines, avoiding future errors in handling values.
-    return an_array_T.T
-
-def get_cosine_autosimilarity(an_array):#, normalise = True):
-    """
-    Computes the autosimilarity matrix with the cosine similarity function.
-    
-    The cosine similarity function is the normalised dot product between two bars, i.e.:
-    .. math::
-        s_{x_i,x_j} = \\frac{\langle x_i, x_j \rangle}{||x_i|| ||x_j||}
-    
-    Parameters
-    ----------
-    an_array : numpy array
-        The array/matrix seen as array which autosimilarity os to compute.
-        Expected to be of shape (Bars, Feature representation).
-
-    Returns
-    -------
-    numpy array
-        The autosimilarity of this array, with the cosine similarity function.
-
-    """
-    if type(an_array) is list:
-        this_array = np.array(an_array)
-    else:
-        this_array = an_array
-    #if normalise:
-    this_array = l2_normalise_barwise(this_array)
-    return this_array@this_array.T
-
-def get_covariance_autosimilarity(an_array, normalise = True):
-    """
-    Computes the autosimilarity matrix, where the similarity function is the covariance.
-    
-    The covariance similarity function corresponds to the dot product of centered features:
-    .. math::
-        s_{x_i,x_j} = \langle x_i - \hat{x}, x_j - \hat{x} \rangle
-
-    Parameters
-    ----------
-    an_array : numpy array
-        The array/matrix seen as array which autosimilarity will be computed.
-    normalise : boolean, optional
-        Whether features should be normalised or not. 
-        Normalisation here means that each centered feature is normalised by its norm.
-        The default is True.
-        
-    Returns
-    -------
-    numpy array
-        The covariance autosimilarity of this array.
-
-    """
-    if type(an_array) is list:
-        this_array = np.array(an_array)
-    else:
-        this_array = an_array
-    this_array = this_array - this_array.mean(axis=0) # centering, i.e. subtracting the average value row-wise
-    if normalise:
-        this_array = l2_normalise_barwise(this_array)
-    return this_array@this_array.T
-
-def get_rbf_autosimilarity(an_array, gamma = None, normalise = True):
-    """
-    Computes the autosimilarity matrix, where the similarity function is the Radial Basis Function (RBF).
-    
-    The RBF corresponds to the exponent of the opposite of the euclidean distance between features:
-    .. math::
-        s_{x_i,x_j} = \\exp^{-\\gamma ||x_i - x_j||_2}
-        
-    The RBF is computed via scikit-learn.
-    The default gamma value is computed in function get_gamma_std(), refer to that function for further details.
-
-    Parameters
-    ----------
-    an_array : numpy array
-        The array/matrix seen as array which autosimilarity will be computed.
-    gamma : positive float, optional
-        The gamma parameter in the rbf function.
-        The default is None, meaning that it is computed as function of the standard deviation,
-        see get_gamma_std() for more details.
-    normalise : boolean, optional
-        Whether features should be normalised or not. 
-        Normalisation here means that the euclidean norm is computed between normalised vectors.
-        The default is True.
-
-    Returns
-    -------
-    numpy array
-        The RBF autosimilarity of this array.
-
-    """
-    if type(an_array) is list:
-        this_array = np.array(an_array)
-    else:
-        this_array = an_array
-    if gamma == None:
-        gamma = get_gamma_std(this_array, scaling_factor = 1, no_diag = True, normalise = normalise)
-    if normalise:
-        this_array = l2_normalise_barwise(this_array)
-    return pairwise_distances.rbf_kernel(this_array, gamma = gamma)
-    
-def get_centered_rbf_autosimilarity(an_array, gamma = None, normalise = True):
-    """
-    TODO
-    """
-    if type(an_array) is list:
-        this_array = np.array(an_array)
-    else:
-        this_array = an_array
-    this_array = this_array - this_array.mean(axis=0) # centering, i.e. subtracting the average value row-wise
-    if gamma == None:
-        gamma = get_gamma_std(this_array, scaling_factor = 1, no_diag = True, normalise = normalise)
-    if normalise:
-        this_array = l2_normalise_barwise(this_array)
-    return pairwise_distances.rbf_kernel(this_array, gamma = gamma)
-
-
-def get_gamma_std(an_array, scaling_factor = 1, no_diag = True, normalise = True):
-    """
-    Default value for the gamma in the RBF similarity function.
-    
-    This default value is proportional to the inverse of the standard deviation of the values, more experiments should be made to fit it.
-    For now, it has been set quite empirically.
-
-    Parameters
-    ----------
-    an_array : numpy array
-        The array/matrix seen as array which autosimilarity will be computed.
-    scaling_factor : positive float, optional
-        Weigthing parameter, relating to the inverse of the standard deviation. 
-        The default is 1.
-    no_diag : boolen, optional
-        Whether the diagonal values (self similarity values) should be discarded (True) or taken into account (False). 
-        The default is True.
-    normalise : boolean, optional
-        Whether features should be normalised or not. 
-        Normalisation here means that the euclidean norm is computed between normalised vectors.
-        The default is True.
-
-    Returns
-    -------
-    gamma : float
-        The gamma parameter in the RBF similarity function.
-
-    """
-    if normalise:
-        an_array = l2_normalise_barwise(an_array)
-    euc_dist = pairwise_distances.euclidean_distances(an_array)
-    if not no_diag:
-        return scaling_factor/(2*np.std(euc_dist))
-    else:
-        for i in range(len(euc_dist)):
-            euc_dist[i,i] = float('NaN')
+# -*- coding: utf-8 -*-
+"""
+Created on Mon Mar 14 16:30:31 2022
+
+@author: amarmore
+
+Module used to compute autosimilarity matrices.
+"""
+import as_seg.model.errors as err
+
+import numpy as np
+import sklearn.metrics.pairwise as pairwise_distances
+import warnings
+eps = 1e-10
+
+def switch_autosimilarity(an_array, similarity_type, gamma = None, normalise = True):
+    r"""
+    High-level function to compute the autosimilarity of this matrix.
+
+    Expects a matrix of shape (Bars, Feature representation).
+
+    The similarity function s_{x_i,x_j} (given two bars denoted as x_i and x_j) is selected
+    by similarity_type, which is matched case-insensitively:
+        - "cosine" for the cosine similarity, i.e. the normalised dot product:
+        .. math::
+            s_{x_i,x_j} = \frac{\langle x_i, x_j \rangle}{||x_i|| ||x_j||}
+        - "autocorrelation" (or its deprecated alias "covariance") for the autocorrelation similarity,
+        i.e. the dot product of centered features:
+        .. math::
+            s_{x_i,x_j} = \langle x_i - \hat{x}, x_j - \hat{x} \rangle
+        - "rbf" for the Radial Basis Function similarity,
+        i.e. the exponential of the opposite of the squared euclidean distance between features, weighted by gamma:
+        .. math::
+            s_{x_i,x_j} = \exp(-\gamma ||x_i - x_j||_2^2)
+        The euclidean distance can be the distance between the normalised features.
+        Gamma is a parameter.
+        See rbf_kernel from scikit-learn for more details.
+        - "centered_rbf" for the same RBF similarity, computed on centered features.
+
+    Parameters
+    ----------
+    an_array : numpy array
+        The array/matrix seen as array which autosimilarity will be computed.
+        Expected to be of shape (Bars, Feature representation).
+    similarity_type : string
+        Either "cosine", "autocorrelation" (or "covariance"), "rbf" or "centered_rbf".
+        It represents the similarity function used for computing the autosimilarity.
+    gamma : positive float, optional
+        The gamma parameter in the rbf function, only used for the "rbf" and "centered_rbf" similarities.
+        The default is None, meaning that it is computed as function of the standard deviation,
+        see get_gamma_std() for more details.
+    normalise : boolean, optional
+        Whether features should be normalised or not.
+        Normalisation depends on the similarity function.
+        It is not forwarded to the cosine similarity, which always normalises.
+        The default is True.
+
+    Returns
+    -------
+    numpy array
+        Autosimilarity matrix of the input an_array.
+
+    Raises
+    ------
+    InvalidArgumentValueException
+        If similarity_type is none of the supported names.
+
+    """
+    similarity = similarity_type.lower() # Lowercased once, every comparison below is case-insensitive.
+    if similarity == "cosine":
+        return get_cosine_autosimilarity(an_array)
+    if similarity in ("autocorrelation", "covariance"):
+        return get_autocorrelation_autosimilarity(an_array, normalise = normalise)
+    if similarity == "rbf":
+        return get_rbf_autosimilarity(an_array, gamma, normalise = normalise)
+    if similarity == "centered_rbf":
+        return get_centered_rbf_autosimilarity(an_array, gamma, normalise = normalise)
+    # The message echoes the value as given by the caller, and lists every accepted name.
+    raise err.InvalidArgumentValueException(f"Incorrect similarity type: {similarity_type}. Should be cosine, autocorrelation (or covariance), rbf or centered_rbf.")
+        
+def l2_normalise_barwise(an_array):
+    """
+    Scales every bar (i.e. every row, the first dimension) of the array by its l_2 norm.
+
+    Bars whose norm is null cannot be divided: their entries are set
+    to the small positive value eps (10^{-10}) instead.
+
+    Parameters
+    ----------
+    an_array : numpy array
+        The array to normalise, bars being indexed by the first dimension.
+
+    Returns
+    -------
+    numpy array
+        The barwise-normalised array, with the same shape as the input.
+
+    """
+    bar_norms = np.linalg.norm(an_array, axis = 1)
+    bars_as_columns = np.transpose(an_array) # Bars on the last axis, so that the norms broadcast in the division.
+    # Pre-filled with inf: the masked division below skips null-norm bars, which hence keep this marker.
+    scaled = np.inf * np.ones_like(bars_as_columns)
+    np.divide(bars_as_columns, bar_norms, out = scaled, where = bar_norms != 0)
+    scaled = np.where(np.isinf(scaled), eps, scaled)
+    return np.transpose(scaled)
+
+def get_cosine_autosimilarity(an_array):
+    r"""
+    Computes the autosimilarity matrix with the cosine similarity function.
+
+    The cosine similarity between two bars is their dot product once both are scaled to unit l_2 norm, i.e.:
+    .. math::
+        s_{x_i,x_j} = \frac{\langle x_i, x_j \rangle}{||x_i|| ||x_j||}
+
+    Parameters
+    ----------
+    an_array : numpy array or list
+        The array/matrix whose autosimilarity is to compute (a list is first converted to a numpy array).
+        Expected to be of shape (Bars, Feature representation).
+
+    Returns
+    -------
+    numpy array
+        The autosimilarity of this array, with the cosine similarity function.
+
+    """
+    bars = np.array(an_array) if type(an_array) is list else an_array
+    # Normalisation is unconditional here: it is part of the definition of the cosine similarity.
+    unit_bars = l2_normalise_barwise(bars)
+    return unit_bars@unit_bars.T
+
+def get_covariance_autosimilarity(an_array, normalise = True):
+    """
+    Deprecated name of get_autocorrelation_autosimilarity().
+
+    Note: deprecated. The name of the matrix became "Autocorrelation" in the TISMIR version of the paper.
+    Both arguments are forwarded unchanged to get_autocorrelation_autosimilarity(),
+    and its result is returned as is.
+
+    Parameters
+    ----------
+    an_array : numpy array
+        The array/matrix seen as array which autosimilarity will be computed.
+    normalise : boolean, optional
+        Whether features should be normalised or not.
+        The default is True.
+
+    Returns
+    -------
+    numpy array
+        The autocorrelation autosimilarity of this array.
+
+    """
+    return get_autocorrelation_autosimilarity(an_array, normalise = normalise)
+
+def get_autocorrelation_autosimilarity(an_array, normalise = True):
+    r"""
+    Computes the autosimilarity matrix, where the similarity function is the autocorrelation.
+
+    The autocorrelation similarity is the dot product of centered features:
+    .. math::
+        s_{x_i,x_j} = \langle x_i - \hat{x}, x_j - \hat{x} \rangle
+
+    Parameters
+    ----------
+    an_array : numpy array or list
+        The array/matrix whose autosimilarity will be computed (a list is first converted to a numpy array).
+    normalise : boolean, optional
+        Whether features should be normalised or not.
+        Normalisation here means that each centered feature is normalised by its norm.
+        The default is True.
+
+    Returns
+    -------
+    numpy array
+        The autocorrelation autosimilarity of this array.
+
+    """
+    bars = np.array(an_array) if type(an_array) is list else an_array
+    # Centering: the average taken along the first axis is subtracted from every row.
+    centered_bars = bars - bars.mean(axis=0)
+    if normalise:
+        centered_bars = l2_normalise_barwise(centered_bars)
+    return centered_bars@centered_bars.T
+
+def get_rbf_autosimilarity(an_array, gamma = None, normalise = True):
+    r"""
+    Computes the autosimilarity matrix, where the similarity function is the Radial Basis Function (RBF).
+
+    The RBF corresponds to the exponential of the opposite of the squared euclidean distance between features, weighted by gamma:
+    .. math::
+        s_{x_i,x_j} = \exp(-\gamma ||x_i - x_j||_2^2)
+
+    The RBF is computed via scikit-learn (rbf_kernel).
+    The default gamma value is computed in function get_gamma_std(), refer to that function for further details.
+
+    Parameters
+    ----------
+    an_array : numpy array or list
+        The array/matrix whose autosimilarity will be computed (a list is first converted to a numpy array).
+    gamma : positive float, optional
+        The gamma parameter in the rbf function.
+        The default is None, meaning that it is computed as function of the standard deviation,
+        see get_gamma_std() for more details.
+    normalise : boolean, optional
+        Whether features should be normalised or not.
+        Normalisation here means that the euclidean norm is computed between normalised vectors.
+        The default is True.
+
+    Returns
+    -------
+    numpy array
+        The RBF autosimilarity of this array.
+
+    """
+    if type(an_array) is list:
+        this_array = np.array(an_array)
+    else:
+        this_array = an_array
+    # Identity test on purpose: "gamma == None" is evaluated elementwise when gamma is a numpy value.
+    if gamma is None:
+        # The un-normalised features are passed: get_gamma_std() handles the "normalise" flag itself.
+        gamma = get_gamma_std(this_array, scaling_factor = 1, no_diag = True, normalise = normalise)
+    if normalise:
+        this_array = l2_normalise_barwise(this_array)
+    return pairwise_distances.rbf_kernel(this_array, gamma = gamma)
+    
+def get_centered_rbf_autosimilarity(an_array, gamma = None, normalise = True):
+    r"""
+    Computes the autosimilarity matrix with the Radial Basis Function (RBF), on centered features.
+
+    The average taken along the first axis of the array is subtracted from every row,
+    and the RBF is then computed via scikit-learn (rbf_kernel) on the centered features:
+    .. math::
+        s_{x_i,x_j} = \exp(-\gamma ||(x_i - \hat{x}) - (x_j - \hat{x})||_2^2)
+
+    Parameters
+    ----------
+    an_array : numpy array or list
+        The array/matrix whose autosimilarity will be computed (a list is first converted to a numpy array).
+    gamma : positive float, optional
+        The gamma parameter in the rbf function.
+        The default is None, meaning that it is computed from the centered features
+        with get_gamma_std(), refer to that function for further details.
+    normalise : boolean, optional
+        Whether the centered features should be normalised (barwise l_2 norm) or not.
+        The default is True.
+
+    Returns
+    -------
+    numpy array
+        The RBF autosimilarity of the centered array.
+
+    """
+    if type(an_array) is list:
+        this_array = np.array(an_array)
+    else:
+        this_array = an_array
+    this_array = this_array - this_array.mean(axis=0) # centering, i.e. subtracting the average taken along the first axis
+    # Identity test on purpose: "gamma == None" is evaluated elementwise when gamma is a numpy value.
+    if gamma is None:
+        gamma = get_gamma_std(this_array, scaling_factor = 1, no_diag = True, normalise = normalise)
+    if normalise:
+        this_array = l2_normalise_barwise(this_array)
+    return pairwise_distances.rbf_kernel(this_array, gamma = gamma)
+
+
+def get_gamma_std(an_array, scaling_factor = 1, no_diag = True, normalise = True):
+    """
+    Default value for the gamma in the RBF similarity function.
+
+    The value returned is scaling_factor / (2 * std), where std is the standard deviation
+    of the pairwise euclidean distances between the rows of the array.
+    More experiments should be made to fit it: for now, it has been set quite empirically.
+
+    Parameters
+    ----------
+    an_array : numpy array
+        The array/matrix seen as array which autosimilarity will be computed.
+    scaling_factor : positive float, optional
+        Weighting parameter, relating to the inverse of the standard deviation.
+        The default is 1.
+    no_diag : boolean, optional
+        Whether the diagonal values (distance of each row to itself) should be discarded (True) or taken into account (False).
+        The default is True.
+    normalise : boolean, optional
+        Whether features should be normalised or not.
+        Normalisation here means that the euclidean distances are computed between normalised vectors.
+        The default is True.
+
+    Returns
+    -------
+    gamma : float
+        The gamma parameter in the RBF similarity function.
+
+    """
+    if normalise:
+        an_array = l2_normalise_barwise(an_array)
+    euc_dist = pairwise_distances.euclidean_distances(an_array)
+    if not no_diag:
+        return scaling_factor/(2*np.std(euc_dist))
+    else:
+        # Self-distances are masked with NaN, so that the NaN-aware standard deviation ignores them.
+        np.fill_diagonal(euc_dist, float('NaN'))
         return scaling_factor/(2*np.nanstd(euc_dist))
\ No newline at end of file
--- a/as_seg/barwise_input.py
+++ b/as_seg/barwise_input.py
-# -*- coding: utf-8 -*-
-"""
-Created on Wed Apr 14 18:34:29 2021
-
-@author: amarmore
-
-Module used to handle compute the Barwise TF matrix, presented in [1]
-(Barwise TF matrix: a 2D representation of barwise features, 
-each feature representing Time-Frequency content, where time is expressed at barscale)
-
-See [1 - Chapter 2.4] or [2] for more information.
-
-References
----------
-[1] Unsupervised Machine Learning Paradigms for the Representation of Music Similarity and Structure, 
-PhD Thesis Marmoret Axel 
-(not uploaded yet but will be soon!)
-(You should check the website hal.archives-ouvertes.fr/ in case this docstring is not updated with the reference.)
-
-[2] Marmoret, A., Cohen, J.E, and Bimbot, F., "Barwise Compression Schemes 
-for Audio-Based Music Structure Analysis"", in: 19th Sound and Music Computing Conference, 
-SMC 2022, Sound and music Computing network, 2022.
-"""
-
-import as_seg.data_manipulation as dm
-import as_seg.model.errors as err
-
-import numpy as np
-import tensorly as tl
-import librosa
-
-# %% Tensors barwise spectrograms construction
-# !!! Be extremely careful with the organization of modes, which can be either Frequency-Time at barscale-Bars (FTB) or Bars-Frequency-Time at barscale (BFT) depending on the method.
-def tensorize_barwise_BFT(spectrogram, bars, hop_length_seconds, subdivision):
-    """
-    Returns a 3rd order tensor-spectrogram from the original spectrogram and bars starts and ends.
-    The order of modes is Bars-Frequency-Time at barscale (BFT).
-    Must be used for SSAE and the computtion of Barwise TF matrix.
-    
-    Each bar in the tensor-spectrogram contains the same number of frames, define by the "subdivision" parameter.
-    These frames are selected from an oversampled spectrogram, adapting to the specific size of each bar.
-    See [1] for details.
-
-    Parameters
-    ----------
-    spectrogram : list of list of floats or numpy array
-        The spectrogram to return as a tensor-spectrogram.
-    bars : list of tuples
-        List of the bars (start, end), in seconds, to cut the spectrogram at bar delimitation.
-    hop_length_seconds : float
-        The hop_length, in seconds.
-    subdivision : integer
-        The number of subdivision of the bar to be contained in each slice of the tensor.
-
-    Returns
-    -------
-    np.array tensor
-        The tensor-spectrogram as a np.array.
-
-    """
-    barwise_spec = []
-    bars_idx = dm.segments_from_time_to_frame_idx(bars[1:], hop_length_seconds)
-    for idx, beats in enumerate(bars_idx):
-        t_0 = beats[0]
-        t_1 = beats[1]
-        samples = [int(round(t_0 + k * (t_1 - t_0)/subdivision)) for k in range(subdivision)]
-        if len(samples) != len(set(samples)): # Check for repetitions
-            raise err.ToDebugException("The subdivision is too large, it leads to repeated samples chosen in the bar!")
-        if samples[-1] < spectrogram.shape[1]:
-            barwise_spec.append(spectrogram[:,samples])
-    return np.array(barwise_spec)
-
-def tensorize_barwise_FTB(spectrogram, bars, hop_length_seconds, subdivision):
-    #(careful: different mode organization than previous one: here, this is Frequency-Time-Bars)
-    """
-    Returns a 3rd order tensor-spectrogram from the original spectrogram and bars starts and ends.
-    The order of modes is Frequency-Time at barscale-Bars (FTB).
-    Must be used for NTD.
-    
-    Each bar in the tensor-spectrogram contains the same number of frames, define by the "subdivision" parameter.
-    These frames are selected from an oversampled spectrogram, adapting to the specific size of each bar.
-    See [1, Chap 2.4.2] for details.
-
-    Parameters
-    ----------
-    spectrogram : list of list of floats or numpy array
-        The spectrogram to return as a tensor-spectrogram.
-    bars : list of tuples
-        List of the bars (start, end), in seconds, to cut the spectrogram at bar delimitation.
-    hop_length_seconds : float
-        The hop_length, in seconds.
-    subdivision : integer
-        The number of subdivision of the bar to be contained in each slice of the tensor.
-
-    Returns
-    -------
-    tensorly tensor
-        The tensor-spectrogram as a tensorly tensor.
-
-    """
-    freq_len = spectrogram.shape[0]
-    bars_idx = dm.segments_from_time_to_frame_idx(bars[1:], hop_length_seconds)
-    samples_init = [int(round(bars_idx[0][0] + k * (bars_idx[0][1] - bars_idx[0][0])/subdivision)) for k in range(subdivision)]
-        
-    tens = np.array(spectrogram[:,samples_init]).reshape(freq_len, subdivision, 1)
-    
-    for bar in bars_idx[1:]:
-        t_0 = bar[0]
-        t_1 = bar[1]
-        samples = [int(round(t_0 + k * (t_1 - t_0)/subdivision)) for k in range(subdivision)]
-        if samples[-1] < spectrogram.shape[1]:
-            current_bar_tensor_spectrogram = spectrogram[:,samples].reshape(freq_len, subdivision,1)
-            tens = np.append(tens, current_bar_tensor_spectrogram, axis = 2)
-        else:
-            break
-    
-    return tl.tensor(tens)#, dtype=tl.float32)
-
-# %% Matrix barwise spectrograms handling
-def barwise_TF_matrix(spectrogram, bars, hop_length_seconds, subdivision):
-    """
-    Barwise TF matrix, a 2D representation of Barwise spectrograms as Time-Frequency vectors.
-    See [1] for details.
-
-    Parameters
-    ----------
-    spectrogram : list of list of floats or numpy array
-        The spectrogram to return as a tensor-spectrogram.
-    bars : list of tuples
-        List of the bars (start, end), in seconds, to cut the spectrogram at bar delimitation.
-    hop_length_seconds : float
-        The hop_length, in seconds.
-    subdivision : integer
-        The number of subdivision of the bar to be contained in each slice of the tensor.
-
-    Returns
-    -------
-    np.array
-        The Barwise TF matrix, of sizes (b, tf).
-
-    """
-    tensor_spectrogram = tensorize_barwise_BFT(spectrogram, bars, hop_length_seconds, subdivision)
-    return tl.unfold(tensor_spectrogram, 0)
-
-# %% Vector barwise spectrogram handling
-def TF_vector_to_TF_matrix(vector, frequency_dimension, subdivision):
-    """
-    Encapsulating the conversion from a Time-Frequency vector to a Time-Frequency matrix (spectrogram)
-
-    Parameters
-    ----------
-    vector : np.array
-        A Time-Frequency vector (typically a row in the Barwise TF matrix).
-    frequency_dimension : positive integer
-        The size of the frequency dimension 
-        (number of components in this dimension).
-    subdivision : positive integer
-        The size of the time dimension at the bar scale 
-        (number of time components in each bar, defined as parameter when creating the Barwise TF matrix).
-
-    Returns
-    -------
-    np.array
-        A Time-Frequency matrix (spectrogram) of size (frequency_dimension, subdivision).
-
-    """
-    assert frequency_dimension*subdivision == vector.shape[0]
-    return tl.fold(vector, 0, (frequency_dimension,subdivision))
-
-
-def beat_synchronize_msaf(spectrogram, frame_times, beat_frames, beat_times):
-    # Make beat synchronous
-    beatsync_feats = librosa.util.utils.sync(spectrogram.T, beat_frames, pad=True).T
-
-    # Assign times (and add last time if padded)
-    beatsync_times = np.copy(beat_times)
-    if beatsync_times.shape[0] != beatsync_feats.shape[0]:
-        beatsync_times = np.concatenate((beatsync_times,
-                                         [frame_times[-1]]))
-    return beatsync_feats, beatsync_times
+# -*- coding: utf-8 -*-
+"""
+Created on Wed Apr 14 18:34:29 2021
+
+@author: amarmore
+
+Module used to handle compute the Barwise TF matrix, presented in [1]
+(Barwise TF matrix: a 2D representation of barwise features, 
+each feature representing Time-Frequency content, where time is expressed at barscale)
+
+See [1 - Chapter 2.4] or [2] for more information.
+
+References
+----------
+[1] Unsupervised Machine Learning Paradigms for the Representation of Music Similarity and Structure, 
+PhD Thesis Marmoret Axel 
+(not uploaded yet but will be soon!)
+(You should check the website hal.archives-ouvertes.fr/ in case this docstring is not updated with the reference.)
+
+[2] Marmoret, A., Cohen, J.E, and Bimbot, F., "Barwise Compression Schemes 
+for Audio-Based Music Structure Analysis"", in: 19th Sound and Music Computing Conference, 
+SMC 2022, Sound and music Computing network, 2022.
+"""
+
+import as_seg.data_manipulation as dm
+import as_seg.model.errors as err
+
+import numpy as np
+import tensorly as tl
+import librosa
+
+# %% Spectrograms to tensors
+# !!! Be extremely careful with the organization of modes, which can be either Frequency-Time at barscale-Bars (FTB) or Bars-Frequency-Time at barscale (BFT) depending on the method.
+def tensorize_barwise_BFT(spectrogram, bars, hop_length_seconds, subdivision, subset_nb_bars = None):
+    """
+    Builds a 3rd order tensor-spectrogram from a spectrogram and the bars starts and ends.
+    The order of modes is Bars-Frequency-Time at barscale (BFT).
+    Must be used for SSAE and the computation of the Barwise TF matrix.
+
+    Each bar in the tensor-spectrogram contains the same number of frames, defined by the "subdivision" parameter.
+    These frames are selected from an oversampled spectrogram, adapting to the specific size of each bar.
+    See [1] for details.
+
+    Note that the first element of "bars" is dropped before the conversion to frame indexes,
+    and that a bar whose last selected frame falls outside of the spectrogram is silently left out.
+
+    Parameters
+    ----------
+    spectrogram : numpy array
+        The spectrogram to return as a tensor-spectrogram.
+        It is indexed as spectrogram[:, frames], hence frames are expected on the second dimension.
+    bars : list of tuples
+        List of the bars (start, end), in seconds, to cut the spectrogram at bar delimitation.
+    hop_length_seconds : float
+        The hop_length, in seconds.
+    subdivision : integer
+        The number of subdivision of the bar to be contained in each slice of the tensor.
+    subset_nb_bars : integer, optional
+        If set, only the first subset_nb_bars bars (after the first one was dropped) are kept.
+        The default is None, meaning that all bars are kept.
+
+    Returns
+    -------
+    np.array tensor
+        The tensor-spectrogram as a np.array.
+
+    Raises
+    ------
+    ToDebugException
+        If the same frame is selected twice in a bar which is not the last one.
+
+    """
+    bars_idx = dm.segments_from_time_to_frame_idx(bars[1:], hop_length_seconds)
+    if subset_nb_bars is not None:
+        bars_idx = bars_idx[:subset_nb_bars]
+
+    barwise_spec = []
+    for idx, bar_frames in enumerate(bars_idx):
+        t_0, t_1 = bar_frames[0], bar_frames[1]
+        # "subdivision" frame indexes, evenly spread between the start and the end of the bar.
+        samples = [int(round(t_0 + k * (t_1 - t_0)/subdivision)) for k in range(subdivision)]
+        has_repetitions = len(samples) != len(set(samples))
+        # Repetitions are tolerated in the last bar, because they are due to inconsistencies between the last downbeat estimated and the end of the song.
+        if has_repetitions and idx != len(bars_idx) - 1:
+            raise err.ToDebugException("The subdivision is too large, it leads to repeated samples chosen in the bar!")
+        if samples[-1] < spectrogram.shape[1]:
+            barwise_spec.append(spectrogram[:,samples])
+    return np.array(barwise_spec)
+
+def tensorize_barwise_FTB(spectrogram, bars, hop_length_seconds, subdivision, subset_nb_bars = None):
+    #(careful: different mode organization than previous one: here, this is Frequency-Time-Bars)
+    """
+    Returns a 3rd order tensor-spectrogram from the original spectrogram and bars starts and ends.
+    The order of modes is Frequency-Time at barscale-Bars (FTB).
+    Must be used for NTD.
+
+    Each bar in the tensor-spectrogram contains the same number of frames, defined by the "subdivision" parameter.
+    These frames are selected from an oversampled spectrogram, adapting to the specific size of each bar.
+    See [1, Chap 2.4.2] for details.
+
+    Note that the first element of "bars" is dropped before the conversion to frame indexes,
+    and that the tensor stops at the first bar whose last selected frame falls outside of the spectrogram
+    (this check is not made for the first bar kept).
+
+    Parameters
+    ----------
+    spectrogram : numpy array
+        The spectrogram to return as a tensor-spectrogram.
+        It is indexed as spectrogram[:, frames], hence frames are expected on the second dimension.
+    bars : list of tuples
+        List of the bars (start, end), in seconds, to cut the spectrogram at bar delimitation.
+    hop_length_seconds : float
+        The hop_length, in seconds.
+    subdivision : integer
+        The number of subdivision of the bar to be contained in each slice of the tensor.
+    subset_nb_bars : integer, optional
+        If set, only the first subset_nb_bars bars (after the first one was dropped) are kept.
+        The default is None, meaning that all bars are kept.
+
+    Returns
+    -------
+    tensorly tensor
+        The tensor-spectrogram as a tensorly tensor.
+
+    """
+    freq_len = spectrogram.shape[0]
+    bars_idx = dm.segments_from_time_to_frame_idx(bars[1:], hop_length_seconds)
+    if subset_nb_bars is not None:
+        bars_idx = bars_idx[:subset_nb_bars]
+    samples_init = [int(round(bars_idx[0][0] + k * (bars_idx[0][1] - bars_idx[0][0])/subdivision)) for k in range(subdivision)]
+
+    # Bars are gathered in a list and concatenated once at the end:
+    # appending to the tensor in the loop would copy all previous bars at each iteration.
+    bar_slices = [np.array(spectrogram[:,samples_init]).reshape(freq_len, subdivision, 1)]
+
+    for bar in bars_idx[1:]:
+        t_0 = bar[0]
+        t_1 = bar[1]
+        samples = [int(round(t_0 + k * (t_1 - t_0)/subdivision)) for k in range(subdivision)]
+        if samples[-1] >= spectrogram.shape[1]:
+            break # This bar exceeds the spectrogram: it is discarded, as well as all the following ones.
+        bar_slices.append(spectrogram[:,samples].reshape(freq_len, subdivision, 1))
+
+    return tl.tensor(np.concatenate(bar_slices, axis = 2))#, dtype=tl.float32)
+
+# %% Tensors to spectrograms
+def tensor_barwise_to_spectrogram(tensor, mode_order = "BFT", subset_nb_bars = None):
+    """
+    Flattens a barwise tensor-spectrogram into a matrix whose first dimension is the frequency mode.
+
+    Parameters
+    ----------
+    tensor : tensor or numpy array
+        The tensor-spectrogram, with modes organized as specified in mode_order.
+    mode_order : string, optional
+        Either "BFT" (Bars-Frequency-Time) or "FTB" (Frequency-Time-Bars).
+        The default is "BFT".
+    subset_nb_bars : integer, optional
+        If not None, only the first subset_nb_bars bars of the tensor are kept.
+        The default is None.
+
+    Raises
+    ------
+    InvalidArgumentValueException
+        If mode_order is neither "BFT" nor "FTB".
+
+    """
+    if subset_nb_bars is not None:
+        tensor = barwise_subset_this_tensor(tensor, subset_nb_bars, mode_order = mode_order)
+
+    if mode_order == "FTB":
+        nb_freq = tensor.shape[0]
+        nb_columns = tensor.shape[1] * tensor.shape[2]
+        # Note: the column-major reshape is NOT the same than unfold(0)
+        return np.reshape(tensor, (nb_freq, nb_columns), order = 'F')
+
+    if mode_order == "BFT":
+        return tl.unfold(tensor, 1)
+
+    raise err.InvalidArgumentValueException(f"Unknown mode order: {mode_order}.")
+
+def barwise_subset_this_tensor(tensor, subset_nb_bars, mode_order = "BFT"):
+    """
+    Keeps only the first subset_nb_bars bars of a barwise tensor-spectrogram.
+    The bar mode is the first one for mode_order "BFT", and the third one for "FTB".
+    Raises an InvalidArgumentValueException for any other mode_order.
+    """
+    if mode_order == "FTB":
+        return tensor[:,:,:subset_nb_bars]
+
+    if mode_order == "BFT":
+        return tensor[:subset_nb_bars]
+
+    raise err.InvalidArgumentValueException(f"Unknown mode order: {mode_order}.")
+    
+    
+def get_this_bar_tensor(tensor, bar_idx, mode_order = "BFT"):
+    """
+    Returns the slice of a barwise tensor-spectrogram corresponding to the bar of index bar_idx.
+    The bar mode is the first one for mode_order "BFT", and the third one for "FTB".
+    Raises an InvalidArgumentValueException for any other mode_order.
+    """
+    if mode_order == "FTB":
+        return tensor[:,:,bar_idx]
+
+    if mode_order == "BFT":
+        return tensor[bar_idx]
+
+    raise err.InvalidArgumentValueException(f"Unknown mode order: {mode_order}.")
+
+# %% Spectrogram to Barwise TF matrix
+def barwise_TF_matrix(spectrogram, bars, hop_length_seconds, subdivision, subset_nb_bars = None):
+    """
+    Computes the Barwise TF matrix: each bar of the song is represented as one Time-Frequency vector.
+    The spectrogram is first tensorized barwise (Bars-Frequency-Time), then unfolded along the bar mode.
+    See [1] for details.
+
+    Parameters
+    ----------
+    spectrogram : list of list of floats or numpy array
+        The spectrogram of the song.
+    bars : list of tuples
+        List of the bars (start, end), in seconds, to cut the spectrogram at bar delimitation.
+    hop_length_seconds : float
+        The hop_length, in seconds.
+    subdivision : integer
+        The number of subdivision of the bar to be contained in each slice of the tensor.
+    subset_nb_bars : integer, optional
+        Forwarded to tensorize_barwise_BFT, to restrict the computation to the first bars.
+        The default is None.
+
+    Returns
+    -------
+    np.array
+        The Barwise TF matrix, of sizes (b, tf).
+
+    """
+    barwise_tensor = tensorize_barwise_BFT(
+        spectrogram, bars, hop_length_seconds, subdivision, subset_nb_bars=subset_nb_bars
+    )
+    bar_mode = 0 # Bars are the first mode in the BFT organization
+    return tl.unfold(barwise_tensor, bar_mode)
+
+def barwise_subset_this_TF_matrix(matrix, subset_nb_bars):
+    """
+    Keeps only the first subset_nb_bars rows (i.e. bars) of a Barwise TF matrix.
+    subset_nb_bars must not be None (checked with an assertion).
+    """
+    # A None bound would silently select the whole matrix, hence the explicit check.
+    assert subset_nb_bars is not None
+    first_bars = slice(None, subset_nb_bars)
+    return matrix[first_bars]
+
+# %% Vector and Barwise TF to spectrogram
+def TF_vector_to_spectrogram(vector, frequency_dimension, subdivision):
+    """
+    Folds a Time-Frequency vector back into a Time-Frequency matrix (spectrogram of one bar).
+
+    Parameters
+    ----------
+    vector : np.array
+        A Time-Frequency vector (typically a row in the Barwise TF matrix).
+        Its size must be frequency_dimension * subdivision (checked with an assertion).
+    frequency_dimension : positive integer
+        The size of the frequency dimension
+        (number of components in this dimension).
+    subdivision : positive integer
+        The size of the time dimension at the bar scale
+        (number of time components in each bar, defined as parameter when creating the Barwise TF matrix).
+
+    Returns
+    -------
+    np.array
+        A Time-Frequency matrix (spectrogram) of size (frequency_dimension, subdivision).
+
+    """
+    expected_size = frequency_dimension*subdivision
+    assert expected_size == vector.shape[0]
+    spectrogram_shape = (frequency_dimension,subdivision)
+    return tl.fold(vector, 0, spectrogram_shape)
+
+def TF_matrix_to_spectrogram(matrix, frequency_dimension, subdivision, subset_nb_bars = None):
+    """
+    Converts a Barwise TF matrix into a spectrogram, by folding each row (bar)
+    into a Time-Frequency matrix and concatenating all bars along the time axis.
+
+    Parameters
+    ----------
+    matrix : np.array
+        The Barwise TF matrix, one Time-Frequency vector per row.
+    frequency_dimension : positive integer
+        The size of the frequency dimension.
+    subdivision : positive integer
+        The number of time components in each bar.
+    subset_nb_bars : integer, optional
+        If not None, only the first subset_nb_bars bars are converted.
+        The default is None.
+
+    Returns
+    -------
+    np.array or None
+        The spectrogram, of size (frequency_dimension, subdivision * number of bars),
+        or None if the matrix contains no bar.
+
+    """
+    if subset_nb_bars is not None:
+        matrix = barwise_subset_this_TF_matrix(matrix, subset_nb_bars)
+    bars_content = [TF_vector_to_spectrogram(tf_vector, frequency_dimension, subdivision) for tf_vector in matrix]
+    if not bars_content:
+        return None
+    if len(bars_content) == 1:
+        return bars_content[0] # A single bar is returned as is, without concatenation.
+    # All bars are concatenated at once, instead of growing the spectrogram bar after bar.
+    return np.concatenate(bars_content, axis=1)
+
+
+# Tensor to Barwise TF
+def tensor_barwise_to_barwise_TF(tensor, mode_order = "BFT"):
+    """
+    Unfolds a barwise tensor-spectrogram along its bar mode, resulting in a Barwise TF matrix.
+    The bar mode is 0 for mode_order "BFT", and 2 for "FTB".
+    Raises an InvalidArgumentValueException for any other mode_order.
+    """
+    if mode_order == "BFT": # Checked
+        bar_mode = 0
+    elif mode_order == "FTB": # Checked
+        bar_mode = 2
+    else:
+        raise err.InvalidArgumentValueException(f"Unknown mode order: {mode_order}.")
+    return tl.unfold(tensor, bar_mode)
+
+# %% Barwise TF to tensor
+# TODO
+
+# Beatwise MSAF
+def beat_synchronize_msaf(spectrogram, frame_times, beat_frames, beat_times):
+    """
+    Synchronizes a spectrogram on beats, using librosa's sync function (with padding).
+
+    Returns the beat-synchronous features and the associated times.
+    The times are a copy of beat_times, to which the last value of frame_times is appended
+    when the number of beat-synchronous features differs from the number of beat times.
+    """
+    # Make beat synchronous (presumably, the spectrogram is indexed as (time, features) -- TODO confirm)
+    synced_feats = librosa.util.utils.sync(spectrogram.T, beat_frames, pad=True).T
+
+    synced_times = np.copy(beat_times)
+    if synced_times.shape[0] == synced_feats.shape[0]:
+        return synced_feats, synced_times
+
+    # Sizes differ (padding): the last frame time is added as the last time.
+    last_time = [frame_times[-1]]
+    return synced_feats, np.concatenate((synced_times, last_time))
--- a/as_seg/data_manipulation.py
+++ b/as_seg/data_manipulation.py
@@ -49,22 +49,14 @@ def get_bars_from_audio(song_path):
    act = dbt.RNNDownBeatProcessor()(song_path)
    proc = dbt.DBNDownBeatTrackingProcessor(beats_per_bar=[3,4], fps=100)
    song_beats = proc(act)
-    downbeats_times = []
+    downbeats_times = [song_beats[0][0]]
    
-    if song_beats[0][1] != 1: # Adding a first downbeat at the start of the song
-        downbeats_times.append(0.1)
-    for beat in song_beats:
+    for beat in song_beats[1:]: # The first beat is already added
        if beat[1] == 1: # If the beat is a downbeat
            downbeats_times.append(beat[0])
            
-    # The following block of code artificially adds bars to the end of the song, in order to span the total song length.
-    # It seems like a good idea initially but may be detrimental, and should be debated anyway.
-    average_bar_length = np.mean([downbeats_times[i + 1] - downbeats_times[i] for i in range(len(downbeats_times) - 1)]) # average bar length in the song
    song_length = act.shape[0]/100 # Total length of the song
-    while downbeats_times[-1] + 1.1*average_bar_length < song_length: # As long as the bar estimation does not cover the entire song
-        downbeats_times.append(round(downbeats_times[-1] + average_bar_length, 2)) # artifically adds bars of the length of the average bar length in the song
    downbeats_times.append(song_length) # adding the last downbeat
-    
    return frontiers_to_segments(downbeats_times)
    
 def get_beats_from_audio_msaf(signal, sr, hop_length):
@@ -91,8 +83,8 @@ def get_beats_from_audio_madmom(song_path):
    act = bt.TCNBeatProcessor()(song_path)
    proc = bt.BeatTrackingProcessor(fps=100)
    song_beats = proc(act)
-    beats_times = []
    
+    # beats_times = []    
    # if song_beats[0][1] != 1: # Adding a first downbeat at the start of the song
        # beats_times.append(0.1)
    # for beat in song_beats:
@@ -435,7 +427,39 @@ def align_frontiers_on_bars(frontiers, bars):
            else:
                frontiers_on_bars.append(bars[i][0])
    return frontiers_on_bars
-            
+
+def get_median_hop(bars, subdivision = 96, sampling_rate = 44100):
+    """
+    Computes the median hop length (in samples) over the bars of a song, typically for audio reconstruction.
+    Every bar is described with 'subdivision' frames, whatever its duration in absolute time,
+    so the gap between two consecutive frames (the hop length) changes from one bar to another.
+    The median of these per-bar hop lengths is returned as a single value for the whole song.
+    Note that the first bar of the list (index 0) is not taken into account.
+
+    Parameters
+    ----------
+    bars : list of tuples of float
+        The bars, as (start time, end time) tuples.
+    subdivision : integer, optional
+        The number of frames sampled in each bar.
+        The default is 96.
+    sampling_rate : integer, optional
+        The sampling rate of the signal, in Hz.
+        The default is 44100.
+
+    Returns
+    -------
+    integer
+        The median hop length among these bars, truncated to an integer.
+
+    """
+    # The hop of a bar is its duration divided by the number of frames, converted to samples (truncated).
+    hops = [
+        int((this_bar[1] - this_bar[0])/subdivision * sampling_rate)
+        for this_bar in bars[1:] # The first bar is skipped
+    ]
+    return int(np.median(hops))
+
 # %% Sonification of the segmentation
 def sonify_frontiers_path(audio_file_path, frontiers_in_seconds, output_path):
    """
@@ -559,14 +583,14 @@ def compute_score_of_segmentation(reference, segments_in_time, window_length = 0
    ref_intervals, useless = mir_eval.util.adjust_intervals(reference,t_min=0)
    est_intervals, useless = mir_eval.util.adjust_intervals(np.array(segments_in_time), t_min=0, t_max=ref_intervals[-1, 1])
    try:
-        return mir_eval.segment.detection(ref_intervals, est_intervals, window = window_length, trim = False)
+        return mir_eval.segment.detection(ref_intervals, est_intervals, window = window_length, trim = True)
    except ValueError:
        cleaned_intervals = []
        #print("A segment is (probably) composed of the same start and end. Can happen with time -> bar -> time conversion, but should'nt happen for data originally segmented in bars.")
        for idx in range(len(est_intervals)):
            if est_intervals[idx][0] != est_intervals[idx][1]:
                cleaned_intervals.append(est_intervals[idx])
-        return mir_eval.segment.detection(ref_intervals, np.array(cleaned_intervals), window = window_length, trim = False)
+        return mir_eval.segment.detection(ref_intervals, np.array(cleaned_intervals), window = window_length, trim = True)

 def compute_median_deviation_of_segmentation(reference, segments_in_time):
    """

--- a/as_seg/example.py
+++ b/as_seg/example.py
@@ -18,7 +18,7 @@ import librosa
 import mirdata

 # Module encapsulating the computation of features from the signal
-import as_seg.model.features as features
+import as_seg.model.signal_to_spectrogram as signal_to_spectrogram

 # General module for manipulating data: conversion between time, bars, frame indexes, loading of data, ...
 import as_seg.data_manipulation as dm
@@ -36,7 +36,7 @@ import as_seg.CBM_algorithm as cbm
 from as_seg.model.current_plot import *

 # %% Loading annotations and defining the audio path
-path_to_beatles_dataset = 'C:/Users/amarmore/this_folder/Beatles dataset/' # To change
+path_to_beatles_dataset = '/home/a23marmo/datasets/beatles' # To change
 beatles = mirdata.initialize('beatles', path_to_beatles_dataset)
 beatles.download()

@@ -56,7 +56,7 @@ hop_length = 32 # Oversampling the spectrogram, to select frames which will be e
 hop_length_seconds = hop_length/sampling_rate # As bars are in seconds, we convert this hop length in seconds.
 subdivision_bars = 96 # The number of time samples to consider in each bar.

-log_mel = features.get_spectrogram(the_signal, sampling_rate, "log_mel_grill", hop_length = hop_length) # Log_mel spectrogram
+log_mel = signal_to_spectrogram.get_spectrogram(the_signal, sampling_rate, "log_mel", hop_length = hop_length) # Log_mel spectrogram

 # %% Cosine autosimilarity
 barwise_TF_cosine = bi.barwise_TF_matrix(log_mel, bars, hop_length_seconds, subdivision_bars)

--- a/as_seg/model/dataloaders.py
+++ b/as_seg/model/dataloaders.py
+import mirdata
+import librosa
+import as_seg.model.signal_to_spectrogram as signal_to_spectrogram
+import pathlib
+import shutil
+import numpy as np
+import warnings
+
+import as_seg
+
+eps = 1e-10
+
+class BaseDataloader():
+    """
+    Base class of the dataloaders: stores the feature parameters, and computes (or loads from a cache)
+    the bars and the Barwise TF matrix of a song.
+
+    Child classes must implement __getitem__ and __len__, and must define the attributes
+    data_path and dataset_name, which are read by save_segments.
+
+    Parameters
+    ----------
+    feature : string
+        Name of the feature, passed to signal_to_spectrogram.get_spectrogram.
+    cache_path : string, optional
+        Folder in which bars and Barwise TF matrices are cached.
+        The default is None (nothing is cached).
+    sr : integer, optional
+        Sampling rate used to load the signals. The default is 44100.
+    hop_length : integer, optional
+        Hop length of the spectrogram, in samples. The default is 32.
+    subdivision : integer, optional
+        Number of frames per bar in the Barwise TF matrix. The default is 96.
+    verbose : boolean, optional
+        If True, a message is printed when cached data is used. The default is False.
+    """
+    def __init__(self, feature, cache_path = None, sr=44100, hop_length = 32, subdivision = 96, verbose = False):
+        self.cache_path = cache_path
+        self.verbose = verbose
+
+        self.sr = sr
+        self.feature = feature
+        self.hop_length = hop_length
+
+        # For barwise or beatwise processing
+        self.subdivision = subdivision
+
+        self.frequency_dimension = signal_to_spectrogram.get_default_frequency_dimension(feature) # Risky, because it is not linked to the computation. Should be computed from the spectrogram.
+
+    def __getitem__(self, index):
+        raise NotImplementedError("This method should be implemented in the child class") from None
+
+    def __len__(self):
+        raise NotImplementedError("This method should be implemented in the child class") from None
+
+    def get_spectrogram(self, signal): # The spectrogram is not saved in the cache because it is too large in general
+        """Computes the spectrogram of the signal, with the feature and parameters of this dataloader."""
+        return signal_to_spectrogram.get_spectrogram(signal, self.sr, self.feature, self.hop_length)
+
+    def _load_or_compute(self, cache_folder, file_name, compute, index, cached_message):
+        """
+        Returns the content of the file "{cache_path}/{cache_folder}/{file_name}.npy" if it exists,
+        and otherwise calls compute() and saves its result in this file.
+        The cache is bypassed if cache_path is None, or (with a warning) if index is None.
+        """
+        if self.cache_path is None:
+            return compute()
+        if index is None:
+            warnings.warn("No index provided for the cache, the cache will be ignored")
+            return compute()
+
+        dir_save_path = f"{self.cache_path}/{cache_folder}"
+        file_path = f"{dir_save_path}/{file_name}.npy"
+        try:
+            # NOTE(review): allow_pickle=True can execute code when loading a crafted file, so cache_path must be a trusted folder.
+            cached_content = np.load(file_path, allow_pickle=True)
+        except FileNotFoundError:
+            computed_content = compute()
+            pathlib.Path(dir_save_path).mkdir(parents=True, exist_ok=True)
+            np.save(file_path, computed_content)
+            return computed_content
+        if self.verbose:
+            print(cached_message)
+        return cached_content
+
+    def get_bars(self, audio_path, index = None):
+        """
+        Returns the bars of the song located at audio_path.
+        If a cache is set, bars are loaded from (or saved in) the file "bars/{index}.npy" of the cache.
+        """
+        def _compute_bars():
+            return as_seg.data_manipulation.get_bars_from_audio(audio_path)
+
+        return self._load_or_compute("bars", f"{index}", _compute_bars, index, "Using cached bars.")
+
+    def get_barwise_tf_matrix(self, track_path, bars, index = None):
+        """
+        Returns the Barwise TF matrix of the song located at track_path, cut at the given bars.
+        The constant eps is added to the matrix when it is computed.
+        If a cache is set, the matrix is loaded from (or saved in) the folder "barwise_tf_matrix" of the cache.
+        """
+        def _compute_barwise_tf_matrix():
+            # Load the signal of the song
+            sig, _ = librosa.load(track_path, sr=self.sr, mono=True) #torchaudio.load(track.audio_path)
+            # Compute the spectrogram
+            spectrogram = self.get_spectrogram(sig)
+            return as_seg.barwise_input.barwise_TF_matrix(spectrogram, bars, self.hop_length/self.sr, self.subdivision) + eps
+
+        # NOTE(review): the file name depends neither on sr nor on hop_length; presumably the cache must be cleared when they change.
+        cache_file_name = f"{index}_{self.feature}_subdiv{self.subdivision}"
+        return self._load_or_compute("barwise_tf_matrix", cache_file_name, _compute_barwise_tf_matrix, index, "Using cached Barwise TF matrix.")
+
+    def save_segments(self, segments, name):
+        """
+        Saves the segments in "{data_path}/estimations/segments/{dataset name in lowercase}/{name}.npy".
+        data_path and dataset_name are not set by this class, but by the child classes.
+        """
+        # mirdata_segments = mirdata.annotations.SectionData(intervals=segments, interval_unit="s")
+        # jams_segments = mirdata.jams_utils.sections_to_jams(mirdata_segments)
+        dir_save_path = f"{self.data_path}/estimations/segments/{self.dataset_name.lower()}"
+        pathlib.Path(dir_save_path).mkdir(parents=True, exist_ok=True)
+        np.save(f"{dir_save_path}/{name}.npy", segments)
+
+    def score_flat_segmentation(self, segments, annotations):
+        """Scores the segments against the annotations, with tolerances of 0.5 and 3 seconds (returned in this order)."""
+        close_tolerance = as_seg.data_manipulation.compute_score_of_segmentation(annotations, segments, window_length=0.5)
+        large_tolerance = as_seg.data_manipulation.compute_score_of_segmentation(annotations, segments, window_length=3)
+        return close_tolerance, large_tolerance
+
+    def segments_from_bar_to_seconds(self, segments, bars):
+        """Converts segments expressed in bar indexes into segments expressed in time."""
+        # May be useful, if ever.
+        return as_seg.data_manipulation.segments_from_bar_to_time(segments, bars)
+
+class RWCPopDataloader(BaseDataloader):
+    """
+    Dataloader for the RWC Pop dataset, loaded with mirdata ('rwc_popular').
+    Each item is a tuple (track_id, bars, barwise_tf_matrix, annotations_intervals).
+    """
+    def __init__(self, path, feature, cache_path = None, download=False, sr=44100, hop_length = 32, subdivision = 96, verbose = False):
+        super().__init__(feature, cache_path, sr, hop_length, subdivision, verbose = verbose)
+        self.data_path = path
+        rwcpop = mirdata.initialize('rwc_popular', data_home = path)
+        if download:
+            rwcpop.download()
+        self.all_tracks = rwcpop.load_tracks()
+        self.indexes = rwcpop.track_ids
+
+        self.dataset_name = "RWCPop"
+
+    def __getitem__(self, index):
+        track_id = self.indexes[index]
+        track = self.all_tracks[track_id]
+
+        # Compute the bars
+        bars = self.get_bars(track.audio_path, index=track_id)
+
+        # Compute the barwise TF matrix
+        barwise_tf_matrix = self.get_barwise_tf_matrix(track.audio_path, bars, index=track_id)
+
+        # Get the annotations
+        annotations_intervals = track.sections.intervals
+
+        # Return the track id, the bars, the barwise TF matrix and the annotations
+        return track_id, bars, barwise_tf_matrix, annotations_intervals
+
+    def __len__(self):
+        return len(self.indexes)
+
+    def get_track_of_id(self, track_id):
+        """Returns the item associated with this track id (a ValueError is raised if the id is unknown)."""
+        index = self.indexes.index(track_id)
+        # __getitem__ is a method: it has to be called (or used via self[index]), and not subscripted.
+        return self[index]
+
+    def format_dataset(self, path_audio_files):
+        """
+        Copies the audio files to the locations expected by mirdata.
+        Supposes that the audio files are all in the same folder (path_audio_files).
+        """
+        for track_num in range(len(self.all_tracks)):
+            track_idx = self.indexes[track_num]
+            dest = self.all_tracks[track_idx].audio_path
+            song_file_name = pathlib.Path(dest).name
+            src = f"{path_audio_files}/{song_file_name}" # May change depending on your file structure
+            pathlib.Path(dest).parent.absolute().mkdir(parents=True, exist_ok=True)
+            shutil.copy(src, dest)
+
+class SALAMIDataloader(BaseDataloader):
+    """
+    Dataloader for the SALAMI dataset, loaded with mirdata ('salami').
+    Handles the two annotators and the two annotation levels (upper and lower) of the dataset.
+    Each item is a tuple (track_id, bars, barwise_tf_matrix, dict_annotations),
+    where the last three values are None if the audio file is not found.
+
+    The subset parameter restricts the tracks: "test" for the tracks annotated by both annotators,
+    "train" for the other ones, "debug" for the first 4 tracks of the test subset, None for all tracks.
+    """
+    def __init__(self, path, feature, cache_path = None, download=False, subset = None, sr=44100, hop_length = 32, subdivision = 96, verbose = False):
+        super().__init__(feature, cache_path, sr, hop_length, subdivision, verbose = verbose)
+
+        self.dataset_name = "SALAMI"
+
+        self.data_path = path
+        salami = mirdata.initialize('salami', data_home = path)
+        if download:
+            salami.download()
+        self.all_tracks = salami.load_tracks()
+        self.indexes = salami.track_ids
+
+        self.subset = subset
+        if subset is not None:
+            train_indexes, test_indexes = self.split_training_test()
+            if subset == "train":
+                self.indexes = train_indexes
+            elif subset == "test":
+                self.indexes = test_indexes
+            elif subset == "debug":
+                self.indexes = test_indexes[:4]
+            else:
+                raise ValueError("Subset should be either 'train', 'test' or 'debug'")
+
+
+    def __getitem__(self, index):
+        # Parsing through files ordered with self.indexes
+        track_id = self.indexes[index]
+        track = self.all_tracks[track_id]
+
+        try:
+            # Compute the bars
+            bars = self.get_bars(track.audio_path, index=track_id)
+
+            # Compute the barwise TF matrix
+            barwise_tf_matrix = self.get_barwise_tf_matrix(track.audio_path, bars, index=track_id)
+
+            # Get the annotations
+            dict_annotations = self.get_annotations(track)
+
+            # Return the track id, the bars, the barwise TF matrix and the annotations
+            return track_id, bars, barwise_tf_matrix, dict_annotations
+
+        except FileNotFoundError:
+            print(f'{track_id} not found.')
+            return track_id, None, None, None
+            # raise FileNotFoundError(f"Song {track_id} not found, normal ?") from None
+
+    def __len__(self):
+        # To handle the fact that indexes are updated with the subset
+        return len(self.indexes)
+
+    def get_track_of_id(self, track_id):
+        """Returns the item associated with this track id, searched as given and then as a string."""
+        try:
+            index = self.indexes.index(track_id)
+        except ValueError:
+            try:
+                index = self.indexes.index(str(track_id))
+            except ValueError:
+                raise ValueError(f"Track {track_id} not found in the dataset") from None
+        return self.__getitem__(index)
+
+    def get_annotations(self, track):
+        """
+        Returns the annotations of this track as a dictionary, with keys:
+            - "upper_level_annotations" and "lower_level_annotations": intervals of the first available annotator,
+            - "upper_level_annotations_2" and "lower_level_annotations_2": intervals of the second annotator,
+              only when both annotators are available,
+            - "annot_number": number of annotators stored in the dictionary (1 or 2).
+        An annotator is considered unavailable when reading its intervals raises an AttributeError.
+        Raises an AttributeError if no annotator is available.
+        """
+        dict_annotations = {}
+        try:
+            # Trying to get the first annotator
+            dict_annotations["upper_level_annotations"] = track.sections_annotator_1_uppercase.intervals
+            dict_annotations["lower_level_annotations"] = track.sections_annotator_1_lowercase.intervals
+            try: # Trying to load the second annotator
+                dict_annotations["upper_level_annotations_2"] = track.sections_annotator_2_uppercase.intervals
+                dict_annotations["lower_level_annotations_2"] = track.sections_annotator_2_lowercase.intervals
+                dict_annotations["annot_number"]  = 2
+            except AttributeError: # Only the first annotator was loaded
+                dict_annotations["annot_number"]  = 1
+        except AttributeError:
+            try:
+                # Trying to get the second annotator (no first one)
+                dict_annotations["upper_level_annotations"] = track.sections_annotator_2_uppercase.intervals
+                dict_annotations["lower_level_annotations"] = track.sections_annotator_2_lowercase.intervals
+                dict_annotations["annot_number"]  = 1
+            except AttributeError:
+                raise AttributeError(f"No annotations found for {track.track_id}")
+
+        return dict_annotations
+
+    def get_this_set_annotations(self, dict_annotations, annotation_level = "upper", annotator = 1):
+        """
+        Selects one set of annotations in a dictionary returned by get_annotations.
+
+        annotation_level is either "upper" or "lower", and annotator is either 1 or 2.
+        Annotator 2 is only available when both annotators are stored in the dictionary (checked with an assertion).
+        Raises a ValueError for any other annotation level or annotator.
+        """
+        if annotator == 1:
+            key_suffix = ""
+        elif annotator == 2:
+            assert dict_annotations["annot_number"] == 2, "No second annotator found."
+            key_suffix = "_2" # Annotations of the second annotator are stored under the "_2" keys.
+        # elif annotator == "both":
+        #     assert dict_annotations["annot_number"] == 2, "No second annotator found."
+        #     annotations = dict_annotations["upper_level_annotations"] + dict_annotations["upper_level_annotations_2"]
+        else:
+            raise ValueError("Invalid annotator number")
+
+        if annotation_level not in ("upper", "lower"):
+            raise ValueError("Invalid annotation level")
+        return dict_annotations[f"{annotation_level}_level_annotations{key_suffix}"]
+
+    def split_training_test(self):
+        """
+        Splits the current indexes in two lists (train, test):
+        tracks for which the upper level annotations of both annotators can be read are in the test list,
+        the other ones in the train list.
+        """
+        indexes_train = []
+        indexes_test = []
+        for track_id in self.indexes:
+            track = self.all_tracks[track_id]
+            try:
+                track.sections_annotator_1_uppercase.intervals
+                track.sections_annotator_2_uppercase.intervals
+                indexes_test.append(track_id)
+            except AttributeError:
+                indexes_train.append(track_id)
+        return indexes_train, indexes_test
+
+    def score_flat_segmentation(self, segments, dict_annotations, annotation_level = "upper", annotator = 1):
+        """
+        Scores the segments against one set of annotations (see get_this_set_annotations).
+        If annotator is "both", returns the scores for annotator 1 and for annotator 2, in this order.
+        """
+        if annotator == "both":
+            assert dict_annotations["annot_number"] == 2, "No second annotator found."
+            score_annot_1 = self.score_flat_segmentation(segments, dict_annotations, annotation_level = annotation_level, annotator = 1)
+            score_annot_2 = self.score_flat_segmentation(segments, dict_annotations, annotation_level = annotation_level, annotator = 2)
+            return score_annot_1, score_annot_2
+
+        annotations = self.get_this_set_annotations(dict_annotations, annotation_level = annotation_level, annotator = annotator)
+        return super().score_flat_segmentation(segments, annotations)
+
+    def score_flat_segmentation_twolevel(self, segments_upper_level, segments_lower_level, dict_annotations, annotator = 1):
+        """Scores one segmentation per annotation level, and returns (upper level score, lower level score)."""
+        score_upper_level = self.score_flat_segmentation(segments_upper_level, dict_annotations, annotation_level = "upper", annotator = annotator)
+        score_lower_level = self.score_flat_segmentation(segments_lower_level, dict_annotations, annotation_level = "lower", annotator = annotator)
+        return score_upper_level, score_lower_level
+
+    def score_flat_segmentation_twolevel_best_of_several(self, list_segments_upper_level, list_segments_lower_level, dict_annotations, annotator = 1):
+        """
+        Scores several candidate segmentations per annotation level, and keeps the best ones.
+        For each level, the best candidate is selected independently for each tolerance (0.5s and 3s),
+        as the one maximizing the f measure: both scores may hence come from different candidates.
+        Returns (upper level scores, lower level scores).
+        """
+        assert annotator != "both", "Not implemented yet"
+        stack_upper_scores = -np.inf * np.ones((len(list_segments_upper_level),2,3))
+        for idx, segments in enumerate(list_segments_upper_level):
+            stack_upper_scores[idx] = self.score_flat_segmentation(segments, dict_annotations, annotation_level = "upper", annotator = annotator)
+        idx_close = np.argmax(stack_upper_scores[:,0,2]) # Selecting best f measure at 0.5s
+        idx_large = np.argmax(stack_upper_scores[:,1,2]) # Selecting best f measure at 3s
+        score_upper_level = (stack_upper_scores[idx_close,0,:], stack_upper_scores[idx_large,1,:])
+
+        stack_lower_scores = -np.inf * np.ones((len(list_segments_lower_level),2,3))
+        for idx, segments in enumerate(list_segments_lower_level):
+            stack_lower_scores[idx] = self.score_flat_segmentation(segments, dict_annotations, annotation_level = "lower", annotator = annotator)
+        idx_close = np.argmax(stack_lower_scores[:,0,2]) # Selecting best f measure at 0.5s
+        idx_large = np.argmax(stack_lower_scores[:,1,2]) # Selecting best f measure at 3s
+        score_lower_level = (stack_lower_scores[idx_close,0,:], stack_lower_scores[idx_large,1,:])
+
+        return score_upper_level, score_lower_level
+
+
+    def get_sizes_of_annotated_segments(self, annotation_level = "upper", annotator = 1, plot = False):
+        """
+        Returns the list of the lengths (in number of bars) of all annotated segments in the current indexes.
+        Tracks whose audio file is not found are skipped (a message is printed).
+        If plot is True, the lengths are also passed to the plotting function of current_plot.
+        """
+        lengths = []
+        for track_id in self.indexes:
+            track = self.all_tracks[track_id]
+
+            try:
+                # Compute the bars
+                bars = self.get_bars(track.audio_path, index=track_id)
+
+                # Get the annotations
+                dict_annotations = self.get_annotations(track)
+
+                annotations = self.get_this_set_annotations(dict_annotations, annotation_level = annotation_level, annotator = annotator)
+
+                barwise_annot = as_seg.data_manipulation.frontiers_from_time_to_bar(np.array(annotations)[:,1], bars) # Convert the annotations from time to bar
+                for i in range(len(barwise_annot) - 1):
+                    lengths.append(barwise_annot[i+1] - barwise_annot[i]) # Store the length of the annotated segment
+
+            except FileNotFoundError:
+                print(f'{track_id} not found.')
+                # raise FileNotFoundError(f"Song {track_id} not found, normal ?") from None
+
+        if plot:
+            as_seg.model.current_plot.plot_lenghts_hist(lengths)
+        return lengths
+
+    # def format_dataset(self, path_audio_files): # TODO
+        # # Copy audio files to the right location.
+        # # Suppose that the audio files are all in the same folder
+        # for track_num in range(len(self.all_tracks)):
+        #     track_idx = self.indexes[track_num]
+        #     song_file_name = self.all_tracks[track_idx].audio_path.split('/')[-1]
+        #     src = f"{path_audio_files}/{song_file_name}" # May change depending on your file structure
+        #     dest = self.all_tracks[track_idx].audio_path
+        #     pathlib.Path(dest).parent.absolute().mkdir(parents=True, exist_ok=True)
+        #     shutil.copy(src, dest)
+    
+
+class BeatlesDataloader(BaseDataloader):
+    """
+    Dataloader for the Beatles dataset, loaded with mirdata ('beatles').
+    Each item is a tuple (track_id, bars, barwise_tf_matrix, annotations_intervals).
+    """
+    def __init__(self, path, feature, cache_path = None, download=False, sr=44100, hop_length = 32, subdivision = 96):
+        super().__init__(feature, cache_path, sr, hop_length, subdivision)
+        self.data_path = path
+        beatles = mirdata.initialize('beatles', data_home = path)
+        if download:
+            beatles.download()
+        self.all_tracks = beatles.load_tracks()
+        self.indexes = beatles.track_ids
+
+        self.dataset_name = "Beatles"
+
+    def __getitem__(self, index):
+        track_id = self.indexes[index]
+        audio_path = self.all_tracks[track_id].audio_path
+
+        # Bars first, as they are needed to compute the barwise TF matrix
+        bars = self.get_bars(audio_path, index=track_id)
+        barwise_tf_matrix = self.get_barwise_tf_matrix(audio_path, bars, index=track_id)
+
+        # Annotations of the sections, as intervals
+        annotations_intervals = self.all_tracks[track_id].sections.intervals
+
+        return track_id, bars, barwise_tf_matrix, annotations_intervals
+
+    def __len__(self):
+        return len(self.all_tracks)
+
+    def get_track_of_id(self, track_id):
+        """Returns the item associated with this track id, searched as given and then as a string."""
+        for candidate in (track_id, str(track_id)):
+            try:
+                position = self.indexes.index(candidate)
+            except ValueError:
+                continue
+            return self.__getitem__(position)
+        raise ValueError(f"Track {track_id} not found in the dataset") from None
+
+
+if __name__ == "__main__":
+    # Items of the dataloaders are tuples of 4 values: (track_id, bars, barwise_tf_matrix, annotations).
+    # rwcpop = RWCPopDataloader('/home/a23marmo/datasets/rwcpop', feature = "mel", cache_path = "/home/a23marmo/Bureau/data_persisted/rwcpop")
+    # # rwcpop.format_dataset('/home/a23marmo/Bureau/Audio samples/rwcpop/Entire RWC')
+    # for track_id, bars, barwise_tf_matrix, annotations in rwcpop:
+    #     print(barwise_tf_matrix.shape, track_id)
+
+    salami = SALAMIDataloader('/home/a23marmo/datasets/salami', feature = "mel", cache_path = "/home/a23marmo/Bureau/data_persisted/salami", subset = "train")
+
+    for track_id, bars, barwise_tf_matrix, annotations in salami:
+        # Missing audio files are already reported by __getitem__, which then returns None instead of the bars.
+        if bars is None:
+            continue
+        print(track_id)
--- a/as_seg/model/features.py
+++ b/as_seg/model/features.py
-# -*- coding: utf-8 -*-
-"""
-Created on Wed Mar 25 16:54:59 2020
-
-@author: amarmore
-
-Computing spectrogram in different feature description.
-
-Note that Mel (and variants of Mel) spectrograms are denoted "mel_grill", 
-as they follow the particular definition of [1].
-
-[1] Grill, T., & Schlüter, J. (2015, October). 
-Music Boundary Detection Using Neural Networks on Combined Features and Two-Level Annotations. 
-In ISMIR (pp. 531-537).
-"""
-
-import numpy as np
-import librosa.core
-import librosa.feature
-import librosa.effects
-from math import inf
-import as_seg.model.errors as err
-import IPython.display as ipd
-
-def get_spectrogram(signal, sr, feature, hop_length, n_fft = 2048, fmin = 98, n_mfcc = 20):
-    """
-    Returns a spectrogram, from the signal of a song.
-    Different types of spectrogram can be computed, which are specified by the argument "feature".
-    All these spectrograms are computed with the toolbox librosa [1].
-    
-    Parameters
-    ----------
-    signal : numpy array
-        Signal of the song.
-    sr : float
-        Sampling rate of the signal, (typically 44100Hz).
-    feature : String
-        The types of spectrograms to compute.
-            - stft : computes the Short-Time Fourier Transform of the signal.
-            Returns the Power spectrogram.
-            - pcp : computes a chromagram.
-            NB: this chromagram has been specificly fitted as a team, 
-            and the arguments are non standard but rather technical choices.
-            - pcp_stft : computes a chromagram from the stft of the song.
-            - cqt : computes a Constant-Q transform of the song.
-            - log_cqt : computes the logarithm of the Constant-Q transform of the song.
-            - tonnetz : computes the tonnetz representation of the song.
-            - pcp_tonnetz : computes the tonnetz representation of the song, starting from the chromas.
-                It allows us to better control paramaters over the computation of tonnetz, 
-                and can reduce computation when chromas are already computed (for scripts loading already computed spectrograms).
-            - mfcc : computes the Mel-Frequency Cepstral Coefficients of the song.
-            - mel_grill : computes the mel-spectrogram of the song, as dimensioned by [2].
-            - log_mel_grill : computes the logarithm of the previously defined mel-spectrogram.
-            - pos_log_mel_grill : computes the log(mel + 1) of the previously defined mel-spectrogram.
-
-    hop_length : integer
-        The desired hop_length, which is the step between two frames (ie the time "discretization" step)
-        It is expressed in terms of number of samples, which are defined by the sampling rate.
-    n_fft : integer, optional
-        Number of frames by stft feature.
-        The default is 2048.
-    fmin : integer, optional
-        The minimal frequence to consider, used for denoising.
-        The default is 98.
-    n_mfcc : integer, optional
-        Number of mfcc features.
-        The default is 20 (as in librosa).
-
-    Raises
-    ------
-    InvalidArgumentValueException
-        If the "feature" argument is not presented above.
-
-    Returns
-    -------
-    numpy array
-        Spectrogram of the signal.
-        
-    References
-    ----------
-    [1] McFee, B., Raffel, C., Liang, D., Ellis, D. P., McVicar, M., Battenberg, E., & Nieto, O. (2015, July).
-    librosa: Audio and music signal analysis in python. 
-    In Proceedings of the 14th python in science conference (Vol. 8).
-    
-    [2] Grill, T., & Schlüter, J. (2015, October). 
-    Music Boundary Detection Using Neural Networks on Combined Features and Two-Level Annotations. 
-    In ISMIR (pp. 531-537).
-    """
-    if feature.lower() == "stft":
-        if len(signal.shape) == 1:
-            stft = librosa.core.stft(y=np.asfortranarray(signal), n_fft=n_fft, hop_length = hop_length)
-            power_spectrogram = np.abs(stft) ** 2
-            return power_spectrogram
-        
-        power_spectrogram = np.abs(librosa.core.stft(y=np.asfortranarray(signal[:,0]), n_fft=n_fft, hop_length = hop_length))**2
-        for i in range(1,signal.shape[1]):
-            power_spectrogram += np.abs(librosa.core.stft(y=np.asfortranarray(signal[:,i]), n_fft=n_fft, hop_length = hop_length))**2
-        return power_spectrogram
-    
-    elif feature.lower() == "pcp_stft":
-        if len(signal.shape) == 1:
-            audio_harmonic, _ = librosa.effects.hpss(y=np.asfortranarray(signal))
-            chroma_stft = librosa.feature.chroma_stft(y=audio_harmonic, sr=sr, n_fft = n_fft, hop_length=hop_length)
-            return chroma_stft
-        audio_harmonic, _ = librosa.effects.hpss(y=np.asfortranarray(signal[:,0]))
-        chroma_stft = librosa.feature.chroma_stft(y=audio_harmonic, sr=sr, n_fft = n_fft, hop_length=hop_length)
-        for i in range(1,signal.shape[1]):
-            audio_harmonic, _ = librosa.effects.hpss(y=np.asfortranarray(signal[:,i]))
-            chroma_stft += librosa.feature.chroma_stft(y=audio_harmonic, sr=sr, n_fft = n_fft, hop_length=hop_length)   
-        return chroma_stft
-    elif feature == "pcp":
-        norm=inf # Columns normalization
-        win_len_smooth=82 # Size of the smoothign window
-        n_octaves=6
-        bins_per_chroma = 3
-        bins_per_octave=bins_per_chroma * 12
-        if len(signal.shape) == 1:
-            return librosa.feature.chroma_cens(y=np.asfortranarray(signal),sr=sr,hop_length=hop_length,
-                                   fmin=fmin, n_chroma=12, n_octaves=n_octaves, bins_per_octave=bins_per_octave,
-                                   norm=norm, win_len_smooth=win_len_smooth)
-        
-        pcp = librosa.feature.chroma_cens(y=np.asfortranarray(signal[:,0]),sr=sr,hop_length=hop_length,
-                                   fmin=fmin, n_chroma=12, n_octaves=n_octaves, bins_per_octave=bins_per_octave,
-                                   norm=norm, win_len_smooth=win_len_smooth)
-        for i in range(1,signal.shape[1]):
-            pcp += librosa.feature.chroma_cens(y=np.asfortranarray(signal[:,i]),sr=sr,hop_length=hop_length,
-                                   fmin=fmin, n_chroma=12, n_octaves=n_octaves, bins_per_octave=bins_per_octave,
-                                   norm=norm, win_len_smooth=win_len_smooth)
-    
-        return pcp
-    elif feature.lower() == "cqt":
-        if len(signal.shape) == 1:
-            constant_q_transf = librosa.core.cqt(y=np.asfortranarray(signal), sr = sr, hop_length = hop_length)
-            power_cqt = np.abs(constant_q_transf) ** 2
-            return power_cqt
-        power_cqt = np.abs(librosa.core.cqt(y=np.asfortranarray(signal[:,0]), sr = sr, hop_length = hop_length)) ** 2
-        for i in range(1,signal.shape[1]):
-            power_cqt += np.abs(librosa.core.cqt(y=np.asfortranarray(signal[:,i]), sr = sr, hop_length = hop_length)) ** 2
-        return power_cqt
-    elif feature.lower() == "log_cqt":
-        if len(signal.shape) == 1:
-            constant_q_transf = librosa.core.cqt(y=np.asfortranarray(signal), sr = sr, hop_length = hop_length)
-            power_cqt = np.abs(constant_q_transf) ** 2
-            log_cqt = ((1.0/80.0) * librosa.core.amplitude_to_db(y=np.abs(np.array(power_cqt)), ref=np.max)) + 1.0
-            return log_cqt
-        power_cqt = np.abs(librosa.core.cqt(y=np.asfortranarray(signal[:,0]), sr = sr, hop_length = hop_length)) ** 2
-        for i in range(1,signal.shape[1]):
-            power_cqt += np.abs(librosa.core.cqt(y=np.asfortranarray(signal[:,i]), sr = sr, hop_length = hop_length)) ** 2
-        log_cqt = ((1.0/80.0) * librosa.core.amplitude_to_db(y=np.abs(np.array(power_cqt)), ref=np.max)) + 1.0
-        return log_cqt
-    elif feature.lower() == "tonnetz":
-        if len(signal.shape) == 1:
-            return librosa.feature.tonnetz(y=np.asfortranarray(signal), sr = sr)
-        tonnetz = librosa.feature.tonnetz(y=np.asfortranarray(signal[:,0]), sr = sr)
-        for i in range(1,signal.shape[1]):
-            tonnetz += librosa.feature.tonnetz(y=np.asfortranarray(signal[:,i]), sr = sr)
-        return tonnetz
-    elif feature.lower() == "pcp_tonnetz":
-        return librosa.feature.tonnetz(y=None, sr = None, chroma = get_spectrogram(signal, sr, "pcp", hop_length, fmin = fmin))
-    # elif feature.lower() == "hcqt":
-    #     return my_compute_hcqt(np.asfortranarray(signal[:,0]), sr)
-    
-    elif feature.lower() == "mfcc":
-        if len(signal.shape) == 1:
-            return librosa.feature.mfcc(y=np.asfortranarray(signal), sr = sr, hop_length = hop_length, n_mfcc=n_mfcc)
-        mfcc = librosa.feature.mfcc(y=np.asfortranarray(signal[:,0]), sr = sr, hop_length = hop_length, n_mfcc=n_mfcc)
-        for i in range(1,signal.shape[1]):
-            mfcc += librosa.feature.mfcc(y=np.asfortranarray(signal[:,i]), sr = sr, hop_length = hop_length, n_mfcc=n_mfcc)
-        return mfcc
-    
-    # For Mel spectrograms, we use the same parameters as the ones of [2].
-    # [2] Grill, Thomas, and Jan Schlüter. "Music Boundary Detection Using Neural Networks on Combined Features and Two-Level Annotations." ISMIR. 2015.
-    elif feature.lower() == "mel_grill":
-        if len(signal.shape) == 1:
-            return np.abs(librosa.feature.melspectrogram(y=np.asfortranarray(signal), sr = sr, n_fft=2048, hop_length = hop_length, n_mels=80, fmin=80.0, fmax=16000))
-        mel = np.abs(librosa.feature.melspectrogram(y=np.asfortranarray(signal[:,0]), sr = sr, n_fft=2048, hop_length = hop_length, n_mels=80, fmin=80.0, fmax=16000))
-        for i in range(1,signal.shape[1]):
-            mel += np.abs(librosa.feature.melspectrogram(y=np.asfortranarray(signal[:,i]), sr = sr, n_fft=2048, hop_length = hop_length, n_mels=80, fmin=80.0, fmax=16000))
-        return mel
-    
-    elif feature == "log_mel_grill":
-        if len(signal.shape) == 1:
-            return librosa.power_to_db(np.abs(librosa.feature.melspectrogram(y=np.asfortranarray(signal), sr = sr, n_fft=2048, hop_length = hop_length, n_mels=80, fmin=80.0, fmax=16000)))
-        mel = np.abs(librosa.feature.melspectrogram(y=np.asfortranarray(signal[:,0]), sr = sr, n_fft=2048, hop_length = hop_length, n_mels=80, fmin=80.0, fmax=16000))
-        for i in range(1,signal.shape[1]):
-            mel += np.abs(librosa.feature.melspectrogram(y=np.asfortranarray(signal[:,i]), sr = sr, n_fft=2048, hop_length = hop_length, n_mels=80, fmin=80.0, fmax=16000))
-        return librosa.power_to_db(mel)
-    
-    elif feature == "nn_log_mel_grill":
-        if len(signal.shape) == 1:
-            mel = np.abs(librosa.feature.melspectrogram(y=np.asfortranarray(signal), sr = sr, n_fft=2048, hop_length = hop_length, n_mels=80, fmin=80.0, fmax=16000))
-            return librosa.power_to_db(mel + np.ones(mel.shape))
-        mel = np.abs(librosa.feature.melspectrogram(y=np.asfortranarray(signal[:,0]), sr = sr, n_fft=2048, hop_length = hop_length, n_mels=80, fmin=80.0, fmax=16000))
-        for i in range(1,signal.shape[1]):
-            mel += np.abs(librosa.feature.melspectrogram(y=np.asfortranarray(signal[:,i]), sr = sr, n_fft=2048, hop_length = hop_length, n_mels=80, fmin=80.0, fmax=16000))
-        return librosa.power_to_db(mel + np.ones(mel.shape))
-    
-    elif feature == "padded_log_mel_grill":
-        log_mel = get_spectrogram(signal, sr, "log_mel_grill", hop_length)
-        return log_mel - np.amin(log_mel) * np.ones(log_mel.shape)
-    
-    elif feature == "mel" or feature == "log_mel" or feature == "nn_log_mel":
-        raise err.InvalidArgumentValueException("Invalid feature parameter, aren't you looking for mel_grill/log_mel_grill (the only available Mel Spectrograms)?")
-    else:
-        raise err.InvalidArgumentValueException(f"Unknown signal representation: {feature}.")
-
-def get_log_mel_from_mel(mel_spectrogram, feature):
-    """
-    Computes a variant of a Mel spectrogram (typically Log Mel).
-
-    Parameters
-    ----------
-    mel_spectrogram : numpy array
-        Mel spectrogram of the signal.
-    feature : string
-        Desired feature name (must be a variant of a Mel spectrogram).
-
-    Raises
-    ------
-    err.InvalidArgumentValueException
-        Raised in case of unknown feature name.
-
-    Returns
-    -------
-    numpy array
-        Variant of the Mel spectrogram of the signal.
-
-    """
-    if feature == "log_mel_grill":
-        return librosa.power_to_db(np.abs(mel_spectrogram))
-    
-    elif feature == "nn_log_mel_grill":
-        return librosa.power_to_db(mel_spectrogram + np.ones(mel_spectrogram.shape))
-    
-    elif feature == "padded_log_mel_grill":
-        log_mel = get_log_mel_from_mel(mel_spectrogram, "log_mel_grill")
-        return log_mel - np.amin(log_mel) * np.ones(log_mel.shape)
-        
-    elif feature == "minmax_log_mel_grill":        
-        padded_log_mel = get_log_mel_from_mel(mel_spectrogram, "padded_log_mel_grill")
-        return np.divide(padded_log_mel, np.amax(padded_log_mel))
-
-    elif feature == "mel" or feature == "log_mel":
-        raise err.InvalidArgumentValueException("Invalid mel parameter, are't you looking for mel_grill?")
-    else:
-        raise err.InvalidArgumentValueException("Unknown feature representation.")
-        
-def get_audio_from_spectrogram(spectrogram, feature, hop_length, sr):
-    """
-    Computes an audio signal for a COMPLEX-valued spectrogram.
-
-    Parameters
-    ----------
-    spectrogram : numpy array
-        Complex-valued spectrogram.
-    feature : string
-        Name of the particular feature used for representing the signal in a spectrogram.
-    hop_length : int
-        Hop length of the spectrogram
-        (Or similar value for the reconstruction to make sense).
-    sr : inteer
-        Sampling rate of the signal, when processed into a spectrogram
-        (Or similar value for the reconstruction to make sense).
-
-    Raises
-    ------
-    InvalidArgumentValueException
-        In case of an unknown feature representation.
-
-    Returns
-    -------
-    ipd.Audio
-        Audio signal of the spectrogram.
-
-    """
-    if feature == "stft":
-        audio = librosa.griffinlim(spectrogram, hop_length = hop_length)
-        return ipd.Audio(audio, rate=sr)
-    elif feature == "mel_grill":
-        stft = librosa.feature.inverse.mel_to_stft(spectrogram, sr=sr, n_fft=2048, power=2.0, fmin=80.0, fmax=16000)
-        return get_audio_from_spectrogram(stft, "stft", hop_length, sr)
-    elif feature == "nn_log_mel_grill":
-        mel = librosa.db_to_power(spectrogram) - np.ones(spectrogram.shape)
-        return get_audio_from_spectrogram(mel, "mel_grill", hop_length, sr)
-    else:
-        raise err.InvalidArgumentValueException("Unknown feature representation, can't reconstruct a signal.")
-        
-        
-        
-# %% Implementation of PCP from MSAF (for baseline comparison)
-def get_pcp_as_msaf(signal, sr, hop_length):
-    audio_harmonic, _ = librosa.effects.hpss(y=signal)
-    pcp_cqt = np.abs(librosa.hybrid_cqt(y=audio_harmonic,
-                                        sr=sr,
-                                        hop_length=hop_length,
-                                        n_bins=84,
-                                        norm=np.inf,
-                                        fmin=27.5)) ** 2
-    pcp = librosa.feature.chroma_cqt(C=pcp_cqt,
-                                    sr=sr,
-                                    hop_length=hop_length,
-                                    n_octaves=6,
-                                    fmin=27.5).T
-                                    
-    frame_times = librosa.core.frames_to_time(np.arange(pcp.shape[0]), sr, hop_length)
-
-    return pcp, frame_times
-
-def get_beatsync_pcp_as_msaf(signal, sr, hop_length):
-    audio_harmonic, audio_percussive = librosa.effects.hpss(y=signal)
-
-    pcp_cqt = np.abs(librosa.hybrid_cqt(y=audio_harmonic,
-                                        sr=sr,
-                                        hop_length=hop_length,
-                                        n_bins=84,
-                                        norm=np.inf,
-                                        fmin=27.5)) ** 2
-    pcp = librosa.feature.chroma_cqt(C=pcp_cqt,
-                                    sr=sr,
-                                    hop_length=hop_length,
-                                    n_octaves=6,
-                                    fmin=27.5).T
-                                    
-    frame_times = librosa.core.frames_to_time(np.arange(pcp.shape[0]), sr, hop_length)
-    
-    # Compute beats
-    _, beat_frames = librosa.beat.beat_track(y=audio_percussive, sr=sr, hop_length=hop_length)
-
-    # To times
-    beat_times = librosa.frames_to_time(beat_frames, sr=sr,hop_length=hop_length)
-
-    # TODO: Is this really necessary?
-    if len(beat_times) > 0 and beat_times[0] == 0:
-        beat_times = beat_times[1:]
-        beat_frames = beat_frames[1:]
-
-    # Make beat synchronous
-    beatsync_feats = librosa.util.utils.sync(pcp.T, beat_frames, pad=True).T
-
-    # Assign times (and add last time if padded)
-    beatsync_times = np.copy(beat_times)
-    if beatsync_times.shape[0] != beatsync_feats.shape[0]:
-        beatsync_times = np.concatenate((beatsync_times,
-                                         [frame_times[-1]]))
-    return beatsync_feats, beatsync_times
\ No newline at end of file
--- a/as_seg/model/signal_to_spectrogram.py
+++ b/as_seg/model/signal_to_spectrogram.py
+# -*- coding: utf-8 -*-
+"""
+Created on Wed Mar 25 16:54:59 2020
+
+@author: amarmore
+
+Computing spectrograms in different feature descriptions.
+
+Note that Mel (and variants of Mel) spectrograms follow the particular definition of [1].
+
+[1] Grill, T., & Schlüter, J. (2015, October). 
+Music Boundary Detection Using Neural Networks on Combined Features and Two-Level Annotations. 
+In ISMIR (pp. 531-537).
+"""
+
+import numpy as np
+import librosa.core
+import librosa.feature
+import librosa.effects
+from math import inf
+import as_seg.model.errors as err
+import IPython.display as ipd
+
+mel_power = 2
+
+# TODO: add MFCC, maybe tonnetz
+def get_spectrogram(signal, sr, feature, hop_length, fmin = 98):
+    """
+    Returns a spectrogram, from the signal of a song.
+    Different types of spectrogram can be computed, which are specified by the argument "feature".
+    All these spectrograms are computed with the toolbox librosa [1].
+    
+    Parameters
+    ----------
+    signal : numpy array
+        Signal of the song.
+    sr : float
+        Sampling rate of the signal (typically 44100 Hz).
+    feature : String
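+        The type of spectrogram to compute (Mel variants must be given in lowercase):
+            - pcp : chromagram (librosa's chroma_cens).
+            - cqt : magnitude of the Constant-Q transform of the song.
+            - mel : Mel spectrogram of the song, with the same parameters as [2].
+            - log_mel, nn_log_mel, padded_log_mel, minmax_log_mel : variants computed
+              from the Mel spectrogram (see get_log_mel_from_mel).
+            - stft : magnitude of the Short-Time Fourier Transform of the song.
+            - stft_complex : magnitude and phase of the Short-Time Fourier Transform,
+              returned as a tuple.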
+
+    hop_length : integer
+        The desired hop_length, which is the step between two frames (i.e. the time "discretization" step).
+        It is expressed in terms of number of samples, which are defined by the sampling rate.
+    fmin : integer, optional
+        The minimal frequency to consider, used for denoising.
+        The default is 98.
+    n_mfcc : integer, optional
+        Number of mfcc features.
+        The default is 20 (as in librosa).
+
+    Raises
+    ------
+    InvalidArgumentValueException
+        If the "feature" argument is not one of the supported features.
+
+    Returns
+    -------
+    numpy array
+        Spectrogram of the signal.
+        
+    References
+    ----------
+    [1] McFee, B., Raffel, C., Liang, D., Ellis, D. P., McVicar, M., Battenberg, E., & Nieto, O. (2015, July).
+    librosa: Audio and music signal analysis in python. 
+    In Proceedings of the 14th python in science conference (Vol. 8).
+    
+    [2] Grill, T., & Schlüter, J. (2015, October). 
+    Music Boundary Detection Using Neural Networks on Combined Features and Two-Level Annotations. 
+    In ISMIR (pp. 531-537).
+    """
+    if feature.lower() == "pcp":
+        return compute_pcp(signal, sr, hop_length, fmin)
+    
+    elif feature.lower() == "cqt":
+        return compute_cqt(signal, sr, hop_length)
+    
+    # For Mel spectrograms, we use the same parameters as the ones of [2].
+    # [2] Grill, Thomas, and Jan Schlüter. "Music Boundary Detection Using Neural Networks on Combined Features and Two-Level Annotations." ISMIR. 2015.
+    elif feature.lower() == "mel":
+        return compute_mel_spectrogram(signal, sr, hop_length)
+    
+    elif "mel" in feature:
+        mel_spectrogram = get_spectrogram(signal, sr, "mel", hop_length)
+        return get_log_mel_from_mel(mel_spectrogram, feature)
+        
+    elif feature.lower() == "stft":
+        return compute_stft(signal, sr, hop_length, complex = False)
+    elif feature.lower() == "stft_complex":
+        return compute_stft(signal, sr, hop_length, complex = True)
+    
+    else:
+        raise err.InvalidArgumentValueException(f"Unknown signal representation: {feature}.")
+    
+def get_default_frequency_dimension(feature):
+    if feature.lower() == "pcp":
+        return 12
+    elif feature.lower() == "cqt":
+        return 84
+    elif "mel" in feature.lower():
+        return 80
+    elif feature.lower() == "stft" or feature.lower() == "stft_complex":
+        return 1025
+    else:
+        raise err.InvalidArgumentValueException(f"Unknown signal representation: {feature}.")
+
+def compute_pcp(signal, sr, hop_length, fmin):
+    norm=inf # Columns normalization
+    win_len_smooth=82 # Size of the smoothing window
+    n_octaves=6
+    bins_per_chroma = 3
+    bins_per_octave=bins_per_chroma * 12
+    return librosa.feature.chroma_cens(y=signal,sr=sr,hop_length=hop_length,
+                                fmin=fmin, n_chroma=12, n_octaves=n_octaves, bins_per_octave=bins_per_octave,
+                                norm=norm, win_len_smooth=win_len_smooth)
+
+def compute_cqt(signal, sr, hop_length):
+    constant_q_transf = librosa.cqt(y=signal, sr = sr, hop_length = hop_length)
+    return np.abs(constant_q_transf)
+
+def compute_mel_spectrogram(signal, sr, hop_length):
+    mel = librosa.feature.melspectrogram(y=signal, sr = sr, n_fft=2048, hop_length = hop_length, n_mels=80, fmin=80.0, fmax=16000, power=mel_power)
+    return np.abs(mel)
+
+def get_log_mel_from_mel(mel_spectrogram, feature):
+    """
+    Computes a variant of a Mel spectrogram (typically Log Mel).
+
+    Parameters
+    ----------
+    mel_spectrogram : numpy array
+        Mel spectrogram of the signal.
+    feature : string
+        Desired feature name (must be a variant of a Mel spectrogram).
+
+    Raises
+    ------
+    err.InvalidArgumentValueException
+        Raised in case of unknown feature name.
+
+    Returns
+    -------
+    numpy array
+        Variant of the Mel spectrogram of the signal.
+
+    """
+    if feature == "log_mel":
+        return librosa.power_to_db(np.abs(mel_spectrogram), ref=1)
+    
+    elif feature == "nn_log_mel":
+        mel_plus_one = np.abs(mel_spectrogram) + np.ones(mel_spectrogram.shape)
+        nn_log_mel = librosa.power_to_db(mel_plus_one, ref=1)
+        return nn_log_mel
+    
+    elif feature == "padded_log_mel":
+        log_mel = get_log_mel_from_mel(mel_spectrogram, "log_mel")
+        return log_mel - np.amin(log_mel) * np.ones(log_mel.shape)
+        
+    elif feature == "minmax_log_mel":        
+        padded_log_mel = get_log_mel_from_mel(mel_spectrogram, "padded_log_mel")
+        return np.divide(padded_log_mel, np.amax(padded_log_mel))
+    
+    else:
+        raise err.InvalidArgumentValueException("Unknown feature representation.")
+    
+def compute_stft(signal, sr, hop_length, complex):
+    stft = librosa.stft(y=signal, hop_length=hop_length,n_fft=2048)
+    if complex:
+        mag, phase = librosa.magphase(stft, power = 1)
+        return mag, phase
+    else:
+        return np.abs(stft)
+    
+def get_stft_from_mel(mel_spectrogram, feature, sr):
+    if feature == "mel":
+        return librosa.feature.inverse.mel_to_stft(M=mel_spectrogram, sr=sr, n_fft=2048, power=mel_power, fmin=80.0, fmax=16000)
+    
+    elif feature == "log_mel":
+        mel = librosa.db_to_power(S_db=mel_spectrogram, ref=1)
+        return get_stft_from_mel(mel, "mel", sr=sr)
+
+    elif feature == "nn_log_mel":
+        mel = librosa.db_to_power(S_db=mel_spectrogram, ref=1) - np.ones(mel_spectrogram.shape)
+        return get_stft_from_mel(mel, "mel", sr=sr)
+
+    else:
+        raise err.InvalidArgumentValueException("Unknown feature representation.")
--- a/as_seg/scripts/default_path.py
+++ b/as_seg/scripts/default_path.py
@@ -30,5 +30,5 @@ path_data_persisted_salami = f"{path_parent_of_data}/data/annotations/salami" ##
 path_entire_salami = "C:/Users/amarmore/Desktop/Audio samples/SALAMI" ## Path where are stored wav files of SALAMI (path where it is downloaded by mirdata also)

 # Come Together
-come_together = "C:/Users/amarmore/this_folder/The Beatles - Come Together"
-#path_data_persisted_come_together = "C:/Users/amarmore/Desktop/data_persisted"
\ No newline at end of file
+come_together = "/home/a23marmo/this_folder/The Beatles - Come Together"
+path_data_persisted_come_together = "/home/a23marmo/Bureau/data_persisted/cometogether"
\ No newline at end of file
No results found