Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
A
autosimilarity_segmentation
Manage
Activity
Members
Labels
Plan
Issues
Issue boards
Milestones
Wiki
Code
Merge requests
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Build
Pipelines
Jobs
Pipeline schedules
Artifacts
Deploy
Releases
Package registry
Container registry
Model registry
Operate
Environments
Terraform modules
Monitor
Incidents
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
MARMORET Axel
autosimilarity_segmentation
Commits
090a2d05
Commit
090a2d05
authored
9 months ago
by
MARMORET Axel
Browse files
Options
Downloads
Patches
Plain Diff
Major modif: moving the spectrogram code into a dedicated project. Only referencing it now.
parent
1cf2e96e
No related branches found
No related tags found
No related merge requests found
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
as_seg/model/signal_to_spectrogram.py
+2
-178
2 additions, 178 deletions
as_seg/model/signal_to_spectrogram.py
with
2 additions
and
178 deletions
as_seg/model/signal_to_spectrogram.py
+
2
−
178
View file @
090a2d05
...
...
@@ -6,183 +6,7 @@ Created on Wed Mar 25 16:54:59 2020
Computing spectrogram in different feature description.
Note that Mel (and variants of Mel) spectrograms follow the particular definition of [1].
[1] Grill, T., & Schlüter, J. (2015, October).
Music Boundary Detection Using Neural Networks on Combined Features and Two-Level Annotations.
In ISMIR (pp. 531-537).
It is now moved to base_audio package.
"""
import
numpy
as
np
import
librosa.core
import
librosa.feature
import
librosa.effects
from
math
import
inf
import
as_seg.model.errors
as
err
import
IPython.display
as
ipd
mel_power
=
2
# TODO: add MFCC, maybe tonnetz
def
get_spectrogram
(
signal
,
sr
,
feature
,
hop_length
,
fmin
=
98
):
"""
Returns a spectrogram, from the signal of a song.
Different types of spectrogram can be computed, which are specified by the argument
"
feature
"
.
All these spectrograms are computed with the toolbox librosa [1].
Parameters
----------
signal : numpy array
Signal of the song.
sr : float
Sampling rate of the signal, (typically 44100Hz).
feature : String
The types of spectrograms to compute.
TODO
hop_length : integer
The desired hop_length, which is the step between two frames (ie the time
"
discretization
"
step)
It is expressed in terms of number of samples, which are defined by the sampling rate.
fmin : integer, optional
The minimal frequence to consider, used for denoising.
The default is 98.
n_mfcc : integer, optional
Number of mfcc features.
The default is 20 (as in librosa).
Raises
------
InvalidArgumentValueException
If the
"
feature
"
argument is not presented above.
Returns
-------
numpy array
Spectrogram of the signal.
References
----------
[1] McFee, B., Raffel, C., Liang, D., Ellis, D. P., McVicar, M., Battenberg, E., & Nieto, O. (2015, July).
librosa: Audio and music signal analysis in python.
In Proceedings of the 14th python in science conference (Vol. 8).
[2] Grill, T., & Schlüter, J. (2015, October).
Music Boundary Detection Using Neural Networks on Combined Features and Two-Level Annotations.
In ISMIR (pp. 531-537).
"""
if
feature
.
lower
()
==
"
pcp
"
:
return
compute_pcp
(
signal
,
sr
,
hop_length
,
fmin
)
elif
feature
.
lower
()
==
"
cqt
"
:
return
compute_cqt
(
signal
,
sr
,
hop_length
)
# For Mel spectrograms, we use the same parameters as the ones of [2].
# [2] Grill, Thomas, and Jan Schlüter. "Music Boundary Detection Using Neural Networks on Combined Features and Two-Level Annotations." ISMIR. 2015.
elif
feature
.
lower
()
==
"
mel
"
:
return
compute_mel_spectrogram
(
signal
,
sr
,
hop_length
)
elif
"
mel
"
in
feature
:
mel_spectrogram
=
get_spectrogram
(
signal
,
sr
,
"
mel
"
,
hop_length
)
return
get_log_mel_from_mel
(
mel_spectrogram
,
feature
)
elif
feature
.
lower
()
==
"
stft
"
:
return
compute_stft
(
signal
,
sr
,
hop_length
,
complex
=
False
)
elif
feature
.
lower
()
==
"
stft_complex
"
:
return
compute_stft
(
signal
,
sr
,
hop_length
,
complex
=
True
)
else
:
raise
err
.
InvalidArgumentValueException
(
f
"
Unknown signal representation:
{
feature
}
.
"
)
def
get_default_frequency_dimension
(
feature
):
if
feature
.
lower
()
==
"
pcp
"
:
return
12
elif
feature
.
lower
()
==
"
cqt
"
:
return
84
elif
"
mel
"
in
feature
.
lower
():
return
80
elif
feature
.
lower
()
==
"
stft
"
or
feature
.
lower
()
==
"
stft_complex
"
:
return
1025
else
:
raise
err
.
InvalidArgumentValueException
(
f
"
Unknown signal representation:
{
feature
}
.
"
)
def
compute_pcp
(
signal
,
sr
,
hop_length
,
fmin
):
norm
=
inf
# Columns normalization
win_len_smooth
=
82
# Size of the smoothign window
n_octaves
=
6
bins_per_chroma
=
3
bins_per_octave
=
bins_per_chroma
*
12
return
librosa
.
feature
.
chroma_cens
(
y
=
signal
,
sr
=
sr
,
hop_length
=
hop_length
,
fmin
=
fmin
,
n_chroma
=
12
,
n_octaves
=
n_octaves
,
bins_per_octave
=
bins_per_octave
,
norm
=
norm
,
win_len_smooth
=
win_len_smooth
)
def
compute_cqt
(
signal
,
sr
,
hop_length
):
constant_q_transf
=
librosa
.
cqt
(
y
=
signal
,
sr
=
sr
,
hop_length
=
hop_length
)
return
np
.
abs
(
constant_q_transf
)
def
compute_mel_spectrogram
(
signal
,
sr
,
hop_length
):
mel
=
librosa
.
feature
.
melspectrogram
(
y
=
signal
,
sr
=
sr
,
n_fft
=
2048
,
hop_length
=
hop_length
,
n_mels
=
80
,
fmin
=
80.0
,
fmax
=
16000
,
power
=
mel_power
)
return
np
.
abs
(
mel
)
def
get_log_mel_from_mel
(
mel_spectrogram
,
feature
):
"""
Computes a variant of a Mel spectrogram (typically Log Mel).
Parameters
----------
mel_spectrogram : numpy array
Mel spectrogram of the signal.
feature : string
Desired feature name (must be a variant of a Mel spectrogram).
Raises
------
err.InvalidArgumentValueException
Raised in case of unknown feature name.
Returns
-------
numpy array
Variant of the Mel spectrogram of the signal.
"""
if
feature
==
"
log_mel
"
:
return
librosa
.
power_to_db
(
np
.
abs
(
mel_spectrogram
),
ref
=
1
)
elif
feature
==
"
nn_log_mel
"
:
mel_plus_one
=
np
.
abs
(
mel_spectrogram
)
+
np
.
ones
(
mel_spectrogram
.
shape
)
nn_log_mel
=
librosa
.
power_to_db
(
mel_plus_one
,
ref
=
1
)
return
nn_log_mel
elif
feature
==
"
padded_log_mel
"
:
log_mel
=
get_log_mel_from_mel
(
mel_spectrogram
,
"
log_mel
"
)
return
log_mel
-
np
.
amin
(
log_mel
)
*
np
.
ones
(
log_mel
.
shape
)
elif
feature
==
"
minmax_log_mel
"
:
padded_log_mel
=
get_log_mel_from_mel
(
mel_spectrogram
,
"
padded_log_mel
"
)
return
np
.
divide
(
padded_log_mel
,
np
.
amax
(
padded_log_mel
))
else
:
raise
err
.
InvalidArgumentValueException
(
"
Unknown feature representation.
"
)
def
compute_stft
(
signal
,
sr
,
hop_length
,
complex
):
stft
=
librosa
.
stft
(
y
=
signal
,
hop_length
=
hop_length
,
n_fft
=
2048
)
if
complex
:
mag
,
phase
=
librosa
.
magphase
(
stft
,
power
=
1
)
return
mag
,
phase
else
:
return
np
.
abs
(
stft
)
def
get_stft_from_mel
(
mel_spectrogram
,
feature
,
sr
):
if
feature
==
"
mel
"
:
return
librosa
.
feature
.
inverse
.
mel_to_stft
(
M
=
mel_spectrogram
,
sr
=
sr
,
n_fft
=
2048
,
power
=
mel_power
,
fmin
=
80.0
,
fmax
=
16000
)
elif
feature
==
"
log_mel
"
:
mel
=
librosa
.
db_to_power
(
S_db
=
mel_spectrogram
,
ref
=
1
)
return
get_stft_from_mel
(
mel
,
"
mel
"
,
sr
=
sr
)
elif
feature
==
"
nn_log_mel
"
:
mel
=
librosa
.
db_to_power
(
S_db
=
mel_spectrogram
,
ref
=
1
)
-
np
.
ones
(
mel_spectrogram
.
shape
)
return
get_stft_from_mel
(
mel
,
"
mel
"
,
sr
=
sr
)
else
:
raise
err
.
InvalidArgumentValueException
(
"
Unknown feature representation.
"
)
from
base_audio.signal_to_spectrogram
import
*
\ No newline at end of file
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment