import pandas as pd
import plotly.graph_objects as go

from analysis import subset_probas

# Shortlist of Audioset labels used by default for the aggregated probability estimates
fewlabels = ['Applause',
             'Bird vocalization, bird call, bird song',
             'Caw',
             'Bee, wasp, etc.',
             'Wind noise (microphone)', 'Rain', 'Vehicle', 'Silence']

def average_proba_over_freq(Df, freq_str='D', subset_labels=fewlabels):
    """
    Calculates probability density estimates over a configurable frequency.

    Arguments:
        Df : DataFrame, output of tag_silentcities
        freq_str : Frequency over which to calculate the probability density estimate (default: days)
        subset_labels : Subset of labels from the Audioset ontology to be used for the estimate.
            Default labels are:
            'Applause', 'Bird vocalization, bird call, bird song', 'Caw', 'Bee, wasp, etc.',
            'Wind noise (microphone)', 'Rain', 'Vehicle', 'Silence'

    Outputs:
        probas_agg : Probability density estimates of the subset of labels, calculated according to the frequency specified
    """
    # Let's use the datetime (Timestamp) as a new index for the dataframe
    ts = pd.DatetimeIndex(Df.datetime)
    Df.index = ts

    # Let's add the labels from the shortlist as columns of Df; it will be easier to manipulate them afterwards
    prob = subset_probas(Df, subset_labels)
    for f, curlabel in enumerate(subset_labels):
        Df[curlabel] = prob[:, f]

    # Now let's create a period range to easily compute statistics over days
    # (the frequency can be changed through the freq_str argument)
    prng = pd.period_range(start=ts[0], end=ts[-1], freq=freq_str).astype('datetime64[ns]')

    # And now create the final DataFrame that averages the probabilities of subset_labels
    # with the frequency defined in freq_str
    allser = dict()
    for lab in subset_labels:
        curser = pd.Series([Df[prng[i]:prng[i + 1]][lab].mean() for i in range(len(prng) - 1)],
                           index=prng[:-1])
        allser[lab] = curser
    probas_agg = pd.DataFrame(allser)

    return probas_agg
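
# Example usage (a minimal sketch, not part of the original script): assuming `Df` is the
# DataFrame produced by tag_silentcities, hourly averages for the default label subset
# could be obtained with:
#   probas_hourly = average_proba_over_freq(Df, freq_str='H')
#   probas_hourly['Vehicle'].plot()  # quick check of one aggregated label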

def make_interactive_pdf(Df, list_resolutions=['0.25H', 'H', '3H', '6H', '12H', 'D'], active_beg=4, subset_labels=fewlabels):
    """
    Builds an interactive Plotly figure of the label probabilities, with a slider to switch
    between the temporal resolutions listed in list_resolutions.
    active_beg is the index (in list_resolutions) of the resolution shown at startup.
    """
    fig = go.Figure()

    # Loop on resolutions: add one (hidden) trace per label and per resolution
    for cur_res in list_resolutions:
        probas_agg = average_proba_over_freq(Df, freq_str=cur_res, subset_labels=subset_labels)
        datelabel = probas_agg.index
        for curcol in probas_agg.columns:
            fig.add_trace(go.Scatter(x=datelabel, y=probas_agg[curcol], name=curcol, visible=False))

    nbcol = len(probas_agg.columns)

    # Make the traces of the starting resolution visible
    for curcol, _ in enumerate(probas_agg.columns):
        fig.data[active_beg * nbcol + curcol].visible = True

    # Create and add the slider: each step toggles the visibility of the traces of one resolution
    steps = []
    for i in range(len(list_resolutions)):
        step = dict(
            method="restyle",
            label=list_resolutions[i],
            args=["visible", [False] * len(fig.data)],
        )
        for curcol, _ in enumerate(probas_agg.columns):
            step["args"][1][i * nbcol + curcol] = True  # Toggle the traces of this resolution to "visible"
        steps.append(step)

    sliders = [dict(
        active=active_beg,
        pad={"t": len(list_resolutions)},
        # currentvalue={"prefix": "Resolution: "},
        steps=steps
    )]

    fig.update_layout(sliders=sliders)

    return fig
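
# Usage sketch (an assumption, not in the original script): the set of resolutions offered by
# the slider can be customised, e.g. to show only hourly and daily views:
#   fig = make_interactive_pdf(Df, list_resolutions=['H', 'D'], active_beg=0)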

def average_eco_acoustics_over_freq(Df, freq_str='D'):
    """
    Calculates ecoacoustics metrics averages over a configurable frequency.

    Arguments:
        Df : DataFrame, output of tag_silentcities
        freq_str : Frequency over which to calculate the ecoacoustics metrics (default: days)

    Outputs:
        acoustics_agg : Average ecoacoustics metrics calculated according to the frequency specified
    """
    # Let's use the datetime (Timestamp) as a new index for the dataframe
    ts = pd.DatetimeIndex(Df.datetime)
    Df.index = ts

    # Now let's create a period range to easily compute statistics over days
    # (the frequency can be changed through the freq_str argument)
    prng = pd.period_range(start=ts[0], end=ts[-1], freq=freq_str).astype('datetime64[ns]')

    # And now create the final DataFrame that averages the (standardized) ecoacoustics metrics
    # with the frequency defined in freq_str
    allser = dict()
    for metric in ['nbpeaks', 'ndsi', 'aci']:
        # Standardize each metric (zero mean, unit variance) before averaging
        Df[metric] = (Df[metric] - Df[metric].mean()) / Df[metric].std()
        curser = pd.Series([Df[metric][prng[i]:prng[i + 1]].mean() for i in range(len(prng) - 1)],
                           index=prng[:-1])
        allser[metric] = curser
    acoustics_agg = pd.DataFrame(allser)

    return acoustics_agg
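
# Example usage (a minimal sketch, assuming Df contains the 'nbpeaks', 'ndsi' and 'aci'
# columns produced by tag_silentcities):
#   eco_daily = average_eco_acoustics_over_freq(Df, freq_str='D')
#   print(eco_daily.head())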

def make_interactive_eco_acoustics(Df, list_resolutions=['0.25H', 'H', '3H', '6H', '12H', 'D'], active_beg=4):
    """
    Builds an interactive Plotly figure of the averaged ecoacoustics metrics, with a slider
    to switch between the temporal resolutions listed in list_resolutions.
    active_beg is the index (in list_resolutions) of the resolution shown at startup.
    """
    fig = go.Figure()

    # Loop on resolutions: add one (hidden) trace per metric and per resolution
    for cur_res in list_resolutions:
        acoustics_agg = average_eco_acoustics_over_freq(Df, freq_str=cur_res)
        datelabel = acoustics_agg.index
        for curcol in acoustics_agg.columns:
            fig.add_trace(go.Scatter(x=datelabel, y=acoustics_agg[curcol], name=curcol, visible=False))

    nbcol = len(acoustics_agg.columns)

    # Make the traces of the starting resolution visible
    for curcol, _ in enumerate(acoustics_agg.columns):
        fig.data[active_beg * nbcol + curcol].visible = True

    # Create and add the slider: each step toggles the visibility of the traces of one resolution
    steps = []
    for i in range(len(list_resolutions)):
        step = dict(
            method="restyle",
            label=list_resolutions[i],
            args=["visible", [False] * len(fig.data)],
        )
        for curcol, _ in enumerate(acoustics_agg.columns):
            step["args"][1][i * nbcol + curcol] = True  # Toggle the traces of this resolution to "visible"
        steps.append(step)

    sliders = [dict(
        active=active_beg,
        pad={"t": len(list_resolutions)},
        # currentvalue={"prefix": "Resolution: "},
        steps=steps
    )]

    fig.update_layout(sliders=sliders)

    return fig

if __name__ == "__main__":
    Df = pd.read_pickle('output_0048_003.xz')
    fig = make_interactive_pdf(Df)
    fig.write_html("myfig_48_03.html")
    fig.show()
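    # The ecoacoustics dashboard can be generated the same way (a sketch; assumes the pickled
    # DataFrame also contains the 'nbpeaks', 'ndsi' and 'aci' columns):
    #   fig_eco = make_interactive_eco_acoustics(Df)
    #   fig_eco.write_html("myfig_48_03_eco.html")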