Skip to content
Snippets Groups Projects
Commit 8f66961a authored by Konstantin Gerd Eyhorn's avatar Konstantin Gerd Eyhorn
Browse files

add dataImport to pandas script

parent bc3e691b
No related branches found
No related tags found
No related merge requests found
import pandas as pd
import numpy as np
import os
# S_features = ["abs_S_Smin","rel_S_Smin_semi_width","rel_S_Smin_full_width","abs_S_Smax","rel_S_Smax_semi_width","rel_S_Smax_full_width","count_anomalies_S","ratio_anomalies_S","max_variation_S"]
# T_features = ["abs_T_Tmin","rel_T_Tmin_semi_width","rel_T_Tmin_full_width","abs_T_Tmax","rel_T_Tmax_semi_width","rel_T_Tmax_full_width","count_anomalies_T","ratio_anomalies_T","max_variation_T"]
# B_features = ["mean_correlation","nb_measurements"]
# Labels applied positionally to the values loaded from each .npy file.
# The same nine statistics are named once for the "S" variable and once for
# the "T" variable, followed by two labels that carry no variable prefix.
_PER_VARIABLE_TEMPLATES = (
    "abs_{v}_{v}min",
    "rel_{v}_{v}min_semi_width",
    "rel_{v}_{v}min_full_width",
    "abs_{v}_{v}max",
    "rel_{v}_{v}max_semi_width",
    "rel_{v}_{v}max_full_width",
    "count_anomalies_{v}",
    "ratio_anomalies_{v}",
    "max_variation_{v}",
)
columns_all = [
    template.format(v=variable)
    for variable in ("S", "T")
    for template in _PER_VARIABLE_TEMPLATES
] + ["mean_correlation", "nb_measurements"]
def _alarm_category(root):
    """Classify a walked directory as ``(variable, alarm)`` or ``None``.

    The last two path components decide the result: the parent must be exactly
    ``salinity`` or ``temperature`` and the leaf exactly ``true_alarm`` or
    ``false_alarm``. Components are compared after ``os.path`` splitting, so
    the check does not depend on the platform's path separator.
    """
    parent_dir, leaf = os.path.split(os.path.normpath(root))
    variable = os.path.basename(parent_dir)
    if variable not in ("salinity", "temperature"):
        return None
    if leaf == "true_alarm":
        return variable, True
    if leaf == "false_alarm":
        return variable, False
    return None


def _records_to_frame(records):
    """Build a DataFrame indexed by ``id`` from a list of per-file Series."""
    if records:
        frame = pd.DataFrame(records)
    else:
        # No files found for this variable: keep the expected columns so that
        # set_index("id") does not raise KeyError on an empty frame.
        frame = pd.DataFrame(columns=[*columns_all, "id", "alarm"])
    return frame.set_index("id")


def import_data():
    """Collect the per-file statistics into two pickled DataFrames.

    Walks ``dataset_custom/dataset_stats/`` and, for every ``.npy`` file found
    in a ``salinity`` or ``temperature`` folder's ``true_alarm`` or
    ``false_alarm`` subfolder, loads the array, labels its values with
    ``columns_all`` and adds two fields: ``id`` (the file name without its
    ``.npy`` extension) and ``alarm`` (``True`` for ``true_alarm``, ``False``
    for ``false_alarm``). Files in any other directory, and files without the
    ``.npy`` extension, are ignored.

    The salinity and temperature rows are printed and written to
    ``dataset_pandas/salinity.pkl`` and ``dataset_pandas/temperature.pkl``;
    the output directory is created if it does not exist.

    Raises:
        ValueError: if a loaded array's length does not match ``columns_all``
            (raised by ``pandas.Series``).
    """
    print("Importing data...")
    records = {"salinity": [], "temperature": []}

    for root, _subfolders, files in os.walk("dataset_custom/dataset_stats/"):
        # Classify the directory once instead of once per file, and do not
        # load anything from directories that would be discarded anyway.
        category = _alarm_category(root)
        if category is None:
            continue
        variable, alarm = category

        for file_name in files:
            # splitext removes only the extension; str.strip(".npy") would
            # also eat leading/trailing '.', 'n', 'p', 'y' characters of the id.
            stem, extension = os.path.splitext(file_name)
            if extension != ".npy":
                continue
            values = np.load(os.path.join(root, file_name))
            record = pd.Series(values, index=columns_all)
            record["id"] = stem
            record["alarm"] = alarm
            records[variable].append(record)

    # convert to dataframes indexed by id
    df_salinity = _records_to_frame(records["salinity"])
    df_temperature = _records_to_frame(records["temperature"])
    print(df_salinity)
    print(df_temperature)

    # save as pickle
    os.makedirs("dataset_pandas", exist_ok=True)
    df_salinity.to_pickle("dataset_pandas/salinity.pkl")
    df_temperature.to_pickle("dataset_pandas/temperature.pkl")
# Script entry point: rebuild the pickles only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    import_data()
File added
File added
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment