Skip to content
Snippets Groups Projects
Commit 4eca4b1f authored by MOREAU Yanice, Maurice, Gerard's avatar MOREAU Yanice, Maurice, Gerard
Browse files

added experiment for data scores in the main

parent 77c83668
No related branches found
No related tags found
No related merge requests found
# ML Project - Operator Decision
This repository is an implementation of a Multi-Layer Perceptron for the classification of alarms raised by the statistical model implemented by the PokaPok association for monitoring the state of the ocean.
To set up the environment for training and running the model, install the requirements using [environment.yaml]().
The repository is made of the following folders:
* [dataset_pandas](): Resulting datasets from feature engineering of the profiler's data
......@@ -27,6 +27,7 @@ To **run** the model, the standalone script ``main.py`` is available. In the [ma
1. ``--run single``: to run a single training of the model. This is useful to investigate the accuracy and loss evolution plots, as well as the confusion matrix for different setups.
2. ``--run multiple``: to run the model for a fixed model seed and different data splits. This is used to investigate the robustness of the model.
3. ``--run generate_scores``: to generate statistics about the data, which are stored in the logs and can then be plotted using the file ``bat_profile_plots.py``.
Two notebooks are provided to help gain information about the datasets:
......
......@@ -440,19 +440,6 @@ def sort_testdata_into_cm(test_df, y_test_pred, y_test_pred_binary):
return results
def increment_dict_key(d, key):
    """
    Increment the value of a key in a dictionary by one if it exists.
    Otherwise, create the key with a value of one.

    The dictionary is updated in place and nothing is returned.

    Parameters:
    d (dict): The dictionary to update.
    key: The key to increment or create.
    """
    # dict.get supplies 0 for a missing key, so one statement covers both
    # the "create" and the "increment" case with a single lookup path.
    d[key] = d.get(key, 0) + 1
if __name__ == "__main__":
false_positives = {}
......@@ -510,11 +497,3 @@ if __name__ == "__main__":
json.dump(false_positives, json_file)
with open('./logs/false_negatives_2_v2.json', 'w') as json_file:
json.dump(false_negatives, json_file)
\ No newline at end of file
# TEST LOGS
## Dataset_2_v1
# 100 runs in a row and check the FP,FN
# 55 FN / 11.9 FP : avg on the test set
# Few regular FN but a lot for FP
\ No newline at end of file
......@@ -8,6 +8,7 @@ import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import confusion_matrix
import argparse
import json
# Command-line interface: ``--run`` selects which experiment this script executes.
# Restricting the value with ``choices`` makes argparse reject a mistyped mode
# up front, with a usage message, instead of accepting an arbitrary string.
args = argparse.ArgumentParser()
args.add_argument(
    "--run",
    type=str,
    default="single",
    choices=["single", "multiple", "generate_scores"],
    help=(
        "Experiment to execute: 'single' for one training, 'multiple' for a "
        "fixed model seed over different data splits, 'generate_scores' to "
        "write the misclassification counts to the logs."
    ),
)
......@@ -106,9 +107,69 @@ def multiple_runs():
CSV_NAME, data_seed, model_seed, accuracy, recall, precision, f1, f2
)
def generate_scores():
    """
    This function generates two json files counting how many times
    each profile was classified wrong over N_RUNS runs.

    For every seed in ``range(N_RUNS)`` a DataLoader and an MLPWrapper are
    built with that seed (used both as data seed and as model seed), the
    model is trained for 20 epochs and the test set is passed through
    ``m.get_evaluation`` and ``m.sort_testdata_into_cm``. The index label of
    every resulting row whose "CM" column equals "FP" (resp. "FN") is
    tallied, and the tallies are written to
    ``./logs/false_positives_{DATASET_NAME}.json`` and
    ``./logs/false_negatives_{DATASET_NAME}.json``.

    Takes no parameters and returns nothing; the JSON files are the output.
    """
    # Function-scope imports so this block does not depend on edits to the
    # file-level import section.
    import os
    from collections import Counter

    # Parameters
    N_RUNS = 1
    DATASET_NAME = "2_v2"  # For the saved JSON names, you should change the actual dataset in mlp.py

    # Counter starts missing keys at 0, replacing a hand-written
    # "increment or create" helper. It is a dict subclass, so json.dump
    # serialises it exactly like a plain dict.
    false_positives = Counter()
    false_negatives = Counter()

    # Trainings loop
    for seed in range(N_RUNS):
        # Instantiate the DataLoader with the desired parameters
        # NOTE(review): variant is "v1" here while DATASET_NAME above is
        # "2_v2" - confirm the output file names match the data actually used.
        dl = DataLoader(
            variant="v1",
            features="both",
            batch_size=32,
            seed=seed,
            rebalance_train=True,
            rebalance_test=True,  ## Gives you a better idea of the model's performance
        )

        # Instantiate the MLPWrapper with the desired parameters
        mlpw = m.MLPWrapper(
            input_features=dl.num_features,
            growth_rate=16,
            train_loader=dl.train_loader,
            test_loader=dl.test_loader,
            learning_rate=1e-4,
            model_seed=seed,
            device="cpu",
        )

        mlpw.train(epochs=20)

        y_test_pred, y_test_pred_binary = m.get_evaluation(mlpw.model, dl.X_test)
        results = m.sort_testdata_into_cm(dl.test_df, y_test_pred, y_test_pred_binary)

        # Only the "CM" column is read, so iterate that column directly
        # instead of building a full row object per entry with iterrows().
        for index, label in results["CM"].items():
            if label == "FP":
                false_positives[index] += 1
            elif label == "FN":
                false_negatives[index] += 1

    # Make sure the output directory exists so the results of the (long)
    # training loop are not lost to a FileNotFoundError at write time.
    os.makedirs("./logs", exist_ok=True)

    # Save results to JSON files
    with open(f'./logs/false_positives_{DATASET_NAME}.json', 'w') as json_file:
        json.dump(false_positives, json_file)

    with open(f'./logs/false_negatives_{DATASET_NAME}.json', 'w') as json_file:
        json.dump(false_negatives, json_file)
if __name__ == "__main__":
    # Map each --run value handled by this script to the experiment it launches.
    experiments = {
        "single": run_and_plot_distributions,
        "multiple": multiple_runs,
        "generate_scores": generate_scores,
    }
    selected = experiments.get(args.run)
    # A value with no entry in the table runs nothing.
    if selected is not None:
        selected()
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment