# Patch summary: renames projet_imt/.gitkeep to clustering/__init__.py, adds
# wildcard imports and a body for main() in projet_imt.py, and adds
# results/__init__.py and utils/__init__.py (no content hunks).
# NOTE(review): this patch was recovered from a single collapsed line; the line
# breaks, the blank context lines and the 4-space indentation inside main()
# were reconstructed from the hunk line counts - confirm against the original.
diff --git a/projet_imt/.gitkeep b/clustering/__init__.py
similarity index 100%
rename from projet_imt/.gitkeep
rename to clustering/__init__.py
diff --git a/projet_imt.py b/projet_imt.py
index 08e91502c3a9aa6d05d37aa81604b0739b924e46..4397e268d0d5bf835db66fc83d0f81ec1c4468fe 100644
--- a/projet_imt.py
+++ b/projet_imt.py
@@ -1,5 +1,11 @@
 # imports
 
+from clustering.agg_clustering import *
+from clustering.mesures_clustering import *
+from results.affichage_messages_par_cluster import *
+from utils.reduction_dimesion import *  # NOTE(review): "dimesion" must match the real module filename - verify
+
+
 import pandas as pd
 import numpy as np
 import matplotlib.pyplot as plt
@@ -11,4 +17,14 @@ from sklearn.cluster import AgglomerativeClustering
 from sklearn.metrics import silhouette_score
 
 
-def main():
\ No newline at end of file
+def main():
+    # read the CSV file
+    df = load .....  # NOTE(review): unfinished placeholder, not valid Python - replace with the real loading call
+    # reduce the dimensionality
+    reduced_embeddings = reduce_embeddings(df, n_components=50, random_state=42)
+    # clustering
+    labels = agglomerative_clustering(reduced_embeddings, n_clusters=14)
+    # clustering metrics
+    silhouette_score = compute_silhouette_scores(reduced_embeddings, labels)  # NOTE(review): local name shadows the sklearn silhouette_score import inside main()
+    # display the results
+    messages_par_cluster= afficher_messages_par_cluster(df,labels)
diff --git a/results/__init__.py b/results/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/utils/__init__.py b/utils/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391