diff --git a/projet_imt.py b/projet_imt.py
index 0420e28edb7f88a2b8110ee9ee7cd81a036ec459..dc81f060ced20ab1402629df7dc833291ff5b8c3 100644
--- a/projet_imt.py
+++ b/projet_imt.py
@@ -4,15 +4,21 @@ from clustering.agg_clustering import *
 from clustering.mesures_clustering import *
 from results.affichage_messages_par_cluster import *
 from utils.reduction_dimesion import *
+from utils.reduction_dimension_2d import *
+from results.visualize_clusters import *
 
 
 def main():
     # lecture du fichier csv
     df = pd.read_csv("/Users/mac/Desktop/topic_modeling/df_user_messages_new.csv")
     # reduction des dimensions
-    reduced_embeddings = reduce_embeddings(df, n_components=50, random_state=42)
+    reduced_embeddings = reduce_embeddings(df, n_components=100, random_state=42)
+    # project the reduced embeddings down to 2D for visualization
+    reduced_embeddings_2d = reduce_embeddings_2d(reduced_embeddings, n_components=2, random_state=42)
     # clustering
-    labels = agglomerative_clustering(reduced_embeddings, n_clusters=14)
+    cluster_labels = agglomerative_clustering(reduced_embeddings, n_clusters=11)
+    # plot the clusters in 2D
+    visualize_clusters(reduced_embeddings_2d, cluster_labels, title="Clusters visualisés en 2D")
     # metriques de clustering
-    silhouette_score = compute_silhouette_scores(reduced_embeddings, labels)
+    silhouette_score = compute_silhouette_scores(reduced_embeddings, cluster_labels)
     # afficher les resultats