diff --git a/clustering/agg_clustering.py b/clustering/agg_clustering.py index b8a25e2de281bcc70e242b44dcf3e108f9d37890..6bda725901e758f73e457584c7539b35efad9e35 100644 --- a/clustering/agg_clustering.py +++ b/clustering/agg_clustering.py @@ -1,3 +1,4 @@ +from sklearn.cluster import AgglomerativeClustering def agglomerative_clustering(reduced_embeddings, n_clusters): """ Applique l'Agglomerative Clustering avec un nombre fixe de clusters. diff --git a/clustering/mesures_clustering.py b/clustering/mesures_clustering.py index af1ffdf76328c289e7b40c06d2b9f489c73ef130..9c6c2e749218b218332db86606c90dd49e73145c 100644 --- a/clustering/mesures_clustering.py +++ b/clustering/mesures_clustering.py @@ -1,3 +1,4 @@ +from sklearn.metrics import silhouette_score def compute_silhouette_scores(reduced_embeddings, labels): """ Calcule les scores de silhouette pour différents nombres de clusters. diff --git a/projet_imt.py b/projet_imt.py index 2b34264808cb1c0be5ccbb0a0d2ccb0271f89beb..b31b5d8f56079737c95f8ae9e9587564f2740b3a 100644 --- a/projet_imt.py +++ b/projet_imt.py @@ -8,18 +8,14 @@ from utils.reduction_dimesion import * import pandas as pd import numpy as np -import matplotlib.pyplot as plt -import ast import umap -from sklearn.cluster import KMeans from sklearn.metrics import silhouette_score from sklearn.cluster import AgglomerativeClustering -from sklearn.metrics import silhouette_score def main(): # lecture du fichier csv - df = load ..... + df = pd.read_csv("/Users/mac/Desktop/topic_modeling/df_user_messages_new.csv") # reduction des dimensions reduced_embeddings = reduce_embeddings(df, n_components=50, random_state=42) # clustering @@ -29,6 +25,9 @@ def main(): # afficher les resultats messages_par_cluster= afficher_messages_par_cluster(df,labels) + return messages_par_cluster + if __name__ == "__main__": - main() + messages_par_cluster = main() + print(messages_par_cluster) \ No newline at end of file