From d2a918bc5bb3b6e3b2a625df4478f8aba9ee8046 Mon Sep 17 00:00:00 2001 From: MAFTOUH Mohammed Amine <mohammed-amine.maftouh@imt-atlantique.net> Date: Wed, 5 Mar 2025 12:16:39 +0000 Subject: [PATCH] Update 3 files - /projet_imt.py - /clustering/mesures_clustering.py - /clustering/agg_clustering.py --- clustering/agg_clustering.py | 1 + clustering/mesures_clustering.py | 1 + projet_imt.py | 11 +++++------ 3 files changed, 7 insertions(+), 6 deletions(-) diff --git a/clustering/agg_clustering.py b/clustering/agg_clustering.py index b8a25e2..6bda725 100644 --- a/clustering/agg_clustering.py +++ b/clustering/agg_clustering.py @@ -1,3 +1,4 @@ +from sklearn.cluster import AgglomerativeClustering def agglomerative_clustering(reduced_embeddings, n_clusters): """ Applique l'Agglomerative Clustering avec un nombre fixe de clusters. diff --git a/clustering/mesures_clustering.py b/clustering/mesures_clustering.py index af1ffdf..9c6c2e7 100644 --- a/clustering/mesures_clustering.py +++ b/clustering/mesures_clustering.py @@ -1,3 +1,4 @@ +from sklearn.metrics import silhouette_score def compute_silhouette_scores(reduced_embeddings, labels): """ Calcule les scores de silhouette pour différents nombres de clusters. diff --git a/projet_imt.py b/projet_imt.py index 2b34264..b31b5d8 100644 --- a/projet_imt.py +++ b/projet_imt.py @@ -8,18 +8,14 @@ from utils.reduction_dimesion import * import pandas as pd import numpy as np -import matplotlib.pyplot as plt -import ast import umap -from sklearn.cluster import KMeans from sklearn.metrics import silhouette_score from sklearn.cluster import AgglomerativeClustering -from sklearn.metrics import silhouette_score def main(): # lecture du fichier csv - df = load ..... + df = pd.read_csv("/Users/mac/Desktop/topic_modeling/df_user_messages_new.csv") # reduction des dimensions reduced_embeddings = reduce_embeddings(df, n_components=50, random_state=42) # clustering @@ -29,6 +25,9 @@ def main(): # afficher les resultats messages_par_cluster= afficher_messages_par_cluster(df,labels) + return messages_par_cluster + if __name__ == "__main__": - main() + messages_par_cluster = main() + print(messages_par_cluster) \ No newline at end of file -- GitLab