Skip to content
Snippets Groups Projects
Commit d2a918bc authored by MAFTOUH Mohammed Amine's avatar MAFTOUH Mohammed Amine
Browse files

Update 3 files

- /projet_imt.py
- /clustering/mesures_clustering.py
- /clustering/agg_clustering.py
parent a6c44be2
No related branches found
No related tags found
No related merge requests found
from sklearn.cluster import AgglomerativeClustering
def agglomerative_clustering(reduced_embeddings, n_clusters): def agglomerative_clustering(reduced_embeddings, n_clusters):
""" """
Applique l'Agglomerative Clustering avec un nombre fixe de clusters. Applique l'Agglomerative Clustering avec un nombre fixe de clusters.
......
from sklearn.metrics import silhouette_score
def compute_silhouette_scores(reduced_embeddings, labels): def compute_silhouette_scores(reduced_embeddings, labels):
""" """
Calcule les scores de silhouette pour différents nombres de clusters. Calcule les scores de silhouette pour différents nombres de clusters.
......
...@@ -8,18 +8,14 @@ from utils.reduction_dimesion import * ...@@ -8,18 +8,14 @@ from utils.reduction_dimesion import *
import pandas as pd import pandas as pd
import numpy as np import numpy as np
import matplotlib.pyplot as plt
import ast
import umap import umap
from sklearn.cluster import KMeans
from sklearn.metrics import silhouette_score from sklearn.metrics import silhouette_score
from sklearn.cluster import AgglomerativeClustering from sklearn.cluster import AgglomerativeClustering
from sklearn.metrics import silhouette_score
def main(): def main():
# lecture du fichier csv # lecture du fichier csv
df = load ..... df = pd.read_csv("/Users/mac/Desktop/topic_modeling/df_user_messages_new.csv")
# reduction des dimensions # reduction des dimensions
reduced_embeddings = reduce_embeddings(df, n_components=50, random_state=42) reduced_embeddings = reduce_embeddings(df, n_components=50, random_state=42)
# clustering # clustering
...@@ -29,6 +25,9 @@ def main(): ...@@ -29,6 +25,9 @@ def main():
# afficher les resultats # afficher les resultats
messages_par_cluster= afficher_messages_par_cluster(df,labels) messages_par_cluster= afficher_messages_par_cluster(df,labels)
return messages_par_cluster
if __name__ == "__main__": if __name__ == "__main__":
main() messages_par_cluster = main()
print(messages_par_cluster)
\ No newline at end of file
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment