"""Clustering quality measures: silhouette score and Dunn index."""
# Reconstructed from the patch "Edit mesures_clustering.py" (commit 4a16227c)
# applied to clustering/mesures_clustering.py.

from itertools import combinations

import numpy as np
from scipy.spatial.distance import cdist, pdist


def compute_silhouette_scores(reduced_embeddings, labels):
    """Return the mean silhouette coefficient of a clustering.

    :param reduced_embeddings: Matrix of reduced embeddings, one row per sample
    :param labels: Cluster label predicted for each sample
    :return: silhouette score
    """
    # Imported lazily so that the Dunn index helper below (which only needs
    # numpy/scipy) remains importable when scikit-learn is not installed.
    from sklearn.metrics import silhouette_score

    return silhouette_score(reduced_embeddings, labels)


def compute_dunn_index(reduced_embeddings, labels):
    """Compute the Dunn index to assess the quality of a clustering.

    The index is the smallest distance between two points belonging to
    different clusters, divided by the largest cluster diameter (the largest
    distance between two points of the same cluster). Higher is better.

    :param reduced_embeddings: Matrix of reduced embeddings, one row per sample
    :param labels: Cluster label predicted for each sample
    :return: Dunn index
    :raises ValueError: if there are fewer than two clusters, or if no cluster
        contains at least two points (the index is undefined in both cases)
    """
    points = np.asarray(reduced_embeddings)
    labels = np.asarray(labels)

    # Slice every cluster once instead of once per pair of clusters.
    clusters = [points[labels == label] for label in np.unique(labels)]
    if len(clusters) < 2:
        raise ValueError("Dunn index requires at least two clusters")

    # Intra-cluster distances: diameter of each cluster. Singleton clusters
    # have no pairwise distance and are skipped.
    diameters = [
        pdist(cluster).max() for cluster in clusters if len(cluster) > 1
    ]
    if not diameters:
        raise ValueError(
            "Dunn index requires at least one cluster with two or more points"
        )

    # Inter-cluster distances: cdist only measures pairs that straddle the two
    # clusters. (pdist on the stacked points would also include pairs from the
    # same cluster and underestimate the separation.)
    separations = [
        cdist(cluster_a, cluster_b).min()
        for cluster_a, cluster_b in combinations(clusters, 2)
    ]

    return min(separations) / max(diameters)