fix: correct handling of single cluster

mbari-org · Jul 31, 2024 · f6ede19
1 parent 638254b
commit f6ede19
Showing 1 changed file with 7 additions and 7 deletions.
diff --git a/sdcat/cluster/cluster.py b/sdcat/cluster/cluster.py
@@ -124,13 +124,6 @@ def _run_hdbscan_assign(
     unique_clusters.sort()
     info(f"Number of clusters including unassigned -1 cluster: {len(unique_clusters)}")
 
-    cluster_df['score'] = scan.probabilities_
-    # Get the index of the highest scores for each unique cluster sorted in increasing order
-    # and use this as a representative image for the cluster
-    max_scores = cluster_df.sort_values('cluster', ascending=True).groupby('cluster')['score'].idxmax()
-    # Remove the last index which is the -1 cluster
-    max_scores = max_scores[:-1]
-
     # If all the clusters are unassigned, then use all the samples as exemplars,
     # and assign them to the unknown cluster. If embedding is empty, this is also the case (failed to extract embeddings)
     if len(unique_clusters) == 1 and unique_clusters[0] == -1:
@@ -144,6 +137,13 @@ def _run_hdbscan_assign(
         coverage = 0.0
         return avg_sim_scores, exemplar_df, clusters, cluster_means, coverage
 
+    cluster_df['score'] = scan.probabilities_
+    # Get the index of the highest scores for each unique cluster sorted in increasing order
+    # and use this as a representative image for the cluster
+    max_scores = cluster_df.sort_values('cluster', ascending=True).groupby('cluster')['score'].idxmax()
+    # Remove the last index which is the -1 cluster
+    max_scores = max_scores[:-1]
+
     # Get the representative embeddings for the max scoring exemplars for each cluster and store them in a numpy array
     exemplar_emb = [image_emb[i] for i in max_scores]
     exemplar_emb = np.array(exemplar_emb)