cleanup

ethz-spylab · Apr 29, 2024 · 18d60a2 · 18d60a2
1 parent 2258f23
commit 18d60a2
Show file tree

Hide file tree

Showing 4 changed files with 5 additions and 6 deletions.
diff --git a/README.md b/README.md
@@ -44,7 +44,7 @@ python get_clusters.py  [args]
 python find_patches.py  [args]
 ```
 
-TODO we attach the `DECATED_INDICES_PATH` and `DECAYED_SAMPLES_DICT_NN_PATH` files for the CC3M dataset (as of 2023) in a zip file as a Github release in this repo.
+We attach the `DECAYED_INDICES_PATH` and `DECAYED_SAMPLES_DICT_NN_PATH` files for the CC3M dataset (as of 2023) in a zip file as a GitHub release in this repo.
 
 
 Note:

diff --git a/config.yml b/config.yml
@@ -16,13 +16,14 @@ dataset_embeddings_path: "/data/projects/data-decay-repr/embeddings2/text_embedd
 cluster_count: 100
 
 # Clusters save folder
-clusters_folder: "/data/projects/data-decay-repr/clusters/"
+#clusters_folder: "/data/projects/data-decay-repr/clusters/"
+clusters_folder: /data/projects/data-decay/cc3m/script_tests/clusters/
 
 # Use torch kmeans instead of sklearn kmeans
 use_torch_kmeans: True
 
 # Location of decayed indices
-decayed_indices_path: "/data/projects/data-decay-repr/combined_decayed_indices.txt"
+decayed_indices_path: "/data/projects/data-decay-repr/cc3m_decayed_indices.txt"
 
 # The decayed samples dictionary only needs to be recalculated if the decayed indices or nearby_sample_count are updated
 decayed_dict_calculate: True

diff --git a/find_patches.py b/find_patches.py
@@ -416,8 +416,6 @@ def main(args):
     p = argparse.ArgumentParser()
     p.add_argument("--verbose", type=bool, default=True, help="Whether to print landmark actions and results")
     p.add_argument("--cuda_device", type=int, default=0, help="Cuda device to use")
-    p.add_argument("--verbose", type=bool, default=DEFAULT_CONFIG.verbose, help="Whether to print landmark actions and results")
-    p.add_argument("--cuda_device", type=int, default=DEFAULT_CONFIG.cuda_device, help="Cuda device to use")
     p.add_argument("--captions_urls_path", type=str, default=DEFAULT_CONFIG.captions_urls_path, help="Location of the captions and urls")
     p.add_argument("--model_name", type=str, default=DEFAULT_CONFIG.model_name, help="Model to use for the embeddings")
     p.add_argument("--step_size", type=int, default=DEFAULT_CONFIG.step_size, help="Step size for calculating embeddings")

diff --git a/get_clusters.py b/get_clusters.py
@@ -27,7 +27,7 @@ def main(args):
     if args.use_torch_kmeans:
         from kmean_torch import kmeans_core
         print("Starting torch k-means")
-        km = kmeans_core(k=args.cluster_count, data_array=embeddings, batch_size=256 * 16, epochs=1, all_cuda=True, verbose=args.verbose)
+        km = kmeans_core(k=args.cluster_count, data_array=embeddings, batch_size=256 * 16, epochs=10, all_cuda=True, verbose=args.verbose)
         km.run()
         cluster_centers = km.cent.cpu().numpy()
     else: