Skip to content

Commit

Permalink
cleanup
Browse files Browse the repository at this point in the history
  • Loading branch information
Temmuz Celik committed Apr 29, 2024
1 parent 2258f23 commit 18d60a2
Show file tree
Hide file tree
Showing 4 changed files with 5 additions and 6 deletions.
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ python get_clusters.py [args]
python find_patches.py [args]
```

TODO we attach the `DECATED_INDICES_PATH` and `DECAYED_SAMPLES_DICT_NN_PATH` files for the CC3M dataset (as of 2023) in a zip file as a Github release in this repo.
We attach the `DECAYED_INDICES_PATH` and `DECAYED_SAMPLES_DICT_NN_PATH` files for the CC3M dataset (as of 2023) in a zip file as a GitHub release in this repo.


Note:
Expand Down
5 changes: 3 additions & 2 deletions config.yml
Original file line number Diff line number Diff line change
Expand Up @@ -16,13 +16,14 @@ dataset_embeddings_path: "/data/projects/data-decay-repr/embeddings2/text_embedd
cluster_count: 100

# Clusters save folder
clusters_folder: "/data/projects/data-decay-repr/clusters/"
#clusters_folder: "/data/projects/data-decay-repr/clusters/"
clusters_folder: /data/projects/data-decay/cc3m/script_tests/clusters/

# Use torch kmeans instead of sklearn kmeans
use_torch_kmeans: True

# Location of decayed indices
decayed_indices_path: "/data/projects/data-decay-repr/combined_decayed_indices.txt"
decayed_indices_path: "/data/projects/data-decay-repr/cc3m_decayed_indices.txt"

# The decayed samples dictionary only needs to be recalculated if the decayed indices or nearby_sample_count are updated
decayed_dict_calculate: True
Expand Down
2 changes: 0 additions & 2 deletions find_patches.py
Original file line number Diff line number Diff line change
Expand Up @@ -416,8 +416,6 @@ def main(args):
p = argparse.ArgumentParser()
p.add_argument("--verbose", type=bool, default=True, help="Whether to print landmark actions and results")
p.add_argument("--cuda_device", type=int, default=0, help="Cuda device to use")
p.add_argument("--verbose", type=bool, default=DEFAULT_CONFIG.verbose, help="Whether to print landmark actions and results")
p.add_argument("--cuda_device", type=int, default=DEFAULT_CONFIG.cuda_device, help="Cuda device to use")
p.add_argument("--captions_urls_path", type=str, default=DEFAULT_CONFIG.captions_urls_path, help="Location of the captions and urls")
p.add_argument("--model_name", type=str, default=DEFAULT_CONFIG.model_name, help="Model to use for the embeddings")
p.add_argument("--step_size", type=int, default=DEFAULT_CONFIG.step_size, help="Step size for calculating embeddings")
Expand Down
2 changes: 1 addition & 1 deletion get_clusters.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ def main(args):
if args.use_torch_kmeans:
from kmean_torch import kmeans_core
print("Starting torch k-means")
km = kmeans_core(k=args.cluster_count, data_array=embeddings, batch_size=256 * 16, epochs=1, all_cuda=True, verbose=args.verbose)
km = kmeans_core(k=args.cluster_count, data_array=embeddings, batch_size=256 * 16, epochs=10, all_cuda=True, verbose=args.verbose)
km.run()
cluster_centers = km.cent.cpu().numpy()
else:
Expand Down

0 comments on commit 18d60a2

Please sign in to comment.