From ce4102a552b9482748e881c4edb92d0a46f8bc75 Mon Sep 17 00:00:00 2001 From: piotrj Date: Sun, 19 May 2024 18:16:17 +0200 Subject: [PATCH] rotations hashing and pruning fixed command line parameters for image matching --- src/console.py | 19 +++++++++++++++++++ src/core.py | 12 +++++++----- src/dude.py | 25 +++++++++++++------------ 3 files changed, 39 insertions(+), 17 deletions(-) diff --git a/src/console.py b/src/console.py index 5492eb1..434fbeb 100644 --- a/src/console.py +++ b/src/console.py @@ -66,6 +66,11 @@ def parse_args(ver): c_help='do not run the gui. run the scan and save the result to the specified csv file. Implies -nh' if os.name=='nt' else 'do not run the gui. run the scan and save the result to the specified csv file.' run_mode_group.add_argument('-c','--csv' ,nargs=1,help=c_help) + parser.add_argument('-i','--images' ,action='store_true',help='Images similarity mode') + parser.add_argument('-ih' ,nargs=1,help='Images similarity mode hash size',choices=('4','6','8','10','12','14','16','18','20','22','24','26','28','30','32'),default=None) + parser.add_argument('-id' ,nargs=1,help='Images similarity mode divergence value',choices=('0','1','2','3','4','5','6','7','8','9'),default=None ) + parser.add_argument('-ir' ,action='store_true',help='Images similarity mode process all rotations') + parser_help=parser.format_help().split('\n') help_parts=[parser_help[0]] + parser_help[7::] @@ -95,6 +100,20 @@ def parse_args(ver): command.append('--log') command.append(args.log) + if args.images or args.ih or args.id or args.ir: + command.append('--images') + + if args.ih: + command.append('-ih') + command.extend(args.ih) + + if args.id: + command.append('-id') + command.extend(args.id) + + if args.ir: + command.append('-ir') + if args.paths: command.extend(args.paths) diff --git a/src/core.py b/src/core.py index a83f1bc..88612b1 100755 --- a/src/core.py +++ b/src/core.py @@ -650,7 +650,7 @@ def my_hash_combo(file,hash_size): if 
all_rotations: file_rotate = file.rotate try: - result_dict[index_tuple]=( my_hash_combo(file,hash_size),my_hash_combo(file_rotate(90),hash_size),my_hash_combo(file_rotate(180),hash_size),my_hash_combo(file_rotate(270),hash_size) ) + result_dict[index_tuple]=( my_hash_combo(file,hash_size),my_hash_combo(file_rotate(90,expand=True),hash_size),my_hash_combo(file_rotate(180,expand=True),hash_size),my_hash_combo(file_rotate(270,expand=True),hash_size) ) except Exception as e: self.log.error(f'hashing file: {fullpath} error: {e}.') @@ -788,7 +788,9 @@ def similarity_clustering(self,hash_size,distance,all_rotations): self.info_line = self.info = 'Clustering ...' - model = DBSCAN(eps=de_norm_distance, min_samples=2,n_jobs=-1) + model = DBSCAN(eps=de_norm_distance, min_samples=2,n_jobs=-1,p=1) + #,algorithm='brute' + labels = model.fit(pool).labels_ del model @@ -802,12 +804,12 @@ def similarity_clustering(self,hash_size,distance,all_rotations): for label,key in zip(labels,keys): if label!=-1: groups_dict[label].add(key) - groups_sorted_by_quantity_dict[label]=len(keys) + groups_sorted_by_quantity_dict[label]+=1 ############################################## groups_sorted_by_quantity = [ label for label,number in sorted(groups_sorted_by_quantity_dict.items(),key=lambda x : x[1], reverse=True) ] - #kazy plik tylko raz + #each file only once self.info_line = self.info = 'Pruning "multiple rotations" data ...' 
files_already_in_group=set() @@ -815,7 +817,7 @@ def similarity_clustering(self,hash_size,distance,all_rotations): pruned_groups_dict = defaultdict(set) for label in groups_sorted_by_quantity: - #print(f'{label=}') + #print(f'{label=}',type(label)) for key in groups_dict[label]: #print(f' {key=}') diff --git a/src/dude.py b/src/dude.py index 6efd961..b931c18 100755 --- a/src/dude.py +++ b/src/dude.py @@ -356,7 +356,7 @@ def handle_sigint(self): l_warning("Received SIGINT signal") self.action_abort=True - def __init__(self,cwd,paths_to_add=None,exclude=None,exclude_regexp=None,norun=None): + def __init__(self,cwd,paths_to_add=None,exclude=None,exclude_regexp=None,norun=None,images=False, ihash=6, idivergence=5, rotations=False): gc_disable() self.cwd=cwd @@ -826,10 +826,10 @@ def self_folder_tree_yview(*args): self.log_skipped_var.set(False) self.all_rotations=BooleanVar() - self.all_rotations.set(False) + self.all_rotations.set(rotations) self.similarity_mode_var=BooleanVar() - self.similarity_mode_var.set(False) + self.similarity_mode_var.set(images) self_scan_dialog_area_main = self_scan_dialog.area_main @@ -946,13 +946,13 @@ def self_folder_tree_yview(*args): self.similarity_distance_var = IntVar() self.similarity_distance_var_lab = StringVar() - self.similarity_distance_var.set(5) + self.similarity_distance_var.set(idivergence) self.similarity_hsize_var = IntVar() self.similarity_hsize_varx2 = IntVar() self.similarity_hsize_var_lab = StringVar() - self.similarity_hsize_var.set(3) - self.similarity_hsize_varx2.set(6) + self.similarity_hsize_var.set(ihash//2) + self.similarity_hsize_varx2.set(ihash) similarity_hsize_frame = LabelFrame(sf_par3,text='Hash size',borderwidth=2,bg=self.bg_color,takefocus=False) similarity_hsize_frame.grid(row=0,column=0,padx=2,sticky='news') @@ -3212,10 +3212,10 @@ def scan(self): if similarity_mode: self_progress_dialog_on_scan_lab[0].configure(image='',text='') - self_progress_dialog_on_scan_lab[1].configure(text='') - 
self_progress_dialog_on_scan_lab[2].configure(text='') - self_progress_dialog_on_scan_lab[3].configure(text='') - self_progress_dialog_on_scan_lab[4].configure(text='') + self_progress_dialog_on_scan_lab[1].configure(image='',text='') + self_progress_dialog_on_scan_lab[2].configure(image='',text='') + self_progress_dialog_on_scan_lab[3].configure(image='',text='') + self_progress_dialog_on_scan_lab[4].configure(image='',text='') self_progress_dialog_on_scan.widget.title('Images hashing') @@ -3231,7 +3231,7 @@ def scan(self): bytes_to_str_dude_core_sum_size = local_bytes_to_str(dude_core.sum_size) - self_progress_dialog_on_scan_lab[2].configure(text=dude_core.info_line) + #self_progress_dialog_on_scan_lab[2].configure(text=dude_core.info_line) while ih_thread_is_alive(): anything_changed=False @@ -5683,7 +5683,8 @@ def show_homepage(self): print('Done') else: - Gui(getcwd(),p_args.paths,p_args.exclude,p_args.exclude_regexp,p_args.norun) + images_mode = bool(p_args.images or p_args.ih or p_args.id or p_args.ir) + Gui( getcwd(),p_args.paths,p_args.exclude,p_args.exclude_regexp,p_args.norun,images_mode,int(p_args.ih[0]) if p_args.ih else 6,int(p_args.id[0]) if p_args.id else 5,p_args.ir ) except Exception as e_main: print(e_main)