Skip to content

Commit

Permalink
rotations hashing and pruning fixed
Browse files (browse the repository at this point in the history)
command line parameters for image matching
  • Loading branch information
PJDude committed May 19, 2024
1 parent 3e10a16 commit ce4102a
Show file tree
Hide file tree
Showing 3 changed files with 39 additions and 17 deletions.
19 changes: 19 additions & 0 deletions src/console.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,11 @@ def parse_args(ver):
c_help='do not run the gui. run the scan and save the result to the specified csv file. Implies -nh' if os.name=='nt' else 'do not run the gui. run the scan and save the result to the specified csv file.'
run_mode_group.add_argument('-c','--csv' ,nargs=1,help=c_help)

parser.add_argument('-i','--images' ,action='store_true',help='Images similarity mode')
parser.add_argument('-ih' ,nargs=1,help='Images similarity mode hash size',choices=('4','6','8','10','12','14','16','18','20','22','24','26','28','30','32'),default='6')
parser.add_argument('-id' ,nargs=1,help='Images similarity mode divergence value',choices=('0','1','2','3','4','5','6','7','8','9'),default='5' )
parser.add_argument('-ir' ,action='store_true',help='Images similarity mode process all rotations')

parser_help=parser.format_help().split('\n')
help_parts=[parser_help[0]] + parser_help[7::]

Expand Down Expand Up @@ -95,6 +100,20 @@ def parse_args(ver):
command.append('--log')
command.append(args.log)

if args.images or args.hash or args.divergence or args.rotations:
command.append('--images')

if args.hash:
command.append('-ih')
command.append(args.ih)

if args.divergence:
command.append('-id')
command.append(args.id)

if args.rotations:
command.append('-ir')

if args.paths:
command.extend(args.paths)

Expand Down
12 changes: 7 additions & 5 deletions src/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -650,7 +650,7 @@ def my_hash_combo(file,hash_size):
if all_rotations:
file_rotate = file.rotate
try:
result_dict[index_tuple]=( my_hash_combo(file,hash_size),my_hash_combo(file_rotate(90),hash_size),my_hash_combo(file_rotate(180),hash_size),my_hash_combo(file_rotate(270),hash_size) )
result_dict[index_tuple]=( my_hash_combo(file,hash_size),my_hash_combo(file_rotate(90,expand=True),hash_size),my_hash_combo(file_rotate(180,expand=True),hash_size),my_hash_combo(file_rotate(270,expand=True),hash_size) )

except Exception as e:
self.log.error(f'hashing file: {fullpath} error: {e}.')
Expand Down Expand Up @@ -788,7 +788,9 @@ def similarity_clustering(self,hash_size,distance,all_rotations):

self.info_line = self.info = 'Clustering ...'

model = DBSCAN(eps=de_norm_distance, min_samples=2,n_jobs=-1)
model = DBSCAN(eps=de_norm_distance, min_samples=2,n_jobs=-1,p=1)
#,algorithm='brute'

labels = model.fit(pool).labels_
del model

Expand All @@ -802,20 +804,20 @@ def similarity_clustering(self,hash_size,distance,all_rotations):
for label,key in zip(labels,keys):
if label!=-1:
groups_dict[label].add(key)
groups_sorted_by_quantity_dict[label]=len(keys)
groups_sorted_by_quantity_dict[label]+=1

##############################################
groups_sorted_by_quantity = [ label for label,number in sorted(groups_sorted_by_quantity_dict.items(),key=lambda x : x[1], reverse=True) ]

#kazy plik tylko raz
#kazdy plik tylko raz (each file only once)
self.info_line = self.info = 'Pruning "multiple rotations" data ...'

files_already_in_group=set()
files_already_in_group_add = files_already_in_group.add

pruned_groups_dict = defaultdict(set)
for label in groups_sorted_by_quantity:
#print(f'{label=}')
#print(f'{label=}',type(label))
for key in groups_dict[label]:
#print(f' {key=}')

Expand Down
25 changes: 13 additions & 12 deletions src/dude.py
Original file line number Diff line number Diff line change
Expand Up @@ -356,7 +356,7 @@ def handle_sigint(self):
l_warning("Received SIGINT signal")
self.action_abort=True

def __init__(self,cwd,paths_to_add=None,exclude=None,exclude_regexp=None,norun=None):
def __init__(self,cwd,paths_to_add=None,exclude=None,exclude_regexp=None,norun=None,images=False, ihash=6, idivergence=5, rotations=False):
gc_disable()

self.cwd=cwd
Expand Down Expand Up @@ -826,10 +826,10 @@ def self_folder_tree_yview(*args):
self.log_skipped_var.set(False)

self.all_rotations=BooleanVar()
self.all_rotations.set(False)
self.all_rotations.set(rotations)

self.similarity_mode_var=BooleanVar()
self.similarity_mode_var.set(False)
self.similarity_mode_var.set(images)

self_scan_dialog_area_main = self_scan_dialog.area_main

Expand Down Expand Up @@ -946,13 +946,13 @@ def self_folder_tree_yview(*args):

self.similarity_distance_var = IntVar()
self.similarity_distance_var_lab = StringVar()
self.similarity_distance_var.set(5)
self.similarity_distance_var.set(idivergence)

self.similarity_hsize_var = IntVar()
self.similarity_hsize_varx2 = IntVar()
self.similarity_hsize_var_lab = StringVar()
self.similarity_hsize_var.set(3)
self.similarity_hsize_varx2.set(6)
self.similarity_hsize_var.set(ihash//2)
self.similarity_hsize_varx2.set(ihash)

similarity_hsize_frame = LabelFrame(sf_par3,text='Hash size',borderwidth=2,bg=self.bg_color,takefocus=False)
similarity_hsize_frame.grid(row=0,column=0,padx=2,sticky='news')
Expand Down Expand Up @@ -3212,10 +3212,10 @@ def scan(self):

if similarity_mode:
self_progress_dialog_on_scan_lab[0].configure(image='',text='')
self_progress_dialog_on_scan_lab[1].configure(text='')
self_progress_dialog_on_scan_lab[2].configure(text='')
self_progress_dialog_on_scan_lab[3].configure(text='')
self_progress_dialog_on_scan_lab[4].configure(text='')
self_progress_dialog_on_scan_lab[1].configure(image='',text='')
self_progress_dialog_on_scan_lab[2].configure(image='',text='')
self_progress_dialog_on_scan_lab[3].configure(image='',text='')
self_progress_dialog_on_scan_lab[4].configure(image='',text='')

self_progress_dialog_on_scan.widget.title('Images hashing')

Expand All @@ -3231,7 +3231,7 @@ def scan(self):

bytes_to_str_dude_core_sum_size = local_bytes_to_str(dude_core.sum_size)

self_progress_dialog_on_scan_lab[2].configure(text=dude_core.info_line)
#self_progress_dialog_on_scan_lab[2].configure(text=dude_core.info_line)

while ih_thread_is_alive():
anything_changed=False
Expand Down Expand Up @@ -5683,7 +5683,8 @@ def show_homepage(self):
print('Done')

else:
Gui(getcwd(),p_args.paths,p_args.exclude,p_args.exclude_regexp,p_args.norun)
images_mode = bool(p_args.images or p_args.ih or p_args.id or p_args.ir)
Gui( getcwd(),p_args.paths,p_args.exclude,p_args.exclude_regexp,p_args.norun,images_mode,int(p_args.ih[0]),int(p_args.id[0]),p_args.ir )

except Exception as e_main:
print(e_main)
Expand Down

0 comments on commit ce4102a

Please sign in to comment.