Skip to content

Commit

Permalink
Merge pull request #17 from PanderMusubi/master
Browse files Browse the repository at this point in the history
support flipped images
  • Loading branch information
nibbledeez authored Sep 27, 2022
2 parents 5c8f7bb + 46afc27 commit 85cd21f
Show file tree
Hide file tree
Showing 4 changed files with 58 additions and 8 deletions.
18 changes: 18 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,24 @@ sudo systemctl disable mongodb.service
sudo service mongodb stop
```

Alternatively, install MongoDB from https://www.mongodb.com/docs/manual/tutorial/install-mongodb-on-ubuntu/

sudo apt-get install -y mongodb-org
sudo systemctl status mongod

See also https://stackoverflow.com/questions/37565758/mongodb-not-working-on-ubuntu-mongod-service-failed-with-result-exit-code and https://medium.com/@balasubramanim/how-to-resolve-socketexception-address-already-in-use-mongodb-75fa8ea4a2a6. Note that restarting has to be done with:

sudo systemctl start mongod

A workaround, if problems keep arising, is to run duplicate_finder.py with sudo. When
running as root, if http://127.0.0.1:5000 gives the error 'Not Found', open the URL
file:///tmp/tmp......../0.html with Firefox (not Chrome or Chromium from flathub).
You may even need to run chown -R yourusername.yourusername on /tmp/tmp......../ first.

In case pip3 has problems:

apt-get install -y python3-numpy python3-scipy python3-pywt

Python 2 is the default version of Python, so we have to call `python3` explicitly:

```bash
Expand Down
37 changes: 32 additions & 5 deletions duplicate_finder.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
A tool to find and remove duplicate pictures.
Usage:
duplicate_finder.py add <path> ... [--db=<db_path>] [--parallel=<num_processes>]
duplicate_finder.py add <path> ... [--flipped] [--nomax] [--db=<db_path>] [--parallel=<num_processes>]
duplicate_finder.py remove <path> ... [--db=<db_path>]
duplicate_finder.py clear [--db=<db_path>]
duplicate_finder.py show [--db=<db_path>]
Expand All @@ -18,6 +18,9 @@
--parallel=<num_processes> The number of parallel processes to run to hash the image
files (default: number of CPUs).
add:
--flipped Also add flipped images
--nomax No maximum file size
find:
--print Only print duplicate files rather than displaying HTML file
--delete Move all found duplicate pictures to the trash. This option takes priority over --print.
Expand All @@ -28,6 +31,7 @@

import concurrent.futures
from contextlib import contextmanager
from datetime import datetime
import os
import magic
import math
Expand All @@ -46,6 +50,7 @@
import pymongo
from termcolor import cprint

FLIPPED = False

@contextmanager
def connect_to_db(db_conn_string='./db'):
Expand Down Expand Up @@ -122,6 +127,7 @@ def hash_file(file):
img = Image.open(file)

file_size = get_file_size(file)
file_time = get_file_time(file)
image_size = get_image_size(img)
capture_time = get_capture_time(img)

Expand All @@ -131,12 +137,20 @@ def hash_file(file):
turned_img = img.rotate(angle, expand=True)
else:
turned_img = img
hashes.append(str(imagehash.phash(turned_img)))
string = str(imagehash.phash(turned_img))
if string not in hashes:
hashes.append(string)
# also hash flipped image
if FLIPPED:
flipped_img = turned_img.transpose(method=Image.FLIP_LEFT_RIGHT)
string = str(imagehash.phash(flipped_img))
if string not in hashes:
hashes.append(string)

hashes = ''.join(sorted(hashes))

cprint("\tHashed {}".format(file), "blue")
return file, hashes, file_size, image_size, capture_time
return file, hashes, file_size, file_time, image_size, capture_time
except OSError:
cprint("\tUnable to open {}".format(file), "red")
return None
Expand All @@ -149,11 +163,12 @@ def hash_files_parallel(files, num_processes=None):
yield result


def _add_to_database(file_, hash_, file_size, image_size, capture_time, db):
def _add_to_database(file_, hash_, file_size, file_time, image_size, capture_time, db):
try:
db.insert_one({"_id": file_,
"hash": hash_,
"file_size": file_size,
"file_time": file_time,
"image_size": image_size,
"capture_time": capture_time})
except pymongo.errors.DuplicateKeyError:
Expand Down Expand Up @@ -228,6 +243,7 @@ def find(db, match_time=False):
"$push": {
"file_name": "$_id",
"file_size": "$file_size",
"file_time": "$file_time",
"image_size": "$image_size",
"capture_time": "$capture_time"
}
Expand Down Expand Up @@ -288,7 +304,7 @@ def render(duplicates, current, total):

with TemporaryDirectory() as folder:
# Generate all of the HTML files
chunk_size = 25
chunk_size = 50
for i, dups in enumerate(chunked(duplicates, chunk_size)):
with open('{}/{}.html'.format(folder, i), 'w') as f:
f.write(render(dups,
Expand All @@ -311,6 +327,13 @@ def get_file_size(file_name):
return 0


def get_file_time(file_name):
    """Return the modification time of *file_name* as a formatted string.

    The timestamp reported by ``os.path.getmtime`` is converted with
    ``datetime.fromtimestamp`` and rendered as ``'%Y:%m:%d, %H:%M:%S'``.
    If the file does not exist, ``0`` is returned instead of a string.
    """
    try:
        modified_at = os.path.getmtime(file_name)
    except FileNotFoundError:
        # Missing file: report 0 rather than raising.
        return 0

    moment = datetime.fromtimestamp(modified_at)
    return moment.strftime('%Y:%m:%d, %H:%M:%S')


def get_image_size(img):
    """Return the dimensions of *img* as the string ``'<first> x <second>'``.

    The two values are taken, in order, from ``img.size``.
    """
    dimensions = img.size
    return "{} x {}".format(dimensions[0], dimensions[1])

Expand Down Expand Up @@ -349,6 +372,10 @@ def get_capture_time(img):

with connect_to_db(db_conn_string=DB_PATH) as db:
if args['add']:
if args['--flipped']:
FLIPPED = True
if args['--nomax']:
Image.MAX_IMAGE_PIXELS = None
add(args['<path>'], db, NUM_PROCESSES)
elif args['remove']:
remove(args['<path>'], db)
Expand Down
2 changes: 1 addition & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -19,4 +19,4 @@ python-magic==0.4.27
scipy==1.5.3
setuptools==65.4.0
six==1.15.0
termcolor==2.0.1
termcolor==2.0.1
9 changes: 7 additions & 2 deletions template/index.html
Original file line number Diff line number Diff line change
Expand Up @@ -9,12 +9,17 @@
{% macro image(img, size) -%}
<div class="col-xs-{{ size }}">
<div class="thumbnail">
<img class="img-responsive" src="{{ img['file_name'] }}" alt="{{ img['file_name'] }}">
<a target="_blank" href="{{ img['file_name'] }}"><img class="img-responsive" src="{{ img['file_name'] }}" alt="{{ img['file_name'] }}"></a>
<div class="caption">
<h5 class="name">{{ img['file_name'] }}</h5>
<div class="file-size">{{ img['file_size'] | filesizeformat }}</div>
<div class="resolution">{{ img['image_size'] }}</div>
<div class="capture-time">{{ img['capture_time'] }}</div>
<div class="file-time">FILE {{ img['file_time'] }}</div>
{% if img['capture_time'] == 'Time unknown' %}
<div class="capture-time">EXIF <strong>{{ img['capture_time'] }}</strong></div>
{% else %}
<div class="capture-time">EXIF {{ img['capture_time'] }}</div>
{% endif %}
<button class="btn btn-danger delete-btn" role="button" data-name="{{ img['file_name'] }}" style="margin-top: 15px">
Delete
</button>
Expand Down

0 comments on commit 85cd21f

Please sign in to comment.