setup.py
import os
import argparse
import hashlib
import json
try:
    import gdown
except ModuleNotFoundError:
    print('The Python package `gdown` is required to download the dataset.\nPlease run: `pip install gdown`\n')
    raise
# parse args
parser = argparse.ArgumentParser(description='Download files for HOPE datasets.')
parser.add_argument('--overwrite', action='store_true',
                    help='Overwrite existing paths')
parser.add_argument('--meshes', action='store_true',
                    help='Download low-res and high-res object meshes')
parser.add_argument('--meshes-eval', action='store_true',
                    help='Download low-res object meshes')
parser.add_argument('--meshes-full', action='store_true',
                    help='Download high-res object meshes')
parser.add_argument('--image', action='store_true',
                    help='Download HOPE-Image dataset')
parser.add_argument('--image-valid', action='store_true',
                    help='Download HOPE-Image validation dataset')
parser.add_argument('--image-test', action='store_true',
                    help='Download HOPE-Image test dataset')
parser.add_argument('--video', action='store_true',
                    help='Download HOPE-Video dataset')
args = parser.parse_args()
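# Example invocations (a sketch; the flag names come from the argparse setup above,
# and setup.json is assumed to sit next to this script):
#   python setup.py                            # no flags: download meshes, HOPE-Image, and HOPE-Video
#   python setup.py --meshes-eval              # only the low-res evaluation meshes
#   python setup.py --image-test --overwrite   # re-download the HOPE-Image test set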
# by default, download all parts
if not any([
    args.meshes, args.meshes_eval, args.meshes_full,
    args.image, args.image_valid, args.image_test,
    args.video
]):
    args.meshes = True
    args.image = True
    args.video = True
if args.meshes:
    args.meshes_eval = True
    args.meshes_full = True
if args.image:
    args.image_valid = True
    args.image_test = True
# read list of urls for downloading the dataset
with open('setup.json') as f:
    urls = json.load(f)
filter_urls = lambda k: [(u['url'], u['dest'], u['md5']) for u in urls if u['group'] == k]
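# Illustrative shape of a setup.json entry, inferred from the keys used above
# (the url, dest, and md5 values here are placeholders, not real dataset entries):
#   {"group": "meshes_eval", "url": "https://...", "dest": "meshes/eval", "md5": "<md5 of archive>"}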
# function to compute md5 hashes (why isn't this already in hashlib?!)
# (from https://stackoverflow.com/questions/22058048/hashing-a-file-in-python)
def compute_md5(fn, BUF_SIZE=65536):
    md5 = hashlib.md5()
    with open(fn, 'rb') as f:
        while True:
            data = f.read(BUF_SIZE)
            if not data:
                break
            md5.update(data)
    return md5.hexdigest()
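# Quick usage sketch (hypothetical filename): compute_md5('some_archive.zip') returns the
# file's MD5 as a 32-character hex string, which is compared against the value in setup.json.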
# function to download and extract archives
def download_and_extract(group, msg=None, skip_existing=True):
    filtered_urls = filter_urls(group)
    print(f'Downloading {group if msg is None else msg} ({len(filtered_urls)} file{"s" if len(filtered_urls) != 1 else ""})...\n')
    for url, dest, md5 in filtered_urls:
        # skip if path already exists
        if os.path.exists(dest) and skip_existing:
            print(f'Path {dest} exists; skipping.\n(To re-download, use option --overwrite.)')
            continue
        # download archive file and check md5 hash
        fn = gdown.download(url=url, quiet=False)
        assert compute_md5(fn) == md5, 'Downloaded file failed MD5 hash! Exiting...'
        print('MD5 passed.')
        # make target path, extract, and delete archive
        os.makedirs(dest, exist_ok=True)
        gdown.extractall(path=fn, to=dest)
        os.remove(fn)
        print('Extracted.')
    print('\nDone.\n\n')
# download requested parts of dataset
if args.meshes_eval:
    download_and_extract(
        'meshes_eval',
        msg='low-res eval meshes',
        skip_existing=not args.overwrite)
if args.meshes_full:
    download_and_extract(
        'meshes_full',
        msg='full-res meshes',
        skip_existing=not args.overwrite)
if args.image_valid:
    download_and_extract(
        'hope_image_valid',
        msg='HOPE-Image validation set',
        skip_existing=not args.overwrite)
if args.image_test:
    download_and_extract(
        'hope_image_test',
        msg='HOPE-Image test set',
        skip_existing=not args.overwrite)
if args.video:
    download_and_extract(
        'hope_video',
        msg='HOPE-Video dataset',
        skip_existing=not args.overwrite)