-
Notifications
You must be signed in to change notification settings - Fork 0
/
cuhk_sysu.py
71 lines (55 loc) · 1.99 KB
/
cuhk_sysu.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
# encoding: utf-8
"""
@author: xingyu liao
@contact: [email protected]
"""
import glob
import os.path as osp
import re
import warnings
from .bases import ImageDataset
from ..datasets import DATASET_REGISTRY
@DATASET_REGISTRY.register()
class cuhkSYSU(ImageDataset):
    """CUHK SYSU datasets.

    The dataset is collected from two sources: street snap and movie.
    In street snap, 12,490 images and 6,057 query persons were collected
    with movable cameras across hundreds of scenes while 5,694 images and
    2,375 query persons were selected from movies and TV dramas.

    Dataset statistics:
        - identities: 11,934
        - images: 34,574

    No camid is available, so every sample gets the same dummy camera id.
    All images are used as training data; the query and gallery splits
    are left empty.
    """
    dataset_dir = 'cuhk_sysu'
    dataset_name = "cuhkSYSU"

    def __init__(self, root='datasets', **kwargs):
        """Locate the cropped images under ``root`` and build the train list.

        Args:
            root: Directory that contains the ``cuhk_sysu`` dataset folder.
            **kwargs: Forwarded unchanged to ``ImageDataset.__init__``.
        """
        self.root = root
        self.dataset_dir = osp.join(self.root, self.dataset_dir)
        self.data_dir = osp.join(self.dataset_dir, "cropped_images")

        # NOTE(review): check_before_run is inherited; presumably it fails
        # when a listed path is missing -- confirm against ImageDataset.
        required_files = [self.data_dir]
        self.check_before_run(required_files)

        train = self.process_dir(self.data_dir)
        query = []
        gallery = []

        super(cuhkSYSU, self).__init__(train, query, gallery, **kwargs)

    def process_dir(self, dirname):
        """Build ``(img_path, label, camid)`` tuples for every JPEG in a dir.

        The identity of an image is the part of its file name before the
        first underscore. Identities are numbered in sorted order, so the
        identity-to-label mapping is the same in every process and every
        run, independent of string hash randomization.

        Args:
            dirname: Directory that directly contains the ``*.jpg`` files.

        Returns:
            A list of ``(img_path, label, camid)`` tuples sorted by path,
            where ``label`` is ``"<dataset_name>_<index>"`` and ``camid`` is
            the constant ``"<dataset_name>_0"``.
        """
        # glob.glob returns results in arbitrary order; sort so that the
        # sample list is reproducible.
        img_paths = sorted(glob.glob(osp.join(dirname, '*.jpg')))
        pids = [osp.basename(img_path).split('_')[0] for img_path in img_paths]

        # Enumerating a bare set of strings would hand out labels in an
        # order that can differ between interpreter processes; sorting
        # pins the mapping.
        pid2label = {pid: label for label, pid in enumerate(sorted(set(pids)))}

        camid = self.dataset_name + "_0"  # dummy camera id
        return [
            (img_path, self.dataset_name + "_" + str(pid2label[pid]), camid)
            for img_path, pid in zip(img_paths, pids)
        ]