fix: slight change to autolabels and support for pd (#92)
* fix: slight change to autolabels and exposed to pd
chrisochoatri authored May 5, 2022
1 parent 21bbe8d commit 5652a44
Showing 3 changed files with 66 additions and 14 deletions.
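Taken together, the three files plumb two options through to autolabels: use_diskcache now reaches the autolabel SceneContainers, and autolabel_root is exposed on the Parallel Domain dataset classes. A minimal usage sketch under those changes; the paths, datum names, and the autolabel model name "my_model" are hypothetical, and the keyword arguments are taken from the constructors shown in the diff below:

from dgp.datasets.pd_dataset import ParallelDomainSceneDataset

dataset = ParallelDomainSceneDataset(
    '/data/pd/scene_dataset.json',                        # hypothetical dataset JSON
    split='train',
    datum_names=('camera_01', ),                          # hypothetical datum name
    requested_annotations=('bounding_box_3d', ),
    requested_autolabels=('my_model/bounding_box_3d', ),  # '<model_name>/<annotation_key>'
    autolabel_root='/data/autolabels',                    # newly exposed for PD datasets
    use_diskcache=False,                                  # now forwarded to autolabel scenes too
)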
36 changes: 27 additions & 9 deletions dgp/datasets/base_dataset.py
@@ -1007,7 +1007,11 @@ def _get_scene_container(
     if requested_autolabels is not None:
         logging.debug(f"Loading autolabeled annotations from {scene_dir}.")
         autolabeled_scenes = _parse_autolabeled_scenes(
-            scene_dir, requested_autolabels, autolabel_root=autolabel_root, skip_missing_data=skip_missing_data
+            scene_dir,
+            requested_autolabels,
+            autolabel_root=autolabel_root,
+            skip_missing_data=skip_missing_data,
+            use_diskcache=use_diskcache,
         )
     else:
         autolabeled_scenes = None
@@ -1381,21 +1385,24 @@ def load_annotations(self, scene_idx, sample_idx_in_scene, datum_name):
         autolabel_annotations = self.get_autolabels_for_datum(scene_idx, sample_idx_in_scene, datum_name)
         for autolabel_key in self.requested_autolabels:
             # Some datums in a sample may not have associated annotations. Return "None" for those datums
-            _, annotation_key = autolabel_key.split('/')
-            # NOTE: model_name should already be stored in the scene json
-            # which is why we do not have to add it here to the annotation_file
+            model_name, annotation_key = autolabel_key.split('/')
+            # NOTE: model_name should typically not be included in the annotation_path stored inside the scene.json
+            # if for some reason it is, then it needs to be removed.
+
             annotation_path = autolabel_annotations.get(autolabel_key, None)

             if annotation_path is None:
                 autolabel_annotations[autolabel_key] = None
                 continue
             if self.autolabel_root is not None:
                 annotation_file = os.path.join(
-                    self.autolabel_root, os.path.basename(self.scenes[scene_idx].directory), 'autolabels',
-                    annotation_path
+                    self.autolabel_root, os.path.basename(self.scenes[scene_idx].directory), AUTOLABEL_FOLDER,
+                    model_name, annotation_path
                 )
             else:
-                annotation_file = os.path.join(self.scenes[scene_idx].directory, 'autolabels', annotation_path)
+                annotation_file = os.path.join(
+                    self.scenes[scene_idx].directory, AUTOLABEL_FOLDER, model_name, annotation_path
+                )

             if not os.path.exists(annotation_file):
                 logging.warning(f'missing {annotation_file}')
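The effect of this hunk: the model name parsed from the requested autolabel key now becomes a path component, and the 'autolabels' string literal is replaced by the AUTOLABEL_FOLDER constant. A sketch of the two resolved layouts, with hypothetical values and the assumption (suggested by the replaced literals) that AUTOLABEL_FOLDER is 'autolabels':

import os

scene_dir = '/data/pd/scene_000000'                      # hypothetical
autolabel_root = '/data/autolabels'                      # hypothetical
AUTOLABEL_FOLDER = 'autolabels'                          # assumption about the constant's value
model_name, annotation_key = 'my_model/bounding_box_3d'.split('/')
annotation_path = 'bounding_box_3d/000000000000.json'    # hypothetical path stored in scene.json

# With autolabel_root set:
#   /data/autolabels/scene_000000/autolabels/my_model/bounding_box_3d/000000000000.json
with_root = os.path.join(
    autolabel_root, os.path.basename(scene_dir), AUTOLABEL_FOLDER, model_name, annotation_path
)

# Without it, the annotation is expected under the scene directory itself:
#   /data/pd/scene_000000/autolabels/my_model/bounding_box_3d/000000000000.json
without_root = os.path.join(scene_dir, AUTOLABEL_FOLDER, model_name, annotation_path)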
@@ -1835,7 +1842,13 @@ def get_file_meta_from_datum(scene_idx, sample_idx_in_scene, datum_name):
     return data, annotations


-def _parse_autolabeled_scenes(scene_dir, requested_autolabels, autolabel_root=None, skip_missing_data=False):
+def _parse_autolabeled_scenes(
+    scene_dir,
+    requested_autolabels,
+    autolabel_root=None,
+    skip_missing_data=False,
+    use_diskcache=False,
+):
     """Parse autolabeled scene JSONs

     Parameters
@@ -1852,6 +1865,9 @@ def _parse_autolabeled_scenes(scene_dir, requested_autolabels, autolabel_root=No
     skip_missing_data: bool, default: False
         If true, skip over missing autolabel scenes

+    use_diskcache: bool, default: False
+        If diskcache should be used for autolabels
+
     Returns
     -------
     autolabeled_scenes: dict
@@ -1883,5 +1899,7 @@ def _parse_autolabeled_scenes(scene_dir, requested_autolabels, autolabel_root=No
         assert os.path.exists(autolabel_dir), 'Path to autolabels {} does not exist'.format(autolabel_dir)
         assert os.path.exists(autolabel_scene), 'Scene JSON expected but not found at {}'.format(autolabel_scene)

-        autolabeled_scenes[autolabel] = SceneContainer(autolabel_scene, directory=autolabel_dir)
+        autolabeled_scenes[autolabel] = SceneContainer(
+            autolabel_scene, directory=autolabel_dir, use_diskcache=use_diskcache
+        )
     return autolabeled_scenes
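With the change above, each requested autolabel gets a SceneContainer that honors the caller's diskcache preference instead of the container's default. A sketch of where the autolabel scene JSON is then expected to live; the helper is hypothetical, and the file name 'scene.json' is an assumption (the tests below refer to it via AUTOLABEL_SCENE_JSON_NAME):

import os

def autolabel_scene_json(scene_dir, requested_autolabel, autolabel_root=None):
    # Hypothetical helper mirroring the lookup in _parse_autolabeled_scenes
    model_name, _ = requested_autolabel.split('/')
    root = os.path.join(autolabel_root, os.path.basename(scene_dir)) if autolabel_root else scene_dir
    return os.path.join(root, 'autolabels', model_name, 'scene.json')

print(autolabel_scene_json('/data/pd/scene_000000', 'my_model/bounding_box_3d'))
# /data/pd/scene_000000/autolabels/my_model/scene.json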
30 changes: 28 additions & 2 deletions dgp/datasets/pd_dataset.py
@@ -83,6 +83,9 @@ class _ParallelDomainDataset(_SynchronizedDataset):
     transform_accumulated_box_points: bool, default: False
         Flag to use cuboid pose and instance id to warp points when using lidar accumulation.

+    autolabel_root: str, default: None
+        Path to autolabels.
+
     """
     def __init__(
         self,
@@ -98,6 +101,7 @@ def __init__(
         use_virtual_camera_datums=True,
         accumulation_context=None,
         transform_accumulated_box_points=False,
+        autolabel_root=None,
     ):
         self.coalesce_point_cloud = datum_names is not None and \
             COALESCED_LIDAR_DATUM_NAME in datum_names
@@ -136,6 +140,7 @@ def __init__(
             only_annotated_datums=only_annotated_datums,
             accumulation_context=accumulation_context,
             transform_accumulated_box_points=transform_accumulated_box_points,
+            autolabel_root=autolabel_root,
         )

     def coalesce_pc_data(self, items):
@@ -155,6 +160,12 @@ def coalesce_pc_data(self, items):
         assert self.coalesce_point_cloud
         assert len(pc_items) == len(LIDAR_DATUM_NAMES)

+        # TODO: fix this
+        if len(self.requested_autolabels) > 0:
+            logging.warning(
+                'autolabels were requested, however point cloud coalesce does not support coalescing autolabels'
+            )
+
         # Only coalesce if there's more than 1 point cloud
         coalesced_pc = OrderedDict()
         X_V_merged, bbox_3d_V_merged, instance_ids_merged = [], [], []
@@ -248,6 +259,7 @@ def __init__(
         dataset_root=None,
         transform_accumulated_box_points=False,
         use_diskcache=True,
+        autolabel_root=None,
     ):
         if not use_diskcache:
             logging.warning('Instantiating a dataset with use_diskcache=False may exhaust memory with a large dataset.')
@@ -261,10 +273,16 @@ def __init__(
             skip_missing_data=skip_missing_data,
             dataset_root=dataset_root,
             use_diskcache=use_diskcache,
+            autolabel_root=autolabel_root,
         )

         # Return SynchronizedDataset with scenes built from dataset.json
-        dataset_metadata = DatasetMetadata.from_scene_containers(scenes, requested_annotations, requested_autolabels)
+        dataset_metadata = DatasetMetadata.from_scene_containers(
+            scenes,
+            requested_annotations,
+            requested_autolabels,
+            autolabel_root=autolabel_root,
+        )
         super().__init__(
             dataset_metadata,
             scenes=scenes,
@@ -278,6 +296,7 @@ def __init__(
             use_virtual_camera_datums=use_virtual_camera_datums,
             accumulation_context=accumulation_context,
             transform_accumulated_box_points=transform_accumulated_box_points,
+            autolabel_root=autolabel_root,
         )


@@ -300,6 +319,7 @@ def __init__(
         accumulation_context=None,
         transform_accumulated_box_points=False,
         use_diskcache=True,
+        autolabel_root=None,
     ):
         if not use_diskcache:
             logging.warning('Instantiating a dataset with use_diskcache=False may exhaust memory with a large dataset.')
@@ -311,10 +331,16 @@
             is_datums_synchronized=True,
             skip_missing_data=skip_missing_data,
             use_diskcache=use_diskcache,
+            autolabel_root=autolabel_root,
         )

         # Return SynchronizedDataset with scenes built from dataset.json
-        dataset_metadata = DatasetMetadata.from_scene_containers([scene], requested_annotations, requested_autolabels)
+        dataset_metadata = DatasetMetadata.from_scene_containers(
+            [scene],
+            requested_annotations,
+            requested_autolabels,
+            autolabel_root=autolabel_root,
+        )
         super().__init__(
             dataset_metadata,
             scenes=[scene],
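The single-scene class gains the same knobs. A hedged sketch of loading one PD scene with autolabels; the scene path and model name are hypothetical, and the keyword arguments follow the constructor shown above:

from dgp.datasets.pd_dataset import ParallelDomainScene

scene = ParallelDomainScene(
    '/data/pd/scene_000000/scene.json',                   # hypothetical scene JSON
    requested_annotations=('bounding_box_3d', ),
    requested_autolabels=('my_model/bounding_box_3d', ),
    autolabel_root='/data/autolabels',
    skip_missing_data=True,
    use_diskcache=False,
)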
14 changes: 11 additions & 3 deletions tests/test_autolabel_dataset.py
@@ -62,12 +62,16 @@ def clone_scene_as_autolabel(dataset_root, autolabel_root, autolabel_model, auto
         if 'scene' in scene_json and scene_json.endswith('json'):
             base_scene = open_pbobject(os.path.join(full_scene_dir, scene_json), Scene)
             for i in range(len(base_scene.data)):
+                name = base_scene.data[i].id.name
                 datum = base_scene.data[i].datum
                 datum_type = datum.WhichOneof('datum_oneof')
                 datum_value = getattr(datum, datum_type)  # This is datum.image or datum.point_cloud etc
                 annotation_type_id = ANNOTATION_KEY_TO_TYPE_ID[autolabel_type]
                 current_annotation = datum_value.annotations[annotation_type_id]
-                datum_value.annotations[annotation_type_id] = os.path.join(autolabel_scene_dir, current_annotation)
+                # NOTE: this should not actually change the path but is included for clarity
+                datum_value.annotations[annotation_type_id] = os.path.join(
+                    ANNOTATION_TYPE_ID_TO_FOLDER[autolabel_type], name, os.path.basename(current_annotation)
+                )

             save_pbobject_as_json(base_scene, os.path.join(autolabel_scene_dir, AUTOLABEL_SCENE_JSON_NAME))
             # Only modify one scene.json, test scene should not contain multiple scene.jsons
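The rewritten annotation path in the test helper now has the shape <annotation_folder>/<datum_name>/<file>. A small worked example with hypothetical values, assuming ANNOTATION_TYPE_ID_TO_FOLDER maps the bounding-box-3d type id to 'bounding_box_3d':

import os

annotation_folder = 'bounding_box_3d'      # assumed ANNOTATION_TYPE_ID_TO_FOLDER[autolabel_type]
datum_name = 'camera_01'                   # base_scene.data[i].id.name, hypothetical
current_annotation = 'bounding_box_3d/000000000000.json'

rewritten = os.path.join(annotation_folder, datum_name, os.path.basename(current_annotation))
print(rewritten)  # bounding_box_3d/camera_01/000000000000.json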
@@ -109,7 +113,8 @@ def test_autolabels_default_root(self):
             backward_context=1,
             requested_annotations=('bounding_box_3d', ),
             requested_autolabels=requested_autolabels,
-            autolabel_root=autolabel_root
+            autolabel_root=autolabel_root,
+            use_diskcache=False,
         )

         assert len(dataset) == 2
@@ -139,7 +144,8 @@ def test_autolabels_custom_root(self):
             backward_context=1,
             requested_annotations=('bounding_box_3d', ),
             requested_autolabels=requested_autolabels,
-            autolabel_root=autolabel_root
+            autolabel_root=autolabel_root,
+            use_diskcache=False,
         )

         assert len(dataset) == 2
@@ -174,6 +180,7 @@ def test_autolabels_missing_files(self):
             requested_autolabels=requested_autolabels,
             autolabel_root=autolabel_root,
             skip_missing_data=True,
+            use_diskcache=False,
         )

         assert len(dataset) == 2
@@ -210,6 +217,7 @@ def test_only_annotated_datums(self):
             autolabel_root=autolabel_root,
             only_annotated_datums=True,
             skip_missing_data=True,
+            use_diskcache=False,
         )

         assert len(dataset) == 1
