Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Use average coordinate of merged keypoints during NMS #711

Open
wants to merge 22 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 17 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion gtsfm/data_association/cpp_dsf_tracks_estimator.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ def run(self, matches_dict: Dict[Tuple[int, int], np.ndarray], keypoints_list: L
# (Converts python dict into gtsam.MatchIndicesMap.)
matches_map = gtsam.MatchIndicesMap()
for (i1, i2), corr_idxs in matches_dict.items():
matches_map[gtsam.IndexPair(i1, i2)] = corr_idxs
matches_map[gtsam.IndexPair(i1, i2)] = corr_idxs.astype(np.int32)

# Convert gtsfm Keypoints into gtsam Keypoints.
keypoints_vector = gtsam.KeypointsVector()
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -29,8 +29,8 @@ class ImageCorrespondenceGenerator(CorrespondenceGeneratorBase):
def __init__(self, matcher: ImageMatcherBase, deduplicate: bool = True) -> None:
"""
Args:
matcher: matcher to use.
deduplicate: whether to de-duplicate with a single image the detections received from each image pair.
matcher: Matcher to use.
deduplicate: Whether to de-duplicate with a single image the detections received from each image pair.
"""
self._matcher = matcher

Expand All @@ -42,6 +42,7 @@ def __repr__(self) -> str:
return f"""
ImageCorrespondenceGenerator:
{self._matcher}
{self._aggregator}
"""

def generate_correspondences(
Expand All @@ -53,9 +54,9 @@ def generate_correspondences(
"""Apply the correspondence generator to generate putative correspondences.

Args:
client: dask client, used to execute the front-end as futures.
images: list of all images, as futures.
image_pairs: indices of the pairs of images to estimate two-view pose and correspondences.
client: Dask client, used to execute the front-end as futures.
images: List of all images, as futures.
image_pairs: Indices of the pairs of images to estimate two-view pose and correspondences.

Returns:
List of keypoints, one entry for each input images.
Expand Down Expand Up @@ -96,14 +97,14 @@ def generate_correspondences_and_estimate_two_view(
two view estimator to complete the front-end.

Args:
client: dask client, used to execute the front-end as futures.
images: list of all images.
image_pairs: indices of the pairs of images to estimate two-view pose and correspondences.
camera_intrinsics: list of all camera intrinsics.
relative_pose_priors: priors on relative pose between two cameras.
client: Dask client, used to execute the front-end as futures.
images: List of all images.
image_pairs: Indices of the pairs of images to estimate two-view pose and correspondences.
camera_intrinsics: List of all camera intrinsics.
relative_pose_priors: Priors on relative pose between two cameras.
gt_cameras: GT cameras, used to evaluate metrics.
gt_scene_mesh: GT mesh of the 3D scene, used to evaluate metrics.
two_view_estimator: two view estimator, which is used to verify correspondences and estimate pose.
two_view_estimator: Two view estimator, which is used to verify correspondences and estimate pose.

Returns:
List of keypoints, one entry for each input images.
Expand Down Expand Up @@ -170,5 +171,4 @@ def apply_two_view_estimator(
}

two_view_output_dict = client.gather(two_view_output_futures)

return keypoints_list, two_view_output_dict
Original file line number Diff line number Diff line change
Expand Up @@ -15,11 +15,17 @@


class KeypointAggregatorDedup(KeypointAggregatorBase):
"""Keypoint aggregator with de-duplication."""
"""Keypoint aggregator with de-duplication of keypoints within each image."""

def __init__(self) -> None:
"""Initialize global variables"""
def __init__(self, nms_merge_radius: float = 3) -> None:
    """Initialize aggregator state.

    Args:
        nms_merge_radius: Radius (in pixels) to use when merging detections within the same view (image).
            Note that tracks are merged, not suppressed.
    """
    # Merge radius, in pixels, applied during per-image non-max suppression.
    self.nms_merge_radius = nms_merge_radius
    # Running count of detections that were merged into an existing keypoint.
    self.duplicates_found = 0

def append_unique_keypoints(
self, i: int, keypoints: Keypoints, per_image_kpt_coordinates: Dict[Tuple[int, int], np.ndarray]
Expand All @@ -45,11 +51,16 @@ def append_unique_keypoints(

for k, uv in enumerate(keypoints.coordinates):
diff_norms = np.linalg.norm(per_image_kpt_coordinates[i] - uv, axis=1)
# TODO(johnwlambert,ayushbaid): test loosening threshold below to some epsilon.
is_identical = np.any(diff_norms == 0)
if len(per_image_kpt_coordinates[i]) > 0 and is_identical:
is_duplicate = np.any(diff_norms <= self.nms_merge_radius)
if len(per_image_kpt_coordinates[i]) > 0 and is_duplicate:
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Have you tested with len(per_image_kpt_coordinates[i]) == 0? Does the <= comparison work with empty arrays?
Also np.any should be false in that case, so you would not need that condition at all?

self.duplicates_found += 1
i_indices[k] = np.argmin(diff_norms)
img_global_kpt_idx = np.argmin(diff_norms)
i_indices[k] = img_global_kpt_idx
# Modify global keypoint coordinate to be set to average value of merged detections, instead of
# using the first identified coordinate.
updated_uv = np.mean([per_image_kpt_coordinates[i][img_global_kpt_idx], uv], axis=0)
per_image_kpt_coordinates[i][img_global_kpt_idx] = updated_uv
Comment on lines +59 to +62
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Shouldn't you take the mean after you've determined which keypoints are to be merged?


else:
i_indices[k] = i_count
i_count += 1
Expand Down Expand Up @@ -99,10 +110,11 @@ def aggregate(
per_image_kpt_coordinates, i2_indices = self.append_unique_keypoints(
i=i2, keypoints=keypoints_i2, per_image_kpt_coordinates=per_image_kpt_coordinates
)
putative_corr_idxs = np.stack([i1_indices, i2_indices], axis=-1).astype(np.uint16)
putative_corr_idxs = np.stack([i1_indices, i2_indices], axis=-1).astype(np.int32)
putative_corr_idxs_dict[(i1, i2)] = putative_corr_idxs

logger.info(f"Merged {self.duplicates_found} duplicates during de-duplication.")
print(f"Merged {self.duplicates_found} duplicates during de-duplication.")
# Reset global state.
self.duplicates_found = 0

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@


class KeypointAggregatorUnique(KeypointAggregatorBase):
"""Keypoint aggregator without de-duplication."""
"""Keypoint aggregator without de-duplication, allowing for potentially duplicate keypoints per image."""

def aggregate(
self, keypoints_dict: Dict[Tuple[int, int], Tuple[Keypoints, Keypoints]]
Expand Down
2 changes: 1 addition & 1 deletion gtsfm/two_view_estimator.py
Original file line number Diff line number Diff line change
Expand Up @@ -184,7 +184,7 @@ def bundle_adjust(
logger.debug("Triangulated %d correspondences out of %d.", len(triangulated_tracks), len(verified_corr_idxs))

if len(triangulated_tracks) == 0:
return i2Ti1_initial.rotation(), Unit3(i2Ti1_initial.translation()), np.array([], dtype=np.uint32)
return i2Ti1_initial.rotation(), Unit3(i2Ti1_initial.translation()), np.zeros(shape=(0, 2), dtype=np.int32)

# Build BA inputs.
start_time = timeit.default_timer()
Expand Down
3 changes: 2 additions & 1 deletion scripts/benchmark_wildcat.sh
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ datasets=(
)

max_frame_lookahead_sizes=(
0
5
10
15
Expand Down Expand Up @@ -99,7 +100,7 @@ for num_matched in ${num_matched_sizes[@]}; do
--output_root $OUTPUT_ROOT \
--max_resolution 760 \
2>&1 | tee $OUTPUT_ROOT/out.log
elif [[ $loader == *"olsson"* ]]
elif [[ $loader == *"colmap"* ]]
then
python gtsfm/runner/run_scene_optimizer_colmaploader.py \
--mvs_off \
Expand Down
12 changes: 8 additions & 4 deletions scripts/collect_results.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,9 @@
"total_run_duration_sec",
]

total_fname = "total_summary_metrics.json"
total_metrics = ["total_runtime_sec"]

# Metrics that **do not** have a median + mean value associated.
SCALAR_METRIC_NAMES = [
"number_cameras",
Expand All @@ -83,6 +86,7 @@
"num_input_measurements",
"num_inlier_measurements",
"num_outlier_measurements",
"total_runtime_sec"
]


Expand All @@ -92,7 +96,7 @@ def main(user_root: Path, output_fpath: str) -> None:
table = defaultdict(list)
headers = ["method_name"]

experiment_roots = sorted(list(user_root.glob("*-*")))
experiment_roots = sorted(list(user_root.glob("*__*")))

method_idx = 0
for experiment_root in experiment_roots:
Expand All @@ -102,9 +106,9 @@ def main(user_root: Path, output_fpath: str) -> None:
table["method_name"].append(frontend_name)

for json_fname, metric_names, nickname in zip(
[retriever_fname, isp_fname, vg_fname, ra_fname, ta_fname, ba_result_fname],
[retriever_metrics, isp_metrics, vg_metrics, ra_metrics, ta_metrics, ba_result_metrics],
["retriever", "isp", "vg", "ra", "ta", "ba"],
[retriever_fname, isp_fname, vg_fname, ra_fname, ta_fname, ba_result_fname, total_fname],
[retriever_metrics, isp_metrics, vg_metrics, ra_metrics, ta_metrics, ba_result_metrics, total_metrics],
["retriever", "isp", "vg", "ra", "ta", "ba", "total"],
):
section_name = Path(json_fname).stem
print(f"{dirpath}/{json_fname}")
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,17 +2,24 @@

Authors: John Lambert
"""
import pathlib
import unittest
from typing import Dict, Tuple

import numpy as np

from gtsfm.common.keypoints import Keypoints
from gtsfm.frontend.cacher.image_matcher_cacher import ImageMatcherCacher
from gtsfm.frontend.matcher.loftr import LOFTR
from gtsfm.loader.olsson_loader import OlssonLoader
from gtsfm.frontend.correspondence_generator.keypoint_aggregator.keypoint_aggregator_dedup import (
KeypointAggregatorDedup,
)
from tests.frontend.correspondence_generator.keypoint_aggregator import test_keypoint_aggregator_base

DATA_ROOT_PATH = pathlib.Path(__file__).resolve().parent.parent.parent.parent / "data"
DEFAULT_FOLDER = DATA_ROOT_PATH / "set1_lund_door"


class TestKeypointAggregatorDedup(test_keypoint_aggregator_base.TestKeypointAggregatorBase):
"""Test class for DoG detector class in frontend.
Expand All @@ -22,7 +29,7 @@ class TestKeypointAggregatorDedup(test_keypoint_aggregator_base.TestKeypointAggr

def setUp(self):
super().setUp()
self.aggregator = KeypointAggregatorDedup()
self.aggregator = KeypointAggregatorDedup(nms_merge_radius=0.0)

def test_keypoint_aggregator_repeated_keypoints(self) -> None:
"""Ensure aggregation works over 3 images, with duplicate keypoints in the same image from separate pairs.
Expand Down Expand Up @@ -59,7 +66,7 @@ def test_keypoint_aggregator_repeated_keypoints(self) -> None:
assert len(keypoints_list) == 3
assert all([isinstance(kps, Keypoints) for kps in keypoints_list])

# removing duplicates
# Duplicates should have been removed.
expected_putative_corr_idxs_dict = {
(0, 1): np.array([[0, 0]]),
(1, 2): np.array([[1, 0]]),
Expand All @@ -84,6 +91,25 @@ def test_keypoint_aggregator_repeated_keypoints(self) -> None:
expected_image2_kps = np.array([[2.0, 2.0], [3.0, 3.0], [4.0, 4.0]])
assert np.allclose(keypoints_list[2].coordinates, expected_image2_kps)

def test_dedup_nms_merge_radius_3(self) -> None:
    """Run dedup aggregation with a 3-pixel merge radius on real LOFTR matches over 4 Lund door images."""
    data_loader = OlssonLoader(str(DEFAULT_FOLDER), max_frame_lookahead=4)
    matcher = ImageMatcherCacher(matcher_obj=LOFTR())
    deduper = KeypointAggregatorDedup(nms_merge_radius=3.0)

    frames = [data_loader.get_image(idx) for idx in range(4)]
    pairs = [(0, 1), (1, 2), (2, 3), (0, 2), (1, 3)]

    pairwise_correspondences = {}
    for i1, i2 in pairs:
        pairwise_correspondences[(i1, i2)] = matcher.match(image_i1=frames[i1], image_i2=frames[i2])

    keypoints_list, putative_corr_idxs_dict = deduper.aggregate(keypoints_dict=pairwise_correspondences)

    # Every correspondence array must be an (N, 2) int32 array of keypoint indices.
    for corr_idxs in putative_corr_idxs_dict.values():
        assert corr_idxs.dtype == np.int32
        assert corr_idxs.ndim == 2
        assert corr_idxs.shape[-1] == 2


if __name__ == "__main__":
unittest.main()
Loading