diff --git a/.github/workflows/benchmark-self-hosted.yml b/.github/workflows/benchmark-self-hosted.yml index dfc7c066c..16ceb1d2b 100644 --- a/.github/workflows/benchmark-self-hosted.yml +++ b/.github/workflows/benchmark-self-hosted.yml @@ -29,7 +29,7 @@ jobs: PYTHON_VERSION: 3.8 steps: - - uses: actions/checkout@v4.0.0 + - uses: actions/checkout@v4.1.0 - name: Cache frontend uses: actions/cache@v3 env: diff --git a/.github/workflows/benchmark.yml b/.github/workflows/benchmark.yml index f96ca000e..083bf219c 100644 --- a/.github/workflows/benchmark.yml +++ b/.github/workflows/benchmark.yml @@ -36,7 +36,7 @@ jobs: PYTHON_VERSION: 3.8 steps: - - uses: actions/checkout@v4.0.0 + - uses: actions/checkout@v4.1.0 - name: Cache frontend uses: actions/cache@v3 env: diff --git a/.github/workflows/build-wheels.yml b/.github/workflows/build-wheels.yml index a2c8becca..ff6139283 100644 --- a/.github/workflows/build-wheels.yml +++ b/.github/workflows/build-wheels.yml @@ -18,7 +18,7 @@ jobs: shell: bash -l {0} steps: - - uses: actions/checkout@v4.0.0 + - uses: actions/checkout@v4.1.0 - uses: conda-incubator/setup-miniconda@v2 with: mamba-version: "*" diff --git a/.github/workflows/test-python.yml b/.github/workflows/test-python.yml index 9d62b9b1b..088bff4ef 100644 --- a/.github/workflows/test-python.yml +++ b/.github/workflows/test-python.yml @@ -14,7 +14,7 @@ jobs: PYTHON_VERSION: 3.8 steps: - - uses: actions/checkout@v4.0.0 + - uses: actions/checkout@v4.1.0 - uses: conda-incubator/setup-miniconda@v2 with: mamba-version: "*" diff --git a/.github/workflows/test-reproducibility.yml b/.github/workflows/test-reproducibility.yml index 583102fc5..820d2991b 100644 --- a/.github/workflows/test-reproducibility.yml +++ b/.github/workflows/test-reproducibility.yml @@ -14,7 +14,7 @@ jobs: PYTHON_VERSION: 3.8 steps: - - uses: actions/checkout@v4.0.0 + - uses: actions/checkout@v4.1.0 - uses: conda-incubator/setup-miniconda@v2 with: mamba-version: "*" diff --git a/gtsfm/common/keypoints.py b/gtsfm/common/keypoints.py index 0a8112834..abd31fa9b 100644 --- a/gtsfm/common/keypoints.py +++ b/gtsfm/common/keypoints.py @@ -34,9 +34,9 @@ def __init__( """Initializes the attributes. Args: - coordinates: the (x, y) coordinates of the features, of shape Nx2. - scales: optional scale of the detections, of shape N. - responses: optional confidences/responses for each detection, of shape N. + coordinates: The (x, y) coordinates of the features, of shape Nx2. + scales: Optional scale of the detections, of shape N. + responses: Optional confidences/responses for each detection, of shape N. """ self.coordinates = coordinates self.scales = scales @@ -61,10 +61,10 @@ def __eq__(self, other: object) -> bool: if not isinstance(other, Keypoints): return False - # equality check on coordinates + # Equality check on coordinates. coordinates_eq = np.array_equal(self.coordinates, other.coordinates) - # equality check on scales + # Equality check on scales. if self.scales is None and other.scales is None: scales_eq = True elif self.scales is not None and other.scales is not None: @@ -93,10 +93,10 @@ def get_top_k(self, k: int) -> Tuple["Keypoints", np.ndarray]: If k keypoints are requested, and only n < k are available, then returning n keypoints is the expected behavior. Args: - k: max number of keypoints to return. + k: Maximum number of keypoints to return. Returns: - subset of current keypoints. + Subset of current keypoints. 
""" if k >= len(self): return copy.deepcopy(self), np.arange(self.__len__()) @@ -114,7 +114,7 @@ def filter_by_mask(self, mask: np.ndarray) -> Tuple["Keypoints", np.ndarray]: Args: mask: (H, W) array of 0's and 1's corresponding to valid portions of the original image. - keypoints: detected keypoints with length M. + keypoints: Detected keypoints with length M. descriptors: (M, D) array of descriptors D is the dimension of each descriptor. Returns: @@ -148,7 +148,7 @@ def cast_to_float(self) -> "Keypoints": """Cast all attributes which are numpy arrays to float. Returns: - keypoints with the type-casted attributes. + Keypoints with the type-casted attributes. """ return Keypoints( coordinates=None if self.coordinates is None else self.coordinates.astype(np.float32), @@ -166,7 +166,7 @@ def cast_to_opencv_keypoints(self) -> List[cv.KeyPoint]: List of OpenCV's keypoints with the same information as input keypoints. """ - # cast input attributed to floating point numpy arrays. + # Cast input attributed to floating point numpy arrays. keypoints = self.cast_to_float() opencv_keypoints = [] @@ -216,14 +216,13 @@ def extract_indices(self, indices: np.ndarray) -> "Keypoints": """Form subset with the given indices. Args: - indices: indices to extract, as a 1-D vector. + indices: Indices to extract, as a 1-D vector. Returns: Subset of data at the given indices. """ - if indices.size == 0: - return Keypoints(coordinates=np.array([])) + return Keypoints(coordinates=np.zeros(shape=(0, 2))) return Keypoints( self.coordinates[indices], diff --git a/gtsfm/data_association/cpp_dsf_tracks_estimator.py b/gtsfm/data_association/cpp_dsf_tracks_estimator.py index bf8c58991..6625ca501 100644 --- a/gtsfm/data_association/cpp_dsf_tracks_estimator.py +++ b/gtsfm/data_association/cpp_dsf_tracks_estimator.py @@ -49,9 +49,15 @@ def run(self, matches_dict: Dict[Tuple[int, int], np.ndarray], keypoints_list: L # Check to ensure dimensions of coordinates are correct. dims_valid = all([kps.coordinates.ndim == 2 for kps in keypoints_list]) if not dims_valid: - raise Exception( - "Dimensions for Keypoint coordinates incorrect. Array needs to be 2D, but found " - f"{[kps.coordinates.shape for kps in keypoints_list]}" + error_str = " ".join( + [ + f"i={i}: shape={kps.coordinates.shape}," + for i, kps in enumerate(keypoints_list) + if kps.coordinates.ndim != 2 + ] + ) + raise ValueError( + f"Dimensions for Keypoint coordinates incorrect. Array needs to be 2D, but found {error_str}" ) # For each image pair (i1,i2), we provide a (K,2) matrix of corresponding keypoint indices (k1,k2). @@ -65,11 +71,7 @@ def run(self, matches_dict: Dict[Tuple[int, int], np.ndarray], keypoints_list: L for keypoint in keypoints_list: keypoints_vector.append(gtsam.gtsfm.Keypoints(keypoint.coordinates)) - tracks = gtsam.gtsfm.tracksFromPairwiseMatches( - matches_map, - keypoints_vector, - verbose=True, - ) + tracks = gtsam.gtsfm.tracksFromPairwiseMatches(matches_map, keypoints_vector, verbose=True) track_2d_list = [] for track in tracks: diff --git a/gtsfm/data_association/point3d_initializer.py b/gtsfm/data_association/point3d_initializer.py index fd650f4f1..04a2b8210 100644 --- a/gtsfm/data_association/point3d_initializer.py +++ b/gtsfm/data_association/point3d_initializer.py @@ -69,15 +69,15 @@ class TriangulationOptions(NamedTuple): See the following slides for a derivation of the #req'd samples: http://www.cse.psu.edu/~rtc12/CSE486/lecture15.pdf Args: - mode: triangulation mode, which dictates whether or not to use robust estimation. 
-        reproj_error_threshold: the maximum reprojection error allowed.
-        min_triangulation_angle: threshold for the minimum angle (in degrees) subtended at the triangulated track from 2
+        mode: Triangulation mode, which dictates whether or not to use robust estimation.
+        reproj_error_threshold: The maximum reprojection error allowed.
+        min_triangulation_angle: Threshold for the minimum angle (in degrees) subtended at the triangulated track from 2
             cameras. Tracks for which the maximum angle at any two cameras is less than this threshold will be rejected.
-        min_inlier_ratio: a priori assumed minimum probability that a point is an inlier.
-        confidence: desired confidence that at least one hypothesis is outlier free.
+        min_inlier_ratio: A priori assumed minimum probability that a point is an inlier.
+        confidence: Desired confidence that at least one hypothesis is outlier free.
         dyn_num_hypotheses_multiplier: Multiplication factor for dynamically computed hypotheses based on confidence.
-        min_num_hypotheses: minimum number of hypotheses.
-        max_num_hypotheses: maximum number of hypotheses.
+        min_num_hypotheses: Minimum number of hypotheses.
+        max_num_hypotheses: Maximum number of hypotheses.
     """

     mode: TriangulationSamplingMode
@@ -123,15 +123,18 @@ class Point3dInitializer:

     Args:
         track_cameras: Dict of cameras and their indices.
-        mode: triangulation mode, which dictates whether or not to use robust estimation.
-        reproj_error_thresh: threshold on reproj errors for inliers.
-        num_ransac_hypotheses (optional): desired number of RANSAC hypotheses.
+        mode: Triangulation mode, which dictates whether or not to use robust estimation.
+        reproj_error_thresh: Threshold on reproj errors for inliers.
+        num_ransac_hypotheses (optional): Desired number of RANSAC hypotheses.
     """

     def __init__(self, track_camera_dict: Dict[int, gtsfm_types.CAMERA_TYPE], options: TriangulationOptions) -> None:
         self.track_camera_dict = track_camera_dict
         self.options = options

+        if len(track_camera_dict) == 0:
+            raise ValueError("No camera positions were estimated, so triangulation is not feasible.")
+
         sample_camera = list(self.track_camera_dict.values())[0]
         self._camera_set_class = (
             CameraSetCal3Bundler if isinstance(sample_camera, PinholeCameraCal3Bundler) else CameraSetCal3Fisheye
@@ -142,23 +145,23 @@ def execute_ransac_variant(self, track_2d: SfmTrack2d) -> np.ndarray:
         RANSAC chooses one of 3 different sampling schemes to execute.

         Args:
-            track: feature track with N 2d measurements in separate images
+            track: Feature track with N 2d measurements in separate images.

         Returns:
-            best_inliers: boolean array of length N. Indices of measurements
-                are set to true if they correspond to the best RANSAC hypothesis
+            best_inliers: Boolean array of length N. Indices of measurements
+                are set to true if they correspond to the best RANSAC hypothesis.
         """
-        # Generate all possible matches
+        # Generate all possible matches.
         measurement_pairs = generate_measurement_pairs(track_2d)

-        # limit the number of samples to the number of available pairs
+        # Limit the number of samples to the number of available pairs.
         num_hypotheses = min(self.options.num_ransac_hypotheses(), len(measurement_pairs))

-        # Sampling
+        # Sampling.
         samples = self.sample_ransac_hypotheses(track_2d, measurement_pairs, num_hypotheses)

-        # Initialize the best output containers
+        # Initialize the best output containers.
        best_num_votes = 0
        best_error = MAX_TRACK_REPROJ_ERROR
        best_inliers = np.zeros(len(track_2d.measurements), dtype=bool)
@@ -168,7 +171,7 @@ def execute_ransac_variant(self, track_2d: SfmTrack2d) -> np.ndarray:
             i1, uv1 = track_2d.measurements[k1]
             i2, uv2 = track_2d.measurements[k2]

-            # check for unestimated cameras
+            # Check for unestimated cameras.
             if self.track_camera_dict.get(i1) is None or self.track_camera_dict.get(i2) is None:
                 logger.warning("Unestimated cameras found at indices %d or %d. Skipping them.", i1, i2)
                 continue
@@ -181,7 +184,7 @@ def execute_ransac_variant(self, track_2d: SfmTrack2d) -> np.ndarray:
                 img_measurements.append(uv1)
                 img_measurements.append(uv2)

-            # triangulate point for track
+            # Triangulate point for track.
             try:
                 triangulated_pt = gtsam.triangulatePoint3(
                     camera_estimates,
@@ -204,11 +207,11 @@ def execute_ransac_variant(self, track_2d: SfmTrack2d) -> np.ndarray:
             # If the inlier numbers are the same, check the average error of inliers.
             is_inlier = errors < self.options.reproj_error_threshold

-            # tally the number of votes
+            # Tally the number of votes.
             inlier_errors = errors[is_inlier]

             if inlier_errors.size > 0:
-                # only tally error over the inlier measurements
+                # Only tally error over the inlier measurements.
                 avg_error = inlier_errors.mean()
                 num_votes = is_inlier.astype(int).sum()
@@ -223,7 +226,7 @@ def triangulate(self, track_2d: SfmTrack2d) -> Tuple[Optional[SfmTrack], Optiona
         """Triangulates 3D point according to the configured triangulation mode.

         Args:
-            track: feature track from which measurements are to be extracted
+            track: Feature track from which measurements are to be extracted.

         Returns:
             track with inlier measurements and 3D landmark. None returned if triangulation fails or has high error.
@@ -300,11 +303,12 @@ def sample_ransac_hypotheses(
         """Sample a list of hypotheses (camera pairs) to use during triangulation.

         Args:
-            track: feature track from which measurements are to be extracted
-            measurement_pairs: all possible indices of pairs of measurements in a given track
-            num_hypotheses: desired number of samples
+            track: Feature track from which measurements are to be extracted.
+            measurement_pairs: All possible indices of pairs of measurements in a given track.
+            num_hypotheses: Desired number of samples.
+
         Returns:
-            Indices of selected match
+            Indices of selected matches.
         """
         # Initialize scores as uniform distribution
         scores = np.ones(len(measurement_pairs), dtype=float)
@@ -320,10 +324,10 @@ def sample_ransac_hypotheses(
             wTc1 = self.track_camera_dict[i1].pose()
             wTc2 = self.track_camera_dict[i2].pose()

-            # rough approximation approximation of baseline between the 2 cameras
+            # Rough approximation of baseline between the 2 cameras.
             scores[k] = np.linalg.norm(wTc1.inverse().compose(wTc2).translation())

-        # Check the validity of scores
+        # Check the validity of scores.
         if sum(scores) <= 0.0:
             raise Exception("Sum of scores cannot be zero (or smaller than zero)! It must be a bug somewhere")
@@ -351,7 +355,7 @@ def extract_measurements(
         Returns None, None if less than 2 measurements were found with estimated camera poses after averaging.

         Args:
-            track: feature track from which measurements are to be extracted.
+            track: Feature track from which measurements are to be extracted.

         Returns:
             Vector of individual camera calibrations pertaining to track
@@ -377,14 +381,13 @@ def extract_measurements(


 def generate_measurement_pairs(track: SfmTrack2d) -> List[Tuple[int, ...]]:
-    """
-    Extract all possible measurement pairs in a track for triangulation.
+ """Extract all possible measurement pairs in a track for triangulation. Args: - track: feature track from which measurements are to be extracted + track: Feature track from which measurements are to be extracted. Returns: - measurement_idxs: all possible matching measurement indices in a given track + measurement_idxs: All possible matching measurement indices in a given track """ num_track_measurements = track.number_measurements() all_measurement_idxs = range(num_track_measurements) diff --git a/gtsfm/evaluation/compare_metrics.py b/gtsfm/evaluation/compare_metrics.py index 86b9a4840..a597e2b63 100644 --- a/gtsfm/evaluation/compare_metrics.py +++ b/gtsfm/evaluation/compare_metrics.py @@ -24,16 +24,17 @@ def compute_metrics_from_txt( """Calculate metrics from pipeline outputs parsed from COLMAP txt format. Args: - cameras: dictionary of COLMAP-formatted Cameras - images: dictionary of COLMAP-formatted Images - points3D: dictionary of COLMAP-formatted Point3Ds + cameras: Dictionary of COLMAP-formatted Cameras. + images: Dictionary of COLMAP-formatted Images. + points3D: Dictionary of COLMAP-formatted Point3Ds. reproj_error_threshold: Reprojection error threshold for filtering tracks. Returns: other_pipeline_metrics: A dictionary of metrics from another pipeline that are comparable with GTSfM """ - image_files, images, cameras, sfmtracks = io_utils.colmap2gtsfm(cameras, images, points3d, load_sfmtracks=True) - num_cameras = len(cameras) + _, _, intrinsics_gtsfm, sfmtracks, _, _, _ = io_utils.colmap2gtsfm(cameras, images, points3d, load_sfmtracks=True) + + num_cameras = len(intrinsics_gtsfm) unfiltered_track_lengths = [] image_id_num_measurements = {} for track in sfmtracks: diff --git a/gtsfm/evaluation/metrics.py b/gtsfm/evaluation/metrics.py index 6caa75a75..8628f30b0 100644 --- a/gtsfm/evaluation/metrics.py +++ b/gtsfm/evaluation/metrics.py @@ -89,9 +89,9 @@ def __init__( self._name = name if data is not None: - # Cast to a numpy array + # Cast to a numpy array. if isinstance(data, list): - # Replace None with NaN + # Replace None with NaN. data = [x if x is not None else np.NaN for x in data] if all(isinstance(x, int) for x in data): data = np.array(data, dtype=np.int32) @@ -113,7 +113,7 @@ def __init__( else: raise ValueError("Unsupported plot type for the data dimension") - # Create a summary if the data is a 1D distribution + # Create a summary if the data is a 1D distribution. if self._dim == 1: self._summary = self._create_summary(data) @@ -123,7 +123,7 @@ def __init__( else: self._data = None else: - # Metrics created from summary alone are 1D distribution metrics + # Metrics created from summary alone are 1D distribution metrics. self._dim = 1 self._summary = summary self._plot_type = self.PlotType.HISTOGRAM if "histogram" in summary else self.PlotType.BOX diff --git a/gtsfm/evaluation/metrics_report.py b/gtsfm/evaluation/metrics_report.py index 881fd0ec2..3edbe6711 100644 --- a/gtsfm/evaluation/metrics_report.py +++ b/gtsfm/evaluation/metrics_report.py @@ -31,7 +31,7 @@ def get_readable_metric_name(metric_name: str) -> str: metric_name: where words are separated by underscores. Returns: - readable metric name where words are separated by spaces. + Readable metric name where words are separated by spaces. 
""" words = metric_name.split("_") words = [word.capitalize() for word in words] @@ -80,8 +80,8 @@ def create_table_for_scalar_metrics_and_compare( return tabulate(table, headers="keys", tablefmt="html") -def add_plot(fig, metric_value: Dict[str, Union[List, Dict]], metric_name: str, row: int, col: int): - """Function for adding a plot to a figure +def add_plot(fig, metric_value: Dict[str, Union[List, Dict]], metric_name: str, row: int, col: int) -> None: + """Adds a plot (histogram or box plot) to a figure using Plotly. Args: fig: The figure that the plot will be added to. @@ -89,7 +89,6 @@ def add_plot(fig, metric_value: Dict[str, Union[List, Dict]], metric_name: str, metric_name: The name of the metric being plotted. row: The row number of the plot in the figure. col: The column number of the plot in the figure. - """ if "histogram" in metric_value[metrics.SUMMARY_KEY]: histogram = metric_value[metrics.SUMMARY_KEY]["histogram"] @@ -122,18 +121,20 @@ def create_plots_for_distributions(metrics_dict: Dict[str, Any]) -> str: For a certain metric, these can be either histogram or box according to the metric's property. Args: - metrics_dict: A dict, where keys are names of metrics and values are - the dictionary representation of the metric. + metrics_dict: Dictionary representation of the metric, where keys are names of metrics. + Returns: Plots in a grid converted to HTML as a string. """ distribution_metrics = [] # Separate all the 1D distribution metrics. for metric, value in metrics_dict.items(): - if isinstance(value, dict): - all_nan_summary = all(np.isnan(v) for v in value[metrics.SUMMARY_KEY].values()) - if not all_nan_summary: - distribution_metrics.append(metric) + if not isinstance(value, dict): + continue + all_nan_summary = all(np.isnan(v) for v in value[metrics.SUMMARY_KEY].values()) + if not all_nan_summary: + distribution_metrics.append(metric) + if len(distribution_metrics) == 0: return "" @@ -165,10 +166,9 @@ def create_plots_for_distributions_and_compare( For a certain metric, these can be either histogram or box according to the metric's property. Args: - metrics_dict: A dict, where keys are names of metrics and values are - the dictionary representation of the metric. - other_pipeline_metrics_dicts: metrics_dicts for other SfM pipelines - pipeline_names: A list of other SfM pipeline names + metrics_dict: Dictionary representation of the metric, where keys are names of metrics. + other_pipeline_metrics_dicts: Metrics_dicts for other SfM pipelines. + pipeline_names: A list of other SfM pipeline names. Returns: Plots in a grid converted to HTML as a string. 
diff --git a/gtsfm/evaluation/plot_metrics.py b/gtsfm/evaluation/plot_metrics.py index faa7eb072..44bc49e70 100644 --- a/gtsfm/evaluation/plot_metrics.py +++ b/gtsfm/evaluation/plot_metrics.py @@ -17,11 +17,12 @@ GTSFM_MODULE_METRICS_FNAMES = [ "frontend_summary.json", - "rotation_cycle_consistency_metrics.json", + "view_graph_estimation_metrics.json", "rotation_averaging_metrics.json", "translation_averaging_metrics.json", "data_association_metrics.json", "bundle_adjustment_metrics.json", + "total_summary_metrics.json", ] @@ -81,12 +82,13 @@ def create_metrics_plots_html( [colmap_metrics_groups, openmvg_metrics_groups], ["colmap", "openmvg"], ): - if json_path is not None: - for i, metrics_group in enumerate(gtsfm_metrics_groups): - metric_path = metric_paths[i] - json_metric_path = os.path.join(json_path, os.path.basename(metric_path)) - metrics_groups.append(GtsfmMetricsGroup.parse_from_json(json_metric_path)) - other_pipeline_metrics_groups[pipeline_name] = metrics_groups + if json_path is None: + continue + for i, metrics_group in enumerate(gtsfm_metrics_groups): + metric_path = metric_paths[i] + json_metric_path = os.path.join(json_path, os.path.basename(metric_path)) + metrics_groups.append(GtsfmMetricsGroup.parse_from_json(json_metric_path)) + other_pipeline_metrics_groups[pipeline_name] = metrics_groups metrics_report.generate_metrics_report_html(gtsfm_metrics_groups, output_file, other_pipeline_metrics_groups) diff --git a/gtsfm/frontend/cacher/image_matcher_cacher.py b/gtsfm/frontend/cacher/image_matcher_cacher.py index 8a34ee24a..2d9e6b1a4 100644 --- a/gtsfm/frontend/cacher/image_matcher_cacher.py +++ b/gtsfm/frontend/cacher/image_matcher_cacher.py @@ -64,36 +64,26 @@ def _save_result_to_cache( data = {"keypoints_i1": keypoints_i1, "keypoints_i2": keypoints_i2} io_utils.write_to_bz2_file(data, cache_path) - def match( - self, - image_i1: Image, - image_i2: Image, - ) -> Tuple[Keypoints, Keypoints]: + def match(self, image_i1: Image, image_i2: Image) -> Tuple[Keypoints, Keypoints]: """Identify feature matches across two images. If the results are in the cache, they are fetched and returned. Otherwise, the `match()` of the underlying object's API is called and the results are cached. Args: - image_i1: first input image of pair. - image_i2: second input image of pair. + image_i1: First input image of pair. + image_i2: Second input image of pair. Returns: Keypoints from image 1 (N keypoints will exist). Corresponding keypoints from image 2 (there will also be N keypoints). These represent feature matches. 
""" - cached_data = self._load_result_from_cache( - image_i1=image_i1, - image_i2=image_i2, - ) + cached_data = self._load_result_from_cache(image_i1=image_i1, image_i2=image_i2) if cached_data is not None: return cached_data - keypoints_i1, keypoints_i2 = self._matcher.match( - image_i1=image_i1, - image_i2=image_i2, - ) + keypoints_i1, keypoints_i2 = self._matcher.match(image_i1=image_i1, image_i2=image_i2) self._save_result_to_cache( image_i1=image_i1, image_i2=image_i2, keypoints_i1=keypoints_i1, keypoints_i2=keypoints_i2 diff --git a/gtsfm/frontend/correspondence_generator/correspondence_generator_base.py b/gtsfm/frontend/correspondence_generator/correspondence_generator_base.py index 1ab7e56a8..f925d90a1 100644 --- a/gtsfm/frontend/correspondence_generator/correspondence_generator_base.py +++ b/gtsfm/frontend/correspondence_generator/correspondence_generator_base.py @@ -24,9 +24,9 @@ def generate_correspondences( """Apply the correspondence generator to generate putative correspondences. Args: - client: dask client, used to execute the front-end as futures. - images: list of all images, as futures. - image_pairs: indices of the pairs of images to estimate two-view pose and correspondences. + client: Dask client, used to execute the front-end as futures. + images: List of all images, as futures. + image_pairs: Indices of the pairs of images to estimate two-view pose and correspondences. Returns: List of keypoints, one entry for each input images. diff --git a/gtsfm/frontend/correspondence_generator/det_desc_correspondence_generator.py b/gtsfm/frontend/correspondence_generator/det_desc_correspondence_generator.py index 1cb766d59..cf5e0b128 100644 --- a/gtsfm/frontend/correspondence_generator/det_desc_correspondence_generator.py +++ b/gtsfm/frontend/correspondence_generator/det_desc_correspondence_generator.py @@ -37,9 +37,9 @@ def generate_correspondences( """Apply the correspondence generator to generate putative correspondences. Args: - client: dask client, used to execute the front-end as futures. - images: list of all images, as futures. - image_pairs: indices of the pairs of images to estimate two-view pose and correspondences. + client: Dask client, used to execute the front-end as futures. + images: List of all images, as futures. + image_pairs: Indices of the pairs of images to estimate two-view pose and correspondences. Returns: List of keypoints, one entry for each input images. 
diff --git a/gtsfm/frontend/correspondence_generator/image_correspondence_generator.py b/gtsfm/frontend/correspondence_generator/image_correspondence_generator.py index af1ed50b4..5617ce837 100644 --- a/gtsfm/frontend/correspondence_generator/image_correspondence_generator.py +++ b/gtsfm/frontend/correspondence_generator/image_correspondence_generator.py @@ -79,7 +79,6 @@ def apply_image_matcher( ) keypoints_list, putative_corr_idxs_dict = self._aggregator.aggregate(keypoints_dict=pairwise_correspondences) - return keypoints_list, putative_corr_idxs_dict def generate_correspondences_and_estimate_two_view( diff --git a/gtsfm/frontend/correspondence_generator/keypoint_aggregator/keypoint_aggregator_dedup.py b/gtsfm/frontend/correspondence_generator/keypoint_aggregator/keypoint_aggregator_dedup.py index 4e0b49b9b..2ea8b4a85 100644 --- a/gtsfm/frontend/correspondence_generator/keypoint_aggregator/keypoint_aggregator_dedup.py +++ b/gtsfm/frontend/correspondence_generator/keypoint_aggregator/keypoint_aggregator_dedup.py @@ -104,6 +104,7 @@ def aggregate( # Have to merge keypoints across different views here (or turn off transitivity check). for (i1, i2), (keypoints_i1, keypoints_i2) in keypoints_dict.items(): + # NOTE: `Keypoints` coordinates with shape (0,2) are allowed here, when no matches are identified. per_image_kpt_coordinates, i1_indices = self.append_unique_keypoints( i=i1, keypoints=keypoints_i1, per_image_kpt_coordinates=per_image_kpt_coordinates ) @@ -118,7 +119,20 @@ def aggregate( # Reset global state. self.duplicates_found = 0 - keypoints_list: List[Keypoints] = [Keypoints(coordinates=np.array([]))] * (max_img_idx + 1) + keypoints_list: List[Keypoints] = [Keypoints(coordinates=np.zeros(shape=(0, 2)))] * (max_img_idx + 1) for i in per_image_kpt_coordinates.keys(): keypoints_list[i] = Keypoints(coordinates=per_image_kpt_coordinates[i]) + _assert_keypoints_rank(keypoints_list[i]) + return keypoints_list, putative_corr_idxs_dict + + +def _assert_keypoints_rank(keypoints: Keypoints) -> None: + """Verifies that `Keypoints` instance has correct rank-2 shape, with (N,2) shape.""" + rank_correct = keypoints.coordinates.ndim == 2 + shape_correct = keypoints.coordinates.shape[-1] == 2 + if not (rank_correct and shape_correct): + raise ValueError( + "[KeypointAggregatorDedup] Dimensions for Keypoint coordinates incorrect. Array needs to be 2D," + f" but found {keypoints.coordinates.shape}" + ) diff --git a/gtsfm/frontend/global_descriptor/netvlad_global_descriptor.py b/gtsfm/frontend/global_descriptor/netvlad_global_descriptor.py index 01fa3ca3d..0bb9d2b59 100644 --- a/gtsfm/frontend/global_descriptor/netvlad_global_descriptor.py +++ b/gtsfm/frontend/global_descriptor/netvlad_global_descriptor.py @@ -1,21 +1,17 @@ """Wrapper around the NetVLAD global image descriptor. - Based on Arandjelovic16cvpr: "NetVLAD: CNN architecture for weakly supervised place recognition" https://arxiv.org/pdf/1511.07247.pdf - NetVLAD, is a new generalized VLAD layer, inspired by the “Vector of Locally Aggregated Descriptors” image representation commonly used in image retrieval - Whereas bag-of-visual-words aggregation keeps counts of visual words, VLAD stores the sum of residuals (difference vector between the descriptor and its corresponding cluster centre) for each visual word. 
- Authors: John Lambert, Travis Driver """ + import numpy as np import torch from torch import nn - from gtsfm.common.image import Image from gtsfm.frontend.global_descriptor.global_descriptor_base import GlobalDescriptorBase from thirdparty.hloc.netvlad import NetVLAD @@ -24,27 +20,29 @@ class NetVLADGlobalDescriptor(GlobalDescriptorBase): """NetVLAD global descriptor""" - def __init__(self, use_cuda: bool = True) -> None: + def __init__(self) -> None: """ """ - self._use_cuda = use_cuda - self._model: nn.Module = NetVLAD().eval() + pass def describe(self, image: Image) -> np.ndarray: """Compute the NetVLAD global descriptor for a single image query. Args: - image: input image. + image: Input image. Returns: - img_desc: array of shape (D,) representing global image descriptor. + img_desc: Array of shape (D,) representing global image descriptor. """ - device = torch.device("cuda" if self._use_cuda and torch.cuda.is_available() else "cpu") - self._model.to(device) - + # Load model. + # Note: Initializing in the constructor leads to OOM. + device = torch.device("cuda" if torch.cuda.is_available() else "cpu") + model: nn.Module = NetVLAD().to(device) + model.eval() + + img_tensor = ( + torch.from_numpy(image.value_array).to(device).permute(2, 0, 1).unsqueeze(0).type(torch.float32) / 255 + ) with torch.no_grad(): - img_tensor = ( - torch.from_numpy(image.value_array).permute(2, 0, 1).unsqueeze(0).type(torch.float32).to(device) / 255 - ) - img_desc = self._model({"image": img_tensor}) + img_desc = model({"image": img_tensor}) return img_desc["global_descriptor"].detach().squeeze().cpu().numpy() diff --git a/gtsfm/loader/astrovision_loader.py b/gtsfm/loader/astrovision_loader.py index d784e1b7d..7556f72d4 100644 --- a/gtsfm/loader/astrovision_loader.py +++ b/gtsfm/loader/astrovision_loader.py @@ -66,8 +66,8 @@ def __init__( greater than the max_resolution, it will be downsampled to match the max_resolution. Raises: - FileNotFoundError if `data_dir` doesn't exist or image path does not exist. - RuntimeError if ground truth camera calibrations not provided. + FileNotFoundError: If `data_dir` doesn't exist or image path does not exist. + RuntimeError: If ground truth camera calibrations not provided. """ super().__init__(max_resolution) self._use_gt_extrinsics = use_gt_extrinsics @@ -78,11 +78,12 @@ def __init__( if not Path(data_dir).exists(): raise FileNotFoundError("No data found at %s." % data_dir) cameras, images, points3d = colmap_io.read_model(path=data_dir, ext=".bin") - img_fnames, self._wTi_list, self._calibrations, self._sfmtracks, _, _ = io_utils.colmap2gtsfm( + + img_fnames, self._wTi_list, self._calibrations, self._sfmtracks, _, _, _ = io_utils.colmap2gtsfm( cameras, images, points3d, load_sfmtracks=use_gt_sfmtracks ) - # Read in scene mesh as Trimesh object + # Read in scene mesh as Trimesh object. if gt_scene_mesh_path is not None: if not Path(gt_scene_mesh_path).exists(): raise FileNotFoundError(f"No mesh found at {gt_scene_mesh_path}") diff --git a/gtsfm/loader/colmap_loader.py b/gtsfm/loader/colmap_loader.py index 27d913eb5..5b3b6c020 100644 --- a/gtsfm/loader/colmap_loader.py +++ b/gtsfm/loader/colmap_loader.py @@ -44,7 +44,6 @@ def __init__( images_dir: str, use_gt_intrinsics: bool = True, use_gt_extrinsics: bool = True, - max_frame_lookahead: int = 1, max_resolution: int = 760, ) -> None: """Initializes to load from a specified folder on disk. @@ -56,9 +55,6 @@ def __init__( use_gt_intrinsics: Whether to use ground truth intrinsics. 
If COLMAP calibration is not found on disk, then use_gt_intrinsics will be set to false automatically. use_gt_extrinsics: Whether to use ground truth extrinsics. - max_frame_lookahead: Maximum number of consecutive frames to consider for - matching/co-visibility. Any value of max_frame_lookahead less than the size of - the dataset assumes data is sequentially captured max_resolution: Integer representing maximum length of image's short side, i.e. the smaller of the height/width of the image. e.g. for 1080p (1920 x 1080), max_resolution would be 1080. If the image resolution max(height, width) is @@ -67,11 +63,10 @@ def __init__( super().__init__(max_resolution) self._use_gt_intrinsics = use_gt_intrinsics self._use_gt_extrinsics = use_gt_extrinsics - self._max_frame_lookahead = max_frame_lookahead - - wTi_list, img_fnames = io_utils.read_images_txt(fpath=os.path.join(colmap_files_dirpath, "images.txt")) - self._calibrations, _ = io_utils.read_cameras_txt(fpath=os.path.join(colmap_files_dirpath, "cameras.txt")) + wTi_list, img_fnames, self._calibrations, _, _, _ = io_utils.read_scene_data_from_colmap_format( + colmap_files_dirpath + ) # TODO in future PR: if img_fnames is None, default to using everything inside image directory if self._calibrations is None: @@ -175,15 +170,3 @@ def get_camera_pose(self, index: int) -> Optional[Pose3]: wTi = self._wTi_list[index] return wTi - - def is_valid_pair(self, idx1: int, idx2: int) -> bool: - """Checks if (idx1, idx2) is a valid pair. idx1 < idx2 is required. - - Args: - idx1: First index of the pair. - idx2: Second index of the pair. - - Returns: - Validation result. - """ - return super().is_valid_pair(idx1, idx2) and abs(idx1 - idx2) <= self._max_frame_lookahead diff --git a/gtsfm/multi_view_optimizer.py b/gtsfm/multi_view_optimizer.py index 038d73ce3..c14ba3f1b 100644 --- a/gtsfm/multi_view_optimizer.py +++ b/gtsfm/multi_view_optimizer.py @@ -139,6 +139,7 @@ def create_computation_graph( relative_pose_priors, gt_wTi_list=gt_wTi_list, ) + init_cameras_graph = dask.delayed(init_cameras)(wTi_graph, all_intrinsics) ba_input_graph, data_assoc_metrics_graph = self.data_association_module.create_computation_graph( @@ -184,8 +185,9 @@ def init_cameras( camera_class = gtsfm_types.get_camera_class_for_calibration(intrinsics_list[0]) for idx, (wTi) in enumerate(wTi_list): - if wTi is not None: - cameras[idx] = camera_class(wTi, intrinsics_list[idx]) + if wTi is None: + continue + cameras[idx] = camera_class(wTi, intrinsics_list[idx]) return cameras diff --git a/gtsfm/retriever/joint_netvlad_sequential_retriever.py b/gtsfm/retriever/joint_netvlad_sequential_retriever.py index 7e935f466..f420ae34c 100644 --- a/gtsfm/retriever/joint_netvlad_sequential_retriever.py +++ b/gtsfm/retriever/joint_netvlad_sequential_retriever.py @@ -2,10 +2,12 @@ Authors: John Lambert """ - from pathlib import Path from typing import List, Optional, Tuple +import dask +from dask.delayed import Delayed + import gtsfm.utils.logger as logger_utils from gtsfm.loader.loader_base import LoaderBase from gtsfm.retriever.netvlad_retriever import NetVLADRetriever @@ -16,16 +18,18 @@ class JointNetVLADSequentialRetriever(RetrieverBase): - """Note: this class contains no .run() method.""" + """Retriever that includes both sequential and retrieval links.""" def __init__(self, num_matched: int, min_score: float, max_frame_lookahead: int) -> None: - """ + """Initializes sub-retrievers. + Args: - num_matched: number of K potential matches to provide per query. 
These are the top "K" matches per query.
+            num_matched: Number of K potential matches to provide per query. These are the top "K" matches per query.
             min_score: Minimum allowed similarity score to accept a match.
-            max_frame_lookahead: maximum number of consecutive frames to consider for matching/co-visibility.
+            max_frame_lookahead: Maximum number of consecutive frames to consider for matching/co-visibility.
         """
         super().__init__(matching_regime=ImageMatchingRegime.SEQUENTIAL_WITH_RETRIEVAL)
+
+        self._num_matched = num_matched
         self._similarity_retriever = NetVLADRetriever(num_matched=num_matched, min_score=min_score)
         self._seq_retriever = SequentialRetriever(max_frame_lookahead=max_frame_lookahead)

@@ -36,21 +40,32 @@ def __repr__(self) -> str:
         Sequential retriever: {self._seq_retriever}
         """

-    def get_image_pairs(self, loader: LoaderBase, plots_output_dir: Optional[Path] = None) -> List[Tuple[int, int]]:
+    def create_computation_graph(self, loader: LoaderBase, plots_output_dir: Optional[Path] = None) -> Delayed:
+        """Compute potential image pairs.
+
+        Args:
+            loader: Image loader. The length of this loader will provide the total number of images
+                for exhaustive global descriptor matching.
+            plots_output_dir: Directory to save plots to. If None, plots are not saved.
+
+        Returns:
+            Delayed task that evaluates to a list of (i1,i2) image pairs.
+        """
+        return self.get_image_pairs(loader=loader, plots_output_dir=plots_output_dir)
+
+    def get_image_pairs(self, loader: LoaderBase, plots_output_dir: Optional[Path] = None) -> Delayed:
         """Compute potential image pairs.

         Args:
-            loader: image loader. The length of this loader will provide the total number of images
+            loader: Image loader. The length of this loader will provide the total number of images
                 for exhaustive global descriptor matching.
             plots_output_dir: Directory to save plots to. If None, plots are not saved.

         Returns:
             Delayed task that evaluates to a list of (i1,i2) image pairs.
         """
-        sim_pairs = self._similarity_retriever.get_image_pairs(loader, plots_output_dir=plots_output_dir)
-        seq_pairs = self._seq_retriever.get_image_pairs(loader)
+        sim_pairs = self._similarity_retriever.create_computation_graph(loader, plots_output_dir=plots_output_dir)
+        seq_pairs = self._seq_retriever.create_computation_graph(loader)

-        return self.aggregate_pairs(sim_pairs=sim_pairs, seq_pairs=seq_pairs)
+        return dask.delayed(self.aggregate_pairs)(sim_pairs=sim_pairs, seq_pairs=seq_pairs)

     def aggregate_pairs(
         self, sim_pairs: List[Tuple[int, int]], seq_pairs: List[Tuple[int, int]]
@@ -58,11 +73,11 @@ def aggregate_pairs(
         """Aggregate all image pair indices from both similarity-based and sequential retrieval.

         Args:
-            sim_pairs: image pairs (i1,i2) from similarity-based retrieval.
-            seq_pairs: image pairs (i1,i2) from sequential retrieval.
+            sim_pairs: Image pairs (i1,i2) from similarity-based retrieval.
+            seq_pairs: Image pairs (i1,i2) from sequential retrieval.

         Returns:
-            pairs: unique pairs (i1,i2) representing union of the input sets.
+            Unique pairs (i1,i2) representing union of the input sets.
         """
         pairs = list(set(sim_pairs).union(set(seq_pairs)))
         logger.info("Found %d pairs from the NetVLAD + Sequential Retriever.", len(pairs))
diff --git a/gtsfm/retriever/netvlad_retriever.py b/gtsfm/retriever/netvlad_retriever.py
index deec57753..2bc673364 100644
--- a/gtsfm/retriever/netvlad_retriever.py
+++ b/gtsfm/retriever/netvlad_retriever.py
@@ -1,21 +1,20 @@
 """Retriever implementation which provides a NetVLAD global image descriptor to suggest potential image pairs.
- Note: Similarity computation based off of Paul-Edouard Sarlin's HLOC: Reference: https://github.com/cvg/Hierarchical-Localization/blob/master/hloc/pairs_from_retrieval.py https://openaccess.thecvf.com/content_cvpr_2016/papers/Arandjelovic_NetVLAD_CNN_Architecture_CVPR_2016_paper.pdf - Authors: John Lambert """ - import math import os from dataclasses import dataclass from pathlib import Path from typing import List, Optional, Tuple +import dask import matplotlib.pyplot as plt import numpy as np import torch +from dask.delayed import Delayed import gtsfm.utils.logger as logger_utils from gtsfm.frontend.cacher.global_descriptor_cacher import GlobalDescriptorCacher @@ -24,7 +23,6 @@ from gtsfm.retriever.retriever_base import RetrieverBase, ImageMatchingRegime logger = logger_utils.get_logger() - MAX_NUM_IMAGES = 10000 @@ -60,7 +58,20 @@ def __repr__(self) -> str: Minimum score: {self._min_score} """ - def get_image_pairs(self, loader: LoaderBase, plots_output_dir: Optional[Path] = None) -> List[Tuple[int, int]]: + def create_computation_graph(self, loader: LoaderBase, plots_output_dir: Optional[Path] = None) -> Delayed: + """Compute potential image pairs. + + Args: + loader: image loader. The length of this loader will provide the total number of images + for exhaustive global descriptor matching. + plots_output_dir: Directory to save plots to. If None, plots are not saved. + + Returns: + Delayed task that evaluates to a list of (i1,i2) image pairs. + """ + return self.get_image_pairs(loader=loader, plots_output_dir=plots_output_dir) + + def get_image_pairs(self, loader: LoaderBase, plots_output_dir: Optional[Path] = None) -> Delayed: """Compute potential image pairs. Args: @@ -68,19 +79,19 @@ def get_image_pairs(self, loader: LoaderBase, plots_output_dir: Optional[Path] = for exhaustive global descriptor matching. plots_output_dir: Directory to save plots to. If None, plots are not saved. - Return: + Returns: Delayed task which evaluates to a list of (i1,i2) image pairs. """ num_images = len(loader) sim = self.compute_similarity_matrix(loader, num_images) - return self.compute_pairs_from_similarity_matrix(sim=sim, loader=loader, plots_output_dir=plots_output_dir) + return dask.delayed(self.compute_pairs_from_similarity_matrix)( + sim=sim, loader=loader, plots_output_dir=plots_output_dir + ) - def compute_similarity_matrix(self, loader: LoaderBase, num_images: int) -> torch.Tensor: + def compute_similarity_matrix(self, loader: LoaderBase, num_images: int) -> Delayed: """Compute a similarity matrix between all pairs of images. - We use block matching, to avoid excessive memory usage. We cannot fit more than 50x50 sized block into memory, on a 16 GB RAM machine. 
-
     A similar blocked exhaustive matching implementation can be found in COLMAP:
     https://github.com/colmap/colmap/blob/dev/src/feature/matching.cc#L899

@@ -96,83 +107,71 @@ def compute_similarity_matrix(self, loader: LoaderBase, num_images: int) -> torc
         if num_images > MAX_NUM_IMAGES:
             raise RuntimeError("Cannot construct similarity matrix of this size.")

-        subblock_results: List[SubBlockSimilarityResult] = []
+        subblock_results: List[Delayed] = []
         num_blocks = math.ceil(num_images / self._blocksize)
         for block_i in range(num_blocks):
             for block_j in range(block_i, num_blocks):
                 subblock_results.append(
-                    self._compute_similarity_subblock(
+                    dask.delayed(self._compute_similarity_subblock)(
                         num_images=num_images, loader=loader, block_i=block_i, block_j=block_j
                     )
                 )
-        sim = self._aggregate_subblocks(subblock_results=subblock_results, num_images=num_images)
+        sim = dask.delayed(self._aggregate_subblocks)(subblock_results=subblock_results, num_images=num_images)
         return sim

     def _compute_similarity_subblock(self, num_images: int, loader: LoaderBase, block_i: int, block_j: int):
         """Compute a sub-block of an image similarity matrix.
-
         Args:
             num_images: number of images to compare for matching.
             loader: image loader. The length of this loader will provide the total number of images
                 for exhaustive global descriptor matching.
             block_i: row index of sub-block.
             block_j: column index of sub-block.
-
         Returns:
             sub-block similarity result.
         """
         device = "cuda" if torch.cuda.is_available() else "cpu"
-
         num_blocks = math.ceil(num_images / self._blocksize)
         # only compute the upper triangular portion of the similarity matrix.
         logger.info("Computing matching block (%d/%d,%d/%d)", block_i, num_blocks - 1, block_j, num_blocks - 1)
         block_i_query_descs = []
         block_j_query_descs = []
-
         i_start = block_i * self._blocksize
         i_end = (block_i + 1) * self._blocksize
         i_end = min(i_end, num_images)
-
         j_start = block_j * self._blocksize
         j_end = (block_j + 1) * self._blocksize
         j_end = min(j_end, num_images)
-
         block_i_idxs = np.arange(i_start, i_end)
         block_j_idxs = np.arange(j_start, j_end)
-
         # TODO(johnwlambert): load images only O(N) times, instead of O(N^2) times, and record cache keys.
         for i in block_i_idxs:
             image = loader.get_image(i)
             block_i_query_descs.append(self._global_descriptor_model.describe(image))
-
         for j in block_j_idxs:
             image = loader.get_image(j)
             block_j_query_descs.append(self._global_descriptor_model.describe(image))
-
         # Form (K,D) for K images.
         block_i_query_descs = torch.from_numpy(np.array(block_i_query_descs))
         block_j_query_descs = torch.from_numpy(np.array(block_j_query_descs))
-
         # Einsum equivalent to (img_descs @ img_descs.T)
         sim_block = torch.einsum("id,jd->ij", block_i_query_descs.to(device), block_j_query_descs.to(device))
         # sim[i_start:i_end, j_start:j_end] = sim_block
-
         return SubBlockSimilarityResult(i_start=i_start, i_end=i_end, j_start=j_start, j_end=j_end, subblock=sim_block)

     def _aggregate_subblocks(self, subblock_results: List[SubBlockSimilarityResult], num_images: int) -> torch.Tensor:
         """Aggregate results from many independently computed sub-blocks of the similarity matrix into a single matrix.

         Args:
-            subblock_results: metadata and results of similarity matrix sub-block computation.
-            num_images: number of images to compare for matching.
+            subblock_results: Metadata and results of similarity matrix sub-block computation.
+            num_images: Number of images to compare for matching.

         Returns:
-            sim: tensor of shape (num_images, num_images) representing similarity matrix.
+ sim: Tensor of shape (num_images, num_images) representing similarity matrix. """ sim = torch.zeros((num_images, num_images)) - for sr in subblock_results: sim[sr.i_start : sr.i_end, sr.j_start : sr.j_end] = sr.subblock return sim @@ -181,9 +180,10 @@ def compute_pairs_from_similarity_matrix( self, sim: torch.Tensor, loader: LoaderBase, plots_output_dir: Optional[Path] = None ) -> List[Tuple[int, int]]: """ + Args: - sim: tensor of shape (num_images, num_images) representing similarity matrix. - loader: image loader. The length of this loader will provide the total number of images + sim: Tensor of shape (num_images, num_images) representing similarity matrix. + loader: Image loader. The length of this loader will provide the total number of images for exhaustive global descriptor matching. plots_output_dir: Directory to save plots to. If None, plots are not saved. @@ -199,16 +199,13 @@ def compute_pairs_from_similarity_matrix( sim, invalid=is_invalid_mat, num_select=self._num_matched, min_score=self._min_score ) named_pairs = [(query_names[i], query_names[j]) for i, j in pairs] - if plots_output_dir: os.makedirs(plots_output_dir, exist_ok=True) - # Save image of similarity matrix. plt.imshow(np.triu(sim.detach().cpu().numpy())) plt.title("Image Similarity Matrix") plt.savefig(str(plots_output_dir / "netvlad_similarity_matrix.jpg"), dpi=500) plt.close("all") - # Save values in similarity matrix. np.savetxt( fname=str(plots_output_dir / "netvlad_similarity_matrix.txt"), @@ -219,7 +216,7 @@ def compute_pairs_from_similarity_matrix( # Save named pairs and scores. with open(plots_output_dir / "netvlad_named_pairs.txt", "w") as fid: - for _named_pair, _pair_ind in zip(named_pairs, pairs): + for (_named_pair, _pair_ind) in zip(named_pairs, pairs): fid.write("%.4f %s %s\n" % (sim[_pair_ind[0], _pair_ind[1]], _named_pair[0], _named_pair[1])) logger.info("Found %d pairs from the NetVLAD Retriever.", len(pairs)) @@ -237,27 +234,24 @@ def pairs_from_score_matrix( Args: scores: (K1,K2) for matching K1 images against K2 images. invalid: (K1,K2) boolean array indicating invalid match pairs (e.g. self-matches). - num_select: number of matches to select, per query. - min_score: minimum allowed similarity score. + num_select: Number of matches to select, per query. + min_score: Minimum allowed similarity score. Returns: - pairs: tuples representing pairs (i1,i2) of images. + pairs: Tuples representing pairs (i1,i2) of images. """ N = scores.shape[0] # if there are only N images to choose from, selecting more than N is not allowed num_select = min(num_select, N) - assert scores.shape == invalid.shape invalid = torch.from_numpy(invalid).to(scores.device) if min_score is not None: # logical OR. 
invalid |= scores < min_score scores.masked_fill_(invalid, float("-inf")) - topk = torch.topk(scores, k=num_select, dim=1) indices = topk.indices.cpu().numpy() valid = topk.values.isfinite().cpu().numpy() - pairs = [] for i, j in zip(*np.where(valid)): pairs.append((i, indices[i, j])) diff --git a/gtsfm/retriever/retriever_base.py b/gtsfm/retriever/retriever_base.py index fe8d083ff..6bc7e8a52 100644 --- a/gtsfm/retriever/retriever_base.py +++ b/gtsfm/retriever/retriever_base.py @@ -8,6 +8,9 @@ from pathlib import Path from typing import List, Optional, Tuple +import dask +from dask.delayed import Delayed + from gtsfm.evaluation.metrics import GtsfmMetric, GtsfmMetricsGroup from gtsfm.loader.loader_base import LoaderBase from gtsfm.ui.gtsfm_process import GTSFMProcess, UiMetadata @@ -76,3 +79,16 @@ def evaluate(self, loader: LoaderBase, image_pair_indices: List[Tuple[int, int]] ], ) return retriever_metrics + + def create_computation_graph(self, loader: LoaderBase, plots_output_dir: Optional[Path] = None) -> Delayed: + """Create Dask graph for image retriever. + + Args: + loader: image loader. The length of this loader will provide the total number of images + for exhaustive global descriptor matching. + plots_output_dir: Directory to save plots to. + + Returns: + Delayed task that evaluates to a list of (i1,i2) image pairs. + """ + return dask.delayed(self.get_image_pairs)(loader=loader, plots_output_dir=plots_output_dir) diff --git a/gtsfm/runner/gtsfm_runner_base.py b/gtsfm/runner/gtsfm_runner_base.py index 418950170..e44e0252a 100644 --- a/gtsfm/runner/gtsfm_runner_base.py +++ b/gtsfm/runner/gtsfm_runner_base.py @@ -19,6 +19,7 @@ import gtsfm.evaluation.metrics_report as metrics_report import gtsfm.utils.logger as logger_utils import gtsfm.utils.metrics as metrics_utils +import gtsfm.utils.viz as viz_utils from gtsfm.common.gtsfm_data import GtsfmData from gtsfm import two_view_estimator from gtsfm.evaluation.metrics import GtsfmMetric, GtsfmMetricsGroup @@ -284,9 +285,12 @@ def run(self) -> GtsfmData: # TODO(Ayush): Use futures retriever_start_time = time.time() - image_pair_indices = self.scene_optimizer.retriever.get_image_pairs( + pairs_graph = self.scene_optimizer.retriever.create_computation_graph( self.loader, plots_output_dir=self.scene_optimizer._plot_base_path ) + with performance_report(filename="retriever-dask-report.html"): + image_pair_indices = pairs_graph.compute() + retriever_metrics = self.scene_optimizer.retriever.evaluate(self.loader, image_pair_indices) retriever_duration_sec = time.time() - retriever_start_time retriever_metrics.add_metric(GtsfmMetric("retriever_duration_sec", retriever_duration_sec)) @@ -320,6 +324,24 @@ def run(self) -> GtsfmData: two_view_estimation_duration_sec = time.time() - two_view_estimation_start_time i2Ri1_dict, i2Ui1_dict, v_corr_idxs_dict, two_view_reports_dict = unzip_two_view_results(two_view_results_dict) + + if self.scene_optimizer._save_two_view_correspondences_viz: + for (i1, i2) in v_corr_idxs_dict.keys(): + image_i1 = self.loader.get_image(i1) + image_i2 = self.loader.get_image(i2) + viz_utils.save_twoview_correspondences_viz( + image_i1, + image_i2, + keypoints_list[i1], + keypoints_list[i2], + v_corr_idxs_dict[(i1, i2)], + two_view_report=two_view_reports_dict[(i1, i2)], + file_path=os.path.join( + self.scene_optimizer._plot_correspondence_path, + f"{i1}_{i2}__{image_i1.file_name}_{image_i2.file_name}.jpg", + ), + ) + two_view_agg_metrics = two_view_estimator.aggregate_frontend_metrics( 
two_view_reports_dict=two_view_reports_dict, angular_err_threshold_deg=self.scene_optimizer._pose_angular_error_thresh, diff --git a/gtsfm/runner/run_scene_optimizer_colmaploader.py b/gtsfm/runner/run_scene_optimizer_colmaploader.py index efc72f80f..bc980775a 100644 --- a/gtsfm/runner/run_scene_optimizer_colmaploader.py +++ b/gtsfm/runner/run_scene_optimizer_colmaploader.py @@ -30,7 +30,6 @@ def construct_loader(self) -> LoaderBase: loader = ColmapLoader( colmap_files_dirpath=self.parsed_args.colmap_files_dirpath, images_dir=self.parsed_args.images_dir, - max_frame_lookahead=self.parsed_args.max_frame_lookahead, max_resolution=self.parsed_args.max_resolution, ) diff --git a/gtsfm/scene_optimizer.py b/gtsfm/scene_optimizer.py index 2363657c5..6f7ee2e19 100644 --- a/gtsfm/scene_optimizer.py +++ b/gtsfm/scene_optimizer.py @@ -233,7 +233,7 @@ def create_computation_graph( ) if self._save_3d_viz: delayed_results.extend( - save_visualizations( + save_matplotlib_visualizations( aligned_ba_input_graph=ba_input_graph, aligned_ba_output_graph=ba_output_graph, gt_pose_graph=gt_wTi_list, @@ -262,7 +262,7 @@ def create_computation_graph( ) ) - # Add metrics for dense reconstruction and voxel downsampling + # Add metrics for dense reconstruction and voxel downsampling. if densify_metrics_graph is not None: metrics_graph_list.append(densify_metrics_graph) if downsampling_metrics_graph is not None: @@ -308,14 +308,14 @@ def align_estimated_gtsfm_data( return ba_input, ba_output, gt_wTi_list -def save_visualizations( +def save_matplotlib_visualizations( aligned_ba_input_graph: Delayed, aligned_ba_output_graph: Delayed, gt_pose_graph: List[Optional[Delayed]], plot_ba_input_path: Path, plot_results_path: Path, ) -> List[Delayed]: - """Save SfmData before and after bundle adjustment and camera poses for visualization. + """Visualizes GtsfmData & camera poses before and after bundle adjustment using Matplotlib. Accepts delayed GtsfmData before and after bundle adjustment, along with GT poses, saves them and returns a delayed object. diff --git a/gtsfm/two_view_estimator.py b/gtsfm/two_view_estimator.py index 6f3ecac6f..9eb857713 100644 --- a/gtsfm/two_view_estimator.py +++ b/gtsfm/two_view_estimator.py @@ -584,7 +584,6 @@ def apply_two_view_estimator( } two_view_output_dict = client.gather(two_view_output_futures) - return two_view_output_dict diff --git a/gtsfm/utils/features.py b/gtsfm/utils/features.py index 861a18c38..f4fa26563 100644 --- a/gtsfm/utils/features.py +++ b/gtsfm/utils/features.py @@ -111,11 +111,11 @@ def generate_random_keypoints(num_keypoints: int, image_shape: Tuple[int, int]) """Generates random keypoints within the image bounds. Args: - num_keypoints: number of features to generate. - image_shape: size of the image, as (H,W) + num_keypoints: Number of features to generate. + image_shape: Size of the image, as (H,W) Returns: - generated keypoints. + Generated keypoints. """ if num_keypoints == 0: return Keypoints(coordinates=np.array([])) diff --git a/gtsfm/utils/io.py b/gtsfm/utils/io.py index b29b9ee20..de16060f8 100644 --- a/gtsfm/utils/io.py +++ b/gtsfm/utils/io.py @@ -45,10 +45,10 @@ def load_image(img_path: str) -> Image: Images will be converted to RGB if in a different format. Args: - img_path (str): the path of image to load. + img_path: The path of image to load. Returns: - loaded image in RGB format. + Loaded image in RGB format. 
""" original_image = PILImage.open(img_path) @@ -76,8 +76,8 @@ def save_image(image: Image, img_path: str) -> None: """Saves the image to disk Args: - image (np.array): image - img_path (str): the path on disk to save the image to + image (np.array): Image. + img_path: The path on disk to save the image to. """ im = PILImage.fromarray(image.value_array) im.save(img_path) @@ -137,7 +137,7 @@ def read_bal(file_path: str) -> GtsfmData: Args: - file_name: file path of the BAL file. + file_name: File path of the BAL file. Returns: The data as an GtsfmData object. @@ -150,7 +150,7 @@ def read_bundler(file_path: str) -> GtsfmData: """Read a Bundler file. Args: - file_name: file path of the Bundler file. + file_name: File path of the Bundler file. Returns: The data as an GtsfmData object. @@ -179,7 +179,7 @@ def colmap2gtsfm( images: Dict[int, ColmapImage], points3D: Dict[int, ColmapPoint3D], load_sfmtracks: bool = False, -) -> Tuple[List[str], List[Pose3], List[str], Optional[List[Point3]], np.ndarray, np.ndarray]: +) -> Tuple[List[str], List[Pose3], List[str], Optional[List[Point3]], np.ndarray, np.ndarray, List[Tuple[int, int]]]: """Converts COLMAP-formatted variables to GTSfM format. Args: @@ -195,18 +195,34 @@ def colmap2gtsfm( sfmtracks_gtsfm: Tracks of points in points3D. point_cloud: (N,3) array representing xyz coordinates of 3d points. rgb: Uint8 array of shape (N,3) representing per-point colors. + img_dims: List of dimensions of each img (H, W). """ # Note: Assumes input cameras use `PINHOLE` model if len(images) == 0 and len(cameras) == 0: raise RuntimeError("No Image or Camera data provided to loader.") - intrinsics_gtsfm, wTi_gtsfm, img_fnames = [], [], [] + intrinsics_gtsfm, wTi_gtsfm, img_fnames, img_dims = [], [], [], [] image_id_to_idx = {} # keeps track of discrepencies between `image_id` and List index. 
for idx, img in enumerate(images.values()): wTi_gtsfm.append(Pose3(Rot3(img.qvec2rotmat()), img.tvec).inverse()) img_fnames.append(img.name) - fx, _, cx, cy = cameras[img.camera_id].params[:4] + camera_model_name = cameras[img.camera_id].model + if camera_model_name == "SIMPLE_RADIAL": + # See https://github.com/colmap/colmap/blob/1f6812e333a1e4b2ef56aa74e2c3873e4e3a40cd/src/colmap/sensor/models.h#L212 # noqa: E501 + f, cx, cy, k = cameras[img.camera_id].params[:4] + fx = f + elif camera_model_name == "FULL_OPENCV": + # See https://github.com/colmap/colmap/blob/1f6812e333a1e4b2ef56aa74e2c3873e4e3a40cd/src/colmap/sensor/models.h#L273 # noqa: E501 + fx, fy, cx, cy = cameras[img.camera_id].params[:4] + elif camera_model_name == "PINHOLE": + # See https://github.com/colmap/colmap/blob/1f6812e333a1e4b2ef56aa74e2c3873e4e3a40cd/src/colmap/sensor/models.h#L196 # noqa: E501 + fx, fy, cx, cy = cameras[img.camera_id].params[:4] + else: + raise ValueError(f"Unsupported COLMAP camera type: {camera_model_name}") + intrinsics_gtsfm.append(Cal3Bundler(fx, 0.0, 0.0, cx, cy)) image_id_to_idx[img.id] = idx + img_h, img_w = cameras[img.camera_id].height, cameras[img.camera_id].width + img_dims.append((img_h, img_w)) if len(points3D) == 0 and load_sfmtracks: raise RuntimeError("No SfMTrack data provided to loader.") @@ -221,18 +237,18 @@ def colmap2gtsfm( point_cloud = np.array([point3d.xyz for point3d in points3D.values()]) rgb = np.array([point3d.rgb for point3d in points3D.values()]) - return img_fnames, wTi_gtsfm, intrinsics_gtsfm, sfmtracks_gtsfm, point_cloud, rgb + return img_fnames, wTi_gtsfm, intrinsics_gtsfm, sfmtracks_gtsfm, point_cloud, rgb, img_dims def read_cameras_txt( fpath: str, -) -> Tuple[Optional[List[Cal3Bundler]], Optional[Tuple[int, int]]]: +) -> Tuple[Optional[List[Cal3Bundler]], Optional[List[Tuple[int, int]]]]: """Read camera calibrations from a COLMAP-formatted cameras.txt file. Reference: https://colmap.github.io/format.html#cameras-txt Args: - fpaths: path to cameras.txt file + fpaths: Path to cameras.txt file Returns: Tuple of: @@ -246,7 +262,7 @@ def read_cameras_txt( with open(fpath, "r") as f: lines = f.readlines() - # may not be one line per camera (could be only one line of text if shared calibration) + # May not be one line per camera (could be only one line of text if shared calibration) num_cams = int(lines[2].replace("# Number of cameras: ", "").strip()) calibrations = [] @@ -286,9 +302,9 @@ def write_cameras(gtsfm_data: GtsfmData, images: List[Image], save_dir: str) -> Reference: https://colmap.github.io/format.html#cameras-txt Args: - gtsfm_data: scene data to write. - images: list of all images for this scene, in order of image index - save_dir: folder to put the cameras.txt file in. + gtsfm_data: Scene data to write. + images: List of all images for this scene, in order of image index. + save_dir: Folder to put the cameras.txt file in. """ os.makedirs(save_dir, exist_ok=True) @@ -320,7 +336,7 @@ def write_cameras(gtsfm_data: GtsfmData, images: List[Image], save_dir: str) -> f.write(f"{i} {camera_model} {image_width} {image_height} {fx} {u0} {v0} {k1} {k2}\n") -def read_images_txt(fpath: str) -> Tuple[Optional[List[Pose3]], Optional[List[str]]]: +def read_images_txt(fpath: str) -> Tuple[List[Pose3], List[str]]: """Read camera poses and image file names from a COLMAP-format images.txt file. 
@@ -320,7 +336,7 @@ def write_cameras(gtsfm_data: GtsfmData, images: List[Image], save_dir: str) ->
         f.write(f"{i} {camera_model} {image_width} {image_height} {fx} {u0} {v0} {k1} {k2}\n")


-def read_images_txt(fpath: str) -> Tuple[Optional[List[Pose3]], Optional[List[str]]]:
+def read_images_txt(fpath: str) -> Tuple[List[Pose3], List[str]]:
     """Read camera poses and image file names from a COLMAP-format images.txt file.

     Reference: https://colmap.github.io/format.html#images-txt

@@ -331,22 +347,24 @@ def read_images_txt(fpath: str) -> Tuple[Optional[List[Pose3]], Optional[List[st
       to the bottom, and the Z axis to the front as seen from the image."

     Args:
-        fpath: Path to images.txt file
+        fpath: Path to images.txt file.

     Returns:
-        wTi_list: List of camera poses for each image, or None if file path invalid
-        img_fnames: Name of image file, for each image, or None if file path invalid
+        wTi_list: List of camera poses for each image.
+        img_fnames: Filename for each image.
+
+    Raises:
+        FileNotFoundError: If the file path is invalid.
     """
     if not Path(fpath).exists():
-        logger.info("%s does not exist", fpath)
-        return None, None
+        raise FileNotFoundError(f"{fpath} does not exist.")

     with open(fpath, "r") as f:
         lines = f.readlines()

     wTi_list = []
     img_fnames = []
-    # ignore first 4 lines of text -- they contain a description of the file format
+    # Ignore first 4 lines of text -- they contain a description of the file format
     # and a record of the number of reconstructed images.
     for line in lines[4::2]:
         i, qw, qx, qy, qz, tx, ty, tz, i, img_fname = line.split()
@@ -452,7 +470,7 @@ def read_points_txt(fpath: str) -> Tuple[Optional[np.ndarray], Optional[np.ndarr
     rgb = []
     point_cloud = []

-    # first 3 lines are information about the file format
+    # First 3 lines are information about the file format.
     # line at index 2 will be of the form
     # "# Number of points: 2122, mean track length: 2.8449575871819039"
     points_metadata = data[2]
@@ -489,12 +507,13 @@ def read_scene_data_from_colmap_format(
         `cameras.bin`, `images.bin`, and `points3D.bin`.

     Returns:
-        5-tuple of:
+        6-tuple of:
         wTi_list: List of camera poses for each image.
         img_fnames: List of image file names, for each image.
         calibrations: Calibration object for each camera.
         point_cloud: Float array of shape (N,3) representing per-point x/y/z coordinates.
         rgb: Uint8 array of shape (N,3) representing per-point colors.
+        img_dims: List of dimensions of each image (H, W).
     """
     # Determine whether scene data is stored in a text (txt) or binary (bin) file format.
     if Path(data_dir, "images.txt").exists():
@@ -517,7 +536,7 @@ def read_scene_data_from_colmap_format(
     elif file_format == "bin":
         cameras, images, points3d = colmap_io.read_model(path=data_dir, ext=".bin")

-    img_fnames, wTi_list, calibrations, _, point_cloud, rgb = colmap2gtsfm(
+    img_fnames, wTi_list, calibrations, _, point_cloud, rgb, img_dims = colmap2gtsfm(
         cameras, images, points3d, load_sfmtracks=False
     )
@@ -534,7 +553,7 @@ def write_points(gtsfm_data: GtsfmData, images: List[Image], save_dir: str) -> N

     Args:
         gtsfm_data: Scene data to write.
-        images: List of all images for this scene, in order of image index
+        images: List of all images for this scene, in order of image index.
         save_dir: Folder to put the points3D.txt file in.
     """
     os.makedirs(save_dir, exist_ok=True)
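Editor's note (illustrative sketch, not part of the patch): read_images_txt now raises on a missing file instead of returning (None, None), so callers must handle that case explicitly. A minimal caller sketch under the new contract; the path below is hypothetical:

import gtsfm.utils.io as io_utils

try:
    wTi_list, img_fnames = io_utils.read_images_txt("some_scene/images.txt")
except FileNotFoundError:
    # Decide explicitly what a missing images.txt means for this caller,
    # rather than letting None values propagate silently downstream.
    wTi_list, img_fnames = [], []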
@@ -577,7 +596,7 @@ def save_track_visualizations(
     """
     os.makedirs(save_dir, exist_ok=True)

-    # save each 2d track
+    # Save each 2d track.
     for i, track in enumerate(tracks_2d):
         patches = []
         for m in track.measurements:
diff --git a/gtsfm/utils/metrics.py b/gtsfm/utils/metrics.py
index f57ac998d..23898b8f7 100644
--- a/gtsfm/utils/metrics.py
+++ b/gtsfm/utils/metrics.py
@@ -466,6 +466,7 @@ def get_metrics_for_sfmdata(gtsfm_data: GtsfmData, suffix: str, store_full_data:
             name=f"reprojection_errors{suffix}_px",
             data=gtsfm_data.get_scene_reprojection_errors(),
             store_full_data=store_full_data,
+            plot_type=GtsfmMetric.PlotType.BOX,
         )
     )
     return metrics
diff --git a/gtsfm/utils/viz.py b/gtsfm/utils/viz.py
index 8b4c4e77e..8d738ad69 100644
--- a/gtsfm/utils/viz.py
+++ b/gtsfm/utils/viz.py
@@ -24,27 +24,26 @@

 def set_axes_equal(ax: Axes):
-    """
-    Make axes of 3D plot have equal scale so that spheres appear as spheres, cubes as cubes, etc.. This is one
-    possible solution to Matplotlib's ax.set_aspect('equal') and ax.axis('equal') not working for 3D.
+    """Make axes of 3D plot have equal scale so that spheres appear as spheres, cubes as cubes, etc.
+    This is one possible solution to Matplotlib's ax.set_aspect('equal') and ax.axis('equal') not working for 3D.

     Ref: https://github.com/borglab/gtsam/blob/develop/python/gtsam/utils/plot.py#L13

     Args:
-        ax: axis for the plot.
+        ax: Axis for the plot.
     """
-    # get the min and max value for each of (x, y, z) axes as 3x2 matrix.
+    # Get the min and max value for each of (x, y, z) axes as 3x2 matrix.
     # This gives us the bounds of the minimum volume cuboid encapsulating all
     # data.
     limits = np.array([ax.get_xlim3d(), ax.get_ylim3d(), ax.get_zlim3d()])

-    # find the centroid of the cuboid
+    # Find the centroid of the cuboid.
     centroid = np.mean(limits, axis=1)

-    # pick the largest edge length for this cuboid
+    # Pick the largest edge length for this cuboid.
     largest_edge_length = np.max(np.abs(limits[:, 1] - limits[:, 0]))

-    # set new limits to draw a cube using the largest edge length
+    # Set new limits to draw a cube using the largest edge length.
     radius = 0.5 * largest_edge_length
     ax.set_xlim3d([centroid[0] - radius, centroid[0] + radius])
     ax.set_ylim3d([centroid[1] - radius, centroid[1] + radius])
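Editor's note (illustrative sketch, not part of the patch): a small self-contained usage example for set_axes_equal, showing the distortion it corrects for an anisotropic point cloud:

import matplotlib.pyplot as plt
import numpy as np

from gtsfm.utils.viz import set_axes_equal

fig = plt.figure()
ax = fig.add_subplot(projection="3d")
points = np.random.randn(100, 3) * np.array([1.0, 5.0, 0.2])  # stretched cloud
ax.scatter(points[:, 0], points[:, 1], points[:, 2])
set_axes_equal(ax)  # limits become a cube, so the cloud renders without distortion
fig.savefig("equal_axes_demo.png")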
@@ -55,11 +54,11 @@ def draw_circle_cv2(image: Image, x: int, y: int, color: Tuple[int, int, int], c
     """Draw a solid circle on the image.

     Args:
-        image: image to draw the circle on.
+        image: Image to draw the circle on.
         x: x coordinate of the center of the circle.
         y: y coordinate of the center of the circle.
         color: RGB color of the circle.
-        circle_size (optional): the size of the circle (in pixels). Defaults to 10.
+        circle_size (optional): The size of the circle (in pixels). Defaults to 10.

     Returns:
         Image: image with the circle drawn on it.
@@ -81,13 +80,13 @@ def draw_line_cv2(
     """Draw a line on the image from coordinates (x1, y1) to (x2, y2).

     Args:
-        image: image to draw the line on.
+        image: Image to draw the line on.
         x1: x coordinate of start of the line.
         y1: y coordinate of start of the line.
         x2: x coordinate of end of the line.
         y2: y coordinate of end of the line.
-        line_color: color of the line.
-        line_thickness (optional): line thickness. Defaults to 10.
+        line_color: Color of the line.
+        line_thickness (optional): Line thickness. Defaults to 10.

     Returns:
         Image: image with the line drawn on it.
@@ -108,28 +107,28 @@ def plot_twoview_correspondences(
     """Plot correspondences between two images as lines between two circles.

     Args:
-        image_i1: first image.
-        image_i2: second image.
-        kps_i1: keypoints for image_i1.
-        kps_i2: keypoints for image_i2.
-        corr_idxs_i1i2: indices of correspondences between i1 and i2.
-        inlier_mask (optional): inlier mask for correspondences as boolean array. Defaults to None.
-        dot_color (optional): color for keypoints. Defaults to (0, 0, 0).
-        max_corrs (optional): max number of correspondences to plot. Defaults to 50.
+        image_i1: First image.
+        image_i2: Second image.
+        kps_i1: Keypoints for image_i1.
+        kps_i2: Keypoints for image_i2.
+        corr_idxs_i1i2: Indices of correspondences between i1 and i2.
+        inlier_mask (optional): Inlier mask for correspondences as boolean array. Defaults to None.
+        dot_color (optional): Color for keypoints. Defaults to (0, 0, 0).
+        max_corrs (optional): Max number of correspondences to plot. Defaults to 50.

     Returns:
-        image visualizing correspondences between two images.
+        Image visualizing correspondences between two images.
     """
     image_i1, image_i2, scale_i1, scale_i2 = image_utils.match_image_widths(image_i1, image_i2)

     result = image_utils.vstack_image_pair(image_i1, image_i2)

     if max_corrs is not None and corr_idxs_i1i2.shape[0] > max_corrs:
-        # subsample matches
+        # Subsample matches.
         corr_idxs_i1i2 = corr_idxs_i1i2[np.random.choice(corr_idxs_i1i2.shape[0], max_corrs)]

     for corr_idx in range(corr_idxs_i1i2.shape[0]):
-        # mark the points in both images as circles, and draw connecting line
+        # Mark the points in both images as circles, and draw the connecting line.
         idx_i1, idx_i2 = corr_idxs_i1i2[corr_idx]

         x_i1 = (kps_i1.coordinates[idx_i1, 0] * scale_i1[0]).astype(np.int32)
@@ -137,7 +136,7 @@ def plot_twoview_correspondences(
         x_i2 = (kps_i2.coordinates[idx_i2, 0] * scale_i2[0]).astype(np.int32)
         y_i2 = (kps_i2.coordinates[idx_i2, 1] * scale_i2[1]).astype(np.int32) + image_i1.height

-        # drawing correspondences with optional inlier mask
+        # Draw correspondences with optional inlier mask.
         if inlier_mask is None:
             line_color = tuple([int(c) for c in np.random.randint(0, 255 + 1, 3)])
         elif inlier_mask[corr_idx]:
@@ -159,10 +158,10 @@ def plot_sfm_data_3d(sfm_data: GtsfmData, ax: Axes, max_plot_radius: float = 50)
     """Plot the camera poses and landmarks in 3D matplotlib plot.

     Args:
-        sfm_data: SfmData object with camera and tracks.
-        ax: axis to plot on.
-        max_plot_radius: maximum distance threshold away from any camera for which a point
-            will be plotted
+        sfm_data: GtsfmData object with camera and tracks.
+        ax: Axis to plot on.
+        max_plot_radius: Maximum distance threshold away from any camera for which a point
+            will be plotted.
     """
     camera_poses = [sfm_data.get_camera(i).pose() for i in sfm_data.get_valid_camera_indices()]
     plot_poses_3d(camera_poses, ax)
@@ -187,9 +186,9 @@ def plot_poses_3d(
     Color convention: R -> x axis, G -> y axis, B -> z axis.

     Args:
-        wTi_list: list of poses to plot.
-        ax: axis to plot on.
-        center_marker_color (optional): color for camera center marker. Defaults to "k".
+        wTi_list: List of poses to plot.
+        ax: Axis to plot on.
+        center_marker_color (optional): Color for camera center marker. Defaults to "k".
         name:
     """
     spec = "{}.".format(center_marker_color)
@@ -226,8 +225,8 @@ def plot_and_compare_poses_3d(wTi_list: List[Pose3], wTi_list_: List[Pose3]) ->
     The markers are colored black (k) and cyan (c) for the two lists.

     Args:
-        wTi_list: first set of poses.
-        wTi_list_: second set of poses.
+        wTi_list: First set of poses.
+        wTi_list_: Second set of poses.
""" fig = plt.figure() ax = fig.gca(projection="3d") @@ -245,19 +244,19 @@ def save_twoview_correspondences_viz( keypoints_i1: Keypoints, keypoints_i2: Keypoints, corr_idxs_i1i2: np.ndarray, - two_view_report: TwoViewEstimationReport, file_path: str, + two_view_report: Optional[TwoViewEstimationReport] = None, ) -> None: """Visualize correspondences between pairs of images. Args: - image_i1: image #i1. + image_i1: Image #i1. image_i2: image #i2. - keypoints_i1: detected Keypoints for image #i1. - keypoints_i2: detected Keypoints for image #i2. - corr_idxs_i1i2: correspondence indices. - two_view_report: front-end metrics and inlier/outlier info for image pair. - file_path: file path to save the visualization. + keypoints_i1: Detected Keypoints for image #i1. + keypoints_i2: Detected Keypoints for image #i2. + corr_idxs_i1i2: Correspondence indices. + file_path: File path to save the visualization. + two_view_report: Front-end metrics and inlier/outlier info for image pair. """ plot_img = plot_twoview_correspondences( image_i1, @@ -265,18 +264,18 @@ def save_twoview_correspondences_viz( keypoints_i1, keypoints_i2, corr_idxs_i1i2, - inlier_mask=two_view_report.v_corr_idxs_inlier_mask_gt, + inlier_mask=two_view_report.v_corr_idxs_inlier_mask_gt if two_view_report else None, ) io_utils.save_image(plot_img, file_path) def save_sfm_data_viz(sfm_data: GtsfmData, folder_name: str) -> None: - """Visualize the camera poses and 3d points in SfmData. + """Visualizes the camera poses and 3d points in GtsfmData using Matplotlib. Args: - sfm_data: data to visualize. - folder_name: folder to save the visualization at. + sfm_data: Data to visualize. + folder_name: Folder to save the visualization at. """ fig = plt.figure() ax = fig.add_subplot(projection="3d") @@ -284,10 +283,10 @@ def save_sfm_data_viz(sfm_data: GtsfmData, folder_name: str) -> None: plot_sfm_data_3d(sfm_data, ax) set_axes_equal(ax) - # save the 3D plot in the original view + # Save the 3D plot in the original view. fig.savefig(os.path.join(folder_name, "3d.png")) - # save the BEV representation + # Save the BEV representation. default_camera_elevation = 100 # in metres above ground ax.view_init(azim=0, elev=default_camera_elevation) fig.savefig(os.path.join(folder_name, "bev.png")) @@ -298,15 +297,15 @@ def save_sfm_data_viz(sfm_data: GtsfmData, folder_name: str) -> None: def save_camera_poses_viz( pre_ba_sfm_data: GtsfmData, post_ba_sfm_data: GtsfmData, gt_pose_graph: List[Optional[Pose3]], folder_name: str ) -> None: - """Visualize the camera pose and save to disk. + """Visualize the camera poses before and after bundle adjustment using Matplotlib, and saves plots to disk. Args: - pre_ba_sfm_data: data input to bundle adjustment. - post_ba_sfm_data: output of bundle adjustment. - gt_pose_graph: ground truth poses. - folder_name: folder to save the visualization at. + pre_ba_sfm_data: Data input to bundle adjustment. + post_ba_sfm_data: Output of bundle adjustment. + gt_pose_graph: Ground truth poses. + folder_name: Folder to save the visualization at. """ - # extract camera poses + # Extract camera poses. pre_ba_poses = [] for i in pre_ba_sfm_data.get_valid_camera_indices(): pre_ba_poses.append(pre_ba_sfm_data.get_camera(i).pose()) @@ -325,10 +324,10 @@ def save_camera_poses_viz( ax.legend(loc="upper left") set_axes_equal(ax) - # save the 3D plot in the original view + # Save the 3D plot in the original view. fig.savefig(os.path.join(folder_name, "poses_3d.png")) - # save the BEV representation + # Save the BEV representation. 
@@ -298,15 +297,15 @@ def save_sfm_data_viz(sfm_data: GtsfmData, folder_name: str) -> None:
 def save_camera_poses_viz(
     pre_ba_sfm_data: GtsfmData, post_ba_sfm_data: GtsfmData, gt_pose_graph: List[Optional[Pose3]], folder_name: str
 ) -> None:
-    """Visualize the camera pose and save to disk.
+    """Visualize the camera poses before and after bundle adjustment using Matplotlib, and save the plots to disk.

     Args:
-        pre_ba_sfm_data: data input to bundle adjustment.
-        post_ba_sfm_data: output of bundle adjustment.
-        gt_pose_graph: ground truth poses.
-        folder_name: folder to save the visualization at.
+        pre_ba_sfm_data: Data input to bundle adjustment.
+        post_ba_sfm_data: Output of bundle adjustment.
+        gt_pose_graph: Ground truth poses.
+        folder_name: Folder to save the visualization at.
     """
-    # extract camera poses
+    # Extract camera poses.
     pre_ba_poses = []
     for i in pre_ba_sfm_data.get_valid_camera_indices():
         pre_ba_poses.append(pre_ba_sfm_data.get_camera(i).pose())
@@ -325,10 +324,10 @@ def save_camera_poses_viz(
     ax.legend(loc="upper left")
     set_axes_equal(ax)

-    # save the 3D plot in the original view
+    # Save the 3D plot in the original view.
     fig.savefig(os.path.join(folder_name, "poses_3d.png"))

-    # save the BEV representation
+    # Save the BEV representation.
     default_camera_elevation = 100  # in metres above ground
     ax.view_init(azim=0, elev=default_camera_elevation)
     fig.savefig(os.path.join(folder_name, "poses_bev.png"))
diff --git a/scripts/benchmark_wildcat.sh b/scripts/benchmark_wildcat.sh
index 3d543cc8f..e781afcd5 100755
--- a/scripts/benchmark_wildcat.sh
+++ b/scripts/benchmark_wildcat.sh
@@ -1,11 +1,13 @@
 # Script to launch jobs over various datasets & front-ends.

 USER_ROOT=$1
+CLUSTER_CONFIG=$2

 now=$(date +"%Y%m%d_%H%M%S")

 datasets=(
+    gendarmenmarkt-1463
     palace-fine-arts-281
     skydio-crane-mast-501
     2011205_rc3
@@ -21,6 +23,7 @@ max_frame_lookahead_sizes=(
 )

 num_matched_sizes=(
+    0
     5
     10
     15
@@ -35,21 +38,32 @@ correspondence_generator_config_names=(
     loftr
 )

+if [[ $CLUSTER_CONFIG ]]
+then
+    CLUSTER_ARGS="--cluster_config $CLUSTER_CONFIG"
+else
+    CLUSTER_ARGS=""
+fi
+
 for num_matched in ${num_matched_sizes[@]}; do
     for max_frame_lookahead in ${max_frame_lookahead_sizes[@]}; do
         for dataset in ${datasets[@]}; do
+            if [[ $dataset == *"gendarmenmarkt-1463"* && $max_frame_lookahead != 0 ]]
+            then
+                # Gendarmenmarkt images have no natural order, so a frame lookahead is meaningless; skip.
+                continue
+            fi
+
             for correspondence_generator_config_name in ${correspondence_generator_config_names[@]}; do

-                echo "Dataset: "${dataset}
-                echo "Num matched: "${num_matched}
-                echo "Max frame lookahead: "${max_frame_lookahead}
-                echo "Correspondence Generator: "${correspondence_generator_config_name}
-
                 if [[ $correspondence_generator_config_name == *"sift"* ]]
                 then
                     num_workers=10
                 elif [[ $correspondence_generator_config_name == *"lightglue"* ]]
+                then
+                    num_workers=1
+                elif [[ $correspondence_generator_config_name == *"superglue"* ]]
                 then
                     num_workers=1
                 elif [[ $correspondence_generator_config_name == *"loftr"* ]]
@@ -57,6 +71,12 @@ for num_matched in ${num_matched_sizes[@]}; do
                     num_workers=1
                 fi

+                echo "Dataset: ${dataset}"
+                echo "Num matched: ${num_matched}"
+                echo "Max frame lookahead: ${max_frame_lookahead}"
+                echo "Correspondence Generator: ${correspondence_generator_config_name}"
+                echo "Num workers: ${num_workers}"
+
                 if [[ $dataset == *"palace-fine-arts-281"* ]]
                 then
                     loader=olsson
@@ -80,6 +100,11 @@ for num_matched in ${num_matched_sizes[@]}; do
                     loader=colmap
                     images_dir=/usr/local/gtsfm-data/gerrard-hall-100/images
                     colmap_files_dirpath=/usr/local/gtsfm-data/gerrard-hall-100/colmap-3.7-sparse-txt-2023-07-27
+                elif [[ $dataset == *"gendarmenmarkt-1463"* ]]
+                then
+                    loader=colmap
+                    images_dir=/usr/local/gtsfm-data/Gendarmenmarkt/images
+                    colmap_files_dirpath=/usr/local/gtsfm-data/Gendarmenmarkt/gendarmenmark/im_size_full/0
                 fi

                 OUTPUT_ROOT=${USER_ROOT}/${now}/${now}__${dataset}__results__num_matched${num_matched}__maxframelookahead${max_frame_lookahead}__760p__unified_${correspondence_generator_config_name}
@@ -99,6 +124,7 @@ for num_matched in ${num_matched_sizes[@]}; do
                         --worker_memory_limit "32GB" \
                         --output_root $OUTPUT_ROOT \
                         --max_resolution 760 \
+                        $CLUSTER_ARGS \
                         2>&1 | tee $OUTPUT_ROOT/out.log
                 elif [[ $loader == *"colmap"* ]]
                 then
@@ -115,6 +141,7 @@ for num_matched in ${num_matched_sizes[@]}; do
                         --worker_memory_limit "32GB" \
                         --output_root $OUTPUT_ROOT \
                         --max_resolution 760 \
+                        $CLUSTER_ARGS \
                         2>&1 | tee $OUTPUT_ROOT/out.log
                 elif [[ $loader == *"astrovision"* ]]
                 then
@@ -130,6 +157,7 @@ for num_matched in ${num_matched_sizes[@]}; do
                         --worker_memory_limit "32GB" \
                         --output_root $OUTPUT_ROOT \
                         --max_resolution 760 \
+                        $CLUSTER_ARGS \
                         2>&1 | tee $OUTPUT_ROOT/out.log
                 fi
             done
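Editor's note (illustrative sketch, not part of the patch): the new guard above skips nonzero lookahead values for Gendarmenmarkt because sequential retrieval assumes image indices follow capture order. A sketch of the predicate that assumption feeds; this helper is hypothetical, not gtsfm API:

def is_sequential_pair(i1: int, i2: int, max_frame_lookahead: int) -> bool:
    # Only meaningful when indices i1 < i2 reflect a natural capture order;
    # for an unordered collection, index distance says nothing about view overlap.
    return i1 < i2 and (i2 - i1) <= max_frame_lookahead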
a/scripts/collect_results.py
+++ b/scripts/collect_results.py
@@ -10,6 +10,8 @@
 from typing import DefaultDict, Sequence

 import matplotlib.pyplot as plt
+import numpy as np
+from matplotlib import cm
 from tabulate import tabulate

 import gtsfm.utils.io as io_utils
@@ -90,7 +92,6 @@
     "step_2_run_duration_sec",
     "total_run_duration_sec",
     "total_duration_sec",
-    "total_duration_sec",
     "outlier_rejection_duration_sec",
     "optimization_duration_sec",
     "num_input_measurements",
@@ -100,7 +101,6 @@
     "total_two_view_estimation_duration_sec",
     "triangulation_runtime_sec",
     "gtsfm_data_creation_runtime",
-    "total_duration_sec",
     "total_runtime_sec",
     "retriever_duration_sec"
 ]
@@ -206,8 +206,12 @@ def _make_runtime_pie_chart(experiment_roots: Sequence[Path]) -> None:
         runtime_labels.append("remainder_sec")
         runtimes.append(remainder_runtime)

+        # Create a uniform purple-to-yellow colormap to prevent color re-use in the pie chart.
+        n_colors = len(runtimes)
+        cs = cm.viridis(np.arange(n_colors) / n_colors)
+
         fig, ax = plt.subplots(figsize=(15, 10))
-        ax.pie(runtimes, labels=runtime_labels, autopct="%1.1f%%", textprops={"fontsize": 10})
+        ax.pie(runtimes, labels=runtime_labels, autopct="%1.1f%%", textprops={"fontsize": 10}, colors=cs)
         plt.title("Runtime Breakdown for " + str(experiment_root.name))
         plt.show()
diff --git a/tests/data_association/test_cpp_dsf_tracks_estimator.py b/tests/data_association/test_cpp_dsf_tracks_estimator.py
index 30a4a5c04..8f64e2071 100644
--- a/tests/data_association/test_cpp_dsf_tracks_estimator.py
+++ b/tests/data_association/test_cpp_dsf_tracks_estimator.py
@@ -1,6 +1,8 @@
 """Unit tests for the CppDsfTracksEstimator class."""

+import numpy as np
+from gtsfm.common.keypoints import Keypoints
 from gtsfm.data_association.cpp_dsf_tracks_estimator import CppDsfTracksEstimator
 from tests.data_association.test_dsf_tracks_estimator import TestDsfTracksEstimator

@@ -8,5 +10,13 @@ class TestCppDsfTracksEstimator(TestDsfTracksEstimator):
     """ """

-    def setUp(self):
+    def setUp(self) -> None:
         self.estimator = CppDsfTracksEstimator()
+
+    def test_nonrank2_keypoint_coordinates_raises(self) -> None:
+        """Ensure that a ValueError is raised when keypoint coordinates are not a rank-2 array."""
+        matches_dict = {(0, 1): np.random.randint(low=0, high=500, size=(100, 2), dtype=np.int32)}
+        keypoints_list = [Keypoints(np.zeros((0, 2))), Keypoints(np.zeros((0,)))]
+
+        with self.assertRaises(ValueError):
+            self.estimator.run(matches_dict=matches_dict, keypoints_list=keypoints_list)
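Editor's note (illustrative sketch, not part of the patch): the new test above feeds the estimator one valid and one degenerate Keypoints instance; the validation in CppDsfTracksEstimator.run keys on array rank, not on emptiness, so an empty (0, 2) array is still accepted:

import numpy as np

from gtsfm.common.keypoints import Keypoints

ok = Keypoints(coordinates=np.zeros((0, 2)))  # empty but rank-2: passes the ndim check
bad = Keypoints(coordinates=np.zeros((0,)))   # rank-1: triggers the ValueError
assert ok.coordinates.ndim == 2 and bad.coordinates.ndim == 1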
diff --git a/tests/loader/test_colmap_loader.py b/tests/loader/test_colmap_loader.py
index bf6cf0322..dbd91cdda 100644
--- a/tests/loader/test_colmap_loader.py
+++ b/tests/loader/test_colmap_loader.py
@@ -30,7 +30,6 @@ def setUp(self):
             images_dir,
             use_gt_intrinsics=True,
             use_gt_extrinsics=True,
-            max_frame_lookahead=3,
             max_resolution=500,
         )

@@ -38,7 +37,6 @@ def test_constructor_set_properties(self) -> None:
         """Ensure that constructor sets class properties correctly."""
         assert self.loader._use_gt_intrinsics
         assert self.loader._use_gt_extrinsics
-        assert self.loader._max_frame_lookahead == 3
         assert self.loader._max_resolution == 500

     def test_len(self) -> None:
@@ -125,7 +123,8 @@ def test_is_valid_pair_within_lookahead(self, base_is_valid_pair_mock: MagicMock
     def test_is_valid_pair_outside_lookahead(self, base_is_valid_pair_mock: MagicMock) -> None:
         i1 = 5
         i2 = 15
-        self.assertFalse(self.loader.is_valid_pair(i1, i2))
+        # Max frame lookahead is determined by the retriever, not by the loader, so this should be a valid pair.
+        self.assertTrue(self.loader.is_valid_pair(i1, i2))

         base_is_valid_pair_mock.assert_called_once_with(i1, i2)
diff --git a/tests/retriever/test_exhaustive_retriever.py b/tests/retriever/test_exhaustive_retriever.py
index ca6215871..d1852ed05 100644
--- a/tests/retriever/test_exhaustive_retriever.py
+++ b/tests/retriever/test_exhaustive_retriever.py
@@ -6,6 +6,8 @@
 import unittest
 from pathlib import Path

+from dask.distributed import Client, LocalCluster
+
 from gtsfm.loader.olsson_loader import OlssonLoader
 from gtsfm.retriever.exhaustive_retriever import ExhaustiveRetriever

@@ -19,12 +21,16 @@ def test_exhaustive_retriever_door(self) -> None:
         loader = OlssonLoader(folder=DOOR_DATA_ROOT)
         retriever = ExhaustiveRetriever()

-        pairs = retriever.get_image_pairs(loader=loader)
+        # Create dask client.
+        cluster = LocalCluster(n_workers=1, threads_per_worker=4)
+        pairs_graph = retriever.create_computation_graph(loader=loader)
+        with Client(cluster):
+            pairs = pairs_graph.compute()

         # {12 \choose 2} = (12 * 11) / 2 = 66
         self.assertEqual(len(pairs), 66)

-        for i1, i2 in pairs:
+        for (i1, i2) in pairs:
             self.assertTrue(i1 < i2)
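Editor's note (illustrative sketch, not part of the patch): a quick sanity check of the pair count asserted in the exhaustive retriever test above; exhaustive retrieval proposes every unordered image pair, i.e. n choose 2:

from itertools import combinations

pairs = list(combinations(range(12), 2))  # 12 Door images
assert len(pairs) == (12 * 11) // 2 == 66
assert all(i1 < i2 for i1, i2 in pairs)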
diff --git a/tests/retriever/test_netvlad_retriever.py b/tests/retriever/test_netvlad_retriever.py
index b5c6e05de..91cebe06c 100644
--- a/tests/retriever/test_netvlad_retriever.py
+++ b/tests/retriever/test_netvlad_retriever.py
@@ -6,6 +6,8 @@
 import unittest
 from pathlib import Path

+from dask.distributed import Client, LocalCluster
+
 from gtsfm.loader.colmap_loader import ColmapLoader
 from gtsfm.loader.olsson_loader import OlssonLoader
 from gtsfm.retriever.netvlad_retriever import NetVLADRetriever
@@ -24,15 +26,18 @@ def test_netvlad_retriever_crane_mast(self) -> None:
         loader = ColmapLoader(
             colmap_files_dirpath=colmap_files_dirpath,
             images_dir=images_dir,
-            max_frame_lookahead=100,
             max_resolution=760,
         )

         retriever = NetVLADRetriever(num_matched=2)
-        pairs = retriever.get_image_pairs(loader=loader)
+        # Create dask client.
+        cluster = LocalCluster(n_workers=1, threads_per_worker=4)
+        pairs_graph = retriever.create_computation_graph(loader=loader)
+        with Client(cluster):
+            pairs = pairs_graph.compute()

-        # only 1 pair possible between frame 0 and frame 1
+        # Only 1 pair possible between frame 0 and frame 1.
         self.assertEqual(len(pairs), 1)
         self.assertEqual(pairs, [(0, 1)])

@@ -41,11 +46,15 @@ def test_netvlad_retriever_door(self) -> None:
         loader = OlssonLoader(folder=DOOR_DATA_ROOT)
         retriever = NetVLADRetriever(num_matched=2)

-        pairs = retriever.get_image_pairs(loader=loader)
+        # Create dask client.
+        cluster = LocalCluster(n_workers=1, threads_per_worker=4)
+        pairs_graph = retriever.create_computation_graph(loader=loader)
+        with Client(cluster):
+            pairs = pairs_graph.compute()

         self.assertEqual(len(pairs), 21)

-        for i1, i2 in pairs:
+        for (i1, i2) in pairs:
             self.assertTrue(i1 != i2)
             self.assertTrue(i1 < i2)
diff --git a/tests/utils/test_io_utils.py b/tests/utils/test_io_utils.py
index 96715610c..6b6beb4c5 100644
--- a/tests/utils/test_io_utils.py
+++ b/tests/utils/test_io_utils.py
@@ -67,9 +67,8 @@ def test_read_images_txt(self) -> None:

     def test_read_images_txt_nonexistent_file(self) -> None:
-        """Ensure that providing a path to a nonexistent file returns None for both return args."""
+        """Ensure that providing a path to a nonexistent file raises a FileNotFoundError."""
         fpath = "nonexistent_dir/images.txt"
-        wTi_list, img_filenames = io_utils.read_images_txt(fpath)
-        self.assertIsNone(wTi_list)
-        self.assertIsNone(img_filenames)
+        with self.assertRaises(FileNotFoundError):
+            io_utils.read_images_txt(fpath)

     def test_read_cameras_txt(self) -> None:
         """Ensure that shared calibration from COLMAP output is read in as a single calibration."""