diff --git a/dlup/annotations_experimental.py b/dlup/annotations_experimental.py index d2dcd477..12f81a09 100644 --- a/dlup/annotations_experimental.py +++ b/dlup/annotations_experimental.py @@ -306,33 +306,26 @@ def filter_polygons(self, label: str) -> None: self._layers.remove_polygon(polygon) -# TODO: Temporary here -def convert_annotations( - annotations, - region_size: tuple[int, int], - default_value: int = 0, - index_map: dict[str, int] = None, -): - mask = np.empty(region_size, dtype=np.int32) - mask[:] = default_value - for curr_annotation in annotations: - holes_mask = None - index_value = index_map[curr_annotation.label] - original_values = None - interiors = [(np.asarray(pi)).round().astype(np.int32) for pi in curr_annotation.get_interiors()] - if interiors != []: - original_values = mask.copy() - holes_mask = np.zeros(region_size, dtype=np.int32) - # Get a mask where the holes are - cv2.fillPoly(holes_mask, interiors, [1]) - - cv2.fillPoly( - mask, - [(np.asarray(curr_annotation.get_exterior())).round().astype(np.int32)], - [index_value], - ) - if interiors != []: - # TODO: This is a bit hacky to ignore mypy here, but I don't know how to fix it. - mask = np.where(holes_mask == 1, original_values, mask) # type: ignore - return mask + def sort_polygons(self, key: callable, reverse: bool = False) -> None: + """Sort the polygons in-place. + + Parameters + ---------- + key : callable + The key to sort the polygons on, this has to be a lambda function or similar. + For instance `lambda polygon: polygon.area` will sort the polygons on the area, or + `lambda polygon: polygon.get_field(field_name)` will sort the polygons on that field. + reverse : bool + Whether to sort in reverse order. + + Note + ---- + This will internally invalidate the R-tree. You could rebuild this manually using `.rebuild_rtree()`, or + have the function itself do this on-demand (typically when you invoke a `.read_region()`) + Returns + ------- + None + + """ + self._layers.sort_polygons(key, reverse) \ No newline at end of file diff --git a/gen_polygons.py b/gen_polygons.py index 91048205..2ff04a5a 100644 --- a/gen_polygons.py +++ b/gen_polygons.py @@ -7,7 +7,6 @@ from dlup.annotations import WsiAnnotations from dlup.annotations_experimental import WsiAnnotationsExperimental as WsiAnnotations2 from dlup.data.transforms import convert_annotations -from dlup.annotations_experimental import convert_annotations as convert_annotations_new fn = Path("TCGA-E9-A1R4-01Z-00-DX1.B04D5A22-8CE5-49FD-8510-14444F46894D.geojson") import numpy as np @@ -19,6 +18,39 @@ print(f"Time to load annotations (dlup v0.7.0): {(time.time() - start_time):.5f}s") + +# TODO: Temporary here +def convert_annotations_new( + annotations, + region_size: tuple[int, int], + default_value: int = 0, + index_map: dict[str, int] = None, +): + mask = np.empty(region_size, dtype=np.int32) + mask[:] = default_value + for curr_annotation in annotations: + holes_mask = None + index_value = index_map[curr_annotation.label] + original_values = None + interiors = [(np.asarray(pi)).round().astype(np.int32) for pi in curr_annotation.get_interiors()] + if interiors != []: + original_values = mask.copy() + holes_mask = np.zeros(region_size, dtype=np.int32) + # Get a mask where the holes are + cv2.fillPoly(holes_mask, interiors, [1]) + + cv2.fillPoly( + mask, + [(np.asarray(curr_annotation.get_exterior())).round().astype(np.int32)], + [index_value], + ) + if interiors != []: + # TODO: This is a bit hacky to ignore mypy here, but I don't know how to fix it. + mask = np.where(holes_mask == 1, original_values, mask) # type: ignore + return mask + + + # Bounding box: bbox = annotations.bounding_box print(f"Bounding box: {bbox}") diff --git a/src/geometry.cpp b/src/geometry.cpp index 26df8aa2..09ac8b03 100644 --- a/src/geometry.cpp +++ b/src/geometry.cpp @@ -214,6 +214,7 @@ std::vector> Polygon::intersection(const BoostPolygon & BoostPolygon validPolygon = GeometryUtils::makeValid(*polygon); std::vector intersectionResult; + // intersectionResult.reserve(validPolygon.inners().size() * 5); bg::intersection(validPolygon, otherPolygon, intersectionResult); std::vector> result; @@ -303,6 +304,9 @@ cv::Mat generateMaskFromAnnotations(const std::vector> std::vector exterior_cv_points; std::vector> interiors_cv_points; + // exterior_cv_points.reserve(100000); + // interiors_cv_points.reserve(100000); + for (const auto &annotation : annotations) { int index_value = index_map.at(annotation->getField("label")->cast()); @@ -352,7 +356,6 @@ cv::Mat generateMaskFromAnnotations(const std::vector> return mask; } - py::array_t maskToPyArray(const cv::Mat &mask) { // Ensure the mask is of type CV_32S (int type) if (mask.type() != CV_32S) { @@ -508,6 +511,8 @@ class GeometryContainer { return py_polygons; } + void sortPolygons(const py::function &keyFunc, bool reverse); + void removePolygon(const PolygonPtr &p); void removePolygon(size_t index); @@ -526,24 +531,45 @@ class GeometryContainer { const std::pair &size); // TODO: Rethink the need for this function. - void reindexPolygons(const std::map &indexMap) { - for (auto &polygon : polygons) { - std::optional label_opt = polygon->getField("label"); - - if (label_opt.has_value()) { - std::string label = label_opt->cast(); - auto it = indexMap.find(label); - if (it != indexMap.end()) { - polygon->setField("index", py::int_(it->second)); - } else { - throw std::invalid_argument("Label '" + label + "' not found in indexMap"); - } + void reindexPolygons(const std::map &indexMap); +}; + +void GeometryContainer::reindexPolygons(const std::map &indexMap) { + for (auto &polygon : polygons) { + std::optional label_opt = polygon->getField("label"); + + if (label_opt.has_value()) { + std::string label = label_opt->cast(); + auto it = indexMap.find(label); + if (it != indexMap.end()) { + polygon->setField("index", py::int_(it->second)); } else { - throw std::invalid_argument("Polygon does not have a value for the 'label' field"); + throw std::invalid_argument("Label '" + label + "' not found in indexMap"); } + } else { + throw std::invalid_argument("Polygon does not have a value for the 'label' field"); } } -}; +} + +void GeometryContainer::sortPolygons(const py::function &keyFunc, bool reverse) { + std::sort(polygons.begin(), polygons.end(), [&keyFunc, reverse](const PolygonPtr &a, const PolygonPtr &b) { + py::object keyA = keyFunc(a); + py::object keyB = keyFunc(b); + + if (py::isinstance(keyA) && py::isinstance(keyB)) { + return reverse ? (keyA.cast() > keyB.cast()) + : (keyA.cast() < keyB.cast()); + } else if (py::isinstance(keyA) && py::isinstance(keyB)) { + return reverse ? (keyA.cast() > keyB.cast()) : (keyA.cast() < keyB.cast()); + } else if (py::isinstance(keyA) && py::isinstance(keyB)) { + return reverse ? (keyA.cast() > keyB.cast()) : (keyA.cast() < keyB.cast()); + } else { + throw std::invalid_argument("Unsupported key type for sorting."); + } + }); + rtreeWrapper.invalidate(); +} void GeometryContainer::scale(double scaling) { for (auto &point : points) { @@ -633,9 +659,14 @@ AnnotationRegion GeometryContainer::readRegion(const std::pair & std::sort(results.begin(), results.end(), [](const auto &a, const auto &b) { return a.second < b.second; }); + // const size_t estimatedSize = 10000; // Estimated size + std::vector> intersectedPolygons; std::vector> intersectedPoints; + // intersectedPolygons.reserve(estimatedSize); + // intersectedPoints.reserve(estimatedSize); + for (const auto &result : results) { size_t index = result.second; if (index < polygons.size()) { @@ -731,6 +762,7 @@ PYBIND11_MODULE(_geometry, m) { .def("remove_polygon", py::overload_cast(&GeometryContainer::removePolygon), "Remove a polygon by its index") .def("reindex_polygons", &GeometryContainer::reindexPolygons) + .def("sort_polygons", &GeometryContainer::sortPolygons, "Sort polygons by a custom key function") // Overload remove_point to handle both object and index .def("remove_point", py::overload_cast &>(&GeometryContainer::removePoint), diff --git a/test_performance.py b/test_performance.py index 963df841..639eb926 100644 --- a/test_performance.py +++ b/test_performance.py @@ -33,18 +33,25 @@ polygons = [ DlupPolygon(dg.Polygon([(0, 0), (0, 3), (3, 3), (3, 0)], [])), DlupPolygon(dg.Polygon([(2, 2), (2, 5), (5, 5), (5, 2)], [])), - DlupPolygon(dg.Polygon([(4, 4), (4, 7), (7, 7), (7, 4)], [])), + DlupPolygon(dg.Polygon([(4, 2), (4, 7), (7, 7), (7, 4)], [])), DlupPolygon(dg.Polygon([(6, 6), (6, 9), (9, 9), (9, 6)], [])), ] +for idx, polygon in enumerate(polygons): + polygon.label = str(idx) + + +print("Areas: ", [poly.area for poly in polygons]) + points = [DlupPoint(1, 1, label="taart"), DlupPoint(4, 4, index=1), DlupPoint(6, 6), DlupPoint(8, 8)] pointers = [] point_pointers = [] print("Looping over the polygons") -for poly in polygons: - poly.set_field("label", "test") +for idx, poly in enumerate(polygons): + if idx == 0: + poly.set_field("label", "sample0") pointers.append(poly.pointer_id) for point in points: @@ -62,7 +69,7 @@ container.add_polygon(polygon) # So my polygons are now this: -print("Polygons") +print("Polygons:") print(container.polygons) # # Remove the one with label 'taart' @@ -74,6 +81,8 @@ container.add_point(point) second_point_pointers.append(point.pointer_id) + + print(f"Points: {container.points}: {len(container.points)}") # Let's remove a point assert container.rtree_invalidated == False @@ -100,9 +109,10 @@ third_pointers = [] third_point_pointers = [] print(container.polygons) -for sample in container.polygons: +for idx, sample in enumerate(container.polygons): third_pointers.append(sample.pointer_id) - assert sample.get_field("label") == "test" + if idx == 0: + assert sample.get_field("label") == "sample0" # print(sample, sample.get_fields(), sample.get_pointer_id()) # @@ -112,68 +122,40 @@ assert pointers == third_pointers -print("Getting regions\n====================") regions = container.read_region((2, 2), 1.0, (10, 10)) polygon_shift = 0 point_counter = 0 -for region in regions: - if isinstance(region, DlupPolygon): - polygon_shift += 1 - assert region.get_field("label") == "test" - else: - assert isinstance(region, DlupPoint) - print(region, points[point_counter]) - point_counter += 1 +for region in regions.polygons: + assert isinstance(region, DlupPolygon) + assert region.get_field("label") == "test" + +for region in regions.points: + assert isinstance(region, DlupPoint) + print(region, points[point_counter]) # Let is try to get a non-existing field assert polygons[0].get_field("non_existing") is None +print("Before sorting\n") +# Let's sort the polygons, but lets first get the typeS: +for sample in container.polygons: + print(sample.area, sample.pointer_id) -import dlup - -print(dlup.geometry.__file__) - -import time - -fn = Path("TCGA-E9-A1R4-01Z-00-DX1.B04D5A22-8CE5-49FD-8510-14444F46894D.geojson") - -start_time = time.time() -annotations = WsiAnnotations.from_geojson(fn, sorting="NONE") - -print(f"Time to load annotations (dlup v0.7.0): {(time.time() - start_time):.5f}s") - - -# Bounding box: -bbox = annotations.bounding_box -print(f"Bounding box: {bbox}") - -# Let's get the region -start_time = time.time() -region = annotations.read_region((0, 0), 1.0, bbox[1]) -dlup_reg = time.time() - start_time -print(f"Time to read region (dlup v0.7.0): {dlup_reg:.5f}s") -# print(f"Number of polygons in region (dlup v0.7.0): {len(region)}") -print() - -start_time = time.time() -annotations2 = WsiAnnotationsExperimental.from_geojson(fn) -print(f"Time to load annotations (dlup v0.8.0.beta): {(time.time() - start_time):.5f}s") - -start_time = time.time() -region2 = annotations2.read_region((0, 0), 1.0, bbox[1]) -# Let's get all label names -labels0 = set([_.label for _ in region2]) - -new_reg = time.time() - start_time -print(f"Time to read (dlup v0.8.0.beta): {new_reg:.5f}s") -print(f"Number of polygons in region (dlup v0.8.0.beta): {len(region2)}") +print("After sorting\n") +container.sort_polygons(lambda x: x.area, False) +for sample in container.polygons: + print(sample.area, sample.pointer_id) -print(f"Factor faster: {((dlup_reg / new_reg)):.3f}") -labels1 = set([_.label for _ in region2]) +container.sort_polygons(lambda x: x.area, True) +for sample in container.polygons: + print(sample.area, sample.pointer_id) -assert labels0 == labels1 != [] +container.rebuild_rtree() +print(container.polygons) +container.sort_polygons(lambda x: x.get_field("label"), False) +print(container.polygons) +for sample in container.polygons: + print(sample.label, sample.pointer_id) -# print(annotations2._layers.polygons[:2]) -# print(annotations2._layers.polygons[0].label)