Skip to content

Commit

Permalink
Adding sorting capabilities
Browse files Browse the repository at this point in the history
  • Loading branch information
jonasteuwen committed Aug 13, 2024
1 parent 4943171 commit 8d2b241
Show file tree
Hide file tree
Showing 4 changed files with 142 additions and 103 deletions.
51 changes: 22 additions & 29 deletions dlup/annotations_experimental.py
Original file line number Diff line number Diff line change
Expand Up @@ -306,33 +306,26 @@ def filter_polygons(self, label: str) -> None:
self._layers.remove_polygon(polygon)


# TODO: Temporary here
def convert_annotations(
annotations,
region_size: tuple[int, int],
default_value: int = 0,
index_map: dict[str, int] = None,
):
mask = np.empty(region_size, dtype=np.int32)
mask[:] = default_value
for curr_annotation in annotations:
holes_mask = None
index_value = index_map[curr_annotation.label]
original_values = None
interiors = [(np.asarray(pi)).round().astype(np.int32) for pi in curr_annotation.get_interiors()]
if interiors != []:
original_values = mask.copy()
holes_mask = np.zeros(region_size, dtype=np.int32)
# Get a mask where the holes are
cv2.fillPoly(holes_mask, interiors, [1])

cv2.fillPoly(
mask,
[(np.asarray(curr_annotation.get_exterior())).round().astype(np.int32)],
[index_value],
)
if interiors != []:
# TODO: This is a bit hacky to ignore mypy here, but I don't know how to fix it.
mask = np.where(holes_mask == 1, original_values, mask) # type: ignore
return mask
def sort_polygons(self, key: callable, reverse: bool = False) -> None:
"""Sort the polygons in-place.
Parameters
----------
key : callable
The key to sort the polygons on, this has to be a lambda function or similar.
For instance `lambda polygon: polygon.area` will sort the polygons on the area, or
`lambda polygon: polygon.get_field(field_name)` will sort the polygons on that field.
reverse : bool
Whether to sort in reverse order.
Note
----
This will internally invalidate the R-tree. You could rebuild this manually using `.rebuild_rtree()`, or
have the function itself do this on-demand (typically when you invoke a `.read_region()`)
Returns
-------
None
"""
self._layers.sort_polygons(key, reverse)
34 changes: 33 additions & 1 deletion gen_polygons.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,6 @@
from dlup.annotations import WsiAnnotations
from dlup.annotations_experimental import WsiAnnotationsExperimental as WsiAnnotations2
from dlup.data.transforms import convert_annotations
from dlup.annotations_experimental import convert_annotations as convert_annotations_new

fn = Path("TCGA-E9-A1R4-01Z-00-DX1.B04D5A22-8CE5-49FD-8510-14444F46894D.geojson")
import numpy as np
Expand All @@ -19,6 +18,39 @@
print(f"Time to load annotations (dlup v0.7.0): {(time.time() - start_time):.5f}s")



# TODO: Temporary here
def convert_annotations_new(
annotations,
region_size: tuple[int, int],
default_value: int = 0,
index_map: dict[str, int] = None,
):
mask = np.empty(region_size, dtype=np.int32)
mask[:] = default_value
for curr_annotation in annotations:
holes_mask = None
index_value = index_map[curr_annotation.label]
original_values = None
interiors = [(np.asarray(pi)).round().astype(np.int32) for pi in curr_annotation.get_interiors()]
if interiors != []:
original_values = mask.copy()
holes_mask = np.zeros(region_size, dtype=np.int32)
# Get a mask where the holes are
cv2.fillPoly(holes_mask, interiors, [1])

cv2.fillPoly(
mask,
[(np.asarray(curr_annotation.get_exterior())).round().astype(np.int32)],
[index_value],
)
if interiors != []:
# TODO: This is a bit hacky to ignore mypy here, but I don't know how to fix it.
mask = np.where(holes_mask == 1, original_values, mask) # type: ignore
return mask



# Bounding box:
bbox = annotations.bounding_box
print(f"Bounding box: {bbox}")
Expand Down
62 changes: 47 additions & 15 deletions src/geometry.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -214,6 +214,7 @@ std::vector<std::shared_ptr<Polygon>> Polygon::intersection(const BoostPolygon &
BoostPolygon validPolygon = GeometryUtils::makeValid(*polygon);

std::vector<BoostPolygon> intersectionResult;
// intersectionResult.reserve(validPolygon.inners().size() * 5);
bg::intersection(validPolygon, otherPolygon, intersectionResult);

std::vector<std::shared_ptr<Polygon>> result;
Expand Down Expand Up @@ -303,6 +304,9 @@ cv::Mat generateMaskFromAnnotations(const std::vector<std::shared_ptr<Polygon>>
std::vector<cv::Point> exterior_cv_points;
std::vector<std::vector<cv::Point>> interiors_cv_points;

// exterior_cv_points.reserve(100000);
// interiors_cv_points.reserve(100000);

for (const auto &annotation : annotations) {
int index_value = index_map.at(annotation->getField("label")->cast<std::string>());

Expand Down Expand Up @@ -352,7 +356,6 @@ cv::Mat generateMaskFromAnnotations(const std::vector<std::shared_ptr<Polygon>>
return mask;
}


py::array_t<int> maskToPyArray(const cv::Mat &mask) {
// Ensure the mask is of type CV_32S (int type)
if (mask.type() != CV_32S) {
Expand Down Expand Up @@ -508,6 +511,8 @@ class GeometryContainer {
return py_polygons;
}

void sortPolygons(const py::function &keyFunc, bool reverse);

void removePolygon(const PolygonPtr &p);
void removePolygon(size_t index);

Expand All @@ -526,24 +531,45 @@ class GeometryContainer {
const std::pair<double, double> &size);

// TODO: Rethink the need for this function.
void reindexPolygons(const std::map<std::string, int> &indexMap) {
for (auto &polygon : polygons) {
std::optional<py::object> label_opt = polygon->getField("label");

if (label_opt.has_value()) {
std::string label = label_opt->cast<std::string>();
auto it = indexMap.find(label);
if (it != indexMap.end()) {
polygon->setField("index", py::int_(it->second));
} else {
throw std::invalid_argument("Label '" + label + "' not found in indexMap");
}
void reindexPolygons(const std::map<std::string, int> &indexMap);
};

void GeometryContainer::reindexPolygons(const std::map<std::string, int> &indexMap) {
for (auto &polygon : polygons) {
std::optional<py::object> label_opt = polygon->getField("label");

if (label_opt.has_value()) {
std::string label = label_opt->cast<std::string>();
auto it = indexMap.find(label);
if (it != indexMap.end()) {
polygon->setField("index", py::int_(it->second));
} else {
throw std::invalid_argument("Polygon does not have a value for the 'label' field");
throw std::invalid_argument("Label '" + label + "' not found in indexMap");
}
} else {
throw std::invalid_argument("Polygon does not have a value for the 'label' field");
}
}
};
}

void GeometryContainer::sortPolygons(const py::function &keyFunc, bool reverse) {
std::sort(polygons.begin(), polygons.end(), [&keyFunc, reverse](const PolygonPtr &a, const PolygonPtr &b) {
py::object keyA = keyFunc(a);
py::object keyB = keyFunc(b);

if (py::isinstance<py::str>(keyA) && py::isinstance<py::str>(keyB)) {
return reverse ? (keyA.cast<std::string>() > keyB.cast<std::string>())
: (keyA.cast<std::string>() < keyB.cast<std::string>());
} else if (py::isinstance<py::float_>(keyA) && py::isinstance<py::float_>(keyB)) {
return reverse ? (keyA.cast<double>() > keyB.cast<double>()) : (keyA.cast<double>() < keyB.cast<double>());
} else if (py::isinstance<py::int_>(keyA) && py::isinstance<py::int_>(keyB)) {
return reverse ? (keyA.cast<int>() > keyB.cast<int>()) : (keyA.cast<int>() < keyB.cast<int>());
} else {
throw std::invalid_argument("Unsupported key type for sorting.");
}
});
rtreeWrapper.invalidate();
}

void GeometryContainer::scale(double scaling) {
for (auto &point : points) {
Expand Down Expand Up @@ -633,9 +659,14 @@ AnnotationRegion GeometryContainer::readRegion(const std::pair<double, double> &

std::sort(results.begin(), results.end(), [](const auto &a, const auto &b) { return a.second < b.second; });

// const size_t estimatedSize = 10000; // Estimated size

std::vector<std::shared_ptr<Polygon>> intersectedPolygons;
std::vector<std::shared_ptr<Point>> intersectedPoints;

// intersectedPolygons.reserve(estimatedSize);
// intersectedPoints.reserve(estimatedSize);

for (const auto &result : results) {
size_t index = result.second;
if (index < polygons.size()) {
Expand Down Expand Up @@ -731,6 +762,7 @@ PYBIND11_MODULE(_geometry, m) {
.def("remove_polygon", py::overload_cast<size_t>(&GeometryContainer::removePolygon),
"Remove a polygon by its index")
.def("reindex_polygons", &GeometryContainer::reindexPolygons)
.def("sort_polygons", &GeometryContainer::sortPolygons, "Sort polygons by a custom key function")

// Overload remove_point to handle both object and index
.def("remove_point", py::overload_cast<const std::shared_ptr<Point> &>(&GeometryContainer::removePoint),
Expand Down
98 changes: 40 additions & 58 deletions test_performance.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,18 +33,25 @@
polygons = [
DlupPolygon(dg.Polygon([(0, 0), (0, 3), (3, 3), (3, 0)], [])),
DlupPolygon(dg.Polygon([(2, 2), (2, 5), (5, 5), (5, 2)], [])),
DlupPolygon(dg.Polygon([(4, 4), (4, 7), (7, 7), (7, 4)], [])),
DlupPolygon(dg.Polygon([(4, 2), (4, 7), (7, 7), (7, 4)], [])),
DlupPolygon(dg.Polygon([(6, 6), (6, 9), (9, 9), (9, 6)], [])),
]

for idx, polygon in enumerate(polygons):
polygon.label = str(idx)


print("Areas: ", [poly.area for poly in polygons])

points = [DlupPoint(1, 1, label="taart"), DlupPoint(4, 4, index=1), DlupPoint(6, 6), DlupPoint(8, 8)]

pointers = []
point_pointers = []

print("Looping over the polygons")
for poly in polygons:
poly.set_field("label", "test")
for idx, poly in enumerate(polygons):
if idx == 0:
poly.set_field("label", "sample0")
pointers.append(poly.pointer_id)

for point in points:
Expand All @@ -62,7 +69,7 @@
container.add_polygon(polygon)

# So my polygons are now this:
print("Polygons")
print("Polygons:")
print(container.polygons)

# # Remove the one with label 'taart'
Expand All @@ -74,6 +81,8 @@
container.add_point(point)
second_point_pointers.append(point.pointer_id)



print(f"Points: {container.points}: {len(container.points)}")
# Let's remove a point
assert container.rtree_invalidated == False
Expand All @@ -100,9 +109,10 @@
third_pointers = []
third_point_pointers = []
print(container.polygons)
for sample in container.polygons:
for idx, sample in enumerate(container.polygons):
third_pointers.append(sample.pointer_id)
assert sample.get_field("label") == "test"
if idx == 0:
assert sample.get_field("label") == "sample0"
# print(sample, sample.get_fields(), sample.get_pointer_id())
#

Expand All @@ -112,68 +122,40 @@

assert pointers == third_pointers

print("Getting regions\n====================")

regions = container.read_region((2, 2), 1.0, (10, 10))
polygon_shift = 0
point_counter = 0
for region in regions:
if isinstance(region, DlupPolygon):
polygon_shift += 1
assert region.get_field("label") == "test"
else:
assert isinstance(region, DlupPoint)
print(region, points[point_counter])
point_counter += 1
for region in regions.polygons:
assert isinstance(region, DlupPolygon)
assert region.get_field("label") == "test"

for region in regions.points:
assert isinstance(region, DlupPoint)
print(region, points[point_counter])

# Let is try to get a non-existing field
assert polygons[0].get_field("non_existing") is None

print("Before sorting\n")
# Let's sort the polygons, but lets first get the typeS:
for sample in container.polygons:
print(sample.area, sample.pointer_id)

import dlup

print(dlup.geometry.__file__)

import time

fn = Path("TCGA-E9-A1R4-01Z-00-DX1.B04D5A22-8CE5-49FD-8510-14444F46894D.geojson")

start_time = time.time()
annotations = WsiAnnotations.from_geojson(fn, sorting="NONE")

print(f"Time to load annotations (dlup v0.7.0): {(time.time() - start_time):.5f}s")


# Bounding box:
bbox = annotations.bounding_box
print(f"Bounding box: {bbox}")

# Let's get the region
start_time = time.time()
region = annotations.read_region((0, 0), 1.0, bbox[1])
dlup_reg = time.time() - start_time
print(f"Time to read region (dlup v0.7.0): {dlup_reg:.5f}s")
# print(f"Number of polygons in region (dlup v0.7.0): {len(region)}")
print()

start_time = time.time()
annotations2 = WsiAnnotationsExperimental.from_geojson(fn)
print(f"Time to load annotations (dlup v0.8.0.beta): {(time.time() - start_time):.5f}s")

start_time = time.time()
region2 = annotations2.read_region((0, 0), 1.0, bbox[1])
# Let's get all label names
labels0 = set([_.label for _ in region2])

new_reg = time.time() - start_time
print(f"Time to read (dlup v0.8.0.beta): {new_reg:.5f}s")
print(f"Number of polygons in region (dlup v0.8.0.beta): {len(region2)}")
print("After sorting\n")
container.sort_polygons(lambda x: x.area, False)
for sample in container.polygons:
print(sample.area, sample.pointer_id)

print(f"Factor faster: {((dlup_reg / new_reg)):.3f}")
labels1 = set([_.label for _ in region2])
container.sort_polygons(lambda x: x.area, True)
for sample in container.polygons:
print(sample.area, sample.pointer_id)

assert labels0 == labels1 != []
container.rebuild_rtree()

print(container.polygons)
container.sort_polygons(lambda x: x.get_field("label"), False)
print(container.polygons)
for sample in container.polygons:
print(sample.label, sample.pointer_id)

# print(annotations2._layers.polygons[:2])
# print(annotations2._layers.polygons[0].label)

0 comments on commit 8d2b241

Please sign in to comment.