From cde22212cf50a01f4fe801f7390513fd8985eda2 Mon Sep 17 00:00:00 2001 From: Frank Liu Date: Tue, 3 Sep 2024 14:25:43 -0700 Subject: [PATCH] [api] Adds mask generation task for sam2 model (#3450) --- api/src/main/java/ai/djl/Application.java | 9 +++++ .../java/ai/djl/pytorch/zoo/PtModelZoo.java | 6 ++- .../sam2-hiera-large-gpu/metadata.json | 37 +++++++++++++++++++ .../pytorch/sam2-hiera-large/metadata.json | 37 +++++++++++++++++++ .../pytorch/sam2-hiera-tiny-gpu/metadata.json | 37 +++++++++++++++++++ .../djl/pytorch/sam2-hiera-tiny/metadata.json | 37 +++++++++++++++++++ .../sam2-hiera-large-gpu/metadata.json | 37 +++++++++++++++++++ .../pytorch/sam2-hiera-tiny-gpu/metadata.json | 37 +++++++++++++++++++ .../inference/cv/SegmentAnything2.java | 2 +- 9 files changed, 236 insertions(+), 3 deletions(-) create mode 100644 engines/pytorch/pytorch-model-zoo/src/test/resources/mlrepo/model/cv/mask_generation/ai/djl/pytorch/sam2-hiera-large-gpu/metadata.json create mode 100644 engines/pytorch/pytorch-model-zoo/src/test/resources/mlrepo/model/cv/mask_generation/ai/djl/pytorch/sam2-hiera-large/metadata.json create mode 100644 engines/pytorch/pytorch-model-zoo/src/test/resources/mlrepo/model/cv/mask_generation/ai/djl/pytorch/sam2-hiera-tiny-gpu/metadata.json create mode 100644 engines/pytorch/pytorch-model-zoo/src/test/resources/mlrepo/model/cv/mask_generation/ai/djl/pytorch/sam2-hiera-tiny/metadata.json create mode 100644 engines/pytorch/pytorch-model-zoo/src/test/resources/mlrepo/model/cv/object_detection/ai/djl/pytorch/sam2-hiera-large-gpu/metadata.json create mode 100644 engines/pytorch/pytorch-model-zoo/src/test/resources/mlrepo/model/cv/object_detection/ai/djl/pytorch/sam2-hiera-tiny-gpu/metadata.json diff --git a/api/src/main/java/ai/djl/Application.java b/api/src/main/java/ai/djl/Application.java index 13ac1882b86..0f3d49d5df0 100644 --- a/api/src/main/java/ai/djl/Application.java +++ b/api/src/main/java/ai/djl/Application.java @@ -62,6 +62,9 @@ public static Application of(String path) { case "cv/instance_segmentation": case "instance_segmentation": return CV.INSTANCE_SEGMENTATION; + case "cv/mask_generation": + case "mask_generation": + return CV.MASK_GENERATION; case "cv/pose_estimation": case "pose_estimation": return CV.POSE_ESTIMATION; @@ -196,6 +199,12 @@ public interface CV { */ Application INSTANCE_SEGMENTATION = new Application("cv/instance_segmentation"); + /** + * An application that generates masks that identify a specific object or region of interest + * in a given image. + */ + Application MASK_GENERATION = new Application("cv/mask_generation"); + /** * An application that accepts an image of a single person and returns the {@link * ai.djl.modality.cv.output.Joints} locations of the person. diff --git a/engines/pytorch/pytorch-model-zoo/src/main/java/ai/djl/pytorch/zoo/PtModelZoo.java b/engines/pytorch/pytorch-model-zoo/src/main/java/ai/djl/pytorch/zoo/PtModelZoo.java index 0ed61eff528..b92fea429b3 100644 --- a/engines/pytorch/pytorch-model-zoo/src/main/java/ai/djl/pytorch/zoo/PtModelZoo.java +++ b/engines/pytorch/pytorch-model-zoo/src/main/java/ai/djl/pytorch/zoo/PtModelZoo.java @@ -43,8 +43,10 @@ public class PtModelZoo extends ModelZoo { addModel( REPOSITORY.model(CV.IMAGE_CLASSIFICATION, GROUP_ID, "resnet18_embedding", "0.0.1")); addModel(REPOSITORY.model(CV.INSTANCE_SEGMENTATION, GROUP_ID, "yolov8n-seg", "0.0.1")); - addModel(REPOSITORY.model(CV.OBJECT_DETECTION, GROUP_ID, "sam2-hiera-tiny", "0.0.1")); - addModel(REPOSITORY.model(CV.OBJECT_DETECTION, GROUP_ID, "sam2-hiera-large", "0.0.1")); + addModel(REPOSITORY.model(CV.MASK_GENERATION, GROUP_ID, "sam2-hiera-tiny", "0.0.1")); + addModel(REPOSITORY.model(CV.MASK_GENERATION, GROUP_ID, "sam2-hiera-tiny-gpu", "0.0.1")); + addModel(REPOSITORY.model(CV.MASK_GENERATION, GROUP_ID, "sam2-hiera-large", "0.0.1")); + addModel(REPOSITORY.model(CV.MASK_GENERATION, GROUP_ID, "sam2-hiera-large-gpu", "0.0.1")); addModel(REPOSITORY.model(CV.OBJECT_DETECTION, GROUP_ID, "ssd", "0.0.1")); addModel(REPOSITORY.model(CV.OBJECT_DETECTION, GROUP_ID, "yolov5s", "0.0.1")); addModel(REPOSITORY.model(CV.OBJECT_DETECTION, GROUP_ID, "yolov8n", "0.0.1")); diff --git a/engines/pytorch/pytorch-model-zoo/src/test/resources/mlrepo/model/cv/mask_generation/ai/djl/pytorch/sam2-hiera-large-gpu/metadata.json b/engines/pytorch/pytorch-model-zoo/src/test/resources/mlrepo/model/cv/mask_generation/ai/djl/pytorch/sam2-hiera-large-gpu/metadata.json new file mode 100644 index 00000000000..6fe7c7e3b3d --- /dev/null +++ b/engines/pytorch/pytorch-model-zoo/src/test/resources/mlrepo/model/cv/mask_generation/ai/djl/pytorch/sam2-hiera-large-gpu/metadata.json @@ -0,0 +1,37 @@ +{ + "metadataVersion": "0.2", + "resourceType": "model", + "application": "cv/mask_generation", + "groupId": "ai.djl.pytorch", + "artifactId": "sam2-hiera-large-gpu", + "name": "Mask generation", + "description": "Segment Anything in Images", + "website": "http://www.djl.ai/engines/pytorch/model-zoo", + "licenses": { + "license": { + "name": "The Apache License, Version 2.0", + "url": "https://www.apache.org/licenses/LICENSE-2.0" + } + }, + "artifacts": [ + { + "version": "0.0.1", + "snapshot": false, + "name": "sam2-hiera-large-gpu", + "arguments": { + "translatorFactory": "ai.djl.modality.cv.translator.Sam2TranslatorFactory" + }, + "options": { + "mapLocation": "true" + }, + "files": { + "model": { + "uri": "0.0.1/sam2-hiera-large-gpu.zip", + "name": "", + "sha1Hash": "0fb0399ca091edf54378348b7b99777bf8776603", + "size": 834565732 + } + } + } + ] +} diff --git a/engines/pytorch/pytorch-model-zoo/src/test/resources/mlrepo/model/cv/mask_generation/ai/djl/pytorch/sam2-hiera-large/metadata.json b/engines/pytorch/pytorch-model-zoo/src/test/resources/mlrepo/model/cv/mask_generation/ai/djl/pytorch/sam2-hiera-large/metadata.json new file mode 100644 index 00000000000..b2fd37e037f --- /dev/null +++ b/engines/pytorch/pytorch-model-zoo/src/test/resources/mlrepo/model/cv/mask_generation/ai/djl/pytorch/sam2-hiera-large/metadata.json @@ -0,0 +1,37 @@ +{ + "metadataVersion": "0.2", + "resourceType": "model", + "application": "cv/mask_generation", + "groupId": "ai.djl.pytorch", + "artifactId": "sam2-hiera-large", + "name": "Mask generation", + "description": "Segment Anything in Images", + "website": "http://www.djl.ai/engines/pytorch/model-zoo", + "licenses": { + "license": { + "name": "The Apache License, Version 2.0", + "url": "https://www.apache.org/licenses/LICENSE-2.0" + } + }, + "artifacts": [ + { + "version": "0.0.1", + "snapshot": false, + "name": "sam2-hiera-large", + "arguments": { + "translatorFactory": "ai.djl.modality.cv.translator.Sam2TranslatorFactory" + }, + "options": { + "mapLocation": "true" + }, + "files": { + "model": { + "uri": "0.0.1/sam2-hiera-large.zip", + "name": "", + "sha1Hash": "5688c31f52ae086e0c17dd235f4047245dc42eb3", + "size": 834572454 + } + } + } + ] +} diff --git a/engines/pytorch/pytorch-model-zoo/src/test/resources/mlrepo/model/cv/mask_generation/ai/djl/pytorch/sam2-hiera-tiny-gpu/metadata.json b/engines/pytorch/pytorch-model-zoo/src/test/resources/mlrepo/model/cv/mask_generation/ai/djl/pytorch/sam2-hiera-tiny-gpu/metadata.json new file mode 100644 index 00000000000..7b1b11df363 --- /dev/null +++ b/engines/pytorch/pytorch-model-zoo/src/test/resources/mlrepo/model/cv/mask_generation/ai/djl/pytorch/sam2-hiera-tiny-gpu/metadata.json @@ -0,0 +1,37 @@ +{ + "metadataVersion": "0.2", + "resourceType": "model", + "application": "cv/mask_generation", + "groupId": "ai.djl.pytorch", + "artifactId": "sam2-hiera-tiny-gpu", + "name": "Mask generation", + "description": "Segment Anything in Images", + "website": "http://www.djl.ai/engines/pytorch/model-zoo", + "licenses": { + "license": { + "name": "The Apache License, Version 2.0", + "url": "https://www.apache.org/licenses/LICENSE-2.0" + } + }, + "artifacts": [ + { + "version": "0.0.1", + "snapshot": false, + "name": "sam2-hiera-tiny-gpu", + "arguments": { + "translatorFactory": "ai.djl.modality.cv.translator.Sam2TranslatorFactory" + }, + "options": { + "mapLocation": "true" + }, + "files": { + "model": { + "uri": "0.0.1/sam2-hiera-tiny-gpu.zip", + "name": "", + "sha1Hash": "41440632b2f2d481282b8cd7004d37cc3c6f9a16", + "size": 145037570 + } + } + } + ] +} diff --git a/engines/pytorch/pytorch-model-zoo/src/test/resources/mlrepo/model/cv/mask_generation/ai/djl/pytorch/sam2-hiera-tiny/metadata.json b/engines/pytorch/pytorch-model-zoo/src/test/resources/mlrepo/model/cv/mask_generation/ai/djl/pytorch/sam2-hiera-tiny/metadata.json new file mode 100644 index 00000000000..728639314bf --- /dev/null +++ b/engines/pytorch/pytorch-model-zoo/src/test/resources/mlrepo/model/cv/mask_generation/ai/djl/pytorch/sam2-hiera-tiny/metadata.json @@ -0,0 +1,37 @@ +{ + "metadataVersion": "0.2", + "resourceType": "model", + "application": "cv/mask_generation", + "groupId": "ai.djl.pytorch", + "artifactId": "sam2-hiera-tiny", + "name": "Mask generation", + "description": "Segment Anything in Images", + "website": "http://www.djl.ai/engines/pytorch/model-zoo", + "licenses": { + "license": { + "name": "The Apache License, Version 2.0", + "url": "https://www.apache.org/licenses/LICENSE-2.0" + } + }, + "artifacts": [ + { + "version": "0.0.1", + "snapshot": false, + "name": "sam2-hiera-tiny", + "arguments": { + "translatorFactory": "ai.djl.modality.cv.translator.Sam2TranslatorFactory" + }, + "options": { + "mapLocation": "true" + }, + "files": { + "model": { + "uri": "0.0.1/sam2-hiera-tiny.zip", + "name": "", + "sha1Hash": "c1eb858f0e8d53c7ec7c94434cd39b69d61db449", + "size": 145062696 + } + } + } + ] +} diff --git a/engines/pytorch/pytorch-model-zoo/src/test/resources/mlrepo/model/cv/object_detection/ai/djl/pytorch/sam2-hiera-large-gpu/metadata.json b/engines/pytorch/pytorch-model-zoo/src/test/resources/mlrepo/model/cv/object_detection/ai/djl/pytorch/sam2-hiera-large-gpu/metadata.json new file mode 100644 index 00000000000..a3de5022568 --- /dev/null +++ b/engines/pytorch/pytorch-model-zoo/src/test/resources/mlrepo/model/cv/object_detection/ai/djl/pytorch/sam2-hiera-large-gpu/metadata.json @@ -0,0 +1,37 @@ +{ + "metadataVersion": "0.2", + "resourceType": "model", + "application": "cv/object_detection", + "groupId": "ai.djl.pytorch", + "artifactId": "sam2-hiera-large-gpu", + "name": "Mask generation", + "description": "Segment Anything in Images", + "website": "http://www.djl.ai/engines/pytorch/model-zoo", + "licenses": { + "license": { + "name": "The Apache License, Version 2.0", + "url": "https://www.apache.org/licenses/LICENSE-2.0" + } + }, + "artifacts": [ + { + "version": "0.0.1", + "snapshot": false, + "name": "sam2-hiera-large-gpu", + "arguments": { + "translatorFactory": "ai.djl.modality.cv.translator.Sam2TranslatorFactory" + }, + "options": { + "mapLocation": "true" + }, + "files": { + "model": { + "uri": "0.0.1/sam2-hiera-large-gpu.zip", + "name": "", + "sha1Hash": "0fb0399ca091edf54378348b7b99777bf8776603", + "size": 834565732 + } + } + } + ] +} diff --git a/engines/pytorch/pytorch-model-zoo/src/test/resources/mlrepo/model/cv/object_detection/ai/djl/pytorch/sam2-hiera-tiny-gpu/metadata.json b/engines/pytorch/pytorch-model-zoo/src/test/resources/mlrepo/model/cv/object_detection/ai/djl/pytorch/sam2-hiera-tiny-gpu/metadata.json new file mode 100644 index 00000000000..11cc455999b --- /dev/null +++ b/engines/pytorch/pytorch-model-zoo/src/test/resources/mlrepo/model/cv/object_detection/ai/djl/pytorch/sam2-hiera-tiny-gpu/metadata.json @@ -0,0 +1,37 @@ +{ + "metadataVersion": "0.2", + "resourceType": "model", + "application": "cv/object_detection", + "groupId": "ai.djl.pytorch", + "artifactId": "sam2-hiera-tiny-gpu", + "name": "Mask generation", + "description": "Segment Anything in Images", + "website": "http://www.djl.ai/engines/pytorch/model-zoo", + "licenses": { + "license": { + "name": "The Apache License, Version 2.0", + "url": "https://www.apache.org/licenses/LICENSE-2.0" + } + }, + "artifacts": [ + { + "version": "0.0.1", + "snapshot": false, + "name": "sam2-hiera-tiny-gpu", + "arguments": { + "translatorFactory": "ai.djl.modality.cv.translator.Sam2TranslatorFactory" + }, + "options": { + "mapLocation": "true" + }, + "files": { + "model": { + "uri": "0.0.1/sam2-hiera-tiny-gpu.zip", + "name": "", + "sha1Hash": "41440632b2f2d481282b8cd7004d37cc3c6f9a16", + "size": 145037570 + } + } + } + ] +} diff --git a/examples/src/main/java/ai/djl/examples/inference/cv/SegmentAnything2.java b/examples/src/main/java/ai/djl/examples/inference/cv/SegmentAnything2.java index 7b9fb0204b6..8fff3a94127 100644 --- a/examples/src/main/java/ai/djl/examples/inference/cv/SegmentAnything2.java +++ b/examples/src/main/java/ai/djl/examples/inference/cv/SegmentAnything2.java @@ -53,7 +53,7 @@ public static DetectedObjects predict() throws IOException, ModelException, Tran .setTypes(Sam2Input.class, DetectedObjects.class) .optModelUrls("djl://ai.djl.pytorch/sam2-hiera-tiny") .optEngine("PyTorch") - .optDevice(Device.cpu()) // this model only works on CPU + .optDevice(Device.cpu()) // use sam2-hiera-tiny-gpu for GPU .optTranslator(new Sam2Translator()) .optProgress(new ProgressBar()) .build();