From cde22212cf50a01f4fe801f7390513fd8985eda2 Mon Sep 17 00:00:00 2001
From: Frank Liu <frankfliu2000@gmail.com>
Date: Tue, 3 Sep 2024 14:25:43 -0700
Subject: [PATCH] [api] Adds mask generation task for sam2 model (#3450)

---
 api/src/main/java/ai/djl/Application.java     |  9 +++++
 .../java/ai/djl/pytorch/zoo/PtModelZoo.java   |  6 ++-
 .../sam2-hiera-large-gpu/metadata.json        | 37 +++++++++++++++++++
 .../pytorch/sam2-hiera-large/metadata.json    | 37 +++++++++++++++++++
 .../pytorch/sam2-hiera-tiny-gpu/metadata.json | 37 +++++++++++++++++++
 .../djl/pytorch/sam2-hiera-tiny/metadata.json | 37 +++++++++++++++++++
 .../sam2-hiera-large-gpu/metadata.json        | 37 +++++++++++++++++++
 .../pytorch/sam2-hiera-tiny-gpu/metadata.json | 37 +++++++++++++++++++
 .../inference/cv/SegmentAnything2.java        |  2 +-
 9 files changed, 236 insertions(+), 3 deletions(-)
 create mode 100644 engines/pytorch/pytorch-model-zoo/src/test/resources/mlrepo/model/cv/mask_generation/ai/djl/pytorch/sam2-hiera-large-gpu/metadata.json
 create mode 100644 engines/pytorch/pytorch-model-zoo/src/test/resources/mlrepo/model/cv/mask_generation/ai/djl/pytorch/sam2-hiera-large/metadata.json
 create mode 100644 engines/pytorch/pytorch-model-zoo/src/test/resources/mlrepo/model/cv/mask_generation/ai/djl/pytorch/sam2-hiera-tiny-gpu/metadata.json
 create mode 100644 engines/pytorch/pytorch-model-zoo/src/test/resources/mlrepo/model/cv/mask_generation/ai/djl/pytorch/sam2-hiera-tiny/metadata.json
 create mode 100644 engines/pytorch/pytorch-model-zoo/src/test/resources/mlrepo/model/cv/object_detection/ai/djl/pytorch/sam2-hiera-large-gpu/metadata.json
 create mode 100644 engines/pytorch/pytorch-model-zoo/src/test/resources/mlrepo/model/cv/object_detection/ai/djl/pytorch/sam2-hiera-tiny-gpu/metadata.json

diff --git a/api/src/main/java/ai/djl/Application.java b/api/src/main/java/ai/djl/Application.java
index 13ac1882b86..0f3d49d5df0 100644
--- a/api/src/main/java/ai/djl/Application.java
+++ b/api/src/main/java/ai/djl/Application.java
@@ -62,6 +62,9 @@ public static Application of(String path) {
             case "cv/instance_segmentation":
             case "instance_segmentation":
                 return CV.INSTANCE_SEGMENTATION;
+            case "cv/mask_generation":
+            case "mask_generation":
+                return CV.MASK_GENERATION;
             case "cv/pose_estimation":
             case "pose_estimation":
                 return CV.POSE_ESTIMATION;
@@ -196,6 +199,12 @@ public interface CV {
          */
         Application INSTANCE_SEGMENTATION = new Application("cv/instance_segmentation");
 
+        /**
+         * An application that generates masks identifying a specific object or region of interest
+         * in a given image.
+         */
+        Application MASK_GENERATION = new Application("cv/mask_generation");
+
         /**
          * An application that accepts an image of a single person and returns the {@link
          * ai.djl.modality.cv.output.Joints} locations of the person.
diff --git a/engines/pytorch/pytorch-model-zoo/src/main/java/ai/djl/pytorch/zoo/PtModelZoo.java b/engines/pytorch/pytorch-model-zoo/src/main/java/ai/djl/pytorch/zoo/PtModelZoo.java
index 0ed61eff528..b92fea429b3 100644
--- a/engines/pytorch/pytorch-model-zoo/src/main/java/ai/djl/pytorch/zoo/PtModelZoo.java
+++ b/engines/pytorch/pytorch-model-zoo/src/main/java/ai/djl/pytorch/zoo/PtModelZoo.java
@@ -43,8 +43,10 @@ public class PtModelZoo extends ModelZoo {
         addModel(
                 REPOSITORY.model(CV.IMAGE_CLASSIFICATION, GROUP_ID, "resnet18_embedding", "0.0.1"));
         addModel(REPOSITORY.model(CV.INSTANCE_SEGMENTATION, GROUP_ID, "yolov8n-seg", "0.0.1"));
-        addModel(REPOSITORY.model(CV.OBJECT_DETECTION, GROUP_ID, "sam2-hiera-tiny", "0.0.1"));
-        addModel(REPOSITORY.model(CV.OBJECT_DETECTION, GROUP_ID, "sam2-hiera-large", "0.0.1"));
+        addModel(REPOSITORY.model(CV.MASK_GENERATION, GROUP_ID, "sam2-hiera-tiny", "0.0.1"));
+        addModel(REPOSITORY.model(CV.MASK_GENERATION, GROUP_ID, "sam2-hiera-tiny-gpu", "0.0.1"));
+        addModel(REPOSITORY.model(CV.MASK_GENERATION, GROUP_ID, "sam2-hiera-large", "0.0.1"));
+        addModel(REPOSITORY.model(CV.MASK_GENERATION, GROUP_ID, "sam2-hiera-large-gpu", "0.0.1"));
         addModel(REPOSITORY.model(CV.OBJECT_DETECTION, GROUP_ID, "ssd", "0.0.1"));
         addModel(REPOSITORY.model(CV.OBJECT_DETECTION, GROUP_ID, "yolov5s", "0.0.1"));
         addModel(REPOSITORY.model(CV.OBJECT_DETECTION, GROUP_ID, "yolov8n", "0.0.1"));
diff --git a/engines/pytorch/pytorch-model-zoo/src/test/resources/mlrepo/model/cv/mask_generation/ai/djl/pytorch/sam2-hiera-large-gpu/metadata.json b/engines/pytorch/pytorch-model-zoo/src/test/resources/mlrepo/model/cv/mask_generation/ai/djl/pytorch/sam2-hiera-large-gpu/metadata.json
new file mode 100644
index 00000000000..6fe7c7e3b3d
--- /dev/null
+++ b/engines/pytorch/pytorch-model-zoo/src/test/resources/mlrepo/model/cv/mask_generation/ai/djl/pytorch/sam2-hiera-large-gpu/metadata.json
@@ -0,0 +1,37 @@
+{
+  "metadataVersion": "0.2",
+  "resourceType": "model",
+  "application": "cv/mask_generation",
+  "groupId": "ai.djl.pytorch",
+  "artifactId": "sam2-hiera-large-gpu",
+  "name": "Mask generation",
+  "description": "Segment Anything in Images",
+  "website": "http://www.djl.ai/engines/pytorch/model-zoo",
+  "licenses": {
+    "license": {
+      "name": "The Apache License, Version 2.0",
+      "url": "https://www.apache.org/licenses/LICENSE-2.0"
+    }
+  },
+  "artifacts": [
+    {
+      "version": "0.0.1",
+      "snapshot": false,
+      "name": "sam2-hiera-large-gpu",
+      "arguments": {
+        "translatorFactory": "ai.djl.modality.cv.translator.Sam2TranslatorFactory"
+      },
+      "options": {
+        "mapLocation": "true"
+      },
+      "files": {
+        "model": {
+          "uri": "0.0.1/sam2-hiera-large-gpu.zip",
+          "name": "",
+          "sha1Hash": "0fb0399ca091edf54378348b7b99777bf8776603",
+          "size": 834565732
+        }
+      }
+    }
+  ]
+}
diff --git a/engines/pytorch/pytorch-model-zoo/src/test/resources/mlrepo/model/cv/mask_generation/ai/djl/pytorch/sam2-hiera-large/metadata.json b/engines/pytorch/pytorch-model-zoo/src/test/resources/mlrepo/model/cv/mask_generation/ai/djl/pytorch/sam2-hiera-large/metadata.json
new file mode 100644
index 00000000000..b2fd37e037f
--- /dev/null
+++ b/engines/pytorch/pytorch-model-zoo/src/test/resources/mlrepo/model/cv/mask_generation/ai/djl/pytorch/sam2-hiera-large/metadata.json
@@ -0,0 +1,37 @@
+{
+  "metadataVersion": "0.2",
+  "resourceType": "model",
+  "application": "cv/mask_generation",
+  "groupId": "ai.djl.pytorch",
+  "artifactId": "sam2-hiera-large",
+  "name": "Mask generation",
+  "description": "Segment Anything in Images",
+  "website": "http://www.djl.ai/engines/pytorch/model-zoo",
+  "licenses": {
+    "license": {
+      "name": "The Apache License, Version 2.0",
+      "url": "https://www.apache.org/licenses/LICENSE-2.0"
+    }
+  },
+  "artifacts": [
+    {
+      "version": "0.0.1",
+      "snapshot": false,
+      "name": "sam2-hiera-large",
+      "arguments": {
+        "translatorFactory": "ai.djl.modality.cv.translator.Sam2TranslatorFactory"
+      },
+      "options": {
+        "mapLocation": "true"
+      },
+      "files": {
+        "model": {
+          "uri": "0.0.1/sam2-hiera-large.zip",
+          "name": "",
+          "sha1Hash": "5688c31f52ae086e0c17dd235f4047245dc42eb3",
+          "size": 834572454
+        }
+      }
+    }
+  ]
+}
diff --git a/engines/pytorch/pytorch-model-zoo/src/test/resources/mlrepo/model/cv/mask_generation/ai/djl/pytorch/sam2-hiera-tiny-gpu/metadata.json b/engines/pytorch/pytorch-model-zoo/src/test/resources/mlrepo/model/cv/mask_generation/ai/djl/pytorch/sam2-hiera-tiny-gpu/metadata.json
new file mode 100644
index 00000000000..7b1b11df363
--- /dev/null
+++ b/engines/pytorch/pytorch-model-zoo/src/test/resources/mlrepo/model/cv/mask_generation/ai/djl/pytorch/sam2-hiera-tiny-gpu/metadata.json
@@ -0,0 +1,37 @@
+{
+  "metadataVersion": "0.2",
+  "resourceType": "model",
+  "application": "cv/mask_generation",
+  "groupId": "ai.djl.pytorch",
+  "artifactId": "sam2-hiera-tiny-gpu",
+  "name": "Mask generation",
+  "description": "Segment Anything in Images",
+  "website": "http://www.djl.ai/engines/pytorch/model-zoo",
+  "licenses": {
+    "license": {
+      "name": "The Apache License, Version 2.0",
+      "url": "https://www.apache.org/licenses/LICENSE-2.0"
+    }
+  },
+  "artifacts": [
+    {
+      "version": "0.0.1",
+      "snapshot": false,
+      "name": "sam2-hiera-tiny-gpu",
+      "arguments": {
+        "translatorFactory": "ai.djl.modality.cv.translator.Sam2TranslatorFactory"
+      },
+      "options": {
+        "mapLocation": "true"
+      },
+      "files": {
+        "model": {
+          "uri": "0.0.1/sam2-hiera-tiny-gpu.zip",
+          "name": "",
+          "sha1Hash": "41440632b2f2d481282b8cd7004d37cc3c6f9a16",
+          "size": 145037570
+        }
+      }
+    }
+  ]
+}
diff --git a/engines/pytorch/pytorch-model-zoo/src/test/resources/mlrepo/model/cv/mask_generation/ai/djl/pytorch/sam2-hiera-tiny/metadata.json b/engines/pytorch/pytorch-model-zoo/src/test/resources/mlrepo/model/cv/mask_generation/ai/djl/pytorch/sam2-hiera-tiny/metadata.json
new file mode 100644
index 00000000000..728639314bf
--- /dev/null
+++ b/engines/pytorch/pytorch-model-zoo/src/test/resources/mlrepo/model/cv/mask_generation/ai/djl/pytorch/sam2-hiera-tiny/metadata.json
@@ -0,0 +1,37 @@
+{
+  "metadataVersion": "0.2",
+  "resourceType": "model",
+  "application": "cv/mask_generation",
+  "groupId": "ai.djl.pytorch",
+  "artifactId": "sam2-hiera-tiny",
+  "name": "Mask generation",
+  "description": "Segment Anything in Images",
+  "website": "http://www.djl.ai/engines/pytorch/model-zoo",
+  "licenses": {
+    "license": {
+      "name": "The Apache License, Version 2.0",
+      "url": "https://www.apache.org/licenses/LICENSE-2.0"
+    }
+  },
+  "artifacts": [
+    {
+      "version": "0.0.1",
+      "snapshot": false,
+      "name": "sam2-hiera-tiny",
+      "arguments": {
+        "translatorFactory": "ai.djl.modality.cv.translator.Sam2TranslatorFactory"
+      },
+      "options": {
+        "mapLocation": "true"
+      },
+      "files": {
+        "model": {
+          "uri": "0.0.1/sam2-hiera-tiny.zip",
+          "name": "",
+          "sha1Hash": "c1eb858f0e8d53c7ec7c94434cd39b69d61db449",
+          "size": 145062696
+        }
+      }
+    }
+  ]
+}
diff --git a/engines/pytorch/pytorch-model-zoo/src/test/resources/mlrepo/model/cv/object_detection/ai/djl/pytorch/sam2-hiera-large-gpu/metadata.json b/engines/pytorch/pytorch-model-zoo/src/test/resources/mlrepo/model/cv/object_detection/ai/djl/pytorch/sam2-hiera-large-gpu/metadata.json
new file mode 100644
index 00000000000..a3de5022568
--- /dev/null
+++ b/engines/pytorch/pytorch-model-zoo/src/test/resources/mlrepo/model/cv/object_detection/ai/djl/pytorch/sam2-hiera-large-gpu/metadata.json
@@ -0,0 +1,37 @@
+{
+  "metadataVersion": "0.2",
+  "resourceType": "model",
+  "application": "cv/object_detection",
+  "groupId": "ai.djl.pytorch",
+  "artifactId": "sam2-hiera-large-gpu",
+  "name": "Mask generation",
+  "description": "Segment Anything in Images",
+  "website": "http://www.djl.ai/engines/pytorch/model-zoo",
+  "licenses": {
+    "license": {
+      "name": "The Apache License, Version 2.0",
+      "url": "https://www.apache.org/licenses/LICENSE-2.0"
+    }
+  },
+  "artifacts": [
+    {
+      "version": "0.0.1",
+      "snapshot": false,
+      "name": "sam2-hiera-large-gpu",
+      "arguments": {
+        "translatorFactory": "ai.djl.modality.cv.translator.Sam2TranslatorFactory"
+      },
+      "options": {
+        "mapLocation": "true"
+      },
+      "files": {
+        "model": {
+          "uri": "0.0.1/sam2-hiera-large-gpu.zip",
+          "name": "",
+          "sha1Hash": "0fb0399ca091edf54378348b7b99777bf8776603",
+          "size": 834565732
+        }
+      }
+    }
+  ]
+}
diff --git a/engines/pytorch/pytorch-model-zoo/src/test/resources/mlrepo/model/cv/object_detection/ai/djl/pytorch/sam2-hiera-tiny-gpu/metadata.json b/engines/pytorch/pytorch-model-zoo/src/test/resources/mlrepo/model/cv/object_detection/ai/djl/pytorch/sam2-hiera-tiny-gpu/metadata.json
new file mode 100644
index 00000000000..11cc455999b
--- /dev/null
+++ b/engines/pytorch/pytorch-model-zoo/src/test/resources/mlrepo/model/cv/object_detection/ai/djl/pytorch/sam2-hiera-tiny-gpu/metadata.json
@@ -0,0 +1,37 @@
+{
+  "metadataVersion": "0.2",
+  "resourceType": "model",
+  "application": "cv/object_detection",
+  "groupId": "ai.djl.pytorch",
+  "artifactId": "sam2-hiera-tiny-gpu",
+  "name": "Mask generation",
+  "description": "Segment Anything in Images",
+  "website": "http://www.djl.ai/engines/pytorch/model-zoo",
+  "licenses": {
+    "license": {
+      "name": "The Apache License, Version 2.0",
+      "url": "https://www.apache.org/licenses/LICENSE-2.0"
+    }
+  },
+  "artifacts": [
+    {
+      "version": "0.0.1",
+      "snapshot": false,
+      "name": "sam2-hiera-tiny-gpu",
+      "arguments": {
+        "translatorFactory": "ai.djl.modality.cv.translator.Sam2TranslatorFactory"
+      },
+      "options": {
+        "mapLocation": "true"
+      },
+      "files": {
+        "model": {
+          "uri": "0.0.1/sam2-hiera-tiny-gpu.zip",
+          "name": "",
+          "sha1Hash": "41440632b2f2d481282b8cd7004d37cc3c6f9a16",
+          "size": 145037570
+        }
+      }
+    }
+  ]
+}
diff --git a/examples/src/main/java/ai/djl/examples/inference/cv/SegmentAnything2.java b/examples/src/main/java/ai/djl/examples/inference/cv/SegmentAnything2.java
index 7b9fb0204b6..8fff3a94127 100644
--- a/examples/src/main/java/ai/djl/examples/inference/cv/SegmentAnything2.java
+++ b/examples/src/main/java/ai/djl/examples/inference/cv/SegmentAnything2.java
@@ -53,7 +53,7 @@ public static DetectedObjects predict() throws IOException, ModelException, Tran
                         .setTypes(Sam2Input.class, DetectedObjects.class)
                         .optModelUrls("djl://ai.djl.pytorch/sam2-hiera-tiny")
                         .optEngine("PyTorch")
-                        .optDevice(Device.cpu()) // this model only works on CPU
+                        .optDevice(Device.cpu()) // CPU-only; use sam2-hiera-tiny-gpu for GPU
                         .optTranslator(new Sam2Translator())
                         .optProgress(new ProgressBar())
                         .build();