From 0aed452de4ad28ad13212e8d563ef352aa476e8c Mon Sep 17 00:00:00 2001
From: Bartek Pacia <barpac02@gmail.com>
Date: Tue, 27 Aug 2024 13:03:50 +0100
Subject: [PATCH] finish adding the 2 new commands

---
 maestro-ai/src/main/java/maestro/ai/AI.kt     |  7 ---
 .../src/main/java/maestro/ai/DemoApp.kt       | 16 ++++++-
 .../src/main/java/maestro/ai/Prediction.kt    | 45 ++++++++++++-------
 .../main/resources/checkAssertion_schema.json | 25 -----------
 .../cli/runner/resultview/AnsiResultView.kt   |  4 +-
 .../main/java/maestro/orchestra/Commands.kt   |  2 +-
 .../main/java/maestro/orchestra/Orchestra.kt  | 15 +++----
 7 files changed, 51 insertions(+), 63 deletions(-)
 delete mode 100644 maestro-ai/src/main/resources/checkAssertion_schema.json
diff --git a/maestro-ai/src/main/java/maestro/ai/AI.kt b/maestro-ai/src/main/java/maestro/ai/AI.kt
index 1817c1916f..119a630fad 100644
--- a/maestro-ai/src/main/java/maestro/ai/AI.kt
+++ b/maestro-ai/src/main/java/maestro/ai/AI.kt
@@ -41,13 +41,6 @@ abstract class AI(
         // * OpenAI: https://platform.openai.com/docs/guides/structured-outputs
         // * Gemini: https://ai.google.dev/gemini-api/docs/json-mode
 
-        val checkAssertion: String = run {
-            val resourceStream = this::class.java.getResourceAsStream("/checkAssertion_schema.json")
-                ?: throw IllegalStateException("Could not find checkAssertion_schema.json in resources")
-
-            resourceStream.bufferedReader().use { it.readText() }
-        }
-
         val askForDefectsSchema: String = run {
             val resourceStream = this::class.java.getResourceAsStream("/askForDefects_schema.json")
                 ?: throw IllegalStateException("Could not find askForDefects_schema.json in resources")
diff --git a/maestro-ai/src/main/java/maestro/ai/DemoApp.kt b/maestro-ai/src/main/java/maestro/ai/DemoApp.kt
index c64d10cbcb..d75266b321 100644
--- a/maestro-ai/src/main/java/maestro/ai/DemoApp.kt
+++ b/maestro-ai/src/main/java/maestro/ai/DemoApp.kt
@@ -117,13 +117,25 @@ class DemoApp : CliktCommand() {
             val bytes = testCase.screenshot.readBytes()
 
             val job = async {
-                val defects = Prediction.findDefects(
+                val defects = if (testCase.prompt == null) Prediction.findDefects(
                     aiClient = aiClient,
                     screen = bytes,
                     previousFalsePositives = listOf(),
                     printPrompt = showPrompts,
                     printRawResponse = showRawResponse,
-                )
+                ) else {
+                    val result = Prediction.performAssertion(
+                        aiClient = aiClient,
+                        screen = bytes,
+                        assertion = testCase.prompt,
+                        printPrompt = showPrompts,
+                        printRawResponse = showRawResponse,
+                    )
+
+                    if (result == null) emptyList()
+                    else listOf(result)
+                }
+
                 verify(testCase, defects)
             }
 
diff --git a/maestro-ai/src/main/java/maestro/ai/Prediction.kt b/maestro-ai/src/main/java/maestro/ai/Prediction.kt
index c416ea4253..aa85462b1a 100644
--- a/maestro-ai/src/main/java/maestro/ai/Prediction.kt
+++ b/maestro-ai/src/main/java/maestro/ai/Prediction.kt
@@ -12,16 +12,10 @@ data class Defect(
 )
 
 @Serializable
-private data class FindDefectsResponse(
+private data class ModelResponse(
     val defects: List<Defect>,
 )
 
-@Serializable
-data class PerformAssertionResult(
-    val passed: Boolean,
-    val reasoning: String,
-)
-
 object Prediction {
     private val json = Json { ignoreUnknownKeys = true }
 
@@ -30,6 +24,8 @@ object Prediction {
         "layout" to "Some UI elements are overlapping or are cropped",
     )
 
+    private val allDefectCategories = defectCategories + listOf("assertion" to "The assertion is not true")
+
     suspend fun findDefects(
         aiClient: AI,
         screen: ByteArray,
@@ -126,7 +122,7 @@ object Prediction {
             println("--- RAW RESPONSE END ---")
         }
 
-        val defects = json.decodeFromString<FindDefectsResponse>(aiResponse.response)
+        val defects = json.decodeFromString<ModelResponse>(aiResponse.response)
         return defects.defects
     }
 
@@ -136,7 +132,7 @@ object Prediction {
         assertion: String,
         printPrompt: Boolean = false,
         printRawResponse: Boolean = false,
-    ): PerformAssertionResult {
+    ): Defect? {
         val prompt = buildString {
 
             appendLine(
@@ -150,22 +146,37 @@ object Prediction {
                 """.trimMargin("|")
             )
 
+            append(
+                """
+                |
+                |RULES:
+                |* Provide response as a valid JSON, with structure described below.
+                |* If the assertion is true, the "defects" list in the JSON output MUST be empty.
+                |* If assertion is false:
+                |  * Your response MUST only include a single defect with category "assertion".
+                |  * Provide detailed reasoning to explain why you think the assertion is false.
+                """.trimMargin("|")
+            )
+
             // Claude doesn't have a JSON mode as of 21-08-2024
             //  https://docs.anthropic.com/en/docs/test-and-evaluate/strengthen-guardrails/increase-consistency
             //  We could do "if (aiClient is Claude)", but actually, this also helps with gpt-4o sometimes
-            //  generating never-ending stream of output.
+            //  generating never-ending stream of output.
             append(
                 """
                 |
                 |* You must provide result as a valid JSON object, matching this structure:
                 |
                 |  {
-                |      "result": {
-                |          "passed": "<boolean>",
-                |          "reasoning": "<string>"
-                |      },
+                |      "defects": [
+                |          {
+                |              "category": "assertion",
+                |              "reasoning": "<reasoning, string>"
+                |          }
+                |      ]
                 |  }
                 |
+                |The "defects" array MUST contain at most a single JSON object.
                 |DO NOT output any other information in the JSON object.
                 """.trimMargin("|")
             )
@@ -184,7 +195,7 @@ object Prediction {
             identifier = "perform-assertion",
             imageDetail = "high",
             images = listOf(screen),
-            jsonSchema = if (aiClient is OpenAI) json.parseToJsonElement(AI.checkAssertion).jsonObject else null,
+            jsonSchema = if (aiClient is OpenAI) json.parseToJsonElement(AI.askForDefectsSchema).jsonObject else null,
         )
 
         if (printRawResponse) {
@@ -193,7 +204,7 @@ object Prediction {
             println("--- RAW RESPONSE END ---")
         }
 
-        val result = json.decodeFromString<PerformAssertionResult>(aiResponse.response)
-        return result
+        val response = json.decodeFromString<ModelResponse>(aiResponse.response)
+        return response.defects.firstOrNull()
     }
 }
diff --git a/maestro-ai/src/main/resources/checkAssertion_schema.json b/maestro-ai/src/main/resources/checkAssertion_schema.json
deleted file mode 100644
index 2191a9ba9a..0000000000
--- a/maestro-ai/src/main/resources/checkAssertion_schema.json
+++ /dev/null
@@ -1,25 +0,0 @@
-{
-  "name": "checkAssertion",
-  "description": "Return whether the provided assertion about the mobile app's UI is true",
-  "strict": true,
-  "schema": {
-    "type": "object",
-    "required": ["defects"],
-    "additionalProperties": false,
-    "properties": {
-      "result": {
-        "type": "object",
-        "required": ["passed", "reasoning"],
-        "additionalProperties": false,
-        "properties": {
-            "passed": {
-              "type": "boolean"
-            },
-            "reasoning": {
-              "type": "string"
-            }
-          }
-      }
-    }
-  }
-}
diff --git a/maestro-cli/src/main/java/maestro/cli/runner/resultview/AnsiResultView.kt b/maestro-cli/src/main/java/maestro/cli/runner/resultview/AnsiResultView.kt
index 5e620bf72b..33c6393472 100644
--- a/maestro-cli/src/main/java/maestro/cli/runner/resultview/AnsiResultView.kt
+++ b/maestro-cli/src/main/java/maestro/cli/runner/resultview/AnsiResultView.kt
@@ -203,8 +203,8 @@ class AnsiResultView(
             CommandStatus.COMPLETED -> "✅"
             CommandStatus.FAILED -> "❌"
             CommandStatus.RUNNING -> "⏳"
-            CommandStatus.PENDING -> "\uD83D\uDD32"
-            CommandStatus.SKIPPED -> "⚠️️"
+            CommandStatus.PENDING -> "\uD83D\uDD32 " // 🔲
+            CommandStatus.SKIPPED -> "⚠️"
         }
     }
 
diff --git a/maestro-orchestra-models/src/main/java/maestro/orchestra/Commands.kt b/maestro-orchestra-models/src/main/java/maestro/orchestra/Commands.kt
index 577f430c74..dd8dd59939 100644
--- a/maestro-orchestra-models/src/main/java/maestro/orchestra/Commands.kt
+++ b/maestro-orchestra-models/src/main/java/maestro/orchestra/Commands.kt
@@ -385,7 +385,7 @@ data class AssertWithAICommand(
     override fun description(): String {
         if (label != null) return label
 
-        return "Assert no defects with AI: $assertion"
+        return "Assert with AI: $assertion"
     }
 
     override fun evaluateScripts(jsEngine: JsEngine): Command {
diff --git a/maestro-orchestra/src/main/java/maestro/orchestra/Orchestra.kt b/maestro-orchestra/src/main/java/maestro/orchestra/Orchestra.kt
index 923d84be0c..f4a397b67d 100644
--- a/maestro-orchestra/src/main/java/maestro/orchestra/Orchestra.kt
+++ b/maestro-orchestra/src/main/java/maestro/orchestra/Orchestra.kt
@@ -351,7 +351,6 @@ class Orchestra(
 
         val defects = Prediction.findDefects(
             aiClient = ai,
-            assertion = null,
             screen = imageData.copy().readByteArray(),
             previousFalsePositives = listOf(), // TODO(bartekpacia): take it from WorkspaceConfig (or MaestroConfig?)
         )
@@ -363,7 +362,7 @@ class Orchestra(
 
             val word = if (defects.size == 1) "defect" else "defects"
             throw MaestroException.AssertionFailure(
-                "Ffound ${defects.size} possible $word. See the report after the test completes to learn more.",
+                "Found ${defects.size} possible $word. See the report after the test completes to learn more.",
                 maestro.viewHierarchy().root,
             )
         }
@@ -381,21 +380,19 @@ class Orchestra(
         val imageData = Buffer()
         maestro.takeScreenshot(imageData, compressed = false)
 
-        val defects = Prediction.findDefects(
+        val defect = Prediction.performAssertion(
             aiClient = ai,
-            assertion = command.assertion,
             screen = imageData.copy().readByteArray(),
-            previousFalsePositives = listOf(), // TODO(bartekpacia): take it from WorkspaceConfig (or MaestroConfig?)
+            assertion = command.assertion,
         )
 
-        if (defects.isNotEmpty()) {
-            onCommandGeneratedOutput(command, defects, imageData)
+        if (defect != null) {
+            onCommandGeneratedOutput(command, listOf(defect), imageData)
 
             if (command.optional) throw CommandSkipped
 
-            val word = if (defects.size == 1) "defect" else "defects"
             throw MaestroException.AssertionFailure(
-                "Visual AI found ${defects.size} possible $word. See the report to learn more.",
+                "Assertion failed. See the report to learn more.",
                 maestro.viewHierarchy().root,
             )
         }