implement the new commands

mobile-dev-inc · Aug 27, 2024 · c0b69c5 · c0b69c5
1 parent 6f041c9
commit c0b69c5
Show file tree

Hide file tree

Showing 5 changed files with 118 additions and 32 deletions.
diff --git a/maestro-ai/src/main/java/maestro/ai/AI.kt b/maestro-ai/src/main/java/maestro/ai/AI.kt
@@ -41,7 +41,14 @@ abstract class AI(
         // * OpenAI: https://platform.openai.com/docs/guides/structured-outputs
         // * Gemini: https://ai.google.dev/gemini-api/docs/json-mode
 
-        val assertVisualSchema: String = run {
+        /** Raw text of the `/checkAssertion_schema.json` classpath resource, loaded when this property is initialized. */
+        val checkAssertion: String = run {
+            val schemaStream = checkNotNull(this::class.java.getResourceAsStream("/checkAssertion_schema.json")) {
+                "Could not find checkAssertion_schema.json in resources"
+            }
+
+            // `use` closes the reader (and the underlying stream) once the text has been read.
+            schemaStream.bufferedReader().use { reader -> reader.readText() }
+        }
+
+        val askForDefectsSchema: String = run {
             val resourceStream = this::class.java.getResourceAsStream("/askForDefects_schema.json")
                 ?: throw IllegalStateException("Could not find askForDefects_schema.json in resources")
 

diff --git a/maestro-ai/src/main/java/maestro/ai/DemoApp.kt b/maestro-ai/src/main/java/maestro/ai/DemoApp.kt
@@ -8,10 +8,7 @@ import com.github.ajalt.clikt.parameters.options.flag
 import com.github.ajalt.clikt.parameters.options.option
 import com.github.ajalt.clikt.parameters.types.float
 import com.github.ajalt.clikt.parameters.types.path
-import kotlinx.coroutines.CoroutineScope
-import kotlinx.coroutines.Dispatchers
 import kotlinx.coroutines.async
-import kotlinx.coroutines.launch
 import kotlinx.coroutines.runBlocking
 import maestro.ai.antrophic.Claude
 import maestro.ai.openai.OpenAI
@@ -79,7 +76,8 @@ class DemoApp : CliktCommand() {
             require(parts.size == 3) { "Screenshot name is invalid: ${file.name}" }
 
             val appName = parts[0]
-            val index = parts[1].toIntOrNull() ?: throw IllegalArgumentException("Invalid screenshot name: ${file.name}")
+            val index =
+                parts[1].toIntOrNull() ?: throw IllegalArgumentException("Invalid screenshot name: ${file.name}")
             val status = parts[2]
 
             val promptFile = "${file.parent}/${appName}_${index}_${status}.txt"
@@ -93,7 +91,7 @@ class DemoApp : CliktCommand() {
             TestCase(
                 screenshot = file,
                 appName = appName,
-                hasDefects = status == "bad",
+                shouldPass = status == "good",
                 index = index,
                 prompt = prompt,
             )
@@ -123,11 +121,9 @@ class DemoApp : CliktCommand() {
                     aiClient = aiClient,
                     screen = bytes,
                     previousFalsePositives = listOf(),
-                    assertion = testCase.prompt,
                     printPrompt = showPrompts,
                     printRawResponse = showRawResponse,
                 )
-
                 verify(testCase, defects)
             }
 
@@ -136,8 +132,8 @@ class DemoApp : CliktCommand() {
     }
 
     private fun verify(testCase: TestCase, defects: List<Defect>) {
-        if (testCase.hasDefects) {
-            // Check LLM found defects as well (i.e. didn't commit false negative)
+        if (!testCase.shouldPass) {
+            // Check if LLM found defects (i.e. didn't commit false negative)
             if (defects.isNotEmpty()) {
                 if (showOnlyFails) return
 
@@ -177,6 +173,6 @@ data class TestCase(
     val screenshot: File,
     val appName: String,
     val prompt: String?,
-    val hasDefects: Boolean,
+    val shouldPass: Boolean,
     val index: Int,
 )
diff --git a/maestro-ai/src/main/java/maestro/ai/Prediction.kt b/maestro-ai/src/main/java/maestro/ai/Prediction.kt
@@ -3,7 +3,6 @@ package maestro.ai
 import kotlinx.serialization.Serializable
 import kotlinx.serialization.json.Json
 import kotlinx.serialization.json.jsonObject
-import maestro.ai.antrophic.Claude
 import maestro.ai.openai.OpenAI
 
 @Serializable
@@ -17,18 +16,23 @@ private data class FindDefectsResponse(
     val defects: List<Defect>,
 )
 
+/**
+ * Serializable value returned by `Prediction.performAssertion`.
+ *
+ * @property passed the `passed` flag decoded from the model's reply
+ *   (presumably true when the model judged the assertion to hold — confirm against callers).
+ * @property reasoning the `reasoning` text decoded from the model's reply.
+ */
+@Serializable
+data class PerformAssertionResult(
+    val passed: Boolean,
+    val reasoning: String,
+)
+
 object Prediction {
     private val json = Json { ignoreUnknownKeys = true }
 
-    private val categories = listOf(
+    private val defectCategories = listOf(
         "localization" to "Inconsistent use of language, for example mixed English and Portuguese",
         "layout" to "Some UI elements are overlapping or are cropped",
     )
 
     suspend fun findDefects(
         aiClient: AI,
         screen: ByteArray,
-        assertion: String?,
         previousFalsePositives: List<String>,
         printPrompt: Boolean = false,
         printRawResponse: Boolean = false,
@@ -54,7 +58,7 @@ object Prediction {
                 |
                 |RULES:
                 |* All defects you find must belong to one of the following categories:
-                |${categories.joinToString(separator = "\n") { "  * ${it.first}: ${it.second}" }}
+                |${defectCategories.joinToString(separator = "\n") { "  * ${it.first}: ${it.second}" }}
                 |* If you see defects, your response MUST only include defect name and detailed reasoning for each defect.
                 |* Provide response as a list of JSON objects, each representing <category>:<reasoning>
                 |* Do not raise false positives. Some example responses that have a high chance of being a false positive:
@@ -63,23 +67,10 @@ object Prediction {
                 """.trimMargin("|")
             )
 
-            if (assertion != null) {
-                append(
-                    """
-                    |
-                    |
-                    |Additionally, if the following assertion isn't true, consider it as a defect with category "assertion":
-                    |
-                    |  "${assertion.removeSuffix("\n")}"
-                    |
-                    |""".trimMargin("|")
-                )
-            }
-
             // Claude doesn't have a JSON mode as of 21-08-2024
             //  https://docs.anthropic.com/en/docs/test-and-evaluate/strengthen-guardrails/increase-consistency
-            //  We could do "if (aiClient is Claude)", but actually, this also helps with gpt-4o generating
-            //  never-ending stream of output.
+            //  We could do "if (aiClient is Claude)", but actually, this also helps with gpt-4o sometimes
+            //  generating never-ending stream of output.
             append(
                 """
                 |
@@ -126,7 +117,7 @@ object Prediction {
             identifier = "find-defects",
             imageDetail = "high",
             images = listOf(screen),
-            jsonSchema = if (aiClient is OpenAI) json.parseToJsonElement(AI.assertVisualSchema).jsonObject else null,
+            jsonSchema = if (aiClient is OpenAI) json.parseToJsonElement(AI.askForDefectsSchema).jsonObject else null,
         )
 
         if (printRawResponse) {
@@ -138,4 +129,71 @@ object Prediction {
         val defects = json.decodeFromString<FindDefectsResponse>(aiResponse.response)
         return defects.defects
     }
+
+    /**
+     * Envelope of the model's reply. Both the prompt built in [performAssertion] and
+     * `checkAssertion_schema.json` nest the verdict under a top-level `result` key, so the
+     * reply has to be decoded through this wrapper rather than as a bare [PerformAssertionResult].
+     */
+    @Serializable
+    private data class PerformAssertionResponse(
+        val result: PerformAssertionResult,
+    )
+
+    /**
+     * Asks the model whether [assertion] is true for the UI shown in [screen].
+     *
+     * @param aiClient client used to run the chat completion; a JSON schema is attached only when it is [OpenAI].
+     * @param screen screenshot bytes, sent as the only image of the request.
+     * @param assertion statement to verify; a single trailing newline, if present, is stripped before quoting.
+     * @param printPrompt when true, prints the generated prompt to stdout.
+     * @param printRawResponse when true, prints the raw model response to stdout.
+     * @return the `result` object of the model's reply.
+     * @throws kotlinx.serialization.SerializationException if the reply is not JSON of the requested shape.
+     */
+    suspend fun performAssertion(
+        aiClient: AI,
+        screen: ByteArray,
+        assertion: String,
+        printPrompt: Boolean = false,
+        printRawResponse: Boolean = false,
+    ): PerformAssertionResult {
+        val prompt = buildString {
+            appendLine(
+                """
+                |You are a QA engineer performing quality assurance for a mobile application.
+                |You are given a screenshot of the application and an assertion about the UI.
+                |Your task is to identify if the following assertion is true:
+                |
+                |  "${assertion.removeSuffix("\n")}"
+                |
+                """.trimMargin("|")
+            )
+
+            // Claude doesn't have a JSON mode as of 21-08-2024
+            //  https://docs.anthropic.com/en/docs/test-and-evaluate/strengthen-guardrails/increase-consistency
+            //  We could do "if (aiClient is Claude)", but actually, this also helps with gpt-4o sometimes
+            //  generating never-ending stream of output.
+            //
+            // The template below is kept syntactically valid JSON (unquoted boolean, no trailing comma):
+            // a model that copies it literally would otherwise produce output the strict decoder rejects.
+            append(
+                """
+                |
+                |* You must provide result as a valid JSON object, matching this structure:
+                |
+                |  {
+                |      "result": {
+                |          "passed": <boolean>,
+                |          "reasoning": "<string>"
+                |      }
+                |  }
+                |
+                |DO NOT output any other information in the JSON object.
+                """.trimMargin("|")
+            )
+        }
+
+        if (printPrompt) {
+            println("--- PROMPT START ---")
+            println(prompt)
+            println("--- PROMPT END ---")
+        }
+
+        val aiResponse = aiClient.chatCompletion(
+            prompt,
+            model = aiClient.defaultModel,
+            maxTokens = 4096,
+            identifier = "perform-assertion",
+            imageDetail = "high",
+            images = listOf(screen),
+            jsonSchema = if (aiClient is OpenAI) json.parseToJsonElement(AI.checkAssertion).jsonObject else null,
+        )
+
+        if (printRawResponse) {
+            println("--- RAW RESPONSE START ---")
+            println(aiResponse.response)
+            println("--- RAW RESPONSE END ---")
+        }
+
+        // Decode the {"result": {...}} envelope and unwrap it. Decoding the reply directly as
+        // PerformAssertionResult would ignore the unknown "result" key and fail on missing fields.
+        return json.decodeFromString<PerformAssertionResponse>(aiResponse.response).result
+    }
 }
diff --git a/maestro-ai/src/main/resources/askForDefects_schema.json b/maestro-ai/src/main/resources/askForDefects_schema.json
@@ -1,6 +1,6 @@
 {
   "name": "askForDefects",
-  "description": "List of possible defects found in the mobile app's UI",
+  "description": "Returns a list of possible defects found in the mobile app's UI",
   "strict": true,
   "schema": {
     "type": "object",

diff --git a/maestro-ai/src/main/resources/checkAssertion_schema.json b/maestro-ai/src/main/resources/checkAssertion_schema.json
@@ -0,0 +1,25 @@
+{
+  "name": "checkAssertion",
+  "description": "Return whether the provided assertion about the mobile app's UI is true",
+  "strict": true,
+  "schema": {
+    "type": "object",
+    "required": ["result"],
+    "additionalProperties": false,
+    "properties": {
+      "result": {
+        "type": "object",
+        "required": ["passed", "reasoning"],
+        "additionalProperties": false,
+        "properties": {
+          "passed": {
+            "type": "boolean"
+          },
+          "reasoning": {
+            "type": "string"
+          }
+        }
+      }
+    }
+  }
+}