Skip to content

Commit

Permalink
implement the new commands
Browse files Browse the repository at this point in the history
  • Loading branch information
bartekpacia committed Aug 27, 2024
1 parent 6f041c9 commit c0b69c5
Show file tree
Hide file tree
Showing 5 changed files with 118 additions and 32 deletions.
9 changes: 8 additions & 1 deletion maestro-ai/src/main/java/maestro/ai/AI.kt
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,14 @@ abstract class AI(
// * OpenAI: https://platform.openai.com/docs/guides/structured-outputs
// * Gemini: https://ai.google.dev/gemini-api/docs/json-mode

val assertVisualSchema: String = run {
/**
 * Text of the JSON schema describing the assertion-check response, read once from the
 * `/checkAssertion_schema.json` classpath resource.
 *
 * Initialization fails with [IllegalStateException] when the resource is not on the classpath.
 */
val checkAssertion: String = run {
    val schemaStream = checkNotNull(this::class.java.getResourceAsStream("/checkAssertion_schema.json")) {
        "Could not find checkAssertion_schema.json in resources"
    }

    // `use` closes the reader (and the underlying stream) once the text has been read.
    schemaStream.bufferedReader().use { reader -> reader.readText() }
}

val askForDefectsSchema: String = run {
val resourceStream = this::class.java.getResourceAsStream("/askForDefects_schema.json")
?: throw IllegalStateException("Could not find askForDefects_schema.json in resources")

Expand Down
16 changes: 6 additions & 10 deletions maestro-ai/src/main/java/maestro/ai/DemoApp.kt
Original file line number Diff line number Diff line change
Expand Up @@ -8,10 +8,7 @@ import com.github.ajalt.clikt.parameters.options.flag
import com.github.ajalt.clikt.parameters.options.option
import com.github.ajalt.clikt.parameters.types.float
import com.github.ajalt.clikt.parameters.types.path
import kotlinx.coroutines.CoroutineScope
import kotlinx.coroutines.Dispatchers
import kotlinx.coroutines.async
import kotlinx.coroutines.launch
import kotlinx.coroutines.runBlocking
import maestro.ai.antrophic.Claude
import maestro.ai.openai.OpenAI
Expand Down Expand Up @@ -79,7 +76,8 @@ class DemoApp : CliktCommand() {
require(parts.size == 3) { "Screenshot name is invalid: ${file.name}" }

val appName = parts[0]
val index = parts[1].toIntOrNull() ?: throw IllegalArgumentException("Invalid screenshot name: ${file.name}")
val index =
parts[1].toIntOrNull() ?: throw IllegalArgumentException("Invalid screenshot name: ${file.name}")
val status = parts[2]

val promptFile = "${file.parent}/${appName}_${index}_${status}.txt"
Expand All @@ -93,7 +91,7 @@ class DemoApp : CliktCommand() {
TestCase(
screenshot = file,
appName = appName,
hasDefects = status == "bad",
shouldPass = status == "good",
index = index,
prompt = prompt,
)
Expand Down Expand Up @@ -123,11 +121,9 @@ class DemoApp : CliktCommand() {
aiClient = aiClient,
screen = bytes,
previousFalsePositives = listOf(),
assertion = testCase.prompt,
printPrompt = showPrompts,
printRawResponse = showRawResponse,
)

verify(testCase, defects)
}

Expand All @@ -136,8 +132,8 @@ class DemoApp : CliktCommand() {
}

private fun verify(testCase: TestCase, defects: List<Defect>) {
if (testCase.hasDefects) {
// Check LLM found defects as well (i.e. didn't commit false negative)
if (!testCase.shouldPass) {
// Check if LLM found defects (i.e. didn't commit false negative)
if (defects.isNotEmpty()) {
if (showOnlyFails) return

Expand Down Expand Up @@ -177,6 +173,6 @@ data class TestCase(
val screenshot: File,
val appName: String,
val prompt: String?,
val hasDefects: Boolean,
val shouldPass: Boolean,
val index: Int,
)
98 changes: 78 additions & 20 deletions maestro-ai/src/main/java/maestro/ai/Prediction.kt
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@ package maestro.ai
import kotlinx.serialization.Serializable
import kotlinx.serialization.json.Json
import kotlinx.serialization.json.jsonObject
import maestro.ai.antrophic.Claude
import maestro.ai.openai.OpenAI

@Serializable
Expand All @@ -17,18 +16,23 @@ private data class FindDefectsResponse(
val defects: List<Defect>,
)

/**
 * Outcome of checking a single assertion against a screenshot; this is the return type of
 * `Prediction.performAssertion`.
 *
 * @property passed verdict reported by the model; presumably `true` when the assertion holds (confirm against the prompt/schema).
 * @property reasoning free-text explanation accompanying the verdict.
 */
@Serializable
data class PerformAssertionResult(
val passed: Boolean,
val reasoning: String,
)

object Prediction {
private val json = Json { ignoreUnknownKeys = true }

private val categories = listOf(
private val defectCategories = listOf(
"localization" to "Inconsistent use of language, for example mixed English and Portuguese",
"layout" to "Some UI elements are overlapping or are cropped",
)

suspend fun findDefects(
aiClient: AI,
screen: ByteArray,
assertion: String?,
previousFalsePositives: List<String>,
printPrompt: Boolean = false,
printRawResponse: Boolean = false,
Expand All @@ -54,7 +58,7 @@ object Prediction {
|
|RULES:
|* All defects you find must belong to one of the following categories:
|${categories.joinToString(separator = "\n") { " * ${it.first}: ${it.second}" }}
|${defectCategories.joinToString(separator = "\n") { " * ${it.first}: ${it.second}" }}
|* If you see defects, your response MUST only include defect name and detailed reasoning for each defect.
|* Provide response as a list of JSON objects, each representing <category>:<reasoning>
|* Do not raise false positives. Some example responses that have a high chance of being a false positive:
Expand All @@ -63,23 +67,10 @@ object Prediction {
""".trimMargin("|")
)

if (assertion != null) {
append(
"""
|
|
|Additionally, if the following assertion isn't true, consider it as a defect with category "assertion":
|
| "${assertion.removeSuffix("\n")}"
|
|""".trimMargin("|")
)
}

// Claude doesn't have a JSON mode as of 21-08-2024
// https://docs.anthropic.com/en/docs/test-and-evaluate/strengthen-guardrails/increase-consistency
// We could do "if (aiClient is Claude)", but actually, this also helps with gpt-4o generating
// never-ending stream of output.
// We could do "if (aiClient is Claude)", but actually, this also helps with gpt-4o sometimes
// generating never-ending stream of output.
append(
"""
|
Expand Down Expand Up @@ -126,7 +117,7 @@ object Prediction {
identifier = "find-defects",
imageDetail = "high",
images = listOf(screen),
jsonSchema = if (aiClient is OpenAI) json.parseToJsonElement(AI.assertVisualSchema).jsonObject else null,
jsonSchema = if (aiClient is OpenAI) json.parseToJsonElement(AI.askForDefectsSchema).jsonObject else null,
)

if (printRawResponse) {
Expand All @@ -138,4 +129,71 @@ object Prediction {
val defects = json.decodeFromString<FindDefectsResponse>(aiResponse.response)
return defects.defects
}

/**
 * Asks the AI model whether [assertion] is true for the given screenshot.
 *
 * Builds a prompt containing the assertion and the expected JSON response structure, sends it together
 * with [screen] through [AI.chatCompletion] and decodes the reply into a [PerformAssertionResult].
 *
 * @param aiClient client used to perform the chat completion; its `defaultModel` is used.
 * @param screen screenshot bytes, sent to the model as the only image.
 * @param assertion statement about the UI to verify; one trailing newline, if present, is stripped.
 * @param printPrompt when `true`, the generated prompt is printed to stdout.
 * @param printRawResponse when `true`, the raw model response is printed to stdout.
 * @return the decoded verdict and reasoning.
 * @throws kotlinx.serialization.SerializationException if the response is not valid JSON or lacks the
 * expected fields.
 */
suspend fun performAssertion(
    aiClient: AI,
    screen: ByteArray,
    assertion: String,
    printPrompt: Boolean = false,
    printRawResponse: Boolean = false,
): PerformAssertionResult {
    val prompt = buildString {

        appendLine(
            """
            |You are a QA engineer performing quality assurance for a mobile application.
            |You are given a screenshot of the application and an assertion about the UI.
            |Your task is to identify if the following assertion is true:
            |
            | "${assertion.removeSuffix("\n")}"
            |
            """.trimMargin("|")
        )

        // Claude doesn't have a JSON mode as of 21-08-2024
        //  https://docs.anthropic.com/en/docs/test-and-evaluate/strengthen-guardrails/increase-consistency
        // We could do "if (aiClient is Claude)", but actually, this also helps with gpt-4o sometimes
        // generating never-ending stream of output.
        //
        // The sample structure must itself be valid JSON (no trailing comma) and must show "passed" as a
        // bare boolean: models tend to mirror the template, and a quoted "true" is rejected by the
        // non-lenient decoder used below.
        append(
            """
            |
            |* You must provide result as a valid JSON object, matching this structure:
            |
            |  {
            |      "result": {
            |          "passed": <boolean>,
            |          "reasoning": "<string>"
            |      }
            |  }
            |
            |DO NOT output any other information in the JSON object.
            """.trimMargin("|")
        )
    }

    if (printPrompt) {
        println("--- PROMPT START ---")
        println(prompt)
        println("--- PROMPT END ---")
    }

    val aiResponse = aiClient.chatCompletion(
        prompt,
        model = aiClient.defaultModel,
        maxTokens = 4096,
        identifier = "perform-assertion",
        imageDetail = "high",
        images = listOf(screen),
        // Only the OpenAI client is given a structured-output schema; other clients rely on the prompt alone.
        jsonSchema = if (aiClient is OpenAI) json.parseToJsonElement(AI.checkAssertion).jsonObject else null,
    )

    if (printRawResponse) {
        println("--- RAW RESPONSE START ---")
        println(aiResponse.response)
        println("--- RAW RESPONSE END ---")
    }

    // Both the prompt and the checkAssertion schema wrap the verdict in a top-level "result" object.
    // Decoding the whole response as PerformAssertionResult would silently drop "result"
    // (ignoreUnknownKeys = true) and then fail on the missing "passed" field, so unwrap it first.
    // A flat {"passed", "reasoning"} object is still accepted for backward compatibility.
    val parsed = json.parseToJsonElement(aiResponse.response)
    val payload = (parsed as? kotlinx.serialization.json.JsonObject)?.get("result") ?: parsed

    return json.decodeFromJsonElement(PerformAssertionResult.serializer(), payload)
}
}
2 changes: 1 addition & 1 deletion maestro-ai/src/main/resources/askForDefects_schema.json
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
{
"name": "askForDefects",
"description": "List of possible defects found in the mobile app's UI",
"description": "Returns a list of possible defects found in the mobile app's UI",
"strict": true,
"schema": {
"type": "object",
Expand Down
25 changes: 25 additions & 0 deletions maestro-ai/src/main/resources/checkAssertion_schema.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
{
  "name": "checkAssertion",
  "description": "Return whether the provided assertion about the mobile app's UI is true",
  "strict": true,
  "schema": {
    "type": "object",
    "required": ["result"],
    "additionalProperties": false,
    "properties": {
      "result": {
        "type": "object",
        "required": ["passed", "reasoning"],
        "additionalProperties": false,
        "properties": {
          "passed": {
            "type": "boolean"
          },
          "reasoning": {
            "type": "string"
          }
        }
      }
    }
  }
}

0 comments on commit c0b69c5

Please sign in to comment.