From 9ac4914a48df31c513b5236ca4960e9346093cc7 Mon Sep 17 00:00:00 2001 From: Justin Torre Date: Thu, 16 May 2024 23:11:32 -0700 Subject: [PATCH] Adding a meta column to datasets to link prompts to them (#1900) --- docs/swagger.json | 34 ++++++++++++++----- helicone-cron/src/db/database.types.ts | 3 ++ .../20240516222421_experiments-meta.sql | 3 ++ .../public/experimentDatasetController.ts | 14 ++++++-- valhalla/jawn/src/lib/db/database.types.ts | 3 ++ .../src/managers/dataset/DatasetManager.ts | 16 ++++++--- .../jawn/src/managers/inputs/InputsManager.ts | 7 ++++ valhalla/jawn/src/tsoa-build/public/routes.ts | 14 ++++++-- .../jawn/src/tsoa-build/public/swagger.json | 34 ++++++++++++++----- .../experiments/id/experimentIdPage.tsx | 8 ++--- .../prompts/id/promptNewExperiment.tsx | 5 ++- .../prompts/id/selectRandomDataset.tsx | 6 ++++ web/lib/clients/jawnTypes/public.ts | 11 ++++-- web/services/hooks/prompts/datasets.tsx | 6 ++-- web/supabase/database.types.ts | 3 ++ worker/supabase/database.types.ts | 3 ++ 16 files changed, 135 insertions(+), 35 deletions(-) create mode 100644 supabase/migrations/20240516222421_experiments-meta.sql diff --git a/docs/swagger.json b/docs/swagger.json index ec1f0baeaa..2c2db1f942 100644 --- a/docs/swagger.json +++ b/docs/swagger.json @@ -1485,6 +1485,21 @@ } ] }, + "DatasetMetadata": { + "properties": { + "promptId": { + "type": "string" + }, + "inputRecordsIds": { + "items": { + "type": "string" + }, + "type": "array" + } + }, + "type": "object", + "additionalProperties": false + }, "NewDatasetParams": { "properties": { "datasetName": { @@ -1495,6 +1510,9 @@ "type": "string" }, "type": "array" + }, + "meta": { + "$ref": "#/components/schemas/DatasetMetadata" } }, "required": [ @@ -1608,20 +1626,16 @@ "name": { "type": "string" }, - "request_ids": { - "items": { - "type": "string" - }, - "type": "array" - }, "created_at": { "type": "string" + }, + "meta": { + "$ref": "#/components/schemas/DatasetMetadata" } }, "required": [ "id", "name", - "request_ids", "created_at" ], "type": "object", @@ -2836,7 +2850,11 @@ "content": { "application/json": { "schema": { - "properties": {}, + "properties": { + "promptId": { + "type": "string" + } + }, "type": "object" } } diff --git a/helicone-cron/src/db/database.types.ts b/helicone-cron/src/db/database.types.ts index e3e51db3ac..bfa7f3d3da 100644 --- a/helicone-cron/src/db/database.types.ts +++ b/helicone-cron/src/db/database.types.ts @@ -250,18 +250,21 @@ export type Database = { Row: { created_at: string | null id: string + meta: Json | null name: string | null organization: string } Insert: { created_at?: string | null id?: string + meta?: Json | null name?: string | null organization: string } Update: { created_at?: string | null id?: string + meta?: Json | null name?: string | null organization?: string } diff --git a/supabase/migrations/20240516222421_experiments-meta.sql b/supabase/migrations/20240516222421_experiments-meta.sql new file mode 100644 index 0000000000..25a871effe --- /dev/null +++ b/supabase/migrations/20240516222421_experiments-meta.sql @@ -0,0 +1,3 @@ +alter table "public"."experiment_dataset_v2" add column "meta" jsonb; + + diff --git a/valhalla/jawn/src/controllers/public/experimentDatasetController.ts b/valhalla/jawn/src/controllers/public/experimentDatasetController.ts index b423deee97..1586a10468 100644 --- a/valhalla/jawn/src/controllers/public/experimentDatasetController.ts +++ b/valhalla/jawn/src/controllers/public/experimentDatasetController.ts @@ -18,16 +18,22 @@ type DatasetFilterNode = | DatasetFilterBranch | "all"; +export interface DatasetMetadata { + promptId?: string; + inputRecordsIds?: string[]; +} + export interface NewDatasetParams { datasetName: string; requestIds: string[]; + meta?: DatasetMetadata; } export interface DatasetResult { id: string; name: string; - request_ids: string[]; created_at: string; + meta?: DatasetMetadata; } export interface RandomDatasetParams { @@ -99,11 +105,13 @@ export class ExperimentDatasetController extends Controller { @Post("/query") public async getDatasets( @Body() - requestBody: {}, + requestBody: { + promptId?: string; + }, @Request() request: JawnAuthenticatedRequest ): Promise> { const datasetManager = new DatasetManager(request.authParams); - const result = await datasetManager.getDatasets(); + const result = await datasetManager.getDatasets(requestBody.promptId); if (result.error || !result.data) { this.setStatus(500); } else { diff --git a/valhalla/jawn/src/lib/db/database.types.ts b/valhalla/jawn/src/lib/db/database.types.ts index e3e51db3ac..bfa7f3d3da 100644 --- a/valhalla/jawn/src/lib/db/database.types.ts +++ b/valhalla/jawn/src/lib/db/database.types.ts @@ -250,18 +250,21 @@ export type Database = { Row: { created_at: string | null id: string + meta: Json | null name: string | null organization: string } Insert: { created_at?: string | null id?: string + meta?: Json | null name?: string | null organization: string } Update: { created_at?: string | null id?: string + meta?: Json | null name?: string | null organization?: string } diff --git a/valhalla/jawn/src/managers/dataset/DatasetManager.ts b/valhalla/jawn/src/managers/dataset/DatasetManager.ts index 45e6482e87..fe04ff8c48 100644 --- a/valhalla/jawn/src/managers/dataset/DatasetManager.ts +++ b/valhalla/jawn/src/managers/dataset/DatasetManager.ts @@ -1,5 +1,6 @@ // src/users/usersService.ts import { + DatasetMetadata, DatasetResult, NewDatasetParams, RandomDatasetParams, @@ -19,28 +20,32 @@ import { buildFilterPostgres } from "../../lib/shared/filters/filters"; import { resultMap } from "../../lib/shared/result"; import { User } from "../../models/user"; import { BaseManager } from "../BaseManager"; +import { Json } from "../../lib/db/database.types"; // A post request should not contain an id. export type UserCreationParams = Pick; export class DatasetManager extends BaseManager { - async getDatasets(): Promise> { + async getDatasets( + promptId?: string + ): Promise> { const result = dbExecute<{ id: string; name: string; - request_ids: string[]; created_at: string; + meta: DatasetMetadata; }>( ` SELECT id, name, - created_at + created_at, + meta FROM experiment_dataset_v2 - WHERE organization = $1 + WHERE organization = $1 ${promptId ? "AND meta->>'promptId' = $2" : ""} LIMIT 100 `, - [this.authParams.organizationId] + [this.authParams.organizationId].concat(promptId ? [promptId] : []) ); return result; } @@ -51,6 +56,7 @@ export class DatasetManager extends BaseManager { .insert({ name: params.datasetName, organization: this.authParams.organizationId, + meta: (params.meta ?? null) as Json, }) .select("*") .single(); diff --git a/valhalla/jawn/src/managers/inputs/InputsManager.ts b/valhalla/jawn/src/managers/inputs/InputsManager.ts index 6d18f69419..77408435cf 100644 --- a/valhalla/jawn/src/managers/inputs/InputsManager.ts +++ b/valhalla/jawn/src/managers/inputs/InputsManager.ts @@ -16,6 +16,7 @@ import { resultMap } from "../../lib/shared/result"; import { User } from "../../models/user"; import { BaseManager } from "../BaseManager"; import { S3Client } from "../../lib/shared/db/s3Client"; +import { RequestResponseBodyStore } from "../../lib/stores/request/RequestResponseBodyStore"; async function fetchImageAsBase64(url: string): Promise { try { @@ -113,12 +114,18 @@ export class InputsManager extends BaseManager { `, [this.authParams.organizationId, promptVersion, limit] ); + const bodyStore = new RequestResponseBodyStore( + this.authParams.organizationId + ); return promiseResultMap(result, async (data) => { return Promise.all( data.map(async (record) => { return { ...record, + response_body: + (await bodyStore.getRequestResponseBody(record.source_request)) + .data?.response ?? {}, inputs: await getAllSignedURLsFromInputs( record.inputs, this.authParams.organizationId, diff --git a/valhalla/jawn/src/tsoa-build/public/routes.ts b/valhalla/jawn/src/tsoa-build/public/routes.ts index b65f229c4a..d2b7f2ba06 100644 --- a/valhalla/jawn/src/tsoa-build/public/routes.ts +++ b/valhalla/jawn/src/tsoa-build/public/routes.ts @@ -545,11 +545,21 @@ const models: TsoaRoute.Models = { "type": {"dataType":"union","subSchemas":[{"ref":"ResultSuccess__datasetId-string__"},{"ref":"ResultError_string_"}],"validators":{}}, }, // WARNING: This file was auto-generated with tsoa. Please do not modify it. Re-run tsoa to re-generate this file: https://github.com/lukeautry/tsoa + "DatasetMetadata": { + "dataType": "refObject", + "properties": { + "promptId": {"dataType":"string"}, + "inputRecordsIds": {"dataType":"array","array":{"dataType":"string"}}, + }, + "additionalProperties": false, + }, + // WARNING: This file was auto-generated with tsoa. Please do not modify it. Re-run tsoa to re-generate this file: https://github.com/lukeautry/tsoa "NewDatasetParams": { "dataType": "refObject", "properties": { "datasetName": {"dataType":"string","required":true}, "requestIds": {"dataType":"array","array":{"dataType":"string"},"required":true}, + "meta": {"ref":"DatasetMetadata"}, }, "additionalProperties": false, }, @@ -595,8 +605,8 @@ const models: TsoaRoute.Models = { "properties": { "id": {"dataType":"string","required":true}, "name": {"dataType":"string","required":true}, - "request_ids": {"dataType":"array","array":{"dataType":"string"},"required":true}, "created_at": {"dataType":"string","required":true}, + "meta": {"ref":"DatasetMetadata"}, }, "additionalProperties": false, }, @@ -1233,7 +1243,7 @@ export function RegisterRoutes(app: Router) { function ExperimentDatasetController_getDatasets(request: ExRequest, response: ExResponse, next: any) { const args: Record = { - requestBody: {"in":"body","name":"requestBody","required":true,"dataType":"nestedObjectLiteral","nestedProperties":{}}, + requestBody: {"in":"body","name":"requestBody","required":true,"dataType":"nestedObjectLiteral","nestedProperties":{"promptId":{"dataType":"string"}}}, request: {"in":"request","name":"request","required":true,"dataType":"object"}, }; diff --git a/valhalla/jawn/src/tsoa-build/public/swagger.json b/valhalla/jawn/src/tsoa-build/public/swagger.json index ec1f0baeaa..2c2db1f942 100644 --- a/valhalla/jawn/src/tsoa-build/public/swagger.json +++ b/valhalla/jawn/src/tsoa-build/public/swagger.json @@ -1485,6 +1485,21 @@ } ] }, + "DatasetMetadata": { + "properties": { + "promptId": { + "type": "string" + }, + "inputRecordsIds": { + "items": { + "type": "string" + }, + "type": "array" + } + }, + "type": "object", + "additionalProperties": false + }, "NewDatasetParams": { "properties": { "datasetName": { @@ -1495,6 +1510,9 @@ "type": "string" }, "type": "array" + }, + "meta": { + "$ref": "#/components/schemas/DatasetMetadata" } }, "required": [ @@ -1608,20 +1626,16 @@ "name": { "type": "string" }, - "request_ids": { - "items": { - "type": "string" - }, - "type": "array" - }, "created_at": { "type": "string" + }, + "meta": { + "$ref": "#/components/schemas/DatasetMetadata" } }, "required": [ "id", "name", - "request_ids", "created_at" ], "type": "object", @@ -2836,7 +2850,11 @@ "content": { "application/json": { "schema": { - "properties": {}, + "properties": { + "promptId": { + "type": "string" + } + }, "type": "object" } } diff --git a/web/components/templates/prompts/experiments/id/experimentIdPage.tsx b/web/components/templates/prompts/experiments/id/experimentIdPage.tsx index d0f862011e..c5c9f50c93 100644 --- a/web/components/templates/prompts/experiments/id/experimentIdPage.tsx +++ b/web/components/templates/prompts/experiments/id/experimentIdPage.tsx @@ -274,8 +274,8 @@ const ExperimentIdPage = (props: PromptIdPageProps) => { `w-max items-center rounded-lg px-2 py-1 -my-1 text-xs font-medium ring-1 ring-inset` )} > - {run.testResult.response?.completionTokens}{" "} - input tokens + {run.testResult.response?.promptTokens} input + tokens { `w-max items-center rounded-lg px-2 py-1 -my-1 text-xs font-medium ring-1 ring-inset` )} > - {run.testResult.response?.promptTokens} prompt - tokens + {run.testResult.response?.completionTokens}{" "} + output tokens { datasets: datasets, isLoading: isDataSetsLoading, refetch: refetchDataSets, - } = useGetDataSets(); + } = useGetDataSets(id); const selectedDataset = datasets.find( (dataset) => dataset.id === selectedDatasetId @@ -448,6 +448,9 @@ const PromptNewExperimentPage = (props: PromptIdPageProps) => { open={openConfirmModal} setOpen={setOpenConfirmModal} requestIds={requestIds} + meta={{ + promptVersionId: prompt?.id, + }} onSuccess={(datasetId) => { setSelectedDatasetId(datasetId); diff --git a/web/components/templates/prompts/id/selectRandomDataset.tsx b/web/components/templates/prompts/id/selectRandomDataset.tsx index f3d8b5f3db..c657dede47 100644 --- a/web/components/templates/prompts/id/selectRandomDataset.tsx +++ b/web/components/templates/prompts/id/selectRandomDataset.tsx @@ -19,6 +19,9 @@ interface SelectRandomDatasetProps { created_at: string; }[]; onSuccess?: (dataSetId: string | undefined) => void; + meta?: { + promptVersionId?: string; + }; } const RANDOM_SAMPLE_SIZE = 10; @@ -123,6 +126,9 @@ const SelectRandomDataset = (props: SelectRandomDatasetProps) => { datasetName: `EXP-DATASET-${new Date().getTime()}`, requestIds: selectedRequests?.map((r) => r.source_request) ?? [], + meta: { + promptId: props.meta?.promptVersionId, + }, }, }); if (dataset.data?.error !== null) { diff --git a/web/lib/clients/jawnTypes/public.ts b/web/lib/clients/jawnTypes/public.ts index 1771115da8..995e0d4f9c 100644 --- a/web/lib/clients/jawnTypes/public.ts +++ b/web/lib/clients/jawnTypes/public.ts @@ -473,9 +473,14 @@ Json: JsonObject; error: null; }; "Result__datasetId-string_.string_": components["schemas"]["ResultSuccess__datasetId-string__"] | components["schemas"]["ResultError_string_"]; + DatasetMetadata: { + promptId?: string; + inputRecordsIds?: string[]; + }; NewDatasetParams: { datasetName: string; requestIds: string[]; + meta?: components["schemas"]["DatasetMetadata"]; }; /** @description Make all properties in T optional */ Partial_PromptVersionsToOperators_: { @@ -508,8 +513,8 @@ Json: JsonObject; DatasetResult: { id: string; name: string; - request_ids: string[]; created_at: string; + meta?: components["schemas"]["DatasetMetadata"]; }; "ResultSuccess_DatasetResult-Array_": { data: components["schemas"]["DatasetResult"][]; @@ -928,7 +933,9 @@ export interface operations { GetDatasets: { requestBody: { content: { - "application/json": Record; + "application/json": { + promptId?: string; + }; }; }; responses: { diff --git a/web/services/hooks/prompts/datasets.tsx b/web/services/hooks/prompts/datasets.tsx index 6ce55bfae1..d40d8ccc5b 100644 --- a/web/services/hooks/prompts/datasets.tsx +++ b/web/services/hooks/prompts/datasets.tsx @@ -1,7 +1,7 @@ import { useQuery } from "@tanstack/react-query"; import { useJawnClient } from "../../../lib/clients/jawnHook"; -const useGetDataSets = () => { +const useGetDataSets = (promptId?: string) => { const jawn = useJawnClient(); const { data, isLoading, refetch, isRefetching } = useQuery({ @@ -10,7 +10,9 @@ const useGetDataSets = () => { const jawn = query.queryKey[1] as ReturnType; return jawn.POST("/v1/experiment/dataset/query", { - body: {}, + body: { + promptId: promptId, + }, }); }, refetchOnWindowFocus: false, diff --git a/web/supabase/database.types.ts b/web/supabase/database.types.ts index e3e51db3ac..bfa7f3d3da 100644 --- a/web/supabase/database.types.ts +++ b/web/supabase/database.types.ts @@ -250,18 +250,21 @@ export type Database = { Row: { created_at: string | null id: string + meta: Json | null name: string | null organization: string } Insert: { created_at?: string | null id?: string + meta?: Json | null name?: string | null organization: string } Update: { created_at?: string | null id?: string + meta?: Json | null name?: string | null organization?: string } diff --git a/worker/supabase/database.types.ts b/worker/supabase/database.types.ts index e3e51db3ac..bfa7f3d3da 100644 --- a/worker/supabase/database.types.ts +++ b/worker/supabase/database.types.ts @@ -250,18 +250,21 @@ export type Database = { Row: { created_at: string | null id: string + meta: Json | null name: string | null organization: string } Insert: { created_at?: string | null id?: string + meta?: Json | null name?: string | null organization: string } Update: { created_at?: string | null id?: string + meta?: Json | null name?: string | null organization?: string }