Skip to content

Commit

Permalink
Add text embedding processor (#304)
Browse files Browse the repository at this point in the history
* Add text embedding processor

Signed-off-by: miguel-vila <[email protected]>
  • Loading branch information
miguel-vila committed Jun 5, 2024
1 parent 6f71b12 commit 7acd0fc
Show file tree
Hide file tree
Showing 2 changed files with 56 additions and 2 deletions.
23 changes: 21 additions & 2 deletions spec/schemas/ingest._common.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -28,8 +28,6 @@ components:
$ref: '_common.yaml#/components/schemas/VersionNumber'
_meta:
$ref: '_common.yaml#/components/schemas/Metadata'
required:
- _meta
ProcessorContainer:
type: object
properties:
Expand Down Expand Up @@ -101,6 +99,8 @@ components:
$ref: '#/components/schemas/CircleProcessor'
inference:
$ref: '#/components/schemas/InferenceProcessor'
text_embedding:
$ref: '#/components/schemas/TextEmbeddingProcessor'
minProperties: 1
maxProperties: 1
AttachmentProcessor:
Expand Down Expand Up @@ -870,3 +870,22 @@ components:
Specifies the type of the predicted field to write.
Valid values are: `string`, `number`, `boolean`.
type: string
TextEmbeddingProcessor:
  # Schema for the `text_embedding` ingest processor: everything declared by
  # ProcessorBase, combined (allOf) with the processor-specific object below.
  allOf:
    - $ref: '#/components/schemas/ProcessorBase'
    - type: object
      # Both settings must be present for the processor definition to validate.
      required:
        - model_id
        - field_map
      properties:
        # Typed through the shared `Id` schema from _common.yaml.
        model_id:
          $ref: '_common.yaml#/components/schemas/Id'
        # Free-form object; every value is constrained to be a string.
        field_map:
          type: object
          description: >-
            Contains key-value pairs that specify the mapping of a text
            field to a vector field.
          additionalProperties:
            type: string
        # NOTE(review): ProcessorBase is not visible here; if it already
        # declares `description`, this redeclaration is redundant - confirm.
        description:
          type: string
          description: A brief description of the processor.
35 changes: 35 additions & 0 deletions tests/text_embedding_processor.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
$schema: ../json_schemas/test_story.schema.yaml

# Story outline: PUT a pipeline containing a text_embedding processor, GET it
# back, then remove it in the epilogue.
skip: false
description: |
  This test story checks that we can create an ingest pipeline with a text
  embedding processor
epilogues:
  # Cleanup step; either 200 or 404 is accepted as the DELETE status.
  - path: /_ingest/pipeline/books_pipeline
    method: DELETE
    status:
      - 200
      - 404
chapters:
  # Chapter 1: create the pipeline and expect an acknowledgement.
  - synopsis: Create ingest pipeline for text embedding
    path: /_ingest/pipeline/{id}
    method: PUT
    parameters:
      id: books_pipeline
    request_body:
      payload:
        description: 'Extracts text from field and embeds it'
        processors:
          - text_embedding:
              model_id: 'text-embedding-model'
              field_map:
                text: 'passage_embedding'
    response:
      status: 200
      payload:
        acknowledged: true
  # Chapter 2: fetch the same pipeline id; only the status code is checked.
  - synopsis: Query created pipeline
    path: /_ingest/pipeline/{id}
    method: GET
    parameters:
      id: books_pipeline
    response:
      status: 200

0 comments on commit 7acd0fc

Please sign in to comment.