Simplify ui_metadata; various cleanup and refactoring (#206) (#207)

Signed-off-by: Tyler Ohlsen <[email protected]> (cherry picked from commit ad03380) Co-authored-by: Tyler Ohlsen <[email protected]>
opensearch-project · Jul 9, 2024 · 3b8cb17 · 3b8cb17
1 parent 5f56022
commit 3b8cb17
Show file tree

Hide file tree

Showing 32 changed files with 195 additions and 1,893 deletions.
diff --git a/common/constants.ts b/common/constants.ts
@@ -3,13 +3,7 @@
  * SPDX-License-Identifier: Apache-2.0
  */
 
-import {
-  MODEL_ALGORITHM,
-  PRETRAINED_MODEL_FORMAT,
-  PretrainedSentenceTransformer,
-  PretrainedSparseEncodingModel,
-  WORKFLOW_STATE,
-} from './interfaces';
+import { WORKFLOW_STATE } from './interfaces';
 
 export const PLUGIN_ID = 'flow-framework';
 
@@ -55,70 +49,6 @@ export const GET_PRESET_WORKFLOWS_NODE_API_PATH = `${BASE_WORKFLOW_NODE_API_PATH
 export const BASE_MODEL_NODE_API_PATH = `${BASE_NODE_API_PATH}/model`;
 export const SEARCH_MODELS_NODE_API_PATH = `${BASE_MODEL_NODE_API_PATH}/search`;
 
-/**
- * ML PLUGIN PRETRAINED MODELS
- * (based off of https://opensearch.org/docs/latest/ml-commons-plugin/pretrained-models)
- */
-
-// ---- SENTENCE TRANSFORMERS ----
-export const ROBERTA_SENTENCE_TRANSFORMER = {
-  name: 'huggingface/sentence-transformers/all-distilroberta-v1',
-  shortenedName: 'all-distilroberta-v1',
-  description: 'A sentence transformer from Hugging Face',
-  format: PRETRAINED_MODEL_FORMAT.TORCH_SCRIPT,
-  algorithm: MODEL_ALGORITHM.TEXT_EMBEDDING,
-  version: '1.0.1',
-  vectorDimensions: 768,
-} as PretrainedSentenceTransformer;
-
-export const MPNET_SENTENCE_TRANSFORMER = {
-  name: 'huggingface/sentence-transformers/all-mpnet-base-v2',
-  shortenedName: 'all-mpnet-base-v2',
-  description: 'A sentence transformer from Hugging Face',
-  format: PRETRAINED_MODEL_FORMAT.TORCH_SCRIPT,
-  algorithm: MODEL_ALGORITHM.TEXT_EMBEDDING,
-  version: '1.0.1',
-  vectorDimensions: 768,
-} as PretrainedSentenceTransformer;
-
-export const BERT_SENTENCE_TRANSFORMER = {
-  name: 'huggingface/sentence-transformers/msmarco-distilbert-base-tas-b',
-  shortenedName: 'msmarco-distilbert-base-tas-b',
-  description: 'A sentence transformer from Hugging Face',
-  format: PRETRAINED_MODEL_FORMAT.TORCH_SCRIPT,
-  algorithm: MODEL_ALGORITHM.TEXT_EMBEDDING,
-  version: '1.0.2',
-  vectorDimensions: 768,
-} as PretrainedSentenceTransformer;
-
-// ---- SPARSE ENCODERS ----
-export const NEURAL_SPARSE_TRANSFORMER = {
-  name: 'amazon/neural-sparse/opensearch-neural-sparse-encoding-v1',
-  shortenedName: 'opensearch-neural-sparse-encoding-v1',
-  description: 'A general neural sparse encoding model',
-  format: PRETRAINED_MODEL_FORMAT.TORCH_SCRIPT,
-  algorithm: MODEL_ALGORITHM.SPARSE_ENCODING,
-  version: '1.0.1',
-} as PretrainedSparseEncodingModel;
-
-export const NEURAL_SPARSE_DOC_TRANSFORMER = {
-  name: 'amazon/neural-sparse/opensearch-neural-sparse-encoding-doc-v1',
-  shortenedName: 'opensearch-neural-sparse-encoding-doc-v1',
-  description: 'A general neural sparse encoding model',
-  format: PRETRAINED_MODEL_FORMAT.TORCH_SCRIPT,
-  algorithm: MODEL_ALGORITHM.SPARSE_ENCODING,
-  version: '1.0.1',
-} as PretrainedSparseEncodingModel;
-
-export const NEURAL_SPARSE_TOKENIZER_TRANSFORMER = {
-  name: 'amazon/neural-sparse/opensearch-neural-sparse-tokenizer-v1',
-  shortenedName: 'opensearch-neural-sparse-tokenizer-v1',
-  description: 'A neural sparse tokenizer model',
-  format: PRETRAINED_MODEL_FORMAT.TORCH_SCRIPT,
-  algorithm: MODEL_ALGORITHM.SPARSE_ENCODING,
-  version: '1.0.1',
-} as PretrainedSparseEncodingModel;
-
 /**
  * Various constants pertaining to Workflow configs
  */

diff --git a/common/interfaces.ts b/common/interfaces.ts
@@ -19,17 +19,18 @@ TODO: over time these can become less generic as the form inputs & UX becomes fi
  */
 
 export type ConfigFieldType = 'string' | 'json' | 'select' | 'model' | 'map';
-export type ConfigSelectType = 'model';
 export type ConfigFieldValue = string | {};
 export interface IConfigField {
-  label: string;
   type: ConfigFieldType;
   id: string;
   value?: ConfigFieldValue;
+  // TODO: move the fields below out of this interface and directly into the necessary components.
+  // This is to minimize what we persist here, which is added into ui_metadata and indexed.
+  // Once the config for ML inference processors is finalized, we can migrate these out.
+  label?: string;
   placeholder?: string;
   helpText?: string;
   helpLink?: string;
-  selectType?: ConfigSelectType;
 }
 export interface IConfig {
   id: string;
@@ -91,7 +92,6 @@ export type WorkflowSchema = ObjectSchema<WorkflowSchemaObj>;
  */
 
 export type FieldType = 'string' | 'json' | 'select' | 'model';
-export type SelectType = 'model';
 export type FieldValue = string | {};
 export type ComponentFormValues = FormikValues;
 export type WorkspaceFormValues = {
@@ -125,7 +125,6 @@ export interface IComponentField {
   placeholder?: string;
   helpText?: string;
   helpLink?: string;
-  selectType?: SelectType;
 }
 
 /**
@@ -164,6 +163,7 @@ type ReactFlowViewport = {
 export type UIState = {
   config: WorkflowConfig;
   type: WORKFLOW_TYPE;
+  // Will be used in the future when changing from form-based to flow-based configs via drag-and-drop
   workspace_flow?: WorkspaceFlowState;
 };
 
@@ -271,16 +271,6 @@ export type CreateIndexNode = TemplateNode & {
   };
 };
 
-export type RegisterPretrainedModelNode = TemplateNode & {
-  user_inputs: {
-    name: string;
-    description: string;
-    model_format: string;
-    version: string;
-    deploy: boolean;
-  };
-};
-
 export type TemplateEdge = {
   source: string;
   dest: string;
@@ -365,30 +355,6 @@ export enum MODEL_ALGORITHM {
   AGENT = 'Agent',
 }
 
-export enum MODEL_CATEGORY {
-  DEPLOYED = 'Deployed',
-  PRETRAINED = 'Pretrained',
-}
-
-export enum PRETRAINED_MODEL_FORMAT {
-  TORCH_SCRIPT = 'TORCH_SCRIPT',
-}
-
-export type PretrainedModel = {
-  name: string;
-  shortenedName: string;
-  description: string;
-  format: PRETRAINED_MODEL_FORMAT;
-  algorithm: MODEL_ALGORITHM;
-  version: string;
-};
-
-export type PretrainedSentenceTransformer = PretrainedModel & {
-  vectorDimensions: number;
-};
-
-export type PretrainedSparseEncodingModel = PretrainedModel & {};
-
 export type ModelConfig = {
   modelType?: string;
   embeddingDimension?: number;
@@ -408,7 +374,6 @@ export type ModelDict = {
 
 export type ModelFormValue = {
   id: string;
-  category?: MODEL_CATEGORY;
   algorithm?: MODEL_ALGORITHM;
 };
 
@@ -450,8 +415,6 @@ export enum WORKFLOW_STEP_TYPE {
   CREATE_INGEST_PIPELINE_STEP_TYPE = 'create_ingest_pipeline',
   CREATE_SEARCH_PIPELINE_STEP_TYPE = 'create_search_pipeline',
   CREATE_INDEX_STEP_TYPE = 'create_index',
-  REGISTER_LOCAL_PRETRAINED_MODEL_STEP_TYPE = 'register_local_pretrained_model',
-  REGISTER_LOCAL_SPARSE_ENCODING_MODEL_STEP_TYPE = 'register_local_sparse_encoding_model',
 }
 
 // We cannot disambiguate ingest vs. search pipelines based on workflow resource type. To work around
@@ -460,9 +423,6 @@ export enum WORKFLOW_STEP_TO_RESOURCE_TYPE_MAP {
   'create_ingest_pipeline' = 'Ingest pipeline',
   'create_search_pipeline' = 'Search pipeline',
   'create_index' = 'Index',
-  'register_local_pretrained_model' = 'Model',
-  'register_local_sparse_encoding_model' = 'Model',
-  'deploy_model' = 'Model',
 }
 
 export type WorkflowDict = {

diff --git a/public/configs/ml_processor.ts b/public/configs/ml_processor.ts
@@ -17,17 +17,14 @@ export abstract class MLProcessor extends Processor {
     this.name = 'ML Inference Processor';
     this.fields = [
       {
-        label: 'Model',
         id: 'model',
         type: 'model',
-        helpText: 'The model ID.',
-        helpLink:
-          'https://opensearch.org/docs/latest/ml-commons-plugin/integrating-ml-models/#choosing-a-model',
       },
       {
         label: 'Input Map',
         id: 'inputMap',
         type: 'map',
+        // TODO: move these fields directly into the component once design is finalized
         helpText: `An array specifying how to map fields from the ingested document to the model’s input.`,
         helpLink:
           'https://opensearch.org/docs/latest/ingest-pipelines/processors/ml-inference/#configuration-parameters',
@@ -36,6 +33,7 @@ export abstract class MLProcessor extends Processor {
         label: 'Output Map',
         id: 'outputMap',
         type: 'map',
+        // TODO: move these fields directly into the component once design is finalized
         helpText: `An array specifying how to map the model’s output to new fields.`,
         helpLink:
           'https://opensearch.org/docs/latest/ingest-pipelines/processors/ml-inference/#configuration-parameters',