From 5fef403825110205d472bae90b63866c850efed6 Mon Sep 17 00:00:00 2001 From: Tomoya Oda <38136327+tmyoda@users.noreply.github.com> Date: Wed, 23 Aug 2023 22:48:43 +0100 Subject: [PATCH] feat(stepfunctions-tasks): FastFile mode for SageMaker Training Job (#26675) According to the AWS documentation, the TrainingInputMode for a SageMaker Training Job can be one of the following: `Pipe | File | FastFile` https://docs.aws.amazon.com/sagemaker/latest/APIReference/API_Channel.html#sagemaker-Type-Channel-InputMode https://docs.aws.amazon.com/sagemaker/latest/APIReference/API_AlgorithmSpecification.html#API_AlgorithmSpecification_Contents I have just added `FastFile` below to align with the official documentation. https://github.com/aws/aws-cdk/blob/v2.90.0/packages/aws-cdk-lib/aws-stepfunctions-tasks/lib/sagemaker/base-types.ts#L458 Closes #26653. ---- *By submitting this pull request, I confirm that my contribution is made under the terms of the Apache-2.0 license* --- .../cdk.out | 2 +- .../integ-stepfunctions-sagemaker.assets.json | 6 +- ...nteg-stepfunctions-sagemaker.template.json | 14 ++--- .../integ.json | 2 +- .../manifest.json | 4 +- .../tree.json | 60 +++++++++---------- .../sagemaker/integ.create-training-job.ts | 5 +- .../aws-stepfunctions-tasks/README.md | 6 ++ .../lib/sagemaker/base-types.ts | 7 ++- .../sagemaker/create-training-job.test.ts | 4 +- 10 files changed, 61 insertions(+), 49 deletions(-) diff --git a/packages/@aws-cdk-testing/framework-integ/test/aws-stepfunctions-tasks/test/sagemaker/integ.create-training-job.js.snapshot/cdk.out b/packages/@aws-cdk-testing/framework-integ/test/aws-stepfunctions-tasks/test/sagemaker/integ.create-training-job.js.snapshot/cdk.out index ae4b03c54e770..560dae10d018f 100644 --- a/packages/@aws-cdk-testing/framework-integ/test/aws-stepfunctions-tasks/test/sagemaker/integ.create-training-job.js.snapshot/cdk.out +++ b/packages/@aws-cdk-testing/framework-integ/test/aws-stepfunctions-tasks/test/sagemaker/integ.create-training-job.js.snapshot/cdk.out @@ -1 +1 @@ -{"version":"30.0.0"} \ No newline at end of file +{"version":"33.0.0"} \ No newline at end of file diff --git a/packages/@aws-cdk-testing/framework-integ/test/aws-stepfunctions-tasks/test/sagemaker/integ.create-training-job.js.snapshot/integ-stepfunctions-sagemaker.assets.json b/packages/@aws-cdk-testing/framework-integ/test/aws-stepfunctions-tasks/test/sagemaker/integ.create-training-job.js.snapshot/integ-stepfunctions-sagemaker.assets.json index 4eece88e7c92c..348af0fc6edfa 100644 --- a/packages/@aws-cdk-testing/framework-integ/test/aws-stepfunctions-tasks/test/sagemaker/integ.create-training-job.js.snapshot/integ-stepfunctions-sagemaker.assets.json +++ b/packages/@aws-cdk-testing/framework-integ/test/aws-stepfunctions-tasks/test/sagemaker/integ.create-training-job.js.snapshot/integ-stepfunctions-sagemaker.assets.json @@ -1,7 +1,7 @@ { - "version": "30.0.0", + "version": "33.0.0", "files": { - "81a9a9ef07dfc045ad84514bf0771e76c20386202c7c26130f4f954b8411de26": { + "11bc70dd89e4afb387b1b639ea823eb492c0e7a7dd664e50aa483f10ef3e204a": { "source": { "path": "integ-stepfunctions-sagemaker.template.json", "packaging": "file" @@ -9,7 +9,7 @@ "destinations": { "current_account-current_region": { "bucketName": "cdk-hnb659fds-assets-${AWS::AccountId}-${AWS::Region}", - "objectKey": "81a9a9ef07dfc045ad84514bf0771e76c20386202c7c26130f4f954b8411de26.json", + "objectKey": "11bc70dd89e4afb387b1b639ea823eb492c0e7a7dd664e50aa483f10ef3e204a.json", "assumeRoleArn": "arn:${AWS::Partition}:iam::${AWS::AccountId}:role/cdk-hnb659fds-file-publishing-role-${AWS::AccountId}-${AWS::Region}" } } diff --git a/packages/@aws-cdk-testing/framework-integ/test/aws-stepfunctions-tasks/test/sagemaker/integ.create-training-job.js.snapshot/integ-stepfunctions-sagemaker.template.json b/packages/@aws-cdk-testing/framework-integ/test/aws-stepfunctions-tasks/test/sagemaker/integ.create-training-job.js.snapshot/integ-stepfunctions-sagemaker.template.json index 61c634750abc4..c3525778207e9 100644 --- a/packages/@aws-cdk-testing/framework-integ/test/aws-stepfunctions-tasks/test/sagemaker/integ.create-training-job.js.snapshot/integ-stepfunctions-sagemaker.template.json +++ b/packages/@aws-cdk-testing/framework-integ/test/aws-stepfunctions-tasks/test/sagemaker/integ.create-training-job.js.snapshot/integ-stepfunctions-sagemaker.template.json @@ -276,12 +276,6 @@ "StateMachine2E01A3A5": { "Type": "AWS::StepFunctions::StateMachine", "Properties": { - "RoleArn": { - "Fn::GetAtt": [ - "StateMachineRoleB840431D", - "Arn" - ] - }, "DefinitionString": { "Fn::Join": [ "", @@ -297,7 +291,7 @@ "Arn" ] }, - "\",\"AlgorithmSpecification\":{\"TrainingInputMode\":\"File\",\"AlgorithmName\":\"arn:aws:sagemaker:us-east-1:865070037744:algorithm/scikit-decision-trees-15423055-57b73412d2e93e9239e4e16f83298b8f\"},\"InputDataConfig\":[{\"ChannelName\":\"InputData\",\"DataSource\":{\"S3DataSource\":{\"S3Uri\":\"https://s3.", + "\",\"AlgorithmSpecification\":{\"TrainingInputMode\":\"FastFile\",\"AlgorithmName\":\"BlazingText\"},\"InputDataConfig\":[{\"ChannelName\":\"InputData\",\"DataSource\":{\"S3DataSource\":{\"S3Uri\":\"https://s3.", { "Ref": "AWS::Region" }, @@ -324,6 +318,12 @@ "/result/\"},\"ResourceConfig\":{\"InstanceCount\":1,\"InstanceType\":\"ml.m4.xlarge\",\"VolumeSizeInGB\":10},\"StoppingCondition\":{\"MaxRuntimeInSeconds\":3600}}}}}" ] ] + }, + "RoleArn": { + "Fn::GetAtt": [ + "StateMachineRoleB840431D", + "Arn" + ] } }, "DependsOn": [ diff --git a/packages/@aws-cdk-testing/framework-integ/test/aws-stepfunctions-tasks/test/sagemaker/integ.create-training-job.js.snapshot/integ.json b/packages/@aws-cdk-testing/framework-integ/test/aws-stepfunctions-tasks/test/sagemaker/integ.create-training-job.js.snapshot/integ.json index 81307a7853194..77a3a44fc5844 100644 --- a/packages/@aws-cdk-testing/framework-integ/test/aws-stepfunctions-tasks/test/sagemaker/integ.create-training-job.js.snapshot/integ.json +++ b/packages/@aws-cdk-testing/framework-integ/test/aws-stepfunctions-tasks/test/sagemaker/integ.create-training-job.js.snapshot/integ.json @@ -1,5 +1,5 @@ { - "version": "30.0.0", + "version": "33.0.0", "testCases": { "integ.create-training-job": { "stacks": [ diff --git a/packages/@aws-cdk-testing/framework-integ/test/aws-stepfunctions-tasks/test/sagemaker/integ.create-training-job.js.snapshot/manifest.json b/packages/@aws-cdk-testing/framework-integ/test/aws-stepfunctions-tasks/test/sagemaker/integ.create-training-job.js.snapshot/manifest.json index 4f8b732c01a3f..43d36434b4bfc 100644 --- a/packages/@aws-cdk-testing/framework-integ/test/aws-stepfunctions-tasks/test/sagemaker/integ.create-training-job.js.snapshot/manifest.json +++ b/packages/@aws-cdk-testing/framework-integ/test/aws-stepfunctions-tasks/test/sagemaker/integ.create-training-job.js.snapshot/manifest.json @@ -1,5 +1,5 @@ { - "version": "30.0.0", + "version": "33.0.0", "artifacts": { "integ-stepfunctions-sagemaker.assets": { "type": "cdk:asset-manifest", @@ -17,7 +17,7 @@ "validateOnSynth": false, "assumeRoleArn": "arn:${AWS::Partition}:iam::${AWS::AccountId}:role/cdk-hnb659fds-deploy-role-${AWS::AccountId}-${AWS::Region}", "cloudFormationExecutionRoleArn": "arn:${AWS::Partition}:iam::${AWS::AccountId}:role/cdk-hnb659fds-cfn-exec-role-${AWS::AccountId}-${AWS::Region}", - "stackTemplateAssetObjectUrl": "s3://cdk-hnb659fds-assets-${AWS::AccountId}-${AWS::Region}/81a9a9ef07dfc045ad84514bf0771e76c20386202c7c26130f4f954b8411de26.json", + "stackTemplateAssetObjectUrl": "s3://cdk-hnb659fds-assets-${AWS::AccountId}-${AWS::Region}/11bc70dd89e4afb387b1b639ea823eb492c0e7a7dd664e50aa483f10ef3e204a.json", "requiresBootstrapStackVersion": 6, "bootstrapStackVersionSsmParameter": "/cdk-bootstrap/hnb659fds/version", "additionalDependencies": [ diff --git a/packages/@aws-cdk-testing/framework-integ/test/aws-stepfunctions-tasks/test/sagemaker/integ.create-training-job.js.snapshot/tree.json b/packages/@aws-cdk-testing/framework-integ/test/aws-stepfunctions-tasks/test/sagemaker/integ.create-training-job.js.snapshot/tree.json index 99805c2c1e4b0..3f52cb2cf799f 100644 --- a/packages/@aws-cdk-testing/framework-integ/test/aws-stepfunctions-tasks/test/sagemaker/integ.create-training-job.js.snapshot/tree.json +++ b/packages/@aws-cdk-testing/framework-integ/test/aws-stepfunctions-tasks/test/sagemaker/integ.create-training-job.js.snapshot/tree.json @@ -49,13 +49,13 @@ } }, "constructInfo": { - "fqn": "@aws-cdk/aws-kms.CfnKey", + "fqn": "aws-cdk-lib.aws_kms.CfnKey", "version": "0.0.0" } } }, "constructInfo": { - "fqn": "@aws-cdk/aws-kms.Key", + "fqn": "aws-cdk-lib.aws_kms.Key", "version": "0.0.0" } }, @@ -87,13 +87,13 @@ } }, "constructInfo": { - "fqn": "@aws-cdk/aws-s3.CfnBucket", + "fqn": "aws-cdk-lib.aws_s3.CfnBucket", "version": "0.0.0" } } }, "constructInfo": { - "fqn": "@aws-cdk/aws-s3.Bucket", + "fqn": "aws-cdk-lib.aws_s3.Bucket", "version": "0.0.0" } }, @@ -109,7 +109,7 @@ "id": "ImportSagemakerRole", "path": "integ-stepfunctions-sagemaker/TrainTask/SagemakerRole/ImportSagemakerRole", "constructInfo": { - "fqn": "@aws-cdk/core.Resource", + "fqn": "aws-cdk-lib.Resource", "version": "0.0.0" } }, @@ -156,7 +156,7 @@ } }, "constructInfo": { - "fqn": "@aws-cdk/aws-iam.CfnRole", + "fqn": "aws-cdk-lib.aws_iam.CfnRole", "version": "0.0.0" } }, @@ -264,25 +264,25 @@ } }, "constructInfo": { - "fqn": "@aws-cdk/aws-iam.CfnPolicy", + "fqn": "aws-cdk-lib.aws_iam.CfnPolicy", "version": "0.0.0" } } }, "constructInfo": { - "fqn": "@aws-cdk/aws-iam.Policy", + "fqn": "aws-cdk-lib.aws_iam.Policy", "version": "0.0.0" } } }, "constructInfo": { - "fqn": "@aws-cdk/aws-iam.Role", + "fqn": "aws-cdk-lib.aws_iam.Role", "version": "0.0.0" } } }, "constructInfo": { - "fqn": "@aws-cdk/aws-stepfunctions-tasks.SageMakerCreateTrainingJob", + "fqn": "aws-cdk-lib.aws_stepfunctions_tasks.SageMakerCreateTrainingJob", "version": "0.0.0" } }, @@ -298,7 +298,7 @@ "id": "ImportRole", "path": "integ-stepfunctions-sagemaker/StateMachine/Role/ImportRole", "constructInfo": { - "fqn": "@aws-cdk/core.Resource", + "fqn": "aws-cdk-lib.Resource", "version": "0.0.0" } }, @@ -323,7 +323,7 @@ } }, "constructInfo": { - "fqn": "@aws-cdk/aws-iam.CfnRole", + "fqn": "aws-cdk-lib.aws_iam.CfnRole", "version": "0.0.0" } }, @@ -399,19 +399,19 @@ } }, "constructInfo": { - "fqn": "@aws-cdk/aws-iam.CfnPolicy", + "fqn": "aws-cdk-lib.aws_iam.CfnPolicy", "version": "0.0.0" } } }, "constructInfo": { - "fqn": "@aws-cdk/aws-iam.Policy", + "fqn": "aws-cdk-lib.aws_iam.Policy", "version": "0.0.0" } } }, "constructInfo": { - "fqn": "@aws-cdk/aws-iam.Role", + "fqn": "aws-cdk-lib.aws_iam.Role", "version": "0.0.0" } }, @@ -421,12 +421,6 @@ "attributes": { "aws:cdk:cloudformation:type": "AWS::StepFunctions::StateMachine", "aws:cdk:cloudformation:props": { - "roleArn": { - "Fn::GetAtt": [ - "StateMachineRoleB840431D", - "Arn" - ] - }, "definitionString": { "Fn::Join": [ "", @@ -442,7 +436,7 @@ "Arn" ] }, - "\",\"AlgorithmSpecification\":{\"TrainingInputMode\":\"File\",\"AlgorithmName\":\"arn:aws:sagemaker:us-east-1:865070037744:algorithm/scikit-decision-trees-15423055-57b73412d2e93e9239e4e16f83298b8f\"},\"InputDataConfig\":[{\"ChannelName\":\"InputData\",\"DataSource\":{\"S3DataSource\":{\"S3Uri\":\"https://s3.", + "\",\"AlgorithmSpecification\":{\"TrainingInputMode\":\"FastFile\",\"AlgorithmName\":\"BlazingText\"},\"InputDataConfig\":[{\"ChannelName\":\"InputData\",\"DataSource\":{\"S3DataSource\":{\"S3Uri\":\"https://s3.", { "Ref": "AWS::Region" }, @@ -469,17 +463,23 @@ "/result/\"},\"ResourceConfig\":{\"InstanceCount\":1,\"InstanceType\":\"ml.m4.xlarge\",\"VolumeSizeInGB\":10},\"StoppingCondition\":{\"MaxRuntimeInSeconds\":3600}}}}}" ] ] + }, + "roleArn": { + "Fn::GetAtt": [ + "StateMachineRoleB840431D", + "Arn" + ] } } }, "constructInfo": { - "fqn": "@aws-cdk/aws-stepfunctions.CfnStateMachine", + "fqn": "aws-cdk-lib.aws_stepfunctions.CfnStateMachine", "version": "0.0.0" } } }, "constructInfo": { - "fqn": "@aws-cdk/aws-stepfunctions.StateMachine", + "fqn": "aws-cdk-lib.aws_stepfunctions.StateMachine", "version": "0.0.0" } }, @@ -487,7 +487,7 @@ "id": "stateMachineArn", "path": "integ-stepfunctions-sagemaker/stateMachineArn", "constructInfo": { - "fqn": "@aws-cdk/core.CfnOutput", + "fqn": "aws-cdk-lib.CfnOutput", "version": "0.0.0" } }, @@ -495,7 +495,7 @@ "id": "BootstrapVersion", "path": "integ-stepfunctions-sagemaker/BootstrapVersion", "constructInfo": { - "fqn": "@aws-cdk/core.CfnParameter", + "fqn": "aws-cdk-lib.CfnParameter", "version": "0.0.0" } }, @@ -503,13 +503,13 @@ "id": "CheckBootstrapVersion", "path": "integ-stepfunctions-sagemaker/CheckBootstrapVersion", "constructInfo": { - "fqn": "@aws-cdk/core.CfnRule", + "fqn": "aws-cdk-lib.CfnRule", "version": "0.0.0" } } }, "constructInfo": { - "fqn": "@aws-cdk/core.Stack", + "fqn": "aws-cdk-lib.Stack", "version": "0.0.0" } }, @@ -518,12 +518,12 @@ "path": "Tree", "constructInfo": { "fqn": "constructs.Construct", - "version": "10.1.237" + "version": "10.2.69" } } }, "constructInfo": { - "fqn": "@aws-cdk/core.App", + "fqn": "aws-cdk-lib.App", "version": "0.0.0" } } diff --git a/packages/@aws-cdk-testing/framework-integ/test/aws-stepfunctions-tasks/test/sagemaker/integ.create-training-job.ts b/packages/@aws-cdk-testing/framework-integ/test/aws-stepfunctions-tasks/test/sagemaker/integ.create-training-job.ts index aafd6f282b410..3d3e138e080cd 100644 --- a/packages/@aws-cdk-testing/framework-integ/test/aws-stepfunctions-tasks/test/sagemaker/integ.create-training-job.ts +++ b/packages/@aws-cdk-testing/framework-integ/test/aws-stepfunctions-tasks/test/sagemaker/integ.create-training-job.ts @@ -2,7 +2,7 @@ import { Key } from 'aws-cdk-lib/aws-kms'; import { Bucket, BucketEncryption } from 'aws-cdk-lib/aws-s3'; import { StateMachine } from 'aws-cdk-lib/aws-stepfunctions'; import { App, CfnOutput, RemovalPolicy, Stack } from 'aws-cdk-lib'; -import { S3Location, SageMakerCreateTrainingJob } from 'aws-cdk-lib/aws-stepfunctions-tasks'; +import { S3Location, SageMakerCreateTrainingJob, InputMode } from 'aws-cdk-lib/aws-stepfunctions-tasks'; /* * Creates a state machine with a task state to create a training job in AWS SageMaker @@ -35,7 +35,8 @@ const trainingData = new Bucket(stack, 'TrainingData', { const sm = new StateMachine(stack, 'StateMachine', { definition: new SageMakerCreateTrainingJob(stack, 'TrainTask', { algorithmSpecification: { - algorithmName: 'arn:aws:sagemaker:us-east-1:865070037744:algorithm/scikit-decision-trees-15423055-57b73412d2e93e9239e4e16f83298b8f', + algorithmName: 'BlazingText', + trainingInputMode: InputMode.FAST_FILE, }, inputDataConfig: [{ channelName: 'InputData', diff --git a/packages/aws-cdk-lib/aws-stepfunctions-tasks/README.md b/packages/aws-cdk-lib/aws-stepfunctions-tasks/README.md index 7cff3c209fb2f..d291ebf894b4c 100644 --- a/packages/aws-cdk-lib/aws-stepfunctions-tasks/README.md +++ b/packages/aws-cdk-lib/aws-stepfunctions-tasks/README.md @@ -1060,6 +1060,12 @@ new tasks.SageMakerCreateTrainingJob(this, 'TrainSagemaker', { }); ``` +You can specify [TrainingInputMode](https://docs.aws.amazon.com/sagemaker/latest/APIReference/API_AlgorithmSpecification.html#API_AlgorithmSpecification_Contents) via the trainingInputMode property. + +- To download the data from Amazon Simple Storage Service (Amazon S3) to the provisioned ML storage volume, and mount the directory to a Docker volume, choose `InputMode.FILE` if an algorithm supports it. +- To stream data directly from Amazon S3 to the container, choose `InputMode.PIPE` if an algorithm supports it. +- To stream data directly from Amazon S3 to the container with no code changes and to provide file system access to the data, choose `InputMode.FAST_FILE` if an algorithm supports it. + ### Create Transform Job You can call the [`CreateTransformJob`](https://docs.aws.amazon.com/sagemaker/latest/dg/API_CreateTransformJob.html) API from a `Task` state. diff --git a/packages/aws-cdk-lib/aws-stepfunctions-tasks/lib/sagemaker/base-types.ts b/packages/aws-cdk-lib/aws-stepfunctions-tasks/lib/sagemaker/base-types.ts index 7abaea9b6b0e6..6fe886c8c7846 100644 --- a/packages/aws-cdk-lib/aws-stepfunctions-tasks/lib/sagemaker/base-types.ts +++ b/packages/aws-cdk-lib/aws-stepfunctions-tasks/lib/sagemaker/base-types.ts @@ -464,7 +464,12 @@ export enum InputMode { /** * File mode. */ - FILE = 'File' + FILE = 'File', + + /** + * FastFile mode. + */ + FAST_FILE = 'FastFile' } /** diff --git a/packages/aws-cdk-lib/aws-stepfunctions-tasks/test/sagemaker/create-training-job.test.ts b/packages/aws-cdk-lib/aws-stepfunctions-tasks/test/sagemaker/create-training-job.test.ts index 31bf02c7c8a18..8e0f1a43df1fa 100644 --- a/packages/aws-cdk-lib/aws-stepfunctions-tasks/test/sagemaker/create-training-job.test.ts +++ b/packages/aws-cdk-lib/aws-stepfunctions-tasks/test/sagemaker/create-training-job.test.ts @@ -134,7 +134,7 @@ test('create complex training job', () => { role, algorithmSpecification: { algorithmName: 'BlazingText', - trainingInputMode: tasks.InputMode.FILE, + trainingInputMode: tasks.InputMode.FAST_FILE, metricDefinitions: [ { name: 'mymetric', regex: 'regex_pattern', @@ -218,7 +218,7 @@ test('create complex training job', () => { TrainingJobName: 'MyTrainJob', RoleArn: { 'Fn::GetAtt': ['Role1ABCC5F0', 'Arn'] }, AlgorithmSpecification: { - TrainingInputMode: 'File', + TrainingInputMode: 'FastFile', AlgorithmName: 'BlazingText', MetricDefinitions: [ { Name: 'mymetric', Regex: 'regex_pattern' },