Skip to content

Commit

Permalink
feat(stepfunctions-tasks): FastFile mode for SageMaker Training Job (#26675)
Browse files Browse the repository at this point in the history

According to the AWS documentation, the TrainingInputMode for a SageMaker Training Job can be one of the following: `Pipe | File | FastFile`

https://docs.aws.amazon.com/sagemaker/latest/APIReference/API_Channel.html#sagemaker-Type-Channel-InputMode

https://docs.aws.amazon.com/sagemaker/latest/APIReference/API_AlgorithmSpecification.html#API_AlgorithmSpecification_Contents


This change adds `FastFile` to the `InputMode` enum at the location linked below, to align with the official documentation.
https://github.com/aws/aws-cdk/blob/v2.90.0/packages/aws-cdk-lib/aws-stepfunctions-tasks/lib/sagemaker/base-types.ts#L458

Closes #26653.

----

*By submitting this pull request, I confirm that my contribution is made under the terms of the Apache-2.0 license*
  • Loading branch information
tmyoda committed Aug 23, 2023
1 parent 7e1c038 commit 5fef403
Show file tree
Hide file tree
Showing 10 changed files with 61 additions and 49 deletions.
Original file line number Diff line number Diff line change
@@ -1 +1 @@
{"version":"30.0.0"}
{"version":"33.0.0"}
Original file line number Diff line number Diff line change
@@ -1,15 +1,15 @@
{
"version": "30.0.0",
"version": "33.0.0",
"files": {
"81a9a9ef07dfc045ad84514bf0771e76c20386202c7c26130f4f954b8411de26": {
"11bc70dd89e4afb387b1b639ea823eb492c0e7a7dd664e50aa483f10ef3e204a": {
"source": {
"path": "integ-stepfunctions-sagemaker.template.json",
"packaging": "file"
},
"destinations": {
"current_account-current_region": {
"bucketName": "cdk-hnb659fds-assets-${AWS::AccountId}-${AWS::Region}",
"objectKey": "81a9a9ef07dfc045ad84514bf0771e76c20386202c7c26130f4f954b8411de26.json",
"objectKey": "11bc70dd89e4afb387b1b639ea823eb492c0e7a7dd664e50aa483f10ef3e204a.json",
"assumeRoleArn": "arn:${AWS::Partition}:iam::${AWS::AccountId}:role/cdk-hnb659fds-file-publishing-role-${AWS::AccountId}-${AWS::Region}"
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -276,12 +276,6 @@
"StateMachine2E01A3A5": {
"Type": "AWS::StepFunctions::StateMachine",
"Properties": {
"RoleArn": {
"Fn::GetAtt": [
"StateMachineRoleB840431D",
"Arn"
]
},
"DefinitionString": {
"Fn::Join": [
"",
Expand All @@ -297,7 +291,7 @@
"Arn"
]
},
"\",\"AlgorithmSpecification\":{\"TrainingInputMode\":\"File\",\"AlgorithmName\":\"arn:aws:sagemaker:us-east-1:865070037744:algorithm/scikit-decision-trees-15423055-57b73412d2e93e9239e4e16f83298b8f\"},\"InputDataConfig\":[{\"ChannelName\":\"InputData\",\"DataSource\":{\"S3DataSource\":{\"S3Uri\":\"https://s3.",
"\",\"AlgorithmSpecification\":{\"TrainingInputMode\":\"FastFile\",\"AlgorithmName\":\"BlazingText\"},\"InputDataConfig\":[{\"ChannelName\":\"InputData\",\"DataSource\":{\"S3DataSource\":{\"S3Uri\":\"https://s3.",
{
"Ref": "AWS::Region"
},
Expand All @@ -324,6 +318,12 @@
"/result/\"},\"ResourceConfig\":{\"InstanceCount\":1,\"InstanceType\":\"ml.m4.xlarge\",\"VolumeSizeInGB\":10},\"StoppingCondition\":{\"MaxRuntimeInSeconds\":3600}}}}}"
]
]
},
"RoleArn": {
"Fn::GetAtt": [
"StateMachineRoleB840431D",
"Arn"
]
}
},
"DependsOn": [
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
{
"version": "30.0.0",
"version": "33.0.0",
"testCases": {
"integ.create-training-job": {
"stacks": [
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
{
"version": "30.0.0",
"version": "33.0.0",
"artifacts": {
"integ-stepfunctions-sagemaker.assets": {
"type": "cdk:asset-manifest",
Expand All @@ -17,7 +17,7 @@
"validateOnSynth": false,
"assumeRoleArn": "arn:${AWS::Partition}:iam::${AWS::AccountId}:role/cdk-hnb659fds-deploy-role-${AWS::AccountId}-${AWS::Region}",
"cloudFormationExecutionRoleArn": "arn:${AWS::Partition}:iam::${AWS::AccountId}:role/cdk-hnb659fds-cfn-exec-role-${AWS::AccountId}-${AWS::Region}",
"stackTemplateAssetObjectUrl": "s3://cdk-hnb659fds-assets-${AWS::AccountId}-${AWS::Region}/81a9a9ef07dfc045ad84514bf0771e76c20386202c7c26130f4f954b8411de26.json",
"stackTemplateAssetObjectUrl": "s3://cdk-hnb659fds-assets-${AWS::AccountId}-${AWS::Region}/11bc70dd89e4afb387b1b639ea823eb492c0e7a7dd664e50aa483f10ef3e204a.json",
"requiresBootstrapStackVersion": 6,
"bootstrapStackVersionSsmParameter": "/cdk-bootstrap/hnb659fds/version",
"additionalDependencies": [
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -49,13 +49,13 @@
}
},
"constructInfo": {
"fqn": "@aws-cdk/aws-kms.CfnKey",
"fqn": "aws-cdk-lib.aws_kms.CfnKey",
"version": "0.0.0"
}
}
},
"constructInfo": {
"fqn": "@aws-cdk/aws-kms.Key",
"fqn": "aws-cdk-lib.aws_kms.Key",
"version": "0.0.0"
}
},
Expand Down Expand Up @@ -87,13 +87,13 @@
}
},
"constructInfo": {
"fqn": "@aws-cdk/aws-s3.CfnBucket",
"fqn": "aws-cdk-lib.aws_s3.CfnBucket",
"version": "0.0.0"
}
}
},
"constructInfo": {
"fqn": "@aws-cdk/aws-s3.Bucket",
"fqn": "aws-cdk-lib.aws_s3.Bucket",
"version": "0.0.0"
}
},
Expand All @@ -109,7 +109,7 @@
"id": "ImportSagemakerRole",
"path": "integ-stepfunctions-sagemaker/TrainTask/SagemakerRole/ImportSagemakerRole",
"constructInfo": {
"fqn": "@aws-cdk/core.Resource",
"fqn": "aws-cdk-lib.Resource",
"version": "0.0.0"
}
},
Expand Down Expand Up @@ -156,7 +156,7 @@
}
},
"constructInfo": {
"fqn": "@aws-cdk/aws-iam.CfnRole",
"fqn": "aws-cdk-lib.aws_iam.CfnRole",
"version": "0.0.0"
}
},
Expand Down Expand Up @@ -264,25 +264,25 @@
}
},
"constructInfo": {
"fqn": "@aws-cdk/aws-iam.CfnPolicy",
"fqn": "aws-cdk-lib.aws_iam.CfnPolicy",
"version": "0.0.0"
}
}
},
"constructInfo": {
"fqn": "@aws-cdk/aws-iam.Policy",
"fqn": "aws-cdk-lib.aws_iam.Policy",
"version": "0.0.0"
}
}
},
"constructInfo": {
"fqn": "@aws-cdk/aws-iam.Role",
"fqn": "aws-cdk-lib.aws_iam.Role",
"version": "0.0.0"
}
}
},
"constructInfo": {
"fqn": "@aws-cdk/aws-stepfunctions-tasks.SageMakerCreateTrainingJob",
"fqn": "aws-cdk-lib.aws_stepfunctions_tasks.SageMakerCreateTrainingJob",
"version": "0.0.0"
}
},
Expand All @@ -298,7 +298,7 @@
"id": "ImportRole",
"path": "integ-stepfunctions-sagemaker/StateMachine/Role/ImportRole",
"constructInfo": {
"fqn": "@aws-cdk/core.Resource",
"fqn": "aws-cdk-lib.Resource",
"version": "0.0.0"
}
},
Expand All @@ -323,7 +323,7 @@
}
},
"constructInfo": {
"fqn": "@aws-cdk/aws-iam.CfnRole",
"fqn": "aws-cdk-lib.aws_iam.CfnRole",
"version": "0.0.0"
}
},
Expand Down Expand Up @@ -399,19 +399,19 @@
}
},
"constructInfo": {
"fqn": "@aws-cdk/aws-iam.CfnPolicy",
"fqn": "aws-cdk-lib.aws_iam.CfnPolicy",
"version": "0.0.0"
}
}
},
"constructInfo": {
"fqn": "@aws-cdk/aws-iam.Policy",
"fqn": "aws-cdk-lib.aws_iam.Policy",
"version": "0.0.0"
}
}
},
"constructInfo": {
"fqn": "@aws-cdk/aws-iam.Role",
"fqn": "aws-cdk-lib.aws_iam.Role",
"version": "0.0.0"
}
},
Expand All @@ -421,12 +421,6 @@
"attributes": {
"aws:cdk:cloudformation:type": "AWS::StepFunctions::StateMachine",
"aws:cdk:cloudformation:props": {
"roleArn": {
"Fn::GetAtt": [
"StateMachineRoleB840431D",
"Arn"
]
},
"definitionString": {
"Fn::Join": [
"",
Expand All @@ -442,7 +436,7 @@
"Arn"
]
},
"\",\"AlgorithmSpecification\":{\"TrainingInputMode\":\"File\",\"AlgorithmName\":\"arn:aws:sagemaker:us-east-1:865070037744:algorithm/scikit-decision-trees-15423055-57b73412d2e93e9239e4e16f83298b8f\"},\"InputDataConfig\":[{\"ChannelName\":\"InputData\",\"DataSource\":{\"S3DataSource\":{\"S3Uri\":\"https://s3.",
"\",\"AlgorithmSpecification\":{\"TrainingInputMode\":\"FastFile\",\"AlgorithmName\":\"BlazingText\"},\"InputDataConfig\":[{\"ChannelName\":\"InputData\",\"DataSource\":{\"S3DataSource\":{\"S3Uri\":\"https://s3.",
{
"Ref": "AWS::Region"
},
Expand All @@ -469,47 +463,53 @@
"/result/\"},\"ResourceConfig\":{\"InstanceCount\":1,\"InstanceType\":\"ml.m4.xlarge\",\"VolumeSizeInGB\":10},\"StoppingCondition\":{\"MaxRuntimeInSeconds\":3600}}}}}"
]
]
},
"roleArn": {
"Fn::GetAtt": [
"StateMachineRoleB840431D",
"Arn"
]
}
}
},
"constructInfo": {
"fqn": "@aws-cdk/aws-stepfunctions.CfnStateMachine",
"fqn": "aws-cdk-lib.aws_stepfunctions.CfnStateMachine",
"version": "0.0.0"
}
}
},
"constructInfo": {
"fqn": "@aws-cdk/aws-stepfunctions.StateMachine",
"fqn": "aws-cdk-lib.aws_stepfunctions.StateMachine",
"version": "0.0.0"
}
},
"stateMachineArn": {
"id": "stateMachineArn",
"path": "integ-stepfunctions-sagemaker/stateMachineArn",
"constructInfo": {
"fqn": "@aws-cdk/core.CfnOutput",
"fqn": "aws-cdk-lib.CfnOutput",
"version": "0.0.0"
}
},
"BootstrapVersion": {
"id": "BootstrapVersion",
"path": "integ-stepfunctions-sagemaker/BootstrapVersion",
"constructInfo": {
"fqn": "@aws-cdk/core.CfnParameter",
"fqn": "aws-cdk-lib.CfnParameter",
"version": "0.0.0"
}
},
"CheckBootstrapVersion": {
"id": "CheckBootstrapVersion",
"path": "integ-stepfunctions-sagemaker/CheckBootstrapVersion",
"constructInfo": {
"fqn": "@aws-cdk/core.CfnRule",
"fqn": "aws-cdk-lib.CfnRule",
"version": "0.0.0"
}
}
},
"constructInfo": {
"fqn": "@aws-cdk/core.Stack",
"fqn": "aws-cdk-lib.Stack",
"version": "0.0.0"
}
},
Expand All @@ -518,12 +518,12 @@
"path": "Tree",
"constructInfo": {
"fqn": "constructs.Construct",
"version": "10.1.237"
"version": "10.2.69"
}
}
},
"constructInfo": {
"fqn": "@aws-cdk/core.App",
"fqn": "aws-cdk-lib.App",
"version": "0.0.0"
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ import { Key } from 'aws-cdk-lib/aws-kms';
import { Bucket, BucketEncryption } from 'aws-cdk-lib/aws-s3';
import { StateMachine } from 'aws-cdk-lib/aws-stepfunctions';
import { App, CfnOutput, RemovalPolicy, Stack } from 'aws-cdk-lib';
import { S3Location, SageMakerCreateTrainingJob } from 'aws-cdk-lib/aws-stepfunctions-tasks';
import { S3Location, SageMakerCreateTrainingJob, InputMode } from 'aws-cdk-lib/aws-stepfunctions-tasks';

/*
* Creates a state machine with a task state to create a training job in AWS SageMaker
Expand Down Expand Up @@ -35,7 +35,8 @@ const trainingData = new Bucket(stack, 'TrainingData', {
const sm = new StateMachine(stack, 'StateMachine', {
definition: new SageMakerCreateTrainingJob(stack, 'TrainTask', {
algorithmSpecification: {
algorithmName: 'arn:aws:sagemaker:us-east-1:865070037744:algorithm/scikit-decision-trees-15423055-57b73412d2e93e9239e4e16f83298b8f',
algorithmName: 'BlazingText',
trainingInputMode: InputMode.FAST_FILE,
},
inputDataConfig: [{
channelName: 'InputData',
Expand Down
6 changes: 6 additions & 0 deletions packages/aws-cdk-lib/aws-stepfunctions-tasks/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -1060,6 +1060,12 @@ new tasks.SageMakerCreateTrainingJob(this, 'TrainSagemaker', {
});
```

You can specify [TrainingInputMode](https://docs.aws.amazon.com/sagemaker/latest/APIReference/API_AlgorithmSpecification.html#API_AlgorithmSpecification_Contents) via the `trainingInputMode` property of `algorithmSpecification`.

- To download the data from Amazon Simple Storage Service (Amazon S3) to the provisioned ML storage volume and mount the directory to a Docker volume, choose `InputMode.FILE` if the algorithm supports it.
- To stream data directly from Amazon S3 to the container, choose `InputMode.PIPE` if the algorithm supports it.
- To stream data directly from Amazon S3 to the container with no code changes while still providing file system access to the data, choose `InputMode.FAST_FILE` if the algorithm supports it.

### Create Transform Job

You can call the [`CreateTransformJob`](https://docs.aws.amazon.com/sagemaker/latest/dg/API_CreateTransformJob.html) API from a `Task` state.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -464,7 +464,12 @@ export enum InputMode {
/**
* File mode.
*/
FILE = 'File'
FILE = 'File',

/**
* FastFile mode.
*/
FAST_FILE = 'FastFile'
}

/**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -134,7 +134,7 @@ test('create complex training job', () => {
role,
algorithmSpecification: {
algorithmName: 'BlazingText',
trainingInputMode: tasks.InputMode.FILE,
trainingInputMode: tasks.InputMode.FAST_FILE,
metricDefinitions: [
{
name: 'mymetric', regex: 'regex_pattern',
Expand Down Expand Up @@ -218,7 +218,7 @@ test('create complex training job', () => {
TrainingJobName: 'MyTrainJob',
RoleArn: { 'Fn::GetAtt': ['Role1ABCC5F0', 'Arn'] },
AlgorithmSpecification: {
TrainingInputMode: 'File',
TrainingInputMode: 'FastFile',
AlgorithmName: 'BlazingText',
MetricDefinitions: [
{ Name: 'mymetric', Regex: 'regex_pattern' },
Expand Down

0 comments on commit 5fef403

Please sign in to comment.