From 3ad90d15b32d3b02e76f199426bfbcbafc720ec0 Mon Sep 17 00:00:00 2001
From: Satish Pasumarthi <35979860+satishpasumarthi@users.noreply.github.com>
Date: Fri, 8 Jul 2022 15:29:43 -0700
Subject: [PATCH] fix: CI (#234)

---
 CONTRIBUTING.md                               |  6 +-
 buildspec-gputests.yml                        | 66 +++++++++----------
 buildspec-release.yml                         |  2 +-
 buildspec-unittests.yml                       |  2 +-
 buildspec.yml                                 | 28 ++++----
 setup.py                                      |  6 +-
 test/conftest.py                              |  4 +-
 .../{1.6.0 => 1.11.0}/Dockerfile.dlc.cpu      |  0
 .../{1.6.0 => 1.11.0}/Dockerfile.dlc.gpu      |  2 +-
 .../{1.6.0 => 1.11.0}/Dockerfile.pytorch      |  2 +-
 test/container/1.4.0/Dockerfile.dlc.cpu       | 10 ---
 test/container/1.4.0/Dockerfile.dlc.gpu       | 28 --------
 test/container/1.4.0/Dockerfile.pytorch       | 20 ------
 test/integration/sagemaker/test_horovod.py    |  2 +-
 tox.ini                                       |  2 +-
 15 files changed, 56 insertions(+), 124 deletions(-)
 rename test/container/{1.6.0 => 1.11.0}/Dockerfile.dlc.cpu (100%)
 rename test/container/{1.6.0 => 1.11.0}/Dockerfile.dlc.gpu (66%)
 rename test/container/{1.6.0 => 1.11.0}/Dockerfile.pytorch (92%)
 delete mode 100644 test/container/1.4.0/Dockerfile.dlc.cpu
 delete mode 100644 test/container/1.4.0/Dockerfile.dlc.gpu
 delete mode 100644 test/container/1.4.0/Dockerfile.pytorch

diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
index 216b1a11..a58134e3 100644
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -11,7 +11,7 @@ information to effectively respond to your bug report or contribution.
 
 We welcome you to use the GitHub issue tracker to report bugs or suggest features.
 
-When filing an issue, please check [existing open](https://github.com/aws-samples/sagemaker-pytorch-containers/issues), or [recently closed](https://github.com/aws-samples/sagemaker-pytorch-containers/issues?utf8=%E2%9C%93&q=is%3Aissue%20is%3Aclosed%20), issues to make sure somebody else hasn't already 
+When filing an issue, please check [existing open](https://github.com/aws/sagemaker-pytorch-training-toolkit/issues), or [recently closed](https://github.com/aws/sagemaker-pytorch-training-toolkit/issues?utf8=%E2%9C%93&q=is%3Aissue%20is%3Aclosed%20), issues to make sure somebody else hasn't already
 reported the issue. Please try to include as much information as you can. Details like these are incredibly useful:
 
 * A reproducible test case or series of steps
@@ -41,7 +41,7 @@ GitHub provides additional document on [forking a repository](https://help.githu
 
 
 ## Finding contributions to work on
-Looking at the existing issues is a great way to find something to contribute on. As our projects, by default, use the default GitHub issue labels ((enhancement/bug/duplicate/help wanted/invalid/question/wontfix), looking at any ['help wanted'](https://github.com/aws-samples/sagemaker-pytorch-containers/labels/help%20wanted) issues is a great place to start. 
+Looking at the existing issues is a great way to find something to contribute on. As our projects, by default, use the default GitHub issue labels (enhancement/bug/duplicate/help wanted/invalid/question/wontfix), looking at any ['help wanted'](https://github.com/aws/sagemaker-pytorch-training-toolkit/labels/help%20wanted) issues is a great place to start.
 
 
 ## Code of Conduct
@@ -56,6 +56,6 @@ If you discover a potential security issue in this project we ask that you notif
 
 ## Licensing
 
-See the [LICENSE](https://github.com/aws-samples/sagemaker-pytorch-containers/blob/master/LICENSE) file for our project's licensing. We will ask you to confirm the licensing of your contribution.
+See the [LICENSE](https://github.com/aws/sagemaker-pytorch-training-toolkit/blob/master/LICENSE) file for our project's licensing. We will ask you to confirm the licensing of your contribution.
 
 We may ask you to sign a [Contributor License Agreement (CLA)](http://en.wikipedia.org/wiki/Contributor_License_Agreement) for larger changes.
diff --git a/buildspec-gputests.yml b/buildspec-gputests.yml
index e6b73a32..d072298a 100644
--- a/buildspec-gputests.yml
+++ b/buildspec-gputests.yml
@@ -2,8 +2,8 @@ version: 0.2
 
 env:
   variables:
-    FRAMEWORK_VERSION: '1.6.0'
-    GPU_INSTANCE_TYPE: 'ml.p2.8xlarge'
+    FRAMEWORK_VERSION: '1.11.0'
+    GPU_INSTANCE_TYPE: 'ml.p3.16xlarge'
     ECR_REPO: 'sagemaker-test'
     GITHUB_REPO: 'sagemaker-pytorch-container'
     DLC_ACCOUNT: '763104351884'
@@ -26,46 +26,40 @@ phases:
       - pip3 install -U -e .[test]
 
       # define tags
-      - GENERIC_TAG="$FRAMEWORK_VERSION-pytorch-$BUILD_ID"
       - DLC_GPU_TAG="$FRAMEWORK_VERSION-dlc-gpu-$BUILD_ID"
-
-      # launch remote GPU instance
-      - prefix='ml.'
-      - instance_type=${GPU_INSTANCE_TYPE#"$prefix"}
-      - create-key-pair
-      - launch-ec2-instance --instance-type $instance_type --ami-name dlami-ubuntu-latest
+      - echo 'Skipping DLC creation as it is taken care in DLC pipelines'
+      # # launch remote GPU instance
+      # - prefix='ml.'
+      # - instance_type=${GPU_INSTANCE_TYPE#"$prefix"}
+      # - create-key-pair
+      # - launch-ec2-instance --instance-type $instance_type --ami-name dlami-ubuntu-latest
 
       # build DLC GPU image because the base DLC image is too big and takes too long to build as part of the test
-      - python3 setup.py sdist
-      - build_dir="test/container/$FRAMEWORK_VERSION"
-      - $(aws ecr get-login --registry-ids $DLC_ACCOUNT --no-include-email --region $AWS_DEFAULT_REGION)
-      - build_cmd="docker build -f "$build_dir/Dockerfile.dlc.gpu" -t $PREPROD_IMAGE:$DLC_GPU_TAG --build-arg region=$AWS_DEFAULT_REGION ."
-      - execute-command-if-has-matching-changes "$build_cmd" "test/" "src/*.py" "setup.py" "setup.cfg" "buildspec.yml" "lib/*"
-      # push DLC GPU image to ECR
-      - $(aws ecr get-login --registry-ids $ACCOUNT --no-include-email --region $AWS_DEFAULT_REGION)
-      - push_cmd="docker push $PREPROD_IMAGE:$DLC_GPU_TAG"
-      - execute-command-if-has-matching-changes "$push_cmd" "test/" "src/*.py" "setup.py" "setup.cfg" "buildspec.yml" "lib/*"
+      # - python3 setup.py sdist
+      # - build_dir="test/container/$FRAMEWORK_VERSION"
+      # - $(aws ecr get-login --registry-ids $DLC_ACCOUNT --no-include-email --region $AWS_DEFAULT_REGION)
+      # - build_cmd="docker build -f "$build_dir/Dockerfile.dlc.gpu" -t $PREPROD_IMAGE:$DLC_GPU_TAG --build-arg region=$AWS_DEFAULT_REGION ."
+      # - execute-command-if-has-matching-changes "$build_cmd" "test/" "src/*.py" "setup.py" "setup.cfg" "buildspec.yml" "lib/*"
+      # # push DLC GPU image to ECR
+      # - $(aws ecr get-login --registry-ids $ACCOUNT --no-include-email --region $AWS_DEFAULT_REGION)
+      # - push_cmd="docker push $PREPROD_IMAGE:$DLC_GPU_TAG"
+      # - execute-command-if-has-matching-changes "$push_cmd" "test/" "src/*.py" "setup.py" "setup.cfg" "buildspec.yml" "lib/*"
 
-      # run GPU local integration tests
-      - printf "$SETUP_CMDS" > $SETUP_FILE
-      - generic_cmd="pytest test/integration/local --build-image --push-image --region $AWS_DEFAULT_REGION --docker-base-name $ECR_REPO --dockerfile-type pytorch --aws-id $ACCOUNT --framework-version $FRAMEWORK_VERSION --processor gpu --tag $GENERIC_TAG"
-      - test_cmd="remote-test --github-repo $GITHUB_REPO --test-cmd \"$generic_cmd\" --setup-file $SETUP_FILE --pr-number \"$PR_NUM\""
-      - execute-command-if-has-matching-changes "$test_cmd" "test/" "src/*.py" "setup.py" "setup.cfg" "buildspec.yml" "lib/*"
-      - dlc_cmd="pytest test/integration/local --region $AWS_DEFAULT_REGION --docker-base-name $ECR_REPO --dockerfile-type dlc.gpu --aws-id $ACCOUNT --framework-version $FRAMEWORK_VERSION --processor gpu --tag $DLC_GPU_TAG"
-      - test_cmd="remote-test --github-repo $GITHUB_REPO --test-cmd \"$dlc_cmd\" --setup-file $SETUP_FILE --pr-number \"$PR_NUM\" --skip-setup"
-      - execute-command-if-has-matching-changes "$test_cmd" "test/" "src/*.py" "setup.py" "setup.cfg" "buildspec.yml" "lib/*"
+      # # run GPU local integration tests
+      # - printf "$SETUP_CMDS" > $SETUP_FILE
+      # - dlc_cmd="pytest test/integration/local --region $AWS_DEFAULT_REGION --docker-base-name $ECR_REPO --dockerfile-type dlc.gpu --aws-id $ACCOUNT --framework-version $FRAMEWORK_VERSION --processor gpu --tag $DLC_GPU_TAG"
+      # - test_cmd="remote-test --github-repo $GITHUB_REPO --test-cmd \"$dlc_cmd\" --setup-file $SETUP_FILE --pr-number \"$PR_NUM\" --skip-setup"
+      # - execute-command-if-has-matching-changes "$test_cmd" "test/" "src/*.py" "setup.py" "setup.cfg" "buildspec.yml" "lib/*"
 
-      # run GPU sagemaker integration tests
-      - test_cmd="pytest -n 10 test/integration/sagemaker --region $AWS_DEFAULT_REGION --docker-base-name $ECR_REPO --dockerfile-type pytorch --aws-id $ACCOUNT --framework-version $FRAMEWORK_VERSION --processor gpu --instance-type $GPU_INSTANCE_TYPE --tag $GENERIC_TAG"
-      - execute-command-if-has-matching-changes "$test_cmd" "test/" "src/*.py" "setup.py" "setup.cfg" "buildspec.yml" "lib/*"
-      - test_cmd="pytest -n 10 test/integration/sagemaker --region $AWS_DEFAULT_REGION --docker-base-name $ECR_REPO --dockerfile-type dlc.gpu --aws-id $ACCOUNT --framework-version $FRAMEWORK_VERSION --processor gpu --instance-type $GPU_INSTANCE_TYPE --tag $DLC_GPU_TAG"
-      - execute-command-if-has-matching-changes "$test_cmd" "test/" "src/*.py" "setup.py" "setup.cfg" "buildspec.yml" "lib/*"
+      # # run GPU sagemaker integration tests
+      # - test_cmd="pytest -n 10 test/integration/sagemaker --region $AWS_DEFAULT_REGION --docker-base-name $ECR_REPO --dockerfile-type dlc.gpu --aws-id $ACCOUNT --framework-version $FRAMEWORK_VERSION --processor gpu --instance-type $GPU_INSTANCE_TYPE --tag $DLC_GPU_TAG"
+      # - execute-command-if-has-matching-changes "$test_cmd" "test/" "src/*.py" "setup.py" "setup.cfg" "buildspec.yml" "lib/*"
 
     finally:
+      - echo 'Done'
       # shut down remote GPU instance
-      - cleanup-gpu-instances
-      - cleanup-key-pairs
+      # - cleanup-gpu-instances
+      # - cleanup-key-pairs
 
-      # remove ECR image
-      - aws ecr batch-delete-image --repository-name $ECR_REPO --region $AWS_DEFAULT_REGION --image-ids imageTag=$GENERIC_TAG
-      - aws ecr batch-delete-image --repository-name $ECR_REPO --region $AWS_DEFAULT_REGION --image-ids imageTag=$DLC_GPU_TAG
+      # # remove ECR image
+      # - aws ecr batch-delete-image --repository-name $ECR_REPO --region $AWS_DEFAULT_REGION --image-ids imageTag=$DLC_GPU_TAG
diff --git a/buildspec-release.yml b/buildspec-release.yml
index 7285bb3b..ca45377c 100644
--- a/buildspec-release.yml
+++ b/buildspec-release.yml
@@ -12,7 +12,7 @@ phases:
       # run unit tests
       - AWS_ACCESS_KEY_ID= AWS_SECRET_ACCESS_KEY= AWS_SESSION_TOKEN=
         AWS_CONTAINER_CREDENTIALS_RELATIVE_URI= AWS_DEFAULT_REGION=
-        tox -e py27,py36,py37 -- test/unit
+        tox -e py38 -- test/unit
 
       # run local integ tests
       #- $(aws ecr get-login --no-include-email --region us-west-2)
diff --git a/buildspec-unittests.yml b/buildspec-unittests.yml
index 7efb931a..1f77ecd4 100644
--- a/buildspec-unittests.yml
+++ b/buildspec-unittests.yml
@@ -13,4 +13,4 @@ phases:
       - tox -e flake8,twine
 
       # run unit tests
-      - tox -e py27,py36,py37 test/unit
+      - tox -e py38 test/unit
diff --git a/buildspec.yml b/buildspec.yml
index f43aba1a..c8ead2c4 100644
--- a/buildspec.yml
+++ b/buildspec.yml
@@ -2,7 +2,7 @@ version: 0.2
 
 env:
   variables:
-    FRAMEWORK_VERSION: '1.6.0'
+    FRAMEWORK_VERSION: '1.11.0'
     CPU_INSTANCE_TYPE: 'ml.c4.xlarge'
     ECR_REPO: 'sagemaker-test'
 
@@ -21,22 +21,18 @@ phases:
       - pip3 install -U -e .[test]
       
       # define tags
-      - GENERIC_TAG="$FRAMEWORK_VERSION-pytorch-$BUILD_ID"
       - DLC_CPU_TAG="$FRAMEWORK_VERSION-dlc-cpu-$BUILD_ID"
+      - echo 'Skipping DLC creation as it is taken care in DLC pipelines'
+      # # run local CPU integration tests (build and push the image to ECR repo)
+      # - test_cmd="pytest test/integration/local --build-image --push-image --dockerfile-type dlc.cpu --region $AWS_DEFAULT_REGION --docker-base-name $ECR_REPO --aws-id $ACCOUNT --framework-version $FRAMEWORK_VERSION --processor cpu --tag $DLC_CPU_TAG"
+      # # execute-command-if-has-matching-changes "$test_cmd" "test/" "src/*.py" "setup.py" "setup.cfg" "buildspec.yml" "lib/*"
+      # - "$test_cmd" "test/" "src/*.py" "setup.py" "setup.cfg" "buildspec.yml" "lib/*"
 
-      # run local CPU integration tests (build and push the image to ECR repo)
-      - test_cmd="pytest test/integration/local --build-image --push-image --dockerfile-type pytorch --region $AWS_DEFAULT_REGION --docker-base-name $ECR_REPO --aws-id $ACCOUNT --framework-version $FRAMEWORK_VERSION --processor cpu --tag $GENERIC_TAG"
-      - execute-command-if-has-matching-changes "$test_cmd" "test/" "src/*.py" "setup.py" "setup.cfg" "buildspec.yml" "lib/*"
-      - test_cmd="pytest test/integration/local --build-image --push-image --dockerfile-type dlc.cpu --region $AWS_DEFAULT_REGION --docker-base-name $ECR_REPO --aws-id $ACCOUNT --framework-version $FRAMEWORK_VERSION --processor cpu --tag $DLC_CPU_TAG"
-      - execute-command-if-has-matching-changes "$test_cmd" "test/" "src/*.py" "setup.py" "setup.cfg" "buildspec.yml" "lib/*"
-
-      # run CPU sagemaker integration tests
-      - test_cmd="pytest -n 10 test/integration/sagemaker --region $AWS_DEFAULT_REGION --docker-base-name $ECR_REPO --dockerfile-type pytorch --aws-id $ACCOUNT --framework-version $FRAMEWORK_VERSION --processor cpu --instance-type $CPU_INSTANCE_TYPE --tag $GENERIC_TAG"
-      - execute-command-if-has-matching-changes "$test_cmd" "test/" "src/*.py" "setup.py" "setup.cfg" "buildspec.yml" "lib/*"
-      - test_cmd="pytest -n 10 test/integration/sagemaker --region $AWS_DEFAULT_REGION --docker-base-name $ECR_REPO --dockerfile-type dlc.cpu --aws-id $ACCOUNT --framework-version $FRAMEWORK_VERSION --processor cpu --instance-type $CPU_INSTANCE_TYPE --tag $DLC_CPU_TAG"
-      - execute-command-if-has-matching-changes "$test_cmd" "test/" "src/*.py" "setup.py" "setup.cfg" "buildspec.yml" "lib/*"
+      # # run CPU sagemaker integration tests
+      # - test_cmd="pytest -n 10 test/integration/sagemaker --region $AWS_DEFAULT_REGION --docker-base-name $ECR_REPO --dockerfile-type dlc.cpu --aws-id $ACCOUNT --framework-version $FRAMEWORK_VERSION --processor cpu --instance-type $CPU_INSTANCE_TYPE --tag $DLC_CPU_TAG"
+      # - execute-command-if-has-matching-changes "$test_cmd" "test/" "src/*.py" "setup.py" "setup.cfg" "buildspec.yml" "lib/*"
 
     finally:
-      # remove ECR image
-      - aws ecr batch-delete-image --repository-name $ECR_REPO --region $AWS_DEFAULT_REGION --image-ids imageTag=$GENERIC_TAG
-      - aws ecr batch-delete-image --repository-name $ECR_REPO --region $AWS_DEFAULT_REGION --image-ids imageTag=$DLC_CPU_TAG
+      - echo 'Done'
+      # # remove ECR image
+      # - aws ecr batch-delete-image --repository-name $ECR_REPO --region $AWS_DEFAULT_REGION --image-ids imageTag=$DLC_CPU_TAG
diff --git a/setup.py b/setup.py
index 8b031859..fe698668 100644
--- a/setup.py
+++ b/setup.py
@@ -48,12 +48,12 @@ def read(fname):
         "Natural Language :: English",
         "License :: OSI Approved :: Apache Software License",
         "Programming Language :: Python",
-        'Programming Language :: Python :: 2.7',
-        'Programming Language :: Python :: 3.6',
         'Programming Language :: Python :: 3.7',
+        'Programming Language :: Python :: 3.8',
+        'Programming Language :: Python :: 3.9',
     ],
 
-    install_requires=['retrying', 'sagemaker-training>=3.7.0', 'six>=1.12.0'],
+    install_requires=['retrying', 'sagemaker-training>=4.2.0', 'six>=1.12.0'],
     extras_require={
         'test': test_dependencies
     },
diff --git a/test/conftest.py b/test/conftest.py
index 0b0dc8b3..1adfc029 100644
--- a/test/conftest.py
+++ b/test/conftest.py
@@ -46,13 +46,13 @@ def pytest_addoption(parser):
     parser.addoption('--build-image', '-B', action='store_true')
     parser.addoption('--push-image', '-P', action='store_true')
     parser.addoption('--dockerfile-type', '-T', choices=['dlc.cpu', 'dlc.gpu', 'pytorch'],
-                     default=None)
+                     default='pytorch')
     parser.addoption('--dockerfile', '-D', default=None)
     parser.addoption('--aws-id', default=None)
     parser.addoption('--instance-type')
     parser.addoption('--docker-base-name', default='sagemaker-pytorch-training')
     parser.addoption('--region', default='us-west-2')
-    parser.addoption('--framework-version', default="1.4.0")
+    parser.addoption('--framework-version', default="1.10.0")
     parser.addoption('--py-version', choices=['2', '3'], default=str(sys.version_info.major))
     parser.addoption('--processor', choices=['gpu', 'cpu'], default='cpu')
     # If not specified, will default to {framework-version}-{processor}-py{py-version}
diff --git a/test/container/1.6.0/Dockerfile.dlc.cpu b/test/container/1.11.0/Dockerfile.dlc.cpu
similarity index 100%
rename from test/container/1.6.0/Dockerfile.dlc.cpu
rename to test/container/1.11.0/Dockerfile.dlc.cpu
diff --git a/test/container/1.6.0/Dockerfile.dlc.gpu b/test/container/1.11.0/Dockerfile.dlc.gpu
similarity index 66%
rename from test/container/1.6.0/Dockerfile.dlc.gpu
rename to test/container/1.11.0/Dockerfile.dlc.gpu
index 72cb7328..2dcafe7d 100644
--- a/test/container/1.6.0/Dockerfile.dlc.gpu
+++ b/test/container/1.11.0/Dockerfile.dlc.gpu
@@ -1,5 +1,5 @@
 ARG region
-from 763104351884.dkr.ecr.$region.amazonaws.com/pytorch-training:1.6.0-gpu-py36-cu110-ubuntu18.04
+FROM 763104351884.dkr.ecr.$region.amazonaws.com/pytorch-training:1.11.0-gpu-py38-cu113-ubuntu20.04-sagemaker
 
 COPY dist/sagemaker_pytorch_training-*.tar.gz /sagemaker_pytorch_training.tar.gz
 RUN pip install --upgrade --no-cache-dir /sagemaker_pytorch_training.tar.gz && \
diff --git a/test/container/1.6.0/Dockerfile.pytorch b/test/container/1.11.0/Dockerfile.pytorch
similarity index 92%
rename from test/container/1.6.0/Dockerfile.pytorch
rename to test/container/1.11.0/Dockerfile.pytorch
index 0ad7b8f0..b7b6c9d4 100644
--- a/test/container/1.6.0/Dockerfile.pytorch
+++ b/test/container/1.11.0/Dockerfile.pytorch
@@ -1,4 +1,4 @@
-from pytorch/pytorch:1.6.0-cuda10.1-cudnn7-runtime
+FROM pytorch/pytorch:1.11.0-cuda11.3-cudnn8-runtime
 
 RUN apt-get update && apt-get install -y --no-install-recommends \
     jq \
diff --git a/test/container/1.4.0/Dockerfile.dlc.cpu b/test/container/1.4.0/Dockerfile.dlc.cpu
deleted file mode 100644
index b86f14c6..00000000
--- a/test/container/1.4.0/Dockerfile.dlc.cpu
+++ /dev/null
@@ -1,10 +0,0 @@
-ARG region
-from 763104351884.dkr.ecr.$region.amazonaws.com/pytorch-training:1.4.0-cpu-py2
-
-COPY lib/changehostname.c /
-COPY lib/start_with_right_hostname.sh /usr/local/bin/start_with_right_hostname.sh
-RUN chmod +x /usr/local/bin/start_with_right_hostname.sh
-
-COPY dist/sagemaker_pytorch_training-*.tar.gz /sagemaker_pytorch_training.tar.gz
-RUN pip install --upgrade --no-cache-dir /sagemaker_pytorch_training.tar.gz && \
-    rm /sagemaker_pytorch_training.tar.gz
diff --git a/test/container/1.4.0/Dockerfile.dlc.gpu b/test/container/1.4.0/Dockerfile.dlc.gpu
deleted file mode 100644
index d391f92d..00000000
--- a/test/container/1.4.0/Dockerfile.dlc.gpu
+++ /dev/null
@@ -1,28 +0,0 @@
-ARG region
-from 763104351884.dkr.ecr.$region.amazonaws.com/pytorch-training:1.4.0-gpu-py3
-
-# TODO(@bvveeram): Remove once the 1.4.0-gpu-py3 DLC image installs mpi4py
-RUN pip3 install mpi4py==3.0.3
-
-# TODO(@bvveeram): Remove once the 1.4.0-gpu-py3 DLC image fixes OpenSSH config
-# Configure OpenSSH so that nodes can communicate with each other
-RUN mkdir -p /var/run/sshd && \
- sed 's@session\s*required\s*pam_loginuid.so@session optional pam_loginuid.so@g' -i /etc/pam.d/sshd
-
-RUN rm -rf /root/.ssh/ && \
- mkdir -p /root/.ssh/ && \
- ssh-keygen -q -t rsa -N '' -f /root/.ssh/id_rsa && \
- cp /root/.ssh/id_rsa.pub /root/.ssh/authorized_keys \
- && printf "Host *\n StrictHostKeyChecking no\n" >> /root/.ssh/config
-
-# TODO(@bvveeram): Remove once the 1.4.0-gpu-py3 DLC image fixes MPI config
-# Comment line in MPI config to prevent mutually exclusive MCA settings
-RUN sed -i '62,62 s/^/#/' /home/.openmpi/etc/openmpi-mca-params.conf
-
-COPY lib/changehostname.c /
-COPY lib/start_with_right_hostname.sh /usr/local/bin/start_with_right_hostname.sh
-RUN chmod +x /usr/local/bin/start_with_right_hostname.sh
-
-COPY dist/sagemaker_pytorch_training-*.tar.gz /sagemaker_pytorch_training.tar.gz
-RUN pip install --upgrade --no-cache-dir /sagemaker_pytorch_training.tar.gz && \
-    rm /sagemaker_pytorch_training.tar.gz
diff --git a/test/container/1.4.0/Dockerfile.pytorch b/test/container/1.4.0/Dockerfile.pytorch
deleted file mode 100644
index 9849c68b..00000000
--- a/test/container/1.4.0/Dockerfile.pytorch
+++ /dev/null
@@ -1,20 +0,0 @@
-from pytorch/pytorch:1.4-cuda10.1-cudnn7-runtime
-
-RUN apt-get update \
- && apt-get install -y --no-install-recommends jq \
- && rm -rf /var/lib/apt/lists/*
-
-COPY lib/changehostname.c /
-COPY lib/start_with_right_hostname.sh /usr/local/bin/start_with_right_hostname.sh
-RUN chmod +x /usr/local/bin/start_with_right_hostname.sh
-
-COPY dist/sagemaker_pytorch_training-*.tar.gz /sagemaker_pytorch_training.tar.gz
-RUN pip install --no-cache-dir /sagemaker_pytorch_training.tar.gz && \
-    rm /sagemaker_pytorch_training.tar.gz
-
-ENV SAGEMAKER_TRAINING_MODULE=sagemaker_pytorch_container.training:main
-
-WORKDIR /
-
-# Starts framework
-ENTRYPOINT ["bash", "-m", "start_with_right_hostname.sh"]
diff --git a/test/integration/sagemaker/test_horovod.py b/test/integration/sagemaker/test_horovod.py
index f0d3cf40..09276e44 100644
--- a/test/integration/sagemaker/test_horovod.py
+++ b/test/integration/sagemaker/test_horovod.py
@@ -27,7 +27,7 @@
 @pytest.mark.skip_generic
 @pytest.mark.parametrize(
     "instances, processes, train_instance_type",
-    [(1, 8, "ml.p2.8xlarge"), (2, 4, "ml.p3.8xlarge")],
+    [(2, 4, "ml.p3.8xlarge")],
 )
 def test_horovod_simple(
     instances,
diff --git a/tox.ini b/tox.ini
index a6d7983b..9b732dfe 100644
--- a/tox.ini
+++ b/tox.ini
@@ -4,7 +4,7 @@
 # and then run "tox" from this directory.
 
 [tox]
-envlist = flake8,twine,py27,py36,py37
+envlist = flake8,twine,py38
 skip_missing_interpreters = False
 
 [flake8]