diff --git a/.github/ISSUE_TEMPLATE/bug_report.md b/.github/ISSUE_TEMPLATE/bug_report.md new file mode 100644 index 00000000..421934d9 --- /dev/null +++ b/.github/ISSUE_TEMPLATE/bug_report.md @@ -0,0 +1,28 @@ +--- +name: Bug report +about: Create a bug report for a module in MLOps-Modules +title: "[BUG]" +labels: bug +assignees: '' + +--- + +**Describe the bug** +A clear and concise description of what the bug is. Be ABSOLUTELY sure to give the path of the module (ex `modules/sagemaker/sagemaker-studio`). +Any bug that does not explicitly refer to a pertinent module will be closed without inspection. + +**To Reproduce** +Steps to reproduce the behavior: +1. Go to '...' +2. Click on '....' +3. Scroll down to '....' +4. See error + +**Expected behavior** +A clear and concise description of what you expected to happen. + +**Screenshots** +If applicable, add screenshots to help explain your problem. + +**Additional context** +Add any other context about the problem here. diff --git a/.github/ISSUE_TEMPLATE/config.yaml b/.github/ISSUE_TEMPLATE/config.yaml new file mode 100644 index 00000000..ec4bb386 --- /dev/null +++ b/.github/ISSUE_TEMPLATE/config.yaml @@ -0,0 +1 @@ +blank_issues_enabled: false \ No newline at end of file diff --git a/.github/ISSUE_TEMPLATE/feature_request.md b/.github/ISSUE_TEMPLATE/feature_request.md new file mode 100644 index 00000000..e45ce7ff --- /dev/null +++ b/.github/ISSUE_TEMPLATE/feature_request.md @@ -0,0 +1,21 @@ +--- +name: Feature request +about: Suggest a new feature for MLOps-Modules +title: "[FEATURE]" +labels: enhancement +assignees: '' + +--- + +**Is your feature request related to a problem? Please describe.** +A clear and concise description of what the problem is. If the feature is related to an existing module, +be ABSOLUTELY sure to give the path of the module (ex `modules/sagemaker/sagemaker-studio`). + +**Describe the solution you'd like** +A clear and concise description of what you want to happen. 
+ +**Describe alternatives you've considered** +A clear and concise description of any alternative solutions or features you've considered. + +**Additional context** +Add any other context or screenshots about the feature request here. diff --git a/.github/workflows/module-checks.yml b/.github/workflows/module-checks.yml new file mode 100644 index 00000000..0867111c --- /dev/null +++ b/.github/workflows/module-checks.yml @@ -0,0 +1,67 @@ +name: Module Checks + +on: + push: + branches: ["main"] + paths: ['modules/**'] + + pull_request: + branches: ["main", "release/*", "stable"] + paths: ['modules/**'] + + workflow_dispatch: + +jobs: + get-modules: + name: Get Modules + runs-on: ubuntu-latest + outputs: + matrix: ${{ steps.get-modules.outputs.matrix }} + steps: + - name: Checkout + uses: actions/checkout@v3 + with: + fetch-depth: 0 + - name: Get modules + id: get-modules + run: | + set -x + # Get all the modules that have the directory "tests" + MODULES=$(find modules -type d -name "tests" | cut -d/ -f 2-3 | uniq) + # Create our json structure [{"module_name": "..."}] + MODULES_JSON=$(echo "$MODULES" | jq -R -s 'split("\n")' | jq '[ .[] | select(length > 0) ]' | jq 'map({"module_name": .})') + # Export the modules as json to the outputs using the multiline syntax: name<<DELIMITER, value, DELIMITER + echo 'matrix<<EOF' >> "$GITHUB_OUTPUT" + echo "$MODULES_JSON" >> "$GITHUB_OUTPUT" + echo 'EOF' >> "$GITHUB_OUTPUT" + + test: + name: Run unit tests for module ${{ matrix.modules.module_name }} + needs: get-modules + strategy: + fail-fast: false + matrix: + modules: ${{ fromJson(needs.get-modules.outputs.matrix) }} + python-version: ["3.9"] + runs-on: ubuntu-latest + env: + MODULE_PATH: 'modules/${{ matrix.modules.module_name }}' + steps: + - name: Checkout + uses: actions/checkout@v3 + with: + fetch-depth: 0 + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v4 + with: + python-version: ${{ matrix.python-version }} + - name: Install Requirements + run: | + set -x + python -m pip install --upgrade pip + pip install -r 
requirements-dev.txt + pip install -r $MODULE_PATH/requirements.txt + - name: Static checks and linting (mypy, flake8, black, isort) + run: scripts/validate.sh --language python --path $MODULE_PATH/ + - name: Pytest + run: cd $MODULE_PATH/ && pytest \ No newline at end of file diff --git a/.gitignore b/.gitignore new file mode 100644 index 00000000..46950b75 --- /dev/null +++ b/.gitignore @@ -0,0 +1,389 @@ +# Created by .ignore support plugin (hsz.mobi) +### Linux template +*~ + +# temporary files which can be created if a process still has a handle open of a deleted file +.fuse_hidden* + +# KDE directory preferences +.directory + +# Linux trash folder which might appear on any partition or disk +.Trash-* + +# .nfs files are created when an open file is removed but is still being accessed +.nfs* + +### macOS template +# General +.DS_Store +.AppleDouble +.LSOverride + +# Icon must end with two \r +Icon + +# Thumbnails +._* + +# Files that might appear in the root of a volume +.DocumentRevisions-V100 +.fseventsd +.Spotlight-V100 +.TemporaryItems +.Trashes +.VolumeIcon.icns +.com.apple.timemachine.donotpresent + +# Directories potentially created on remote AFP share +.AppleDB +.AppleDesktop +Network Trash Folder +Temporary Items +.apdisk + +### Node template +# Logs +logs +*.log +npm-debug.log* +yarn-debug.log* +yarn-error.log* +lerna-debug.log* + +# Diagnostic reports (https://nodejs.org/api/report.html) +report.[0-9]*.[0-9]*.[0-9]*.[0-9]*.json + +# Runtime data +pids +*.pid +*.seed +*.pid.lock + +# Directory for instrumented libs generated by jscoverage/JSCover +lib-cov + +# Coverage directory used by tools like istanbul +coverage +*.lcov + +# nyc test coverage +.nyc_output + +# Grunt intermediate storage (https://gruntjs.com/creating-plugins#storing-task-files) +.grunt + +# Bower dependency directory (https://bower.io/) +bower_components + +# node-waf configuration +.lock-wscript + +# Compiled binary addons (https://nodejs.org/api/addons.html) +build/Release + +# 
Dependency directories +node_modules/ +jspm_packages/ + +# Snowpack dependency directory (https://snowpack.dev/) +web_modules/ + +# TypeScript cache +*.tsbuildinfo + +# Optional npm cache directory +.npm + +# Optional eslint cache +.eslintcache + +# Microbundle cache +.rpt2_cache/ +.rts2_cache_cjs/ +.rts2_cache_es/ +.rts2_cache_umd/ + +# Optional REPL history +.node_repl_history + +# Output of 'npm pack' +*.tgz + +# Yarn Integrity file +.yarn-integrity + +# dotenv environment variables file +.env +.env.test + +# parcel-bundler cache (https://parceljs.org/) +.cache +.parcel-cache + +# Next.js build output +.next +out + +# Nuxt.js build / generate output +.nuxt +dist + +# Gatsby files +.cache/ +# Comment in the public line in if your project uses Gatsby and not Next.js +# https://nextjs.org/blog/next-9-1#public-directory-support +# public + +# vuepress build output +.vuepress/dist + +# Serverless directories +.serverless/ + +# FuseBox cache +.fusebox/ + +# DynamoDB Local files +.dynamodb/ + +# TernJS port file +.tern-port + +# Stores VSCode versions used for testing VSCode extensions +.vscode-test +.vscode + +# yarn v2 +.yarn/cache +.yarn/unplugged +.yarn/build-state.yml +.yarn/install-state.gz +.pnp.* + +### Python template +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# C extensions +*.so + +# Distribution / packaging +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +share/python-wheels/ +*.egg-info/ +.installed.cfg +*.egg + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. 
+*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.nox/ +.coverage +.coverage.* +nosetests.xml +coverage.xml +*.cover +*.py,cover +.hypothesis/ +.pytest_cache/ +cover/ + +# Translations +*.mo +*.pot + +# Django stuff: +local_settings.py +db.sqlite3 +db.sqlite3-journal + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ + +# PyBuilder +.pybuilder/ +target/ + +# Jupyter Notebook +.ipynb_checkpoints + +# IPython +profile_default/ +ipython_config.py + +# pyenv +# For a library or package, you might want to ignore these files since the code is +# intended to run in multiple environments; otherwise, check them in: +# .python-version + +# pipenv +# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. +# However, in case of collaboration, if having platform-specific dependencies or dependencies +# having no cross-platform support, pipenv may install dependencies that don't work, or not +# install all needed dependencies. +#Pipfile.lock + +# PEP 582; used by e.g. 
github.com/David-OConnor/pyflow +__pypackages__/ + +# Celery stuff +celerybeat-schedule +celerybeat.pid + +# SageMath parsed files +*.sage.py + +# Environments +.venv +# env/ +venv/ +# ENV/ +env.bak/ +venv.bak/ + +# Spyder project settings +.spyderproject +.spyproject + +# Rope project settings +.ropeproject + +# mkdocs documentation +/site + +# mypy +.mypy_cache/ +.dmypy.json +dmypy.json + +# Pyre type checker +.pyre/ + +# pytype static type analyzer +.pytype/ + +# Cython debug symbols +cython_debug/ + +### JetBrains template +# Covers JetBrains IDEs: IntelliJ, RubyMine, PhpStorm, AppCode, PyCharm, CLion, Android Studio, WebStorm and Rider +# Reference: https://intellij-support.jetbrains.com/hc/en-us/articles/206544839 + +# User-specific stuff +.idea/**/workspace.xml +.idea/**/tasks.xml +.idea/**/usage.statistics.xml +.idea/**/dictionaries +.idea/**/shelf + +# Generated files +.idea/**/contentModel.xml + +# Sensitive or high-churn files +.idea/**/dataSources/ +.idea/**/dataSources.ids +.idea/**/dataSources.local.xml +.idea/**/sqlDataSources.xml +.idea/**/dynamic.xml +.idea/**/uiDesigner.xml +.idea/**/dbnavigator.xml + +# Gradle +.idea/**/gradle.xml +.idea/**/libraries + +# Gradle and Maven with auto-import +# When using Gradle or Maven with auto-import, you should exclude module files, +# since they will be recreated, and may cause churn. Uncomment if using +# auto-import. 
+# .idea/artifacts +# .idea/compiler.xml +# .idea/jarRepositories.xml +# .idea/modules.xml +# .idea/*.iml +# .idea/modules +# *.iml +# *.ipr + +# CMake +cmake-build-*/ + +# Mongo Explorer plugin +.idea/**/mongoSettings.xml + +# File-based project format +*.iws + +# IntelliJ +out/ + +# mpeltonen/sbt-idea plugin +.idea_modules/ + +# JIRA plugin +atlassian-ide-plugin.xml + +# Cursive Clojure plugin +.idea/replstate.xml + +# Crashlytics plugin (for Android Studio and IntelliJ) +com_crashlytics_export_strings.xml +crashlytics.properties +crashlytics-build.properties +fabric.properties + +# Editor-based Rest Client +.idea/httpRequests + +# Android studio 3.1+ serialized cache file +.idea/caches/build_file_checksums.ser + +# IDE +.idea + +# cdk +cdk.out +cdk.context.json +cdk.json + +# codeseeder +codeseeder.out + +# env files +.env +seedfarmer.gitmodules/ +archive/ + +# Integration Testing Files +**/*snapshot/* diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index c4b6a1c5..63a1e04d 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -29,12 +29,36 @@ Contributions via pull requests are much appreciated. Before sending us a pull r To send us a pull request, please: -1. Fork the repository. -2. Modify the source; please focus on the specific change you are contributing. If you also reformat all the code, it will be hard for us to focus on your change. -3. Ensure local tests pass. -4. Commit to your fork using clear commit messages. -5. Send us a pull request, answering any default questions in the pull request interface. -6. Pay attention to any automated CI failures reported in the pull request, and stay involved in the conversation. +1. Fork the repository and follow the [deployment guide](docs/deployment_guide.md) for deploying MLOps modules. + +```sh +git clone https://github.com/{your-account}/mlops-modules.git +``` + +2. Then, prepare your local environment before you move forward with the development. 
+ +```sh +cd mlops-modules +git checkout -b {branch-name} +python3 -m venv .venv && source .venv/bin/activate +pip install -r requirements.txt -r requirements-dev.txt +``` + +3. Modify the source; please focus on the specific change you are contributing. If you also reformat all the code, it will be hard for us to focus on your change. + +4. Ensure deployment/testing to personal environments pass. + +- Create a copy of your config file under `config` directory and name it as `dev-{your-name}.env` +- Replace the necessary values of the env vars inside your `dev-{your-name}.env` file +- Run the below command to deploy the infrastructure + +```sh +seedfarmer apply manifests/local/deployment.yaml --env-file config/dev-{your-name}.env --debug +``` + +5. Commit to your fork using clear commit messages. +6. Send us a pull request, answering any default questions in the pull request interface. +7. Pay attention to any automated CI failures reported in the pull request, and stay involved in the conversation. GitHub provides additional document on [forking a repository](https://help.github.com/articles/fork-a-repo/) and [creating a pull request](https://help.github.com/articles/creating-a-pull-request/). diff --git a/README.md b/README.md index 847260ca..4e465607 100644 --- a/README.md +++ b/README.md @@ -1,17 +1,15 @@ -## My Project +# MLOps Modules -TODO: Fill this README out! +MLOps modules is a collection of reusable Infrastructure as Code (IaC) modules that work with [SeedFarmer CLI](https://github.com/awslabs/seed-farmer). Please see the [DOCS](https://seed-farmer.readthedocs.io/en/latest/) for all things seed-farmer. -Be sure to: +The modules in this repository are decoupled from each other and can be aggregated together using GitOps (manifest file) principles provided by `seedfarmer` to achieve the desired use cases. It removes the undifferentiated heavy lifting for an end user by providing hardened modules and enables them to focus on building business on top of them. 
-* Change the title in this README -* Edit your repository description on GitHub +## General Information -## Security +The modules in this repository are / must be generic for resuse without affiliation to any one particular project or use case or any vertical. -See [CONTRIBUTING](CONTRIBUTING.md#security-issue-notifications) for more information. - -## License - -This project is licensed under the Apache-2.0 License. +All modules in this repository adhere to the module strutucture defined in the the [SeedFarmer Guide](https://seed-farmer.readthedocs.io/en/latest) +- [Project Structure](https://seed-farmer.readthedocs.io/en/latest/project_development.html) +- [Module Development](https://seed-farmer.readthedocs.io/en/latest/module_development.html) +- [Module Manifest Guide](https://seed-farmer.readthedocs.io/en/latest/manifests.html) diff --git a/requirements-dev.txt b/requirements-dev.txt new file mode 100644 index 00000000..479ce252 --- /dev/null +++ b/requirements-dev.txt @@ -0,0 +1,20 @@ +black~=22.3.0 +isort~=5.10.1 +flake8~=4.0.1 +pylint~=2.14.1 +mypy~=0.961 +pip-tools~=6.6.2 +python-dotenv~=0.21.0 +cfn-lint~=0.61.0 +check-manifest~=0.48 +pydot~=1.4.2 +pyroma~=4.0 +pytest~=7.1.2 +pytest-cov~=4.0.0 +pytest-ordering~=0.6 +twine~=4.0.1 +types-PyYAML~=6.0.8 +types-setuptools~=57.4.17 +wheel~=0.37.1 +cdk-nag==2.12.29 +click==8.0.2 diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 00000000..3547448e --- /dev/null +++ b/requirements.txt @@ -0,0 +1 @@ +seed-farmer~=2.10.0 diff --git a/scripts/fix.sh b/scripts/fix.sh new file mode 100755 index 00000000..38c41fb7 --- /dev/null +++ b/scripts/fix.sh @@ -0,0 +1,58 @@ +#!/usr/bin/env bash +# +# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"). +# You may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +set -e + +DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )" +LANGUAGE="unknown" + +while [ $# -gt 0 ] +do + case $1 in + --language) + LANGUAGE=${2} + shift # Remove --language from processing + shift # Remove $2 from processing + ;; + --path) + FIX_PATH="${DIR}/../${2}" + shift # Remove --path from processing + shift # Remove $2 from processing + ;; + *) # Reject everything else, including stray positional arguments that would otherwise never be shifted and loop forever + echo "Unknown option $1" + exit 1 + ;; + esac +done + +cd "${FIX_PATH:?--path is required}" # Abort instead of silently cd-ing to $HOME when --path was not given +FIX_PATH="$(pwd)" + +echo "Fixing: ${FIX_PATH}, Language: ${LANGUAGE}" + +if [[ $LANGUAGE == "python" ]]; then + echo "Running isort, black" + isort . + black . +elif [[ $LANGUAGE == "typescript" ]]; then + echo "Running prettier" + npx prettier --write . +else + echo "Language: ${LANGUAGE}" + exit 1 +fi diff --git a/scripts/setup-secrets-dockerhub.sh b/scripts/setup-secrets-dockerhub.sh new file mode 100755 index 00000000..4f09b50d --- /dev/null +++ b/scripts/setup-secrets-dockerhub.sh @@ -0,0 +1,30 @@ +#!/bin/bash + +set -euo pipefail +set +x + +SECRET_NAME="aws-mlops-docker-credentials" + +read -rp "DockerHub Username: " DOCKERHUB_USER +read -rsp "DockerHub Password: " DOCKERHUB_PASS + +echo -e "\nCreating/Updating Secret" + +SECRET_VALUE="{\"docker.io\": { \"username\": \"$DOCKERHUB_USER\", \"password\": \"$DOCKERHUB_PASS\" }}" + +if aws secretsmanager describe-secret --secret-id "$SECRET_NAME" > /dev/null 2>&1; then + echo "Secret ($SECRET_NAME) exists. 
Updating" + aws secretsmanager put-secret-value \ + --secret-id "$SECRET_NAME" \ + --secret-string "$SECRET_VALUE" + + echo "$SECRET_NAME updated" +else + echo "Secret ($SECRET_NAME) doesn't exist. Creating" + aws secretsmanager create-secret \ + --name "$SECRET_NAME" \ + --description "Credentials for DockerHub" \ + --secret-string "$SECRET_VALUE" + + echo "$SECRET_NAME created" +fi diff --git a/scripts/validate.sh b/scripts/validate.sh new file mode 100755 index 00000000..157245ac --- /dev/null +++ b/scripts/validate.sh @@ -0,0 +1,82 @@ +#!/usr/bin/env bash +# +# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"). +# You may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +set -e + +DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )" +LANGUAGE="python" +SKIP_STATIC_CHECKS="false" + +while [ $# -gt 0 ] +do + case $1 in + --language) + LANGUAGE=${2} + shift # Remove --language from processing + shift # Remove $2 from processing + ;; + --skip-static-checks) + SKIP_STATIC_CHECKS="true" + shift # Remove --skip-static-checks from processing + ;; + --path) + VALIDATE_PATH="${DIR}/../${2}" + shift # Remove --path from processing + shift # Remove $2 from processing + ;; + *) # Reject everything else, including stray positional arguments that would otherwise never be shifted and loop forever + echo "Unknown option $1" + exit 1 + ;; + esac +done + +cd "${VALIDATE_PATH:?--path is required}" # Abort instead of silently cd-ing to $HOME when --path was not given +VALIDATE_PATH="$(pwd)" + +echo "Validating: ${VALIDATE_PATH}, Language: ${LANGUAGE}" + +echo "Validating Formatting" +if [[ $LANGUAGE == "python" ]]; then + echo "Checking isort, black" + isort --check . 
+ black --check . +elif [[ $LANGUAGE == "typescript" ]]; then + echo "Checking prettier" + npx prettier -c . +else + echo "ERROR Language: ${LANGUAGE}" + exit 1 +fi + +if [[ $SKIP_STATIC_CHECKS == "false" ]]; then + echo "Validating Static Checks" + if [[ $LANGUAGE == "python" ]]; then + echo "Checking flake8, mypy" + flake8 . + mypy --ignore-missing-imports . + else + echo "ERROR Language: ${LANGUAGE}" + exit 1 + fi +fi + +if [[ -f ${VALIDATE_PATH}/modulestack.yaml ]]; then + echo "Checking cfn-lint on modulestack.yaml" + cfn-lint -i E1029,E3031 --template ${VALIDATE_PATH}/modulestack.yaml +fi + diff --git a/seedfarmer.yaml b/seedfarmer.yaml new file mode 100644 index 00000000..24ede2de --- /dev/null +++ b/seedfarmer.yaml @@ -0,0 +1,3 @@ +project: mlops-modules +description: This is for local testing - intended for contributions +#projectPolicyPath: