From 43d3f7244d341529b3e350d998a4f696315ad143 Mon Sep 17 00:00:00 2001 From: Taylor Barnes Date: Fri, 17 Mar 2023 09:23:32 -0400 Subject: [PATCH] Add GPU support --- mdimechanic/cmd_build.py | 7 +++- mdimechanic/cmd_interactive.py | 2 + mdimechanic/cmd_report.py | 6 ++- mdimechanic/cmd_run.py | 6 ++- mdimechanic/cmd_rundriver.py | 6 ++- .../docker/mpi_nvidia/docker-compose.yml | 36 ++++++++++++++++++ mdimechanic/docker/mpi_nvidia/mdi_appfile | 2 + .../docker/run_nvidia/docker-compose.yml | 25 ++++++++++++ .../docker/tcp_nvidia/docker-compose.yml | 38 +++++++++++++++++++ 9 files changed, 124 insertions(+), 4 deletions(-) create mode 100644 mdimechanic/docker/mpi_nvidia/docker-compose.yml create mode 100644 mdimechanic/docker/mpi_nvidia/mdi_appfile create mode 100644 mdimechanic/docker/run_nvidia/docker-compose.yml create mode 100644 mdimechanic/docker/tcp_nvidia/docker-compose.yml diff --git a/mdimechanic/cmd_build.py b/mdimechanic/cmd_build.py index fccb032..e251ded 100644 --- a/mdimechanic/cmd_build.py +++ b/mdimechanic/cmd_build.py @@ -75,8 +75,13 @@ def install_all( base_path ): os.makedirs(os.path.dirname(build_entry_path), exist_ok=True) ut.write_as_bytes( build_entry_script, build_entry_path ) + # Check if there are any custom GPU options in mdimechanic.yml + gpu_options = "" + if 'gpu' in mdimechanic_yaml['docker']: + gpu_options = " --gpus all" + # Build the engine, within its Docker image - docker_string = "docker run --rm -v " + str(base_path) + ":/repo -v " + str(package_path) + ":/MDI_Mechanic " + mdimechanic_yaml['docker']['image_name'] + " bash /repo/docker/.temp/build_entry.sh" + docker_string = "docker run --rm" + str(gpu_options) + " -v " + str(base_path) + ":/repo -v " + str(package_path) + ":/MDI_Mechanic " + mdimechanic_yaml['docker']['image_name'] + " bash /repo/docker/.temp/build_entry.sh" ret = os.system(docker_string) if ret != 0: raise Exception("Unable to build the engine") diff --git a/mdimechanic/cmd_interactive.py 
b/mdimechanic/cmd_interactive.py index 317ba77..66150f0 100644 --- a/mdimechanic/cmd_interactive.py +++ b/mdimechanic/cmd_interactive.py @@ -57,5 +57,7 @@ def start( base_path ): run_line += " -v " + str( base_path ) + ":/repo" run_line += gitconfig_line run_line += ssh_line + if 'gpu' in mdimechanic_yaml['docker']: + run_line += " --gpus all" run_line += " -it " + str(image_name) + " bash /repo/docker/.temp/interactive_entry.sh" os.system(run_line) diff --git a/mdimechanic/cmd_report.py b/mdimechanic/cmd_report.py index 550e123..409d6ec 100644 --- a/mdimechanic/cmd_report.py +++ b/mdimechanic/cmd_report.py @@ -21,7 +21,11 @@ def generate_report( base_path ): # Ensure that there are no orphaned containers / networks running try: #docker_path = os.path.join( base_path, "MDI_Mechanic", "docker" ) - compose_path = ut.get_compose_path( "tcp" ) + compose_path = None + if 'gpu' in mdimechanic_yaml['docker']: + compose_path = ut.get_compose_path( "tcp_nvidia" ) + else: + compose_path = ut.get_compose_path( "tcp" ) down_proc = subprocess.Popen( ["docker-compose", "down"], stdout=subprocess.PIPE, stderr=subprocess.PIPE, cwd=compose_path) diff --git a/mdimechanic/cmd_run.py b/mdimechanic/cmd_run.py index 3c3f78f..d030a2f 100755 --- a/mdimechanic/cmd_run.py +++ b/mdimechanic/cmd_run.py @@ -8,7 +8,11 @@ def run( script_name, base_path ): mdimechanic_yaml = get_mdimechanic_yaml( base_path ) # Get the path to the docker-compose file - docker_path = get_compose_path( "run" ) + docker_path = None + if 'gpu' in mdimechanic_yaml['docker']: + docker_path = get_compose_path( "run_nvidia" ) + else: + docker_path = get_compose_path( "run" ) # Write the run script for the engine #script_lines = mdimechanic_yaml['engine_tests']['script'] diff --git a/mdimechanic/cmd_rundriver.py b/mdimechanic/cmd_rundriver.py index fe38a3d..ad398ff 100644 --- a/mdimechanic/cmd_rundriver.py +++ b/mdimechanic/cmd_rundriver.py @@ -8,7 +8,11 @@ def test_driver( driver_name, base_path ): mdimechanic_yaml = get_mdimechanic_yaml( base_path ) # Get the path to the 
docker-compose file - docker_path = get_compose_path( "tcp" ) + docker_path = None + if 'gpu' in mdimechanic_yaml['docker']: + docker_path = get_compose_path( "tcp_nvidia" ) + else: + docker_path = get_compose_path( "tcp" ) # Write the run script for MDI Mechanic docker_file = os.path.join( base_path, ".mdimechanic", ".temp", "docker_mdi_mechanic.sh" ) diff --git a/mdimechanic/docker/mpi_nvidia/docker-compose.yml b/mdimechanic/docker/mpi_nvidia/docker-compose.yml new file mode 100644 index 0000000..cf31075 --- /dev/null +++ b/mdimechanic/docker/mpi_nvidia/docker-compose.yml @@ -0,0 +1,36 @@ +version: '3' + +services: + mdi_mechanic: + #build: . + image: "mdi_mechanic/mdi_mechanic" + volumes: + - "${MDIMECH_WORKDIR}:/repo" + - "${MDIMECH_PACKAGEDIR}:/MDI_Mechanic" + networks: + mdinet: + aliases: + - driverhost + depends_on: + - engine + engine: + #build: ../../user + image: "${MDIMECH_ENGINE_NAME}" + deploy: + resources: + reservations: + devices: + - driver: nvidia + count: all + capabilities: [gpu] + volumes: + - "${MDIMECH_WORKDIR}:/repo" + - "${MDIMECH_PACKAGEDIR}:/MDI_Mechanic" + networks: + mdinet: + aliases: + - enginehost + +networks: + mdinet: + driver: "bridge" \ No newline at end of file diff --git a/mdimechanic/docker/mpi_nvidia/mdi_appfile b/mdimechanic/docker/mpi_nvidia/mdi_appfile new file mode 100644 index 0000000..2061102 --- /dev/null +++ b/mdimechanic/docker/mpi_nvidia/mdi_appfile @@ -0,0 +1,2 @@ +-host driverhost -np 1 bash /repo/MDI_Mechanic/.temp/docker_mdi_mechanic.sh +-host enginehost -np 1 bash /repo/MDI_Mechanic/.temp/docker_mdi_engine.sh \ No newline at end of file diff --git a/mdimechanic/docker/run_nvidia/docker-compose.yml b/mdimechanic/docker/run_nvidia/docker-compose.yml new file mode 100644 index 0000000..661c68a --- /dev/null +++ b/mdimechanic/docker/run_nvidia/docker-compose.yml @@ -0,0 +1,25 @@ +version: '3' + +services: + engine: + #build: ../../user + image: "${MDIMECH_ENGINE_NAME}" + command: bash -c "bash /repo/.mdimechanic/.temp/docker_mdi_engine.sh" + 
deploy: + resources: + reservations: + devices: + - driver: nvidia + count: all + capabilities: [gpu] + volumes: + - "${MDIMECH_WORKDIR}:/repo" + - "${MDIMECH_PACKAGEDIR}:/MDI_Mechanic" + networks: + mdinet: + aliases: + - enginehost + +networks: + mdinet: + driver: "bridge" \ No newline at end of file diff --git a/mdimechanic/docker/tcp_nvidia/docker-compose.yml b/mdimechanic/docker/tcp_nvidia/docker-compose.yml new file mode 100644 index 0000000..02c3a4b --- /dev/null +++ b/mdimechanic/docker/tcp_nvidia/docker-compose.yml @@ -0,0 +1,38 @@ +version: '3' + +services: + mdi_mechanic: + #build: . + image: "mdi_mechanic/mdi_mechanic" + command: bash -c "bash /repo/.mdimechanic/.temp/docker_mdi_mechanic.sh" + volumes: + - "${MDIMECH_WORKDIR}:/repo" + - "${MDIMECH_PACKAGEDIR}:/MDI_Mechanic" + networks: + mdinet: + aliases: + - driverhost + engine: + #build: ../../user + image: "${MDIMECH_ENGINE_NAME}" + command: bash -c "bash /repo/.mdimechanic/.temp/docker_mdi_engine.sh" + deploy: + resources: + reservations: + devices: + - driver: nvidia + count: all + capabilities: [gpu] + volumes: + - "${MDIMECH_WORKDIR}:/repo" + - "${MDIMECH_PACKAGEDIR}:/MDI_Mechanic" + networks: + mdinet: + aliases: + - enginehost + depends_on: + - mdi_mechanic + +networks: + mdinet: + driver: "bridge" \ No newline at end of file