diff --git a/.github/workflows/code-quality.yaml b/.github/workflows/code-quality.yaml index 5a2c86221f..f78bdfe60b 100644 --- a/.github/workflows/code-quality.yaml +++ b/.github/workflows/code-quality.yaml @@ -34,7 +34,7 @@ jobs: uses: actions/checkout@v3 with: repository: mosaicml/ci-testing - ref: v0.0.9 + ref: v0.2.2 path: ./ci-testing - uses: ./ci-testing/.github/actions/code-quality with: diff --git a/.github/workflows/coverage.yaml b/.github/workflows/coverage.yaml index fc511d7e60..8e8222d4c6 100644 --- a/.github/workflows/coverage.yaml +++ b/.github/workflows/coverage.yaml @@ -16,7 +16,7 @@ jobs: uses: actions/checkout@v3 with: repository: mosaicml/ci-testing - ref: v0.0.9 + ref: v0.2.2 path: ./ci-testing - uses: ./ci-testing/.github/actions/coverage with: diff --git a/.github/workflows/daily.yaml b/.github/workflows/daily.yaml index 2d5dc12f12..401c3a7e22 100644 --- a/.github/workflows/daily.yaml +++ b/.github/workflows/daily.yaml @@ -72,7 +72,7 @@ jobs: composer_package_name: mosaicml steps: - name: Run PR CPU Tests - uses: mosaicml/ci-testing/.github/actions/pytest-cpu@v0.1.2 + uses: mosaicml/ci-testing/.github/actions/pytest-cpu@v0.2.2 with: name: ${{ matrix.name }} pip_deps: "[all]" diff --git a/.github/workflows/pr-cpu.yaml b/.github/workflows/pr-cpu.yaml index fe41fe1fca..41572cf190 100644 --- a/.github/workflows/pr-cpu.yaml +++ b/.github/workflows/pr-cpu.yaml @@ -34,7 +34,7 @@ jobs: pytest_command: coverage run -m pytest tests/test_docs.py steps: - name: Run PR CPU Tests - uses: mosaicml/ci-testing/.github/actions/pytest-cpu@v0.1.2 + uses: mosaicml/ci-testing/.github/actions/pytest-cpu@v0.2.2 with: name: ${{ matrix.name }} pip_deps: "[all]" diff --git a/.github/workflows/pr-gpu.yaml b/.github/workflows/pr-gpu.yaml index e85bab43d2..e7c55cbe95 100644 --- a/.github/workflows/pr-gpu.yaml +++ b/.github/workflows/pr-gpu.yaml @@ -24,7 +24,7 @@ jobs: - name: Checkout code uses: actions/checkout@v3 - name: Run PR GPU Tests - uses: mosaicml/ci-testing/.github/actions/pytest-gpu@v0.1.2 + uses: mosaicml/ci-testing/.github/actions/pytest-gpu@v0.2.2 with: name: ${{ matrix.name }} composer_package_name: ${{ matrix.composer_package_name }} @@ -34,10 +34,10 @@ jobs: pip_deps: "[all]" pytest_command: ${{ matrix.pytest_command }} pytest_markers: ${{ matrix.markers }} - python_version: 3.9 + python_version: 3.11 gpu_num: 1 mcloud_api_key: ${{ secrets.MCLOUD_API_KEY }} - ci_repo_gpu_test_ref: v0.1.2 + ci_repo_gpu_test_ref: v0.2.2 pytest-gpu-2: name: ${{ matrix.name }} runs-on: ubuntu-latest @@ -54,7 +54,7 @@ jobs: - name: Checkout code uses: actions/checkout@v3 - name: Run PR GPU Tests - uses: mosaicml/ci-testing/.github/actions/pytest-gpu@v0.1.2 + uses: mosaicml/ci-testing/.github/actions/pytest-gpu@v0.2.2 with: name: ${{ matrix.name }} composer_package_name: ${{ matrix.composer_package_name }} @@ -64,10 +64,10 @@ jobs: pip_deps: "[all]" pytest_command: ${{ matrix.pytest_command }} pytest_markers: ${{ matrix.markers }} - python_version: 3.9 + python_version: 3.11 gpu_num: 2 mcloud_api_key: ${{ secrets.MCLOUD_API_KEY }} - ci_repo_gpu_test_ref: v0.1.2 + ci_repo_gpu_test_ref: v0.2.2 pytest-gpu-4: name: ${{ matrix.name }} runs-on: ubuntu-latest @@ -84,7 +84,7 @@ jobs: - name: Checkout code uses: actions/checkout@v3 - name: Run PR GPU Tests - uses: mosaicml/ci-testing/.github/actions/pytest-gpu@v0.1.2 + uses: mosaicml/ci-testing/.github/actions/pytest-gpu@v0.2.2 with: name: ${{ matrix.name }} composer_package_name: ${{ matrix.composer_package_name }} @@ -94,7 +94,7 @@ jobs: pip_deps: "[all]" pytest_command: ${{ matrix.pytest_command }} pytest_markers: ${{ matrix.markers }} - python_version: 3.9 + python_version: 3.11 gpu_num: 4 mcloud_api_key: ${{ secrets.MCLOUD_API_KEY }} - ci_repo_gpu_test_ref: v0.1.2 + ci_repo_gpu_test_ref: v0.2.2 diff --git a/.github/workflows/smoketest.yaml b/.github/workflows/smoketest.yaml index a0a6c445d2..f4d714bb3e 100644 --- a/.github/workflows/smoketest.yaml +++ b/.github/workflows/smoketest.yaml @@ -33,7 +33,7 @@ jobs: uses: actions/checkout@v3 with: repository: mosaicml/ci-testing - ref: v0.0.9 + ref: v0.2.2 path: ./ci-testing - uses: ./ci-testing/.github/actions/smoketest with: diff --git a/composer/loggers/mlflow_logger.py b/composer/loggers/mlflow_logger.py index 7a8acdd511..660c315c8e 100644 --- a/composer/loggers/mlflow_logger.py +++ b/composer/loggers/mlflow_logger.py @@ -174,9 +174,9 @@ def __init__( if log_system_metrics: # Set system metrics sampling interval and samples before logging so that system metrics - # are collected every 5s, and aggregated over 3 samples before being logged - # (logging per 15s). - mlflow.set_system_metrics_samples_before_logging(3) + # are collected every 5s, and aggregated over 6 samples before being logged + # (logging per 30s). + mlflow.set_system_metrics_samples_before_logging(6) mlflow.set_system_metrics_sampling_interval(5) self._rank_zero_only = rank_zero_only @@ -545,7 +545,11 @@ def register_model_with_run_id( """ if self._enabled: from mlflow.exceptions import MlflowException - from mlflow.protos.databricks_pb2 import ALREADY_EXISTS, RESOURCE_ALREADY_EXISTS, ErrorCode + from mlflow.protos.databricks_pb2 import ( + ALREADY_EXISTS, + RESOURCE_ALREADY_EXISTS, + ErrorCode, + ) full_name = f'{self.model_registry_prefix}.{name}' if len(self.model_registry_prefix) > 0 else name @@ -601,7 +605,7 @@ def log_images( assert isinstance(self._run_id, str) self._mlflow_client.log_image( image=image, - key=f'{name}_{step}_{im_ind}', + key=f'{name}_{im_ind}', run_id=self._run_id, step=step, ) diff --git a/setup.py b/setup.py index 3ce3fa5756..89f1abc894 100644 --- a/setup.py +++ b/setup.py @@ -154,6 +154,7 @@ def package_files(prefix: str, directory: str, extension: str): } extra_deps['deepspeed'] = [ + 'numpy<2', 'deepspeed==0.8.3', 'pydantic>=1.0,<2', ] @@ -224,13 +225,13 @@ def package_files(prefix: str, directory: str, extension: str): extra_deps['mlflow'] = [ 'mlflow>=2.14.1,<3.0', - 'databricks-sdk==0.31.1', + 'databricks-sdk==0.32.0', 'pynvml>=11.5.0,<12', ] extra_deps['pandas'] = ['pandas>=2.0.0,<3.0'] -extra_deps['databricks'] = ['databricks-sdk==0.31.1'] +extra_deps['databricks'] = ['databricks-sdk==0.32.0'] extra_deps['all'] = {dep for deps in extra_deps.values() for dep in deps}