From dbe494a2112de76839ec37b079e5507908cab1ba Mon Sep 17 00:00:00 2001 From: danellecline Date: Mon, 15 Jul 2024 14:50:50 -0700 Subject: [PATCH] ci: add pip package called sdcat with poetry build and other minor corrections in comments --- .github/workflows/pytest.yml | 12 +++-- .github/workflows/release.yml | 9 +++- DEVELOPMENT.md | 68 ++++++++++++++++++++++++++ README.md | 69 ++++++++------------------- docker/{Dockerfile => Dockerfile.bak} | 0 pyproject.toml | 58 +++++++++++++++++++++- requirements.txt | 1 - sdcat/__main__.py | 2 +- sdcat/cluster/commands.py | 2 +- 9 files changed, 162 insertions(+), 59 deletions(-) create mode 100644 DEVELOPMENT.md rename docker/{Dockerfile => Dockerfile.bak} (100%) diff --git a/.github/workflows/pytest.yml b/.github/workflows/pytest.yml index 9fd48aa..e8be933 100644 --- a/.github/workflows/pytest.yml +++ b/.github/workflows/pytest.yml @@ -20,14 +20,16 @@ jobs: with: python-version: '3.11' - - name: Install dependencies - run: pip install -r requirements.txt + - name: Install Poetry + run: | + curl -sSL https://install.python-poetry.org | python3 - + echo "${HOME}/.local/bin" >> $GITHUB_PATH - - name: Install pytest - run: pip install pytest + - name: Install dependencies + run: poetry install - name: Set PYTHONPATH to subdirectory sdcat run: echo "PYTHONPATH=." 
>> $GITHUB_ENV - name: Run pytest - run: pytest + run: poetry run pytest \ No newline at end of file diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index ccda88c..c5a30b0 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -18,7 +18,14 @@ jobs: uses: python-semantic-release/python-semantic-release@master with: github_token: ${{ secrets.GITHUB_TOKEN }} - + + - name: Publish PyPi + if: steps.semantic.outputs.released == 'true' + run: | + pip install poetry + poetry config pypi-token.pypi ${{ secrets.PYPI_API_TOKEN }} + poetry publish --build + - name: docker push version if: steps.semantic.outputs.released == 'true' run: | diff --git a/DEVELOPMENT.md b/DEVELOPMENT.md new file mode 100644 index 0000000..e3b2c94 --- /dev/null +++ b/DEVELOPMENT.md @@ -0,0 +1,68 @@ +# Development + +## Development Environment + +For development, an Anaconda environment is recommended. This will create a conda environment +called `sdcat`. + +```shell +conda env create +conda activate sdcat +``` + +Alternatively, you can use `poetry` to manage the environment. This will create a virtual environment +called `sdcat`. + +```shell +poetry install +poetry shell +``` + + +## Running the tests + +Run tests before checking code back in. To run tests, first install pytest: + +```shell +pip install pytest +pytest +``` + +The tests should run and pass. 
+ +```shell +pytest +``` + +```shell +=========================================================================================================================================================================================================================== test session starts ============================================================================================================================================================================================================================ +platform darwin -- Python 3.10.13, pytest-7.4.4, pluggy-1.3.0 +rootdir: /Users/dcline/Dropbox/code/sdcat +plugins: napari-plugin-engine-0.2.0, anyio-3.7.1, napari-0.4.18, npe2-0.7.3 +collected 3 items + +tests/test_detect.py ... [100%] + +======================================================================================================================================================================================================================= 3 passed in 61.48s (0:01:01) ======================================================================================================================================================================================================================== +``` +In poetry, you can run the tests with: + +```shell +poetry run pytest +``` + +# Building python package + +To build the python package, run the following command: + +```shell +poetry build +``` + +This will create a `dist` directory with the package in it. 
+ +Test the package by installing it in a new environment, e.g.: + +```shell +pip install dist/sdcat-1.7.0.tar.gz +``` \ No newline at end of file diff --git a/README.md b/README.md index 91e98c1..6b5ff09 100644 --- a/README.md +++ b/README.md @@ -36,42 +36,37 @@ The algorithm workflow looks like this: ![](https://raw.githubusercontent.com/mbari-org/sdcat/main/docs/imgs/cluster_workflow.png) # Installation + +Pip install the sdcat package including all [requirements](https://github.com/mbari-org/sdcat/blob/main/pyproject.toml) with: -This code can be run from a command-line or from a jupyter notebook. The following instructions are for running from the command-line. - -```shell -git clone https://github.com/mbari/sdcat.git -cd sdcat -conda env create -f environment.yml +```bash +pip install sdcat ``` -Or, from a jupyter notebook. - +Alternatively, [Docker](https://www.docker.com) can be used to run the code. A pre-built docker image is available at [Docker Hub](https://hub.docker.com/r/mbari/sdcat) with the latest version of the code. + +Detection +```shell +docker run -it -v $(pwd):/data mbari/sdcat detect --image-dir /data/images --save-dir /data/detections --model MBARI-org/uav-yolov5 ``` -conda activate sdcat -pip install ipykernel -python -m ipykernel install --user --name=sdcat -jupyter notebook -``` - -Or, from a docker container. - +Followed by clustering ```shell -docker run -it -v $(pwd):/data mbari/sdcat +docker run -it -v $(pwd):/data mbari/sdcat cluster detections --det-dir /data/detections/ --save-dir /data/detections --model MBARI-org/uav-yolov5 ``` A GPU is recommended for clustering and detection. If you don't have a GPU, you can still run the code, but it will be slower. If running on a CPU, multiple cores are recommended and will speed up processing. 
-For large datasets, the RapidsAI cuDF library is recommended for faster processing, although it does not currently support -custom metrics such as cosine similarity, so the clustering performance will not be as good as with the CPU. -See: https://rapids.ai/start.html#get-rapids for installation instructions. + +```shell +docker run -it --gpus all -v $(pwd):/data mbari/sdcat:latest-cuda12 detect --image-dir /data/images --save-dir /data/detections --model MBARI-org/uav-yolov5 +``` # Commands To get all options available, use the --help option. For example: ```shell -python sdcat --help +sdcat --help ``` which will print out the following: ```shell @@ -92,8 +87,9 @@ Commands: To get details on a particular command, use the --help option with the command. For example, with the **cluster** command: ```shell - python sdcat cluster --help + sdcat cluster --help ``` + which will print out the following: ```shell Usage: sdcat cluster [OPTIONS] @@ -145,7 +141,7 @@ and good for experiments and quick results. a smaller slice size will take longer to process. ```shell -python sdcat detect --image-dir --save-dir --model yolov5s --slice-size-width 900 --slice-size-height 900 +sdcat detect --image-dir --save-dir --model yolov5s --slice-size-width 900 --slice-size-height 900 ``` ## Cluster detections from the YOLOv5 model @@ -154,34 +150,9 @@ Cluster the detections from the YOLOv5 model. The detections are clustered usin features from a FaceBook Vision Transformer (ViT) model. ```shell -python sdcat cluster --det-dir --save-dir --model yolov5s +sdcat cluster --det-dir --save-dir --model yolov5s ``` -### Testing - -Please run tests before checking code back in. To run tests, first install pytest: - -```shell -pip install pytest -``` - -The tests should run and pass. 
- -```shell -pytest -``` - -```shell -=========================================================================================================================================================================================================================== test session starts ============================================================================================================================================================================================================================ -platform darwin -- Python 3.10.13, pytest-7.4.4, pluggy-1.3.0 -rootdir: /Users/dcline/Dropbox/code/sdcat -plugins: napari-plugin-engine-0.2.0, anyio-3.7.1, napari-0.4.18, npe2-0.7.3 -collected 3 items - -tests/test_detect.py ... [100%] - -======================================================================================================================================================================================================================= 3 passed in 61.48s (0:01:01) ======================================================================================================================================================================================================================== -``` # Related work * https://github.com/obss/sahi diff --git a/docker/Dockerfile b/docker/Dockerfile.bak similarity index 100% rename from docker/Dockerfile rename to docker/Dockerfile.bak diff --git a/pyproject.toml b/pyproject.toml index 6f08541..64d7743 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -24,4 +24,60 @@ exclude_commit_patterns = [ '''Initial Commit.*''', # Old semantic-release version commits '''^\d+\.\d+\.\d+''', -] \ No newline at end of file +] + +[tool.poetry] +name = "sdcat" +version = "1.7.0" +description = "Sliced Detection and Clustering Analysis Toolkit - Developed by MBARI" +authors = ["danellecline "] +license = "Apache" +readme = "README.md" +packages = [ + { include = "sdcat" } +] + +[tool.poetry.scripts] +sdcat = "sdcat.__main__:cli" + 
+[tool.poetry.dependencies] +python = ">=3.9,<3.12" +pillow = "^10.4.0" +bs4 = "^0.0.2" +opencv-contrib-python-headless = "^4.10.0.84" +sahi = "^0.11.18" +mmdet = "2.21.0" +mmengine = "0.7.2" +click = "^8.1.7" +hdbscan = ">=0.8.27" +torch = "2.3.1" +piexif = "^1.1.3" +yolov5 = "7.0.13" +torchvision = "0.18.1" +transformers = "^4.42.4" +timm = "^1.0.7" +pandas = ">=1.2.4" +ultralytics = "^8.2.57" +scikit-learn = "1.4.0rc1" +scikit-image = "^0.24.0" +umap-learn = "0.5.5" +matplotlib = "3.7.0" +numba = "0.57" +ephem = "^4.1.5" +tqdm = "^4.66.4" + +[tool.poetry.group.dev.dependencies] +build = "^1.2.1" +twine = "^5.1.1" + +[build-system] +requires = ["poetry-core"] +build-backend = "poetry.core.masonry.api" + +[tool.mypy] +show_error_codes = true +ignore_missing_imports = true + +[tool.pytest.ini_options] +testpaths = ["sdcat/tests",] +pythonpath = ["."] \ No newline at end of file diff --git a/requirements.txt b/requirements.txt index 9c7e7c2..19a30dc 100644 --- a/requirements.txt +++ b/requirements.txt @@ -7,7 +7,6 @@ mmengine==0.7.2 click hdbscan==0.8.33 torch==2.3.1 -tator piexif yolov5==7.0.13 torchvision==0.18.1 diff --git a/sdcat/__main__.py b/sdcat/__main__.py index 2fb47e0..02114c4 100644 --- a/sdcat/__main__.py +++ b/sdcat/__main__.py @@ -25,7 +25,7 @@ ) def cli(): """ - Process images from a command line. + Process images either to detect or cluster similar objects from a command line. """ pass diff --git a/sdcat/cluster/commands.py b/sdcat/cluster/commands.py index c8151aa..dce1e6b 100644 --- a/sdcat/cluster/commands.py +++ b/sdcat/cluster/commands.py @@ -1,4 +1,4 @@ -# sightwire, Apache-2.0 license +# sdcat, Apache-2.0 license # Filename: cluster/commands.py # Description: Clustering commands