diff --git a/.devcontainer/README.MD b/.devcontainer/README.MD new file mode 100644 index 00000000000..d9fe31f38f3 --- /dev/null +++ b/.devcontainer/README.MD @@ -0,0 +1 @@ +The files in this directory configure a development container for GitHub Codespaces. \ No newline at end of file diff --git a/.devcontainer/devcontainer.json b/.devcontainer/devcontainer.json new file mode 100644 index 00000000000..8e0a3108c13 --- /dev/null +++ b/.devcontainer/devcontainer.json @@ -0,0 +1,15 @@ +{ + "name": "OpenDevin Codespaces", + "image": "mcr.microsoft.com/devcontainers/universal", + "customizations":{ + "vscode":{ + "extensions": [ + "ms-python.python" + ] + } + }, + "onCreateCommand": "sh ./.devcontainer/on_create.sh", + "postCreateCommand": "make build", + "postStartCommand": "USE_HOST_NETWORK=True nohup bash -c 'make run &'" + +} diff --git a/.devcontainer/on_create.sh b/.devcontainer/on_create.sh new file mode 100644 index 00000000000..3293c5699af --- /dev/null +++ b/.devcontainer/on_create.sh @@ -0,0 +1,8 @@ +#!/usr/bin/env bash +sudo apt update +sudo apt install -y netcat +sudo add-apt-repository -y ppa:deadsnakes/ppa +sudo apt install -y python3.11 +curl -sSL https://install.python-poetry.org | python3.11 - +# chromadb requires SQLite > 3.35 but SQLite in Python3.11.9 comes with 3.31.1 +sudo cp /opt/conda/lib/libsqlite3.so.0 /lib/x86_64-linux-gnu/libsqlite3.so.0 diff --git a/.github/ISSUE_TEMPLATE/bug_template.yml b/.github/ISSUE_TEMPLATE/bug_template.yml index e0286ea3fcd..e7a3b4f5796 100644 --- a/.github/ISSUE_TEMPLATE/bug_template.yml +++ b/.github/ISSUE_TEMPLATE/bug_template.yml @@ -12,7 +12,7 @@ body: label: Is there an existing issue for the same bug? description: Please check if an issue already exists for the bug you encountered. options: - - label: I have checked the troubleshooting document at https://opendevin.github.io/OpenDevin/modules/usage/troubleshooting + - label: I have checked the troubleshooting document at https://docs.all-hands.dev/modules/usage/troubleshooting required: true - label: I have checked the existing issues. required: true diff --git a/.github/dependabot.yml b/.github/dependabot.yml index 2669165c607..09ef6a92abf 100644 --- a/.github/dependabot.yml +++ b/.github/dependabot.yml @@ -5,18 +5,34 @@ version: 2 updates: - - package-ecosystem: "pip" # See documentation for possible values - directory: "/" # Location of package manifests + - package-ecosystem: "pip" + directory: "/" schedule: interval: "daily" open-pull-requests-limit: 20 - - package-ecosystem: "npm" # See documentation for possible values - directory: "/frontend" # Location of package manifests + + - package-ecosystem: "npm" + directory: "/frontend" schedule: interval: "daily" open-pull-requests-limit: 20 - - package-ecosystem: "npm" # See documentation for possible values - directory: "/docs" # Location of package manifests + groups: + docusaurus: + patterns: + - "*docusaurus*" + eslint: + patterns: + - "*eslint*" + + - package-ecosystem: "npm" + directory: "/docs" schedule: interval: "daily" open-pull-requests-limit: 20 + groups: + docusaurus: + patterns: + - "*docusaurus*" + eslint: + patterns: + - "*eslint*" diff --git a/.github/pull_request_template.md b/.github/pull_request_template.md index c96fe4c1f85..bc649f42669 100644 --- a/.github/pull_request_template.md +++ b/.github/pull_request_template.md @@ -1,5 +1,11 @@ **What is the problem that this fixes or functionality that this introduces? 
Does it fix any open issues?** -**Give a brief summary of what the PR does, explaining any non-trivial design decisions** + +--- +**Give a summary of what the PR does, explaining any non-trivial design decisions** + + + +--- **Other references** diff --git a/.github/workflows/clean-up.yml b/.github/workflows/clean-up.yml new file mode 100644 index 00000000000..a00dad7282d --- /dev/null +++ b/.github/workflows/clean-up.yml @@ -0,0 +1,68 @@ +# Workflow that cleans up outdated and old workflows to prevent out of disk issues +name: Delete old workflow runs + +on: + workflow_dispatch: + inputs: + days: + description: 'Days-worth of runs to keep for each workflow' + required: true + default: '30' + minimum_runs: + description: 'Minimum runs to keep for each workflow' + required: true + default: '10' + delete_workflow_pattern: + description: 'Name or filename of the workflow (if not set, all workflows are targeted)' + required: false + delete_workflow_by_state_pattern: + description: 'Filter workflows by state: active, deleted, disabled_fork, disabled_inactivity, disabled_manually' + required: true + default: "ALL" + type: choice + options: + - "ALL" + - active + - deleted + - disabled_inactivity + - disabled_manually + delete_run_by_conclusion_pattern: + description: 'Remove runs based on conclusion: action_required, cancelled, failure, skipped, success' + required: true + default: 'ALL' + type: choice + options: + - 'ALL' + - 'Unsuccessful: action_required,cancelled,failure,skipped' + - action_required + - cancelled + - failure + - skipped + - success + dry_run: + description: 'Logs simulated changes, no deletions are performed' + required: false + +jobs: + del_runs: + runs-on: ubuntu-latest + permissions: + actions: write + contents: read + steps: + - name: Delete workflow runs + uses: Mattraks/delete-workflow-runs@v2 + with: + token: ${{ github.token }} + repository: ${{ github.repository }} + retain_days: ${{ github.event.inputs.days }} + keep_minimum_runs: ${{ github.event.inputs.minimum_runs }} + delete_workflow_pattern: ${{ github.event.inputs.delete_workflow_pattern }} + delete_workflow_by_state_pattern: ${{ github.event.inputs.delete_workflow_by_state_pattern }} + delete_run_by_conclusion_pattern: >- + ${{ + startsWith(github.event.inputs.delete_run_by_conclusion_pattern, 'Unsuccessful:') + && 'action_required,cancelled,failure,skipped' + || github.event.inputs.delete_run_by_conclusion_pattern + }} + dry_run: ${{ github.event.inputs.dry_run }} diff --git a/.github/workflows/deploy-docs.yml b/.github/workflows/deploy-docs.yml index 939e72bcb18..70df755595c 100644 --- a/.github/workflows/deploy-docs.yml +++ b/.github/workflows/deploy-docs.yml @@ -1,14 +1,26 @@ +# Workflow that builds and deploys the documentation website name: Deploy Docs to GitHub Pages +# Only run one workflow of the same group at a time. +# There can be at most one running and one pending job in a concurrency group at any time. 
+concurrency: + group: ${{ github.workflow }}-${{ github.ref }} + cancel-in-progress: ${{ github.ref != 'refs/heads/main' }} + +# * Always run on "main" +# * Run on PRs that target the "main" branch and have changes in the "docs" folder on: push: branches: - main pull_request: + paths: + - 'docs/**' branches: - main jobs: + # Build the documentation website build: name: Build Docusaurus runs-on: ubuntu-latest @@ -25,23 +37,23 @@ jobs: - name: Set up Python uses: actions/setup-python@v5 with: - python-version: "3.11" - + python-version: '3.11' - name: Generate Python Docs run: rm -rf docs/modules/python && pip install pydoc-markdown && pydoc-markdown - name: Install dependencies run: cd docs && npm ci - name: Build website run: cd docs && npm run build - - name: Upload Build Artifact if: github.ref == 'refs/heads/main' uses: actions/upload-pages-artifact@v3 with: path: docs/build + # Deploy the documentation website deploy: name: Deploy to GitHub Pages + runs-on: ubuntu-latest needs: build if: github.ref == 'refs/heads/main' && github.repository == 'OpenDevin/OpenDevin' # Grant GITHUB_TOKEN the permissions required to make a Pages deployment @@ -52,7 +64,6 @@ jobs: environment: name: github-pages url: ${{ steps.deployment.outputs.page_url }} - runs-on: ubuntu-latest steps: - name: Deploy to GitHub Pages id: deployment diff --git a/.github/workflows/dummy-agent-test.yml b/.github/workflows/dummy-agent-test.yml index 8422f0c361e..4633837f509 100644 --- a/.github/workflows/dummy-agent-test.yml +++ b/.github/workflows/dummy-agent-test.yml @@ -1,5 +1,8 @@ +# Workflow that uses the DummyAgent to run a simple task name: Run E2E test with dummy agent +# Only run one workflow of the same group at a time. +# There can be at most one running and one pending job in a concurrency group at any time. 
concurrency: group: ${{ github.workflow }}-${{ github.ref }} cancel-in-progress: ${{ github.ref != 'refs/heads/main' }} @@ -10,9 +13,6 @@ on: - main pull_request: -env: - PERSIST_SANDBOX : "false" - jobs: test: runs-on: ubuntu-latest @@ -25,7 +25,7 @@ jobs: - name: Set up environment run: | curl -sSL https://install.python-poetry.org | python3 - - poetry install --without evaluation + poetry install --without evaluation,llama-index poetry run playwright install --with-deps chromium wget https://huggingface.co/BAAI/bge-small-en-v1.5/raw/main/1_Pooling/config.json -P /tmp/llama_index/models--BAAI--bge-small-en-v1.5/snapshots/5c38ec7c405ec4b44b94cc5a9bb96e735b38267a/1_Pooling/ - name: Run tests diff --git a/.github/workflows/fe-unit-tests.yml b/.github/workflows/fe-unit-tests.yml new file mode 100644 index 00000000000..eed8315c38b --- /dev/null +++ b/.github/workflows/fe-unit-tests.yml @@ -0,0 +1,39 @@ +# Workflow that runs frontend unit tests +name: Run Frontend Unit Tests + +# * Always run on "main" +# * Run on PRs that have changes in the "frontend" folder or this workflow +on: + push: + branches: + - main + pull_request: + paths: + - 'frontend/**' + - '.github/workflows/fe-unit-tests.yml' + +jobs: + # Run frontend unit tests + fe-test: + name: FE Unit Tests + runs-on: ubuntu-latest + strategy: + matrix: + node-version: [20] + steps: + - name: Checkout + uses: actions/checkout@v4 + - name: Set up Node.js + uses: actions/setup-node@v4 + with: + node-version: ${{ matrix.node-version }} + - name: Install dependencies + working-directory: ./frontend + run: npm ci + - name: Run tests and collect coverage + working-directory: ./frontend + run: npm run test:coverage + - name: Upload coverage to Codecov + uses: codecov/codecov-action@v4 + env: + CODECOV_TOKEN: ${{ secrets.CODECOV_TOKEN }} diff --git a/.github/workflows/ghcr.yml b/.github/workflows/ghcr.yml index 852438e2748..bcade7cadc8 100644 --- a/.github/workflows/ghcr.yml +++ b/.github/workflows/ghcr.yml @@ -1,5 +1,8 @@ -name: Build Publish and Test Docker Image +# Workflow that builds, tests and then pushes the docker images to the ghcr.io repository +name: Build Publish and Test Runtime Image +# Only run one workflow of the same group at a time. +# There can be at most one running and one pending job in a concurrency group at any time. 
concurrency: group: ${{ github.workflow }}-${{ github.ref }} cancel-in-progress: ${{ github.ref != 'refs/heads/main' }} @@ -19,25 +22,21 @@ on: default: '' jobs: + # Builds the OpenDevin Docker images ghcr_build: runs-on: ubuntu-latest - outputs: tags: ${{ steps.capture-tags.outputs.tags }} - permissions: contents: read packages: write - strategy: matrix: - image: ["sandbox", "opendevin"] - platform: ["amd64", "arm64"] - + image: ['opendevin'] + platform: ['amd64', 'arm64'] steps: - name: Checkout uses: actions/checkout@v4 - - name: Free Disk Space (Ubuntu) uses: jlumbroso/free-disk-space@main with: @@ -52,62 +51,152 @@ jobs: large-packages: true docker-images: false swap-storage: true - - name: Set up QEMU uses: docker/setup-qemu-action@v3 - - name: Set up Docker Buildx id: buildx uses: docker/setup-buildx-action@v3 - - name: Build and export image id: build run: ./containers/build.sh ${{ matrix.image }} ${{ github.repository_owner }} ${{ matrix.platform }} - - name: Capture tags id: capture-tags run: | tags=$(cat tags.txt) echo "tags=$tags" echo "tags=$tags" >> $GITHUB_OUTPUT + - name: Upload Docker image as artifact + uses: actions/upload-artifact@v4 + with: + name: ${{ matrix.image }}-docker-image-${{ matrix.platform }} + path: /tmp/${{ matrix.image }}_image_${{ matrix.platform }}.tar + retention-days: 14 + # Builds the runtime Docker images + ghcr_build_runtime: + runs-on: ubuntu-latest + outputs: + tags: ${{ steps.capture-tags.outputs.tags }} + permissions: + contents: read + packages: write + strategy: + matrix: + image: ['od_runtime'] + base_image: ['nikolaik/python-nodejs:python3.11-nodejs22'] + platform: ['amd64', 'arm64'] + steps: + - name: Checkout + uses: actions/checkout@v4 + - name: Free Disk Space (Ubuntu) + uses: jlumbroso/free-disk-space@main + with: + # this might remove tools that are actually needed, + # if set to "true" but frees about 6 GB + tool-cache: true + # all of these default to true, but feel free to set to + # "false" if necessary for your workflow + android: true + dotnet: true + haskell: true + large-packages: true + docker-images: false + swap-storage: true + - name: Set up QEMU + uses: docker/setup-qemu-action@v3 + - name: Set up Docker Buildx + id: buildx + uses: docker/setup-buildx-action@v3 + - name: Install poetry via pipx + run: pipx install poetry + - name: Set up Python + uses: actions/setup-python@v5 + with: + python-version: '3.11' + cache: 'poetry' + - name: Install Python dependencies using Poetry + run: make install-python-dependencies + - name: Create source distribution and Dockerfile + run: poetry run python3 opendevin/runtime/utils/runtime_build.py --base_image ${{ matrix.base_image }} --build_folder containers/runtime --force_rebuild + - name: Build and export image + id: build + run: | + if [ -f 'containers/runtime/Dockerfile' ]; then + echo 'Dockerfile detected, building runtime image...' + ./containers/build.sh ${{ matrix.image }} ${{ github.repository_owner }} ${{ matrix.platform }} + else + echo 'No Dockerfile detected which means an exact image is already built. Pulling the image and saving it to a tar file...' 
+ source containers/runtime/config.sh + echo "$DOCKER_IMAGE_TAG $DOCKER_IMAGE_HASH_TAG" >> tags.txt + echo "Pulling image $DOCKER_IMAGE/$DOCKER_IMAGE_HASH_TAG to /tmp/${{ matrix.image }}_image_${{ matrix.platform }}.tar" + docker pull $DOCKER_IMAGE:$DOCKER_IMAGE_HASH_TAG + docker save $DOCKER_IMAGE:$DOCKER_IMAGE_HASH_TAG -o /tmp/${{ matrix.image }}_image_${{ matrix.platform }}.tar + fi + - name: Capture tags + id: capture-tags + run: | + tags=$(cat tags.txt) + echo "tags=$tags" + echo "tags=$tags" >> $GITHUB_OUTPUT - name: Upload Docker image as artifact uses: actions/upload-artifact@v4 with: name: ${{ matrix.image }}-docker-image-${{ matrix.platform }} path: /tmp/${{ matrix.image }}_image_${{ matrix.platform }}.tar + retention-days: 14 - test-for-sandbox: - name: Test for Sandbox + # Run unit tests with the EventStream and Server runtime Docker images + test_runtime: + name: Test Runtime runs-on: ubuntu-latest - needs: ghcr_build - env: - PERSIST_SANDBOX: "false" + needs: [ghcr_build_runtime, ghcr_build] + strategy: + matrix: + runtime_type: ['eventstream'] steps: - uses: actions/checkout@v4 - + - name: Free Disk Space (Ubuntu) + uses: jlumbroso/free-disk-space@main + with: + # this might remove tools that are actually needed, + # when set to "true" but frees about 6 GB + tool-cache: true + # all of these default to true, but feel free to set to + # "false" if necessary for your workflow + android: true + dotnet: true + haskell: true + large-packages: true + swap-storage: true - name: Install poetry via pipx run: pipx install poetry - - name: Set up Python uses: actions/setup-python@v5 with: - python-version: "3.11" - cache: "poetry" - + python-version: '3.11' + cache: 'poetry' - name: Install Python dependencies using Poetry run: make install-python-dependencies - - - name: Download sandbox Docker image + - name: Download Runtime Docker image + if: matrix.runtime_type == 'eventstream' + uses: actions/download-artifact@v4 + with: + name: od_runtime-docker-image-amd64 + path: /tmp/ + - name: Download Sandbox Docker image + if: matrix.runtime_type == 'server' uses: actions/download-artifact@v4 with: name: sandbox-docker-image-amd64 path: /tmp/ - - - name: Load sandbox image and run sandbox tests + - name: Load Runtime image and run runtime tests run: | # Load the Docker image and capture the output - output=$(docker load -i /tmp/sandbox_image_amd64.tar) + if [ "${{ matrix.runtime_type }}" == "eventstream" ]; then + output=$(docker load -i /tmp/od_runtime_image_amd64.tar) + else + output=$(docker load -i /tmp/sandbox_image_amd64.tar) + fi # Extract the first image name from the output image_name=$(echo "$output" | grep -oP 'Loaded image: \K.*' | head -n 1) @@ -115,51 +204,48 @@ jobs: # Print the full name of the image echo "Loaded Docker image: $image_name" - SANDBOX_CONTAINER_IMAGE=$image_name TEST_IN_CI=true poetry run pytest --cov=agenthub --cov=opendevin --cov-report=xml -s ./tests/unit/test_sandbox.py - + TEST_RUNTIME=${{ matrix.runtime_type }} SANDBOX_USER_ID=$(id -u) SANDBOX_CONTAINER_IMAGE=$image_name TEST_IN_CI=true poetry run pytest --cov=agenthub --cov=opendevin --cov-report=xml -s ./tests/unit/test_runtime.py - name: Upload coverage to Codecov uses: codecov/codecov-action@v4 env: CODECOV_TOKEN: ${{ secrets.CODECOV_TOKEN }} - integration-tests-on-linux: - name: Integration Tests on Linux + # Run integration tests with the eventstream runtime Docker image + runtime_integration_tests_on_linux: + name: Runtime Integration Tests on Linux runs-on: ubuntu-latest - needs: ghcr_build - 
env: - PERSIST_SANDBOX: "false" + needs: [ghcr_build_runtime] strategy: fail-fast: false matrix: - python-version: ["3.11"] - sandbox: ["ssh", "local"] + python-version: ['3.11'] + # server is tested in a separate workflow + runtime_type: ['eventstream'] steps: - uses: actions/checkout@v4 - - name: Install poetry via pipx run: pipx install poetry - - name: Set up Python uses: actions/setup-python@v5 with: python-version: ${{ matrix.python-version }} cache: 'poetry' - - name: Install Python dependencies using Poetry run: make install-python-dependencies - - - name: Download sandbox Docker image + - name: Download Runtime Docker image uses: actions/download-artifact@v4 with: - name: sandbox-docker-image-amd64 + name: od_runtime-docker-image-amd64 path: /tmp/ - - - name: Load sandbox image and run integration tests - env: - SANDBOX_BOX_TYPE: ${{ matrix.sandbox }} + - name: Load runtime image and run integration tests run: | # Load the Docker image and capture the output - output=$(docker load -i /tmp/sandbox_image_amd64.tar) + if [ "${{ matrix.runtime_type }}" == "eventstream" ]; then + output=$(docker load -i /tmp/od_runtime_image_amd64.tar) + else + echo "No Runtime Docker image to load" + exit 1 + fi # Extract the first image name from the output image_name=$(echo "$output" | grep -oP 'Loaded image: \K.*' | head -n 1) @@ -167,48 +253,40 @@ jobs: # Print the full name of the image echo "Loaded Docker image: $image_name" - SANDBOX_CONTAINER_IMAGE=$image_name TEST_IN_CI=true TEST_ONLY=true ./tests/integration/regenerate.sh - + TEST_RUNTIME=${{ matrix.runtime_type }} SANDBOX_USER_ID=$(id -u) SANDBOX_CONTAINER_IMAGE=$image_name TEST_IN_CI=true TEST_ONLY=true ./tests/integration/regenerate.sh - name: Upload coverage to Codecov uses: codecov/codecov-action@v4 env: CODECOV_TOKEN: ${{ secrets.CODECOV_TOKEN }} + # Push the OpenDevin and sandbox Docker images to the ghcr.io repository ghcr_push: runs-on: ubuntu-latest - # don't push if integration tests or sandbox tests fail - needs: [ghcr_build, integration-tests-on-linux, test-for-sandbox] + needs: [ghcr_build] if: github.ref == 'refs/heads/main' || startsWith(github.ref, 'refs/tags/') - env: tags: ${{ needs.ghcr_build.outputs.tags }} - permissions: contents: read packages: write - strategy: matrix: - image: ["sandbox", "opendevin"] - platform: ["amd64", "arm64"] - + image: ['opendevin'] + platform: ['amd64', 'arm64'] steps: - name: Checkout code uses: actions/checkout@v4 - - name: Login to GHCR uses: docker/login-action@v2 with: registry: ghcr.io username: ${{ github.repository_owner }} password: ${{ secrets.GITHUB_TOKEN }} - - name: Download Docker images uses: actions/download-artifact@v4 with: name: ${{ matrix.image }}-docker-image-${{ matrix.platform }} path: /tmp/${{ matrix.platform }} - - name: Load images and push to registry run: | mv /tmp/${{ matrix.platform }}/${{ matrix.image }}_image_${{ matrix.platform }}.tar . 
@@ -223,33 +301,90 @@ jobs: docker push $image_name:${tag}_${{ matrix.platform }} done + # Push the runtime Docker images to the ghcr.io repository + ghcr_push_runtime: + runs-on: ubuntu-latest + needs: [ghcr_build_runtime, test_runtime, runtime_integration_tests_on_linux] + if: github.ref == 'refs/heads/main' || startsWith(github.ref, 'refs/tags/') + env: + RUNTIME_TAGS: ${{ needs.ghcr_build_runtime.outputs.tags }} + permissions: + contents: read + packages: write + strategy: + matrix: + image: ['od_runtime'] + platform: ['amd64', 'arm64'] + steps: + - name: Checkout code + uses: actions/checkout@v4 + - name: Free Disk Space (Ubuntu) + uses: jlumbroso/free-disk-space@main + with: + tool-cache: true + android: true + dotnet: true + haskell: true + large-packages: true + docker-images: false + swap-storage: true + - name: Login to GHCR + uses: docker/login-action@v2 + with: + registry: ghcr.io + username: ${{ github.repository_owner }} + password: ${{ secrets.GITHUB_TOKEN }} + - name: Download Docker images + uses: actions/download-artifact@v4 + with: + name: ${{ matrix.image }}-docker-image-${{ matrix.platform }} + path: /tmp/${{ matrix.platform }} + - name: List downloaded files + run: | + ls -la /tmp/${{ matrix.platform }} + file /tmp/${{ matrix.platform }}/* + - name: Load images and push to registry + run: | + mv /tmp/${{ matrix.platform }}/${{ matrix.image }}_image_${{ matrix.platform }}.tar ./${{ matrix.image }}_image_${{ matrix.platform }}.tar + if ! loaded_image=$(docker load -i ${{ matrix.image }}_image_${{ matrix.platform }}.tar | grep "Loaded image:" | head -n 1 | awk '{print $3}'); then + echo "Failed to load Docker image" + exit 1 + fi + echo "loaded image = $loaded_image" + image_name=$(echo "ghcr.io/${{ github.repository_owner }}/${{ matrix.image }}" | tr '[:upper:]' '[:lower:]') + echo "image name = $image_name" + echo "$RUNTIME_TAGS" | tr ' ' '\n' | while read -r tag; do + echo "tag = $tag" + if [ -n "$image_name" ] && [ -n "$tag" ]; then + docker tag $loaded_image $image_name:${tag}_${{ matrix.platform }} + docker push $image_name:${tag}_${{ matrix.platform }} + else + echo "Skipping tag and push due to empty image_name or tag" + fi + done + + # Creates and pushes the OpenDevin and sandbox Docker image manifests create_manifest: runs-on: ubuntu-latest needs: [ghcr_build, ghcr_push] if: github.ref == 'refs/heads/main' || startsWith(github.ref, 'refs/tags/') - env: tags: ${{ needs.ghcr_build.outputs.tags }} - strategy: matrix: - image: ["sandbox", "opendevin"] - + image: ['opendevin'] permissions: contents: read packages: write - steps: - name: Checkout code uses: actions/checkout@v4 - - name: Login to GHCR uses: docker/login-action@v2 with: registry: ghcr.io username: ${{ github.repository_owner }} password: ${{ secrets.GITHUB_TOKEN }} - - name: Create and push multi-platform manifest run: | image_name=$(echo "ghcr.io/${{ github.repository_owner }}/${{ matrix.image }}" | tr '[:upper:]' '[:lower:]') @@ -262,10 +397,36 @@ jobs: $image_name:${tag}_arm64 done - # FIXME: an admin needs to mark this as non-mandatory, and then we can remove it - docker_build_success: - name: Docker Build Success + # Creates and pushes the runtime Docker image manifest + create_manifest_runtime: runs-on: ubuntu-latest - needs: ghcr_build + needs: [ghcr_build_runtime, ghcr_push_runtime] + if: github.ref == 'refs/heads/main' || startsWith(github.ref, 'refs/tags/') + env: + tags: ${{ needs.ghcr_build_runtime.outputs.tags }} + strategy: + matrix: + image: ['od_runtime'] + permissions: + contents: read 
+ packages: write steps: - - run: echo Done! + - name: Checkout code + uses: actions/checkout@v4 + - name: Login to GHCR + uses: docker/login-action@v2 + with: + registry: ghcr.io + username: ${{ github.repository_owner }} + password: ${{ secrets.GITHUB_TOKEN }} + - name: Create and push multi-platform manifest + run: | + image_name=$(echo "ghcr.io/${{ github.repository_owner }}/${{ matrix.image }}" | tr '[:upper:]' '[:lower:]') + echo "image name = $image_name" + tags=$(echo ${tags} | tr ' ' '\n') + for tag in $tags; do + echo 'tag = $tag' + docker buildx imagetools create --tag $image_name:$tag \ + $image_name:${tag}_amd64 \ + $image_name:${tag}_arm64 + done diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml index 7233bdb25ae..37676fdede1 100644 --- a/.github/workflows/lint.yml +++ b/.github/workflows/lint.yml @@ -1,5 +1,8 @@ +# Workflow that runs lint on the frontend and python code name: Lint +# Only run one workflow of the same group at a time. +# There can be at most one running and one pending job in a concurrency group at any time. concurrency: group: ${{ github.workflow }}-${{ github.ref }} cancel-in-progress: ${{ github.ref != 'refs/heads/main' }} @@ -11,27 +14,26 @@ on: pull_request: jobs: + # Run lint on the frontend code lint-frontend: name: Lint frontend runs-on: ubuntu-latest steps: - uses: actions/checkout@v4 - - name: Install Node.js 20 uses: actions/setup-node@v4 with: node-version: 20 - - name: Install dependencies run: | cd frontend npm install --frozen-lockfile - - name: Lint run: | cd frontend npm run lint + # Run lint on the python code lint-python: name: Lint python runs-on: ubuntu-latest diff --git a/.github/workflows/run-unit-tests.yml b/.github/workflows/py-unit-tests.yml similarity index 52% rename from .github/workflows/run-unit-tests.yml rename to .github/workflows/py-unit-tests.yml index f4c21e68e5c..acb005a057d 100644 --- a/.github/workflows/run-unit-tests.yml +++ b/.github/workflows/py-unit-tests.yml @@ -1,137 +1,117 @@ -name: Run Unit Tests - -concurrency: - group: ${{ github.workflow }}-${{ github.ref }} - cancel-in-progress: ${{ github.ref != 'refs/heads/main' }} +# Workflow that runs python unit tests +name: Run Python Unit Tests +# The jobs in this workflow are required, so they must run at all times +# * Always run on "main" +# * Always run on PRs on: push: branches: - main - paths-ignore: - - '**/*.md' - - 'frontend/**' - - 'docs/**' - - 'evaluation/**' pull_request: -env: - PERSIST_SANDBOX : "false" - jobs: - fe-test: - runs-on: ubuntu-latest - - strategy: - matrix: - node-version: [20] - - steps: - - name: Checkout - uses: actions/checkout@v4 - - - name: Set up Node.js - uses: actions/setup-node@v4 - with: - node-version: ${{ matrix.node-version }} - - - name: Install dependencies - working-directory: ./frontend - run: npm ci - - - name: Run tests and collect coverage - working-directory: ./frontend - run: npm run test:coverage - - - name: Upload coverage to Codecov - uses: codecov/codecov-action@v4 - env: - CODECOV_TOKEN: ${{ secrets.CODECOV_TOKEN }} - + # Run python unit tests on macOS test-on-macos: - name: Test on macOS + name: Python Unit Tests on macOS runs-on: macos-12 env: - INSTALL_DOCKER: "1" # Set to '0' to skip Docker installation + INSTALL_DOCKER: '1' # Set to '0' to skip Docker installation strategy: matrix: - python-version: ["3.11"] - + python-version: ['3.11'] steps: - uses: actions/checkout@v4 - - name: Install poetry via pipx run: pipx install poetry - - name: Set up Python ${{ matrix.python-version }} uses: 
actions/setup-python@v5 with: python-version: ${{ matrix.python-version }} - cache: "poetry" - + cache: 'poetry' - name: Install Python dependencies using Poetry - run: poetry install - + run: poetry install --without evaluation,llama-index - name: Install & Start Docker if: env.INSTALL_DOCKER == '1' run: | + INSTANCE_NAME="colima-${GITHUB_RUN_ID}" + # Uninstall colima to upgrade to the latest version if brew list colima &>/dev/null; then - brew uninstall colima - # unlinking colima dependency: go - brew uninstall go@1.21 + brew uninstall colima + # unlinking colima dependency: go + brew uninstall go@1.21 fi rm -rf ~/.colima ~/.lima brew install --HEAD colima - brew services start colima brew install docker - colima delete - colima start --network-address --arch x86_64 --cpu=1 --memory=1 + + start_colima() { + # Find a free port in the range 10000-20000 + RANDOM_PORT=$((RANDOM % 10001 + 10000)) + + # Original line: + if ! colima start --network-address --arch x86_64 --cpu=1 --memory=1 --verbose --ssh-port $RANDOM_PORT; then + echo "Failed to start Colima." + return 1 + fi + return 0 + } + + # Attempt to start Colima for 5 total attempts: + ATTEMPT_LIMIT=5 + for ((i=1; i<=ATTEMPT_LIMIT; i++)); do + + if start_colima; then + echo "Colima started successfully." + break + else + colima stop -f + sleep 10 + colima delete -f + if [ $i -eq $ATTEMPT_LIMIT ]; then + exit 1 + fi + sleep 10 + fi + done # For testcontainers to find the Colima socket # https://github.com/abiosoft/colima/blob/main/docs/FAQ.md#cannot-connect-to-the-docker-daemon-at-unixvarrundockersock-is-the-docker-daemon-running sudo ln -sf $HOME/.colima/default/docker.sock /var/run/docker.sock - - name: Build Environment run: make build - - name: Run Tests - run: poetry run pytest --forked --cov=agenthub --cov=opendevin --cov-report=xml ./tests/unit -k "not test_sandbox" - + run: poetry run pytest --forked --cov=agenthub --cov=opendevin --cov-report=xml ./tests/unit -k "not test_runtime.py" - name: Upload coverage to Codecov uses: codecov/codecov-action@v4 env: CODECOV_TOKEN: ${{ secrets.CODECOV_TOKEN }} + + # Run python unit tests on Linux test-on-linux: - name: Test on Linux + name: Python Unit Tests on Linux runs-on: ubuntu-latest env: - INSTALL_DOCKER: "0" # Set to '0' to skip Docker installation + INSTALL_DOCKER: '0' # Set to '0' to skip Docker installation strategy: matrix: - python-version: ["3.11"] - + python-version: ['3.11'] steps: - uses: actions/checkout@v4 - - name: Install poetry via pipx run: pipx install poetry - - name: Set up Python uses: actions/setup-python@v5 with: python-version: ${{ matrix.python-version }} - cache: "poetry" - + cache: 'poetry' - name: Install Python dependencies using Poetry - run: poetry install --without evaluation - + run: poetry install --without evaluation,llama-index - name: Build Environment run: make build - - name: Run Tests - run: poetry run pytest --forked --cov=agenthub --cov=opendevin --cov-report=xml ./tests/unit -k "not test_sandbox" - + run: poetry run pytest --forked --cov=agenthub --cov=opendevin --cov-report=xml ./tests/unit -k "not test_runtime.py" - name: Upload coverage to Codecov uses: codecov/codecov-action@v4 env: diff --git a/.github/workflows/review-pr.yml b/.github/workflows/review-pr.yml index 1a9aeccc391..6d7771e5701 100644 --- a/.github/workflows/review-pr.yml +++ b/.github/workflows/review-pr.yml @@ -1,3 +1,4 @@ +# Workflow that uses OpenDevin to review a pull request. 
PR must be labeled 'review-this' name: Use OpenDevin to Review Pull Request on: @@ -12,29 +13,28 @@ jobs: dogfood: if: contains(github.event.pull_request.labels.*.name, 'review-this') runs-on: ubuntu-latest - container: - image: ghcr.io/opendevin/opendevin - volumes: - - /var/run/docker.sock:/var/run/docker.sock - steps: + - uses: actions/checkout@v4 + - name: Set up Python + uses: actions/setup-python@v5 + with: + python-version: '3.11' - name: install git, github cli run: | - apt-get install -y git gh + sudo apt-get install -y git gh git config --global --add safe.directory $PWD - - name: Checkout Repository uses: actions/checkout@v4 with: ref: ${{ github.event.pull_request.base.ref }} # check out the target branch - - name: Download Diff run: | curl -O "${{ github.event.pull_request.diff_url }}" -L - - name: Write Task File run: | - echo "Your coworker wants to apply a pull request to this project. Read and review ${{ github.event.pull_request.number }}.diff file. Create a review-${{ github.event.pull_request.number }}.txt and write your concise comments and suggestions there." > task.txt + echo "Your coworker wants to apply a pull request to this project." > task.txt + echo "Read and review ${{ github.event.pull_request.number }}.diff file. Create a review-${{ github.event.pull_request.number }}.txt and write your concise comments and suggestions there." >> task.txt + echo "Do not ask me for confirmation at any point." >> task.txt echo "" >> task.txt echo "Title" >> task.txt echo "${{ github.event.pull_request.title }}" >> task.txt @@ -43,27 +43,25 @@ jobs: echo "${{ github.event.pull_request.body }}" >> task.txt echo "" >> task.txt echo "Diff file is: ${{ github.event.pull_request.number }}.diff" >> task.txt - - name: Set up environment run: | curl -sSL https://install.python-poetry.org | python3 - export PATH="/github/home/.local/bin:$PATH" - poetry install --without evaluation + poetry install --without evaluation,llama-index poetry run playwright install --with-deps chromium - - name: Run OpenDevin env: - LLM_API_KEY: ${{ secrets.OPENAI_API_KEY }} - OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }} - SANDBOX_BOX_TYPE: ssh + LLM_API_KEY: ${{ secrets.LLM_API_KEY }} + LLM_MODEL: ${{ vars.LLM_MODEL }} run: | # Append path to launch poetry export PATH="/github/home/.local/bin:$PATH" # Append path to correctly import package, note: must set pwd at first export PYTHONPATH=$(pwd):$PYTHONPATH - WORKSPACE_MOUNT_PATH=$GITHUB_WORKSPACE poetry run python ./opendevin/core/main.py -i 50 -f task.txt -d $GITHUB_WORKSPACE + export WORKSPACE_MOUNT_PATH=$GITHUB_WORKSPACE + export WORKSPACE_BASE=$GITHUB_WORKSPACE + echo -e "/exit\n" | poetry run python opendevin/core/main.py -i 50 -f task.txt rm task.txt - - name: Check if review file is non-empty id: check_file run: | @@ -72,7 +70,6 @@ jobs: echo "non_empty=true" >> $GITHUB_OUTPUT fi shell: bash - - name: Create PR review if file is non-empty env: GH_TOKEN: ${{ github.token }} diff --git a/.github/workflows/solve-issue.yml b/.github/workflows/solve-issue.yml index df965b95542..8e075761ab2 100644 --- a/.github/workflows/solve-issue.yml +++ b/.github/workflows/solve-issue.yml @@ -1,3 +1,4 @@ +# Workflow that uses OpenDevin to resolve a GitHub issue. 
Issue must be labeled 'solve-this' name: Use OpenDevin to Resolve GitHub Issue on: @@ -17,14 +18,11 @@ jobs: image: ghcr.io/opendevin/opendevin volumes: - /var/run/docker.sock:/var/run/docker.sock - steps: - name: install git, github cli run: apt-get install -y git gh - - name: Checkout Repository uses: actions/checkout@v4 - - name: Write Task File env: ISSUE_TITLE: ${{ github.event.issue.title }} @@ -35,22 +33,18 @@ jobs: echo "" >> task.txt echo "BODY:" >> task.txt echo "${ISSUE_BODY}" >> task.txt - - name: Set up environment run: | curl -sSL https://install.python-poetry.org | python3 - export PATH="/github/home/.local/bin:$PATH" - poetry install --without evaluation + poetry install --without evaluation,llama-index poetry run playwright install --with-deps chromium - - - name: Run OpenDevin env: ISSUE_TITLE: ${{ github.event.issue.title }} ISSUE_BODY: ${{ github.event.issue.body }} LLM_API_KEY: ${{ secrets.OPENAI_API_KEY }} OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }} - SANDBOX_BOX_TYPE: ssh run: | # Append path to launch poetry export PATH="/github/home/.local/bin:$PATH" @@ -58,7 +52,6 @@ jobs: export PYTHONPATH=$(pwd):$PYTHONPATH WORKSPACE_MOUNT_PATH=$GITHUB_WORKSPACE poetry run python ./opendevin/core/main.py -i 50 -f task.txt -d $GITHUB_WORKSPACE rm task.txt - - name: Setup Git, Create Branch, and Commit Changes run: | # Setup Git configuration @@ -84,7 +77,6 @@ jobs: # Push changes git push --set-upstream origin $BRANCH_NAME - - name: Fetch Default Branch env: GH_TOKEN: ${{ github.token }} @@ -93,7 +85,6 @@ jobs: DEFAULT_BRANCH=$(gh repo view --json defaultBranchRef --jq .defaultBranchRef.name) echo "Default branch is $DEFAULT_BRANCH" echo "DEFAULT_BRANCH=$DEFAULT_BRANCH" >> $GITHUB_ENV - - name: Generate PR env: GH_TOKEN: ${{ github.token }} diff --git a/.github/workflows/stale.yml b/.github/workflows/stale.yml index b7e48311e48..6897fc79ade 100644 --- a/.github/workflows/stale.yml +++ b/.github/workflows/stale.yml @@ -1,4 +1,6 @@ +# Workflow that marks issues and PRs with no activity for 30 days with "Stale" and closes them after 7 more days of no activity name: 'Close stale issues' + on: schedule: - cron: '30 1 * * *' @@ -9,21 +11,9 @@ jobs: steps: - uses: actions/stale@v9 with: - # Aggressively close issues that have been explicitly labeled `age-out` - any-of-labels: age-out - stale-issue-message: 'This issue is stale because it has been open for 7 days with no activity. Remove stale label or comment or this will be closed in 1 day.' - close-issue-message: 'This issue was closed because it has been stalled for over 7 days with no activity.' - stale-pr-message: 'This PR is stale because it has been open for 7 days with no activity. Remove stale label or comment or this will be closed in 1 days.' - close-pr-message: 'This PR was closed because it has been stalled for over 7 days with no activity.' - days-before-stale: 7 - days-before-close: 1 - - - uses: actions/stale@v9 - with: - # Be more lenient with other issues stale-issue-message: 'This issue is stale because it has been open for 30 days with no activity. Remove stale label or comment or this will be closed in 7 days.' - close-issue-message: 'This issue was closed because it has been stalled for over 30 days with no activity.' stale-pr-message: 'This PR is stale because it has been open for 30 days with no activity. Remove stale label or comment or this will be closed in 7 days.' - close-pr-message: 'This PR was closed because it has been stalled for over 30 days with no activity.' 
days-before-stale: 30 + close-issue-message: 'This issue was closed because it has been stalled for over 30 days with no activity.' + close-pr-message: 'This PR was closed because it has been stalled for over 30 days with no activity.' days-before-close: 7 diff --git a/.github/workflows/update-pyproject-version.yml b/.github/workflows/update-pyproject-version.yml deleted file mode 100644 index 24fa5429c55..00000000000 --- a/.github/workflows/update-pyproject-version.yml +++ /dev/null @@ -1,48 +0,0 @@ -name: Update pyproject.toml Version and Tags - -on: - release: - types: - - published - -jobs: - update-pyproject-and-tags: - runs-on: ubuntu-latest - - steps: - - name: Checkout code - uses: actions/checkout@v4 - with: - fetch-depth: 0 # Fetch all history for all branches and tags - - - name: Set up Python - uses: actions/setup-python@v5 - with: - python-version: "3.11" - - - name: Install dependencies - run: | - python -m pip install --upgrade pip - pip install toml - - - name: Get release tag - id: get_release_tag - run: echo "RELEASE_TAG=${GITHUB_REF#refs/tags/}" >> $GITHUB_ENV - - - name: Update pyproject.toml with release tag - run: | - python -c " - import toml - with open('pyproject.toml', 'r') as f: - data = toml.load(f) - data['tool']['poetry']['version'] = '${{ env.RELEASE_TAG }}' - with open('pyproject.toml', 'w') as f: - toml.dump(data, f) - " - - - name: Commit and push pyproject.toml changes - uses: stefanzweifel/git-auto-commit-action@v4 - with: - commit_message: "Update pyproject.toml version to ${{ env.RELEASE_TAG }}" - branch: main - file_pattern: pyproject.toml diff --git a/.gitignore b/.gitignore index daa5a473580..cac17cdfa34 100644 --- a/.gitignore +++ b/.gitignore @@ -169,6 +169,10 @@ evaluation/outputs evaluation/swe_bench/eval_workspace* evaluation/SWE-bench/data evaluation/webarena/scripts/webarena_env.sh +evaluation/bird/data +evaluation/gaia/data +evaluation/gorilla/data +evaluation/toolqa/data # frontend @@ -210,6 +214,7 @@ cache # configuration config.toml +config.toml_ config.toml.bak containers/agnostic_sandbox @@ -217,3 +222,9 @@ containers/agnostic_sandbox # swe-bench-eval image_build_logs run_instance_logs + +od_runtime_*.tar + +# docker build +containers/runtime/Dockerfile +containers/runtime/project.tar.gz diff --git a/CODE_OF_CONDUCT.md b/CODE_OF_CONDUCT.md index 1f5f3a2c196..8c66075aef2 100644 --- a/CODE_OF_CONDUCT.md +++ b/CODE_OF_CONDUCT.md @@ -61,7 +61,7 @@ representative at an online or offline event. Instances of abusive, harassing, or otherwise unacceptable behavior may be reported to the community leaders responsible for enforcement at -contact@rbren.io +contact@all-hands.dev All complaints will be reviewed and investigated promptly and fairly. All community leaders are obligated to respect the privacy and security of the diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 6a0849d24de..212e8ff3a94 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -1,13 +1,13 @@ # Contributing -Thanks for your interest in contributing to OpenDevin! We welcome and appreciate contributions. +Thanks for your interest in contributing to OpenDevin! We welcome and appreciate contributions. ## How Can I Contribute? There are many ways that you can contribute: 1. **Download and use** OpenDevin, and send [issues](https://github.com/OpenDevin/OpenDevin/issues) when you encounter something that isn't working or a feature that you'd like to see. -2. 
**Send feedback** after each session by [clicking the thumbs-up thumbs-down buttons](https://opendevin.github.io/OpenDevin/modules/usage/feedback), so we can see where things are working and failing, and also build an open dataset for training code agents. +2. **Send feedback** after each session by [clicking the thumbs-up thumbs-down buttons](https://docs.all-hands.dev/modules/usage/feedback), so we can see where things are working and failing, and also build an open dataset for training code agents. 3. **Improve the Codebase** by sending PRs (see details below). In particular, we have some [good first issue](https://github.com/OpenDevin/OpenDevin/labels/good%20first%20issue) issues that may be ones to start on. ## Understanding OpenDevin's CodeBase @@ -83,7 +83,7 @@ git push origin my_branch - Set `base repository` to `OpenDevin/OpenDevin` - Set `base` to `main` - Click `Create pull request` - + The PR should appear in [OpenDevin PRs](https://github.com/OpenDevin/OpenDevin/pulls). Then the OpenDevin team will review your code. @@ -114,4 +114,3 @@ You may also check out previous PRs in the [PR list](https://github.com/OpenDevi ### 2. Pull Request description - If your PR is small (such as a typo fix), you can go brief. - If it contains a lot of changes, it's better to write more details. - diff --git a/Development.md b/Development.md index 302d0b17dfc..8a72c8af1e5 100644 --- a/Development.md +++ b/Development.md @@ -39,18 +39,18 @@ make build OpenDevin supports a diverse array of Language Models (LMs) through the powerful [litellm](https://docs.litellm.ai) library. By default, we've chosen the mighty GPT-4 from OpenAI as our go-to model, but the world is your oyster! You can unleash the potential of Anthropic's suave Claude, the enigmatic Llama, or any other LM that piques your interest. To configure the LM of your choice, run: - + ```bash make setup-config ``` - + This command will prompt you to enter the LLM API key, model name, and other variables ensuring that OpenDevin is tailored to your specific needs. Note that the model name will apply only when you run headless. If you use the UI, please set the model in the UI. - + Note: If you have previously run OpenDevin using the docker command, you may have already set some environmental variables in your terminal. The final configurations are set from highest to lowest priority: Environment variables > config.toml variables > default variables **Note on Alternative Models:** -Some alternative models may prove more challenging to tame than others. Fear not, brave adventurer! We shall soon unveil LLM-specific documentation to guide you on your quest. +Some alternative models may prove more challenging to tame than others. Fear not, brave adventurer! We shall soon unveil LLM-specific documentation to guide you on your quest. And if you've already mastered the art of wielding a model other than OpenAI's GPT, we encourage you to share your setup instructions with us by creating instructions and adding it [to our documentation](https://github.com/OpenDevin/OpenDevin/tree/main/docs/modules/usage/llms). For a full list of the LM providers and models available, please consult the [litellm documentation](https://docs.litellm.ai/docs/providers). @@ -84,10 +84,11 @@ make help ``` ### 8. 
Testing +To run tests, refer to the following: #### Unit tests ```bash -poetry run pytest ./tests/unit/test_sandbox.py +poetry run pytest ./tests/unit/test_*.py ``` #### Integration tests diff --git a/ISSUE_TRIAGE.md b/ISSUE_TRIAGE.md new file mode 100644 index 00000000000..f251ad7263f --- /dev/null +++ b/ISSUE_TRIAGE.md @@ -0,0 +1,25 @@ +# Issue Triage +These are the procedures and guidelines on how issues are triaged in this repo by the maintainers. + +## General +* Most issues must be tagged with **enhancement** or **bug** +* Issues may be tagged with what it relates to (**backend**, **frontend**, **agent quality**, etc.) + +## Severity +* **Low**: Minor issues, single user report +* **Medium**: Affecting multiple users +* **Critical**: Affecting all users or potential security issues + +## Effort +* Issues may be estimated with effort required (**small effort**, **medium effort**, **large effort**) + +## Difficulty +* Issues with low implementation difficulty may be tagged with **good first issue** + +## Not Enough Information +* User is asked to provide more information (logs, how to reproduce, etc.) when the issue is not clear +* If an issue is unclear and the author does not provide more information or respond to a request, the issue may be closed as **not planned** (Usually after a week) + +## Multiple Requests/Fixes in One Issue +* These issues will be narrowed down to one request/fix so the issue is more easily tracked and fixed +* Issues may be broken down into multiple issues if required diff --git a/MANIFEST.in b/MANIFEST.in new file mode 100644 index 00000000000..8dd8203373b --- /dev/null +++ b/MANIFEST.in @@ -0,0 +1,5 @@ +# Exclude all Python bytecode files +global-exclude *.pyc + +# Exclude Python cache directories +global-exclude __pycache__ diff --git a/Makefile b/Makefile index f2c4eca9297..c4f145c15dd 100644 --- a/Makefile +++ b/Makefile @@ -23,9 +23,6 @@ RESET=$(shell tput -Txterm sgr0) build: @echo "$(GREEN)Building project...$(RESET)" @$(MAKE) -s check-dependencies -ifeq ($(INSTALL_DOCKER),) - @$(MAKE) -s pull-docker-image -endif @$(MAKE) -s install-python-dependencies @$(MAKE) -s install-frontend-dependencies @$(MAKE) -s install-pre-commit-hooks @@ -124,11 +121,6 @@ check-poetry: exit 1; \ fi -pull-docker-image: - @echo "$(YELLOW)Pulling Docker image...$(RESET)" - @docker pull $(DOCKER_IMAGE) - @echo "$(GREEN)Docker image pulled successfully.$(RESET)" - install-python-dependencies: @echo "$(GREEN)Installing Python dependencies...$(RESET)" @if [ -z "${TZ}" ]; then \ @@ -141,7 +133,7 @@ install-python-dependencies: export HNSWLIB_NO_NATIVE=1; \ poetry run pip install chroma-hnswlib; \ fi - @poetry install + @poetry install --without llama-index @if [ -f "/etc/manjaro-release" ]; then \ echo "$(BLUE)Detected Manjaro Linux. 
Installing Playwright dependencies...$(RESET)"; \ poetry run pip install playwright; \ @@ -162,11 +154,8 @@ install-frontend-dependencies: @echo "$(YELLOW)Setting up frontend environment...$(RESET)" @echo "$(YELLOW)Detect Node.js version...$(RESET)" @cd frontend && node ./scripts/detect-node-version.js - @cd frontend && \ - echo "$(BLUE)Installing frontend dependencies with npm...$(RESET)" && \ - npm install && \ - echo "$(BLUE)Running make-i18n with npm...$(RESET)" && \ - npm run make-i18n + echo "$(BLUE)Installing frontend dependencies with npm...$(RESET)" + @cd frontend && npm install @echo "$(GREEN)Frontend dependencies installed successfully.$(RESET)" install-pre-commit-hooks: @@ -249,16 +238,6 @@ setup-config-prompts: workspace_dir=$${workspace_dir:-$(DEFAULT_WORKSPACE_DIR)}; \ echo "workspace_base=\"$$workspace_dir\"" >> $(CONFIG_FILE).tmp - @read -p "Do you want to persist the sandbox container? [true/false] [default: false]: " persist_sandbox; \ - persist_sandbox=$${persist_sandbox:-false}; \ - if [ "$$persist_sandbox" = "true" ]; then \ - read -p "Enter a password for the sandbox container: " ssh_password; \ - echo "ssh_password=\"$$ssh_password\"" >> $(CONFIG_FILE).tmp; \ - echo "persist_sandbox=$$persist_sandbox" >> $(CONFIG_FILE).tmp; \ - else \ - echo "persist_sandbox=$$persist_sandbox" >> $(CONFIG_FILE).tmp; \ - fi - @echo "" >> $(CONFIG_FILE).tmp @echo "[llm]" >> $(CONFIG_FILE).tmp @@ -319,4 +298,4 @@ help: @echo " $(GREEN)help$(RESET) - Display this help message, providing information on available targets." # Phony targets -.PHONY: build check-dependencies check-python check-npm check-docker check-poetry pull-docker-image install-python-dependencies install-frontend-dependencies install-pre-commit-hooks lint start-backend start-frontend run run-wsl setup-config setup-config-prompts help +.PHONY: build check-dependencies check-python check-npm check-docker check-poetry install-python-dependencies install-frontend-dependencies install-pre-commit-hooks lint start-backend start-frontend run run-wsl setup-config setup-config-prompts help diff --git a/README.md b/README.md index 3beab757a36..41161e9a44c 100644 --- a/README.md +++ b/README.md @@ -24,7 +24,7 @@ Issues MIT License
- Join our Slack community + Join our Slack community Join our Discord community CodeCov @@ -33,8 +33,10 @@
Logo

OpenDevin: Code Less, Make More

- Check out the documentation
- Evaluation Benchmark
+ Check out the documentation
+ Paper on Arxiv
+
+ Evaluation Benchmark

@@ -45,7 +47,7 @@ OpenDevin agents collaborate with human developers to write code, fix bugs, and ![App screenshot](./docs/static/img/screenshot.png) ## ⚡ Getting Started -The easiest way to run OpenDevin is inside a Docker container. It works best with the most recent version of Docker, `26.0.0`. +OpenDevin works best with Docker version 26.0.0+ (Docker Desktop 4.31.0+). You must be using Linux, Mac OS, or WSL on Windows. To start OpenDevin in a docker container, run the following commands in your terminal: @@ -64,14 +66,14 @@ docker run -it \ -p 3000:3000 \ --add-host host.docker.internal:host-gateway \ --name opendevin-app-$(date +%Y%m%d%H%M%S) \ - ghcr.io/opendevin/opendevin + ghcr.io/opendevin/opendevin:0.8 ``` > [!NOTE] > By default, this command pulls the `latest` tag, which represents the most recent release of OpenDevin. You have other options as well: > - For a specific release version, use `ghcr.io/opendevin/opendevin:` (replace with the desired version number). > - For the most up-to-date development version, use `ghcr.io/opendevin/opendevin:main`. This version may be **(unstable!)** and is recommended for testing or development purposes only. -> +> > Choose the tag that best suits your needs based on stability requirements and desired features. You'll find OpenDevin running at [http://localhost:3000](http://localhost:3000) with access to `./workspace`. To have OpenDevin operate on your code, place it in `./workspace`. @@ -82,12 +84,12 @@ the `Settings` button (gear icon) in the UI. If the required `Model` does not ex For the development workflow, see [Development.md](https://github.com/OpenDevin/OpenDevin/blob/main/Development.md). -Are you having trouble? Check out our [Troubleshooting Guide](https://opendevin.github.io/OpenDevin/modules/usage/troubleshooting). +Are you having trouble? Check out our [Troubleshooting Guide](https://docs.all-hands.dev/modules/usage/troubleshooting). ## 🚀 Documentation To learn more about the project, and for tips on using OpenDevin, -**check out our [documentation](https://opendevin.github.io/OpenDevin/modules/usage/intro)**. +**check out our [documentation](https://docs.all-hands.dev/modules/usage/intro)**. There you'll find resources on how to use different LLM providers (like ollama and Anthropic's Claude), troubleshooting resources, and advanced configuration options. @@ -109,7 +111,7 @@ For details, please check [CONTRIBUTING.md](./CONTRIBUTING.md). Whether you're a developer, a researcher, or simply enthusiastic about OpenDevin, we'd love to have you in our community. Let's make software engineering better together! -- [Slack workspace](https://join.slack.com/t/opendevin/shared_invite/zt-2jsrl32uf-fTeeFjNyNYxqSZt5NPY3fA) - Here we talk about research, architecture, and future development. +- [Slack workspace](https://join.slack.com/t/opendevin/shared_invite/zt-2ngejmfw6-9gW4APWOC9XUp1n~SiQ6iw) - Here we talk about research, architecture, and future development. - [Discord server](https://discord.gg/ESHStjSjD4) - This is a community-run server for general discussion, questions, and feedback. ## 📈 Progress @@ -138,12 +140,13 @@ Distributed under the MIT License. 
See [`LICENSE`](./LICENSE) for more informati ## 📚 Cite ``` -@misc{opendevin2024, - author = {{OpenDevin Team}}, - title = {{OpenDevin: An Open Platform for AI Software Developers as Generalist Agents}}, - year = {2024}, - version = {v1.0}, - howpublished = {\url{https://github.com/OpenDevin/OpenDevin}}, - note = {Accessed: ENTER THE DATE YOU ACCESSED THE PROJECT} +@misc{opendevin, + title={{OpenDevin: An Open Platform for AI Software Developers as Generalist Agents}}, + author={Xingyao Wang and Boxuan Li and Yufan Song and Frank F. Xu and Xiangru Tang and Mingchen Zhuge and Jiayi Pan and Yueqi Song and Bowen Li and Jaskirat Singh and Hoang H. Tran and Fuqiang Li and Ren Ma and Mingzhang Zheng and Bill Qian and Yanjun Shao and Niklas Muennighoff and Yizhe Zhang and Binyuan Hui and Junyang Lin and Robert Brennan and Hao Peng and Heng Ji and Graham Neubig}, + year={2024}, + eprint={2407.16741}, + archivePrefix={arXiv}, + primaryClass={cs.SE}, + url={https://arxiv.org/abs/2407.16741}, } ``` diff --git a/agenthub/README.md b/agenthub/README.md index a6f6f1ce9ef..cb2a09b1f00 100644 --- a/agenthub/README.md +++ b/agenthub/README.md @@ -1,4 +1,4 @@ -# Agent Framework Research +# Agent Hub In this folder, there may exist multiple implementations of `Agent` that will be used by the framework. @@ -33,7 +33,6 @@ Here is a list of available Actions, which can be returned by `agent.step()`: - [`FileReadAction`](../opendevin/events/action/files.py) - Reads the content of a file - [`FileWriteAction`](../opendevin/events/action/files.py) - Writes new content to a file - [`BrowseURLAction`](../opendevin/events/action/browse.py) - Gets the content of a URL -- [`AgentRecallAction`](../opendevin/events/action/agent.py) - Searches memory (e.g. a vector database) - [`AddTaskAction`](../opendevin/events/action/tasks.py) - Adds a subtask to the plan - [`ModifyTaskAction`](../opendevin/events/action/tasks.py) - Changes the state of a subtask. - [`AgentFinishAction`](../opendevin/events/action/agent.py) - Stops the control loop, allowing the user/delegator agent to enter a new task @@ -54,7 +53,6 @@ Here is a list of available Observations: - [`BrowserOutputObservation`](../opendevin/events/observation/browse.py) - [`FileReadObservation`](../opendevin/events/observation/files.py) - [`FileWriteObservation`](../opendevin/events/observation/files.py) -- [`AgentRecallObservation`](../opendevin/events/observation/recall.py) - [`ErrorObservation`](../opendevin/events/observation/error.py) - [`SuccessObservation`](../opendevin/events/observation/success.py) @@ -72,14 +70,3 @@ def step(self, state: "State") -> "Action" `step` moves the agent forward one step towards its goal. This probably means sending a prompt to the LLM, then parsing the response into an `Action`. - -### `search_memory` - -``` -def search_memory(self, query: str) -> list[str]: -``` - -`search_memory` should return a list of events that match the query. This will be used -for the `recall` action. - -You can optionally just return `[]` for this method, meaning the agent has no long-term memory. 
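Reviewer note (not part of the patch): the `agenthub/README.md` text above describes the `step()` contract — an agent turns the current `State` into its next `Action`, typically by prompting an LLM and parsing the reply. Below is a minimal sketch of that contract, assuming the import paths used elsewhere in this diff (`opendevin.controller.agent`, `opendevin.controller.state.state`, `opendevin.events.action`); the `EchoAgent` class itself is hypothetical.

```python
# Illustrative sketch only, not part of this PR: a trivial Agent showing the
# step() contract described in agenthub/README.md. Import paths mirror those
# used elsewhere in this diff; EchoAgent is a made-up example.
from opendevin.controller.agent import Agent
from opendevin.controller.state.state import State
from opendevin.events.action import Action, AgentFinishAction, MessageAction


class EchoAgent(Agent):
    """Replies to the current task once, then ends the interaction."""

    def step(self, state: State) -> Action:
        # Pull the user's goal out of the State (mirroring the call used in
        # the browsing agent changes later in this diff).
        goal, _ = state.get_current_user_intent()
        if goal is None:
            return AgentFinishAction()
        # A real agent would prompt its LLM here and parse the reply into an
        # Action (CmdRunAction, FileWriteAction, etc.).
        return MessageAction(content=f'Working on: {goal}')
```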
diff --git a/agenthub/__init__.py b/agenthub/__init__.py index 3e53ce5e758..c66a7471f35 100644 --- a/agenthub/__init__.py +++ b/agenthub/__init__.py @@ -15,12 +15,10 @@ codeact_swe_agent, delegator_agent, dummy_agent, - monologue_agent, planner_agent, ) __all__ = [ - 'monologue_agent', 'codeact_agent', 'codeact_swe_agent', 'planner_agent', diff --git a/agenthub/browsing_agent/browsing_agent.py b/agenthub/browsing_agent/browsing_agent.py index 926d9c8338f..448e8b13929 100644 --- a/agenthub/browsing_agent/browsing_agent.py +++ b/agenthub/browsing_agent/browsing_agent.py @@ -7,6 +7,7 @@ from opendevin.controller.agent import Agent from opendevin.controller.state.state import State from opendevin.core.logger import opendevin_logger as logger +from opendevin.core.message import Message, TextContent from opendevin.events.action import ( Action, AgentFinishAction, @@ -99,8 +100,7 @@ def __init__( self, llm: LLM, ) -> None: - """ - Initializes a new instance of the BrowsingAgent class. + """Initializes a new instance of the BrowsingAgent class. Parameters: - llm (LLM): The llm to be used by this agent @@ -120,16 +120,13 @@ def __init__( self.reset() def reset(self) -> None: - """ - Resets the Browsing Agent. - """ + """Resets the Browsing Agent.""" super().reset() self.cost_accumulator = 0 self.error_accumulator = 0 def step(self, state: State) -> Action: - """ - Performs one step using the Browsing Agent. + """Performs one step using the Browsing Agent. This includes gathering information on previous steps and prompting the model to make a browsing command to execute. Parameters: @@ -140,7 +137,7 @@ def step(self, state: State) -> Action: - MessageAction(content) - Message action to run (e.g. ask for clarification) - AgentFinishAction() - end the interaction """ - messages = [] + messages: list[Message] = [] prev_actions = [] cur_axtree_txt = '' error_prefix = '' @@ -195,24 +192,24 @@ def step(self, state: State) -> Action: ) return MessageAction('Error encountered when browsing.') - if (goal := state.get_current_user_intent()) is None: + goal, _ = state.get_current_user_intent() + + if goal is None: goal = state.inputs['task'] + system_msg = get_system_message( goal, self.action_space.describe(with_long_description=False, with_examples=True), ) - messages.append({'role': 'system', 'content': system_msg}) + messages.append(Message(role='system', content=[TextContent(text=system_msg)])) prompt = get_prompt(error_prefix, cur_axtree_txt, prev_action_str) - messages.append({'role': 'user', 'content': prompt}) + messages.append(Message(role='user', content=[TextContent(text=prompt)])) logger.debug(prompt) response = self.llm.completion( - messages=messages, + messages=[message.model_dump() for message in messages], temperature=0.0, stop=[')```', ')\n```'], ) return self.response_parser.parse(response) - - def search_memory(self, query: str) -> list[str]: - raise NotImplementedError('Implement this abstract method') diff --git a/agenthub/browsing_agent/prompt.py b/agenthub/browsing_agent/prompt.py index 0a40d97c76b..3dfd7f5022b 100644 --- a/agenthub/browsing_agent/prompt.py +++ b/agenthub/browsing_agent/prompt.py @@ -75,7 +75,8 @@ class PromptElement: Prompt elements are used to build the prompt. Use flags to control which prompt elements are visible. We use class attributes as a convenient way to implement static prompts, but feel free to override them with instance - attributes or @property decorator.""" + attributes or @property decorator. 
+ """ _prompt = '' _abstract_ex = '' @@ -200,11 +201,10 @@ def fit_tokens( model_name : str, optional The name of the model used when tokenizing. - Returns + Returns: ------- str : the prompt after shrinking. """ - if max_prompt_chars is None: return shrinkable.prompt @@ -579,8 +579,8 @@ def _parse_answer(self, text_answer): def diff(previous, new): """Return a string showing the difference between original and new. - If the difference is above diff_threshold, return the diff string.""" - + If the difference is above diff_threshold, return the diff string. + """ if previous == new: return 'Identical', [] diff --git a/agenthub/browsing_agent/response_parser.py b/agenthub/browsing_agent/response_parser.py index a784d6668d7..9b64748880e 100644 --- a/agenthub/browsing_agent/response_parser.py +++ b/agenthub/browsing_agent/response_parser.py @@ -20,7 +20,10 @@ def parse(self, response: str) -> Action: return self.parse_action(action_str) def parse_response(self, response) -> str: - action_str = response['choices'][0]['message']['content'].strip() + action_str = response['choices'][0]['message']['content'] + if action_str is None: + return '' + action_str = action_str.strip() if not action_str.endswith('```'): action_str = action_str + ')```' logger.info(action_str) @@ -34,9 +37,8 @@ def parse_action(self, action_str: str) -> Action: class BrowsingActionParserMessage(ActionParser): - """ - Parser action: - - BrowseInteractiveAction(browser_actions) - unexpected response format, message back to user + """Parser action: + - BrowseInteractiveAction(browser_actions) - unexpected response format, message back to user """ def __init__( @@ -57,9 +59,8 @@ def parse(self, action_str: str) -> Action: class BrowsingActionParserBrowseInteractive(ActionParser): - """ - Parser action: - - BrowseInteractiveAction(browser_actions) - handle send message to user function call in BrowserGym + """Parser action: + - BrowseInteractiveAction(browser_actions) - handle send message to user function call in BrowserGym """ def __init__( diff --git a/agenthub/browsing_agent/utils.py b/agenthub/browsing_agent/utils.py index e98ae3c5ce2..8e67679966a 100644 --- a/agenthub/browsing_agent/utils.py +++ b/agenthub/browsing_agent/utils.py @@ -7,7 +7,6 @@ def yaml_parser(message): """Parse a yaml message for the retry function.""" - # saves gpt-3.5 from some yaml parsing errors message = re.sub(r':\s*\n(?=\S|\n)', ': ', message) @@ -47,7 +46,6 @@ def _compress_chunks(text, identifier, skip_list, split_regex='\n\n+'): def compress_string(text): """Compress a string by replacing redundant paragraphs and lines with identifiers.""" - # Perform paragraph-level compression def_dict, compressed_text = _compress_chunks( text, identifier='§', skip_list=[], split_regex='\n\n+' @@ -79,12 +77,12 @@ def extract_html_tags(text, keys): keys : list of str The HTML tags to extract the content from. - Returns + Returns: ------- dict A dictionary mapping each key to a list of subset in `text` that match the key. - Notes + Notes: ----- All text and keys will be converted to lowercase before matching. @@ -126,7 +124,7 @@ def parse_html_tags(text, keys=(), optional_keys=(), merge_multiple=False): optional_keys : list of str The HTML tags to extract the content from, but are optional. - Returns + Returns: ------- dict A dictionary mapping each key to subset of `text` that match the key. 
diff --git a/agenthub/codeact_agent/action_parser.py b/agenthub/codeact_agent/action_parser.py index f901b0f8ea1..2491efcb37c 100644 --- a/agenthub/codeact_agent/action_parser.py +++ b/agenthub/codeact_agent/action_parser.py @@ -12,13 +12,12 @@ class CodeActResponseParser(ResponseParser): - """ - Parser action: - - CmdRunAction(command) - bash command to run - - IPythonRunCellAction(code) - IPython code to run - - AgentDelegateAction(agent, inputs) - delegate action for (sub)task - - MessageAction(content) - Message action to run (e.g. ask for clarification) - - AgentFinishAction() - end the interaction + """Parser action: + - CmdRunAction(command) - bash command to run + - IPythonRunCellAction(code) - IPython code to run + - AgentDelegateAction(agent, inputs) - delegate action for (sub)task + - MessageAction(content) - Message action to run (e.g. ask for clarification) + - AgentFinishAction() - end the interaction """ def __init__(self): @@ -38,6 +37,8 @@ def parse(self, response) -> Action: def parse_response(self, response) -> str: action = response.choices[0].message.content + if action is None: + return '' for lang in ['bash', 'ipython', 'browse']: if f'' in action and f'' not in action: action += f'' @@ -51,9 +52,8 @@ def parse_action(self, action_str: str) -> Action: class CodeActActionParserFinish(ActionParser): - """ - Parser action: - - AgentFinishAction() - end the interaction + """Parser action: + - AgentFinishAction() - end the interaction """ def __init__( @@ -74,10 +74,9 @@ def parse(self, action_str: str) -> Action: class CodeActActionParserCmdRun(ActionParser): - """ - Parser action: - - CmdRunAction(command) - bash command to run - - AgentFinishAction() - end the interaction + """Parser action: + - CmdRunAction(command) - bash command to run + - AgentFinishAction() - end the interaction """ def __init__( @@ -99,14 +98,13 @@ def parse(self, action_str: str) -> Action: # a command was found command_group = self.bash_command.group(1).strip() if command_group.strip() == 'exit': - return AgentFinishAction() + return AgentFinishAction(thought=thought) return CmdRunAction(command=command_group, thought=thought) class CodeActActionParserIPythonRunCell(ActionParser): - """ - Parser action: - - IPythonRunCellAction(code) - IPython code to run + """Parser action: + - IPythonRunCellAction(code) - IPython code to run """ def __init__( @@ -135,9 +133,8 @@ def parse(self, action_str: str) -> Action: class CodeActActionParserAgentDelegate(ActionParser): - """ - Parser action: - - AgentDelegateAction(agent, inputs) - delegate action for (sub)task + """Parser action: + - AgentDelegateAction(agent, inputs) - delegate action for (sub)task """ def __init__( @@ -162,9 +159,8 @@ def parse(self, action_str: str) -> Action: class CodeActActionParserMessage(ActionParser): - """ - Parser action: - - MessageAction(content) - Message action to run (e.g. ask for clarification) + """Parser action: + - MessageAction(content) - Message action to run (e.g. 
ask for clarification) """ def __init__( diff --git a/agenthub/codeact_agent/codeact_agent.py b/agenthub/codeact_agent/codeact_agent.py index 0b7114ade6d..559e53641f7 100644 --- a/agenthub/codeact_agent/codeact_agent.py +++ b/agenthub/codeact_agent/codeact_agent.py @@ -8,7 +8,7 @@ ) from opendevin.controller.agent import Agent from opendevin.controller.state.state import State -from opendevin.core.config import config +from opendevin.core.message import ImageContent, Message, TextContent from opendevin.events.action import ( Action, AgentDelegateAction, @@ -22,6 +22,8 @@ CmdOutputObservation, IPythonRunCellObservation, ) +from opendevin.events.observation.error import ErrorObservation +from opendevin.events.observation.observation import Observation from opendevin.events.serialization.event import truncate_content from opendevin.llm.llm import LLM from opendevin.runtime.plugins import ( @@ -34,62 +36,6 @@ ENABLE_GITHUB = True -def action_to_str(action: Action) -> str: - if isinstance(action, CmdRunAction): - return f'{action.thought}\n\n{action.command}\n' - elif isinstance(action, IPythonRunCellAction): - return f'{action.thought}\n\n{action.code}\n' - elif isinstance(action, AgentDelegateAction): - return f'{action.thought}\n\n{action.inputs["task"]}\n' - elif isinstance(action, MessageAction): - return action.content - return '' - - -def get_action_message(action: Action) -> dict[str, str] | None: - if ( - isinstance(action, AgentDelegateAction) - or isinstance(action, CmdRunAction) - or isinstance(action, IPythonRunCellAction) - or isinstance(action, MessageAction) - ): - return { - 'role': 'user' if action.source == 'user' else 'assistant', - 'content': action_to_str(action), - } - return None - - -def get_observation_message(obs) -> dict[str, str] | None: - max_message_chars = config.get_llm_config_from_agent( - 'CodeActAgent' - ).max_message_chars - if isinstance(obs, CmdOutputObservation): - content = 'OBSERVATION:\n' + truncate_content(obs.content, max_message_chars) - content += ( - f'\n[Command {obs.command_id} finished with exit code {obs.exit_code}]' - ) - return {'role': 'user', 'content': content} - elif isinstance(obs, IPythonRunCellObservation): - content = 'OBSERVATION:\n' + obs.content - # replace base64 images with a placeholder - splitted = content.split('\n') - for i, line in enumerate(splitted): - if '![image](data:image/png;base64,' in line: - splitted[i] = ( - '![image](data:image/png;base64, ...) already displayed to user' - ) - content = '\n'.join(splitted) - content = truncate_content(content, max_message_chars) - return {'role': 'user', 'content': content} - elif isinstance(obs, AgentDelegateObservation): - content = 'OBSERVATION:\n' + truncate_content( - str(obs.outputs), max_message_chars - ) - return {'role': 'user', 'content': content} - return None - - # FIXME: We can tweak these two settings to create MicroAgents specialized toward different area def get_system_message() -> str: if ENABLE_GITHUB: @@ -103,14 +49,14 @@ def get_in_context_example() -> str: class CodeActAgent(Agent): - VERSION = '1.7' + VERSION = '1.8' """ The Code Act Agent is a minimalist agent. The agent works by passing the model a list of action-observation pairs and prompting the model to take the next step. 
### Overview - This agent implements the CodeAct idea ([paper](https://arxiv.org/abs/2402.13463), [tweet](https://twitter.com/xingyaow_/status/1754556835703751087)) that consolidates LLM agents’ **act**ions into a unified **code** action space for both *simplicity* and *performance* (see paper for more details). + This agent implements the CodeAct idea ([paper](https://arxiv.org/abs/2402.01030), [tweet](https://twitter.com/xingyaow_/status/1754556835703751087)) that consolidates LLM agents’ **act**ions into a unified **code** action space for both *simplicity* and *performance* (see paper for more details). The conceptual idea is illustrated below. At each turn, the agent can: @@ -158,8 +104,7 @@ def __init__( self, llm: LLM, ) -> None: - """ - Initializes a new instance of the CodeActAgent class. + """Initializes a new instance of the CodeActAgent class. Parameters: - llm (LLM): The llm to be used by this agent @@ -167,15 +112,79 @@ def __init__( super().__init__(llm) self.reset() + def action_to_str(self, action: Action) -> str: + if isinstance(action, CmdRunAction): + return ( + f'{action.thought}\n\n{action.command}\n' + ) + elif isinstance(action, IPythonRunCellAction): + return f'{action.thought}\n\n{action.code}\n' + elif isinstance(action, AgentDelegateAction): + return f'{action.thought}\n\n{action.inputs["task"]}\n' + elif isinstance(action, MessageAction): + return action.content + elif isinstance(action, AgentFinishAction) and action.source == 'agent': + return action.thought + return '' + + def get_action_message(self, action: Action) -> Message | None: + if ( + isinstance(action, AgentDelegateAction) + or isinstance(action, CmdRunAction) + or isinstance(action, IPythonRunCellAction) + or isinstance(action, MessageAction) + or (isinstance(action, AgentFinishAction) and action.source == 'agent') + ): + content = [TextContent(text=self.action_to_str(action))] + + if isinstance(action, MessageAction) and action.images_urls: + content.append(ImageContent(image_urls=action.images_urls)) + + return Message( + role='user' if action.source == 'user' else 'assistant', content=content + ) + return None + + def get_observation_message(self, obs: Observation) -> Message | None: + max_message_chars = self.llm.config.max_message_chars + if isinstance(obs, CmdOutputObservation): + text = 'OBSERVATION:\n' + truncate_content(obs.content, max_message_chars) + text += ( + f'\n[Command {obs.command_id} finished with exit code {obs.exit_code}]' + ) + return Message(role='user', content=[TextContent(text=text)]) + elif isinstance(obs, IPythonRunCellObservation): + text = 'OBSERVATION:\n' + obs.content + # replace base64 images with a placeholder + splitted = text.split('\n') + for i, line in enumerate(splitted): + if '![image](data:image/png;base64,' in line: + splitted[i] = ( + '![image](data:image/png;base64, ...) 
already displayed to user' + ) + text = '\n'.join(splitted) + text = truncate_content(text, max_message_chars) + return Message(role='user', content=[TextContent(text=text)]) + elif isinstance(obs, AgentDelegateObservation): + text = 'OBSERVATION:\n' + truncate_content( + str(obs.outputs), max_message_chars + ) + return Message(role='user', content=[TextContent(text=text)]) + elif isinstance(obs, ErrorObservation): + text = 'OBSERVATION:\n' + truncate_content(obs.content, max_message_chars) + text += '\n[Error occurred in processing last action]' + return Message(role='user', content=[TextContent(text=text)]) + else: + # If an observation message is not returned, it will cause an error + # when the LLM tries to return the next message + raise ValueError(f'Unknown observation type: {type(obs)}') + def reset(self) -> None: - """ - Resets the CodeAct Agent. - """ + """Resets the CodeAct Agent.""" super().reset() def step(self, state: State) -> Action: - """ - Performs one step using the CodeAct Agent. + """Performs one step using the CodeAct Agent. This includes gathering info on previous steps and prompting the model to make a command to execute. Parameters: @@ -188,17 +197,16 @@ def step(self, state: State) -> Action: - MessageAction(content) - Message action to run (e.g. ask for clarification) - AgentFinishAction() - end the interaction """ - # if we're done, go back latest_user_message = state.history.get_last_user_message() if latest_user_message and latest_user_message.strip() == '/exit': return AgentFinishAction() # prepare what we want to send to the LLM - messages: list[dict[str, str]] = self._get_messages(state) + messages = self._get_messages(state) response = self.llm.completion( - messages=messages, + messages=[message.model_dump() for message in messages], stop=[ '', '', @@ -208,37 +216,61 @@ def step(self, state: State) -> Action: ) return self.action_parser.parse(response) - def search_memory(self, query: str) -> list[str]: - raise NotImplementedError('Implement this abstract method') - - def _get_messages(self, state: State) -> list[dict[str, str]]: - messages = [ - {'role': 'system', 'content': self.system_message}, - {'role': 'user', 'content': self.in_context_example}, + def _get_messages(self, state: State) -> list[Message]: + messages: list[Message] = [ + Message(role='system', content=[TextContent(text=self.system_message)]), + Message(role='user', content=[TextContent(text=self.in_context_example)]), ] for event in state.history.get_events(): # create a regular message from an event - message = ( - get_action_message(event) - if isinstance(event, Action) - else get_observation_message(event) - ) + if isinstance(event, Action): + message = self.get_action_message(event) + elif isinstance(event, Observation): + message = self.get_observation_message(event) + else: + raise ValueError(f'Unknown event type: {type(event)}') # add regular message if message: - messages.append(message) + # handle error if the message is the SAME role as the previous message + # litellm.exceptions.BadRequestError: litellm.BadRequestError: OpenAIException - Error code: 400 - {'detail': 'Only supports u/a/u/a/u...'} + # there should not have two consecutive messages from the same role + if messages and messages[-1].role == message.role: + messages[-1].content.extend(message.content) + else: + messages.append(message) # the latest user message is important: # we want to remind the agent of the environment constraints latest_user_message = next( - (m for m in reversed(messages) if m['role'] == 
'user'), None + ( + m + for m in reversed(messages) + if m.role == 'user' + and any(isinstance(c, TextContent) for c in m.content) + ), + None, ) - # add a reminder to the prompt + # Get the last user text inside content if latest_user_message: - latest_user_message['content'] += ( - f'\n\nENVIRONMENT REMINDER: You have {state.max_iterations - state.iteration} turns left to complete the task. When finished reply with ' + latest_user_message_text = next( + ( + t + for t in reversed(latest_user_message.content) + if isinstance(t, TextContent) + ) ) + # add a reminder to the prompt + reminder_text = f'\n\nENVIRONMENT REMINDER: You have {state.max_iterations - state.iteration} turns left to complete the task. When finished reply with .' + + if latest_user_message_text: + latest_user_message_text.text = ( + latest_user_message_text.text + reminder_text + ) + else: + latest_user_message_text = TextContent(text=reminder_text) + latest_user_message.content.append(latest_user_message_text) return messages diff --git a/agenthub/codeact_agent/prompt.py b/agenthub/codeact_agent/prompt.py index 7737f8cabfe..0ba804ccb2e 100644 --- a/agenthub/codeact_agent/prompt.py +++ b/agenthub/codeact_agent/prompt.py @@ -5,7 +5,7 @@ COMMAND_DOCS = ( '\nApart from the standard Python library, the assistant can also use the following functions (already imported) in environment:\n' f'{_AGENT_SKILLS_DOCS}' - "Please note that THE `edit_file` and `insert_content_at_line` FUNCTIONS REQUIRE PROPER INDENTATION. If the assistant would like to add the line ' print(x)', it must fully write that out, with all those spaces before the code! Indentation is important and code that is not indented correctly will fail and require fixing before it can be run." + "Please note that THE `edit_file_by_replace`, `append_file` and `insert_content_at_line` FUNCTIONS REQUIRE PROPER INDENTATION. If the assistant would like to add the line ' print(x)', it must fully write that out, with all those spaces before the code! Indentation is important and code that is not indented correctly will fail and require fixing before it can be run." ) # ======= SYSTEM MESSAGE ======= @@ -60,7 +60,9 @@ USER: OBSERVATION: [File: /workspace/app.py (1 lines total)] +(this is the beginning of the file) 1| +(this is the end of the file) [File app.py created.] ASSISTANT: @@ -86,6 +88,7 @@ def index(): USER: OBSERVATION: +(this is the beginning of the file) 1|from flask import Flask 2|app = Flask(__name__) 3| @@ -96,8 +99,8 @@ def index(): 8| 9|if __name__ == '__main__': 10| app.run(port=5000) -11| -[File updated. Please review the changes and make sure they are correct (correct indentation, no duplicate lines, etc). Edit the file again if necessary.] +(this is the end of the file) +[File updated (edited at line 1). Please review the changes and make sure they are correct (correct indentation, no duplicate lines, etc). Edit the file again if necessary.] ASSISTANT: I have created a Python file `app.py` that will display a list of numbers from 1 to 10 when you run it. Let me run the Python file for you: @@ -206,7 +209,8 @@ def index(): USER: -[File: /workspace/app.py (11 lines total)] +[File: /workspace/app.py (10 lines total)] +(this is the beginning of the file) 1|from flask import Flask 2|app = Flask(__name__) 3| @@ -217,12 +221,12 @@ def index(): 8| 9|if __name__ == '__main__': 10| app.run(port=5000) -11| +(this is the end of the file) ASSISTANT: I should edit the file to display the numbers in a table format. I should include correct indentation. 
Let me update the file: -edit_file( +edit_file_by_replace( 'app.py', to_replace=" return str(numbers)", new_content=" return '' + ''.join([f'' for i in numbers]) + '
{i}
'", @@ -231,7 +235,8 @@ def index(): USER: Observation: -[File: /workspace/app.py (11 lines total after edit)] +[File: /workspace/app.py (10 lines total after edit)] +(this is the beginning of the file) 1|from flask import Flask 2|app = Flask(__name__) 3| @@ -242,8 +247,8 @@ def index(): 8| 9|if __name__ == '__main__': 10| app.run(port=5000) -11| -[File updated. Please review the changes and make sure they are correct (correct indentation, no duplicate lines, etc). Edit the file again if necessary.] +(this is the end of the file) +[File updated (edited at line 7). Please review the changes and make sure they are correct (correct indentation, no duplicate lines, etc). Edit the file again if necessary.] ASSISTANT: Running the updated file: diff --git a/agenthub/codeact_swe_agent/action_parser.py b/agenthub/codeact_swe_agent/action_parser.py index 5e57f66b3ad..d51a6c6ec12 100644 --- a/agenthub/codeact_swe_agent/action_parser.py +++ b/agenthub/codeact_swe_agent/action_parser.py @@ -11,9 +11,8 @@ class CodeActSWEActionParserFinish(ActionParser): - """ - Parser action: - - AgentFinishAction() - end the interaction + """Parser action: + - AgentFinishAction() - end the interaction """ def __init__( @@ -34,10 +33,9 @@ def parse(self, action_str: str) -> Action: class CodeActSWEActionParserCmdRun(ActionParser): - """ - Parser action: - - CmdRunAction(command) - bash command to run - - AgentFinishAction() - end the interaction + """Parser action: + - CmdRunAction(command) - bash command to run + - AgentFinishAction() - end the interaction """ def __init__( @@ -64,9 +62,8 @@ def parse(self, action_str: str) -> Action: class CodeActSWEActionParserIPythonRunCell(ActionParser): - """ - Parser action: - - IPythonRunCellAction(code) - IPython code to run + """Parser action: + - IPythonRunCellAction(code) - IPython code to run """ def __init__( @@ -95,9 +92,8 @@ def parse(self, action_str: str) -> Action: class CodeActSWEActionParserMessage(ActionParser): - """ - Parser action: - - MessageAction(content) - Message action to run (e.g. ask for clarification) + """Parser action: + - MessageAction(content) - Message action to run (e.g. 
ask for clarification) """ def __init__( diff --git a/agenthub/codeact_swe_agent/codeact_swe_agent.py b/agenthub/codeact_swe_agent/codeact_swe_agent.py index c18041fdcd9..906bf258c3a 100644 --- a/agenthub/codeact_swe_agent/codeact_swe_agent.py +++ b/agenthub/codeact_swe_agent/codeact_swe_agent.py @@ -7,7 +7,7 @@ from agenthub.codeact_swe_agent.response_parser import CodeActSWEResponseParser from opendevin.controller.agent import Agent from opendevin.controller.state.state import State -from opendevin.core.config import config +from opendevin.core.message import ImageContent, Message, TextContent from opendevin.events.action import ( Action, AgentFinishAction, @@ -19,6 +19,8 @@ CmdOutputObservation, IPythonRunCellObservation, ) +from opendevin.events.observation.error import ErrorObservation +from opendevin.events.observation.observation import Observation from opendevin.events.serialization.event import truncate_content from opendevin.llm.llm import LLM from opendevin.runtime.plugins import ( @@ -29,54 +31,6 @@ from opendevin.runtime.tools import RuntimeTool -def action_to_str(action: Action) -> str: - if isinstance(action, CmdRunAction): - return f'{action.thought}\n\n{action.command}\n' - elif isinstance(action, IPythonRunCellAction): - return f'{action.thought}\n\n{action.code}\n' - elif isinstance(action, MessageAction): - return action.content - return '' - - -def get_action_message(action: Action) -> dict[str, str] | None: - if ( - isinstance(action, CmdRunAction) - or isinstance(action, IPythonRunCellAction) - or isinstance(action, MessageAction) - ): - return { - 'role': 'user' if action.source == 'user' else 'assistant', - 'content': action_to_str(action), - } - return None - - -def get_observation_message(obs) -> dict[str, str] | None: - max_message_chars = config.get_llm_config_from_agent( - 'CodeActSWEAgent' - ).max_message_chars - if isinstance(obs, CmdOutputObservation): - content = 'OBSERVATION:\n' + truncate_content(obs.content, max_message_chars) - content += ( - f'\n[Command {obs.command_id} finished with exit code {obs.exit_code}]' - ) - return {'role': 'user', 'content': content} - elif isinstance(obs, IPythonRunCellObservation): - content = 'OBSERVATION:\n' + obs.content - # replace base64 images with a placeholder - splitted = content.split('\n') - for i, line in enumerate(splitted): - if '![image](data:image/png;base64,' in line: - splitted[i] = ( - '![image](data:image/png;base64, ...) already displayed to user' - ) - content = '\n'.join(splitted) - content = truncate_content(content, max_message_chars) - return {'role': 'user', 'content': content} - return None - - def get_system_message() -> str: return f'{SYSTEM_PREFIX}\n\n{COMMAND_DOCS}\n\n{SYSTEM_SUFFIX}' @@ -113,8 +67,7 @@ def __init__( self, llm: LLM, ) -> None: - """ - Initializes a new instance of the CodeActAgent class. + """Initializes a new instance of the CodeActSWEAgent class. 
Parameters: - llm (LLM): The llm to be used by this agent @@ -122,15 +75,69 @@ def __init__( super().__init__(llm) self.reset() + def action_to_str(self, action: Action) -> str: + if isinstance(action, CmdRunAction): + return ( + f'{action.thought}\n\n{action.command}\n' + ) + elif isinstance(action, IPythonRunCellAction): + return f'{action.thought}\n\n{action.code}\n' + elif isinstance(action, MessageAction): + return action.content + return '' + + def get_action_message(self, action: Action) -> Message | None: + if ( + isinstance(action, CmdRunAction) + or isinstance(action, IPythonRunCellAction) + or isinstance(action, MessageAction) + ): + content = [TextContent(text=self.action_to_str(action))] + + if isinstance(action, MessageAction) and action.images_urls: + content.append(ImageContent(image_urls=action.images_urls)) + + return Message( + role='user' if action.source == 'user' else 'assistant', content=content + ) + + return None + + def get_observation_message(self, obs: Observation) -> Message | None: + max_message_chars = self.llm.config.max_message_chars + if isinstance(obs, CmdOutputObservation): + text = 'OBSERVATION:\n' + truncate_content(obs.content, max_message_chars) + text += ( + f'\n[Command {obs.command_id} finished with exit code {obs.exit_code}]' + ) + return Message(role='user', content=[TextContent(text=text)]) + elif isinstance(obs, IPythonRunCellObservation): + text = 'OBSERVATION:\n' + obs.content + # replace base64 images with a placeholder + splitted = text.split('\n') + for i, line in enumerate(splitted): + if '![image](data:image/png;base64,' in line: + splitted[i] = ( + '![image](data:image/png;base64, ...) already displayed to user' + ) + text = '\n'.join(splitted) + text = truncate_content(text, max_message_chars) + return Message(role='user', content=[TextContent(text=text)]) + elif isinstance(obs, ErrorObservation): + text = 'OBSERVATION:\n' + truncate_content(obs.content, max_message_chars) + text += '\n[Error occurred in processing last action]' + return Message(role='user', content=[TextContent(text=text)]) + else: + # If an observation message is not returned, it will cause an error + # when the LLM tries to return the next message + raise ValueError(f'Unknown observation type: {type(obs)}') + def reset(self) -> None: - """ - Resets the CodeAct Agent. - """ + """Resets the CodeAct Agent.""" super().reset() def step(self, state: State) -> Action: - """ - Performs one step using the CodeAct Agent. + """Performs one step using the CodeAct Agent. This includes gathering info on previous steps and prompting the model to make a command to execute. Parameters: @@ -142,17 +149,16 @@ def step(self, state: State) -> Action: - MessageAction(content) - Message action to run (e.g. ask for clarification) - AgentFinishAction() - end the interaction """ - # if we're done, go back latest_user_message = state.history.get_last_user_message() if latest_user_message and latest_user_message.strip() == '/exit': return AgentFinishAction() # prepare what we want to send to the LLM - messages: list[dict[str, str]] = self._get_messages(state) + messages: list[Message] = self._get_messages(state) response = self.llm.completion( - messages=messages, + messages=[message.model_dump() for message in messages], stop=[ '
', '', @@ -162,37 +168,55 @@ def step(self, state: State) -> Action: return self.response_parser.parse(response) - def search_memory(self, query: str) -> list[str]: - raise NotImplementedError('Implement this abstract method') - - def _get_messages(self, state: State) -> list[dict[str, str]]: - messages = [ - {'role': 'system', 'content': self.system_message}, - {'role': 'user', 'content': self.in_context_example}, + def _get_messages(self, state: State) -> list[Message]: + messages: list[Message] = [ + Message(role='system', content=[TextContent(text=self.system_message)]), + Message(role='user', content=[TextContent(text=self.in_context_example)]), ] for event in state.history.get_events(): # create a regular message from an event - message = ( - get_action_message(event) - if isinstance(event, Action) - else get_observation_message(event) - ) + if isinstance(event, Action): + message = self.get_action_message(event) + elif isinstance(event, Observation): + message = self.get_observation_message(event) + else: + raise ValueError(f'Unknown event type: {type(event)}') # add regular message if message: - messages.append(message) + # handle error if the message is the SAME role as the previous message + # litellm.exceptions.BadRequestError: litellm.BadRequestError: OpenAIException - Error code: 400 - {'detail': 'Only supports u/a/u/a/u...'} + # there should not have two consecutive messages from the same role + if messages and messages[-1].role == message.role: + messages[-1].content.extend(message.content) + else: + messages.append(message) # the latest user message is important: # we want to remind the agent of the environment constraints latest_user_message = next( - (m for m in reversed(messages) if m['role'] == 'user'), None + (m for m in reversed(messages) if m.role == 'user'), None ) - # add a reminder to the prompt + # Get the last user text inside content if latest_user_message: - latest_user_message['content'] += ( - f'\n\nENVIRONMENT REMINDER: You have {state.max_iterations - state.iteration} turns left to complete the task.' + latest_user_message_text = next( + ( + t + for t in reversed(latest_user_message.content) + if isinstance(t, TextContent) + ) ) + # add a reminder to the prompt + reminder_text = f'\n\nENVIRONMENT REMINDER: You have {state.max_iterations - state.iteration} turns left to complete the task. When finished reply with .' + + if latest_user_message_text: + latest_user_message_text.text = ( + latest_user_message_text.text + reminder_text + ) + else: + latest_user_message_text = TextContent(text=reminder_text) + latest_user_message.content.append(latest_user_message_text) return messages diff --git a/agenthub/codeact_swe_agent/response_parser.py b/agenthub/codeact_swe_agent/response_parser.py index c9661d18095..e8e8d34e9e5 100644 --- a/agenthub/codeact_swe_agent/response_parser.py +++ b/agenthub/codeact_swe_agent/response_parser.py @@ -9,12 +9,11 @@ class CodeActSWEResponseParser(ResponseParser): - """ - Parser action: - - CmdRunAction(command) - bash command to run - - IPythonRunCellAction(code) - IPython code to run - - MessageAction(content) - Message action to run (e.g. ask for clarification) - - AgentFinishAction() - end the interaction + """Parser action: + - CmdRunAction(command) - bash command to run + - IPythonRunCellAction(code) - IPython code to run + - MessageAction(content) - Message action to run (e.g. 
ask for clarification) + - AgentFinishAction() - end the interaction """ def __init__(self): @@ -33,6 +32,8 @@ def parse(self, response: str) -> Action: def parse_response(self, response) -> str: action = response.choices[0].message.content + if action is None: + return '' for lang in ['bash', 'ipython']: if f'' in action and f'' not in action: action += f'' diff --git a/agenthub/delegator_agent/agent.py b/agenthub/delegator_agent/agent.py index f97c5e5096c..1e49603bf36 100644 --- a/agenthub/delegator_agent/agent.py +++ b/agenthub/delegator_agent/agent.py @@ -14,8 +14,7 @@ class DelegatorAgent(Agent): current_delegate: str = '' def __init__(self, llm: LLM): - """ - Initialize the Delegator Agent with an LLM + """Initialize the Delegator Agent with an LLM Parameters: - llm (LLM): The llm to be used by this agent @@ -23,8 +22,7 @@ def __init__(self, llm: LLM): super().__init__(llm) def step(self, state: State) -> Action: - """ - Checks to see if current step is completed, returns AgentFinishAction if True. + """Checks to see if current step is completed, returns AgentFinishAction if True. Otherwise, delegates the task to the next agent in the pipeline. Parameters: @@ -36,7 +34,7 @@ def step(self, state: State) -> Action: """ if self.current_delegate == '': self.current_delegate = 'study' - task = state.get_current_user_intent() + task, _ = state.get_current_user_intent() return AgentDelegateAction( agent='StudyRepoForTaskAgent', inputs={'task': task} ) @@ -47,7 +45,7 @@ def step(self, state: State) -> Action: if not isinstance(last_observation, AgentDelegateObservation): raise Exception('Last observation is not an AgentDelegateObservation') - goal = state.get_current_user_intent() + goal, _ = state.get_current_user_intent() if self.current_delegate == 'study': self.current_delegate = 'coder' return AgentDelegateAction( @@ -82,6 +80,3 @@ def step(self, state: State) -> Action: ) else: raise Exception('Invalid delegate state') - - def search_memory(self, query: str) -> list[str]: - return [] diff --git a/agenthub/dummy_agent/agent.py b/agenthub/dummy_agent/agent.py index ea72e86e3ce..f0fa8de9b4e 100644 --- a/agenthub/dummy_agent/agent.py +++ b/agenthub/dummy_agent/agent.py @@ -1,13 +1,12 @@ -import time -from typing import TypedDict +from typing import TypedDict, Union from opendevin.controller.agent import Agent from opendevin.controller.state.state import State +from opendevin.core.schema import AgentState from opendevin.events.action import ( Action, AddTaskAction, AgentFinishAction, - AgentRecallAction, AgentRejectAction, BrowseInteractiveAction, BrowseURLAction, @@ -18,7 +17,7 @@ ModifyTaskAction, ) from opendevin.events.observation import ( - AgentRecallObservation, + AgentStateChangedObservation, CmdOutputObservation, FileReadObservation, FileWriteObservation, @@ -50,32 +49,40 @@ def __init__(self, llm: LLM): super().__init__(llm) self.steps: list[ActionObs] = [ { - 'action': AddTaskAction(parent='0', goal='check the current directory'), - 'observations': [NullObservation('')], + 'action': AddTaskAction( + parent='None', goal='check the current directory' + ), + 'observations': [], }, { - 'action': AddTaskAction(parent='0.0', goal='run ls'), - 'observations': [NullObservation('')], + 'action': AddTaskAction(parent='0', goal='run ls'), + 'observations': [], }, { - 'action': ModifyTaskAction(task_id='0.0', state='in_progress'), - 'observations': [NullObservation('')], + 'action': ModifyTaskAction(task_id='0', state='in_progress'), + 'observations': [], }, { 'action': MessageAction('Time 
to get started!'), - 'observations': [NullObservation('')], + 'observations': [], }, { 'action': CmdRunAction(command='echo "foo"'), 'observations': [ - CmdOutputObservation('foo', command_id=-1, command='echo "foo"') + CmdOutputObservation( + 'foo', command_id=-1, command='echo "foo"', exit_code=0 + ) ], }, { 'action': FileWriteAction( content='echo "Hello, World!"', path='hello.sh' ), - 'observations': [FileWriteObservation('', path='hello.sh')], + 'observations': [ + FileWriteObservation( + content='echo "Hello, World!"', path='hello.sh' + ) + ], }, { 'action': FileReadAction(path='hello.sh'), @@ -87,20 +94,17 @@ def __init__(self, llm: LLM): 'action': CmdRunAction(command='bash hello.sh'), 'observations': [ CmdOutputObservation( - 'Hello, World!', command_id=-1, command='bash hello.sh' + 'bash: hello.sh: No such file or directory', + command_id=-1, + command='bash workspace/hello.sh', + exit_code=127, ) ], }, - { - 'action': AgentRecallAction(query='who am I?'), - 'observations': [ - AgentRecallObservation('', memories=['I am a computer.']), - ], - }, { 'action': BrowseURLAction(url='https://google.com'), 'observations': [ - # BrowserOutputObservation('', url='https://google.com', screenshot=""), + # BrowserOutputObservation('Simulated Google page',url='https://google.com',screenshot=''), ], }, { @@ -108,50 +112,99 @@ def __init__(self, llm: LLM): browser_actions='goto("https://google.com")' ), 'observations': [ - # BrowserOutputObservation('', url='https://google.com', screenshot=""), + # BrowserOutputObservation('Simulated Google page after interaction',url='https://google.com',screenshot=''), ], }, { - 'action': AgentFinishAction(), - 'observations': [], + 'action': AgentRejectAction(), + 'observations': [NullObservation('')], }, { - 'action': AgentRejectAction(), - 'observations': [], + 'action': AgentFinishAction( + outputs={}, thought='Task completed', action='finish' + ), + 'observations': [AgentStateChangedObservation('', AgentState.FINISHED)], }, ] def step(self, state: State) -> Action: - time.sleep(0.1) + if state.iteration >= len(self.steps): + return AgentFinishAction() + + current_step = self.steps[state.iteration] + action = current_step['action'] + + # If the action is AddTaskAction or ModifyTaskAction, update the parent ID or task_id + if isinstance(action, AddTaskAction): + if action.parent == 'None': + action.parent = '' # Root task has no parent + elif action.parent == '0': + action.parent = state.root_task.id + elif action.parent.startswith('0.'): + action.parent = f'{state.root_task.id}{action.parent[1:]}' + elif isinstance(action, ModifyTaskAction): + if action.task_id == '0': + action.task_id = state.root_task.id + elif action.task_id.startswith('0.'): + action.task_id = f'{state.root_task.id}{action.task_id[1:]}' + # Ensure the task_id doesn't start with a dot + if action.task_id.startswith('.'): + action.task_id = action.task_id[1:] + elif isinstance(action, (BrowseURLAction, BrowseInteractiveAction)): + try: + return self.simulate_browser_action(action) + except ( + Exception + ): # This could be a specific exception for browser unavailability + return self.handle_browser_unavailable(action) + if state.iteration > 0: prev_step = self.steps[state.iteration - 1] - # a step is (action, observations list) - if 'observations' in prev_step: - # one obs, at most + if 'observations' in prev_step and prev_step['observations']: expected_observations = prev_step['observations'] - - # check if the history matches the expected observations hist_events = 
state.history.get_last_events(len(expected_observations)) - for i in range(len(expected_observations)): + + if len(hist_events) < len(expected_observations): + print( + f'Warning: Expected {len(expected_observations)} observations, but got {len(hist_events)}' + ) + + for i in range(min(len(expected_observations), len(hist_events))): hist_obs = event_to_dict(hist_events[i]) expected_obs = event_to_dict(expected_observations[i]) - if ( - 'command_id' in hist_obs['extras'] - and hist_obs['extras']['command_id'] != -1 - ): - del hist_obs['extras']['command_id'] - hist_obs['content'] = '' - if ( - 'command_id' in expected_obs['extras'] - and expected_obs['extras']['command_id'] != -1 - ): - del expected_obs['extras']['command_id'] - expected_obs['content'] = '' - assert ( - hist_obs == expected_obs - ), f'Expected observation {expected_obs}, got {hist_obs}' - return self.steps[state.iteration]['action'] - - def search_memory(self, query: str) -> list[str]: - return ['I am a computer.'] + + # Remove dynamic fields for comparison + for obs in [hist_obs, expected_obs]: + obs.pop('id', None) + obs.pop('timestamp', None) + obs.pop('cause', None) + obs.pop('source', None) + if 'extras' in obs: + obs['extras'].pop('command_id', None) + + if hist_obs != expected_obs: + print( + f'Warning: Observation mismatch. Expected {expected_obs}, got {hist_obs}' + ) + + return action + + def simulate_browser_action( + self, action: Union[BrowseURLAction, BrowseInteractiveAction] + ) -> Action: + # Instead of simulating, we'll reject the browser action + return self.handle_browser_unavailable(action) + + def handle_browser_unavailable( + self, action: Union[BrowseURLAction, BrowseInteractiveAction] + ) -> Action: + # Create a message action to inform that browsing is not available + message = 'Browser actions are not available in the DummyAgent environment.' 
+ if isinstance(action, BrowseURLAction): + message += f' Unable to browse URL: {action.url}' + elif isinstance(action, BrowseInteractiveAction): + message += ( + f' Unable to perform interactive browsing: {action.browser_actions}' + ) + return MessageAction(content=message) diff --git a/agenthub/micro/agent.py b/agenthub/micro/agent.py index d4ed2ce8a38..0d305f70ec8 100644 --- a/agenthub/micro/agent.py +++ b/agenthub/micro/agent.py @@ -2,7 +2,7 @@ from opendevin.controller.agent import Agent from opendevin.controller.state.state import State -from opendevin.core.config import config +from opendevin.core.message import ImageContent, Message, TextContent from opendevin.core.utils import json from opendevin.events.action import Action from opendevin.events.serialization.action import action_from_dict @@ -23,40 +23,37 @@ def parse_response(orig_response: str) -> Action: def to_json(obj, **kwargs): - """ - Serialize an object to str format - """ + """Serialize an object to str format""" return json.dumps(obj, **kwargs) -def history_to_json(history: ShortTermHistory, max_events=20, **kwargs): - """ - Serialize and simplify history to str format - """ - # TODO: get agent specific llm config - llm_config = config.get_llm_config() - max_message_chars = llm_config.max_message_chars - - processed_history = [] - event_count = 0 - - for event in history.get_events(reverse=True): - if event_count >= max_events: - break - processed_history.append(event_to_memory(event, max_message_chars)) - event_count += 1 - - # history is in reverse order, let's fix it - processed_history.reverse() - - return json.dumps(processed_history, **kwargs) - - class MicroAgent(Agent): VERSION = '1.0' prompt = '' agent_definition: dict = {} + def history_to_json( + self, history: ShortTermHistory, max_events: int = 20, **kwargs + ): + """ + Serialize and simplify history to str format + """ + processed_history = [] + event_count = 0 + + for event in history.get_events(reverse=True): + if event_count >= max_events: + break + processed_history.append( + event_to_memory(event, self.llm.config.max_message_chars) + ) + event_count += 1 + + # history is in reverse order, let's fix it + processed_history.reverse() + + return json.dumps(processed_history, **kwargs) + def __init__(self, llm: LLM): super().__init__(llm) if 'name' not in self.agent_definition: @@ -66,19 +63,20 @@ def __init__(self, llm: LLM): del self.delegates[self.agent_definition['name']] def step(self, state: State) -> Action: + last_user_message, last_image_urls = state.get_current_user_intent() prompt = self.prompt_template.render( state=state, instructions=instructions, to_json=to_json, - history_to_json=history_to_json, + history_to_json=self.history_to_json, delegates=self.delegates, - latest_user_message=state.get_current_user_intent(), + latest_user_message=last_user_message, ) - messages = [{'content': prompt, 'role': 'user'}] - resp = self.llm.completion(messages=messages) + content = [TextContent(text=prompt)] + if last_image_urls: + content.append(ImageContent(image_urls=last_image_urls)) + message = Message(role='user', content=content) + resp = self.llm.completion(messages=[message.model_dump()]) action_resp = resp['choices'][0]['message']['content'] action = parse_response(action_resp) return action - - def search_memory(self, query: str) -> list[str]: - return [] diff --git a/agenthub/micro/commit_writer/README.md b/agenthub/micro/commit_writer/README.md index 927bc67da28..f82484b91d9 100644 --- a/agenthub/micro/commit_writer/README.md +++ 
b/agenthub/micro/commit_writer/README.md @@ -3,7 +3,7 @@ CommitWriterAgent can help write git commit message. Example: ```bash -WORKSPACE_MOUNT_PATH="`PWD`" SANDBOX_BOX_TYPE="ssh" \ +WORKSPACE_MOUNT_PATH="`PWD`" \ poetry run python opendevin/core/main.py -t "dummy task" -c CommitWriterAgent -d ./ ``` diff --git a/agenthub/monologue_agent/.dockerfileignore b/agenthub/monologue_agent/.dockerfileignore deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/agenthub/monologue_agent/.dockerignore b/agenthub/monologue_agent/.dockerignore deleted file mode 100644 index e3a71baf536..00000000000 --- a/agenthub/monologue_agent/.dockerignore +++ /dev/null @@ -1,2 +0,0 @@ -.envrc -workspace diff --git a/agenthub/monologue_agent/README.md b/agenthub/monologue_agent/README.md deleted file mode 100644 index 87f2165a855..00000000000 --- a/agenthub/monologue_agent/README.md +++ /dev/null @@ -1,8 +0,0 @@ -# LLM control loop -This is currently a standalone utility. It will need to be integrated into OpenDevin's backend. - -## Usage -```bash -# Run this in project root -./agenthub/monologue_agent/build-and-run.sh "write a bash script that prints 'hello world'" -``` diff --git a/agenthub/monologue_agent/TODO.md b/agenthub/monologue_agent/TODO.md deleted file mode 100644 index 5f44db4bef0..00000000000 --- a/agenthub/monologue_agent/TODO.md +++ /dev/null @@ -1,8 +0,0 @@ -# TODO -There's a lot of low-hanging fruit for this agent: - -* Strip `