From e3016e62253eacdbb0c74cc4f593d7d867742519 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pawe=C5=82=20P=C5=82atek?= Date: Fri, 27 Jan 2023 20:40:12 +0100 Subject: [PATCH] Pre release breaking changes (#26) * rename torch->pytorch; add descriptions * rename al rules yml->yaml * add technology and references to all rules * update readme; add contributing * small changes to workflows * codeowners --- .github/workflows/main.yml | 18 ---- .github/workflows/semgrep-rules-test.yml | 33 +++---- .github/workflows/update-semgrep-registry.yml | 16 ++++ CODEOWNERS | 3 + CONTRIBUTING.md | 96 +++++++++++++++++++ README.md | 88 +++++++++++------ ...tion.yml => anonymous-race-condition.yaml} | 2 + ...g-goroutine.yml => hanging-goroutine.yaml} | 2 + ...> invalid-usage-of-modified-variable.yaml} | 4 + ...ty-map.yml => iterate-over-empty-map.yaml} | 6 +- ...ex.yml => missing-runlock-on-rwmutex.yaml} | 3 + ....yml => missing-unlock-before-return.yaml} | 3 + ...ter-call.yml => nil-check-after-call.yaml} | 6 +- ...to-slice.yml => racy-append-to-slice.yaml} | 4 + ...rite-to-map.yml => racy-write-to-map.yaml} | 4 + ...rcodec-readrequestbody-unhandled-nil.yaml} | 2 + ...yml => string-to-int-signedness-cast.yaml} | 4 + ...opied.yml => sync-mutex-value-copied.yaml} | 2 + ...ll-loading.yml => unsafe-dll-loading.yaml} | 4 + ...aitgroup-add-called-inside-goroutine.yaml} | 2 + ...op.yml => waitgroup-wait-inside-loop.yaml} | 2 + ...ning.yml => automatic-memory-pinning.yaml} | 2 + ...lxml-in-pandas.yml => lxml-in-pandas.yaml} | 2 + ...mpy-distutils.yml => numpy-distutils.yaml} | 2 + ...py-compile.yml => numpy-f2py-compile.yaml} | 2 + ...tasets.py => numpy-in-pytorch-datasets.py} | 10 +- ...ets.yml => numpy-in-pytorch-datasets.yaml} | 6 +- ...ules.yml => numpy-in-pytorch-modules.yaml} | 6 +- ...ad-library.yml => numpy-load-library.yaml} | 2 + ...-options.yml => onnx-session-options.yaml} | 2 + ...les-in-numpy.yml => pickles-in-numpy.yaml} | 2 + ...s-in-pandas.yml => pickles-in-pandas.yaml} | 2 + 
...d.py => pickles-in-pytorch-distributed.py} | 10 +- ...ml => pickles-in-pytorch-distributed.yaml} | 4 +- ...in-pytorch.yml => pickles-in-pytorch.yaml} | 2 + ...ary.py => pytorch-classes-load-library.py} | 12 +-- ....yml => pytorch-classes-load-library.yaml} | 4 +- .../{torch-package.py => pytorch-package.py} | 8 +- ...torch-package.yml => pytorch-package.yaml} | 4 +- python/{torch-tensor.py => pytorch-tensor.py} | 10 +- .../{torch-tensor.yml => pytorch-tensor.yaml} | 4 +- ...oblib-load.yml => scikit-joblib-load.yaml} | 2 + ....yml => tarfile-extractall-traversal.yaml} | 2 + ...brary.yml => tensorflow-load-library.yaml} | 2 + ...py => waiting-with-pytorch-distributed.py} | 8 +- ... => waiting-with-pytorch-distributed.yaml} | 6 +- ...> panic-in-function-returning-result.yaml} | 4 + rules_table_generator.py | 64 +++++++++++++ 48 files changed, 382 insertions(+), 106 deletions(-) delete mode 100644 .github/workflows/main.yml create mode 100644 .github/workflows/update-semgrep-registry.yml create mode 100644 CODEOWNERS create mode 100644 CONTRIBUTING.md rename go/{anonymous-race-condition.yml => anonymous-race-condition.yaml} (96%) rename go/{hanging-goroutine.yml => hanging-goroutine.yaml} (97%) rename go/{invalid-usage-of-modified-variable.yml => invalid-usage-of-modified-variable.yaml} (77%) rename go/{iterate-over-empty-map.yml => iterate-over-empty-map.yaml} (83%) rename go/{missing-runlock-on-rwmutex.yml => missing-runlock-on-rwmutex.yaml} (77%) rename go/{missing-unlock-before-return.yml => missing-unlock-before-return.yaml} (80%) rename go/{nil-check-after-call.yml => nil-check-after-call.yaml} (90%) rename go/{racy-append-to-slice.yml => racy-append-to-slice.yaml} (86%) rename go/{racy-write-to-map.yml => racy-write-to-map.yaml} (81%) rename go/{servercodec-readrequestbody-unhandled-nil.yml => servercodec-readrequestbody-unhandled-nil.yaml} (89%) rename go/{string-to-int-signedness-cast.yml => string-to-int-signedness-cast.yaml} (95%) rename 
go/{sync-mutex-value-copied.yml => sync-mutex-value-copied.yaml} (89%) rename go/{unsafe-dll-loading.yml => unsafe-dll-loading.yaml} (84%) rename go/{waitgroup-add-called-inside-goroutine.yml => waitgroup-add-called-inside-goroutine.yaml} (89%) rename go/{waitgroup-wait-inside-loop.yml => waitgroup-wait-inside-loop.yaml} (93%) rename python/{automatic-memory-pinning.yml => automatic-memory-pinning.yaml} (88%) rename python/{lxml-in-pandas.yml => lxml-in-pandas.yaml} (91%) rename python/{numpy-distutils.yml => numpy-distutils.yaml} (85%) rename python/{numpy-f2py-compile.yml => numpy-f2py-compile.yaml} (83%) rename python/{numpy-in-torch-datasets.py => numpy-in-pytorch-datasets.py} (86%) rename python/{numpy-in-torch-datasets.yml => numpy-in-pytorch-datasets.yaml} (69%) rename python/{numpy-in-pytorch-modules.yml => numpy-in-pytorch-modules.yaml} (67%) rename python/{numpy-load-library.yml => numpy-load-library.yaml} (84%) rename python/{onnx-session-options.yml => onnx-session-options.yaml} (86%) rename python/{pickles-in-numpy.yml => pickles-in-numpy.yaml} (88%) rename python/{pickles-in-pandas.yml => pickles-in-pandas.yaml} (88%) rename python/{pickles-in-torch-distributed.py => pickles-in-pytorch-distributed.py} (69%) rename python/{pickles-in-torch-distributed.yml => pickles-in-pytorch-distributed.yaml} (80%) rename python/{pickles-in-pytorch.yml => pickles-in-pytorch.yaml} (87%) rename python/{torch-classes-load-library.py => pytorch-classes-load-library.py} (59%) rename python/{torch-classes-load-library.yml => pytorch-classes-load-library.yaml} (78%) rename python/{torch-package.py => pytorch-package.py} (51%) rename python/{torch-package.yml => pytorch-package.yaml} (81%) rename python/{torch-tensor.py => pytorch-tensor.py} (57%) rename python/{torch-tensor.yml => pytorch-tensor.yaml} (70%) rename python/{scikit-joblib-load.yml => scikit-joblib-load.yaml} (81%) rename python/{tarfile-extractall-traversal.yml => tarfile-extractall-traversal.yaml} (90%) 
rename python/{tensorflow-load-library.yml => tensorflow-load-library.yaml} (86%) rename python/{waiting-with-torch-distributed.py => waiting-with-pytorch-distributed.py} (73%) rename python/{waiting-with-torch-distributed.yml => waiting-with-pytorch-distributed.yaml} (68%) rename rs/{panic-in-function-returning-result.yml => panic-in-function-returning-result.yaml} (86%) create mode 100644 rules_table_generator.py diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml deleted file mode 100644 index 2ef0120..0000000 --- a/.github/workflows/main.yml +++ /dev/null @@ -1,18 +0,0 @@ -name: update-semgrep-registry - -on: - push: - branches: - - main - -jobs: - build: - name: Update semgrep.dev - runs-on: ubuntu-latest - steps: - - name: update dev.semgrep.dev - run: curl --fail -X POST -L https://dev.semgrep.dev/api/admin/update-registry - - name: update staging.semgrep.dev - run: curl --fail -X POST -L https://staging.semgrep.dev/api/admin/update-registry - - name: update semgrep.dev - run: curl --fail -X POST -L https://semgrep.dev/api/admin/update-registry diff --git a/.github/workflows/semgrep-rules-test.yml b/.github/workflows/semgrep-rules-test.yml index ff4be6c..3b64344 100644 --- a/.github/workflows/semgrep-rules-test.yml +++ b/.github/workflows/semgrep-rules-test.yml @@ -1,29 +1,24 @@ name: semgrep-rules-test - on: pull_request: push: branches: - - main - + - main jobs: build: name: run semgrep rules tests runs-on: ubuntu-latest steps: - - uses: actions/checkout@v2 - - name: Set up Python 3.9 - uses: actions/setup-python@v1 - with: - python-version: 3.9 - - - name: installation - run: | - python -m pip install --upgrade pip - python3 -m pip install semgrep - - - name: tests - run: | - python -m semgrep --quiet --test python/ - python -m semgrep --quiet --test rust/ - python -m semgrep --quiet --test go/ + - uses: actions/checkout@v2 + - name: Set up Python 3.9 + uses: actions/setup-python@v2 + with: + python-version: 3.9 + - name: installation + 
run: | + python -m pip install --upgrade pip + python3 -m pip install semgrep + - name: validations + run: python -m semgrep --validate --config . + - name: tests + run: python -m semgrep --test --test-ignore-todo diff --git a/.github/workflows/update-semgrep-registry.yml b/.github/workflows/update-semgrep-registry.yml new file mode 100644 index 0000000..b10be5d --- /dev/null +++ b/.github/workflows/update-semgrep-registry.yml @@ -0,0 +1,16 @@ +name: update-semgrep-registry +on: + push: + branches: + - main +jobs: + build: + name: Update semgrep.dev + runs-on: ubuntu-latest + steps: + - name: update dev.semgrep.dev + run: curl --fail -X POST -L https://dev.semgrep.dev/api/admin/update-registry + - name: update staging.semgrep.dev + run: curl --fail -X POST -L https://staging.semgrep.dev/api/admin/update-registry + - name: update semgrep.dev + run: curl --fail -X POST -L https://semgrep.dev/api/admin/update-registry diff --git a/CODEOWNERS b/CODEOWNERS new file mode 100644 index 0000000..a205a5f --- /dev/null +++ b/CODEOWNERS @@ -0,0 +1,3 @@ +* @GrosQuildu +/go/ @hex0punk @GrosQuildu @Vasco-jofra +/python/ @suhacker1 @GrosQuildu @Vasco-jofra diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md new file mode 100644 index 0000000..7243d8a --- /dev/null +++ b/CONTRIBUTING.md @@ -0,0 +1,96 @@ +Contributing to Trail of Bits Semgrep Rules +========================= + +Thank you for your interest in contributing to ToB `semgrep-rules`! + +The information below will help you set up a local development environment, +as well as performing common development tasks. + +## Requirements + +`semgrep-rules`'s only development environment requirement *should* be Python 3.7 +or newer. Development and testing is actively performed on macOS and Linux, +but Windows and other supported platforms that are supported by Python +should also work. 
+ +## Development steps + +First, clone this repository: + +```bash +git clone https://github.com/trailofbits/semgrep-rules +cd semgrep-rules +``` + +Then [install semgrep CLI](https://semgrep.dev/docs/getting-started/), and you are good to start development. + +### Linting + +Currently we don't use any linting tools. In the future we plan to use `yamlfmt`. + +### Testing + +You can run tests locally with: + +```bash +semgrep --test --test-ignore-todo --metrics=off +``` + +To test a specific file: + +```bash +semgrep --test --test-ignore-todo --metrics=off --config ./go/iterate-over-empty-map.yaml ./go/iterate-over-empty-map.go +``` + +### Development practices + +Before publishing a new rule, or updating an existing one, make sure to review the checklist below: + +- [ ] Add metadata. Semgrep [defines which metadata fields are required](https://semgrep.dev/docs/contributing/contributing-to-semgrep-rules-repository/#writing-a-rule-for-semgrep-registry) + - [ ] Add a non-standard `metadata.description` field. It will be used as a description in the `semgrep-rules` README table. + - For `metadata.references` provide a link to official documentation, Trail of Bits blogpost, GitHub issue, or some reputable website. Avoid linking to websites that may disappear in the future. 
+ +- [ ] Validate metadata against the official schema + - Download python validation script `wget https://raw.githubusercontent.com/returntocorp/semgrep-rules/develop/.github/scripts/validate-metadata.py` + - Download rules schema `wget https://raw.githubusercontent.com/returntocorp/semgrep-rules/develop/metadata-schema.yaml.schm` + - Run `python ./validate-metadata.py -s ./metadata-schema.yaml.schm -f .` + +- [ ] Add tests + - [ ] At least one true positive (`ruleid: ` comment) + - [ ] At least one true negative (`ok: ` comment) + - Tests are allowed to crash when running them directly or to be meaningless + - However, try writing tests that can be compiled or parsed by the language interpreter + - The first few test cases should be easy to understand, the later ones should be more complex or check for edge-cases + - [ ] Make sure all tests pass, run `semgrep --test --test-ignore-todo --metrics=off` + +- [ ] Run official semgrep lints with `semgrep --validate --metrics=off --config ./.yaml` + +- [ ] Review style of the rules + - [ ] Use 2 spaces for indentation + - [ ] Use `>-` for multiline messages + - [ ] Use backticks in messages e.g., `$VAR`, `$FUNC`, `some.method()` + - The `languages` field in `[go, java]` format is preferable (not `- go \n -java`) + +- [ ] Check the number of false positives on some large public repositories + +- [ ] Check performance - take a look at [r2c methodology](https://github.com/returntocorp/semgrep-rules/blob/main/tests/performance/test_public_repos.py) + +- [ ] Add the new rules to the README + - Run `python ./rules_table_generator.py` to re-generate the table + - Manually check if the table was correctly generated + +### Documentation + +We don't provide any documentation for the rules. All information that you need to understand a rule is inside it. Semgrep documentation can be found [here](https://semgrep.dev/docs/). + +### Releasing + +**NOTE**: If you're a non-maintaining contributor, you don't need the steps +here! 
They're documented for completeness and for onboarding future maintainers. + +We don't have a release cycle yet. + +All changes to the repository's `main` branch are automatically pushed to the semgrep registry (with a GitHub action). + +Modifying rule's filename, path, or ID will result in duplication of the rule in the registry. +This is a known issue, r2c team still works on resolving it. diff --git a/README.md b/README.md index f58749f..e988a52 100644 --- a/README.md +++ b/README.md @@ -45,33 +45,61 @@ $ semgrep --config /path/to/semgrep-rules/hanging-goroutine.yml -o leaks.txt' ## Rules -Rule ID | Language | What it Finds ---- | --- | --- -[anonymous-race-condition](go/anonymous-race-condition.yml) | Go | Race conditions within anonymous goroutines -[hanging-goroutine](go/hanging-goroutine.yml) | Go | Goroutine leaks -[iterate-over-empty-collection](go/iterate-over-empty-collection.yml) | Go | Iterations over empty collection -[nil-check-after-call](go/nil-check-after-call.yml) | Go | Possible nil dereferences -[invalid-usage-of-modified-variable](go/invalid-usage-of-modified-variable.yml) | Go | Possible unintentional assignment when an error occurs -[servercodec-readrequestbody-unhandled-nil](go/servercodec-readrequestbody-unhandled-nil.yml) | Go | Possible incorrect `ServerCodec` interface implementation -[string-to-int-signedness-cast](go/string-to-int-signedness-cast.yml) | Go | Integer underflows -[sync-mutex-value-copied](go/sync-mutex-value-copied.yml) | Go | Copying of `sync.Mutex` via value receivers -[waitgroup-add-called-inside-goroutine](go/waitgroup-add-called-inside-goroutine.yml) | Go | Calls to `sync.WaitGroup.Add` inside of anonymous goroutines -[waitgroup-wait-inside-loop](go/waitgroup-wait-inside-loop.yml) | Go | Calls to `sync.WaitGroup.Wait` inside a loop -[racy-append-to-slice](go/racy-append-to-slice.yml) | Go | Concurrent calls to `append` from multiple goroutines -[racy-write-to-map](go/racy-write-to-map.yml) | Go | Concurrent writes 
to the same map in multiple goroutines -[missing-unlock-before-return](go/missing-unlock-before-return.yml) | Go | Missing mutex unlock before returning from a function. This could cause panics resulting from double lock operations -[missing-runlock-on-rwmutex](go/missing-runlock-on-rwmutex.yml) | Go | Missing RUnlock on an RWMutex lock before returning from a function -[unsafe-dll-loading.yml](go/unsafe-dll-loading.yml) | Go | Use of function vulnerable to DLL hijacking attacks -[tarfile-extractall-traversal](python/tarfile-extractall-traversal.yml) | Python | Potential path traversal in call to `extractall` for a `tarfile` -[automatic-memory-pinning](python/automatic-memory-pinning.yml) | Python | Memory is not automatically pinned -[lxml-in-pandas](python/lxml-in-pandas.yml) | Python | Potential XXE attacks from loading lxml in pandas -[numpy-in-pytorch-modules](python/numpy-in-pytorch-modules.yml) | Python | Uses NumPy functions inside PyTorch modules -[numpy-in-torch-datasets](python/numpy-in-torch-datasets.yml) | Python | Calls to the Numpy RNG inside of a Torch dataset -[pickles-in-numpy](python/pickles-in-numpy.yml) | Python | Potential arbitrary code execution from NumPy functions reliant on pickling -[pickles-in-pandas](python/pickles-in-pandas.yml) | Python | Potential arbitrary code execution from Pandas functions reliant on pickling -[pickles-in-pytorch](python/pickles-in-pytorch.yml) | Python | Potential arbitrary code execution from PyTorch functions reliant on pickling -[pickles-in-torch-distributed](python/pickles-in-torch-distributed.yml) | Python | Potential arbitrary code execution from PyTorch Distributed functions reliant on pickling -[torch-package](python/torch-package.yml) | Python | Potential arbitrary code execution from torch.package -[torch-tensor](python/torch-tensor.yml) | Python | Possible parsing issues and inefficiency from improper tensor creation -[waiting-with-torch-distributed](python/waiting-with-torch-distributed.yml) | 
Python | Possible undefined behavior when not waiting for requests -[panic-in-function-returning-result](rs/panic-in-function-returning-result.yml) | Rust | Calling `unwrap` or `expect` in a function returning a `Result` +### go + +| ID | Playground | Impact | Confidence | Description | +| -- | :--------: | :----: | :--------: | ----------- | +| [anonymous-race-condition](go/anonymous-race-condition.yaml) | [🛝🔗](https://semgrep.dev/playground/r/trailofbits.go.anonymous-race-condition.anonymous-race-condition) | 🟧 | 🌗 | Race conditions within anonymous goroutines | +| [hanging-goroutine](go/hanging-goroutine.yaml) | [🛝🔗](https://semgrep.dev/playground/r/trailofbits.go.hanging-goroutine.hanging-goroutine) | 🟩 | 🌗 | Goroutine leaks | +| [invalid-usage-of-modified-variable](go/invalid-usage-of-modified-variable.yaml) | [🛝🔗](https://semgrep.dev/playground/r/trailofbits.go.invalid-usage-of-modified-variable.invalid-usage-of-modified-variable) | 🟧 | 🌘 | Possible unintentional assignment when an error occurs | +| [iterate-over-empty-map](go/iterate-over-empty-map.yaml) | [🛝🔗](https://semgrep.dev/playground/r/trailofbits.go.iterate-over-empty-map.iterate-over-empty-map) | 🟩 | 🌗 | Probably redundant iteration over an empty map | +| [missing-runlock-on-rwmutex](go/missing-runlock-on-rwmutex.yaml) | [🛝🔗](https://semgrep.dev/playground/r/trailofbits.go.missing-runlock-on-rwmutex.missing-runlock-on-rwmutex) | 🟧 | 🌗 | Missing `RUnlock` on an `RWMutex` lock before returning from a function | +| [missing-unlock-before-return](go/missing-unlock-before-return.yaml) | [🛝🔗](https://semgrep.dev/playground/r/trailofbits.go.missing-unlock-before-return.missing-unlock-before-return) | 🟧 | 🌗 | Missing `mutex` unlock before returning from a function | +| [nil-check-after-call](go/nil-check-after-call.yaml) | [🛝🔗](https://semgrep.dev/playground/r/trailofbits.go.nil-check-after-call.nil-check-after-call) | 🟧 | 🌗 | Possible nil dereferences | +| 
[racy-append-to-slice](go/racy-append-to-slice.yaml) | [🛝🔗](https://semgrep.dev/playground/r/trailofbits.go.racy-append-to-slice.racy-append-to-slice) | 🟧 | 🌗 | Concurrent calls to `append` from multiple goroutines | +| [racy-write-to-map](go/racy-write-to-map.yaml) | [🛝🔗](https://semgrep.dev/playground/r/trailofbits.go.racy-write-to-map.racy-write-to-map) | 🟧 | 🌗 | Concurrent writes to the same map in multiple goroutines | +| [servercodec-readrequestbody-unhandled-nil](go/servercodec-readrequestbody-unhandled-nil.yaml) | [🛝🔗](https://semgrep.dev/playground/r/trailofbits.go.servercodec-readrequestbody-unhandled-nil.servercodec-readrequestbody-unhandled-nil) | 🟩 | 🌘 | Possible incorrect `ServerCodec` interface implementation | +| [string-to-int-signedness-cast](go/string-to-int-signedness-cast.yaml) | [🛝🔗](https://semgrep.dev/playground/r/trailofbits.go.string-to-int-signedness-cast.string-to-int-signedness-cast) | 🟧 | 🌘 | Integer underflows | +| [sync-mutex-value-copied](go/sync-mutex-value-copied.yaml) | [🛝🔗](https://semgrep.dev/playground/r/trailofbits.go.sync-mutex-value-copied.sync-mutex-value-copied) | 🟩 | 🌘 | Copying of `sync.Mutex` via value receivers | +| [unsafe-dll-loading](go/unsafe-dll-loading.yaml) | [🛝🔗](https://semgrep.dev/playground/r/trailofbits.go.unsafe-dll-loading.unsafe-dll-loading) | 🟥 | 🌘 | Use of function vulnerable to DLL hijacking attacks | +| [waitgroup-add-called-inside-goroutine](go/waitgroup-add-called-inside-goroutine.yaml) | [🛝🔗](https://semgrep.dev/playground/r/trailofbits.go.waitgroup-add-called-inside-goroutine.waitgroup-add-called-inside-goroutine) | 🟧 | 🌗 | Calls to `sync.WaitGroup.Add` inside of anonymous goroutines | +| [waitgroup-wait-inside-loop](go/waitgroup-wait-inside-loop.yaml) | [🛝🔗](https://semgrep.dev/playground/r/trailofbits.go.waitgroup-wait-inside-loop.waitgroup-wait-inside-loop) | 🟧 | 🌗 | Calls to `sync.WaitGroup.Wait` inside a loop | + + +### python + +| ID | Playground | Impact | Confidence | Description | +| -- 
| :--------: | :----: | :--------: | ----------- | +| [automatic-memory-pinning](python/automatic-memory-pinning.yaml) | [🛝🔗](https://semgrep.dev/playground/r/trailofbits.python.automatic-memory-pinning.automatic-memory-pinning) | 🟩 | 🌘 | `PyTorch` memory not automatically pinned | +| [lxml-in-pandas](python/lxml-in-pandas.yaml) | [🛝🔗](https://semgrep.dev/playground/r/trailofbits.python.lxml-in-pandas.lxml-in-pandas) | 🟧 | 🌘 | Potential XXE attacks from loading `lxml` in pandas | +| [numpy-distutils](python/numpy-distutils.yaml) | [🛝🔗](https://semgrep.dev/playground/r/trailofbits.python.numpy-distutils.numpy-distutils) | 🟩 | 🌘 | Use of deprecated `numpy.distutils` | +| [numpy-f2py-compile](python/numpy-f2py-compile.yaml) | [🛝🔗](https://semgrep.dev/playground/r/trailofbits.python.numpy-f2py-compile.numpy-f2py-compile) | 🟥 | 🌗 | Potential arbitrary code execution from `NumPy` `f2py` compilation | +| [numpy-in-pytorch-datasets](python/numpy-in-pytorch-datasets.yaml) | [🛝🔗](https://semgrep.dev/playground/r/trailofbits.python.numpy-in-pytorch-datasets.numpy-in-pytorch-datasets) | 🟩 | 🌘 | Calls to the `NumPy` RNG inside of a `Torch` dataset | +| [numpy-in-pytorch-modules](python/numpy-in-pytorch-modules.yaml) | [🛝🔗](https://semgrep.dev/playground/r/trailofbits.python.numpy-in-pytorch-modules.numpy-in-pytorch-modules) | 🌫️ | 🌗 | Uses of `NumPy` functions inside `PyTorch` modules | +| [numpy-load-library](python/numpy-load-library.yaml) | [🛝🔗](https://semgrep.dev/playground/r/trailofbits.python.numpy-load-library.numpy-load-library) | 🟥 | 🌗 | Potential arbitrary code execution from `NumPy` library loading | +| [onnx-session-options](python/onnx-session-options.yaml) | [🛝🔗](https://semgrep.dev/playground/r/trailofbits.python.onnx-session-options.onnx-session-options) | 🟥 | 🌗 | Potential arbitrary code execution from `ONNX` library loading | +| [pickles-in-numpy](python/pickles-in-numpy.yaml) | 
[🛝🔗](https://semgrep.dev/playground/r/trailofbits.python.pickles-in-numpy.pickles-in-numpy) | 🟥 | 🌗 | Potential arbitrary code execution from `NumPy` functions reliant on pickling | +| [pickles-in-pandas](python/pickles-in-pandas.yaml) | [🛝🔗](https://semgrep.dev/playground/r/trailofbits.python.pickles-in-pandas.pickles-in-pandas) | 🟥 | 🌗 | Potential arbitrary code execution from `Pandas` functions reliant on pickling | +| [pickles-in-pytorch-distributed](python/pickles-in-pytorch-distributed.yaml) | [🛝🔗](https://semgrep.dev/playground/r/trailofbits.python.pickles-in-pytorch-distributed.pickles-in-pytorch-distributed) | 🟥 | 🌗 | Potential arbitrary code execution from `PyTorch.Distributed` functions reliant on pickling | +| [pickles-in-pytorch](python/pickles-in-pytorch.yaml) | [🛝🔗](https://semgrep.dev/playground/r/trailofbits.python.pickles-in-pytorch.pickles-in-pytorch) | 🟥 | 🌗 | Potential arbitrary code execution from `PyTorch` functions reliant on pickling | +| [pytorch-classes-load-library](python/pytorch-classes-load-library.yaml) | [🛝🔗](https://semgrep.dev/playground/r/trailofbits.python.pytorch-classes-load-library.pytorch-classes-load-library) | 🟥 | 🌗 | Potential arbitrary code execution from `PyTorch` library loading | +| [pytorch-package](python/pytorch-package.yaml) | [🛝🔗](https://semgrep.dev/playground/r/trailofbits.python.pytorch-package.pytorch-package) | 🟥 | 🌕 | Potential arbitrary code execution from `torch.package` | +| [pytorch-tensor](python/pytorch-tensor.yaml) | [🛝🔗](https://semgrep.dev/playground/r/trailofbits.python.pytorch-tensor.pytorch-tensor) | 🌫️ | 🌘 | Possible parsing issues and inefficiency from improper tensor creation | +| [scikit-joblib-load](python/scikit-joblib-load.yaml) | [🛝🔗](https://semgrep.dev/playground/r/trailofbits.python.scikit-joblib-load.scikit-joblib-load) | 🟥 | 🌗 | Potential arbitrary code execution from `SciKit.Joblib` functions reliant on pickling | +| 
[tarfile-extractall-traversal](python/tarfile-extractall-traversal.yaml) | [🛝🔗](https://semgrep.dev/playground/r/trailofbits.python.tarfile-extractall-traversal.tarfile-extractall-traversal) | 🟧 | 🌗 | Potential path traversal in call to `extractall` for a `tarfile` | +| [tensorflow-load-library](python/tensorflow-load-library.yaml) | [🛝🔗](https://semgrep.dev/playground/r/trailofbits.python.tensorflow-load-library.tensorflow-load-library) | 🟥 | 🌗 | Potential arbitrary code execution from `TensorFlow` library loading | +| [waiting-with-pytorch-distributed](python/waiting-with-pytorch-distributed.yaml) | [🛝🔗](https://semgrep.dev/playground/r/trailofbits.python.waiting-with-pytorch-distributed.waiting-with-pytorch-distributed) | 🟩 | 🌗 | Possible `PyTorch` undefined behavior when not waiting for requests | + + +### rs + +| ID | Playground | Impact | Confidence | Description | +| -- | :--------: | :----: | :--------: | ----------- | +| [panic-in-function-returning-result](rs/panic-in-function-returning-result.yaml) | [🛝🔗](https://semgrep.dev/playground/r/trailofbits.rs.panic-in-function-returning-result.panic-in-function-returning-result) | 🟩 | 🌘 | Calling `unwrap` or `expect` in a function returning a `Result` | + + +## Contributing + +Pull Requests and issues are welcomed! + +See [CONTRIBUTING.md](CONTRIBUTING.md) for more information. 
diff --git a/go/anonymous-race-condition.yml b/go/anonymous-race-condition.yaml similarity index 96% rename from go/anonymous-race-condition.yml rename to go/anonymous-race-condition.yaml index 9846323..c4c194c 100644 --- a/go/anonymous-race-condition.yml +++ b/go/anonymous-race-condition.yaml @@ -11,6 +11,8 @@ rules: confidence: MEDIUM likelihood: HIGH impact: MEDIUM + technology: [--no-technology--] + description: "Race conditions within anonymous goroutines" references: - https://github.com/golang/go/wiki/CommonMistakes#using-goroutines-on-loop-iterator-variables diff --git a/go/hanging-goroutine.yml b/go/hanging-goroutine.yaml similarity index 97% rename from go/hanging-goroutine.yml rename to go/hanging-goroutine.yaml index 6ed55b1..05b7e45 100644 --- a/go/hanging-goroutine.yml +++ b/go/hanging-goroutine.yaml @@ -11,6 +11,8 @@ rules: confidence: MEDIUM likelihood: MEDIUM impact: LOW + technology: [--no-technology--] + description: "Goroutine leaks" references: - https://blog.trailofbits.com/2021/11/08/discovering-goroutine-leaks-with-semgrep diff --git a/go/invalid-usage-of-modified-variable.yml b/go/invalid-usage-of-modified-variable.yaml similarity index 77% rename from go/invalid-usage-of-modified-variable.yml rename to go/invalid-usage-of-modified-variable.yaml index 8116925..95922a4 100644 --- a/go/invalid-usage-of-modified-variable.yml +++ b/go/invalid-usage-of-modified-variable.yaml @@ -12,6 +12,10 @@ rules: confidence: HIGH likelihood: MEDIUM impact: MEDIUM + technology: [--no-technology--] + description: "Possible unintentional assignment when an error occurs" + references: + - https://blog.trailofbits.com/2019/11/07/attacking-go-vr-ttps/ patterns: - pattern-either: diff --git a/go/iterate-over-empty-map.yml b/go/iterate-over-empty-map.yaml similarity index 83% rename from go/iterate-over-empty-map.yml rename to go/iterate-over-empty-map.yaml index c898433..ded1426 100644 --- a/go/iterate-over-empty-map.yml +++ b/go/iterate-over-empty-map.yaml @@ 
-11,7 +11,11 @@ rules: confidence: MEDIUM likelihood: LOW impact: LOW - + technology: [--no-technology--] + description: "Probably redundant iteration over an empty map" + references: + - https://blog.trailofbits.com/2019/11/07/attacking-go-vr-ttps/ + patterns: - pattern: | $C = make(map[$T1] $T2, ...) diff --git a/go/missing-runlock-on-rwmutex.yml b/go/missing-runlock-on-rwmutex.yaml similarity index 77% rename from go/missing-runlock-on-rwmutex.yml rename to go/missing-runlock-on-rwmutex.yaml index 574906a..170bbf8 100644 --- a/go/missing-runlock-on-rwmutex.yml +++ b/go/missing-runlock-on-rwmutex.yaml @@ -11,8 +11,11 @@ rules: confidence: MEDIUM likelihood: HIGH impact: MEDIUM + technology: [--no-technology--] + description: "Missing `RUnlock` on an `RWMutex` lock before returning from a function" references: - https://pkg.go.dev/sync#RWMutex + - https://blog.trailofbits.com/2020/06/09/how-to-check-if-a-mutex-is-locked-in-go/ patterns: - pattern-either: diff --git a/go/missing-unlock-before-return.yml b/go/missing-unlock-before-return.yaml similarity index 80% rename from go/missing-unlock-before-return.yml rename to go/missing-unlock-before-return.yaml index 9862256..5a70f45 100644 --- a/go/missing-unlock-before-return.yml +++ b/go/missing-unlock-before-return.yaml @@ -12,8 +12,11 @@ rules: confidence: MEDIUM likelihood: HIGH impact: MEDIUM + technology: [--no-technology--] + description: "Missing `mutex` unlock before returning from a function" references: - https://pkg.go.dev/sync#Mutex + - https://blog.trailofbits.com/2020/06/09/how-to-check-if-a-mutex-is-locked-in-go/ patterns: - pattern-either: diff --git a/go/nil-check-after-call.yml b/go/nil-check-after-call.yaml similarity index 90% rename from go/nil-check-after-call.yml rename to go/nil-check-after-call.yaml index 117d5f6..374ed9b 100644 --- a/go/nil-check-after-call.yml +++ b/go/nil-check-after-call.yaml @@ -10,7 +10,11 @@ rules: confidence: MEDIUM likelihood: MEDIUM impact: MEDIUM - + technology: 
[--no-technology--] + description: "Possible nil dereferences" + references: + - https://blog.trailofbits.com/2019/11/07/attacking-go-vr-ttps/ + patterns: - pattern-either: - pattern: | diff --git a/go/racy-append-to-slice.yml b/go/racy-append-to-slice.yaml similarity index 86% rename from go/racy-append-to-slice.yml rename to go/racy-append-to-slice.yaml index 88c6491..8bbaaef 100644 --- a/go/racy-append-to-slice.yml +++ b/go/racy-append-to-slice.yaml @@ -11,6 +11,10 @@ rules: confidence: MEDIUM likelihood: HIGH impact: MEDIUM + technology: [--no-technology--] + description: "Concurrent calls to `append` from multiple goroutines" + references: + - https://go.dev/blog/maps#concurrency patterns: - pattern: | diff --git a/go/racy-write-to-map.yml b/go/racy-write-to-map.yaml similarity index 81% rename from go/racy-write-to-map.yml rename to go/racy-write-to-map.yaml index 2073032..4db1710 100644 --- a/go/racy-write-to-map.yml +++ b/go/racy-write-to-map.yaml @@ -11,6 +11,10 @@ rules: confidence: MEDIUM likelihood: HIGH impact: MEDIUM + technology: [--no-technology--] + description: "Concurrent writes to the same map in multiple goroutines" + references: + - https://go.dev/blog/maps#concurrency patterns: - pattern: | diff --git a/go/servercodec-readrequestbody-unhandled-nil.yml b/go/servercodec-readrequestbody-unhandled-nil.yaml similarity index 89% rename from go/servercodec-readrequestbody-unhandled-nil.yml rename to go/servercodec-readrequestbody-unhandled-nil.yaml index 4d0eea6..a65f2c1 100644 --- a/go/servercodec-readrequestbody-unhandled-nil.yml +++ b/go/servercodec-readrequestbody-unhandled-nil.yaml @@ -12,6 +12,8 @@ rules: confidence: HIGH likelihood: MEDIUM impact: LOW + technology: [--no-technology--] + description: "Possible incorrect `ServerCodec` interface implementation" references: - https://github.com/golang/go/blob/go1.15.2/src/net/rpc/server.go#L643-L658 diff --git a/go/string-to-int-signedness-cast.yml b/go/string-to-int-signedness-cast.yaml 
similarity index 95% rename from go/string-to-int-signedness-cast.yml rename to go/string-to-int-signedness-cast.yaml index 0071899..4689e4d 100644 --- a/go/string-to-int-signedness-cast.yml +++ b/go/string-to-int-signedness-cast.yaml @@ -10,6 +10,10 @@ rules: confidence: HIGH likelihood: LOW impact: MEDIUM + technology: [--no-technology--] + description: "Integer underflows" + references: + - https://github.com/golang/go/issues/30209 pattern-either: # 64 -> lower or sign diff --git a/go/sync-mutex-value-copied.yml b/go/sync-mutex-value-copied.yaml similarity index 89% rename from go/sync-mutex-value-copied.yml rename to go/sync-mutex-value-copied.yaml index 19fbcea..5e2d928 100644 --- a/go/sync-mutex-value-copied.yml +++ b/go/sync-mutex-value-copied.yaml @@ -12,6 +12,8 @@ rules: confidence: HIGH likelihood: HIGH impact: LOW + technology: [--no-technology--] + description: "Copying of `sync.Mutex` via value receivers" references: - https://go101.org/article/concurrent-common-mistakes.html diff --git a/go/unsafe-dll-loading.yml b/go/unsafe-dll-loading.yaml similarity index 84% rename from go/unsafe-dll-loading.yml rename to go/unsafe-dll-loading.yaml index d138b44..3cfad88 100644 --- a/go/unsafe-dll-loading.yml +++ b/go/unsafe-dll-loading.yaml @@ -12,6 +12,10 @@ rules: confidence: HIGH likelihood: MEDIUM impact: HIGH + technology: [--no-technology--] + description: "Use of function vulnerable to DLL hijacking attacks" + references: + - https://pkg.go.dev/golang.org/x/sys/windows#LazyDLL patterns: - pattern-either: diff --git a/go/waitgroup-add-called-inside-goroutine.yml b/go/waitgroup-add-called-inside-goroutine.yaml similarity index 89% rename from go/waitgroup-add-called-inside-goroutine.yml rename to go/waitgroup-add-called-inside-goroutine.yaml index 3afe275..5ba575e 100644 --- a/go/waitgroup-add-called-inside-goroutine.yml +++ b/go/waitgroup-add-called-inside-goroutine.yaml @@ -12,6 +12,8 @@ rules: confidence: MEDIUM likelihood: MEDIUM impact: MEDIUM + 
technology: [--no-technology--] + description: "Calls to `sync.WaitGroup.Add` inside of anonymous goroutines" references: - https://go101.org/article/concurrent-common-mistakes.html diff --git a/go/waitgroup-wait-inside-loop.yml b/go/waitgroup-wait-inside-loop.yaml similarity index 93% rename from go/waitgroup-wait-inside-loop.yml rename to go/waitgroup-wait-inside-loop.yaml index fb5bb45..416410e 100644 --- a/go/waitgroup-wait-inside-loop.yml +++ b/go/waitgroup-wait-inside-loop.yaml @@ -11,6 +11,8 @@ rules: confidence: MEDIUM likelihood: MEDIUM impact: MEDIUM + technology: [--no-technology--] + description: "Calls to `sync.WaitGroup.Wait` inside a loop" references: - https://go101.org/article/concurrent-common-mistakes.html diff --git a/python/automatic-memory-pinning.yml b/python/automatic-memory-pinning.yaml similarity index 88% rename from python/automatic-memory-pinning.yml rename to python/automatic-memory-pinning.yaml index 0401fe6..d5d1c75 100644 --- a/python/automatic-memory-pinning.yml +++ b/python/automatic-memory-pinning.yaml @@ -11,6 +11,8 @@ rules: confidence: HIGH likelihood: LOW impact: LOW + technology: [pytorch] + description: "`PyTorch` memory not automatically pinned" references: - https://pytorch.org/docs/stable/data.html#memory-pinning diff --git a/python/lxml-in-pandas.yml b/python/lxml-in-pandas.yaml similarity index 91% rename from python/lxml-in-pandas.yml rename to python/lxml-in-pandas.yaml index 1806e31..66a96d3 100644 --- a/python/lxml-in-pandas.yml +++ b/python/lxml-in-pandas.yaml @@ -11,6 +11,8 @@ rules: confidence: HIGH likelihood: MEDIUM impact: MEDIUM + technology: [pandas] + description: "Potential XXE attacks from loading `lxml` in pandas" references: - https://lxml.de/FAQ.html diff --git a/python/numpy-distutils.yml b/python/numpy-distutils.yaml similarity index 85% rename from python/numpy-distutils.yml rename to python/numpy-distutils.yaml index 5268745..7c8baee 100644 --- a/python/numpy-distutils.yml +++ 
b/python/numpy-distutils.yaml @@ -10,6 +10,8 @@ rules: confidence: HIGH likelihood: MEDIUM impact: LOW + technology: [numpy] + description: "Use of deprecated `numpy.distutils`" references: - https://numpy.org/doc/stable/reference/distutils.html diff --git a/python/numpy-f2py-compile.yml b/python/numpy-f2py-compile.yaml similarity index 83% rename from python/numpy-f2py-compile.yml rename to python/numpy-f2py-compile.yaml index 167c50b..0d65ba3 100644 --- a/python/numpy-f2py-compile.yml +++ b/python/numpy-f2py-compile.yaml @@ -10,6 +10,8 @@ rules: confidence: MEDIUM likelihood: MEDIUM impact: HIGH + technology: [numpy] + description: "Potential arbitrary code execution from `NumPy` `f2py` compilation" references: - https://numpy.org/doc/stable/f2py/usage.html diff --git a/python/numpy-in-torch-datasets.py b/python/numpy-in-pytorch-datasets.py similarity index 86% rename from python/numpy-in-torch-datasets.py rename to python/numpy-in-pytorch-datasets.py index c8c8fcb..aa60f79 100644 --- a/python/numpy-in-torch-datasets.py +++ b/python/numpy-in-pytorch-datasets.py @@ -2,7 +2,7 @@ from torch.utils.data import Dataset from tob.strangelib import Dataset as DatasetStrange -# ruleid: numpy-in-torch-datasets +# ruleid: numpy-in-pytorch-datasets class RandomDataset(Dataset): def __getitem__(self, index): return np.random.randint(0, 1000, 3) @@ -11,7 +11,7 @@ def __len__(self): return 1000 -# ruleid: numpy-in-torch-datasets +# ruleid: numpy-in-pytorch-datasets class AnotherRandomDataset(Dataset): def __len__(self): return 1000 @@ -21,7 +21,7 @@ def __getitem__(self, index): x = np.random.randint(0, 1000, 3) return x -# ruleid: numpy-in-torch-datasets +# ruleid: numpy-in-pytorch-datasets class AnotherRandomDatasetOther(Dataset): def __len__(self): return 1000 @@ -31,7 +31,7 @@ def __getitem__(self, index): x = numpy.random.randint(0, 1000, 3) return x -# ok: numpy-in-torch-datasets +# ok: numpy-in-pytorch-datasets class NotTorchDataset(DatasetStrange): def __len__(self): 
return 1000 @@ -41,7 +41,7 @@ def __getitem__(self, index): x = numpy.random.randint(0, 1000, 3) return x -# ok: numpy-in-torch-datasets +# ok: numpy-in-pytorch-datasets class YetAnotherRandomDataset(Dataset): def __len__(self): return 1000 diff --git a/python/numpy-in-torch-datasets.yml b/python/numpy-in-pytorch-datasets.yaml similarity index 69% rename from python/numpy-in-torch-datasets.yml rename to python/numpy-in-pytorch-datasets.yaml index f201fc7..cd78142 100644 --- a/python/numpy-in-torch-datasets.yml +++ b/python/numpy-in-pytorch-datasets.yaml @@ -1,7 +1,7 @@ rules: -- id: numpy-in-torch-datasets +- id: numpy-in-pytorch-datasets message: >- - Using the NumPy RNG inside of a Torch dataset can lead to a number of issues with loading data, including identical augmentations. + Using the NumPy RNG inside of a PyTorch dataset can lead to a number of issues with loading data, including identical augmentations. Instead, use the random number generators built into Python and PyTorch languages: [python] severity: WARNING @@ -12,6 +12,8 @@ rules: confidence: HIGH likelihood: MEDIUM impact: LOW + technology: [pytorch, numpy] + description: "Calls to the `NumPy` RNG inside of a `PyTorch` dataset" references: - https://tanelp.github.io/posts/a-bug-that-plagues-thousands-of-open-source-ml-projects diff --git a/python/numpy-in-pytorch-modules.yml b/python/numpy-in-pytorch-modules.yaml similarity index 67% rename from python/numpy-in-pytorch-modules.yml rename to python/numpy-in-pytorch-modules.yaml index becf77b..d7710ee 100644 --- a/python/numpy-in-pytorch-modules.yml +++ b/python/numpy-in-pytorch-modules.yaml @@ -9,7 +9,11 @@ rules: category: performance subcategory: [audit] confidence: MEDIUM - + technology: [pytorch, numpy] + description: "Uses of `NumPy` functions inside `PyTorch` modules" + references: + - https://tanelp.github.io/posts/a-bug-that-plagues-thousands-of-open-source-ml-projects + patterns: - pattern: $RESULT = numpy.$FUNCTION(...)
- pattern-inside: | diff --git a/python/numpy-load-library.yml b/python/numpy-load-library.yaml similarity index 84% rename from python/numpy-load-library.yml rename to python/numpy-load-library.yaml index c9a14b4..f9c9748 100644 --- a/python/numpy-load-library.yml +++ b/python/numpy-load-library.yaml @@ -10,6 +10,8 @@ rules: confidence: MEDIUM likelihood: MEDIUM impact: HIGH + technology: [numpy] + description: "Potential arbitrary code execution from `NumPy` library loading" references: - https://numpy.org/doc/stable/reference/routines.ctypeslib.html#numpy.ctypeslib.load_library diff --git a/python/onnx-session-options.yml b/python/onnx-session-options.yaml similarity index 86% rename from python/onnx-session-options.yml rename to python/onnx-session-options.yaml index b78aadf..0e022e6 100644 --- a/python/onnx-session-options.yml +++ b/python/onnx-session-options.yaml @@ -10,6 +10,8 @@ rules: confidence: MEDIUM likelihood: MEDIUM impact: HIGH + technology: [onnx] + description: "Potential arbitrary code execution from `ONNX` library loading" references: - https://onnxruntime.ai/docs/reference/operators/add-custom-op.html diff --git a/python/pickles-in-numpy.yml b/python/pickles-in-numpy.yaml similarity index 88% rename from python/pickles-in-numpy.yml rename to python/pickles-in-numpy.yaml index 57e8423..49d9eb2 100644 --- a/python/pickles-in-numpy.yml +++ b/python/pickles-in-numpy.yaml @@ -12,6 +12,8 @@ rules: confidence: MEDIUM likelihood: MEDIUM impact: HIGH + technology: [numpy] + description: "Potential arbitrary code execution from `NumPy` functions reliant on pickling" references: - https://blog.trailofbits.com/2021/03/15/never-a-dill-moment-exploiting-machine-learning-pickle-files/ diff --git a/python/pickles-in-pandas.yml b/python/pickles-in-pandas.yaml similarity index 88% rename from python/pickles-in-pandas.yml rename to python/pickles-in-pandas.yaml index cf08859..40df088 100644 --- a/python/pickles-in-pandas.yml +++ b/python/pickles-in-pandas.yaml 
@@ -12,6 +12,8 @@ rules: confidence: MEDIUM likelihood: MEDIUM impact: HIGH + technology: [pandas] + description: "Potential arbitrary code execution from `Pandas` functions reliant on pickling" references: - https://blog.trailofbits.com/2021/03/15/never-a-dill-moment-exploiting-machine-learning-pickle-files/ diff --git a/python/pickles-in-torch-distributed.py b/python/pickles-in-pytorch-distributed.py similarity index 69% rename from python/pickles-in-torch-distributed.py rename to python/pickles-in-pytorch-distributed.py index 2724e1c..077b2f7 100644 --- a/python/pickles-in-torch-distributed.py +++ b/python/pickles-in-pytorch-distributed.py @@ -5,21 +5,21 @@ else: objects = [None, None] -# ruleid: pickles-in-torch-distributed +# ruleid: pickles-in-pytorch-distributed dist.broadcast_object_list(objects, src=0) -# ruleid: pickles-in-torch-distributed +# ruleid: pickles-in-pytorch-distributed dist.all_gather_object(output, gather_objects[dist.get_rank()]) -# ruleid: pickles-in-torch-distributed +# ruleid: pickles-in-pytorch-distributed dist.gather_object( gather_objects[dist.get_rank()], output if dist.get_rank() == 0 else None, dst=0 ) -# ruleid: pickles-in-torch-distributed +# ruleid: pickles-in-pytorch-distributed dist.scatter_object_list(output_list, objects, src=0) -# ok: pickles-in-torch-distributed +# ok: pickles-in-pytorch-distributed dist.scatter(output_list, objects, src=0) diff --git a/python/pickles-in-torch-distributed.yml b/python/pickles-in-pytorch-distributed.yaml similarity index 80% rename from python/pickles-in-torch-distributed.yml rename to python/pickles-in-pytorch-distributed.yaml index 4ffbe85..d20a5fe 100644 --- a/python/pickles-in-torch-distributed.yml +++ b/python/pickles-in-pytorch-distributed.yaml @@ -1,5 +1,5 @@ rules: -- id: pickles-in-torch-distributed +- id: pickles-in-pytorch-distributed message: >- Functions reliant on pickle can result in arbitrary code execution languages: [python] @@ -11,6 +11,8 @@ rules: confidence: MEDIUM 
likelihood: MEDIUM impact: HIGH + technology: [pytorch] + description: "Potential arbitrary code execution from `PyTorch.Distributed` functions reliant on pickling" references: - https://blog.trailofbits.com/2021/03/15/never-a-dill-moment-exploiting-machine-learning-pickle-files/ diff --git a/python/pickles-in-pytorch.yml b/python/pickles-in-pytorch.yaml similarity index 87% rename from python/pickles-in-pytorch.yml rename to python/pickles-in-pytorch.yaml index 8ff5d2d..a12af33 100644 --- a/python/pickles-in-pytorch.yml +++ b/python/pickles-in-pytorch.yaml @@ -12,6 +12,8 @@ rules: confidence: MEDIUM likelihood: MEDIUM impact: HIGH + technology: [pytorch] + description: "Potential arbitrary code execution from `PyTorch` functions reliant on pickling" references: - https://blog.trailofbits.com/2021/03/15/never-a-dill-moment-exploiting-machine-learning-pickle-files/ diff --git a/python/torch-classes-load-library.py b/python/pytorch-classes-load-library.py similarity index 59% rename from python/torch-classes-load-library.py rename to python/pytorch-classes-load-library.py index 45db056..46292b0 100644 --- a/python/torch-classes-load-library.py +++ b/python/pytorch-classes-load-library.py @@ -2,26 +2,26 @@ path = "lib.so" -# ok: torch-classes-load-library +# ok: pytorch-classes-load-library torch.classes.load_library(path) -# ruleid: torch-classes-load-library +# ruleid: pytorch-classes-load-library torch.classes.load_library(input()) def test1(p): - # ruleid: torch-classes-load-library + # ruleid: pytorch-classes-load-library torch.classes.load_library(input() + p) def test2(p): - # ok: torch-classes-load-library + # ok: pytorch-classes-load-library load_library(p) def test3(p): from torch.classes import load_library - # ruleid: torch-classes-load-library + # ruleid: pytorch-classes-load-library load_library(p) def test4(p): from torch.classes import load_library - # ok: torch-classes-load-library + # ok: pytorch-classes-load-library load_library(path) diff --git 
a/python/torch-classes-load-library.yml b/python/pytorch-classes-load-library.yaml similarity index 78% rename from python/torch-classes-load-library.yml rename to python/pytorch-classes-load-library.yaml index f81236d..b98e78f 100644 --- a/python/torch-classes-load-library.yml +++ b/python/pytorch-classes-load-library.yaml @@ -1,5 +1,5 @@ rules: -- id: torch-classes-load-library +- id: pytorch-classes-load-library message: Loading custom operator libraries can result in arbitrary code execution languages: [python] severity: ERROR @@ -10,6 +10,8 @@ rules: confidence: MEDIUM likelihood: MEDIUM impact: HIGH + technology: [pytorch] + description: "Potential arbitrary code execution from `PyTorch` library loading" references: - https://pytorch.org/tutorials/advanced/torch_script_custom_classes.html diff --git a/python/torch-package.py b/python/pytorch-package.py similarity index 51% rename from python/torch-package.py rename to python/pytorch-package.py index 3ee3ea3..3055a51 100644 --- a/python/torch-package.py +++ b/python/pytorch-package.py @@ -1,11 +1,11 @@ -# ruleid: torch-package +# ruleid: pytorch-package import torch.package -# ruleid: torch-package +# ruleid: pytorch-package from torch import package -# ruleid: torch-package +# ruleid: pytorch-package import torch.package as tp -# ok: torch-package +# ok: pytorch-package import torchx.package as tp diff --git a/python/torch-package.yml b/python/pytorch-package.yaml similarity index 81% rename from python/torch-package.yml rename to python/pytorch-package.yaml index ccb02ae..f347eca 100644 --- a/python/torch-package.yml +++ b/python/pytorch-package.yaml @@ -1,5 +1,5 @@ rules: -- id: torch-package +- id: pytorch-package message: Avoid importing torch.package - it can result in arbitrary code execution via pickle languages: [python] severity: WARNING @@ -10,6 +10,8 @@ rules: confidence: LOW likelihood: MEDIUM impact: HIGH + technology: [pytorch] + description: "Potential arbitrary code execution from 
`torch.package`" references: - https://pytorch.org/docs/1.13/package.html#torch-package - https://blog.trailofbits.com/2021/03/15/never-a-dill-moment-exploiting-machine-learning-pickle-files/ diff --git a/python/torch-tensor.py b/python/pytorch-tensor.py similarity index 57% rename from python/torch-tensor.py rename to python/pytorch-tensor.py index 5b849e2..992f778 100644 --- a/python/torch-tensor.py +++ b/python/pytorch-tensor.py @@ -1,21 +1,21 @@ import torch -# ruleid: torch-tensor +# ruleid: pytorch-tensor y = torch.Tensor(x) def foo(x): - # ruleid: torch-tensor + # ruleid: pytorch-tensor return torch.Tensor(x) import torch as t -# ruleid: torch-tensor +# ruleid: pytorch-tensor y = t.Tensor(x) def foo(x): - # ruleid: torch-tensor + # ruleid: pytorch-tensor return t.Tensor(x) -# ok: torch-tensor +# ok: pytorch-tensor y = torch.tensor([0, 1]) \ No newline at end of file diff --git a/python/torch-tensor.yml b/python/pytorch-tensor.yaml similarity index 70% rename from python/torch-tensor.yml rename to python/pytorch-tensor.yaml index a10e69e..e8b1d60 100644 --- a/python/torch-tensor.yml +++ b/python/pytorch-tensor.yaml @@ -1,5 +1,5 @@ rules: -- id: torch-tensor +- id: pytorch-tensor message: Avoid using `torch.Tensor()` to directly create a tensor for efficiency and proper parsing languages: [python] severity: WARNING @@ -7,6 +7,8 @@ rules: category: performance subcategory: [audit] confidence: HIGH + technology: [pytorch] + description: "Possible parsing issues and inefficiency from improper tensor creation" references: - https://pytorch.org/docs/stable/tensors.html diff --git a/python/scikit-joblib-load.yml b/python/scikit-joblib-load.yaml similarity index 81% rename from python/scikit-joblib-load.yml rename to python/scikit-joblib-load.yaml index 6735acf..574c50f 100644 --- a/python/scikit-joblib-load.yml +++ b/python/scikit-joblib-load.yaml @@ -12,6 +12,8 @@ rules: confidence: MEDIUM likelihood: MEDIUM impact: HIGH + technology: [scikit] + description: 
"Potential arbitrary code execution from `SciKit.Joblib` functions reliant on pickling" references: - https://scikit-learn.org/stable/model_persistence.html diff --git a/python/tarfile-extractall-traversal.yml b/python/tarfile-extractall-traversal.yaml similarity index 90% rename from python/tarfile-extractall-traversal.yml rename to python/tarfile-extractall-traversal.yaml index 86240c9..eb617c7 100644 --- a/python/tarfile-extractall-traversal.yml +++ b/python/tarfile-extractall-traversal.yaml @@ -11,6 +11,8 @@ rules: confidence: MEDIUM likelihood: MEDIUM impact: MEDIUM + technology: [--no-technology--] + description: "Potential path traversal in call to `extractall` for a `tarfile`" references: - https://docs.python.org/3/library/tarfile.html#tarfile.TarFile.extractall diff --git a/python/tensorflow-load-library.yml b/python/tensorflow-load-library.yaml similarity index 86% rename from python/tensorflow-load-library.yml rename to python/tensorflow-load-library.yaml index 01f4811..6bdd814 100644 --- a/python/tensorflow-load-library.yml +++ b/python/tensorflow-load-library.yaml @@ -10,6 +10,8 @@ rules: confidence: MEDIUM likelihood: MEDIUM impact: HIGH + technology: [tensorflow] + description: "Potential arbitrary code execution from `TensorFlow` library loading" references: - https://www.tensorflow.org/api_docs/python/tf/load_library - https://www.tensorflow.org/api_docs/python/tf/load_op_library diff --git a/python/waiting-with-torch-distributed.py b/python/waiting-with-pytorch-distributed.py similarity index 73% rename from python/waiting-with-torch-distributed.py rename to python/waiting-with-pytorch-distributed.py index 5f00d06..6f9dec8 100644 --- a/python/waiting-with-torch-distributed.py +++ b/python/waiting-with-pytorch-distributed.py @@ -6,19 +6,19 @@ def run(rank, size): req = None if rank == 0: tensor += 1 - # ok: waiting-with-torch-distributed + # ok: waiting-with-pytorch-distributed req = dist.isend(tensor=tensor, dst=1) print('Rank 0 started sending') 
else: - # ok: waiting-with-torch-distributed + # ok: waiting-with-pytorch-distributed req = dist.irecv(tensor=tensor, src=0) print('Rank 1 started receiving') req.wait() print('Rank ', rank, ' has data ', tensor[0]) - # ruleid: waiting-with-torch-distributed + # ruleid: waiting-with-pytorch-distributed req = dist.isend(tensor=tensor, dst=1) - # ruleid: waiting-with-torch-distributed + # ruleid: waiting-with-pytorch-distributed req = dist.irecv(tensor=tensor, src=0) return req diff --git a/python/waiting-with-torch-distributed.yml b/python/waiting-with-pytorch-distributed.yaml similarity index 68% rename from python/waiting-with-torch-distributed.yml rename to python/waiting-with-pytorch-distributed.yaml index e3f4fcc..0e5f1ff 100644 --- a/python/waiting-with-torch-distributed.yml +++ b/python/waiting-with-pytorch-distributed.yaml @@ -1,5 +1,5 @@ rules: -- id: waiting-with-torch-distributed +- id: waiting-with-pytorch-distributed message: Not waiting for requests is a source of undefined behavior languages: [python] severity: WARNING @@ -10,6 +10,10 @@ rules: confidence: MEDIUM likelihood: LOW impact: LOW + technology: [pytorch] + description: "Possible `PyTorch` undefined behavior when not waiting for requests" + references: + - https://pytorch.org/docs/stable/distributed.html#torch.distributed.isend patterns: - pattern-either: diff --git a/rs/panic-in-function-returning-result.yml b/rs/panic-in-function-returning-result.yaml similarity index 86% rename from rs/panic-in-function-returning-result.yml rename to rs/panic-in-function-returning-result.yaml index fca5c6a..8fedd5e 100644 --- a/rs/panic-in-function-returning-result.yml +++ b/rs/panic-in-function-returning-result.yaml @@ -11,6 +11,10 @@ rules: confidence: HIGH likelihood: MEDIUM impact: LOW + technology: [--no-technology--] + description: "Calling `unwrap` or `expect` in a function returning a `Result`" + references: + - https://doc.rust-lang.org/std/result/ patterns: - pattern-either: diff --git 
#!/usr/bin/env python
"""Print Markdown tables describing the Semgrep rules of each language.

For every directory named in ``LANGUAGES`` (resolved relative to the current
working directory) the script collects the ``*.yaml`` files below it and
prints one table row per rule: id, playground link, impact, confidence and
description. A rule file that cannot be used is reported on stderr and
skipped; it never aborts the run.
"""

from pathlib import Path
from urllib.parse import quote
import yaml
import sys


# Directories scanned for rule files.
LANGUAGES = ['go', 'python', 'rs']
# Table symbol for each `impact` metadata value; the None entry is used when
# the value is missing (and, as a fallback, when it is not a known key).
IMPACT_MAP = {
    'LOW': "🟩",
    'MEDIUM': "🟧",
    'HIGH': "🟥",
    None: "🌫️",
}
# Table symbol for each `confidence` metadata value; same None convention.
CONFIDENCE_MAP = {
    'LOW': "🌕",
    'MEDIUM': "🌗",
    'HIGH': "🌘",
    None: "",
}


def _load_single_rule(rule):
    """Return the single rule mapping stored in the YAML file *rule*.

    Returns None, after printing a diagnostic to stderr, when the file is not
    valid YAML, has no ``rules`` key, holds zero or several rules, or its rule
    has no ``id``.
    """
    try:
        # The context manager closes the handle even when parsing fails.
        with rule.open(encoding='utf-8') as handle:
            rule_data = yaml.safe_load(handle)
    except yaml.YAMLError as err:
        print(f"Error reading {rule} - {err}", file=sys.stderr)
        return None

    # An empty file loads as None and a scalar/list document has no keys.
    if not isinstance(rule_data, dict) or 'rules' not in rule_data:
        print(f"Error for {rule} - missing rules", file=sys.stderr)
        return None

    rules = rule_data['rules']
    # A bare `rules:` key loads as None; treat it like an empty list.
    if not isinstance(rules, list) or not rules:
        print(f"Error for {rule} - missing any rule", file=sys.stderr)
        return None
    if len(rules) > 1:
        print(f"Error for {rule} - only one rule per file is supported", file=sys.stderr)
        return None

    single_rule = rules[0]
    if not isinstance(single_rule, dict) or 'id' not in single_rule:
        print(f"Error for {rule} - missing rule id", file=sys.stderr)
        return None
    return single_rule


def main():
    """Print one Markdown section and table per language that has rules."""
    for language in LANGUAGES:

        rules_for_lang = []
        for rule in Path(language).rglob('*.yaml'):
            rule_data = _load_single_rule(rule)
            if rule_data is None:
                continue

            # Dotted path of the rule below the language directory, without
            # the file extension.
            rule_link = '.'.join(rule.parts[1:-1] + (rule.stem,))
            rule_link = '.'.join(("r/trailofbits", language, rule_link, rule_data['id']))
            rules_for_lang.append((rule, rule_data, rule_link))

        if not rules_for_lang:
            continue

        print(f"### {language}")
        print("")
        print("| ID | Playground | Impact | Confidence | Description |")
        print("| -- | :--------: | :----: | :--------: | ----------- |")

        # Sort on the path only, so the rule dicts are never compared.
        for rule, rule_data, rule_link in sorted(rules_for_lang, key=lambda entry: entry[0]):
            # `metadata:` may be absent or present but empty (None).
            rule_meta = rule_data.get('metadata') or {}
            impact = IMPACT_MAP.get(rule_meta.get('impact'), IMPACT_MAP[None])
            confidence = CONFIDENCE_MAP.get(rule_meta.get('confidence'), CONFIDENCE_MAP[None])
            print(f"| [{rule_data['id']}]({rule}) | [🛝🔗](https://semgrep.dev/playground/{quote(rule_link)}) | {impact} | {confidence} | {rule_meta.get('description', '')} |")

        print("\n")


if __name__ == "__main__":
    main()