Skip to content

Commit

Permalink
Make numpy optional (#12)
Browse files Browse the repository at this point in the history
  • Loading branch information
mmaitre314 committed Apr 25, 2023
1 parent c97c7d2 commit 41a70a5
Show file tree
Hide file tree
Showing 5 changed files with 14 additions and 15 deletions.
2 changes: 2 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,8 @@ picklescan --path downloads
picklescan --url https://huggingface.co/sshleifer/tiny-distilbert-base-cased-distilled-squad/resolve/main/pytorch_model.bin
```

To scan NumPy `.npy` files, install the `numpy` package first (`pip install numpy`).

The scanner exit status codes are (à la [ClamAV](https://www.clamav.net/)):
- `0`: scan did not find malware
- `1`: scan found malware
Expand Down
2 changes: 1 addition & 1 deletion conda.test.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ name: picklescan-test
channels:
- nodefaults
dependencies:
- python=3.7
- python=3.9
- pip
- pip:
- picklescan==0.0.2
2 changes: 1 addition & 1 deletion conda.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ name: picklescan
channels:
- nodefaults
dependencies:
- python=3.7
- python=3.9
- pip
- pip:
- -r requirements.txt
2 changes: 1 addition & 1 deletion setup.cfg
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[metadata]
name = picklescan
version = 0.0.8
version = 0.0.9
author = Matthieu Maitre
author_email = [email protected]
description = Security scanner detecting Python Pickle files performing suspicious actions
Expand Down
21 changes: 9 additions & 12 deletions src/picklescan/scanner.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,13 +11,6 @@
import urllib.parse
import zipfile

from numpy.lib.format import (
MAGIC_PREFIX as NUMPY_MAGIC_PREFIX,
_check_version,
_read_array_header,
read_magic,
)

from .torch import (
get_magic_number,
InvalidMagicError,
Expand Down Expand Up @@ -289,23 +282,27 @@ def scan_zip_bytes(data: IO[bytes], file_id) -> ScanResult:


def scan_numpy(data: IO[bytes], file_id) -> ScanResult:

# Delay import to avoid dependency on NumPy
import numpy as np

# Code to distinguish NumPy binary files from pickles.
_ZIP_PREFIX = b"PK\x03\x04"
_ZIP_SUFFIX = b"PK\x05\x06" # empty zip files start with this
N = len(NUMPY_MAGIC_PREFIX)
N = len(np.lib.format.MAGIC_PREFIX)
magic = data.read(N)
# If the file size is less than N, we need to make sure not
# to seek past the beginning of the file
data.seek(-min(N, len(magic)), 1) # back-up
if magic.startswith(_ZIP_PREFIX) or magic.startswith(_ZIP_SUFFIX):
# .npz file
raise NotImplementedError("Scanning of .npz files is not implemented yet")
elif magic == NUMPY_MAGIC_PREFIX:
elif magic == np.lib.format.MAGIC_PREFIX:
# .npy file

version = read_magic(data)
_check_version(version)
_, _, dtype = _read_array_header(data, version)
version = np.lib.format.read_magic(data)
np.lib.format._check_version(version)
_, _, dtype = np.lib.format._read_array_header(data, version)

if dtype.hasobject:
return scan_pickle_bytes(data, file_id)
Expand Down

0 comments on commit 41a70a5

Please sign in to comment.