diff --git a/README.md b/README.md index 041a6c5..ca9c23d 100644 --- a/README.md +++ b/README.md @@ -28,6 +28,8 @@ picklescan --path downloads picklescan --url https://huggingface.co/sshleifer/tiny-distilbert-base-cased-distilled-squad/resolve/main/pytorch_model.bin ``` +To scan NumPy's `.npy` files, pip install the `numpy` package first. + The scanner exit status codes are (a-la [ClamAV](https://www.clamav.net/)): - `0`: scan did not find malware - `1`: scan found malware diff --git a/conda.test.yaml b/conda.test.yaml index 81e28bc..8f91c82 100644 --- a/conda.test.yaml +++ b/conda.test.yaml @@ -2,7 +2,7 @@ name: picklescan-test channels: - nodefaults dependencies: -- python=3.7 +- python=3.9 - pip - pip: - picklescan==0.0.2 diff --git a/conda.yaml b/conda.yaml index 6ccbc14..593a620 100644 --- a/conda.yaml +++ b/conda.yaml @@ -2,7 +2,7 @@ name: picklescan channels: - nodefaults dependencies: -- python=3.7 +- python=3.9 - pip - pip: - -r requirements.txt diff --git a/setup.cfg b/setup.cfg index 3477a2a..2c47364 100644 --- a/setup.cfg +++ b/setup.cfg @@ -1,6 +1,6 @@ [metadata] name = picklescan -version = 0.0.8 +version = 0.0.9 author = Matthieu Maitre author_email = mmaitre314@users.noreply.github.com description = Security scanner detecting Python Pickle files performing suspicious actions diff --git a/src/picklescan/scanner.py b/src/picklescan/scanner.py index 2908fcc..0b1bd8e 100644 --- a/src/picklescan/scanner.py +++ b/src/picklescan/scanner.py @@ -11,13 +11,6 @@ import urllib.parse import zipfile -from numpy.lib.format import ( - MAGIC_PREFIX as NUMPY_MAGIC_PREFIX, - _check_version, - _read_array_header, - read_magic, -) - from .torch import ( get_magic_number, InvalidMagicError, @@ -289,10 +282,14 @@ def scan_zip_bytes(data: IO[bytes], file_id) -> ScanResult: def scan_numpy(data: IO[bytes], file_id) -> ScanResult: + + # Delay import to avoid dependency on NumPy + import numpy as np + # Code to distinguish from NumPy binary files and pickles. 
_ZIP_PREFIX = b"PK\x03\x04" _ZIP_SUFFIX = b"PK\x05\x06" # empty zip files start with this - N = len(NUMPY_MAGIC_PREFIX) + N = len(np.lib.format.MAGIC_PREFIX) magic = data.read(N) # If the file size is less than N, we need to make sure not # to seek past the beginning of the file @@ -300,12 +297,12 @@ def scan_numpy(data: IO[bytes], file_id) -> ScanResult: if magic.startswith(_ZIP_PREFIX) or magic.startswith(_ZIP_SUFFIX): # .npz file raise NotImplementedError("Scanning of .npz files is not implemented yet") - elif magic == NUMPY_MAGIC_PREFIX: + elif magic == np.lib.format.MAGIC_PREFIX: # .npy file - version = read_magic(data) - _check_version(version) - _, _, dtype = _read_array_header(data, version) + version = np.lib.format.read_magic(data) + np.lib.format._check_version(version) + _, _, dtype = np.lib.format._read_array_header(data, version) if dtype.hasobject: return scan_pickle_bytes(data, file_id)