Skip to content

Commit

Permalink
feat: add purl2cpe as a data source (#4179)
Browse files Browse the repository at this point in the history
* feat: added purl2cpe as a data source
* feat: Separated data source integration from previous PR

Signed-off-by: Meet Soni <[email protected]>
  • Loading branch information
inosmeet authored Jun 11, 2024
1 parent 49883ec commit 94e5a2d
Show file tree
Hide file tree
Showing 4 changed files with 86 additions and 5 deletions.
5 changes: 5 additions & 0 deletions cve_bin_tool/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,7 @@
gad_source,
nvd_source,
osv_source,
purl2cpe_source,
redhat_source,
)
from cve_bin_tool.error_handler import (
Expand Down Expand Up @@ -722,6 +723,10 @@ def main(argv=None):
source_curl = curl_source.Curl_Source()
enabled_sources.append(source_curl)

if "PURL2CPE" not in disabled_sources:
source_purl2cpe = purl2cpe_source.PURL2CPE_Source()
enabled_sources.append(source_purl2cpe)

if "NVD" not in disabled_sources:
source_nvd = nvd_source.NVD_Source(
nvd_type=nvd_type,
Expand Down
2 changes: 2 additions & 0 deletions cve_bin_tool/cvedb.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@
gad_source,
nvd_source,
osv_source,
purl2cpe_source,
)
from cve_bin_tool.error_handler import ERROR_CODES, CVEDBError, ErrorMode, SigningError
from cve_bin_tool.fetch_json_db import Fetch_JSON_DB
Expand Down Expand Up @@ -57,6 +58,7 @@ class CVEDB:
curl_source.Curl_Source,
osv_source.OSV_Source,
gad_source.GAD_Source,
purl2cpe_source.PURL2CPE_Source,
nvd_source.NVD_Source, # last to avoid data overwrites
]

Expand Down
76 changes: 76 additions & 0 deletions cve_bin_tool/data_sources/purl2cpe_source.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
from __future__ import annotations

import zipfile
from io import BytesIO
from pathlib import Path

import aiohttp

from cve_bin_tool.data_sources import DISK_LOCATION_DEFAULT, Data_Source
from cve_bin_tool.error_handler import ErrorMode
from cve_bin_tool.log import LOGGER
from cve_bin_tool.version import HTTP_HEADERS


class PURL2CPE_Source(Data_Source):
    """Data source that downloads the purl-to-CPE mapping database (PURL2CPE).

    The database is published as a zip archive; it is downloaded and
    extracted into a ``purl2cpe`` directory inside the cache directory.
    This source contributes no CVE records itself, so ``get_cve_data``
    always returns empty lists alongside the source name.
    """

    SOURCE = "PURL2CPE"
    CACHEDIR = DISK_LOCATION_DEFAULT
    LOGGER = LOGGER.getChild("CVEDB")
    PURL2CPE_URL = "https://github.com/scanoss/purl2cpe/raw/main/purl2cpe.db.zip"

    def __init__(
        self,
        error_mode: ErrorMode = ErrorMode.TruncTrace,
        incremental_update: bool = False,
    ):
        """Store configuration; no network or disk access happens here.

        Args:
            error_mode: Error reporting mode, stored on the instance.
            incremental_update: Stored on the instance; not consulted by
                the methods of this class.
        """
        self.cachedir = self.CACHEDIR
        self.purl2cpe_path = str(Path(self.cachedir) / "purl2cpe")
        self.source_name = self.SOURCE
        self.error_mode = error_mode
        self.incremental_update = incremental_update
        self.purl2cpe_url = self.PURL2CPE_URL
        # Created lazily in fetch_cves(); a caller may also inject one.
        self.session = None

    async def fetch_cves(self) -> None:
        """Download the PURL2CPE archive and extract it into purl2cpe_path.

        Download and extraction failures are logged at debug level and
        otherwise ignored (best effort). The HTTP session is always closed
        and reset before returning.
        """
        LOGGER.info("Getting PURL2CPE data...")

        # parents/exist_ok: works on a fresh cache directory and avoids the
        # race between an exists() check and the directory creation.
        Path(self.purl2cpe_path).mkdir(parents=True, exist_ok=True)

        if not self.session:
            connector = aiohttp.TCPConnector(limit_per_host=10)
            self.session = aiohttp.ClientSession(
                connector=connector, headers=HTTP_HEADERS, trust_env=True
            )

        try:
            # "async with" releases the response/connection on every path.
            async with self.session.get(self.purl2cpe_url) as response:
                if response.status == 200:
                    data = await response.read()
                    with zipfile.ZipFile(BytesIO(data), "r") as zip_ref:
                        zip_ref.extractall(self.purl2cpe_path)
                else:
                    LOGGER.debug(
                        f"Failed to download file. Status code: {response.status}"
                    )
        except Exception as e:
            LOGGER.debug(f"Error fetching PURL2CPE data: {e}")
        finally:
            # Runs on cancellation too, so the session is never leaked.
            await self.session.close()
            self.session = None

    async def get_cve_data(self) -> tuple[tuple[list, list], str]:
        """Refresh the PURL2CPE database and report this source's (empty) data.

        Returns:
            ``(([], []), source_name)``. The two lists are always empty;
            any error while fetching is logged and the source is skipped.
        """
        # skip if connection fails
        try:
            await self.fetch_cves()
        except Exception as e:
            LOGGER.debug(f"Error while fetching PURL2CPE Data: {e}")
            LOGGER.error("Unable to fetch PURL2CPE Data, skipping PURL2CPE.")
        finally:
            # fetch_cves() normally closes the session itself; this covers a
            # failure before it got that far. Reset it so a closed session
            # is never reused by a later call.
            if self.session is not None:
                await self.session.close()
                self.session = None

        return ([], []), self.source_name
8 changes: 3 additions & 5 deletions cve_bin_tool/parsers/python.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,13 +22,12 @@ class PythonRequirementsParser(Parser):

def __init__(self, cve_db, logger):
"""Initialize the python requirements file parser."""
self.purl_pkg_type = "pypi"
super().__init__(cve_db, logger)
self.purl_pkg_type = "pypi"

def generate_purl(self, product, vendor, qualifier={}, subpath=None):
"""Generates PURL after normalizing all components."""
product = re.sub(r"[^a-zA-Z0-9._-]", "", product).lower()
vendor = "UNKNOWN"

if not product:
return None
Expand Down Expand Up @@ -98,6 +97,7 @@ def run_checker(self, filename):
product = line["metadata"]["name"]
version = line["metadata"]["version"]
vendor = self.find_vendor(product, version)

if vendor is not None:
yield from vendor
self.logger.debug(f"Done scanning file: {self.filename}")
Expand All @@ -112,13 +112,12 @@ class PythonParser(Parser):

def __init__(self, cve_db, logger):
"""Initialize the python package metadata parser."""
self.purl_pkg_type = "pypi"
super().__init__(cve_db, logger)
self.purl_pkg_type = "pypi"

def generate_purl(self, product, vendor, qualifier={}, subpath=None):
"""Generates PURL after normalizing all components."""
product = re.sub(r"[^a-zA-Z0-9._-]", "", product).lower()
vendor = "UNKNOWN"

if not product:
return None
Expand Down Expand Up @@ -154,7 +153,6 @@ def run_checker(self, filename):
yield ScanInfo(
ProductInfo(vendor, product, version, location), file_path
)

# There are packages with a METADATA file in them containing different data from what the tool expects
except AttributeError:
self.logger.debug(f"{filename} is an invalid METADATA/PKG-INFO")
Expand Down

0 comments on commit 94e5a2d

Please sign in to comment.