Skip to content

Commit

Permalink
feat: add support for generation of SBOM from requirements.txt file
Browse files Browse the repository at this point in the history
  • Loading branch information
anthonyharrison committed Aug 1, 2024
1 parent 0940f38 commit f015fb8
Show file tree
Hide file tree
Showing 3 changed files with 129 additions and 83 deletions.
11 changes: 10 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,8 @@ It identifies all of the dependent components which are
explicitly defined (typically via requirements.txt file) or implicitly as a
hidden dependency.

It can also be used to create an SBOM from a requirements.txt file. In this case no transitive components will be identified.

It is intended to be used as part of a continuous integration system to enable accurate records of SBOMs to be maintained
and also to support subsequent audit needs to determine if a particular component (and version) has been used.

Expand Down Expand Up @@ -57,6 +59,8 @@ options:
Input:
-m MODULE, --module MODULE
identity of python module
-r REQUIREMENT, --requirement REQUIREMENT
name of requirements.txt file
--system include all installed python modules within system
--exclude-license suppress detecting the license of components
--include-file include reporting files associated with module
Expand All @@ -76,7 +80,10 @@ Output:
## Operation
The `--module` option is used to identify the Python module. The `--system` option is used to indicate that the SBOM is to include all installed
Python modules. Either `--module` or `--system` must be specified
Python modules. The `--requirement` option is used to create an SBOM from a requirements.txt file. When this option is specified, no transitive
dependencies will be identified.
One of `--module`, `--requirement` or `--system` must be specified. If multiple options are specified, the order of priority is `--module`, `--system` and `--requirement`.
The `--sbom` option is used to specify the format of the generated SBOM (the default is SPDX). The `--format` option
can be used to specify the formatting of the SBOM (the default is Tag Value format for a SPDX SBOM). JSON format is supported for both
Expand Down Expand Up @@ -108,6 +115,8 @@ This tool is meant to support software development and security audit functions.
which is provided to the tool. Unfortunately, the tool is unable to determine the validity or completeness of such a SBOM file; users of the tool
are therefore reminded that they should assert the quality of any data which is provided to the tool.
The `--requirement` option will only process modules in the file which have pinned versions (e.g. `requests==2.31.0`). Any modules which do not specify a version will be ignored.
When processing and validating licenses, the application will use a set of synonyms to attempt to map some license identifiers to the correct [SPDX License Identifiers](https://spdx.org/licenses/). However, the
user of the tool is reminded that they should assert the quality of any data which is provided by the tool particularly where the license identifier has been modified.
Expand Down
11 changes: 11 additions & 0 deletions sbom4python/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,13 @@ def main(argv=None):
default="",
help="identity of python module",
)
input_group.add_argument(
"-r",
"--requirement",
action="store",
default="",
help="name of requirements.txt file",
)
input_group.add_argument(
"--system",
action="store_true",
Expand Down Expand Up @@ -100,6 +107,7 @@ def main(argv=None):

defaults = {
"module": "",
"requirement": "",
"include_file": False,
"exclude_license": False,
"system": False,
Expand Down Expand Up @@ -129,6 +137,7 @@ def main(argv=None):
print("Exclude Licences:", args["exclude_license"])
print("Include Files:", args["include_file"])
print("Module", module_name)
print("Requirements file", args["requirement"])
print("System", args["system"])
print("SBOM type:", args["sbom"])
print("Format:", bom_format)
Expand All @@ -144,6 +153,8 @@ def main(argv=None):
sbom_scan.process_python_module(module_name)
elif args["system"]:
sbom_scan.process_system()
elif len(args["requirement"]) > 0:
sbom_scan.process_requirements(args["requirement"])
else:
print("[ERROR] Nothing to process")
return -1
Expand Down
190 changes: 108 additions & 82 deletions sbom4python/scanner.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,8 @@
import unicodedata

from lib4package.metadata import Metadata
from lib4sbom.data.package import SBOMPackage
from lib4sbom.data.document import SBOMDocument
from lib4sbom.data.package import SBOMPackage
from lib4sbom.data.relationship import SBOMRelationship
from lib4sbom.license import LicenseScanner
from sbom4files.filescanner import FileScanner
Expand All @@ -20,7 +20,9 @@ class SBOMScanner:
Simple SBOM Generator for Python module.
"""

def __init__(self, debug, include_file=False, exclude_license=False, lifecycle="build"):
def __init__(
self, debug, include_file=False, exclude_license=False, lifecycle="build"
):
self.record = []
self.debug = debug
self.include_file = include_file
Expand All @@ -36,7 +38,8 @@ def __init__(self, debug, include_file=False, exclude_license=False, lifecycle="
self.parent = "NOT_DEFINED"
self.package_metadata = Metadata("python", debug=self.debug)
self.python_version = platform.python_version()
self.sbom_document.set_value("lifecycle", lifecycle)
self.set_lifecycle(lifecycle)
self.metadata = {}

def set_parent(self, module):
self.parent = f"Python-{module}"
Expand All @@ -49,6 +52,9 @@ def run_program(self, command_line):
res = subprocess.run(params, capture_output=True, text=True)
return res.stdout.splitlines()

def set_lifecycle(self, lifecycle):
    """Store ``lifecycle`` under the "lifecycle" key of the SBOM document."""
    document = self.sbom_document
    document.set_value("lifecycle", lifecycle)

def _format_supplier(self, supplier_info, include_email=True):
# See https://stackoverflow.com/questions/1207457/convert-a-unicode-string-to-a-string-in-python-containing-extra-symbols
# And convert byte object to a string
Expand All @@ -75,6 +81,82 @@ def _format_supplier(self, supplier_info, include_email=True):
supplier = supplier + "(" + emails[-1] + ")"
return re.sub(" +", " ", supplier.strip())

def _create_package(self, package, version, parent="-"):
    """Populate an SBOM package record for ``package`` and store it.

    The record is built from the arguments plus whatever metadata is
    currently available through ``self.get`` (license, author, home page,
    summary) and is saved in ``self.sbom_packages`` keyed by
    ``(name, version)``.

    Args:
        package: Package name used for the SBOM entry, PURL and CPE.
        version: Package version.
        parent: Name of the parent package; ``"-"`` marks a top-level
            package, which is typed as an "application".
    """
    self.sbom_package.initialise()
    self.package_metadata.get_package(package)
    self.sbom_package.set_name(package)
    self.sbom_package.set_property("language", "Python")
    self.sbom_package.set_property("python_version", self.python_version)
    self.sbom_package.set_version(version)
    if parent == "-":
        self.sbom_package.set_type("application")
    self.sbom_package.set_filesanalysis(self.include_file)
    declared_license = self.get("License")
    # Metadata may carry no "Name" (presumably the case when packages come
    # from a requirements file rather than installed metadata - TODO confirm);
    # fall back to the package argument so URLs and comments stay well formed.
    display_name = self.get("Name") or package
    license_id = self.license.find_license(declared_license)
    # Report license as reported by metadata. If not valid SPDX, report NOASSERTION
    if license_id != declared_license:
        self.sbom_package.set_licensedeclared("NOASSERTION")
    else:
        self.sbom_package.set_licensedeclared(license_id)
    # Report license if valid SPDX identifier
    self.sbom_package.set_licenseconcluded(license_id)
    # Add comment if metadata license was modified
    license_comment = ""
    if len(declared_license) > 0 and license_id != declared_license:
        license_comment = f"{display_name} declares {declared_license} which is not currently a valid SPDX License identifier or expression."
    # Report if license is deprecated
    if self.license.deprecated(license_id):
        deprecated_comment = f"{license_id} is now deprecated."
        if len(license_comment) > 0:
            license_comment = f"{license_comment} {deprecated_comment}"
        else:
            license_comment = deprecated_comment
    if len(license_comment) > 0:
        self.sbom_package.set_licensecomments(license_comment)
    supplier = self.get("Author") + " " + self.get("Author-email")
    # More than three words is treated as an organisation rather than a person
    if len(supplier.split()) > 3:
        self.sbom_package.set_supplier(
            "Organization", self._format_supplier(supplier)
        )
    elif len(supplier) > 1:
        self.sbom_package.set_supplier("Person", self._format_supplier(supplier))
    else:
        self.sbom_package.set_supplier("UNKNOWN", "NOASSERTION")
    if self.get("Home-page") != "":
        self.sbom_package.set_homepage(self.get("Home-page"))
    if self.get("Summary") != "":
        self.sbom_package.set_summary(self.get("Summary"))
    self.sbom_package.set_downloadlocation(
        f"https://pypi.org/project/{display_name}/{version}"
    )
    # External references
    self.sbom_package.set_purl(f"pkg:pypi/{package}@{version}")
    if len(supplier) > 1:
        component_supplier = self._format_supplier(supplier, include_email=False)
        # ':' separates CPE fields, so it must be escaped inside the version
        cpe_version = version.replace(":", "\\:")
        self.sbom_package.set_cpe(
            f"cpe:2.3:a:{component_supplier.replace(' ', '_').lower()}:{package}:{cpe_version}:*:*:*:*:*:*:*"
        )
    checksum = self.package_metadata.get_checksum(version=version)
    if checksum is not None:
        self.sbom_package.set_checksum("SHA1", checksum)
    # Store package data
    self.sbom_packages[
        (
            self.sbom_package.get_name(),
            self.sbom_package.get_value("version"),
        )
    ] = self.sbom_package.get_package()

def _create_relationship(self, package, parent="-"):
    """Append a relationship entry for ``package`` to ``self.sbom_relationships``.

    A ``parent`` of ``"-"`` records ``self.parent DESCRIBES package``;
    any other value records ``parent.lower() DEPENDS_ON package``.
    """
    relationship = self.sbom_relationship
    relationship.initialise()
    if parent == "-":
        relationship.set_relationship(self.parent, "DESCRIBES", package)
    else:
        relationship.set_relationship(parent.lower(), "DEPENDS_ON", package)
    self.sbom_relationships.append(relationship.get_relationship())

def process_module(self, module, parent="-"):
if self.debug:
print(f"Process Module {module}")
Expand All @@ -93,91 +175,14 @@ def process_module(self, module, parent="-"):
if self.debug:
print(f"Metadata for {module}\n{self.metadata}")

self.sbom_package.initialise()
package = self.get("Name").lower().replace("_", "-")
version = self.get("Version")
if (package, version) in self.sbom_packages:
if self.debug:
print(f"Already processed {package} {version}")
else:
self.sbom_package.set_name(package)
self.sbom_package.set_property("language", "Python")
self.sbom_package.set_property("python_version", self.python_version)
self.sbom_package.set_version(version)
if parent == "-":
self.sbom_package.set_type("application")
self.sbom_package.set_filesanalysis(self.include_file)
license = self.license.find_license(self.get("License"))
# Report license as reported by metadata. If not valid SPDX, report NOASSERTION
if license != self.get("License"):
self.sbom_package.set_licensedeclared("NOASSERTION")
else:
self.sbom_package.set_licensedeclared(license)
# Report license if valid SPDX identifier
self.sbom_package.set_licenseconcluded(license)
# Add comment if metadata license was modified
license_comment = ""
if len(self.get("License")) > 0 and license != self.get("License"):
license_comment = f"{self.get('Name')} declares {self.get('License')} which is not currently a valid SPDX License identifier or expression."
# Report if license is deprecated
if self.license.deprecated(license):
deprecated_comment = f"{license} is now deprecated."
if len(license_comment) > 0:
license_comment = f"{license_comment} {deprecated_comment}"
else:
license_comment = deprecated_comment
if len(license_comment) > 0:
self.sbom_package.set_licensecomments(license_comment)
supplier = self.get("Author") + " " + self.get("Author-email")
if len(supplier.split()) > 3:
self.sbom_package.set_supplier(
"Organization", self._format_supplier(supplier)
)
elif len(supplier) > 1:
self.sbom_package.set_supplier(
"Person", self._format_supplier(supplier)
)
else:
self.sbom_package.set_supplier("UNKNOWN", "NOASSERTION")
if self.get("Home-page") != "":
self.sbom_package.set_homepage(self.get("Home-page"))
if self.get("Summary") != "":
self.sbom_package.set_summary(self.get("Summary"))
self.sbom_package.set_downloadlocation(
f'https://pypi.org/project/{self.get("Name")}/{version}'
)
# External references
self.sbom_package.set_purl(f"pkg:pypi/{package}@{version}")
if len(supplier) > 1:
component_supplier = self._format_supplier(
supplier, include_email=False
)
cpe_version = version.replace(":", "\\:")
self.sbom_package.set_cpe(
f"cpe:2.3:a:{component_supplier.replace(' ', '_').lower()}:{package}:{cpe_version}:*:*:*:*:*:*:*"
)
self.package_metadata.get_package(package)
checksum = self.package_metadata.get_checksum(version=version)
if checksum is not None:
self.sbom_package.set_checksum("SHA1", checksum)
# Store package data
self.sbom_packages[
(
self.sbom_package.get_name(),
self.sbom_package.get_value("version"),
)
] = self.sbom_package.get_package()
# Add relationship
self.sbom_relationship.initialise()
if parent != "-":
self.sbom_relationship.set_relationship(
parent.lower(), "DEPENDS_ON", package
)
else:
self.sbom_relationship.set_relationship(
self.parent, "DESCRIBES", package
)
self.sbom_relationships.append(self.sbom_relationship.get_relationship())
self._create_package(package, version, parent)
self._create_relationship(package, parent)
if self.include_file:
directory_location = f'{self.get("Location")}/{self.get("Name").lower().replace("-","_")}'
file_dir = pathlib.Path(directory_location)
Expand Down Expand Up @@ -262,3 +267,24 @@ def process_system(self):
for module_name in modules:
if self.process_module(module_name):
self.analyze(self.get("Name"), self.get("Requires"))

def process_requirements(self, filename):
    """Create SBOM packages from the pinned entries of a requirements file.

    Only lines of the form ``name==version`` (optionally with extras,
    surrounding whitespace, an environment marker, a trailing comment or a
    line continuation) are processed. Everything else - unpinned
    requirements, comments, blank lines and option lines such as ``-r`` or
    ``--hash`` - is ignored. No transitive dependencies are resolved.

    Args:
        filename: Path of the requirements file. An empty name or a path
            that is not an existing file results in nothing being processed.
    """
    if not filename:
        return
    requirements_path = pathlib.Path(filename)
    if not requirements_path.is_file():
        if self.debug:
            print(f"Requirements file {filename} not found")
        return
    # name, optional [extras], '==' (or '==='), then the version up to the
    # first whitespace, marker (';'), comment ('#'), ',' or continuation ('\')
    pinned = re.compile(
        r"^\s*([A-Za-z0-9][A-Za-z0-9._-]*)\s*(?:\[[^\]]*\])?\s*===?\s*([^\s;#\\,]+)"
    )
    with requirements_path.open(encoding="utf-8") as requirements_file:
        self.set_lifecycle("pre-build")
        for line in requirements_file:
            match = pinned.match(line)
            if match is None:
                # Not a pinned requirement
                continue
            # Normalise the name the same way as for installed modules
            package = match.group(1).lower().replace("_", "-")
            version = match.group(2)
            if (package, version) in self.sbom_packages:
                if self.debug:
                    print(f"Already processed {package} {version}")
                continue
            if self.debug:
                print(f"Processing {package} version {version}")
            self._create_package(package, version)
            self._create_relationship(package)

0 comments on commit f015fb8

Please sign in to comment.