diff --git a/CHANGELOG.rst b/CHANGELOG.rst
index e359d177..717a00de 100644
--- a/CHANGELOG.rst
+++ b/CHANGELOG.rst
@@ -2,10 +2,16 @@
Changelog
2023-xx-xx
- Release 10.0.1
+ Release 10.1.0
* Fixed `transform` with nested list #531
* Added curl dependency in Dockerfile #532
+ * Introduce spdx_license_expression
+ * Ability to transform spdx license key from spdx_license_expression to
+ license_expression (i.e. Generate attribution with
+ spdx_license_expression) #513
+ * Ability to configure the proxy settings #533
+ * Fixed licenses issue #534
2023-08-20
Release 10.0.0
diff --git a/docs/source/reference.rst b/docs/source/reference.rst
index 48765718..3660bec8 100644
--- a/docs/source/reference.rst
+++ b/docs/source/reference.rst
@@ -83,8 +83,8 @@ Options
Purpose
-------
-Generate an attribution file which contains license information
-from the INPUT along with the license text.
+Generate an attribution file which contains license information from the INPUT
+along with the license text.
Assume the following:
@@ -421,6 +421,60 @@ Details
This option tells the tool to show all errors found.
The default behavior will only show 'CRITICAL', 'ERROR', and 'WARNING'
+Special Notes
+-------------
+If the input contains values for license_file, the tool will attempt to
+associate the license_file with the corresponding license_key.
+
+sample.csv
+
++-----------------+--------+--------------------+--------------+
+| about_resource  | name   | license_expression | license_file |
++=================+========+====================+==============+
+| /project/test.c | test.c | mit AND custom     | custom.txt   |
++-----------------+--------+--------------------+--------------+
+
+If the user does not utilize the **--fetch-license** option, the input will
+contain two license keys and one license file. In this scenario, the tool cannot
+determine which license key the license file is referencing. As a result, the
+license_file will be saved separately.
+
+i.e.
+
+ .. code-block:: none
+
+ about_resource: test.c
+ name: test.c
+ license_expression: mit AND custom
+ licenses:
+ - key: mit
+ name: mit
+ - key: custom
+ name: custom
+ - file: custom.txt
+
+On the other hand, if the user generates ABOUT files using the
+**--fetch-license** option, the MIT license will be retrieved. This will result
+in having one license key and one license file. In such cases, the tool will
+consider it a successful match.
+
+i.e.
+
+ .. code-block:: none
+
+ about_resource: test.c
+ name: test.c
+ license_expression: mit AND custom
+ licenses:
+ - key: mit
+ name: MIT License
+ file: mit.LICENSE
+ url: https://scancode-licensedb.aboutcode.org/mit.LICENSE
+ spdx_license_key: MIT
+ - key: custom
+ name: custom
+ file: custom.txt
+
gen_license
===========
@@ -780,3 +834,20 @@ version 32.0.0 or later. If you are using an earlier version of Scancode Toolkit
specifically version 31 or older, it will only be compatible with prior versions
of AboutCode Toolkit.
+
+Configure proxy
+---------------
+Since version 10.1.0, AboutCode Toolkit uses the ``requests`` library for HTTP
+requests. To go through a proxy, export the standard environment variables
+**http_proxy**, **https_proxy**, **no_proxy**, **all_proxy** (or uppercase forms).
+
+e.g.
+
+ .. code-block:: none
+
+ $ export HTTP_PROXY="http://10.10.1.10:3128"
+ $ export HTTPS_PROXY="http://10.10.1.10:1080"
+ $ export ALL_PROXY="socks5://10.10.1.10:3434"
+
+See https://requests.readthedocs.io/en/latest/user/advanced/#proxies for
+more details.
diff --git a/src/attributecode/attrib.py b/src/attributecode/attrib.py
index 48a61095..6c0207a7 100644
--- a/src/attributecode/attrib.py
+++ b/src/attributecode/attrib.py
@@ -323,7 +323,7 @@ def generate_and_save(abouts, is_about_input, license_dict, output_location, sca
)
if rendering_error:
- errors.extend(rendering_error)
+ errors.append(rendering_error)
if rendered:
output_location = add_unc(output_location)
diff --git a/src/attributecode/model.py b/src/attributecode/model.py
index 13d97ab2..0ea0ae6d 100644
--- a/src/attributecode/model.py
+++ b/src/attributecode/model.py
@@ -55,6 +55,7 @@
from attributecode.util import csv
from attributecode.util import file_fields
from attributecode.util import filter_errors
+from attributecode.util import get_spdx_key_and_lic_key_from_licdb
from attributecode.util import is_valid_name
from attributecode.util import on_windows
from attributecode.util import norm
@@ -802,6 +803,7 @@ def set_standard_fields(self):
('license_name', ListField()),
('license_file', FileTextField()),
('license_url', UrlListField()),
+ ('spdx_license_expression', StringField()),
('spdx_license_key', ListField()),
('copyright', StringField()),
('notice_file', FileTextField()),
@@ -1222,6 +1224,13 @@ def dumps(self, licenses_dict=None):
else:
if field.value:
data[field.name] = field.value
+ # If there is no license_key value, parse the license_expression
+ # and get the parsed license key
+ if 'license_expression' in data:
+ if not license_key and data['license_expression']:
+ _spec_char, lic_list = parse_license_expression(
+ data['license_expression'])
+ license_key = lic_list
# Group the same license information in a list
# This `licenses_dict` is a dictionary with license key as the key and the
@@ -1244,20 +1253,35 @@ def dumps(self, licenses_dict=None):
lic_dict['spdx_license_key'] = spdx_lic_key
# Remove the license information if it has been handled
- lic_key_copy.remove(lic_key)
- if lic_name in license_name:
- license_name.remove(lic_name)
- if lic_url in license_url:
- license_url.remove(lic_url)
- if lic_filename in license_file:
- license_file.remove(lic_filename)
- if spdx_lic_key in spdx_license_key:
- spdx_license_key.remove(spdx_lic_key)
- lic_dict_list.append(lic_dict)
+ # Only treat the license as handled when its information has been
+ # fetched: an empty lic_name means the license key is invalid or
+ # custom
+ if lic_name:
+ lic_key_copy.remove(lic_key)
+ if lic_name in license_name:
+ license_name.remove(lic_name)
+ if lic_url in license_url:
+ license_url.remove(lic_url)
+ if lic_filename in license_file:
+ license_file.remove(lic_filename)
+ if spdx_lic_key in spdx_license_key:
+ spdx_license_key.remove(spdx_lic_key)
+ lic_dict_list.append(lic_dict)
# Handle license information that have not been handled.
- license_group = list(zip_longest(
- lic_key_copy, license_name, license_file, license_url, spdx_license_key))
+ # If the number of remaining license keys equals the number of license
+ # files, assume each lic_file (custom license) refers to the lic_key at
+ # the same position. Otherwise, do not group them
+ if len(lic_key_copy) == len(license_file):
+ license_group = list(zip_longest(
+ lic_key_copy, license_name, license_file, license_url, spdx_license_key))
+ else:
+ license_group = list(zip_longest(
+ lic_key_copy, license_name, [], license_url, spdx_license_key))
+ # Add the unhandled_lic_file if any
+ if license_file:
+ for lic_file in license_file:
+ license_group.append((None, None, lic_file, None, None))
for lic_group in license_group:
lic_dict = {}
@@ -1278,15 +1302,15 @@ def dumps(self, licenses_dict=None):
lic_dict_list.append(lic_dict)
# Format the license information in the same order of the license expression
- if license_key:
- for key in license_key:
- for lic_dict in lic_dict_list:
- if key == lic_dict['key']:
- data.setdefault('licenses', []).append(lic_dict)
- break
- else:
+ for key in license_key:
for lic_dict in lic_dict_list:
- data.setdefault('licenses', []).append(lic_dict)
+ if key == lic_dict['key']:
+ data.setdefault('licenses', []).append(lic_dict)
+ lic_dict_list.remove(lic_dict)
+ break
+
+ for lic_dict in lic_dict_list:
+ data.setdefault('licenses', []).append(lic_dict)
return saneyaml.dump(data)
@@ -1764,6 +1788,7 @@ def pre_process_and_fetch_license_dict(abouts, from_check=False, api_url=None, a
if errors:
return key_text_dict, errors
+ spdx_sclickey_dict = get_spdx_key_and_lic_key_from_licdb()
for about in abouts:
# No need to go through all the about objects if '--api_key' is invalid
auth_error = Error(
@@ -1779,6 +1804,27 @@ def pre_process_and_fetch_license_dict(abouts, from_check=False, api_url=None, a
about.license_expression.value = lic_exp
about.license_expression.present = True
+ if not about.license_expression.value and about.spdx_license_expression.value:
+ lic_exp_value = ""
+ special_char_in_expression, lic_list = parse_license_expression(
+ about.spdx_license_expression.value)
+ if special_char_in_expression:
+ msg = (about.about_file_path + u": The following character(s) cannot be in the spdx_license_expression: " +
+ str(special_char_in_expression))
+ errors.append(Error(ERROR, msg))
+ else:
+ spdx_lic_exp_segment = about.spdx_license_expression.value.split()
+ for spdx_lic_key in spdx_lic_exp_segment:
+ if lic_exp_value:
+ lic_exp_value = lic_exp_value + " " + convert_spdx_expression_to_lic_expression(
+ spdx_lic_key, spdx_sclickey_dict)
+ else:
+ lic_exp_value = convert_spdx_expression_to_lic_expression(
+ spdx_lic_key, spdx_sclickey_dict)
+ if lic_exp_value:
+ about.license_expression.value = lic_exp_value
+ about.license_expression.present = True
+
if about.license_expression.value:
special_char_in_expression, lic_list = parse_license_expression(
about.license_expression.value)
@@ -1855,6 +1901,30 @@ def pre_process_and_fetch_license_dict(abouts, from_check=False, api_url=None, a
return key_text_dict, errors
+def convert_spdx_expression_to_lic_expression(spdx_key, spdx_lic_dict):
+    """
+    Return the license_expression segment for the `spdx_key` segment of a
+    spdx_license_expression, looked up in the `spdx_lic_dict` mapping of SPDX
+    license keys to license keys. Parentheses wrapped around a key are kept
+    and a segment with no match (an operator or an unknown key) is returned
+    unchanged.
+    """
+    # Direct hit: the segment is a known SPDX license key
+    if spdx_key in spdx_lic_dict:
+        return spdx_lic_dict[spdx_key]
+    # Peel one leading "(" off and translate what follows it
+    if spdx_key.startswith('('):
+        inner = convert_spdx_expression_to_lic_expression(
+            spdx_key[1:], spdx_lic_dict)
+        return '(' + inner
+    # Peel one trailing ")" off and translate what precedes it
+    if spdx_key.endswith(')'):
+        inner = convert_spdx_expression_to_lic_expression(
+            spdx_key[:-1], spdx_lic_dict)
+        return inner + ')'
+    return spdx_key
+
+
def parse_license_expression(lic_expression):
licensing = Licensing()
lic_list = []
diff --git a/src/attributecode/util.py b/src/attributecode/util.py
index 45919d66..c87167f5 100644
--- a/src/attributecode/util.py
+++ b/src/attributecode/util.py
@@ -192,6 +192,50 @@ def norm(p):
return p
+def get_spdx_key_and_lic_key_from_licdb():
+    """
+    Return a dictionary built from all the licenses listed in the licenseDB
+    index. Each "spdx_license_key" (and each of the "other_spdx_license_keys")
+    is a key of the dictionary and the matching "license_key" is its value.
+    An empty dictionary is returned if the index cannot be fetched or parsed.
+
+    Sample of one of the license in the index.json
+    {
+        "license_key": "bsd-new",
+        "category": "Permissive",
+        "spdx_license_key": "BSD-3-Clause",
+        "other_spdx_license_keys": [
+            "LicenseRef-scancode-libzip"
+        ],
+        "is_exception": false,
+        "is_deprecated": false,
+        "json": "bsd-new.json",
+        "yaml": "bsd-new.yml",
+        "html": "bsd-new.html",
+        "license": "bsd-new.LICENSE"
+    },
+    """
+    import requests
+    lic_dict = dict()
+    # URL of the license index
+    url = "https://scancode-licensedb.aboutcode.org/index.json"
+    try:
+        # The timeout prevents a stalled connection from hanging the tool
+        response = requests.get(url, timeout=30)
+        # Only a successful request (status code 200) carries the index
+        licenses_index = response.json() if response.status_code == 200 else []
+    except (requests.exceptions.RequestException, ValueError):
+        # No network, proxy failure or invalid JSON: return the empty mapping
+        return lic_dict
+
+    for lic in licenses_index:
+        lic_key = lic.get('license_key')
+        spdx_keys = [lic.get('spdx_license_key')]
+        spdx_keys.extend(lic.get('other_spdx_license_keys') or [])
+        lic_dict.update({k: lic_key for k in spdx_keys if k and lic_key})
+    return lic_dict
+
+
def get_relative_path(base_loc, full_loc):
"""
Return a posix path for a given full location relative to a base location.
diff --git a/tests/test_gen.py b/tests/test_gen.py
index 06a1ea38..6feeef71 100644
--- a/tests/test_gen.py
+++ b/tests/test_gen.py
@@ -32,13 +32,15 @@ class GenTest(unittest.TestCase):
def test_check_duplicated_columns(self):
test_file = get_test_loc('test_gen/dup_keys.csv')
- expected = [Error(ERROR, 'Duplicated column name(s): copyright with copyright\nPlease correct the input and re-run.')]
+ expected = [Error(
+ ERROR, 'Duplicated column name(s): copyright with copyright\nPlease correct the input and re-run.')]
result = gen.check_duplicated_columns(test_file)
assert expected == result
def test_check_duplicated_columns_handles_lower_upper_case(self):
test_file = get_test_loc('test_gen/dup_keys_with_diff_case.csv')
- expected = [Error(ERROR, 'Duplicated column name(s): copyright with Copyright\nPlease correct the input and re-run.')]
+ expected = [Error(
+ ERROR, 'Duplicated column name(s): copyright with Copyright\nPlease correct the input and re-run.')]
result = gen.check_duplicated_columns(test_file)
assert expected == result
@@ -47,15 +49,17 @@ def test_check_duplicated_about_resource(self):
arp1 = '/test/test.c'
arp2 = '/test/tmp/test.c'
expected = Error(CRITICAL,
- "The input has duplicated values in 'about_resource' field: " + arp1)
+ "The input has duplicated values in 'about_resource' field: " + arp1)
result1 = gen.check_duplicated_about_resource(arp1, arp_list)
result2 = gen.check_duplicated_about_resource(arp2, arp_list)
assert result1 == expected
assert result2 == ''
def test_check_newline_in_file_field(self):
- test_dict1 = {'about_resource': '/test/test.c', 'name': 'test.c', 'notice_file': 'NOTICE\nNOTICE2'}
- test_dict2 = {'about_resource': '/test/test.c', 'name': 'test.c', 'notice_file': 'NOTICE, NOTICE2'}
+ test_dict1 = {'about_resource': '/test/test.c',
+ 'name': 'test.c', 'notice_file': 'NOTICE\nNOTICE2'}
+ test_dict2 = {'about_resource': '/test/test.c',
+ 'name': 'test.c', 'notice_file': 'NOTICE, NOTICE2'}
expected = [
Error(CRITICAL,
"New line character detected in 'notice_file' for '/test/test.c' which is not supported."
@@ -69,7 +73,7 @@ def test_check_about_resource_filename(self):
arp1 = '/test/t@est.c'
arp2 = '/test/t|est.c'
msg = ("Invalid characters present in 'about_resource' "
- "field: " + arp2)
+ "field: " + arp2)
expected2 = Error(ERROR, msg)
result1 = gen.check_about_resource_filename(arp1)
result2 = gen.check_about_resource_filename(arp2)
@@ -85,7 +89,7 @@ def test_load_inventory(self):
assert len(errors) == expected_num_errors
expected = (
-'''about_resource: .
+ '''about_resource: .
name: AboutCode
version: 0.11.0
description: |
@@ -103,8 +107,10 @@ def test_load_inventory_without_about_resource(self):
location = get_test_loc('test_gen/inv_no_about_resource.csv')
base_dir = get_temp_dir()
from_attrib = False
- errors, abouts = gen.load_inventory(location, base_dir=base_dir, from_attrib=from_attrib)
- expected_error = [Error(CRITICAL, "The essential field 'about_resource' is not found in the ")]
+ errors, abouts = gen.load_inventory(
+ location, base_dir=base_dir, from_attrib=from_attrib)
+ expected_error = [Error(
+ CRITICAL, "The essential field 'about_resource' is not found in the ")]
assert errors == expected_error
assert abouts == []
@@ -113,16 +119,20 @@ def test_load_inventory_without_about_resource_from_attrib(self):
location = get_test_loc('test_gen/inv_no_about_resource.csv')
base_dir = get_temp_dir()
from_attrib = True
- errors, abouts = gen.load_inventory(location, base_dir=base_dir, from_attrib=from_attrib)
+ errors, abouts = gen.load_inventory(
+ location, base_dir=base_dir, from_attrib=from_attrib)
expected_num_errors = 0
assert len(errors) == expected_num_errors
expected = (
-'''about_resource: .
+ '''about_resource: .
name: AboutCode
version: 0.11.0
license_expression: apache-2.0
+licenses:
+ - key: apache-2.0
+ name: apache-2.0
'''
)
result = [a.dumps() for a in abouts]
@@ -133,7 +143,8 @@ def test_load_inventory_with_errors(self):
base_dir = get_temp_dir()
errors, abouts = gen.load_inventory(location, base_dir=base_dir)
expected_errors = [
- Error(WARNING, "Field name: ['confirmed copyright'] contains illegal name characters (or empty spaces) and is ignored."),
+ Error(
+ WARNING, "Field name: ['confirmed copyright'] contains illegal name characters (or empty spaces) and is ignored."),
Error(INFO, 'Field about_resource: Path'),
Error(INFO, "Field ['resource', 'test'] is a custom field.")
]
@@ -173,7 +184,6 @@ def test_load_inventory_simple_xlsx(self):
assert abouts[0].license_expression.value == 'bsd-new and mit'
assert abouts[1].license_expression.value == 'mit'
-
def test_load_scancode_json(self):
location = get_test_loc('test_gen/load/clean-text-0.3.0-lceupi.json')
inventory = gen.load_scancode_json(location)
@@ -192,9 +202,9 @@ def test_load_scancode_json(self):
# We will only check the first element in the inventory list
assert inventory[0] == expected
-
def test_generation_dir_endswith_space(self):
- location = get_test_loc('test_gen/inventory/complex/about_file_path_dir_endswith_space.csv')
+ location = get_test_loc(
+ 'test_gen/inventory/complex/about_file_path_dir_endswith_space.csv')
base_dir = get_temp_dir()
errors, _abouts = gen.generate(location, base_dir)
expected_errors_msg1 = 'contains directory name ends with spaces which is not allowed. Generation skipped.'
@@ -248,7 +258,7 @@ def test_generate(self):
result = [a.dumps() for a in abouts][0]
expected = (
-'''about_resource: .
+ '''about_resource: .
name: AboutCode
version: 0.11.0
description: |
@@ -269,7 +279,7 @@ def test_generate_multi_lic_issue_443(self):
result = [a.dumps() for a in abouts][0]
expected = (
-'''about_resource: test
+ '''about_resource: test
name: test
version: '1.5'
licenses:
@@ -294,7 +304,7 @@ def test_generate_multi_lic_issue_444(self):
result = [a.dumps() for a in abouts][0]
expected = (
-'''about_resource: test.c
+ '''about_resource: test.c
name: test.c
licenses:
- key: License1
@@ -305,35 +315,83 @@ def test_generate_multi_lic_issue_444(self):
assert expected == result
def test_generate_license_key_with_custom_file_450_no_fetch(self):
- location = get_test_loc('test_gen/lic_issue_450/custom_and_valid_lic_key_with_file.csv')
+ location = get_test_loc(
+ 'test_gen/lic_issue_450/custom_and_valid_lic_key_with_file.csv')
base_dir = get_temp_dir()
errors, abouts = gen.generate(location, base_dir)
result = [a.dumps() for a in abouts][0]
expected = (
-'''about_resource: test.c
+ '''about_resource: test.c
name: test.c
license_expression: mit AND custom
licenses:
+ - key: mit
+ name: mit
+ - key: custom
+ name: custom
- file: custom.txt
'''
)
assert expected == result
+    def test_generate_with_no_license_key_custom_lic_file(self):
+        """A license_file given without a license key is dumped on its own."""
+        csv_location = get_test_loc(
+            'test_gen/lic_key_custom_lic_file/no_lic_key_with_custom_lic_file.csv')
+        temp_dir = get_temp_dir()
+        _errors, abouts = gen.generate(csv_location, temp_dir)
+
+        # Only the first row of the test file is checked
+        first_about = abouts[0]
+        dumped = first_about.dumps()
+
+        expected = (
+            '''about_resource: test.c
+name: test.c
+licenses:
+  - file: custom.txt
+'''
+        )
+        assert expected == dumped
+
+    def test_generate_with_license_key_custom_lic_file(self):
+        """A single license key is paired with the single custom license_file."""
+        csv_location = get_test_loc(
+            'test_gen/lic_key_custom_lic_file/lic_key_with_custom_lic_file.csv')
+        temp_dir = get_temp_dir()
+        _errors, abouts = gen.generate(csv_location, temp_dir)
+
+        # Only the first row of the test file is checked
+        first_about = abouts[0]
+        dumped = first_about.dumps()
+
+        expected = (
+            '''about_resource: test.c
+name: test.c
+license_expression: custom
+licenses:
+  - key: custom
+    name: custom
+    file: custom.txt
+'''
+        )
+        assert expected == dumped
def test_generate_license_key_with_custom_file_450_with_fetch_with_order(self):
- location = get_test_loc('test_gen/lic_issue_450/custom_and_valid_lic_key_with_file.csv')
+ location = get_test_loc(
+ 'test_gen/lic_issue_450/custom_and_valid_lic_key_with_file.csv')
base_dir = get_temp_dir()
errors, abouts = gen.generate(location, base_dir)
lic_dict = {u'mit': [u'MIT License',
- u'mit.LICENSE',
- u'This component is released under MIT License.',
- u'https://enterprise.dejacode.com/urn/?urn=urn:dje:license:mit',
- u'mit'
- ]}
+ u'mit.LICENSE',
+ u'This component is released under MIT License.',
+ u'https://enterprise.dejacode.com/urn/?urn=urn:dje:license:mit',
+ u'mit'
+ ]}
# The first row from the test file
a = abouts[0]
a.license_key.value.append('mit')
@@ -346,7 +404,7 @@ def test_generate_license_key_with_custom_file_450_with_fetch_with_order(self):
result2 = b.dumps(lic_dict)
expected1 = (
-'''about_resource: test.c
+ '''about_resource: test.c
name: test.c
license_expression: mit AND custom
licenses:
@@ -362,7 +420,7 @@ def test_generate_license_key_with_custom_file_450_with_fetch_with_order(self):
)
expected2 = (
-'''about_resource: test.h
+ '''about_resource: test.h
name: test.h
license_expression: custom AND mit
licenses:
diff --git a/tests/testdata/test_gen/lic_key_custom_lic_file/lic_key_with_custom_lic_file.csv b/tests/testdata/test_gen/lic_key_custom_lic_file/lic_key_with_custom_lic_file.csv
new file mode 100644
index 00000000..16f113e9
--- /dev/null
+++ b/tests/testdata/test_gen/lic_key_custom_lic_file/lic_key_with_custom_lic_file.csv
@@ -0,0 +1,2 @@
+about_resource,name,license_expression,license_file
+test.c,test.c,custom,custom.txt
diff --git a/tests/testdata/test_gen/lic_key_custom_lic_file/no_lic_key_with_custom_lic_file.csv b/tests/testdata/test_gen/lic_key_custom_lic_file/no_lic_key_with_custom_lic_file.csv
new file mode 100644
index 00000000..d36c6304
--- /dev/null
+++ b/tests/testdata/test_gen/lic_key_custom_lic_file/no_lic_key_with_custom_lic_file.csv
@@ -0,0 +1,2 @@
+about_resource,name,license_expression,license_file
+test.c,test.c,,custom.txt