diff --git a/CHANGELOG.rst b/CHANGELOG.rst index e359d177..717a00de 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -2,10 +2,16 @@ Changelog 2023-xx-xx - Release 10.0.1 + Release 10.1.0 * Fixed `transform` with nested list #531 * Added curl dependency in Dockerfile #532 + * Introduce spdx_license_expression + * Ability to transform spdx license key from spdx_license_expression to + license_expression (i.e. Generate attribution with + spdx_license_expression) #513 + * Ability to configure the proxy settings #533 + * Fixed licenses issue #534 2023-08-20 Release 10.0.0 diff --git a/docs/source/reference.rst b/docs/source/reference.rst index 48765718..3660bec8 100644 --- a/docs/source/reference.rst +++ b/docs/source/reference.rst @@ -83,8 +83,8 @@ Options Purpose ------- -Generate an attribution file which contains license information -from the INPUT along with the license text. +Generate an attribution file which contains license information from the INPUT +along with the license text. Assume the following: @@ -421,6 +421,60 @@ Details This option tells the tool to show all errors found. The default behavior will only show 'CRITICAL', 'ERROR', and 'WARNING' +Special Notes +------------- +If the input contains values for license_file, the tool will attempt to +associate the license_file with the corresponding license_key. + +sample.csv + ++----------------+------+---------------------+--------------+ +| about_resource | name | license_expression | license_file | ++================+======+=====================+==============+ +| /project/test.c| test.c | mit AND custom | custom.txt | ++----------------+------+---------------------+--------------+ + +If the user does not utilize the **--fetch-license** option, the input will +contain two license keys and one license file. In this scenario, the tool cannot +determine which license key the license file is referencing. As a result, the +license_file will be saved separately. + +i.e. + + .. 
code-block:: none + + about_resource: test.c + name: test.c + license_expression: mit AND custom + licenses: + - key: mit + name: mit + - key: custom + name: custom + - file: custom.txt + +On the other hand, if the user generates ABOUT files using the +**--fetch-license** option, the MIT license will be retrieved. This will result +in having one license key and one license file. In such cases, the tool will +consider it a successful match. + +i.e. + + .. code-block:: none + + about_resource: test.c + name: test.c + license_expression: mit AND custom + licenses: + - key: mit + name: MIT License + file: mit.LICENSE + url: https://scancode-licensedb.aboutcode.org/mit.LICENSE + spdx_license_key: MIT + - key: custom + name: custom + file: custom.txt + gen_license =========== @@ -780,3 +834,20 @@ version 32.0.0 or later. If you are using an earlier version of Scancode Toolkit specifically version 31 or older, it will only be compatible with prior versions of AboutCode Toolkit. + +Configure proxy +--------------- +The `requests` library is used since AboutCode Toolkit version 10.1.0. To do the +http request, users can set the standard environment variables **http_proxy**, +**https_proxy**, **no_proxy**, **all_proxy** with the export statement + +i.e. + + .. 
code-block:: none + + $ export HTTP_PROXY="http://10.10.1.10:3128" + $ export HTTPS_PROXY="http://10.10.1.10:1080" + $ export ALL_PROXY="socks5://10.10.1.10:3434" + +See https://requests.readthedocs.io/en/latest/user/advanced/#proxies for +references diff --git a/src/attributecode/attrib.py b/src/attributecode/attrib.py index 48a61095..6c0207a7 100644 --- a/src/attributecode/attrib.py +++ b/src/attributecode/attrib.py @@ -323,7 +323,7 @@ def generate_and_save(abouts, is_about_input, license_dict, output_location, sca ) if rendering_error: - errors.extend(rendering_error) + errors.append(rendering_error) if rendered: output_location = add_unc(output_location) diff --git a/src/attributecode/model.py b/src/attributecode/model.py index 13d97ab2..0ea0ae6d 100644 --- a/src/attributecode/model.py +++ b/src/attributecode/model.py @@ -55,6 +55,7 @@ from attributecode.util import csv from attributecode.util import file_fields from attributecode.util import filter_errors +from attributecode.util import get_spdx_key_and_lic_key_from_licdb from attributecode.util import is_valid_name from attributecode.util import on_windows from attributecode.util import norm @@ -802,6 +803,7 @@ def set_standard_fields(self): ('license_name', ListField()), ('license_file', FileTextField()), ('license_url', UrlListField()), + ('spdx_license_expression', StringField()), ('spdx_license_key', ListField()), ('copyright', StringField()), ('notice_file', FileTextField()), @@ -1222,6 +1224,13 @@ def dumps(self, licenses_dict=None): else: if field.value: data[field.name] = field.value + # If there is no license_key value, parse the license_expression + # and get the parsed license key + if 'license_expression' in data: + if not license_key and data['license_expression']: + _spec_char, lic_list = parse_license_expression( + data['license_expression']) + license_key = lic_list # Group the same license information in a list # This `licenses_dict` is a dictionary with license key as the key and the @@ 
-1244,20 +1253,35 @@ def dumps(self, licenses_dict=None): lic_dict['spdx_license_key'] = spdx_lic_key # Remove the license information if it has been handled - lic_key_copy.remove(lic_key) - if lic_name in license_name: - license_name.remove(lic_name) - if lic_url in license_url: - license_url.remove(lic_url) - if lic_filename in license_file: - license_file.remove(lic_filename) - if spdx_lic_key in spdx_license_key: - spdx_license_key.remove(spdx_lic_key) - lic_dict_list.append(lic_dict) + # The following condition is to check if license information + # has been fetched, the license key is invalid or custom if + # no value for lic_name + if lic_name: + lic_key_copy.remove(lic_key) + if lic_name in license_name: + license_name.remove(lic_name) + if lic_url in license_url: + license_url.remove(lic_url) + if lic_filename in license_file: + license_file.remove(lic_filename) + if spdx_lic_key in spdx_license_key: + spdx_license_key.remove(spdx_lic_key) + lic_dict_list.append(lic_dict) # Handle license information that have not been handled. 
- license_group = list(zip_longest( - lic_key_copy, license_name, license_file, license_url, spdx_license_key)) + # If the len of the lic_key is the same as the lic_file, the tool should + # assume the lic_file (custom license) is referring this specific lic_key + # otherwise, the tool shouldn't group them + if len(lic_key_copy) == len(license_file): + license_group = list(zip_longest( + lic_key_copy, license_name, license_file, license_url, spdx_license_key)) + else: + license_group = list(zip_longest( + lic_key_copy, license_name, [], license_url, spdx_license_key)) + # Add the unhandled_lic_file if any + if license_file: + for lic_file in license_file: + license_group.append((None, None, lic_file, None, None)) for lic_group in license_group: lic_dict = {} @@ -1278,15 +1302,15 @@ def dumps(self, licenses_dict=None): lic_dict_list.append(lic_dict) # Format the license information in the same order of the license expression - if license_key: - for key in license_key: - for lic_dict in lic_dict_list: - if key == lic_dict['key']: - data.setdefault('licenses', []).append(lic_dict) - break - else: + for key in license_key: for lic_dict in lic_dict_list: - data.setdefault('licenses', []).append(lic_dict) + if key == lic_dict['key']: + data.setdefault('licenses', []).append(lic_dict) + lic_dict_list.remove(lic_dict) + break + + for lic_dict in lic_dict_list: + data.setdefault('licenses', []).append(lic_dict) return saneyaml.dump(data) @@ -1764,6 +1788,7 @@ def pre_process_and_fetch_license_dict(abouts, from_check=False, api_url=None, a if errors: return key_text_dict, errors + spdx_sclickey_dict = get_spdx_key_and_lic_key_from_licdb() for about in abouts: # No need to go through all the about objects if '--api_key' is invalid auth_error = Error( @@ -1779,6 +1804,27 @@ def pre_process_and_fetch_license_dict(abouts, from_check=False, api_url=None, a about.license_expression.value = lic_exp about.license_expression.present = True + if not about.license_expression.value and 
def convert_spdx_expression_to_lic_expression(spdx_key, spdx_lic_dict):
    """
    Return the license_expression form of `spdx_key`, a single
    whitespace-delimited segment of an spdx_license_expression.

    `spdx_lic_dict` maps SPDX license keys to license keys. When `spdx_key`
    is found in the mapping, the mapped license key is returned. Otherwise
    leading "(" and trailing ")" characters are peeled off one at a time
    until the remainder is found in the mapping or no parenthesis is left,
    and the peeled parentheses are re-attached around the result.

    A segment that has no match (an operator such as AND/OR/WITH, or an
    unknown key) is returned unchanged. No error is raised or returned for
    unmatched keys.
    """
    opening = 0
    closing = 0
    token = spdx_key

    # Iterate instead of recursing so that a segment with a long run of
    # parentheses cannot exhaust the interpreter recursion limit.
    while token not in spdx_lic_dict:
        if token.startswith('('):
            opening += 1
            token = token[1:]
        elif token.endswith(')'):
            closing += 1
            token = token[:-1]
        else:
            # This can be an operator or a key that does not have a match:
            # keep it as is.
            break
    else:
        # The loop ended because the token is a known SPDX key.
        token = spdx_lic_dict[token]

    if not (opening or closing):
        # Nothing was peeled off: return the value untouched.
        return token
    return '(' * opening + token + ')' * closing
def get_spdx_key_and_lic_key_from_licdb():
    """
    Return a dictionary built from the licenses listed in the LicenseDB
    index. The "spdx_license_key" (and every entry of
    "other_spdx_license_keys") is the key of the dictionary and the
    "license_key" is the value of the dictionary.

    Return an empty dictionary when the index cannot be retrieved (request
    failure, timeout, non-200 status code) or cannot be decoded as JSON.
    """
    import requests

    # URL of the license index
    url = "https://scancode-licensedb.aboutcode.org/index.json"

    # Sample of one of the license in the index.json
    # {
    #     "license_key": "bsd-new",
    #     "category": "Permissive",
    #     "spdx_license_key": "BSD-3-Clause",
    #     "other_spdx_license_keys": [
    #         "LicenseRef-scancode-libzip"
    #     ],
    #     "is_exception": false,
    #     "is_deprecated": false,
    #     "json": "bsd-new.json",
    #     "yaml": "bsd-new.yml",
    #     "html": "bsd-new.html",
    #     "license": "bsd-new.LICENSE"
    # },

    lic_dict = {}

    try:
        # Without a timeout, requests waits forever on an unresponsive
        # server or proxy.
        response = requests.get(url, timeout=30)
    except requests.RequestException:
        # Same outcome as an unsuccessful status code: no mapping available.
        return lic_dict

    # Check if the request was successful (status code 200)
    if response.status_code != 200:
        return lic_dict

    try:
        # Retrieve the JSON data from the response
        licenses_index = response.json()
    except ValueError:
        # The body is not valid JSON (e.g. an HTML error page from a proxy).
        return lic_dict

    for lic_entry in licenses_index:
        lic_key = lic_entry.get('license_key')
        if not lic_key:
            continue

        # Skip a missing/null SPDX key rather than storing it under None.
        spdx_key = lic_entry.get('spdx_license_key')
        if spdx_key:
            lic_dict[spdx_key] = lic_key

        for other_spdx in lic_entry.get('other_spdx_license_keys') or []:
            lic_dict[other_spdx] = lic_key

    return lic_dict
diff --git a/tests/test_gen.py b/tests/test_gen.py index 06a1ea38..6feeef71 100644 --- a/tests/test_gen.py +++ b/tests/test_gen.py @@ -32,13 +32,15 @@ class GenTest(unittest.TestCase): def test_check_duplicated_columns(self): test_file = get_test_loc('test_gen/dup_keys.csv') - expected = [Error(ERROR, 'Duplicated column name(s): copyright with copyright\nPlease correct the input and re-run.')] + expected = [Error( + ERROR, 'Duplicated column name(s): copyright with copyright\nPlease correct the input and re-run.')] result = gen.check_duplicated_columns(test_file) assert expected == result def test_check_duplicated_columns_handles_lower_upper_case(self): test_file = get_test_loc('test_gen/dup_keys_with_diff_case.csv') - expected = [Error(ERROR, 'Duplicated column name(s): copyright with Copyright\nPlease correct the input and re-run.')] + expected = [Error( + ERROR, 'Duplicated column name(s): copyright with Copyright\nPlease correct the input and re-run.')] result = gen.check_duplicated_columns(test_file) assert expected == result @@ -47,15 +49,17 @@ def test_check_duplicated_about_resource(self): arp1 = '/test/test.c' arp2 = '/test/tmp/test.c' expected = Error(CRITICAL, - "The input has duplicated values in 'about_resource' field: " + arp1) + "The input has duplicated values in 'about_resource' field: " + arp1) result1 = gen.check_duplicated_about_resource(arp1, arp_list) result2 = gen.check_duplicated_about_resource(arp2, arp_list) assert result1 == expected assert result2 == '' def test_check_newline_in_file_field(self): - test_dict1 = {'about_resource': '/test/test.c', 'name': 'test.c', 'notice_file': 'NOTICE\nNOTICE2'} - test_dict2 = {'about_resource': '/test/test.c', 'name': 'test.c', 'notice_file': 'NOTICE, NOTICE2'} + test_dict1 = {'about_resource': '/test/test.c', + 'name': 'test.c', 'notice_file': 'NOTICE\nNOTICE2'} + test_dict2 = {'about_resource': '/test/test.c', + 'name': 'test.c', 'notice_file': 'NOTICE, NOTICE2'} expected = [ Error(CRITICAL, "New 
line character detected in 'notice_file' for '/test/test.c' which is not supported." @@ -69,7 +73,7 @@ def test_check_about_resource_filename(self): arp1 = '/test/t@est.c' arp2 = '/test/t|est.c' msg = ("Invalid characters present in 'about_resource' " - "field: " + arp2) + "field: " + arp2) expected2 = Error(ERROR, msg) result1 = gen.check_about_resource_filename(arp1) result2 = gen.check_about_resource_filename(arp2) @@ -85,7 +89,7 @@ def test_load_inventory(self): assert len(errors) == expected_num_errors expected = ( -'''about_resource: . + '''about_resource: . name: AboutCode version: 0.11.0 description: | @@ -103,8 +107,10 @@ def test_load_inventory_without_about_resource(self): location = get_test_loc('test_gen/inv_no_about_resource.csv') base_dir = get_temp_dir() from_attrib = False - errors, abouts = gen.load_inventory(location, base_dir=base_dir, from_attrib=from_attrib) - expected_error = [Error(CRITICAL, "The essential field 'about_resource' is not found in the ")] + errors, abouts = gen.load_inventory( + location, base_dir=base_dir, from_attrib=from_attrib) + expected_error = [Error( + CRITICAL, "The essential field 'about_resource' is not found in the ")] assert errors == expected_error assert abouts == [] @@ -113,16 +119,20 @@ def test_load_inventory_without_about_resource_from_attrib(self): location = get_test_loc('test_gen/inv_no_about_resource.csv') base_dir = get_temp_dir() from_attrib = True - errors, abouts = gen.load_inventory(location, base_dir=base_dir, from_attrib=from_attrib) + errors, abouts = gen.load_inventory( + location, base_dir=base_dir, from_attrib=from_attrib) expected_num_errors = 0 assert len(errors) == expected_num_errors expected = ( -'''about_resource: . + '''about_resource: . 
name: AboutCode version: 0.11.0 license_expression: apache-2.0 +licenses: + - key: apache-2.0 + name: apache-2.0 ''' ) result = [a.dumps() for a in abouts] @@ -133,7 +143,8 @@ def test_load_inventory_with_errors(self): base_dir = get_temp_dir() errors, abouts = gen.load_inventory(location, base_dir=base_dir) expected_errors = [ - Error(WARNING, "Field name: ['confirmed copyright'] contains illegal name characters (or empty spaces) and is ignored."), + Error( + WARNING, "Field name: ['confirmed copyright'] contains illegal name characters (or empty spaces) and is ignored."), Error(INFO, 'Field about_resource: Path'), Error(INFO, "Field ['resource', 'test'] is a custom field.") ] @@ -173,7 +184,6 @@ def test_load_inventory_simple_xlsx(self): assert abouts[0].license_expression.value == 'bsd-new and mit' assert abouts[1].license_expression.value == 'mit' - def test_load_scancode_json(self): location = get_test_loc('test_gen/load/clean-text-0.3.0-lceupi.json') inventory = gen.load_scancode_json(location) @@ -192,9 +202,9 @@ def test_load_scancode_json(self): # We will only check the first element in the inventory list assert inventory[0] == expected - def test_generation_dir_endswith_space(self): - location = get_test_loc('test_gen/inventory/complex/about_file_path_dir_endswith_space.csv') + location = get_test_loc( + 'test_gen/inventory/complex/about_file_path_dir_endswith_space.csv') base_dir = get_temp_dir() errors, _abouts = gen.generate(location, base_dir) expected_errors_msg1 = 'contains directory name ends with spaces which is not allowed. Generation skipped.' @@ -248,7 +258,7 @@ def test_generate(self): result = [a.dumps() for a in abouts][0] expected = ( -'''about_resource: . + '''about_resource: . 
name: AboutCode version: 0.11.0 description: | @@ -269,7 +279,7 @@ def test_generate_multi_lic_issue_443(self): result = [a.dumps() for a in abouts][0] expected = ( -'''about_resource: test + '''about_resource: test name: test version: '1.5' licenses: @@ -294,7 +304,7 @@ def test_generate_multi_lic_issue_444(self): result = [a.dumps() for a in abouts][0] expected = ( -'''about_resource: test.c + '''about_resource: test.c name: test.c licenses: - key: License1 @@ -305,35 +315,83 @@ def test_generate_multi_lic_issue_444(self): assert expected == result def test_generate_license_key_with_custom_file_450_no_fetch(self): - location = get_test_loc('test_gen/lic_issue_450/custom_and_valid_lic_key_with_file.csv') + location = get_test_loc( + 'test_gen/lic_issue_450/custom_and_valid_lic_key_with_file.csv') base_dir = get_temp_dir() errors, abouts = gen.generate(location, base_dir) result = [a.dumps() for a in abouts][0] expected = ( -'''about_resource: test.c + '''about_resource: test.c name: test.c license_expression: mit AND custom licenses: + - key: mit + name: mit + - key: custom + name: custom - file: custom.txt ''' ) assert expected == result + def test_generate_with_no_license_key_custom_lic_file(self): + location = get_test_loc( + 'test_gen/lic_key_custom_lic_file/no_lic_key_with_custom_lic_file.csv') + base_dir = get_temp_dir() + + errors, abouts = gen.generate(location, base_dir) + + # The first row from the test file + a = abouts[0] + result1 = a.dumps() + + expected1 = ( + '''about_resource: test.c +name: test.c +licenses: + - file: custom.txt +''' + ) + assert expected1 == result1 + + def test_generate_with_license_key_custom_lic_file(self): + location = get_test_loc( + 'test_gen/lic_key_custom_lic_file/lic_key_with_custom_lic_file.csv') + base_dir = get_temp_dir() + + errors, abouts = gen.generate(location, base_dir) + + # The first row from the test file + a = abouts[0] + result1 = a.dumps() + + expected1 = ( + '''about_resource: test.c +name: test.c 
+license_expression: custom +licenses: + - key: custom + name: custom + file: custom.txt +''' + ) + assert expected1 == result1 def test_generate_license_key_with_custom_file_450_with_fetch_with_order(self): - location = get_test_loc('test_gen/lic_issue_450/custom_and_valid_lic_key_with_file.csv') + location = get_test_loc( + 'test_gen/lic_issue_450/custom_and_valid_lic_key_with_file.csv') base_dir = get_temp_dir() errors, abouts = gen.generate(location, base_dir) lic_dict = {u'mit': [u'MIT License', - u'mit.LICENSE', - u'This component is released under MIT License.', - u'https://enterprise.dejacode.com/urn/?urn=urn:dje:license:mit', - u'mit' - ]} + u'mit.LICENSE', + u'This component is released under MIT License.', + u'https://enterprise.dejacode.com/urn/?urn=urn:dje:license:mit', + u'mit' + ]} # The first row from the test file a = abouts[0] a.license_key.value.append('mit') @@ -346,7 +404,7 @@ def test_generate_license_key_with_custom_file_450_with_fetch_with_order(self): result2 = b.dumps(lic_dict) expected1 = ( -'''about_resource: test.c + '''about_resource: test.c name: test.c license_expression: mit AND custom licenses: @@ -362,7 +420,7 @@ def test_generate_license_key_with_custom_file_450_with_fetch_with_order(self): ) expected2 = ( -'''about_resource: test.h + '''about_resource: test.h name: test.h license_expression: custom AND mit licenses: diff --git a/tests/testdata/test_gen/lic_key_custom_lic_file/lic_key_with_custom_lic_file.csv b/tests/testdata/test_gen/lic_key_custom_lic_file/lic_key_with_custom_lic_file.csv new file mode 100644 index 00000000..16f113e9 --- /dev/null +++ b/tests/testdata/test_gen/lic_key_custom_lic_file/lic_key_with_custom_lic_file.csv @@ -0,0 +1,2 @@ +about_resource,name,license_expression,license_file +test.c,test.c,custom,custom.txt diff --git a/tests/testdata/test_gen/lic_key_custom_lic_file/no_lic_key_with_custom_lic_file.csv b/tests/testdata/test_gen/lic_key_custom_lic_file/no_lic_key_with_custom_lic_file.csv new file mode 
100644 index 00000000..d36c6304 --- /dev/null +++ b/tests/testdata/test_gen/lic_key_custom_lic_file/no_lic_key_with_custom_lic_file.csv @@ -0,0 +1,2 @@ +about_resource,name,license_expression,license_file +test.c,test.c,,custom.txt