Skip to content

Commit

Permalink
Merge branch '513_attrib_from_spdx' into develop
Browse files Browse the repository at this point in the history
  • Loading branch information
chinyeungli committed Sep 22, 2023
2 parents e2110d2 + 6fe7856 commit d5f8752
Show file tree
Hide file tree
Showing 8 changed files with 305 additions and 52 deletions.
8 changes: 7 additions & 1 deletion CHANGELOG.rst
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,16 @@
Changelog

2023-xx-xx
Release 10.0.1
Release 10.1.0

* Fixed `transform` with nested list #531
* Added curl dependency in Dockerfile #532
* Introduce spdx_license_expression
* Ability to transform spdx license key from spdx_license_expression to
license_expression (i.e. Generate attribution with
spdx_license_expression) #513
* Ability to configure the proxy settings #533
* Fixed licenses issue #534

2023-08-20
Release 10.0.0
Expand Down
75 changes: 73 additions & 2 deletions docs/source/reference.rst
Original file line number Diff line number Diff line change
Expand Up @@ -83,8 +83,8 @@ Options
Purpose
-------

Generate an attribution file which contains license information
from the INPUT along with the license text.
Generate an attribution file which contains license information from the INPUT
along with the license text.

Assume the following:

Expand Down Expand Up @@ -421,6 +421,60 @@ Details
This option tells the tool to show all errors found.
The default behavior will only show 'CRITICAL', 'ERROR', and 'WARNING'
Special Notes
-------------
If the input contains values for license_file, the tool will attempt to
associate the license_file with the corresponding license_key.

sample.csv

+-----------------+--------+--------------------+--------------+
| about_resource  | name   | license_expression | license_file |
+=================+========+====================+==============+
| /project/test.c | test.c | mit AND custom     | custom.txt   |
+-----------------+--------+--------------------+--------------+

If the user does not utilize the **--fetch-license** option, the input will
contain two license keys and one license file. In this scenario, the tool cannot
determine which license key the license file is referencing. As a result, the
license_file will be saved separately.

i.e.

.. code-block:: none
about_resource: test.c
name: test.c
license_expression: mit AND custom
licenses:
- key: mit
name: mit
- key: custom
name: custom
- file: custom.txt
On the other hand, if the user generates ABOUT files using the
**--fetch-license** option, the MIT license will be retrieved and handled first.
This leaves exactly one unmatched license key (custom) and one license file
(custom.txt), so the tool assumes the file belongs to that key and groups them
together.

i.e.

.. code-block:: none
about_resource: test.c
name: test.c
license_expression: mit AND custom
licenses:
- key: mit
name: MIT License
file: mit.LICENSE
url: https://scancode-licensedb.aboutcode.org/mit.LICENSE
spdx_license_key: MIT
- key: custom
name: custom
file: custom.txt
gen_license
===========

Expand Down Expand Up @@ -780,3 +834,20 @@ version 32.0.0 or later. If you are using an earlier version of Scancode Toolkit
specifically version 31 or older, it will only be compatible with prior versions
of AboutCode Toolkit.


Configure proxy
---------------
AboutCode Toolkit uses the `requests` library for its HTTP requests since
version 10.1.0. To send these requests through a proxy, set the standard
environment variables **http_proxy**, **https_proxy**, **no_proxy** and
**all_proxy** (uppercase variants are also supported), for example with the
export statement:

i.e.

.. code-block:: none
$ export HTTP_PROXY="http://10.10.1.10:3128"
$ export HTTPS_PROXY="http://10.10.1.10:1080"
$ export ALL_PROXY="socks5://10.10.1.10:3434"
See https://requests.readthedocs.io/en/latest/user/advanced/#proxies for more
details.
2 changes: 1 addition & 1 deletion src/attributecode/attrib.py
Original file line number Diff line number Diff line change
Expand Up @@ -323,7 +323,7 @@ def generate_and_save(abouts, is_about_input, license_dict, output_location, sca
)

if rendering_error:
errors.extend(rendering_error)
errors.append(rendering_error)

if rendered:
output_location = add_unc(output_location)
Expand Down
110 changes: 90 additions & 20 deletions src/attributecode/model.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,7 @@
from attributecode.util import csv
from attributecode.util import file_fields
from attributecode.util import filter_errors
from attributecode.util import get_spdx_key_and_lic_key_from_licdb
from attributecode.util import is_valid_name
from attributecode.util import on_windows
from attributecode.util import norm
Expand Down Expand Up @@ -802,6 +803,7 @@ def set_standard_fields(self):
('license_name', ListField()),
('license_file', FileTextField()),
('license_url', UrlListField()),
('spdx_license_expression', StringField()),
('spdx_license_key', ListField()),
('copyright', StringField()),
('notice_file', FileTextField()),
Expand Down Expand Up @@ -1222,6 +1224,13 @@ def dumps(self, licenses_dict=None):
else:
if field.value:
data[field.name] = field.value
# If there is no license_key value, parse the license_expression
# and get the parsed license key
if 'license_expression' in data:
if not license_key and data['license_expression']:
_spec_char, lic_list = parse_license_expression(
data['license_expression'])
license_key = lic_list

# Group the same license information in a list
# This `licenses_dict` is a dictionary with license key as the key and the
Expand All @@ -1244,20 +1253,35 @@ def dumps(self, licenses_dict=None):
lic_dict['spdx_license_key'] = spdx_lic_key

# Remove the license information if it has been handled
lic_key_copy.remove(lic_key)
if lic_name in license_name:
license_name.remove(lic_name)
if lic_url in license_url:
license_url.remove(lic_url)
if lic_filename in license_file:
license_file.remove(lic_filename)
if spdx_lic_key in spdx_license_key:
spdx_license_key.remove(spdx_lic_key)
lic_dict_list.append(lic_dict)
# The following condition is to check if license information
# has been fetched, the license key is invalid or custom if
# no value for lic_name
if lic_name:
lic_key_copy.remove(lic_key)
if lic_name in license_name:
license_name.remove(lic_name)
if lic_url in license_url:
license_url.remove(lic_url)
if lic_filename in license_file:
license_file.remove(lic_filename)
if spdx_lic_key in spdx_license_key:
spdx_license_key.remove(spdx_lic_key)
lic_dict_list.append(lic_dict)

# Handle license information that have not been handled.
license_group = list(zip_longest(
lic_key_copy, license_name, license_file, license_url, spdx_license_key))
# If the len of the lic_key is the same as the lic_file, the tool should
# assume the lic_file (custom license) is referring this specific lic_key
# otherwise, the tool shouldn't group them
if len(lic_key_copy) == len(license_file):
license_group = list(zip_longest(
lic_key_copy, license_name, license_file, license_url, spdx_license_key))
else:
license_group = list(zip_longest(
lic_key_copy, license_name, [], license_url, spdx_license_key))
# Add the unhandled_lic_file if any
if license_file:
for lic_file in license_file:
license_group.append((None, None, lic_file, None, None))

for lic_group in license_group:
lic_dict = {}
Expand All @@ -1278,15 +1302,15 @@ def dumps(self, licenses_dict=None):
lic_dict_list.append(lic_dict)

# Format the license information in the same order of the license expression
if license_key:
for key in license_key:
for lic_dict in lic_dict_list:
if key == lic_dict['key']:
data.setdefault('licenses', []).append(lic_dict)
break
else:
for key in license_key:
for lic_dict in lic_dict_list:
data.setdefault('licenses', []).append(lic_dict)
if key == lic_dict['key']:
data.setdefault('licenses', []).append(lic_dict)
lic_dict_list.remove(lic_dict)
break

for lic_dict in lic_dict_list:
data.setdefault('licenses', []).append(lic_dict)

return saneyaml.dump(data)

Expand Down Expand Up @@ -1764,6 +1788,7 @@ def pre_process_and_fetch_license_dict(abouts, from_check=False, api_url=None, a
if errors:
return key_text_dict, errors

spdx_sclickey_dict = get_spdx_key_and_lic_key_from_licdb()
for about in abouts:
# No need to go through all the about objects if '--api_key' is invalid
auth_error = Error(
Expand All @@ -1779,6 +1804,27 @@ def pre_process_and_fetch_license_dict(abouts, from_check=False, api_url=None, a
about.license_expression.value = lic_exp
about.license_expression.present = True

if not about.license_expression.value and about.spdx_license_expression.value:
lic_exp_value = ""
special_char_in_expression, lic_list = parse_license_expression(
about.spdx_license_expression.value)
if special_char_in_expression:
msg = (about.about_file_path + u": The following character(s) cannot be in the spdx_license_expression: " +
str(special_char_in_expression))
errors.append(Error(ERROR, msg))
else:
spdx_lic_exp_segment = about.spdx_license_expression.value.split()
for spdx_lic_key in spdx_lic_exp_segment:
if lic_exp_value:
lic_exp_value = lic_exp_value + " " + convert_spdx_expression_to_lic_expression(
spdx_lic_key, spdx_sclickey_dict)
else:
lic_exp_value = convert_spdx_expression_to_lic_expression(
spdx_lic_key, spdx_sclickey_dict)
if lic_exp_value:
about.license_expression.value = lic_exp_value
about.license_expression.present = True

if about.license_expression.value:
special_char_in_expression, lic_list = parse_license_expression(
about.license_expression.value)
Expand Down Expand Up @@ -1855,6 +1901,30 @@ def pre_process_and_fetch_license_dict(abouts, from_check=False, api_url=None, a
return key_text_dict, errors


def convert_spdx_expression_to_lic_expression(spdx_key, spdx_lic_dict):
    """
    Return the license_expression segment matching one whitespace-separated
    segment `spdx_key` of a spdx_license_expression.

    `spdx_lic_dict` maps SPDX license keys to license keys. A segment found
    in the mapping is replaced by its mapped value. Leading "(" and trailing
    ")" characters are peeled off one at a time, the remainder is translated,
    and the parentheses are put back. Anything else (an operator, or a key
    with no match) is returned unchanged.
    """
    # Direct hit: the segment is a known SPDX key.
    if spdx_key in spdx_lic_dict:
        return spdx_lic_dict[spdx_key]

    # Peel one opening parenthesis and translate what follows it.
    if spdx_key.startswith('('):
        remainder = spdx_key[1:]
        return '(' + convert_spdx_expression_to_lic_expression(
            remainder, spdx_lic_dict)

    # Peel one closing parenthesis and translate what precedes it.
    if spdx_key.endswith(')'):
        remainder = spdx_key[:-1]
        return convert_spdx_expression_to_lic_expression(
            remainder, spdx_lic_dict) + ')'

    # This can be operator or key that don't have match
    return spdx_key


def parse_license_expression(lic_expression):
licensing = Licensing()
lic_list = []
Expand Down
44 changes: 44 additions & 0 deletions src/attributecode/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -192,6 +192,50 @@ def norm(p):
return p


def get_spdx_key_and_lic_key_from_licdb():
    """
    Return a dictionary built from all the licenses listed in the LicenseDB
    index. The "spdx_license_key" (and every entry of
    "other_spdx_license_keys") of a license is a key of the dictionary and
    the "license_key" of that license is the value.

    An empty dictionary is returned if the index request does not answer
    with status code 200. Network failures, including the request timing
    out, are raised by `requests` and are not handled here.
    """
    import requests

    # URL of the license index
    url = "https://scancode-licensedb.aboutcode.org/index.json"

    # Sample of one of the license in the index.json
    # {
    #     "license_key": "bsd-new",
    #     "category": "Permissive",
    #     "spdx_license_key": "BSD-3-Clause",
    #     "other_spdx_license_keys": [
    #         "LicenseRef-scancode-libzip"
    #     ],
    #     "is_exception": false,
    #     "is_deprecated": false,
    #     "json": "bsd-new.json",
    #     "yaml": "bsd-new.yml",
    #     "html": "bsd-new.html",
    #     "license": "bsd-new.LICENSE"
    # },
    lic_dict = {}

    # Always pass a timeout: without one, `requests` can wait forever on an
    # unresponsive server and hang the whole run.
    response = requests.get(url, timeout=30)

    # Only a successful request (status code 200) carries the index
    if response.status_code != 200:
        return lic_dict

    # Retrieve the JSON data from the response
    for lic_entry in response.json():
        lic_key = lic_entry.get('license_key')
        if not lic_key:
            # Nothing to map to; skip the malformed entry
            continue

        # Use .get() so that an entry lacking an optional field does not
        # abort the whole index with a KeyError.
        spdx_key = lic_entry.get('spdx_license_key')
        if spdx_key:
            lic_dict[spdx_key] = lic_key

        for other_spdx in lic_entry.get('other_spdx_license_keys') or []:
            lic_dict[other_spdx] = lic_key

    return lic_dict


def get_relative_path(base_loc, full_loc):
"""
Return a posix path for a given full location relative to a base location.
Expand Down
Loading

0 comments on commit d5f8752

Please sign in to comment.