Skip to content

Commit

Permalink
Automatically map scancode data to NormalizedLicenses (#231)
Browse files Browse the repository at this point in the history
* Core implementation of automatic mapping of scancode generated RawLicenses to NormalizedLicenses (#230)

* completion of own license info

(cherry picked from commit 94359b68a42a554846d6ba66a60cad4f942ef864)

* Update documentation

* add ignorelist

* change logic for detecting if dataStatus is WITH_ISSUES to use regex list

* Refactoring of regex list processing

* some cleanup

* fixed spelling issues
  • Loading branch information
ohecker authored Feb 7, 2024
1 parent 629f447 commit c894348
Show file tree
Hide file tree
Showing 15 changed files with 832 additions and 18 deletions.
5 changes: 5 additions & 0 deletions core/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -163,6 +163,11 @@
<artifactId>java-jwt</artifactId>
<version>4.4.0</version>
</dependency>
<dependency>
<groupId>org.spdx</groupId>
<artifactId>java-spdx-library</artifactId>
<version>1.1.10</version>
</dependency>
<dependency>
<groupId>${project.groupId}</groupId>
<artifactId>solicitor-documentation</artifactId>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,6 @@ public enum LogMessages {
MULTIPLE_DECISIONTABLES(51, "Multiple decision tables in both .xls and .csv format. Prioritizing '{}.xls'."), //
ADDING_ADDITIONALWRITER_CONFIG(52, "Merging config: Adding additional writers to base config from {}"), //
NOT_A_VALID_NPM_PACKAGE_IDENTIFIER(53, "{} is not a valid identifier for an NPM package"), //
NOT_A_VALID_NPM_PACKAGE_NAME(54, "{} is not a valid name for an NPM package"), //
SCANCODE_PROCESSOR_STARTING(54,
"Experimental feature ACTIVE: Start enriching the inventory data with Scancode data (as far as available)"), //
SCANCODE_FEATURE_DEACTIVATED(55,
Expand All @@ -88,7 +87,19 @@ public enum LogMessages {
CLASSPATHEXCEPTION_WITHOUT_GPL(60, "ClassPathException was found but no GPL License exists for {}"), //
CLASSPATHEXCEPTION_MULTIPLE_GPL(61, "ClassPathException was found but there are multiple GPL Licenses for {}"), //
CYCLONEDX_UNSUPPORTED_PURL(62,
"The CycloneDX file contains the PackageURL '{}' with unsupported type which will be ignored. Solicitor reports might be incomplete.");
"The CycloneDX file contains the PackageURL '{}' with unsupported type which will be ignored. Solicitor reports might be incomplete."), //
SCANCODE_AUTOMAPPING_STARTED(63,
"Attempting to automatically map scancode license id to create NormalizedLicense objects. Blacklist: '{}', Ignorelist: '{}'"), //
SCANCODE_AUTOMAPPING_FEATURE_DEACTIVATED(64,
"The feature of attempting to automatically map scancode license ids is DEACTIVATED"), //
SCANCODE_NO_MAPPING(65,
"The license info '{}' from Scancode could not be mapped to OSS-SPDX or SCANCODE type license info"), //
SCANCODE_MAPPING_STATISTICS(66,
"Statistics for automatic mapping of scancode license ids to NormalizedLicenses: Total processed: {}, skipped due to blacklist: {}, "
+ "skipped due to unkown SPDX: {}, mapped using type SCANCODE: {}, mapped using type OSS-SPDX: {}, mapped to IGNORE: {}"), //
NOT_A_VALID_NPM_PACKAGE_NAME(67, "{} is not a valid name for an NPM package"), //
SCANCODE_ISSUE_DETECTION_REGEX(68,
"The list of regular expressions for detecting licenses from scancode having issues is set to '{}'");

private final String message;

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,102 @@
/**
* SPDX-License-Identifier: Apache-2.0
*/
package com.devonfw.tools.solicitor.common;

import java.util.ArrayList;
import java.util.List;
import java.util.function.Predicate;
import java.util.regex.Pattern;
import java.util.regex.PatternSyntaxException;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
* Encapsulates a list of regular expression {@link Pattern}s and allows checking if a string matches any of those
* patterns.
*/
public class RegexListPredicate implements Predicate<String> {

  private static final Logger LOG = LoggerFactory.getLogger(RegexListPredicate.class);

  /** The compiled patterns. Never <code>null</code> and never contains <code>null</code> elements. */
  private Pattern[] patterns = new Pattern[0];

  /**
   * The constructor. Creates a predicate with an empty pattern list, which does not match any string.
   */
  public RegexListPredicate() {

    super();
  }

  /**
   * Checks if the given argument matches any of the configured regular expression patterns. A pattern matches only if
   * it matches the <b>entire</b> argument (see {@link java.util.regex.Matcher#matches()}).
   *
   * @param t the string to be checked; <code>null</code> never matches
   * @param debugLogTemplate template to be used for creating a debug log message if the argument matches. This template
   *        should have two placeholders <code>{}</code> which will be filled with the value of the argument and the
   *        matching pattern. If this parameter is set to <code>null</code> then no debug logging will be done.
   * @return <code>true</code> if the argument matches any of the patterns, <code>false</code> otherwise.
   */
  public boolean test(String t, String debugLogTemplate) {

    if (t == null) {
      return false;
    }
    for (Pattern p : this.patterns) {
      if (p.matcher(t).matches()) {
        if (debugLogTemplate != null && LOG.isDebugEnabled()) {
          LOG.debug(debugLogTemplate, t, p.toString());
        }
        // the first matching pattern decides; remaining patterns are not evaluated
        return true;
      }
    }
    return false;
  }

  /**
   * Tests the predicate without any debug logging.
   *
   * @param t the argument to be tested
   * @return <code>true</code> if the argument matches any of the patterns, <code>false</code> otherwise.
   * @see #test(String, String)
   * @see Predicate#test(Object)
   */
  @Override
  public boolean test(String t) {

    return test(t, null);
  }

  /**
   * Sets the regular expressions to be tested by this object. The new list replaces the previous one only if
   * <b>all</b> expressions compile successfully; if compilation of any expression fails, the previously configured
   * patterns remain in effect. Passing <code>null</code> leaves the current patterns unchanged.
   *
   * @param regexes Array of strings, each being a valid regular expression as defined in {@link Pattern}.
   * @throws PatternSyntaxException If any of the expressions have invalid syntax
   */
  public void setRegexes(String[] regexes) {

    if (regexes == null) {
      return;
    }
    // Compile into a local array first so that a failure halfway through does not leave
    // this.patterns populated with null elements (which would break subsequent test() calls).
    Pattern[] compiled = new Pattern[regexes.length];
    for (int i = 0; i < regexes.length; i++) {
      compiled[i] = Pattern.compile(regexes[i]);
    }
    this.patterns = compiled;
  }

  /**
   * Returns the list of configured regular expressions as a String
   *
   * @return The regexes as a single string, separated via comma. Empty string if no regexes are configured.
   */
  public String getRegexesAsString() {

    List<String> patternStrings = new ArrayList<>(this.patterns.length);
    for (Pattern p : this.patterns) {
      patternStrings.add(p.toString());
    }
    return String.join(",", patternStrings);
  }

}
Original file line number Diff line number Diff line change
@@ -1,9 +1,13 @@
package com.devonfw.tools.solicitor.componentinfo.curation;

import java.util.Arrays;
import java.util.Collection;
import java.util.List;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.beans.factory.annotation.Value;

import com.devonfw.tools.solicitor.common.LogMessages;
import com.devonfw.tools.solicitor.common.RegexListPredicate;
import com.devonfw.tools.solicitor.componentinfo.ComponentInfo;
import com.devonfw.tools.solicitor.componentinfo.ComponentInfoAdapter;
import com.devonfw.tools.solicitor.componentinfo.ComponentInfoAdapterException;
Expand All @@ -18,10 +22,16 @@
*/
public class CuratingComponentInfoAdapter implements ComponentInfoAdapter {

private static final Logger LOG = LoggerFactory.getLogger(CuratingComponentInfoAdapter.class);

private FilteredComponentInfoProvider filteredComponentInfoProvider;

private ComponentInfoCurator componentInfoCurator;

private RegexListPredicate licenseIdIssuesPredicate = new RegexListPredicate();

private boolean regexesLogged = false;

/**
* The constructor.
*
Expand Down Expand Up @@ -86,16 +96,11 @@ private ComponentInfo checkForIssues(ComponentInfo componentInfo) {
return componentInfo;
}
boolean issueExisting = false;
List<String> possibleIssues = Arrays.asList("LicenseRef-scancode-free-unknown");
for (LicenseInfo li : licenses) {
for (String key : possibleIssues) {
if (key.equals(li.getSpdxid())) {
issueExisting = true;
break;
}
}
if (issueExisting)
if (isIssue(li.getSpdxid())) {
issueExisting = true;
break;
}
}
if (issueExisting) {
DefaultComponentInfoImpl result = new DefaultComponentInfoImpl(componentInfo);
Expand All @@ -117,4 +122,32 @@ protected boolean isFeatureActive() {
return true;
}

/**
* Checks if the given license id falls in the category of "WITH_ISSUES".
*
* @param license the license id to check
* @return <code>true</code> if the license id matches the issue list.
*/
protected boolean isIssue(String license) {

  final String matchDebugTemplate = "License id '{}' matches issue list via regex '{}' and result will be set to status WITH_ISSUES";

  // Fast path: the configured patterns have already been reported.
  if (this.regexesLogged) {
    return this.licenseIdIssuesPredicate.test(license, matchDebugTemplate);
  }

  // First call: write the configured issue patterns to the log once, then remember that this was done.
  final String infoTemplate = LogMessages.SCANCODE_ISSUE_DETECTION_REGEX.msg();
  final String configuredRegexes = this.licenseIdIssuesPredicate.getRegexesAsString();
  LOG.info(infoTemplate, configuredRegexes);
  this.regexesLogged = true;

  return this.licenseIdIssuesPredicate.test(license, matchDebugTemplate);
}

/**
* Sets the list of license ids which will be regarded as "WITH_ISSUES"
*
* @param licenseIdIssuesRegexes an array of regular expressions which define the patterns of license ids which will
* be regarded as "WITH_ISSUES".
*/
@Value("${solicitor.scancode.issuelistpatterns}")
public void setLicenseIdIssuesRegexes(String[] licenseIdIssuesRegexes) {

  // Hand the raw expressions to the predicate, which compiles and stores them.
  final RegexListPredicate issuesPredicate = this.licenseIdIssuesPredicate;
  issuesPredicate.setRegexes(licenseIdIssuesRegexes);
}

}
Loading

0 comments on commit c894348

Please sign in to comment.