Skip to content

Commit

Permalink
Relax required phrase filtering
Browse files Browse the repository at this point in the history
Only process stopwords for "is_continuous" rules

Signed-off-by: Philippe Ombredanne <[email protected]>
  • Loading branch information
pombredanne committed Oct 13, 2024
1 parent e5bff98 commit 6eff903
Showing 1 changed file with 22 additions and 18 deletions.
40 changes: 22 additions & 18 deletions src/licensedcode/match.py
Original file line number Diff line number Diff line change
Expand Up @@ -2272,33 +2272,37 @@ def filter_matches_missing_required_phrases(
is_valid = False
break

has_same_stopwords_pos = True
for qpos, ipos in zip(qspan, ispan):
if qpos not in qkey_span or qpos == qkey_span_end:
continue

if istopwords_by_pos_get(ipos) != qstopwords_by_pos_get(qpos):
has_same_stopwords_pos = False
if is_continuous:
has_same_stopwords_pos = True
for qpos, ipos in zip(qspan, ispan):
if qpos not in qkey_span or qpos == qkey_span_end:
continue

if istopwords_by_pos_get(ipos) != qstopwords_by_pos_get(qpos):
has_same_stopwords_pos = False
break

if not has_same_stopwords_pos:
logger_debug(
' ==> DISCARDING, REQUIRED PHRASES PRESENT, BUT STOPWORDS NOT SAME:',
'qkey_span:', qkey_span, 'qpan:', qspan,
'istopwords_by_pos:', istopwords_by_pos,
'qstopwords_by_pos:', qstopwords_by_pos
)

is_valid = False
break

if not has_same_stopwords_pos:
logger_debug(
' ==> DISCARDING, REQUIRED PHRASES PRESENT, BUT STOPWORDS NOT SAME:',
'qkey_span:', qkey_span, 'qpan:', qspan,
'istopwords_by_pos:', istopwords_by_pos,
'qstopwords_by_pos:', qstopwords_by_pos
)

is_valid = False
break

if is_valid:
logger_debug(' ==> KEEPING, REQUIRED PHRASES PRESENT, CONTINUOUS AND NO UNKNOWNS')
kept_append(match)
else:
match.discard_reason = reason
discarded_append(match)

if discarded and not kept:
logger_debug(' ==> REINSTATING DISCARDED MISSING REQUIRED PHRASES')

if trace:
print()

Expand Down

0 comments on commit 6eff903

Please sign in to comment.