Skip to content

Commit

Permalink
resolve conflicts
Browse files Browse the repository at this point in the history
  • Loading branch information
affemitkaraffe committed Jun 19, 2024
2 parents 2c70f09 + 721c745 commit 2cd7e2c
Show file tree
Hide file tree
Showing 581 changed files with 128,281 additions and 207,174 deletions.
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -6,13 +6,15 @@
.class
.classpath
.idea
.run
.ignore
.project
.settings
.metadata
.vscode/
build
target
target-ide
atomFeedChecksDB
derby.log
.surefire-*
Expand Down
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ Portuguese, Polish, Dutch, and [more than 20 other languages](https://languageto
It finds many errors that a simple spell checker cannot detect.

* **[Jobs at LanguageTool](https://languagetool.org/careers)**
* [LanguageTool Forum](https://forum.languagetool.org)
* [How to run your own LanguageTool server](https://dev.languagetool.org/http-server)
* [HTTP API documentation](https://languagetool.org/http-api/swagger-ui/#!/default/post_check)
* [How to use our public server via HTTP](https://dev.languagetool.org/public-http-api)
Expand Down
36 changes: 20 additions & 16 deletions build.sh
Original file line number Diff line number Diff line change
@@ -1,30 +1,34 @@
#!/bin/bash
#!/usr/bin/env bash

if [ $# -lt 2 ]
then
self=$(basename $0)

if [ $# -lt 2 ]; then
echo "Helps building parts of LanguageTool - for a complete build, run mvn directly"
echo "Usage: `basename $0` <project> <goals...>"
echo "Usage: $self <project> <goals...>"
echo "Examples:"
echo " ./`basename $0` languagetool-standalone clean package (will package the standalone module)"
echo " ./`basename $0` languagetool-standalone clean package -DskipTests (as above but without running tests)"
echo " ./`basename $0` en clean test (will test the English module)"
echo " ./$self languagetool-standalone clean package (will package the standalone module)"
echo " ./$self languagetool-standalone clean package -DskipTests (as above but without running tests)"
echo " ./$self en clean test (will test the English module)"
exit 1
fi

MODULE=$1
module=$1

if [ \! -d $MODULE ]
then
MODULE="languagetool-language-modules/$MODULE"
if [ ! -d $module ]; then
module_path="languagetool-language-modules/$module"
else
module_path="$module"
fi

COMMAND="mvn --projects $MODULE --also-make ${@:2}"
echo "Running: $COMMAND"
command="mvn --projects $module_path --also-make ${@:2}"
echo "Running: $command"

$command
exitcode=$?

$COMMAND
BUILDEXITCODE=$?
# these don't work on their own, so delete them to avoid confusion:
rm languagetool-standalone/target/languagetool-standalone-*.jar 2> /dev/null
rm languagetool-wikipedia/target/languagetool-wikipedia-*.jar 2> /dev/null
rm languagetool-commandline/target/languagetool-commandline-*.jar 2> /dev/null
exit $BUILDEXITCODE

exit $exitcode
Original file line number Diff line number Diff line change
Expand Up @@ -297,6 +297,15 @@ public AnalyzedToken readingWithTagRegex(String posTagRegex) {
}
return null;
}

/**
 * Returns the first reading whose POS tag is matched in full by the given
 * pre-compiled pattern. Readings that carry no POS tag are skipped.
 *
 * @param pattern compiled regular expression that has to match the whole POS tag
 * @return the first matching reading, or {@code null} if none of the readings matches
 */
public AnalyzedToken readingWithTagRegex(Pattern pattern) {
  for (AnalyzedToken candidate : anTokReadings) {
    String posTag = candidate.getPOSTag();
    if (posTag == null) {
      // untagged reading: nothing to match against
      continue;
    }
    if (pattern.matcher(posTag).matches()) {
      return candidate;
    }
  }
  return null;
}

/**
* Returns the first reading that matches a given lemma.
Expand Down
57 changes: 47 additions & 10 deletions languagetool-core/src/main/java/org/languagetool/JLanguageTool.java
Original file line number Diff line number Diff line change
Expand Up @@ -1008,11 +1008,12 @@ protected CheckResults checkInternal(AnnotatedText annotatedText, ParagraphHandl
paraMode, annotatedText, listener, mode, level, remoteRulesThreadPool == null, toneTags);
long textCheckEnd = System.nanoTime();

Map<String, RemoteRuleResult> remoteRulesResults = null;
try {
TelemetryProvider.INSTANCE.createSpan("fetch-remote-rules",
remoteRulesResults = TelemetryProvider.INSTANCE.createSpan("fetch-remote-rules",
Attributes.builder().put("check.remote_rules.count", remoteRules.size()).build(),
() -> fetchRemoteRuleResults(deadlineStartNanos, mode, level, analyzedSentences, remoteMatches, remoteRuleTasks, remoteRules, requestSize,
cachedResults, matchOffset, annotatedText, textSessionID, toneTags));
cachedResults, matchOffset, annotatedText, textSessionID, toneTags));
} catch (Exception e) {
throw new RuntimeException(e);
}
Expand All @@ -1034,7 +1035,7 @@ protected CheckResults checkInternal(AnnotatedText annotatedText, ParagraphHandl
return res;
}

ruleMatches = filterMatches(annotatedText, rules, ruleMatches, level, toneTags);
ruleMatches = filterMatches(annotatedText, rules, ruleMatches, level, toneTags, remoteRulesResults);

// decide if this should be done right after performCheck, before waiting for remote rule results
// better for latency, remote rules probably don't need resorting
Expand All @@ -1058,26 +1059,51 @@ protected CheckResults checkInternal(AnnotatedText annotatedText, ParagraphHandl
return new CheckResults(ruleMatches, res.getIgnoredRanges(), res.getExtendedSentenceRanges());
}

private List<RuleMatch> filterMatches(AnnotatedText annotatedText, RuleSet rules, List<RuleMatch> ruleMatches, Level level, Set<ToneTag> toneTags) {
private List<RuleMatch> filterMatches(AnnotatedText annotatedText, RuleSet rules, List<RuleMatch> ruleMatches, Level level,
Set<ToneTag> toneTags, Map<String, RemoteRuleResult> remoteRulesResults) {
// rules can create matches with rule IDs different from the original rule (see e.g. RemoteRules)
// so while we can't avoid execution of these rules, we still want disabling them to work
// so do another pass with ignoreRule here
ruleMatches = ruleMatches.stream().filter(match -> !ignoreRule(match.getRule())).collect(Collectors.toList());

ruleMatches = ruleMatches.stream().filter(match -> isRuleActiveForLanguageWithModel(
match.getRule(), language, remoteRulesResults)).collect(Collectors.toList());

ruleMatches = ruleMatches.stream().filter(match -> isRuleActiveForLevelAndToneTags(
match.getRule(), level, toneTags)).collect(Collectors.toList());

ruleMatches = new SameRuleGroupFilter().filter(ruleMatches);
// no sorting: SameRuleGroupFilter sorts rule matches already
ruleMatches = new LanguageDependentMergeSuggestionFilter(language, rules).filter(ruleMatches, annotatedText);
ruleMatches = new LanguageDependentRuleMatchFilter(language, rules).filter(ruleMatches, annotatedText);
if (cleanOverlappingMatches) {
ruleMatches = new CleanOverlappingFilter(language, userConfig.getHidePremiumMatches()).filter(ruleMatches);
}
ruleMatches = new LanguageDependentFilter(language, rules).filter(ruleMatches);

return applyCustomFilters(ruleMatches, annotatedText);
}

// IDs of French rule groups whose matches are dropped once the AI_FR_GGEC remote rule succeeded.
private static final List<String> FR_GGEC_DISABLED_RULE_GROUPS = Arrays.asList(
    "OU", "OU_FIGEES", "VIRG_NON_TROUVEE", "A_A_ACCENT", "A_ACCENT_A", "CONFUSION_A_AS",
    "AGREEMENT_POSTPONED_ADJ", "LA_OU", "CONFUSION_RULE_PREMIUM_AIRE_AIR");

// Full IDs of individual French subrules that are dropped once the AI_FR_GGEC remote rule succeeded.
private static final List<String> FR_GGEC_DISABLED_SUBRULES = Arrays.asList(
    "A_A_ACCENT2[1]", "ACCORD_SUJET_VERBE[55]", "PAS_DE_VIRGULE[42]");

// IDs of Spanish rules whose matches are dropped once the AI_ES_GGEC remote rule succeeded.
private static final List<String> ES_GGEC_DISABLED_RULES = Arrays.asList("AGREEMENT_POSTPONED_ADJ");

/**
 * Decides whether a match of {@code rule} should be kept, given which remote
 * rules delivered a successful result for this check.
 * <p>
 * For French ("fr") a fixed set of rule groups and subrules is suppressed when the
 * {@code AI_FR_GGEC} remote rule succeeded; for Spanish ("es") a fixed set of rules is
 * suppressed when {@code AI_ES_GGEC} succeeded. In every other case the rule stays active.
 *
 * @param rule the rule that produced the match
 * @param language the language of the text being checked
 * @param remoteRulesResults remote rule results keyed by rule key; {@code null} or empty
 *                           means no remote result is available and nothing is suppressed
 * @return {@code false} if the rule is suppressed in favour of a successful remote rule,
 *         {@code true} otherwise
 */
private boolean isRuleActiveForLanguageWithModel(Rule rule, Language language, Map<String, RemoteRuleResult> remoteRulesResults) {
  // Without any remote result there is nothing that could take over for a local rule.
  if (remoteRulesResults == null || remoteRulesResults.isEmpty()) {
    return true;
  }
  String langCode = language.getShortCode();
  if ("fr".equals(langCode) && remoteRuleSucceeded(remoteRulesResults, "AI_FR_GGEC")) {
    boolean isDisabledGroup = FR_GGEC_DISABLED_RULE_GROUPS.contains(rule.getId());
    boolean isDisabledSubrule = FR_GGEC_DISABLED_SUBRULES.contains(rule.getFullId());
    return !(isDisabledGroup || isDisabledSubrule);
  }
  if ("es".equals(langCode) && remoteRuleSucceeded(remoteRulesResults, "AI_ES_GGEC")) {
    return !ES_GGEC_DISABLED_RULES.contains(rule.getId());
  }
  return true;
}

// True if a result is stored under ruleKey and that result reports success.
private static boolean remoteRuleSucceeded(Map<String, RemoteRuleResult> results, String ruleKey) {
  RemoteRuleResult result = results.get(ruleKey);
  return result != null && result.isSuccess();
}

private final Map<LevelToneTagCacheKey, RuleSet> ruleSetCache = new ConcurrentHashMap<>();

static boolean isRuleActiveForLevelAndToneTags(Rule rule, Level level, Set<ToneTag> toneTags) {
Expand Down Expand Up @@ -1122,13 +1148,14 @@ private RuleSet getActiveRulesForLevelAndToneTags(Level level, Set<ToneTag> tone
});
}

protected void fetchRemoteRuleResults(long deadlineStartNanos, Mode mode, Level level, List<AnalyzedSentence> analyzedSentences, List<RuleMatch> remoteMatches,
protected Map<String, RemoteRuleResult> fetchRemoteRuleResults(long deadlineStartNanos, Mode mode, Level level, List<AnalyzedSentence> analyzedSentences, List<RuleMatch> remoteMatches,
List<FutureTask<RemoteRuleResult>> remoteRuleTasks, List<RemoteRule> remoteRules,
List<Integer> requestSize,
Map<Integer, List<RuleMatch>> cachedResults,
Map<Integer, Integer> matchOffset,
AnnotatedText annotatedText, Long textSessionID,
Set<ToneTag> toneTags) {
Map<String, RemoteRuleResult> remoteRuleResults = new HashMap<>();
if (remoteRuleTasks != null && !remoteRuleTasks.isEmpty()) {
int timeout = IntStream.range(0, requestSize.size()).map(i ->
(int) remoteRules.get(i).getTimeout(requestSize.get(i))
Expand All @@ -1153,13 +1180,17 @@ protected void fetchRemoteRuleResults(long deadlineStartNanos, Mode mode, Level
RemoteRuleMetrics.request(ruleKey, deadlineStartNanos, chars, RemoteRuleMetrics.RequestResult.DOWN);
continue;
}
RemoteRuleResult remoteRuleResult = null;
try {
//logger.info("Fetching results for remote rule for {} chars", chars);
RemoteRuleMetrics.inCircuitBreaker(deadlineStartNanos, rule.circuitBreaker(), ruleKey, chars, () ->
remoteRuleResult = RemoteRuleMetrics.inCircuitBreaker(deadlineStartNanos, rule.circuitBreaker(), ruleKey, chars, () ->
fetchResults(deadlineStartNanos, mode, level, analyzedSentences, remoteMatches, matchOffset, annotatedText, textSessionID, chars, deadlineEndNanos, task, rule, ruleKey, toneTags));
} catch (InterruptedException e) {
break;
}
if (remoteRuleResult != null) {
remoteRuleResults.put(ruleKey, remoteRuleResult);
}
}

for (Integer cachedSentenceIndex : cachedResults.keySet()) {
Expand All @@ -1180,6 +1211,7 @@ protected void fetchRemoteRuleResults(long deadlineStartNanos, Mode mode, Level
// cancel any remaining tasks (e.g. after interrupt because request timed out)
remoteRuleTasks.stream().filter(Objects::nonNull).forEach(t -> t.cancel(true));
}
return remoteRuleResults;
}

private RemoteRuleResult fetchResults(long deadlineStartNanos, Mode mode, Level level, List<AnalyzedSentence> analyzedSentences, List<RuleMatch> remoteMatches, Map<Integer, Integer> matchOffset, AnnotatedText annotatedText, Long textSessionID, long chars, long deadlineEndNanos, FutureTask<RemoteRuleResult> task, RemoteRule rule, String ruleKey, Set<ToneTag> toneTags) throws InterruptedException, ExecutionException, TimeoutException {
Expand Down Expand Up @@ -1381,6 +1413,11 @@ protected final List<SentenceData> computeSentenceData(List<AnalyzedSentence> an
int lineCount = 0;
int columnCount = 1;
List<SentenceData> result = new ArrayList<>(texts.size());

if (analyzedSentences == null || analyzedSentences.isEmpty()) {
return result;
}

for (int i = 0; i < texts.size(); i++) {
String sentence = texts.get(i);
result.add(new SentenceData(analyzedSentences.get(i), sentence, charCount, lineCount, columnCount));
Expand Down Expand Up @@ -2133,7 +2170,7 @@ private LineColumnPosition(int line, int column) {
}
}

public void setConfigValues(Map<String, Integer> v) {
public void setConfigValues(Map<String, Object[]> v) {
userConfig.insertConfigValues(v);
}

Expand Down
24 changes: 13 additions & 11 deletions languagetool-core/src/main/java/org/languagetool/Language.java
Original file line number Diff line number Diff line change
Expand Up @@ -839,12 +839,22 @@ protected int getPriorityForId(String id) {
public int getRulePriority(Rule rule) {
int categoryPriority = this.getPriorityForId(rule.getCategory().getId().toString());
int rulePriority = this.getPriorityForId(rule.getId());
int rulePriorityFromRule = rule.getPriority();
// if there is a priority defined for rule it takes precedence over category priority
if (rulePriority != 0) {
return rulePriority;
} else {
} else if ( rulePriorityFromRule != 0) {
return rulePriorityFromRule;
} else if (categoryPriority != 0) {
return categoryPriority;
} else if (getDefaultRulePriorityForStyle() != 0 && rule.getLocQualityIssueType().equals(ITSIssueType.Style)) {
return getDefaultRulePriorityForStyle();
}
return 0;
}

/**
 * Fallback priority for style rules. {@code getRulePriority} uses this value only
 * when it is non-zero and the rule's issue type is {@code Style}; subclasses can
 * override it to supply a language-specific default.
 *
 * @return the fallback priority; this implementation returns {@code 0}
 */
protected int getDefaultRulePriorityForStyle() {
  final int noStylePriority = 0;
  return noStylePriority;
}

/**
Expand Down Expand Up @@ -976,15 +986,7 @@ public int hashCode() {
public boolean hasMinMatchesRules() {
return false;
}

/**
* @since 5.6
* Adjust suggestions depending on the enabled rules
*/
public List<RuleMatch> adaptSuggestions(List<RuleMatch> ruleMatches, Set<String> enabledRules) {
return ruleMatches;
}


/**
* @since 6.0
* Adjust suggestion
Expand All @@ -1009,7 +1011,7 @@ public List<String> prepareLineForSpeller(String s) {
* This function is called by JLanguageTool before CleanOverlappingFilter removes overlapping ruleMatches
* @return filtered ruleMatches
*/
public List<RuleMatch> mergeSuggestions(List<RuleMatch> ruleMatches, AnnotatedText text, Set<String> enabledRules) {
public List<RuleMatch> filterRuleMatches(List<RuleMatch> ruleMatches, AnnotatedText text, Set<String> enabledRules) {
return ruleMatches;
}

Expand Down
26 changes: 13 additions & 13 deletions languagetool-core/src/main/java/org/languagetool/UserConfig.java
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ public static boolean hasABTestsEnabled() {
private final Long userDictCacheSize;
private final String userDictName;
private final Long premiumUid;
private final Map<String, Integer> configurableRuleValues = new HashMap<>();
private final Map<String, Object[]> configurableRuleValues = new HashMap<>();
private final LinguServices linguServices;
// needs to be in UserConfig so it is considered both in ResultCache and in PipelinePool
private final boolean filterDictionaryMatches;
Expand All @@ -71,19 +71,19 @@ public UserConfig(List<String> userSpecificSpellerWords) {
this(userSpecificSpellerWords, new HashMap<>());
}

public UserConfig(Map<String, Integer> ruleValues) {
public UserConfig(Map<String, Object[]> ruleValues) {
this(new ArrayList<>(), Objects.requireNonNull(ruleValues));
}

public UserConfig(Map<String, Integer> ruleValues, LinguServices linguServices) {
public UserConfig(Map<String, Object[]> ruleValues, LinguServices linguServices) {
this(new ArrayList<>(), Objects.requireNonNull(ruleValues), 0, 0L, null, 0L, linguServices);
}

public UserConfig(List<String> userSpecificSpellerWords, Map<String, Integer> ruleValues) {
public UserConfig(List<String> userSpecificSpellerWords, Map<String, Object[]> ruleValues) {
this(userSpecificSpellerWords, ruleValues, 0, null, null, null, null);
}

public UserConfig(List<String> userSpecificSpellerWords, Map<String, Integer> ruleValues,
public UserConfig(List<String> userSpecificSpellerWords, Map<String, Object[]> ruleValues,
int maxSpellingSuggestions, Long premiumUid, String userDictName, Long userDictCacheSize,
LinguServices linguServices) {
this(userSpecificSpellerWords, Collections.emptyList(), ruleValues, maxSpellingSuggestions, premiumUid, userDictName, userDictCacheSize, linguServices,
Expand All @@ -92,7 +92,7 @@ public UserConfig(List<String> userSpecificSpellerWords, Map<String, Integer> ru

public UserConfig(List<String> userSpecificSpellerWords,
List<Rule> userSpecificRules,
Map<String, Integer> ruleValues,
Map<String, Object[]> ruleValues,
int maxSpellingSuggestions, Long premiumUid, String userDictName,
Long userDictCacheSize,
LinguServices linguServices, boolean filterDictionaryMatches,
Expand All @@ -103,7 +103,7 @@ public UserConfig(List<String> userSpecificSpellerWords,

public UserConfig(List<String> userSpecificSpellerWords,
List<Rule> userSpecificRules,
Map<String, Integer> ruleValues,
Map<String, Object[]> ruleValues,
int maxSpellingSuggestions, Long premiumUid, String userDictName,
Long userDictCacheSize,
LinguServices linguServices, boolean filterDictionaryMatches,
Expand All @@ -112,7 +112,7 @@ public UserConfig(List<String> userSpecificSpellerWords,
boolean untrustedSource) {
this.userSpecificSpellerWords = Objects.requireNonNull(userSpecificSpellerWords);
this.userSpecificRules = Objects.requireNonNull(userSpecificRules);
for (Map.Entry<String, Integer> entry : ruleValues.entrySet()) {
for (Map.Entry<String, Object[]> entry : ruleValues.entrySet()) {
this.configurableRuleValues.put(entry.getKey(), entry.getValue());
}
this.maxSpellingSuggestions = maxSpellingSuggestions;
Expand Down Expand Up @@ -174,21 +174,21 @@ public int getMaxSpellingSuggestions() {
return maxSpellingSuggestions;
}

public Map<String, Integer> getConfigValues() {
public Map<String, Object[]> getConfigValues() {
return configurableRuleValues;
}

public void insertConfigValues(Map<String, Integer> ruleValues) {
for (Map.Entry<String, Integer> entry : ruleValues.entrySet()) {
public void insertConfigValues(Map<String, Object[]> ruleValues) {
for (Map.Entry<String, Object[]> entry : ruleValues.entrySet()) {
this.configurableRuleValues.put(entry.getKey(), entry.getValue());
}
}

public int getConfigValueByID(String ruleID) {
public Object[] getConfigValueByID(String ruleID) {
if (configurableRuleValues.containsKey(ruleID)) {
return configurableRuleValues.get(ruleID);
}
return -1;
return null;
}

public boolean hasLinguServices() {
Expand Down
Loading

0 comments on commit 2cd7e2c

Please sign in to comment.