Skip to content

Commit

Permalink
Merge pull request #10592 from St-ac-y/DE-de_match_merging
Browse files Browse the repository at this point in the history
[de] match merging of two matches next to each other
  • Loading branch information
St-ac-y authored Jun 3, 2024
2 parents 78a5fb8 + c01da81 commit 41ededb
Show file tree
Hide file tree
Showing 2 changed files with 109 additions and 4 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
import org.jetbrains.annotations.NotNull;
import org.jetbrains.annotations.Nullable;
import org.languagetool.*;
import org.languagetool.markup.AnnotatedText;
import org.languagetool.chunking.Chunker;
import org.languagetool.chunking.GermanChunker;
import org.languagetool.languagemodel.LanguageModel;
Expand Down Expand Up @@ -605,6 +606,84 @@ public boolean hasMinMatchesRules() {
return true;
}

/**
 * Merges neighbouring matches produced by rules whose id starts with {@code AI_DE_GGEC}
 * into a single match.
 *
 * <p>The list is walked in order. For two consecutive AI matches:
 * <ul>
 *   <li>if the later one starts before the earlier one ends, the later one is dropped;</li>
 *   <li>if they touch (or are separated by exactly one character), agree on the
 *       {@code picky} tag, both carry at least one suggestion, and either share an issue
 *       type or are both non-style, they are replaced by one merged match;</li>
 *   <li>otherwise both are kept unchanged.</li>
 * </ul>
 * Matches of any other rule are passed through untouched.
 *
 * @param ruleMatches the matches to process; the order of this list is what decides
 *                    which matches count as neighbours
 * @param text not used by this implementation
 * @param enabledRules not used by this implementation
 * @return a new list with mergeable neighbours collapsed; the input list is not modified
 */
@Override
public List<RuleMatch> mergeSuggestions(List<RuleMatch> ruleMatches, AnnotatedText text, Set<String> enabledRules) {
  final String aiRulePrefix = "AI_DE_GGEC";
  List<RuleMatch> resultMatches = new ArrayList<>();
  RuleMatch previousMatch = null;
  for (RuleMatch currentMatch : ruleMatches) {
    if (previousMatch == null) {
      // Nothing to compare against yet.
      previousMatch = currentMatch;
      continue;
    }
    boolean bothFromAi = previousMatch.getRule().getId().startsWith(aiRulePrefix)
      && currentMatch.getRule().getId().startsWith(aiRulePrefix);
    if (bothFromAi) {
      if (previousMatch.getToPos() > currentMatch.getFromPos()) {
        continue; // Overlapping AI matches: keep the earlier one, drop the later one
      }
      if (isMergeableAiPair(previousMatch, currentMatch)) {
        // The merged match becomes the new "previous" so that a chain of adjacent
        // matches keeps collapsing into one.
        previousMatch = new RuleMatch(mergeMatches(previousMatch, currentMatch));
        continue;
      }
    }
    // No merge happened: the previous match is final, continue with the current one.
    resultMatches.add(previousMatch);
    previousMatch = currentMatch;
  }
  // Flush the match that is still pending after the loop.
  if (previousMatch != null) {
    resultMatches.add(previousMatch);
  }
  return resultMatches;
}

/**
 * Decides whether two non-overlapping AI matches may be merged into one.
 */
private boolean isMergeableAiPair(RuleMatch first, RuleMatch second) {
  int gap = second.getFromPos() - first.getToPos();
  if (gap != 0 && gap != 1) {
    return false; // not directly adjacent and not separated by a single character
  }
  boolean firstPicky = first.getRule().getTags().contains(Tag.picky);
  boolean secondPicky = second.getRule().getTags().contains(Tag.picky);
  if (firstPicky != secondPicky) {
    return false;
  }
  // Merging concatenates the first suggestion of each match, so both need one;
  // without this check an AI match without suggestions would raise an
  // IndexOutOfBoundsException during the merge.
  if (first.getSuggestedReplacements().isEmpty() || second.getSuggestedReplacements().isEmpty()) {
    return false;
  }
  ITSIssueType firstType = first.getRule().getLocQualityIssueType();
  ITSIssueType secondType = second.getRule().getLocQualityIssueType();
  // Same type always merges; differing types merge only when no style match is involved.
  return firstType == secondType
    || (firstType != ITSIssueType.Style && secondType != ITSIssueType.Style);
}



/**
 * Builds one match that spans from the start of {@code match1} to the end of
 * {@code match2}, combining their original error strings and their first suggestions.
 *
 * <p>Both matches must carry at least one suggested replacement; the first suggestion
 * of each is read unconditionally.
 *
 * <p>NOTE(review): the merged match is created with {@code match1}'s rule, and the issue
 * type is then set on that rule object. If the rule instance is shared with other
 * matches, this presumably changes their issue type as well - TODO confirm.
 *
 * @param match1 the earlier match
 * @param match2 the later match, starting at or one character after the end of {@code match1}
 * @return the newly created merged match with the specific rule id {@code AI_DE_MERGED_MATCH}
 */
private RuleMatch mergeMatches(RuleMatch match1, RuleMatch match2) {
  // A gap of exactly one character between the matches is bridged with a space.
  boolean singleCharGap = match1.getToPos() + 1 == match2.getFromPos();
  String joiner = singleCharGap ? " " : "";

  String combinedError = match1.getOriginalErrorStr() + joiner + match2.getOriginalErrorStr();
  String combinedReplacement =
    match1.getSuggestedReplacements().get(0) + joiner + match2.getSuggestedReplacements().get(0);

  // The merged match reuses the rule and sentence of the first match.
  RuleMatch merged = new RuleMatch(match1.getRule(), match1.getSentence(), match1.getFromPos(), match2.getToPos(),
    "Hier scheint es einen Fehler zu geben.", "Potenzieller Fehler");
  merged.setOriginalErrorStr(combinedError);
  merged.setSuggestedReplacement(combinedReplacement);
  merged.setSpecificRuleId("AI_DE_MERGED_MATCH");

  ITSIssueType firstType = match1.getRule().getLocQualityIssueType();
  ITSIssueType secondType = match2.getRule().getLocQualityIssueType();
  if (firstType != secondType) {
    // Mixed issue types are reported as a grammar issue.
    merged.getRule().setLocQualityIssueType(ITSIssueType.Grammar);
  } else if (firstType == ITSIssueType.Style) {
    // Both are style matches (the types are equal here), so the result stays a style match.
    merged.getRule().setLocQualityIssueType(ITSIssueType.Style);
  }

  return merged;
}

@Override
public List<String> prepareLineForSpeller(String line) {
List<String> results = new ArrayList<>();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,10 +19,8 @@
package org.languagetool.rules.de;

import org.junit.Test;
import org.languagetool.JLanguageTool;
import org.languagetool.Language;
import org.languagetool.LanguageSpecificTest;
import org.languagetool.Languages;
import org.languagetool.*;
import org.languagetool.language.German;
import org.languagetool.rules.Rule;
import org.languagetool.rules.patterns.AbstractPatternRule;
import org.languagetool.rules.patterns.PatternRuleLoader;
Expand All @@ -39,6 +37,10 @@
import static org.hamcrest.CoreMatchers.is;
import static org.hamcrest.MatcherAssert.assertThat;

import org.languagetool.rules.FakeRule;
import org.languagetool.rules.RuleMatch;
import static org.junit.Assert.assertEquals;

public class GermanTest extends LanguageSpecificTest {

@Test
Expand Down Expand Up @@ -141,6 +143,30 @@ public void testGenderCharsAgainstAllRules() throws IOException {
assertThat(lt.check("Das ist flasch_nittrichtig.").size(), is(2)); //not treated by GermanSpellerRule.removeGenderCompoundMatches
}

/**
 * Checks that two AI grammar matches separated by a single character are merged into
 * one match whose suggestion joins both suggestions with a space.
 */
@Test
public void testMergingOfGrammarCorrections() throws IOException {
  Language lang = new German();
  JLanguageTool lt = new JLanguageTool(lang);
  AnalyzedSentence analyzedSentence = lt.getAnalyzedSentence("Er ist sich da absolute sich");

  // Two fake AI matches; the second starts one character after the first ends (24 + 1 == 25),
  // which is the "separated by a space" case of the merge logic.
  RuleMatch ruleMatch1 = new RuleMatch(new FakeRule("AI_DE_GGEC_TEST"), analyzedSentence, 16, 24, "Adjektivfehler");
  ruleMatch1.setSuggestedReplacement("absolute");
  RuleMatch ruleMatch2 = new RuleMatch(new FakeRule("AI_DE_GGEC_TEST2"), analyzedSentence, 25, 28, "Adjektivfehler");
  ruleMatch2.setSuggestedReplacement("sich");

  List<RuleMatch> ruleMatches = new ArrayList<>();
  ruleMatches.add(ruleMatch1);
  ruleMatches.add(ruleMatch2);

  // The text and enabledRules arguments are passed as null here.
  List<RuleMatch> processedMatches = lang.mergeSuggestions(ruleMatches, null, null);

  // Both inputs must have collapsed into exactly one match ...
  assertEquals(1, processedMatches.size());
  // ... that carries the combined suggestion and the merged rule id.
  assertEquals("absolute sich", processedMatches.get(0).getSuggestedReplacements().get(0));
  assertEquals("AI_DE_MERGED_MATCH", processedMatches.get(0).getSpecificRuleId());
}

// test that patterns with 'ß' also contain that pattern with 'ss' so the rule can match for de-CH users
@Test
public void testSwissSpellingVariants() throws IOException {
Expand Down

0 comments on commit 41ededb

Please sign in to comment.