Skip to content

Commit

Permalink
[de] Swiss German salutation rule
Browse files Browse the repository at this point in the history
  • Loading branch information
St-ac-y committed Aug 2, 2024
1 parent 6113520 commit 514488f
Show file tree
Hide file tree
Showing 3 changed files with 93 additions and 6 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -25,9 +25,11 @@
* @since 3.3
*/
public class CategoryId {

private final String id;

// Predefined category ids
public static final CategoryId PUNCTUATION = new CategoryId("PUNCTUATION");

public CategoryId(String id) {
Objects.requireNonNull(id, "Category id must not be null.");
if (id.trim().isEmpty()) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -34,13 +34,13 @@
import org.languagetool.tagging.de.SwissGermanTagger;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.languagetool.rules.de.SwissGermanSalutationRule;

import java.io.IOException;
import java.util.*;

@SuppressWarnings("deprecation")
public class SwissGerman extends German {

private static final Logger logger = LoggerFactory.getLogger(SwissGerman.class);

@NotNull
Expand All @@ -62,7 +62,11 @@ public String getName() {
/**
 * Builds the rule list for Swiss German: every rule returned by the parent
 * implementation, followed by the Swiss-specific salutation rule and the
 * Swiss compound rule (in that order).
 *
 * @return a new mutable list; the list returned by the parent is copied, not modified
 * @throws IOException if the parent implementation fails to load its rules
 */
@Override
public List<Rule> getRelevantRules(ResourceBundle messages, UserConfig userConfig, Language motherTongue, List<Language> altLanguages) throws IOException {
  List<Rule> inheritedRules = super.getRelevantRules(messages, userConfig, motherTongue, altLanguages);

  List<Rule> allRules = new ArrayList<>();
  allRules.addAll(inheritedRules);

  // Swiss-specific additions are appended after the inherited rules.
  Rule salutationRule = new SwissGermanSalutationRule(messages);
  Rule compoundRule = new SwissCompoundRule(messages, this, userConfig);
  allRules.add(salutationRule);
  allRules.add(compoundRule);

  return allRules;
}

Expand All @@ -74,8 +78,7 @@ public SpellingCheckRule createDefaultSpellingRule(ResourceBundle messages) thro
/**
 * Builds the list of language-model-capable rules for Swiss German: every
 * rule returned by the parent implementation plus one
 * {@code SwissGermanSpellerRule}.
 *
 * @param languageModel language model handed to the speller rule; may be {@code null}
 * @return a new mutable list; the list returned by the parent is copied, not modified
 * @throws IOException if the parent implementation fails to load its rules
 */
@Override
public List<Rule> getRelevantLanguageModelCapableRules(ResourceBundle messages, @Nullable LanguageModel languageModel, GlobalConfig globalConfig, UserConfig userConfig, Language motherTongue, List<Language> altLanguages) throws IOException {
  List<Rule> rules = new ArrayList<>(super.getRelevantLanguageModelCapableRules(messages, languageModel, globalConfig, userConfig, motherTongue, altLanguages));
  // Register the speller exactly once; adding it twice would put two
  // instances of the same rule into the returned list.
  rules.add(new SwissGermanSpellerRule(messages, this, userConfig, languageModel));
  return rules;
}

Expand All @@ -91,14 +94,17 @@ public List<Rule> getRelevantRemoteRules(ResourceBundle messageBundle, List<Remo

@Override
public List<RuleMatch> filterRuleMatches(List<RuleMatch> ruleMatches, AnnotatedText text, Set<String> enabledRules) {
//First, use the filter in German.java
// First, use the filter in German.java
ruleMatches = super.filterRuleMatches(ruleMatches, text, enabledRules);
List<RuleMatch> newRuleMatches = new ArrayList<>();

for (RuleMatch rm : ruleMatches) {
//TODO: replace this by supporting remote-rule-filter for language variants
// TODO: replace this by supporting remote-rule-filter for language variants
String ruleId = rm.getRule().getId();

if (ruleId.equals("AI_DE_GGEC_REPLACEMENT_ORTHOGRAPHY_SPELL") || ruleId.equals("AI_DE_GGEC_REPLACEMENT_ADJECTIVE_FORM")) {
String matchingString = null;

if (rm.getSentence() != null) {
if (rm.getFromPos() > -1 && rm.getToPos() > -1) {
String sentenceStr = rm.getSentence().getText();
Expand All @@ -107,6 +113,7 @@ public List<RuleMatch> filterRuleMatches(List<RuleMatch> ruleMatches, AnnotatedT
}
}
}

String finalMatchingString = matchingString;
if (finalMatchingString != null && finalMatchingString.contains("ss") && rm.getSuggestedReplacements().stream().anyMatch(suggestion -> suggestion.equals(finalMatchingString.replace("ss", "ß")))) {
logger.info("Remove match with ruleID: {} ({} -> {})", ruleId, matchingString, rm.getSuggestedReplacements());
Expand All @@ -116,15 +123,18 @@ public List<RuleMatch> filterRuleMatches(List<RuleMatch> ruleMatches, AnnotatedT

List<SuggestedReplacement> replacements = rm.getSuggestedReplacementObjects();
List<SuggestedReplacement> newReplacements = new ArrayList<>();

for (SuggestedReplacement s : replacements) {
String newReplStr = s.getReplacement().replaceAll("ß", "ss");
SuggestedReplacement newRepl = new SuggestedReplacement(s);
newRepl.setReplacement(newReplStr);
newReplacements.add(newRepl);
}

RuleMatch newMatch = new RuleMatch(rm, newReplacements);
newRuleMatches.add(newMatch);
}

return newRuleMatches;
}

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
package org.languagetool.rules.de;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.ResourceBundle;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.languagetool.AnalyzedSentence;
import org.languagetool.AnalyzedTokenReadings;
import org.languagetool.rules.RuleMatch;
import org.languagetool.rules.Rule;
import org.languagetool.rules.Category;
import org.languagetool.rules.CategoryId;

/**
 * Flags a comma that directly follows a salutation at the end of a line and
 * suggests the same text with the comma removed.
 *
 * <p>Matching is done with a regular expression on the raw sentence text, so
 * a match only occurs when the salutation, the comma and the line break are
 * all part of the same {@link AnalyzedSentence}.
 */
public class SwissGermanSalutationRule extends Rule {

  // A name: runs of Unicode letters, optionally joined by hyphens, so that
  // names such as "Jörg" or "Hans-Peter" are recognised (ASCII-only
  // character classes would reject them).
  private static final String NAME = "\\p{L}+(?:-\\p{L}+)*";

  // Update the salutations list as needed. Every element is a regex fragment.
  // Use only NON-capturing groups here: SALUTATION_PATTERN relies on group 1
  // being the whole salutation and group 2 being the line break.
  private static final String[] SALUTATIONS = {
    // The name is optional as a whole (whitespace included), so a bare
    // "Hallo," is matched as well as "Hallo Anna,".
    "Hallo(?:\\s+" + NAME + ")?",
    "Hey(?:\\s+" + NAME + ")?",
    "Hi(?:\\s+" + NAME + ")?",
    "Greetings",
    "Guten\\s+(?:Morgen|Mittag|Abend)",
    "Sehr geehrter?\\s+(?:Frau|Herr|Professor|Doktor)\\s+" + NAME
  };

  private static final String SALUTATIONS_REGEX = String.join("|", SALUTATIONS);

  // Group 1: the salutation; group 2: the line break ("\n" or "\r\n").
  // UNICODE_CHARACTER_CLASS makes \b and \s Unicode-aware, so the word
  // boundary after a name ending in a non-ASCII letter is detected.
  private static final Pattern SALUTATION_PATTERN = Pattern.compile(
    "\\b(" + SALUTATIONS_REGEX + ")\\b,(\\r?\\n)",
    Pattern.CASE_INSENSITIVE | Pattern.UNICODE_CASE | Pattern.UNICODE_CHARACTER_CLASS
  );

  private static final Category PUNCTUATION_CATEGORY = new Category(CategoryId.PUNCTUATION, "Punctuation");

  /**
   * Creates the rule and assigns it to the punctuation category.
   *
   * @param messages resource bundle passed on to the {@link Rule} base class
   */
  public SwissGermanSalutationRule(ResourceBundle messages) {
    super(messages);
    setCategory(PUNCTUATION_CATEGORY);
  }

  @Override
  public String getId() {
    return "SWISS_GERMAN_SALUTATION_RULE";
  }

  @Override
  public String getDescription() {
    return "Detects salutations followed by a comma and a newline, and suggests to remove the comma";
  }

  /**
   * Finds every salutation in the sentence text that is followed by a comma
   * and a line break.
   *
   * @param sentence the sentence to check
   * @return one match per occurrence, spanning salutation, comma and line
   *         break, with a single suggestion that drops the comma; an empty
   *         array if nothing matches
   */
  @Override
  public RuleMatch[] match(AnalyzedSentence sentence) throws IOException {
    List<RuleMatch> ruleMatches = new ArrayList<>();
    Matcher matcher = SALUTATION_PATTERN.matcher(sentence.getText());

    while (matcher.find()) {
      // Keep the salutation and the original line break, drop only the comma.
      String correctedText = matcher.group(1) + matcher.group(2);
      RuleMatch ruleMatch = new RuleMatch(this, sentence, matcher.start(), matcher.end(),
        "The salutation should not be followed by a comma before a newline.",
        "Remove the comma");
      ruleMatch.setSuggestedReplacements(Arrays.asList(correctedText));
      ruleMatches.add(ruleMatch);
    }
    return ruleMatches.toArray(new RuleMatch[0]);
  }
}

0 comments on commit 514488f

Please sign in to comment.