diff --git a/languagetool-core/src/main/java/org/languagetool/rules/CategoryId.java b/languagetool-core/src/main/java/org/languagetool/rules/CategoryId.java index 86d17d54d60f..ee5074997c17 100644 --- a/languagetool-core/src/main/java/org/languagetool/rules/CategoryId.java +++ b/languagetool-core/src/main/java/org/languagetool/rules/CategoryId.java @@ -25,9 +25,11 @@ * @since 3.3 */ public class CategoryId { - private final String id; + // Predefined category ids + public static final CategoryId PUNCTUATION = new CategoryId("PUNCTUATION"); + public CategoryId(String id) { Objects.requireNonNull(id, "Category id must not be null."); if (id.trim().isEmpty()) { diff --git a/languagetool-language-modules/de/src/main/java/org/languagetool/language/SwissGerman.java b/languagetool-language-modules/de/src/main/java/org/languagetool/language/SwissGerman.java index e02e2db07844..d3cf45d783bd 100644 --- a/languagetool-language-modules/de/src/main/java/org/languagetool/language/SwissGerman.java +++ b/languagetool-language-modules/de/src/main/java/org/languagetool/language/SwissGerman.java @@ -34,13 +34,13 @@ import org.languagetool.tagging.de.SwissGermanTagger; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import org.languagetool.rules.de.SwissGermanSalutationRule; import java.io.IOException; import java.util.*; @SuppressWarnings("deprecation") public class SwissGerman extends German { - private static final Logger logger = LoggerFactory.getLogger(SwissGerman.class); @NotNull @@ -62,7 +62,11 @@ public String getName() { @Override public List getRelevantRules(ResourceBundle messages, UserConfig userConfig, Language motherTongue, List altLanguages) throws IOException { List rules = new ArrayList<>(super.getRelevantRules(messages, userConfig, motherTongue, altLanguages)); + + // Add custom SwissGermanSalutationRule + rules.add(new SwissGermanSalutationRule(messages)); rules.add(new SwissCompoundRule(messages, this, userConfig)); + return rules; } @@ -74,8 +78,7 @@ 
public SpellingCheckRule createDefaultSpellingRule(ResourceBundle messages) thro @Override public List getRelevantLanguageModelCapableRules(ResourceBundle messages, @Nullable LanguageModel languageModel, GlobalConfig globalConfig, UserConfig userConfig, Language motherTongue, List altLanguages) throws IOException { List rules = new ArrayList<>(super.getRelevantLanguageModelCapableRules(messages, languageModel, globalConfig, userConfig, motherTongue, altLanguages)); - rules.add(new SwissGermanSpellerRule(messages, this, - userConfig, languageModel)); + rules.add(new SwissGermanSpellerRule(messages, this, userConfig, languageModel)); return rules; } @@ -91,14 +94,17 @@ public List getRelevantRemoteRules(ResourceBundle messageBundle, List filterRuleMatches(List ruleMatches, AnnotatedText text, Set enabledRules) { - //First, use the filter in German.java + // First, use the filter in German.java ruleMatches = super.filterRuleMatches(ruleMatches, text, enabledRules); List newRuleMatches = new ArrayList<>(); + for (RuleMatch rm : ruleMatches) { - //TODO: replace this by supporting remote-rule-filter for language variants + // TODO: replace this by supporting remote-rule-filter for language variants String ruleId = rm.getRule().getId(); + if (ruleId.equals("AI_DE_GGEC_REPLACEMENT_ORTHOGRAPHY_SPELL") || ruleId.equals("AI_DE_GGEC_REPLACEMENT_ADJECTIVE_FORM")) { String matchingString = null; + if (rm.getSentence() != null) { if (rm.getFromPos() > -1 && rm.getToPos() > -1) { String sentenceStr = rm.getSentence().getText(); @@ -107,6 +113,7 @@ public List filterRuleMatches(List ruleMatches, AnnotatedT } } } + String finalMatchingString = matchingString; if (finalMatchingString != null && finalMatchingString.contains("ss") && rm.getSuggestedReplacements().stream().anyMatch(suggestion -> suggestion.equals(finalMatchingString.replace("ss", "ß")))) { logger.info("Remove match with ruleID: {} ({} -> {})", ruleId, matchingString, rm.getSuggestedReplacements()); @@ -116,15 +123,18 @@ 
public List filterRuleMatches(List ruleMatches, AnnotatedT List replacements = rm.getSuggestedReplacementObjects(); List newReplacements = new ArrayList<>(); + for (SuggestedReplacement s : replacements) { String newReplStr = s.getReplacement().replaceAll("ß", "ss"); SuggestedReplacement newRepl = new SuggestedReplacement(s); newRepl.setReplacement(newReplStr); newReplacements.add(newRepl); } + RuleMatch newMatch = new RuleMatch(rm, newReplacements); newRuleMatches.add(newMatch); } + return newRuleMatches; } diff --git a/languagetool-language-modules/de/src/main/java/org/languagetool/rules/de/SwissGermanSalutationRule.java b/languagetool-language-modules/de/src/main/java/org/languagetool/rules/de/SwissGermanSalutationRule.java new file mode 100644 index 000000000000..cf1f614e2653 --- /dev/null +++ b/languagetool-language-modules/de/src/main/java/org/languagetool/rules/de/SwissGermanSalutationRule.java @@ -0,0 +1,75 @@ +package org.languagetool.rules.de; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; +import java.util.ResourceBundle; +import java.util.regex.Matcher; +import java.util.regex.Pattern; + +import org.languagetool.AnalyzedSentence; +import org.languagetool.AnalyzedTokenReadings; +import org.languagetool.rules.RuleMatch; +import org.languagetool.rules.Rule; +import org.languagetool.rules.Category; +import org.languagetool.rules.CategoryId; + +public class SwissGermanSalutationRule extends Rule { + // Update the salutations list as needed, consider that these elements can include regex patterns. 
+  private static final String[] SALUTATIONS = {
+    // The name is optional as a whole ("\\s+" lives inside the optional group),
+    // so a bare "Hallo," is detected as well as "Hallo Peter,".
+    // \\p{L} accepts any Unicode letter, e.g. "Jürg" or "René".
+    "Hallo(?:\\s+\\p{L}+)?",
+    "Hey(?:\\s+\\p{L}+)?",
+    "Hi(?:\\s+\\p{L}+)?",
+    "Greetings",
+    "Guten\\s+(?:Morgen|Mittag|Abend)",
+    "Sehr\\s+geehrter?\\s+(?:Frau|Herr|Professor|Doktor)\\s+\\p{L}+"
+  };
+
+  /** All salutation alternatives joined into one regex alternation. */
+  private static final String SALUTATIONS_REGEX = String.join("|", SALUTATIONS);
+
+  /**
+   * Matches a salutation that is directly followed by a comma at the end of a line.
+   * The line break (LF or CRLF) is only asserted via lookahead, so the match ends
+   * at the comma and the line break itself is never part of the reported span.
+   * UNICODE_CHARACTER_CLASS makes {@code \b} and {@code \s} Unicode-aware, so a
+   * salutation next to non-ASCII letters is bounded correctly.
+   */
+  private static final Pattern SALUTATION_PATTERN = Pattern.compile(
+    "\\b(?:" + SALUTATIONS_REGEX + "),(?=\\r?\\n)",
+    Pattern.CASE_INSENSITIVE | Pattern.UNICODE_CASE | Pattern.UNICODE_CHARACTER_CLASS
+  );
+
+  // NOTE(review): the project may already provide a shared, localized punctuation
+  // category; if so, prefer it over constructing a new Category here - confirm.
+  private static final Category PUNCTUATION_CATEGORY = new Category(CategoryId.PUNCTUATION, "Punctuation");
+
+  /**
+   * Creates the rule and assigns it to the punctuation category.
+   *
+   * @param messages resource bundle passed on to the {@link Rule} constructor
+   */
+  public SwissGermanSalutationRule(ResourceBundle messages) {
+    super(messages);
+    setCategory(PUNCTUATION_CATEGORY);
+  }
+
+  @Override
+  public String getId() {
+    return "SWISS_GERMAN_SALUTATION_RULE";
+  }
+
+  @Override
+  public String getDescription() {
+    return "Detects salutations followed by a comma and a newline, and suggests to remove the comma";
+  }
+
+  /**
+   * Reports every salutation in the sentence text that ends with a comma
+   * immediately before a line break, and suggests the same salutation without
+   * the comma.
+   *
+   * @param sentence the sentence whose plain text is searched
+   * @return one match per offending salutation; an empty array if there is none
+   */
+  @Override
+  public RuleMatch[] match(AnalyzedSentence sentence) throws IOException {
+    List<RuleMatch> ruleMatches = new ArrayList<>();
+    Matcher matcher = SALUTATION_PATTERN.matcher(sentence.getText());
+
+    while (matcher.find()) {
+      // The match is "<salutation>," - the trailing comma is always the last character.
+      String salutationWithComma = matcher.group();
+      String salutation = salutationWithComma.substring(0, salutationWithComma.length() - 1);
+
+      // NOTE(review): user-facing messages are in English although the rule targets
+      // German text - confirm against the project's convention for German rules.
+      RuleMatch ruleMatch = new RuleMatch(this, sentence, matcher.start(), matcher.end(),
+        "The salutation should not be followed by a comma before a newline.",
+        "Remove the comma");
+      ruleMatch.setSuggestedReplacements(Arrays.asList(salutation));
+      ruleMatches.add(ruleMatch);
+    }
+    return ruleMatches.toArray(new RuleMatch[0]);
+  }
+}