Skip to content

Commit

Permalink
[pt] Improve compounding rules (#9213)
Browse files Browse the repository at this point in the history
* [pt] Remove contra-ataques from txt and add rule

* [pt] Add BOTA_FORA

* [pt] Add SEM_ABRIGO

* [pt] Add CORTA_PAPEL

* [pt] Comment out bem-parecido from compounds

* [pt] Add DEUS_DARA rule

* [pt] Add PT_COLOUR_HYPHENATION rule

 - extract all colour names into a separate TXT file;

 - add new Java rule for the colour compounding.
  • Loading branch information
p-goulart authored Aug 31, 2023
1 parent 4f1c724 commit 78ed25a
Show file tree
Hide file tree
Showing 6 changed files with 673 additions and 468 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -126,6 +126,7 @@ public List<Rule> getRelevantRules(ResourceBundle messages, UserConfig userConfi
new PunctuationMarkAtParagraphEnd(messages, this, true),
//Specific to Portuguese:
new PostReformPortugueseCompoundRule(messages, this, userConfig),
new PortugueseColourHyphenationRule(messages, this, userConfig),
new PortugueseReplaceRule(messages, this),
new PortugueseBarbarismsRule(messages, "/pt/barbarisms.txt", this),
//new PortugueseArchaismsRule(messages, "/pt/archaisms-pt.txt"), // see https://github.com/languagetool-org/languagetool/issues/3095
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@
/* LanguageTool, a natural language style checker
* Copyright (C) 2006 Daniel Naber (http://www.danielnaber.de)
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
* USA
*/
package org.languagetool.rules.pt;

import org.apache.commons.lang3.StringUtils;
import org.languagetool.Language;
import org.languagetool.UserConfig;
import org.languagetool.rules.*;
import org.languagetool.tools.Tools;

import java.io.IOException;
import java.net.URL;
import java.util.ResourceBundle;

/**
 * Compound rule for Portuguese colour names.
 *
 * <p>The user-facing messages say that colour names are compound words and
 * should be hyphenated. The compounds themselves come from the resource
 * {@code /pt/compound_colours.txt}; the matching logic is inherited from
 * {@link AbstractCompoundRule}.
 *
 * <p>NOTE(review): the {@code @since} tag below looks like it was carried over
 * from the rule this class was modelled on - confirm the release that actually
 * introduced this class.
 *
 * @since 2.6
 */
public class PortugueseColourHyphenationRule extends AbstractCompoundRule {

  /** Identifier reported by {@link #getId()}. */
  private static final String RULE_ID = "PT_COLOUR_HYPHENATION";

  /** Resource that lists the colour compounds handled by this rule. */
  private static final String COLOUR_COMPOUNDS_PATH = "/pt/compound_colours.txt";

  /** Lazily created compound list, shared by every instance of this rule. */
  private static volatile CompoundRuleData compoundData;

  /**
   * Creates the rule and configures its category and issue type.
   *
   * @param messages   resource bundle, also used to resolve the category
   * @param lang       language this rule is registered for
   * @param userConfig user configuration, handed through to the superclass
   * @throws IOException declared by the superclass constructor
   */
  public PortugueseColourHyphenationRule(ResourceBundle messages, Language lang, UserConfig userConfig) throws IOException {
    // The four strings are the messages expected by AbstractCompoundRule
    // (presumably: with hyphen, without hyphen, either form, short message - verify against the superclass).
    super(messages, lang, userConfig,
          "Nomes de cores são palavras compostas e devem ser hifenizados.",
          "Esta palavra é composta por justaposição.",
          "Esta palavra pode ser composta por justaposição ou hifenizada.",
          "Nomes de cores são palavras compostas.");
    super.setCategory(Categories.COMPOUNDING.getCategory(messages));
    setLocQualityIssueType(ITSIssueType.Grammar);
    // Presumably makes each matched compound report its own sub-rule id,
    // with "$match" in the description filled in - confirm in AbstractCompoundRule.
    useSubRuleSpecificIds();
  }

  /** Returns the fixed identifier of this rule. */
  @Override
  public String getId() {
    return RULE_ID;
  }

  /** Returns the Portuguese description template, which contains a {@code $match} placeholder. */
  @Override
  public String getDescription() {
    return "Nomes de cores devem ser hifenizados: \"$match\"";
  }

  /** Returns the reference page associated with this rule. */
  @Override
  public URL getUrl() {
    final String referencePage =
        "https://pt.wikipedia.org/wiki/Lista_das_alterações_previstas_pelo_acordo_ortográfico_de_1990";
    return Tools.getUrl(referencePage);
  }

  /**
   * Returns the shared compound data, creating it on first use.
   *
   * <p>The field is volatile and re-checked under the class lock, so the
   * resource is loaded at most once even when several threads call this
   * method at the same time.
   */
  @Override
  public CompoundRuleData getCompoundRuleData() {
    CompoundRuleData cached = compoundData;
    if (cached != null) {
      // Fast path: already initialised, no locking needed.
      return cached;
    }
    synchronized (PortugueseColourHyphenationRule.class) {
      // Second look: another thread may have finished loading while we waited for the lock.
      cached = compoundData;
      if (cached == null) {
        cached = new CompoundRuleData(COLOUR_COMPOUNDS_PATH);
        compoundData = cached;
      }
      return cached;
    }
  }
}
Loading

0 comments on commit 78ed25a

Please sign in to comment.