Skip to content

Commit

Permalink
[pt] Add geographical vocab to speller
Browse files Browse the repository at this point in the history
  • Loading branch information
p-goulart committed Jun 19, 2024
1 parent 9cc42e4 commit fc9d6eb
Show file tree
Hide file tree
Showing 5 changed files with 80 additions and 5 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -211,3 +211,35 @@ Eds Ed NCMPS00
EF EF NCCS000
BT BT AQ0CN0
Linux Linux AQ0CN0

# To be added before 1.2.0
Vermont Vermont NPMSG00
Newport Newport NPCSG00
Connecticut Connecticut NPCSG00
Delaware Delaware NPCSG00
Maryland Maryland NPCSG00
Mississippi Mississippi NPMSG00
cajun cajun AQ0CN0
Tennessee Tennessee NPMSG00
Nashville Nashville NPCSG00
Kentucky Kentucky NPMSG00
Iowa Iowa NPMSG00
Dakota Dakota NPFSG00
Nebraska Nebraska NPMSG00
Idaho Idaho NPMSG00
Oregon Oregon NPMSG00
Seattle Seattle NPCSG00
Yukon Yukon NPMSG00
Saskatchewan Saskatchewan NPCSG00
Saskatoon Saskatoon NPCSG00
Manitoba Manitoba NPCSG00
Winnipeg Winnipeg NPCSG00
Niágara Niágara NPMSG00
Hobart Hobart NPCSG00
Brisbane Brisbane NPCSG00
Cairns Cairns NPCSG00
Perth Perth NPCSG00
outback outback NCMS000
outbacks outback NCMS000
Erie Erie NPMSG00
Huron Huron NPMSG00
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
<!ENTITY english_common_verbs "be|am|are|is|was|were|been|being|have|has|had|
having|did|does|done|doing|can|cannot|should|must|
ought|will|could|would|shall">
<!ENTITY english_conjunctions "and|but|if">
<!ENTITY english_conjunctions "and|but|if|or">

<!-- copied from French and adapted -->
<!ENTITY english_forward "as?|no|[cs]ome|for|(?-i)I">
Expand Down Expand Up @@ -214,10 +214,11 @@
kitchen|(bath|bed)room|survivors?|catch(es)?|caught|tells?|told|forbid(den)?|
prohibit(ed|s|ing)?|cats?|strikes?|strikes?|afternoons?|soldiers?|troops?|
count(y|ies|s)?|earls?|viscounts?|hells?|heavens?|damn|[fs]uck(ing|s|ed|ers?)|
stupid|good|bad|wors[et]|fast(er|est)?|royalty|defen[sc]es?|defend(ed|s|ers?)?|
stupid|good|bad|wors[et]|fast(er|est)?|royal(ty)?|defen[sc]es?|defend(ed|s|ers?)?|
chemistry|chemical|chemist|grew|grow([sn]|ing)?|blew|blow([sn]|ing)?|
boys?|girls?|ugl(y|ie(r|st))|beaut(y|ies)|robbers?|forever|big(ge(r|st))?|
small(er|est)?|slow(er|est|ly)?|tall(er|est)?|short(ly|er|est)?
small(er|est)?|slow(er|est|ly)?|tall(er|est)?|short(ly|er|est)?|
live[sd]|living|die[sd]|dying|free|gardens?
">

<!ENTITY english_common "&english_wh_words;|&english_prepositions;|&english_adverbs;|&english_pronouns;|&english_determiners;|&english_common_verbs;|&english_conjunctions;|&english_word_list;|&english_suffixed_word;|&english_prefixed_word;">
Original file line number Diff line number Diff line change
Expand Up @@ -820,7 +820,7 @@ Chrome Store NPCNO00_
Ciudad Juárez NPCNG00_
Ciudad Real NPCNG00_
Cliff Richard NPMSSP0_
Colúmbia Britànica NPCNG00_
Colúmbia Britânica NPCNG00_
Compra Garantida NPMSO00_
Consell de Cent NPMSO00_
Copy Club NPCSO00_
Expand Down Expand Up @@ -12053,3 +12053,9 @@ code reviews NCCP000_

# "Red Hot" appears to be a pt-BR-only short form of the band name Red Hot Chili Peppers (RHCP)
Red Hot NPMNS00_

Nova Hampshire NPFSG00_
Nova Jersey NPFSG00_
Nova Jérsia NPFSG00_
Monte Rushmore NPMSG00_
Sydney Harbour Bridge NPFSG00_
Original file line number Diff line number Diff line change
Expand Up @@ -303,3 +303,37 @@ maquillage
surmenage
assemblage
paul

# To be added before 1.2.0

Vermont
Newport
Connecticut
Delaware
Maryland
Mississippi
cajun
Tennessee
Nashville
Kentucky
Iowa
Dakota
Nebraska
Idaho
Oregon
Seattle
Yukon
Saskatchewan
Saskatoon
Manitoba
Winnipeg
Niágara
Hobart
Brisbane
Cairns
Perth
outback
outbacks
Erie
Huron
hidrologicamente
Original file line number Diff line number Diff line change
Expand Up @@ -608,7 +608,8 @@ public void testIgnoreEnglishWordsInPortuguese() throws IOException {
"Birmingham City Football Club.",
"Narra, segundo o historiador americano Will Durant, uma das maiores aventuras da história humana.",
"Duas décadas mais tarde, os Gipsy Kings incorporaram aquilo.",
"Valente teve três irmãos, um dos quais, Silvio Francesco, também esteve no show business."
"Valente teve três irmãos, um dos quais, Silvio Francesco, também esteve no show business.",
"O lema do estado de Nova Hampshire é Livre Free or Die"
};
for (String sentence : noErrorSentences) {
List<RuleMatch> matches = lt.check(sentence);
Expand All @@ -621,6 +622,7 @@ public void testIgnoreEnglishWordsInPortuguese() throws IOException {
errorSentences.put("A comunidade do ghetto de Veneza.", "gueto"); // in isolation, it is not tagged with _english_ignore_
// because "as" is blocked and "Endeavour" is not in the list of 'common' English words, we don't tag with _english_ignore_
errorSentences.put("Acho que se chamava As Endeavour.", "EndeavourOS");
errorSentences.put("Clique settings e veja o que acontece.", "sétimas"); // "settings" is isolated; "clique" is English but specifically blocked
for (Map.Entry<String, String> entry : errorSentences.entrySet()) {
List<RuleMatch> matches = lt.check(entry.getKey());
assert !matches.isEmpty();
Expand Down

0 comments on commit fc9d6eb

Please sign in to comment.