Skip to content

Commit

Permalink
Merge pull request #9 from fedecosta/develop
Browse files Browse the repository at this point in the history
Develop
  • Loading branch information
fedecosta committed Dec 28, 2023
2 parents 40c5eee + 64a23ab commit f82e680
Show file tree
Hide file tree
Showing 19 changed files with 413 additions and 186 deletions.
Binary file modified data/ca-ba/g2p/model.crf
Binary file not shown.
Binary file modified data/ca-ba/lexicon.db
Binary file not shown.
2 changes: 1 addition & 1 deletion data/ca-ba/phonemes.txt
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# https://en.wikipedia.org/wiki/Catalan_phonology
# Catalan Central accent phonemes
# Catalan phonemes

p [p]ala
b [b]ala
Expand Down
Binary file modified data/ca-ce/g2p/model.crf
Binary file not shown.
Binary file modified data/ca-ce/lexicon.db
Binary file not shown.
2 changes: 1 addition & 1 deletion data/ca-ce/phonemes.txt
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# https://en.wikipedia.org/wiki/Catalan_phonology
# Catalan Central accent phonemes
# Catalan phonemes

p [p]ala
b [b]ala
Expand Down
Binary file added data/ca-no/g2p/model.crf
Binary file not shown.
50 changes: 50 additions & 0 deletions data/ca-no/language.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
---

language:
  name: "Nord-Occidental Catalan"
  code: "ca-no"
  phonemes: !env "${config_dir}/phonemes.txt"
  keep_stress: true

lexicon: !env "${config_dir}/lexicon.db"

g2p:
  model: !env "${config_dir}/g2p.fst"

symbols:
  casing: "lower"
  number_regex: "^-?\\d+([,.]\\d+)*$"
  token_split: "\\s+"
  token_join: " "
  minor_breaks:
    - ","
    - ":"
    - ";"
    - "..."
  major_breaks:
    - "."
    - "?"
    - "!"
  replace:
    "[\\<\\>\\(\\)\\[\\]\"]+": ""
    "\\B'": "\""
    "'\\B": "\""
    "’": "'"
    "'": ""
    "-": ""
    "l·l": "l"
  punctuations:
    - "\""
    - "„"
    - "“"
    - "”"
    - "«"
    - "»"
    - ","
    - ":"
    - ";"
    - "."
    - "?"
    - "¿"
    - "!"
    - "¡"
Binary file added data/ca-no/lexicon.db
Binary file not shown.
44 changes: 44 additions & 0 deletions data/ca-no/phonemes.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
# https://en.wikipedia.org/wiki/Catalan_phonology
# Catalan phonemes

p [p]ala
b [b]ala
t [t]ela
d [d]onar
k [k]ala
ɡ [g]ala
m [m]ala
ɲ fa[ng]
β aca[b]a
ð ca[d]a
ɣ ama[g]ar
f [f]als
v a[f]ganès
s [s]ala
z ca[s]a
ʃ [x]oc
ʒ mà[g]ic
tʃ co[tx]e
dʒ me[tg]e
l [l]íquid
ʎ [ll]amp
r ca[rr]o
ɾ ca[r]a
w ve[u]en
uw ca[u]re
j ca[i]re
y [i]a[i]a
n [n]ena
ŋ pi[n]güí
ts po[ts]er
dz do[tz]e

# Vowels
i r[i]c
e c[e]c
ɛ s[e]c
a s[a]c
ɔ f[o]c
o s[ó]c
u s[u]c
ə [a]mor
Binary file added data/ca-va/g2p/model.crf
Binary file not shown.
50 changes: 50 additions & 0 deletions data/ca-va/language.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
---

language:
  name: "Valencià Catalan"
  code: "ca-va"
  phonemes: !env "${config_dir}/phonemes.txt"
  keep_stress: true

lexicon: !env "${config_dir}/lexicon.db"

g2p:
  model: !env "${config_dir}/g2p.fst"

symbols:
  casing: "lower"
  number_regex: "^-?\\d+([,.]\\d+)*$"
  token_split: "\\s+"
  token_join: " "
  minor_breaks:
    - ","
    - ":"
    - ";"
    - "..."
  major_breaks:
    - "."
    - "?"
    - "!"
  replace:
    "[\\<\\>\\(\\)\\[\\]\"]+": ""
    "\\B'": "\""
    "'\\B": "\""
    "’": "'"
    "'": ""
    "-": ""
    "l·l": "l"
  punctuations:
    - "\""
    - "„"
    - "“"
    - "”"
    - "«"
    - "»"
    - ","
    - ":"
    - ";"
    - "."
    - "?"
    - "¿"
    - "!"
    - "¡"
Binary file added data/ca-va/lexicon.db
Binary file not shown.
44 changes: 44 additions & 0 deletions data/ca-va/phonemes.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
# https://en.wikipedia.org/wiki/Catalan_phonology
# Catalan phonemes

p [p]ala
b [b]ala
t [t]ela
d [d]onar
k [k]ala
ɡ [g]ala
m [m]ala
ɲ fa[ng]
β aca[b]a
ð ca[d]a
ɣ ama[g]ar
f [f]als
v a[f]ganès
s [s]ala
z ca[s]a
ʃ [x]oc
ʒ mà[g]ic
tʃ co[tx]e
dʒ me[tg]e
l [l]íquid
ʎ [ll]amp
r ca[rr]o
ɾ ca[r]a
w ve[u]en
uw ca[u]re
j ca[i]re
y [i]a[i]a
n [n]ena
ŋ pi[n]güí
ts po[ts]er
dz do[tz]e

# Vowels
i r[i]c
e c[e]c
ɛ s[e]c
a s[a]c
ɔ f[o]c
o s[ó]c
u s[u]c
ə [a]mor
Binary file modified gruut-lang-ca/gruut_lang_ca/g2p/model.crf
Binary file not shown.
Binary file modified gruut-lang-ca/gruut_lang_ca/lexicon.db
Binary file not shown.
2 changes: 1 addition & 1 deletion gruut-lang-ca/gruut_lang_ca/phonemes.txt
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# https://en.wikipedia.org/wiki/Catalan_phonology
# Catalan Central accent phonemes
# Catalan phonemes

p [p]ala
b [b]ala
Expand Down
2 changes: 2 additions & 0 deletions gruut/const.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,8 @@
"ca": "ca-ce",
"ca-ce": "ca-ce",
"ca-ba": "ca-ba",
"ca-no": "ca-no",
"ca-va": "ca-va",
"cs": "cs-cz",
"de": "de-de",
"en": "en-us",
Expand Down
Loading

0 comments on commit f82e680

Please sign in to comment.