Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Crimean Tatar language rules updated and expanded #10675

Closed
wants to merge 6 commits into from
Closed
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,8 @@ USA

<rules lang="crh" xsi:noNamespaceSchemaLocation="../../../../../../../../../languagetool-core/src/main/resources/org/languagetool/rules/rules.xsd" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">

<!-- COMMON TYPOGRAPHY -->

<category id="TYPOS" name="Possible Typo" type="misspelling">
<rulegroup id="PLURAL_AFFIX_CONFUSION" name="Cemi afiksi hatası">
<rule>
Expand Down Expand Up @@ -66,7 +68,7 @@ USA
<example correction="Тереклер"><marker>Тереклар</marker> корьдим.</example>
</rule>
</rulegroup>
<rulegroup id="MIDDLE_VOWEL_EXCLUSIOON" name="Orta sozuq sesiniñ tüşmesi">
<rulegroup id="MIDDLE_VOWEL_EXCLUSION" name="Orta sozuq sesiniñ tüşmesi">
<rule>
<pattern>
<token regexp="yes">ağızı.*</token>
Expand Down Expand Up @@ -221,8 +223,203 @@ USA
<example correction="aman-aman">Ahtem uçurımdan özenge <marker>aman aman</marker> tüşti.</example>
</rule>
</rulegroup>

<rulegroup id="NUMBER_DOUBLING" name="Sayı ekileşmesi">
    <!-- Each rule matches one fixed pair of consecutive tokens and suggests the
         same two tokens joined by a hyphen instead of a space. -->

    <!-- bir + eki -->
    <rule>
        <pattern>
            <token>bir</token>
            <token>eki</token>
        </pattern>
        <message>Ekileşmeli sayılar sızıq ile yazılmalı: <suggestion><match no="1"/>-<match no="2"/></suggestion></message>
        <example correction="bir-eki">Evde <marker>bir eki</marker> adam bar.</example>
    </rule>

    <!-- eki + üç -->
    <rule>
        <pattern>
            <token>eki</token>
            <token>üç</token>
        </pattern>
        <message>Ekileşmeli sayılar sızıq ile yazılmalı: <suggestion><match no="1"/>-<match no="2"/></suggestion></message>
        <example correction="eki-üç">Evde <marker>eki üç</marker> adam bar.</example>
    </rule>

    <!-- üç + dört -->
    <rule>
        <pattern>
            <token>üç</token>
            <token>dört</token>
        </pattern>
        <message>Ekileşmeli sayılar sızıq ile yazılmalı: <suggestion><match no="1"/>-<match no="2"/></suggestion></message>
        <example correction="üç-dört">Evde <marker>üç dört</marker> adam bar.</example>
    </rule>

    <!-- dört + beş -->
    <rule>
        <pattern>
            <token>dört</token>
            <token>beş</token>
        </pattern>
        <message>Ekileşmeli sayılar sızıq ile yazılmalı: <suggestion><match no="1"/>-<match no="2"/></suggestion></message>
        <example correction="dört-beş">Evde <marker>dört beş</marker> adam bar.</example>
    </rule>
</rulegroup>
<rulegroup id="LASTING_ADVERB_AFFIX" name="Soñki zarf afiksi hatası">
    <!-- Four single-token rules that swap the ending of a word depending on the
         vowel class and the consonant class found directly before the ending.
         The token regexp selects the word; regexp_match/regexp_replace in the
         suggestion keep the stem (group 1) and substitute the ending. -->

    <!-- stem vowel from [ıuoa], then [şçfhkpst]: ending "kence" becomes "qanca" -->
    <rule>
        <pattern>
            <token regexp="yes">.*[ıuoa]+[şçfhkpst]+kence</token>
        </pattern>
        <message>Soñki zarf afiksi hatası: <suggestion><match no="1" regexp_match="(.*[ıuoa]+[şçfhkpst]+)?(kence)" regexp_replace="$1qanca"/></suggestion></message>
        <example correction="qışqanca"><marker>qışkence</marker></example>
    </rule>

    <!-- stem vowel from [ıuoa], then [bcdğjlmnñrvyzuoa]: ending "gence" becomes "ğanca" -->
    <rule>
        <pattern>
            <token regexp="yes">.*[ıuoa]+[bcdğjlmnñrvyzuoa]+gence</token>
        </pattern>
        <message>Soñki zarf afiksi hatası: <suggestion><match no="1" regexp_match="(.*[ıuoa]+[bcdğjlmnñrvyzuoa]+)?(gence)" regexp_replace="$1ğanca"/></suggestion></message>
        <example correction="olğanca"><marker>olgence</marker></example>
    </rule>

    <!-- stem vowel from [ieüöâ], then [şçfhkpst]: ending "qanca" becomes "kence" -->
    <rule>
        <pattern>
            <token regexp="yes">.*[ieüöâ]+[şçfhkpst]+qanca</token>
        </pattern>
        <message>Soñki zarf afiksi hatası: <suggestion><match no="1" regexp_match="(.*[eüöâi]+[şçfhkpst]+)(qanca)" regexp_replace="$1kence"/></suggestion></message>
        <example correction="pişkence"><marker>pişqanca</marker></example>
    </rule>

    <!-- stem vowel from [ieüöâ], then [bcdğjlmnñrvyzeüöâ]: ending "ğanca" becomes "gence" -->
    <rule>
        <pattern>
            <token regexp="yes">.*[ieüöâ]+[bcdğjlmnñrvyzeüöâ]+ğanca</token>
        </pattern>
        <message>Soñki zarf afiksi hatası: <suggestion><match no="1" regexp_match="(.*[eüöâi]+[bcdğjlmnñrvyzeüöâ]+)(ğanca)" regexp_replace="$1gence"/></suggestion></message>
        <example correction="ölgence"><marker>ölğanca</marker></example>
    </rule>
</rulegroup>
<rulegroup id="COLLOQUIAL_TYPOS" name="Minnetdarlıq hataları">
    <!-- Spelling fixes for two set expressions. Both messages use the rulegroup
         name as their lead-in, the way the neighbouring rulegroups of this file
         do; the earlier texts were a misspelt variant of the name and a lead-in
         copied from the toponymy rulegroup. -->

    <!-- "elbetteki" written as one word: suggest the two-word spelling. -->
    <rule>
        <pattern>
            <token>elbetteki</token>
        </pattern>
        <message>Minnetdarlıq hataları: <suggestion>elbette ki</suggestion></message>
        <example correction="elbette ki">O <marker>elbetteki</marker> evdedir.</example>
    </rule>

    <!-- "salam" followed by "aleykum"/"aleyküm": the first word is replaced by
         "Selâm", the second word is kept exactly as typed. -->
    <rule>
        <pattern>
            <token regexp="yes">[sS]alam</token>
            <token regexp="yes">aleyk[uü]m</token>
        </pattern>
        <message>Minnetdarlıq hataları: <suggestion><match no="1" regexp_match="([sS]alam)" regexp_replace="Selâm"/> <match no="2"/></suggestion></message>
        <example correction="Selâm aleykum"><marker>Salam aleykum</marker> dostum.</example>
    </rule>
</rulegroup>
<rulegroup id="TOPONOMY_TYPOS" name="Toponimika hataları">
    <!-- Any token starting with "Qrım": the suggestion rewrites that prefix to
         "Qırım" and keeps whatever follows it (group 2) unchanged. -->
    <rule>
        <pattern>
            <token regexp="yes">Qrım.*</token>
        </pattern>
        <message>Toponimika hataları: <suggestion><match no="1" regexp_match="(Qrım)(.*)" regexp_replace="Qırım$2"/></suggestion></message>
        <example correction="Qırımda"><marker>Qrımda</marker> yaşamız.</example>
    </rule>
</rulegroup>
</category>



<!-- NUMBER TYPOGRAPHY -->

<category id="NUMBER_TYPOS" name="Possible Number Typo" type="misspelling">
<rulegroup id="NUMBER_AFTER_DEFIS_MISSING" name="Defis ya da boşluq yoqtur">
    <!-- A single token made of a number immediately followed by letters.
         Two repairs are offered: number-hyphen-letters and number-space-letters.

         Both repairs are suggestions of ONE rule per script. Previously each
         repair lived in its own rule with an identical pattern; such rules
         always match the same span, and LanguageTool keeps only one of several
         overlapping matches coming from the same rulegroup, so the second
         repair was never offered. With several suggestions the example lists
         all of them in order, separated by "|". -->

    <!-- Latin script -->
    <rule>
        <pattern>
            <token regexp="yes">[$+-]?[0-9,-]*[0-9][a-zñğüşöçâ\.]+</token>
        </pattern>
        <message>Söz ve sayı arasında defis yoqtur: <suggestion><match no="1" regexp_match="([$+-]?[0-9,-]*[0-9])([a-zñğüşöçâ\.]+)" regexp_replace="$1-$2"/></suggestion>, <suggestion><match no="1" regexp_match="([$+-]?[0-9,-]*[0-9])([a-zñğüşöçâ\.]+)" regexp_replace="$1 $2"/></suggestion></message>
        <example correction="1981-s|1981 s"><marker>1981s</marker>.</example>
    </rule>

    <!-- Cyrillic script -->
    <rule>
        <pattern>
            <token regexp="yes">[$+-]?[0-9,-]*[0-9][а-яё\.]+</token>
        </pattern>
        <message>Сёз ве сайы арасында дефис ёкътур: <suggestion><match no="1" regexp_match="([$+-]?[0-9,-]*[0-9])([а-яё\.]+)" regexp_replace="$1-$2"/></suggestion>, <suggestion><match no="1" regexp_match="([$+-]?[0-9,-]*[0-9])([а-яё\.]+)" regexp_replace="$1 $2"/></suggestion></message>
        <example correction="1981-с|1981 с"><marker>1981с</marker>.</example>
    </rule>
</rulegroup>
<rulegroup id="NUMBER_BEFORE_DEFIS_MISSING" name="Defis ya da boşluq yoqtur">
    <!-- A single token made of letters immediately followed by a number
         (optionally ending in "%"). Two repairs are offered:
         letters-hyphen-number and letters-space-number.

         Both repairs are suggestions of ONE rule per script. Previously each
         repair lived in its own rule with an identical pattern; such rules
         always match the same span, and LanguageTool keeps only one of several
         overlapping matches coming from the same rulegroup, so the second
         repair was never offered. With several suggestions every example lists
         all of them in order, separated by "|". -->

    <!-- Latin script -->
    <rule>
        <pattern>
            <token regexp="yes">[A-Za-zñğüşöçâ\.]+[0-9][0-9.,:–—-]*%?</token>
        </pattern>
        <message>Söz ve sayı arasında defis yoqtur: <suggestion><match no="1" regexp_match="([A-Za-zñğüşöçâ\.]+)([0-9][0-9.,:–—-]*%?)" regexp_replace="$1-$2"/></suggestion>, <suggestion><match no="1" regexp_match="([A-Za-zñğüşöçâ\.]+)([0-9][0-9.,:–—-]*%?)" regexp_replace="$1 $2"/></suggestion></message>
        <example correction="F-32|F 32"><marker>F32</marker> savaş uçağı.</example>
        <example correction="menim-1|menim 1">Ve <marker>menim1</marker> kitabım.</example>
    </rule>

    <!-- Cyrillic script -->
    <rule>
        <pattern>
            <token regexp="yes">[А-Яа-яё\.]+[0-9][0-9.,:–—-]*%?</token>
        </pattern>
        <message>Сёз ве сайы арасында дефис ёкътур: <suggestion><match no="1" regexp_match="([А-Яа-яё\.]+)([0-9][0-9.,:–—-]*%?)" regexp_replace="$1-$2"/></suggestion>, <suggestion><match no="1" regexp_match="([А-Яа-яё\.]+)([0-9][0-9.,:–—-]*%?)" regexp_replace="$1 $2"/></suggestion></message>
        <example correction="Ф-32|Ф 32"><marker>Ф32</marker> саваш учагъы.</example>
        <example correction="меним-1|меним 1">Ве <marker>меним1</marker> китабым.</example>
    </rule>
</rulegroup>
<rulegroup id="COMPLEX_NUMBER_DEFIS_MISSING" name="Defis ya da boşluq yoqtur">
    <!-- A single token that starts with a number and continues with Latin
         letters (optionally ending in ".", "²" or "³"). Two repairs are offered,
         in this order: number-space-letters and number-hyphen-letters.

         The rulegroup name used to be a copy of the plural-affix group's name;
         it now matches the sibling number rulegroups that carry the same message.

         Both repairs are suggestions of ONE rule. Previously each repair lived
         in its own rule with an identical pattern; such rules always match the
         same span, and LanguageTool keeps only one of several overlapping
         matches coming from the same rulegroup, so the second repair was never
         offered. With several suggestions every example lists all of them in
         order, separated by "|". -->
    <rule>
        <pattern>
            <marker>
                <token regexp="yes">[$+-]?[0-9,-]*[0-9,][A-Za-zñğüşöçâ][A-Za-zñğüşöçâ-]*[.²³]?
                    <exception case_sensitive="yes">1С</exception>
                    <exception regexp="yes">[0-9]+'|[0-9]+k</exception>
                    <exception case_sensitive="yes" regexp="yes">[+-]?[0-9,-]*[0-9][оo][СC]</exception>
                    <!-- Covers both Latin and Cyrillic o and C. The original note
                         says a separate rule handles this form; no such rule is
                         visible in this section. TODO confirm it exists. -->
                </token>
            </marker>
        </pattern>
        <message>Söz ve sayı arasında defis yoqtur: </message>
        <suggestion><match no="1" regexp_match="([0-9,-]*[0-9,])([A-Za-zñğüşöçâ-])" regexp_replace="$1 $2"/></suggestion>
        <suggestion><match no="1" regexp_match="([0-9,-]*[0-9,])([A-Za-zñğüşöçâ-])" regexp_replace="$1-$2"/></suggestion>
        <example correction="21 fayız|21-fayız"><marker>21fayız</marker>.</example>
        <example correction="5 nci|5-nci"><marker>5nci</marker>.</example>
    </rule>
</rulegroup>

</category>


<!-- GRAMMAR RULES -->


<category id="GRAMMAR" name="Grammar mistakes" type="grammar">

<rulegroup id="QUESTION_PARTICLE_SEPARATION" name="Sual edadı ayırması">
Expand Down Expand Up @@ -394,6 +591,30 @@ USA
</message>
<example correction="Terekke"><marker>Terekniñ</marker> qarşı oturdı.</example>
</rule>
<rule>
    <!-- Marks a word tagged NOUN/ADJF/NUMR/PRTF in one of the cases
         nomn, gent, accs, loct or datv when the next token is one of the listed
         words. The suggestion requests the marked word with that case tag
         replaced by exod (postag_replace). -->
    <pattern>
        <marker>
            <token postag="(NOUN|ADJF|NUMR|PRTF).*(nomn|gent|accs|loct|datv).*" postag_regexp="yes"/>
        </marker>
        <token regexp="yes">ğayrı|başqa|soñ|sebep|berli|evel|burun|ögüne</token>
    </pattern>
    <!-- The lines inside <message> stay flush-left: whitespace there is part of the text. -->
    <message>Munasebetçi kelişi şöyle uyğulaşmalı:
<suggestion><match no="1" postag_regexp="yes" postag="((NPRO|NOUN|ADJF|NUMR|PRTF).*)(nomn|gent|accs|loct|datv)(.*)?" postag_replace="$1exod$4"/></suggestion>
</message>
    <example correction="zamandan">Çoq <marker>zaman</marker> berli.</example>
</rule>
<rule>
    <!-- Marks a word tagged NOUN/ADJF/NUMR/PRTF in one of the cases
         exod, accs, loct or datv when the next token is one of the listed
         words. The suggestion requests the marked word with that case tag
         replaced by nomn (postag_replace). -->
    <pattern>
        <marker>
            <token postag="(NOUN|ADJF|NUMR|PRTF).*(exod|accs|loct|datv).*" postag_regexp="yes"/>
        </marker>
        <token regexp="yes">içün|ile|dayın|kibi|aqqında</token>
    </pattern>
    <!-- The lines inside <message> stay flush-left: whitespace there is part of the text. -->
    <message>Munasebetçi kelişi şöyle uyğulaşmalı:
<suggestion><match no="1" postag_regexp="yes" postag="((NPRO|NOUN|ADJF|NUMR|PRTF).*)(exod|accs|loct|datv)(.*)?" postag_replace="$1nomn$4"/></suggestion>
</message>
    <example correction="Dost"><marker>Dostqa</marker> dayın.</example>
</rule>
</rulegroup>

<rule id="EXODIVE_VERB_CASE_COLLOCATION" name="Fiildeki çıqış kelişi uyğulaşması">
Expand Down Expand Up @@ -477,5 +698,27 @@ USA
</rule>
</rulegroup>

<rulegroup id="PERCENTAGE_COORDINATION" name="Faiz uyğulaşması">
    <!-- Two rules that differ only in the spelling of the second token
         ("faiz" / "fayız"). Each matches a token tagged NUMR:subst:sing:nomn
         followed by that word and suggests "yüzde" plus the first token; since
         the pattern has no marker, the suggestion replaces both tokens.
         The lines inside <message> stay flush-left: whitespace there is part
         of the text. -->

    <!-- spelling "faiz" -->
    <rule>
        <pattern>
            <token postag="NUMR:subst:sing:nomn"/>
            <token>faiz</token>
        </pattern>
        <message>Faiz uyğulaşması:
<suggestion>yüzde <match no="1"/></suggestion>
</message>
        <example correction="yüzde elli"><marker>elli faiz</marker>.</example>
    </rule>

    <!-- spelling "fayız" -->
    <rule>
        <pattern>
            <token postag="NUMR:subst:sing:nomn"/>
            <token>fayız</token>
        </pattern>
        <message>Faiz uyğulaşması:
<suggestion>yüzde <match no="1"/></suggestion>
</message>
        <example correction="yüzde elli"><marker>elli fayız</marker>.</example>
    </rule>
</rulegroup>
</category>
</rules>