Skip to content

Commit

Permalink
Merge pull request #24 from allo-media/master
Browse files Browse the repository at this point in the history
New release: 2.1.1
  • Loading branch information
rtxm committed Nov 28, 2019
2 parents 73014d7 + ea83e59 commit cf6bca4
Show file tree
Hide file tree
Showing 7 changed files with 45 additions and 15 deletions.
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
from setuptools import setup, find_packages


VERSION = "2.1.0"
VERSION = "2.1.1"


def readme():
Expand Down
8 changes: 6 additions & 2 deletions tests/test_text_to_num_en.py
Original file line number Diff line number Diff line change
Expand Up @@ -150,11 +150,15 @@ def test_alpha2digit_signed(self):
self.assertEqual(alpha2digit(source, "en"), expected)

def test_one_as_noun_or_article(self):
source = "This is the one I'm looking for. One moment please! Twenty one."
expected = "This is the one I'm looking for. One moment please! 21."
source = "This is the one I'm looking for. One moment please! Twenty one cats. One two three four!"
expected = "This is the one I'm looking for. One moment please! 21 cats. 1 2 3 4!"
self.assertEqual(alpha2digit(source, "en"), expected)
source = "No one is innocent. Another one bites the dust."
self.assertEqual(alpha2digit(source, "en"), source)
# End of segment
source = "No one. Another one. One one. Twenty one"
expected = "No one. Another one. 1 1. 21"
self.assertEqual(alpha2digit(source, "en"), expected)

def test_second_as_time_unit_vs_ordinal(self):
source = "One second please! twenty second is parsed as twenty-second and is different from twenty seconds."
Expand Down
4 changes: 4 additions & 0 deletions tests/test_text_to_num_fr.py
Original file line number Diff line number Diff line change
Expand Up @@ -181,3 +181,7 @@ def test_article(self):
"les uns et les autres ; une suite de chiffres : 1, 2, 3 !"
)
self.assertEqual(alpha2digit(source, "fr"), expected)

def test_un_pronoun(self):
    """Check that this French sentence, ending in "qu'un" and "le un", is returned unchanged."""
    sentence = "Je n'en veux qu'un. J'annonce: le un"
    converted = alpha2digit(sentence, "fr")
    self.assertEqual(converted, sentence)
4 changes: 2 additions & 2 deletions text_to_num/lang/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ class Language:

AND_NUMS: Set[str]
AND: str
UNIT_ARTICLES: Set[str]
NEVER_IF_ALONE: Set[str]

# Relaxed composed numbers (two-words only)
# start => (next, target)
Expand All @@ -68,5 +68,5 @@ def normalize(self, word: str) -> str:

def not_numeric_word(self, word: Optional[str]) -> bool:
return (
word is not None and word != self.DECIMAL_SEP and word not in self.NUMBERS
word is None or word != self.DECIMAL_SEP and word not in self.NUMBERS
)
2 changes: 1 addition & 1 deletion text_to_num/lang/english.py
Original file line number Diff line number Diff line change
Expand Up @@ -117,7 +117,7 @@ class English(Language):

AND_NUMS: Set[str] = set()
AND = "and"
UNIT_ARTICLES = {"one"}
NEVER_IF_ALONE = {"one"}

# Relaxed composed numbers (two-words only)
# start => (next, target)
Expand Down
2 changes: 1 addition & 1 deletion text_to_num/lang/french.py
Original file line number Diff line number Diff line change
Expand Up @@ -145,7 +145,7 @@ class French(Language):

AND_NUMS = {"un", "une", "unième", "onze", "onzième"}
AND = "et"
UNIT_ARTICLES = {"un", "une"}
NEVER_IF_ALONE = {"un", "une"}

# Relaxed composed numbers (two-words only)
# start => (next, target)
Expand Down
38 changes: 30 additions & 8 deletions text_to_num/parsers.py
Original file line number Diff line number Diff line change
Expand Up @@ -186,6 +186,22 @@ class WordToDigitParser:
The engine incrementally recognizes a stream of words as a valid cardinal, ordinal,
decimal or formal number (including leading zeros) and builds the corresponding digit string.
The submitted stream must be logically bounded: it is a phrase, it has a beginning and an end and does not
contain sub-phrases. Formally, it does not contain punctuation nor voice pauses.
For example, this text:
« You don't understand. I want two cups of coffee, three cups of tea and an apple pie. »
contains three phrases:
- « you don't understand »
- « I want two cups of coffee »
- « three cups of tea and an apple pie »
In other words, a stream must not cross (nor include) punctuation marks or voice pauses. Otherwise
you may get unexpected, illogical results.
Zeros are not treated as isolates but are considered as starting a new formal number
and are concatenated to the following digit.
Expand Down Expand Up @@ -215,6 +231,7 @@ def __init__(
self.in_frac = False
self.closed = False # For deferred stop
self.open = False # For efficiency
self.last_word: Optional[str] = None # For context

@property
def value(self) -> str:
Expand Down Expand Up @@ -246,17 +263,19 @@ def at_start(self) -> bool:
"""Return True if nothing valid parsed yet."""
return not self.open

def is_article(self, word: str, following: Optional[str]) -> bool:
    """Tell whether ``word`` should be read as an article rather than a number.

    This holds only when no number is currently open, ``word`` is listed in
    the language's ``UNIT_ARTICLES`` and the language reports ``following``
    as not being a numeric word.
    """
    if self.open:
        return False
    if word not in self.lang.UNIT_ARTICLES:
        return False
    # The final verdict is delegated to the language definition.
    return self.lang.not_numeric_word(following)

def _push(self, word: str, look_ahead: Optional[str]) -> bool:
    """Forward ``word`` to the builder for the part currently being parsed.

    The fractional builder receives the word when ``self.in_frac`` is set,
    the integer builder otherwise. The builder's own result is returned
    unchanged.
    """
    if self.in_frac:
        target = self.frac_builder
    else:
        target = self.int_builder
    return target.push(word, look_ahead)

def is_alone(self, word: str, next_word: Optional[str]) -> bool:
    """Tell whether ``word`` is an isolated member of ``NEVER_IF_ALONE``.

    The answer is True only when all of the following hold:

    - no number is currently open;
    - ``word`` belongs to the language's ``NEVER_IF_ALONE`` set;
    - the language reports neither ``next_word`` nor the previously
      pushed word (``self.last_word``) as numeric;
    - at least one of those two neighbours actually exists.
    """
    if self.open or word not in self.lang.NEVER_IF_ALONE:
        return False
    if not self.lang.not_numeric_word(next_word):
        return False
    if not self.lang.not_numeric_word(self.last_word):
        return False
    # With no neighbour on either side, the word is the whole phrase and
    # is not treated as "alone" in the sense used here.
    return next_word is not None or self.last_word is not None

def push(self, word: str, look_ahead: Optional[str] = None) -> bool:
"""Push next word from the stream.
Expand All @@ -272,7 +291,8 @@ def push(self, word: str, look_ahead: Optional[str] = None) -> bool:
again from the last word you tried (the one that has just been rejected).
"""

if self.closed or self.is_article(word, look_ahead):
if self.closed or self.is_alone(word, look_ahead):
self.last_word = word
return False

if (
Expand Down Expand Up @@ -315,7 +335,9 @@ def push(self, word: str, look_ahead: Optional[str] = None) -> bool:
elif not self._push(word, look_ahead):
if self.open:
self.close()
self.last_word = word
return False

self.open = True
self.last_word = word
return True

0 comments on commit cf6bca4

Please sign in to comment.