Skip to content

Commit

Permalink
📝 Improve documentation references (#26)
Browse files Browse the repository at this point in the history
  • Loading branch information
ddelange committed Feb 22, 2024
1 parent f7d39b9 commit 2544110
Show file tree
Hide file tree
Showing 3 changed files with 75 additions and 28 deletions.
22 changes: 11 additions & 11 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -33,19 +33,19 @@ trie.add("abxy")
assert trie.pattern() == "(?:ab(?:c|s(?:olute)?|xy?)|foo)"
```

A Trie may be populated with zero or more strings at instantiation or via `.add`, from which method chaining is possible. Two Trie may be merged with the `+` and `+=` operators and will compare equal if their data dictionaries are equal.
A `Trie` may be populated with zero or more strings at instantiation or via `Trie.add`, from which method chaining is possible. Two instances can be merged with the `+` (new instance) and `+=` (in-place update) operators. Instances will compare equal if their data dictionaries are equal.

```py
trie = Trie()
trie += Trie("abc")
assert (
trie + Trie().add("foo")
== Trie("abc", "foo")
== Trie(*["abc", "foo"])
== Trie().add(*["abc", "foo"])
== Trie().add("abc", "foo")
== Trie().add("abc").add("foo")
)
trie = Trie()
trie += Trie("abc")
assert (
trie + Trie().add("foo")
== Trie("abc", "foo")
== Trie(*["abc", "foo"])
== Trie().add(*["abc", "foo"])
== Trie().add("abc", "foo")
== Trie().add("abc").add("foo")
)
```


Expand Down
57 changes: 47 additions & 10 deletions src/retrie/retrie.py
Original file line number Diff line number Diff line change
Expand Up @@ -139,7 +139,11 @@ def parse_re_flags(
cls,
re_flags, # type: re_flag_type
): # type: (...) -> int
"""Convert re_flags to integer."""
"""Convert re_flags to integer.
Args:
re_flags (re.RegexFlag | int | None): The flags to cast to integer.
"""
return int(re_flags) if re_flags else 0

def pattern(self): # type: (...) -> Text
Expand Down Expand Up @@ -208,7 +212,8 @@ def __init__(

Retrie.__init__(self, word_boundary=word_boundary, re_flags=re_flags)

self.trie.add(*keys)
for key in keys: # lazy exhaust in case keys is a huge generator
self.trie.add(key)

@cached_property
def compiled(self): # type: (...) -> Pattern[Text]
Expand All @@ -218,13 +223,21 @@ def compiled(self): # type: (...) -> Pattern[Text]
def is_listed(
self, term # type: Text
): # type: (...) -> bool
"""Return True if Pattern is found in term."""
"""Return True if Pattern is found in term.
Args:
term (str): The string to search.
"""
return bool(self.compiled.search(term))

def not_listed(
self, term # type: Text
): # type: (...) -> bool
"""Return True if Pattern is not found in term."""
"""Return True if Pattern is not found in term.
Args:
term (str): The string to search.
"""
return not self.is_listed(term)


Expand Down Expand Up @@ -260,20 +273,32 @@ def __init__(
def is_blacklisted(
self, term # type: Text
): # type: (...) -> bool
"""Return True if Pattern is found in term."""
"""Return True if Pattern is found in term.
Args:
term (str): The string to search.
"""
return self.is_listed(term)

def filter( # noqa:A003
self,
sequence, # type: Sequence[Text]
): # type: (...) -> Iterator[Text]
"""Construct an iterator from those elements of sequence not blacklisted."""
"""Construct an iterator from those elements of sequence not blacklisted.
Args:
sequence (Sequence): The sequence of strings to filter.
"""
return filter(self.not_listed, sequence)

def cleanse_text(
self, term # type: Text
): # type: (...) -> Text
"""Return text, removing all blacklisted terms."""
"""Return text, removing all blacklisted terms.
Args:
term (str): The string to remove blacklisted terms from.
"""
return self.compiled.sub("", term)


Expand Down Expand Up @@ -309,20 +334,32 @@ def __init__(
def is_whitelisted(
self, term # type: Text
): # type: (...) -> bool
"""Return True if Pattern is found in term."""
"""Return True if Pattern is found in term.
Args:
term (str): The string to search.
"""
return self.is_listed(term)

def filter( # noqa:A003
self,
sequence, # type: Sequence[Text]
): # type: (...) -> Iterator[Text]
"""Construct an iterator from whitelisted elements of sequence."""
"""Construct an iterator from whitelisted elements of sequence.
Args:
sequence (Sequence): The sequence of strings to filter.
"""
return filter(self.is_listed, sequence)

def cleanse_text(
self, term # type: Text
): # type: (...) -> Text
"""Return text, only keeping whitelisted terms."""
"""Return text, only keeping whitelisted terms.
Args:
term (str): The string to extract whitelisted terms from.
"""
return "".join(self.compiled.findall(term))


Expand Down
24 changes: 17 additions & 7 deletions src/retrie/trie.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,9 +19,9 @@
trie.add("abxy")
assert trie.pattern() == "(?:ab(?:c|s(?:olute)?|xy?)|foo)"
A Trie may be populated with zero or more strings at instantiation or via `.add`, from
which method chaining is possible. Two Trie may be merged with the `+` and `+=`
operators and will compare equal if their data dictionaries are equal.
A :class:`Trie` may be populated with zero or more strings at instantiation or via :meth:`Trie.add`, from
which method chaining is possible. Two instances can be merged with the ``+`` (new instance) and
``+=`` (in-place update) operators. Instances will compare equal if their data dictionaries are equal.
::
trie = Trie()
Expand All @@ -44,11 +44,17 @@


class Trie:
"""Create a Trie for a sequence of strings.
"""Create a Trie with zero or more words at instantiation (or later via :meth:`Trie.add`).
The Trie can be exported to a Regex pattern, which should match much faster than a
simple Regex union.
The Trie can be exported to a Regex pattern via :meth:`Trie.pattern`, which should match
much faster than a simple Regex union. For best performance, pass the pattern to :func:`re.compile`
and cache it to avoid recompiling for every search. See also :attr:`retrie.retrie.Checklist.compiled`.
Two instances can be merged with the ``+`` (new instance) and ``+=`` (in-place update) operators.
Instances will compare equal if their data dictionaries are equal.
Args:
word (str): A string to add to the Trie.
"""

__slots__ = "data"
Expand Down Expand Up @@ -105,7 +111,11 @@ def _merge_subtrie(
def add(
self, *word # type: Text
): # type: (...) -> "Trie"
"""Add one or more words to the current Trie."""
"""Add one or more words to the current Trie.
Args:
*word (str): One or more strings to add to the Trie.
"""
for word in word:
ref = self.data
for char in word:
Expand Down

0 comments on commit 2544110

Please sign in to comment.