Skip to content

Commit

Permalink
Feature/mwestats callable (#8)
Browse files Browse the repository at this point in the history
Callable MWE information on a single match
  • Loading branch information
oktaal authored Jun 7, 2023
1 parent 5814aa6 commit 5ba0899
Show file tree
Hide file tree
Showing 13 changed files with 3,982 additions and 3,830 deletions.
5 changes: 5 additions & 0 deletions .github/workflows/test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -46,3 +46,8 @@ jobs:
- name: Run unit tests
run: |
python -m unittest discover tests/
- name: Debug information
if: ${{ !success() }}
run: |
cd tests/data/mwetreebanks/output
more * | cat
15 changes: 15 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,8 @@ python -m mwe_query

## Use as Library

### Create Queries

```python
from mwe_query import Mwe
from alpino_query import parse_sentence
Expand All @@ -39,6 +41,19 @@ print(superset.rank)
# 3
```

### Analyze Results

```python
from mwe_query import analyze_mwe_hit

# hit: etree._Element containing the node which matched one of the
# queries
# queries: generated query objects
# tree: etree._Element of the entire utterance tree
info = analyze_mwe_hit(hit, queries, tree)
print(info.components.marked_utt)
```

## Upload to PyPi

```bash
Expand Down
26 changes: 23 additions & 3 deletions mwe_query/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,11 @@
import re
from alpino_query import parse_sentence # type: ignore
from copy import deepcopy
from typing import cast, Dict, List, Optional
from typing import Union, cast, Dict, Iterable, List, Optional
from sastadev.sastatypes import SynTree
import time
from .basex_query import list_databases, perform_xpath
from .mwestats import MweHitInfo
import os
import xml.etree.ElementTree as ET

Expand Down Expand Up @@ -104,10 +106,13 @@ def set_tree(self, alpino_xml: str) -> None:
self.parsed = ET.fromstring(alpino_xml)

def generate_queries(self) -> List['MweQuery']: # noqa: C901
"""_summary_
"""Generates the MWE, near-miss and superset queries
Raises:
ValueError: unexpected parse structure
Returns:
_type_: _description_
List[MweQuery]: list containing three queries
"""
# expand index nodes in parse
mwe = expand_index_nodes(self.parsed)
Expand Down Expand Up @@ -333,6 +338,21 @@ def expand_index_nodes(sentence: ET.Element, index_dict: Optional[Dict[str, ET.E
return sentence


def analyze_mwe_hit(hit: SynTree, queries: Union[Iterable[str], Iterable[MweQuery]], tree: SynTree) -> MweHitInfo:
    """Build the MWE information object for a single query match.

    Each entry of ``queries`` is reduced to an XPath expression: an
    ``MweQuery`` contributes its ``xpath`` attribute, anything else is
    passed through unchanged.

    Args:
        hit (SynTree): the node which matched
        queries (Iterable[str] | Iterable[MweQuery]): the queries used for
            searching, either as query objects or as XPath expressions
        tree (SynTree): entire utterance tree

    Returns:
        MweHitInfo: constructed from the hit, the XPath expressions and
            the utterance tree
    """
    # Kept as a generator expression: the XPath expressions are handed to
    # MweHitInfo lazily, exactly one pass over ``queries``.
    xpath_stream = (
        candidate.xpath if isinstance(candidate, MweQuery) else candidate
        for candidate in queries
    )
    return MweHitInfo(hit, xpath_stream, tree)


def main():
# MWE = 'iemand zal de schepen achter zich verbranden'
# MWE = 'iemand zal de dans ontspringen'
Expand Down
5 changes: 4 additions & 1 deletion mwe_query/canonicalform.py
Original file line number Diff line number Diff line change
Expand Up @@ -146,7 +146,10 @@ def tokenize(sentence):
sentence = re.sub(r'\s+', r' ', sentence)
return sentence.split()


T = TypeVar('T')


def listofsets2setoflists(listofset: Iterable[Iterable[T]]) -> List[List[T]]:
resultset: List[List[T]]
if listofset == []:
Expand Down Expand Up @@ -1442,7 +1445,7 @@ def removeemptyalts(stree: SynTree) -> SynTree:
return newstree


def mknearmissstructs(mwetrees: List[SynTree]) -> List[SynTree]:
def mknearmissstructs(mwetrees: List[SynTree]) -> List[SynTree]:
reducedmwetrees = []
for mwetree in mwetrees:
reducedmwetree = copy.deepcopy(mwetree)
Expand Down
2 changes: 1 addition & 1 deletion mwe_query/gramconfig.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from typing import cast, Any, Dict, List, Optional
from typing import cast, Dict, List, Optional
from sastadev.sastatypes import SynTree
from sastadev.treebankfunctions import getattval as gav
from .canonicalform import listofsets2setoflists
Expand Down
Loading

0 comments on commit 5ba0899

Please sign in to comment.