Skip to content

Commit

Permalink
improvements for R-pronouns and relativisation (#10) (#11)
Browse files Browse the repository at this point in the history
Co-authored-by: Jan Odijk <[email protected]>
  • Loading branch information
bbonf and JanOdijk authored Dec 20, 2023
1 parent 1e558f4 commit cd92257
Show file tree
Hide file tree
Showing 3 changed files with 76 additions and 6 deletions.
23 changes: 18 additions & 5 deletions mwe_query/canonicalform.py
Original file line number Diff line number Diff line change
Expand Up @@ -386,7 +386,14 @@ def mknewnode(stree, mwetop, atts, annotations):
newnode.attrib['maxnodecount'] = f'{len(stree)}'
return newnode


def expandnonheadwordnode(nonheadwordnode, phrasenodeproperties):
    """Wrap a copy of *nonheadwordnode* in a freshly created phrase node.

    A new ``node`` element is built with *phrasenodeproperties* as its
    attributes.  Its ``rel`` attribute is then set to the value that
    ``gav(nonheadwordnode, 'rel')`` yields, which overrides any ``rel``
    entry supplied in *phrasenodeproperties*.  A ``copy.copy`` of
    *nonheadwordnode*, with ``rel`` set to ``'hd'``, is appended as the
    only child of the new element.

    Returns the new phrase node.
    """
    # Read the relation from the argument before any copy is relabelled.
    original_rel = gav(nonheadwordnode, 'rel')

    wrapper = ET.Element('node', attrib=phrasenodeproperties)
    wrapper.attrib['rel'] = original_rel

    # The 'hd' relation is assigned on the copy, not on the argument.
    head = copy.copy(nonheadwordnode)
    head.attrib['rel'] = 'hd'
    wrapper.append(head)
    return wrapper
def zullenheadclause(stree: SynTree) -> bool:
if stree.tag == 'node':
cat = gav(stree, 'cat')
Expand Down Expand Up @@ -1016,9 +1023,10 @@ def newgenvariants(stree: SynTree) -> List[SynTree]:
Rpronounobj1node = copy.copy(obj1node)
Rpronounobj1node.attrib['lemma'] = 'er|hier|daar|waar|ergens|nergens|overal'
Rpronounobj1node.attrib['pt'] = 'vnw'
newphrase = expandnonheadwordnode(Rpronounobj1node, {})
for child in newppnode2:
newppnode2.remove(child)
newppnode2.append(Rpronounobj1node)
newppnode2.append(newphrase)
newppnode2.append(newvz2)

# pp with R-pronoun object which has been replaced by a full NP with a dummymod
Expand Down Expand Up @@ -1047,11 +1055,15 @@ def newgenvariants(stree: SynTree) -> List[SynTree]:
pppronadvvcnode.append(pronadvnode1)
pppronadvvcnode.append(newvcnode)

# pp's with a pronominal adverb. e.g. daarnaar
pprel = gav(ppnode, 'rel')
pronadvnode = getpronadv(vzlemma, pprel)
pronadvppnode = expandnonheadwordnode(pronadvnode, {'cat': 'pp', 'rel': pprel})
pronadvnode.attrib['rel'] = 'hd'
pronadvppnode.append(pronadvnode)

alternativesnode = mkalternativesnode([[ppnode], [newppnode2], [newppnode3], [
pppobj1vcnode], [pppronadvvcnode], [pronadvnode]])
pppobj1vcnode], [pppronadvvcnode], [pronadvppnode]])
parent.append(alternativesnode)

vblgennpnodeids = newstree.xpath(
Expand Down Expand Up @@ -1404,8 +1416,9 @@ def relpronsubst(stree: SynTree) -> SynTree:
def expandfull(stree: SynTree) -> SynTree:
    """Return *stree* after applying the full expansion pipeline.

    The tree is passed through three steps in order:

    1. ``relpronsubst``
    2. ``expandnonheadwords``
    3. ``indextransform``

    The result of the last step is returned.
    """
    # possibly add getlcat
    stree1 = relpronsubst(stree)
    # An earlier ``indextransform(stree1)`` / ``return`` pair used to sit
    # here and returned before this step could run; it has been dropped so
    # that expandnonheadwords is actually applied.
    # NOTE(review): expandnonheadwords is defined in lcat.py; confirm it is
    # imported in this module.
    stree2 = expandnonheadwords(stree1)
    stree3 = indextransform(stree2)
    return stree3


def gettopnode(stree):
Expand Down
7 changes: 6 additions & 1 deletion mwe_query/lcat.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@
import copy
import lxml.etree as ET

dummy = 'dummy'


def expandnonheadwords(stree: SynTree) -> SynTree:
# it is presupposed that the input stree is not None
Expand Down Expand Up @@ -47,7 +49,8 @@ def getlcatatt(node: SynTree) -> str:

def mkphrase(child: SynTree) -> SynTree:
newnode = ET.Element('node')
newnode.attrib['id'] = child.attrib['id'] + 'a'
if 'íd' in child.attrib:
newnode.attrib['id'] = child.attrib['id'] + 'a'
lcat = getlcatatt(child)
if lcat in validcats:
newnode.attrib['cat'] = lcat
Expand Down Expand Up @@ -176,6 +179,8 @@ def getlcat(node: SynTree, prel=None) -> str: # noqa: C901
result = 'np'
elif pt == 'spec':
result = None
elif pt == dummy:
result = None
else:
print('Unknown att value (pt) encountered in:')
ET.dump(node)
Expand Down
52 changes: 52 additions & 0 deletions mwe_query/trymwes.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
from sastadev.alpinoparsing import parse
from lcat import expandnonheadwords
from sastadev.treebankfunctions import indextransform
from lxml import etree
from canonicalform import generatequeries, expandfull

# When true, trysomemwes() also dumps the expanded parses and every hit.
debug = False

# Test case as a pair: the MWE string handed to generatequeries(), followed
# by the list of utterances that are parsed and queried.
geenhaankraaien = (
    '0geen *haan zal naar iets kraaien',
    [
        'Daar kraait geen haan naar',
        'Hier heeft geen haan naar gekraaid',
        'geen haan kraaide daarnaar',
        'geen haan kraaide ernaar dat hij niet kwam',
        'geen haan kraaide er naar dat hij niet kwam',
        'er is geen haan die daar naar kraait',
    ],
)

def select(mweutts, utt=None):
    """Return the whole test case, or restrict it to a single utterance.

    *mweutts* is indexed as a pair whose second item is a sequence of
    utterances.  With ``utt`` left at ``None`` the argument is returned
    unchanged.  Otherwise a new pair is returned holding the first item and
    a one-element list with the utterance at index ``utt``.
    """
    if utt is not None:
        mwe, utterances = mweutts[0], mweutts[1]
        return (mwe, [utterances[utt]])
    return mweutts

def getparses(utterances):
    """Return a list with the result of ``parse`` for each utterance, in order."""
    return [parse(utterance) for utterance in utterances]

def trysomemwes():
    """Run the generated MWE queries on every example utterance and print hit counts.

    The queries come from ``generatequeries`` applied to the MWE of the
    ``geenhaankraaien`` test case; the first three are labelled ``MWEQ``,
    ``NMQ`` and ``MLQ``.  Each utterance is parsed, expanded with
    ``expandfull`` and queried with ``xpath``.  For every utterance the
    utterance itself is printed, followed by one ``label: count`` line per
    query.  With the module-level ``debug`` flag set, the expanded parse and
    every matching node are dumped as well.
    """
    mwe, utterances = select(geenhaankraaien)
    queries = generatequeries(mwe)
    labels = ('MWEQ', 'NMQ', 'MLQ')
    labeled_queries = tuple(zip(labels, (queries[0], queries[1], queries[2])))

    # All utterances are parsed up front, before anything is printed.
    parses = getparses(utterances)
    for utterance, parsed in zip(utterances, parses):
        print(f'{utterance}:')
        expanded = expandfull(parsed)
        if debug:
            etree.dump(expanded)
        for label, query in labeled_queries:
            hits = expanded.xpath(query)
            if debug:
                print('Found hits:')
                for hit in hits:
                    etree.dump(hit)
            print(f'{label}: {len(hits)}')




# Call trysomemwes() only when this file is executed as a script, not on import.
if __name__ == '__main__':
    trysomemwes()

0 comments on commit cd92257

Please sign in to comment.