Skip to content

Commit

Permalink
Implemented frog selection options #25
Browse files Browse the repository at this point in the history
  • Loading branch information
proycon committed Apr 10, 2018
1 parent 89d29f1 commit 5b77747
Show file tree
Hide file tree
Showing 3 changed files with 33 additions and 7 deletions.
11 changes: 8 additions & 3 deletions webservice/picclservice/picclservice.py
Original file line number Diff line number Diff line change
Expand Up @@ -396,8 +396,13 @@
ChoiceParameter('distance','How many edits?','Search a distance of N characters for variants',choices=[('2','Up to two edits'),('1','Only one edit')]) #old TICCL -L
]),
('Automatic Linguistic Enrichment', [
BooleanParameter('tok','Perform Tokenisation',"Perform tokenisation."),
BooleanParameter('frog','Perform Linguistic Enrichment',"Performs tokenisation, Part-of-Speech tagging, lemmatisation, named entity recognition and more. This is implemented only for Dutch (uses Frog)!!!")
BooleanParameter('tok','Tokenisation',"Perform tokenisation", default=True),
BooleanParameter('pos','Part-of-Speech Tagging',"Part-of-speech Tagging (for Dutch only!)",default=True),
BooleanParameter('lemma','Lemmatisation',"Lemmatisation (for Dutch only!)", default=True),
BooleanParameter('morph','Morphological Analysis',"Morphological Analysis (for Dutch only!)", default=False),
BooleanParameter('ner','Named Entity Recognition',"Named Entity Recognition", default=False),
BooleanParameter('parser','Dependency Parser',"Dependency parser (for Dutch only!)", default=False),
BooleanParameter('chunker','Chunker / Shallow-parser Parser',"Chunker / Shallow parser (for Dutch only!)", default=False),
]),
#('Focus Word Selection', [
# IntegerParameter('minlength','Minimum Word Length','Integer between zero and one hundred',default=5,minvalue=0, maxvalue=100), #old ticcl -x
Expand All @@ -421,7 +426,7 @@
]


# ======== DISPATCHING (ADVANCED! YOU CAN SAFELY SKIP THIS!) ========
# ======== DISPATCHING (ADVANCED! YOU CAN SAFELY SKIP THIS!) ========

#The dispatcher to use (defaults to clamdispatcher.py), you almost never want to change this
#DISPATCHER = 'clamdispatcher.py'
Expand Down
27 changes: 24 additions & 3 deletions webservice/picclservice/picclservice_wrapper.py
Original file line number Diff line number Diff line change
Expand Up @@ -205,15 +205,36 @@ def nextflowout(prefix):
frog_inputdir = 'ocr_output'
textclass_opts = "--inputclass \"OCR\" --outputclass \"current\"" #extra textclass opts for both frog and/or ucto


if 'frog' in clamdata and clamdata['frog']:
frog = False
if lang == "nld":
for key in ('pos','lemma','morph','ner','parser','chunker'):
if key in clamdata and clamdata[key]:
frog = True
if frog:
skip = ""
#PoS can't be skipped
if 'lemma' not in clamdata or not clamdata['lemma']:
skip += 'l'
if 'parser' not in clamdata or not clamdata['parser']:
skip += 'mp'
if 'morph' not in clamdata or not clamdata['morph']:
skip += 'a'
if 'ner' not in clamdata or not clamdata['ner']:
skip += 'n'
if 'chunker' not in clamdata or not clamdata['chunker']:
skip += 'c'
if skip:
skip = "--skip=" + skip

if frog:
print("Running Frog...",file=sys.stderr)
clam.common.status.write(statusfile, "Running Frog Pipeline (linguistic enrichment)",75) # status update
if os.system(run_piccl + "frog.nf " + textclass_opts + " --inputdir " + shellsafe(frog_inputdir,'"') + " --inputformat folia --extension folia.xml --outputdir " + shellsafe(outputdir,'"') + " -with-trace >frog.nextflow.out.log 2>frog.nextflow.err.log" ) != 0:
if os.system(run_piccl + "frog.nf " + textclass_opts + " " + skip + " --inputdir " + shellsafe(frog_inputdir,'"') + " --inputformat folia --extension folia.xml --outputdir " + shellsafe(outputdir,'"') + " -with-trace >frog.nextflow.out.log 2>frog.nextflow.err.log" ) != 0:
fail('frog')
nextflowout('frog')
elif 'tok' in clamdata and clamdata['tok']:
clam.common.status.write(statusfile, "Running Tokeniser (ucto)",75) # status update

if os.system(run_piccl + "tokenize.nf " + textclass_opts + " --language " + shellsafe(lang,'"') + " --inputformat folia --inputdir " + shellsafe(frog_inputdir,'"') + " --extension folia.xml --outputdir " + shellsafe(outputdir,'"') + " -with-trace >ucto.nextflow.out.log 2>ucto.nextflow.err.log" ) != 0:
fail('ucto')
nextflowout('ucto')
Expand Down
2 changes: 1 addition & 1 deletion webservice/setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@

setup(
name = "PICCL",
version = "0.5",
version = "0.5.1",
author = "Martin Reynaert, Maarten van Gompel",
author_email = "[email protected]",
description = ("Webservice for PICCL"),
Expand Down

5 comments on commit 5b77747

@zeusttu
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is this commit considered stable? It got a new version number in the setup but did not get tagged. I don't know what your tagging policy is but it would be beneficial for the deployment of our test server if this commit got tagged v0.5.1.
In case it matters, yesterday I ran a test where I think I managed to pin all Piccl's files to this commit's version*, and the Piccl pipeline including frog seemed to work perfectly as far as I can tell 🙂

* LaMachine checked out the commit after this one, but I manually reverted the one file changed in that commit.

@proycon
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I indeed didn't release it yet because I hadn't tested it extensively and there were more changes coming up. But I can do a quick release from this commit if this is a good state for you to test with.

@zeusttu
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It would be appreciated! I think LaMachine also has an option to specify a specific (custom) version so I could look into that alternatively.

@proycon
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Released: https://github.com/LanguageMachines/PICCL/releases/tag/v0.5.1

Yeah, that's the plan for LaMachine but that part is not implemented yet. (it wasn't used much in LaMachine v1 so I gave it a lower priority)

@zeusttu
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Many thanks! 😃

Please sign in to comment.