diff --git a/webservice/picclservice/picclservice.py b/webservice/picclservice/picclservice.py index 979ad47..e788c4a 100644 --- a/webservice/picclservice/picclservice.py +++ b/webservice/picclservice/picclservice.py @@ -396,8 +396,13 @@ ChoiceParameter('distance','How many edits?','Search a distance of N characters for variants',choices=[('2','Up to two edits'),('1','Only one edit')]) #old TICCL -L ]), ('Automatic Linguistic Enrichment', [ - BooleanParameter('tok','Perform Tokenisation',"Perform tokenisation."), - BooleanParameter('frog','Perform Linguistic Enrichment',"Performs tokenisation, Part-of-Speech tagging, lemmatisation, named entity recognition and more. This is implemented only for Dutch (uses Frog)!!!") + BooleanParameter('tok','Tokenisation',"Perform tokenisation", default=True), + BooleanParameter('pos','Part-of-Speech Tagging',"Part-of-speech Tagging (for Dutch only!)",default=True), + BooleanParameter('lemma','Lemmatisation',"Lemmatisation (for Dutch only!)", default=True), + BooleanParameter('morph','Morphological Analysis',"Morphological Analysis (for Dutch only!)", default=False), + BooleanParameter('ner','Named Entity Recognition',"Named Entity Recognition", default=False), + BooleanParameter('parser','Dependency Parser',"Dependency parser (for Dutch only!)", default=False), + BooleanParameter('chunker','Chunker / Shallow-parser Parser',"Chunker / Shallow parser (for Dutch only!)", default=False), ]), #('Focus Word Selection', [ # IntegerParameter('minlength','Minimum Word Length','Integer between zero and one hundred',default=5,minvalue=0, maxvalue=100), #old ticcl -x @@ -421,7 +426,7 @@ ] -# ======== DISPATCHING (ADVANCED! YOU CAN SAFELY SKIP THIS!) ======== +# ======== DISPATCHING (ADVANCED! YOU CAN SAFELY SKIP THIS!) 
======== #The dispatcher to use (defaults to clamdispatcher.py), you almost never want to change this #DISPATCHER = 'clamdispatcher.py' diff --git a/webservice/picclservice/picclservice_wrapper.py b/webservice/picclservice/picclservice_wrapper.py index f388e0c..a1e1358 100755 --- a/webservice/picclservice/picclservice_wrapper.py +++ b/webservice/picclservice/picclservice_wrapper.py @@ -205,15 +205,36 @@ def nextflowout(prefix): frog_inputdir = 'ocr_output' textclass_opts = "--inputclass \"OCR\" --outputclass \"current\"" #extra textclass opts for both frog and/or ucto - -if 'frog' in clamdata and clamdata['frog']: +frog = False +if lang == "nld": + for key in ('pos','lemma','morph','ner','parser','chunker'): + if key in clamdata and clamdata[key]: + frog = True + if frog: + skip = "" + #PoS can't be skipped + if 'lemma' not in clamdata or not clamdata['lemma']: + skip += 'l' + if 'parser' not in clamdata or not clamdata['parser']: + skip += 'mp' + if 'morph' not in clamdata or not clamdata['morph']: + skip += 'a' + if 'ner' not in clamdata or not clamdata['ner']: + skip += 'n' + if 'chunker' not in clamdata or not clamdata['chunker']: + skip += 'c' + if skip: + skip = "--skip=" + skip + +if frog: print("Running Frog...",file=sys.stderr) clam.common.status.write(statusfile, "Running Frog Pipeline (linguistic enrichment)",75) # status update - if os.system(run_piccl + "frog.nf " + textclass_opts + " --inputdir " + shellsafe(frog_inputdir,'"') + " --inputformat folia --extension folia.xml --outputdir " + shellsafe(outputdir,'"') + " -with-trace >frog.nextflow.out.log 2>frog.nextflow.err.log" ) != 0: + if os.system(run_piccl + "frog.nf " + textclass_opts + " " + skip + " --inputdir " + shellsafe(frog_inputdir,'"') + " --inputformat folia --extension folia.xml --outputdir " + shellsafe(outputdir,'"') + " -with-trace >frog.nextflow.out.log 2>frog.nextflow.err.log" ) != 0: fail('frog') nextflowout('frog') elif 'tok' in clamdata and clamdata['tok']: 
clam.common.status.write(statusfile, "Running Tokeniser (ucto)",75) # status update + if os.system(run_piccl + "tokenize.nf " + textclass_opts + " --language " + shellsafe(lang,'"') + " --inputformat folia --inputdir " + shellsafe(frog_inputdir,'"') + " --extension folia.xml --outputdir " + shellsafe(outputdir,'"') + " -with-trace >ucto.nextflow.out.log 2>ucto.nextflow.err.log" ) != 0: fail('ucto') nextflowout('ucto') diff --git a/webservice/setup.py b/webservice/setup.py index 790ab89..11a4e46 100755 --- a/webservice/setup.py +++ b/webservice/setup.py @@ -10,7 +10,7 @@ setup( name = "PICCL", - version = "0.5", + version = "0.5.1", author = "Martin Reynaert, Maarten van Gompel", author_email = "reynaert@uvt.nl", description = ("Webservice for PICCL"),