diff --git a/.github/workflows/condapublish.yml b/.github/workflows/condapublish.yml
deleted file mode 100644
index 4e0fe866..00000000
--- a/.github/workflows/condapublish.yml
+++ /dev/null
@@ -1,82 +0,0 @@
-# This is a basic workflow to help you get started with Actions
-
-name: Publish on Conda-forge
-
-# Controls when the workflow will run
-on:
-
-  release:
-    types: [ published ]
-
-  workflow_dispatch:
-
-# A workflow run is made up of one or more jobs that can run sequentially or in parallel
-jobs:
-  # This workflow contains a single job called "build"
-  publish:
-    # The type of runner that the job will run on
-    runs-on: ubuntu-latest
-
-    # variables
-    env:
-      REPO_NAME: ${{ github.event.repository.name }}
-      OWNER_NAME: ${{ github.repository_owner }}
-
-    # Steps represent a sequence of tasks that will be executed as part of the job
-    steps:
-      # Check-out src repository
-      - uses: actions/checkout@v3
-        with:
-          path: src
-
-      # Check-out feedstock
-      - uses: actions/checkout@v2
-        with:
-          token: ${{ secrets.PAT_GITHUB }}
-          repository: ${{ github.repository_owner }}/${{ github.event.repository.name }}-feedstock
-          path: dst
-
-      # Re-sync
-      - name: Re-sync
-        run: |
-          cd dst
-          git remote add upstream https://github.com/conda-forge/${REPO_NAME}-feedstock.git
-          git fetch upstream
-          git checkout main
-          git merge upstream/main
-
-      # Generate meta.yaml
-      - name: Generate and push meta.yaml
-        run: |
-          PACKAGE_NAME=`echo $REPO_NAME | sed -e 's/-//g'`
-          cd src/${PACKAGE_NAME}
-          VERSION=`python -c 'exec(open("PandaToolsPkgInfo.py").read());print (release_version)'`
-          cd -
-          echo REPO_NAME=$REPO_NAME
-          echo "REPO_NAME=$REPO_NAME" >> $GITHUB_ENV
-          echo PACKAGE_NAME=$PACKAGE_NAME
-          echo VERSION=$VERSION
-          echo "VERSION=$VERSION" >> $GITHUB_ENV
-          wget https://github.com/${OWNER_NAME}/${REPO_NAME}/archive/refs/tags/${VERSION}.tar.gz -q -O dummy.tar.gz
-          SHA256SUM=`sha256sum dummy.tar.gz`
-          SHA256SUM=${SHA256SUM% *}
-          echo SHA256SUM=$SHA256SUM
-          sed -e "s/___PACKAGE_VERSION___/${VERSION}/g" src/templates/conda_meta.yaml.template \
-            | sed -e "s/___SHA256SUM___/${SHA256SUM}/g" > dst/recipe/meta.yaml
-
-      - name: Push the change
-        run: |
-          cd dst
-          # use personal info since github-actions/github-actions@github.com doesn't work for forked repos
-          git config --global user.name 'Tadashi Maeno'
-          git config --global user.email 'tmaeno@bnl.gov'
-          git diff --quiet && git diff --staged --quiet || git commit -am "${VERSION} github action"
-          git push
-
-      - name: Request pull request
-        env:
-          # use PAT instead of GITHUB_TOKEN since the latter cannot submit a PR
-          GITHUB_TOKEN: ${{ secrets.PAT_GITHUB }}
-        run: |
-          cd dst
-          gh pr create -t "${REPO_NAME} ${VERSION} github action" -b "automatic pull request"
diff --git a/.github/workflows/pythonpublish.yml b/.github/workflows/pythonpublish.yml
deleted file mode 100644
index 9e8f510e..00000000
--- a/.github/workflows/pythonpublish.yml
+++ /dev/null
@@ -1,40 +0,0 @@
-name: Upload Python Package
-
-on:
-  release:
-    types: [published]
-  workflow_dispatch:
-
-jobs:
-  deploy:
-    runs-on: ubuntu-latest
-    steps:
-    - uses: actions/checkout@v3
-      with:
-        fetch-depth: 0
-
-    - name: Set up Python
-      uses: actions/setup-python@v4
-      with:
-        python-version: '3.x'
-
-    - name: Install dependencies
-      run: |
-        python -m pip install --upgrade pip setuptools wheel
-        python -m pip install hatch twine build
-        python -m pip list
-
-    - name: Build a sdist
-      run: |
-        python -m build -s
-        cp packages/light/pyproject.toml .
-        hatch build -t wheel
-
-    - name: Verify the distribution
-      run: twine check dist/*
-
-    - name: Publish distribution to PyPI
-      if: github.event_name == 'release' && github.event.action == 'published' && github.repository == 'PanDAWMS/panda-client'
-      uses: pypa/gh-action-pypi-publish@release/v1
-      with:
-        password: ${{ secrets.PYPI_API_TOKEN }}
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
new file mode 100644
index 00000000..90ca8e5e
--- /dev/null
+++ b/.pre-commit-config.yaml
@@ -0,0 +1,15 @@
+repos:
+
+- repo: https://github.com/psf/black
+  rev: 23.9.1
+  hooks:
+  - id: black
+    types: [python]
+    args: ["--config", "packages/light/pyproject.toml"]
+
+- repo: https://github.com/pycqa/isort
+  rev: 5.12.0
+  hooks:
+  - id: isort
+    name: isort (python)
+    args: ["--settings-path", "packages/light/pyproject.toml"]
\ No newline at end of file
diff --git a/ChangeLog.txt b/ChangeLog.txt
index 0089f5c2..e93c7bfb 100644
--- a/ChangeLog.txt
+++ b/ChangeLog.txt
@@ -1,5 +1,9 @@
 ** Release Notes
 
+1.5.67
+ * added reload_input to pbook
+ * added memory warning and -y to pathena/prun
+
 1.5.66
  * fixed FileSpec import
 
diff --git a/packages/light/pyproject.toml b/packages/light/pyproject.toml
index b18a2823..23fc6385 100644
--- a/packages/light/pyproject.toml
+++ b/packages/light/pyproject.toml
@@ -43,3 +43,18 @@ directory = "dist"
 
 [tool.hatch.build.targets.wheel]
 packages = ["pandaclient"]
+
+
+[tool.black]
+line-length=160
+
+[tool.autopep8]
+# https://pypi.org/project/autopep8/#pyproject-toml
+max_line_length = 160
+ignore = ["E501", "W6"]
+in-place = true
+recursive = true
+aggressive = 3
+
+[tool.isort]
+profile = "black"
diff --git a/pandaclient/MiscUtils.py b/pandaclient/MiscUtils.py
index 79a976bf..a159279e 100644
--- a/pandaclient/MiscUtils.py
+++ b/pandaclient/MiscUtils.py
@@ -1,11 +1,12 @@
+import datetime
+import json
+import os
 import re
+import subprocess
 import sys
-import os
-import json
-import uuid
-import datetime
 import traceback
-import subprocess
+import uuid
+
 try:
     import cPickle as pickle
 except ImportError:
@@ -24,11 +25,14 @@
     from pandaserver.taskbuffer.JobSpec import JobSpec
 except ImportError:
     import pandaclient
-    sys.modules['pandaserver'] = pandaclient
+
+    sys.modules["pandaserver"] = pandaclient
     from . import JobSpec
-    sys.modules['pandaserver.taskbuffer.JobSpec'] = JobSpec
+
+    sys.modules["pandaserver.taskbuffer.JobSpec"] = JobSpec
     from .
import FileSpec - sys.modules['pandaserver.taskbuffer.FileSpec'] = FileSpec + + sys.modules["pandaserver.taskbuffer.FileSpec"] = FileSpec # wrapper for uuidgen @@ -37,63 +41,79 @@ def wrappedUuidGen(): # make JEDI job parameter -def makeJediJobParam(lfn,dataset,paramType,padding=True,hidden=False,expand=False, - include='',exclude='',nFilesPerJob=None,offset=0,destination='', - token='',useNumFilesAsRatio=False,randomAtt=False,reusableAtt=False, - allowNoOutput=None, outDS=None, file_list=None): +def makeJediJobParam( + lfn, + dataset, + paramType, + padding=True, + hidden=False, + expand=False, + include="", + exclude="", + nFilesPerJob=None, + offset=0, + destination="", + token="", + useNumFilesAsRatio=False, + randomAtt=False, + reusableAtt=False, + allowNoOutput=None, + outDS=None, + file_list=None, +): dictItem = {} - if paramType == 'output': - dictItem['type'] = 'template' - dictItem['value'] = lfn - dictItem['param_type'] = paramType - dictItem['dataset'] = dataset - dictItem['container'] = dataset - if destination != '': - dictItem['destination'] = destination - if token != '': - dictItem['token'] = token + if paramType == "output": + dictItem["type"] = "template" + dictItem["value"] = lfn + dictItem["param_type"] = paramType + dictItem["dataset"] = dataset + dictItem["container"] = dataset + if destination != "": + dictItem["destination"] = destination + if token != "": + dictItem["token"] = token if not padding: - dictItem['padding'] = padding + dictItem["padding"] = padding if allowNoOutput is not None: for tmpPatt in allowNoOutput: - if tmpPatt == '': + if tmpPatt == "": continue - tmpPatt = '^.*'+tmpPatt+'$' - if re.search(tmpPatt,lfn) is not None: - dictItem['allowNoOutput'] = True + tmpPatt = "^.*" + tmpPatt + "$" + if re.search(tmpPatt, lfn) is not None: + dictItem["allowNoOutput"] = True break - elif paramType == 'input': - dictItem['type'] = 'template' - dictItem['value'] = lfn - dictItem['param_type'] = paramType - dictItem['dataset'] = dataset + elif paramType == "input": + dictItem["type"] = "template" + dictItem["value"] = lfn + dictItem["param_type"] = paramType + dictItem["dataset"] = dataset if offset > 0: - dictItem['offset'] = offset - if include != '': - dictItem['include'] = include - if exclude != '': - dictItem['exclude'] = exclude + dictItem["offset"] = offset + if include != "": + dictItem["include"] = include + if exclude != "": + dictItem["exclude"] = exclude if expand: - dictItem['expand'] = expand + dictItem["expand"] = expand elif outDS: - dictItem['consolidate'] = '.'.join(outDS.split('.')[:2]) + '.' + wrappedUuidGen() + '/' - if nFilesPerJob not in [None,0]: - dictItem['nFilesPerJob'] = nFilesPerJob - if useNumFilesAsRatio and nFilesPerJob not in [None,0]: - dictItem['ratio'] = nFilesPerJob + dictItem["consolidate"] = ".".join(outDS.split(".")[:2]) + "." 
+ wrappedUuidGen() + "/" + if nFilesPerJob not in [None, 0]: + dictItem["nFilesPerJob"] = nFilesPerJob + if useNumFilesAsRatio and nFilesPerJob not in [None, 0]: + dictItem["ratio"] = nFilesPerJob if file_list: - dictItem['files'] = file_list + dictItem["files"] = file_list if hidden: - dictItem['hidden'] = hidden + dictItem["hidden"] = hidden if randomAtt: - dictItem['random'] = True + dictItem["random"] = True if reusableAtt: - dictItem['reusable'] = True + dictItem["reusable"] = True return [dictItem] # get dataset name and num of files for a stream -def getDatasetNameAndNumFiles(streamDS,nFilesPerJob,streamName): +def getDatasetNameAndNumFiles(streamDS, nFilesPerJob, streamName): if streamDS == "": # read from stdin print("\nThis job uses %s stream" % streamName) @@ -112,7 +132,7 @@ def getDatasetNameAndNumFiles(streamDS,nFilesPerJob,streamName): except Exception: pass # return - return streamDS,nFilesPerJob + return streamDS, nFilesPerJob # convert UTF-8 to ASCII in json dumps @@ -129,7 +149,7 @@ def unicodeConvert(input): retList.append(unicodeConvert(tmpItem)) return retList elif isinstance(input, unicode): - return input.encode('ascii', 'ignore').decode() + return input.encode("ascii", "ignore").decode() return input @@ -141,12 +161,17 @@ def decodeJSON(input_file): # replacement for commands def commands_get_status_output(com): - data = '' + data = "" try: # for python 2.6 - #data = subprocess.check_output(com, shell=True, universal_newlines=True, stderr=subprocess.STDOUT) - p = subprocess.Popen(com, shell=True, universal_newlines=True, stdout=subprocess.PIPE, - stderr=subprocess.STDOUT) + # data = subprocess.check_output(com, shell=True, universal_newlines=True, stderr=subprocess.STDOUT) + p = subprocess.Popen( + com, + shell=True, + universal_newlines=True, + stdout=subprocess.PIPE, + stderr=subprocess.STDOUT, + ) data, unused_err = p.communicate() retcode = p.poll() if retcode: @@ -155,9 +180,9 @@ def commands_get_status_output(com): status = 0 except subprocess.CalledProcessError as ex: # for python 2.6 - #data = ex.output + # data = ex.output status = ex.returncode - if data[-1:] == '\n': + if data[-1:] == "\n": data = data[:-1] return status, data @@ -167,70 +192,74 @@ def commands_get_output(com): def commands_fail_on_non_zero_exit_status( - com, error_status_on_failure, - verbose_cmd=False, verbose_output=False, - logger=None, error_log_msg=""): - + com, + error_status_on_failure, + verbose_cmd=False, + verbose_output=False, + logger=None, + error_log_msg="", +): # print command if verbose if verbose_cmd: print(com) - - # execute command, get status code and message printed by the command + + # execute command, get status code and message printed by the command status, data = commands_get_status_output(com) - - # fail for non zero exit status + + # fail for non zero exit status if status != 0: if not verbose_cmd: print(com) # print error message before failing print(data) - # report error message if logger and log message have been provided + # report error message if logger and log message have been provided if logger and error_log_msg: logger.error(error_log_msg) - + if type(error_status_on_failure) == int: # use error status provided to the function sys.exit(error_status_on_failure) elif error_status_on_failure == "sameAsStatus": - # use error status exit code returned + # use error status exit code returned # by the execution of the command sys.exit(status) - else: + else: # default exit status otherwise sys.exit(1) - + # print command output message if verbose if 
verbose_output and data: print(data) - return status,data + return status, data # decorator to run with the original environment def run_with_original_env(func): def new_func(*args, **kwargs): - if 'LD_LIBRARY_PATH_ORIG' in os.environ and 'LD_LIBRARY_PATH' in os.environ: - os.environ['LD_LIBRARY_PATH_RESERVE'] = os.environ['LD_LIBRARY_PATH'] - os.environ['LD_LIBRARY_PATH'] = os.environ['LD_LIBRARY_PATH_ORIG'] - if 'PYTHONPATH_ORIG' in os.environ: - os.environ['PYTHONPATH_RESERVE'] = os.environ['PYTHONPATH'] - os.environ['PYTHONPATH'] = os.environ['PYTHONPATH_ORIG'] - if 'PYTHONHOME_ORIG' in os.environ and os.environ['PYTHONHOME_ORIG'] != '': - if 'PYTHONHOME' in os.environ: - os.environ['PYTHONHOME_RESERVE'] = os.environ['PYTHONHOME'] - os.environ['PYTHONHOME'] = os.environ['PYTHONHOME_ORIG'] + if "LD_LIBRARY_PATH_ORIG" in os.environ and "LD_LIBRARY_PATH" in os.environ: + os.environ["LD_LIBRARY_PATH_RESERVE"] = os.environ["LD_LIBRARY_PATH"] + os.environ["LD_LIBRARY_PATH"] = os.environ["LD_LIBRARY_PATH_ORIG"] + if "PYTHONPATH_ORIG" in os.environ: + os.environ["PYTHONPATH_RESERVE"] = os.environ["PYTHONPATH"] + os.environ["PYTHONPATH"] = os.environ["PYTHONPATH_ORIG"] + if "PYTHONHOME_ORIG" in os.environ and os.environ["PYTHONHOME_ORIG"] != "": + if "PYTHONHOME" in os.environ: + os.environ["PYTHONHOME_RESERVE"] = os.environ["PYTHONHOME"] + os.environ["PYTHONHOME"] = os.environ["PYTHONHOME_ORIG"] try: return func(*args, **kwargs) except Exception as e: print(str(e) + traceback.format_exc()) raise e finally: - if 'LD_LIBRARY_PATH_RESERVE' in os.environ: - os.environ['LD_LIBRARY_PATH'] = os.environ['LD_LIBRARY_PATH_RESERVE'] - if 'PYTHONPATH_RESERVE' in os.environ: - os.environ['PYTHONPATH'] = os.environ['PYTHONPATH_RESERVE'] - if 'PYTHONHOME_RESERVE' in os.environ: - os.environ['PYTHONHOME'] = os.environ['PYTHONHOME_RESERVE'] + if "LD_LIBRARY_PATH_RESERVE" in os.environ: + os.environ["LD_LIBRARY_PATH"] = os.environ["LD_LIBRARY_PATH_RESERVE"] + if "PYTHONPATH_RESERVE" in os.environ: + os.environ["PYTHONPATH"] = os.environ["PYTHONPATH_RESERVE"] + if "PYTHONHOME_RESERVE" in os.environ: + os.environ["PYTHONHOME"] = os.environ["PYTHONHOME_RESERVE"] + return new_func @@ -251,48 +280,48 @@ def pickle_loads(str_input): return pickle.loads(str_input) except Exception: try: - return pickle.loads(str_input.encode('utf-8'), encoding='latin1') + return pickle.loads(str_input.encode("utf-8"), encoding="latin1") except Exception: return str_input # parse secondary dataset option def parse_secondary_datasets_opt(secondaryDSs): - if secondaryDSs != '': + if secondaryDSs != "": # parse tmpMap = {} - for tmpItem in secondaryDSs.split(','): + for tmpItem in secondaryDSs.split(","): if "#" in tmpItem: - tmpItems = tmpItem.split('#') + tmpItems = tmpItem.split("#") else: - tmpItems = tmpItem.split(':') + tmpItems = tmpItem.split(":") if 3 <= len(tmpItems) <= 6: tmpDsName = tmpItems[2] # change ^ to , - tmpDsName = tmpDsName.replace('^',',') + tmpDsName = tmpDsName.replace("^", ",") # make map - tmpMap[tmpDsName] = {'nFiles' : int(tmpItems[1]), - 'streamName' : tmpItems[0], - 'pattern' : '', - 'nSkip' : 0, - 'files' : []} + tmpMap[tmpDsName] = { + "nFiles": int(tmpItems[1]), + "streamName": tmpItems[0], + "pattern": "", + "nSkip": 0, + "files": [], + } # using filtering pattern if len(tmpItems) >= 4 and tmpItems[3]: - tmpMap[tmpItems[2]]['pattern'] = tmpItems[3] + tmpMap[tmpItems[2]]["pattern"] = tmpItems[3] # nSkip if len(tmpItems) >= 5 and tmpItems[4]: - tmpMap[tmpItems[2]]['nSkip'] = int(tmpItems[4]) + 
tmpMap[tmpItems[2]]["nSkip"] = int(tmpItems[4]) # files if len(tmpItems) >= 6 and tmpItems[5]: with open(tmpItems[5]) as f: for l in f: l = l.strip() if l: - tmpMap[tmpItems[2]]['files'].append(l) + tmpMap[tmpItems[2]]["files"].append(l) else: - errStr = "Wrong format %s in --secondaryDSs. Must be "\ - "StreamName:nFilesPerJob:DatasetName[:Pattern[:nSkipFiles[:FileNameList]]]" \ - % tmpItem + errStr = "Wrong format %s in --secondaryDSs. Must be " "StreamName:nFilesPerJob:DatasetName[:Pattern[:nSkipFiles[:FileNameList]]]" % tmpItem return False, errStr # set secondaryDSs = tmpMap @@ -333,3 +362,17 @@ def load_jobs_json(state): job_spec.load_from_json_serializable(job_state) jobs.append(job_spec) return jobs + + +# ask a yes/no question and return answer +def query_yes_no(question): + prompt = "[y/n]: " + valid = {"yes": True, "y": True, "ye": True, "no": False, "n": False} + info_str = " (Use -y if you are confident and want to skip this question) " + while True: + sys.stdout.write(question + info_str + prompt) + choice = raw_input().lower() + if choice in valid: + return valid[choice] + else: + sys.stdout.write("Please respond with 'y' or 'n'") diff --git a/pandaclient/PBookCore.py b/pandaclient/PBookCore.py index e1c06969..5ccd3af6 100644 --- a/pandaclient/PBookCore.py +++ b/pandaclient/PBookCore.py @@ -1,34 +1,32 @@ -import json import base64 -import time import copy -import sys +import json import re +import sys +import time try: long() except Exception: long = int -from . import Client -from . import PLogger -from pandaclient import queryPandaMonUtils -from pandaclient import localSpecs -from pandaclient import PsubUtils +from pandaclient import PsubUtils, localSpecs, queryPandaMonUtils + +from . import Client, PLogger def is_reqid(id): """ whether an id is a reqID (otherwise jediTaskID) """ - return (id < 10 ** 7) + return id < 10**7 + def _get_one_task(self, taskID, verbose=False): """ get one task spec by ID """ - ts, url, data = queryPandaMonUtils.query_tasks(username=self.username, jeditaskid=taskID, - verbose=verbose) + ts, url, data = queryPandaMonUtils.query_tasks(username=self.username, jeditaskid=taskID, verbose=verbose) if isinstance(data, list) and data: task = data[0] taskspec = localSpecs.LocalTaskSpec(task, source_url=url, timestamp=ts) @@ -36,12 +34,12 @@ def _get_one_task(self, taskID, verbose=False): else: return None + def _get_tasks_from_reqid(self, reqID, verbose=False): """ get a list of task spec by reqID """ - ts, url, data = queryPandaMonUtils.query_tasks(username=self.username, reqid=reqID, - verbose=verbose) + ts, url, data = queryPandaMonUtils.query_tasks(username=self.username, reqid=reqID, verbose=verbose) if isinstance(data, list) and data: taskspec_list = [] for task in data: @@ -59,6 +57,7 @@ def check_task_owner(func): """ sanity check decorator of user ownership vs the task """ + # Wrapper def wrapper(self, *args, **kwargs): # Make logger @@ -71,7 +70,7 @@ def wrapper(self, *args, **kwargs): if args: taskid = args[0] if taskid is None: - tmpLog.error('no taskID sepcified, nothing done') + tmpLog.error("no taskID sepcified, nothing done") return # taskspec = _get_one_task(self, taskid, self.verbose) if is_reqid(taskid): @@ -79,12 +78,11 @@ def wrapper(self, *args, **kwargs): else: taskspec_list = [_get_one_task(self, taskid, self.verbose)] except Exception as e: - tmpLog.error('got {0}: {1}'.format(e.__class__.__name__, e)) + tmpLog.error("got {0}: {1}".format(e.__class__.__name__, e)) else: ret = True if taskspec_list is None: - 
sys.stdout.write('Permission denied: reqID={0} is not owned by {1} \n'.format( - taskid, self.username)) + sys.stdout.write("Permission denied: reqID={0} is not owned by {1} \n".format(taskid, self.username)) ret = False else: for taskspec in taskspec_list: @@ -92,19 +90,18 @@ def wrapper(self, *args, **kwargs): args_new = (taskspec.jeditaskid,) + args[1:] ret = ret and func(self, *args_new, **kwargs) else: - sys.stdout.write('Permission denied: taskID={0} is not owned by {1} \n'.format( - taskid, self.username)) + sys.stdout.write("Permission denied: taskID={0} is not owned by {1} \n".format(taskid, self.username)) ret = False global func_return_value func_return_value = ret return ret + wrapper.original_func = func return wrapper # core class for book keeping class PBookCore(object): - # constructor def __init__(self, verbose=False): # verbose @@ -120,10 +117,9 @@ def init(self, sanity_check=True): username_from_proxy = PsubUtils.extract_voms_proxy_username() if username_from_proxy: self.username = username_from_proxy - sys.stdout.write('PBook user: {0} \n'.format(self.username)) + sys.stdout.write("PBook user: {0} \n".format(self.username)) else: - sys.stderr.write('ERROR : Cannot get user name from proxy or token. ' - 'Please generate a new one using "generate_credential"\n') + sys.stderr.write("ERROR : Cannot get user name from proxy or token. " 'Please generate a new one using "generate_credential"\n') sys.exit(1) # kill @@ -132,21 +128,21 @@ def kill(self, taskID): # get logger tmpLog = PLogger.getPandaLogger() # kill JEDI task - tmpLog.info('Sending killTask command ...') + tmpLog.info("Sending killTask command ...") status, output = Client.killTask(taskID, self.verbose) # communication error if status != 0: tmpLog.error(output) - tmpLog.error('Failed to kill jediTaskID=%s' % taskID) + tmpLog.error("Failed to kill jediTaskID=%s" % taskID) return False tmpStat, tmpDiag = output if not tmpStat: tmpLog.error(tmpDiag) - tmpLog.error('Failed to kill jediTaskID=%s' % taskID) + tmpLog.error("Failed to kill jediTaskID=%s" % taskID) return False tmpLog.info(tmpDiag) # done - tmpLog.info('Done, jediTaskID=%s will be killed in 30 min' % taskID) + tmpLog.info("Done, jediTaskID=%s will be killed in 30 min" % taskID) return True # finish @@ -155,21 +151,21 @@ def finish(self, taskID, soft=False): # get logger tmpLog = PLogger.getPandaLogger() # finish JEDI task - tmpLog.info('Sending finishTask command ...') + tmpLog.info("Sending finishTask command ...") status, output = Client.finishTask(taskID, soft, self.verbose) # communication error if status != 0: tmpLog.error(output) - tmpLog.error('Failed to finish jediTaskID=%s' % taskID) + tmpLog.error("Failed to finish jediTaskID=%s" % taskID) return False tmpStat, tmpDiag = output if not tmpStat: tmpLog.error(tmpDiag) - tmpLog.error('Failed to finish jediTaskID=%s' % taskID) + tmpLog.error("Failed to finish jediTaskID=%s" % taskID) return False tmpLog.info(tmpDiag) # done - tmpLog.info('Done, jediTaskID=%s will be finished soon' % taskID) + tmpLog.info("Done, jediTaskID=%s will be finished soon" % taskID) return True # set debug mode @@ -177,10 +173,10 @@ def debug(self, pandaID, modeOn): # get logger tmpLog = PLogger.getPandaLogger() # set - status,output = Client.setDebugMode(pandaID,modeOn,self.verbose) + status, output = Client.setDebugMode(pandaID, modeOn, self.verbose) if status != 0: tmpLog.error(output) - tmpLog.error('Failed to set debug mode for %s' % pandaID) + tmpLog.error("Failed to set debug mode for %s" % pandaID) return # done 
tmpLog.info(output) @@ -195,7 +191,7 @@ def killAndRetry(self, taskID, newOpts=None): if not retK: return False # sleep - tmpLog.info('Going to sleep for 3 sec') + tmpLog.info("Going to sleep for 3 sec") time.sleep(3) nTry = 6 for iTry in range(nTry): @@ -205,20 +201,19 @@ def killAndRetry(self, taskID, newOpts=None): if taskspec.is_terminated(): break else: - tmpLog.info('Some sub-jobs are still running') + tmpLog.info("Some sub-jobs are still running") else: - tmpLog.warning('Could not get task status from panda monitor...') + tmpLog.warning("Could not get task status from panda monitor...") if iTry + 1 < nTry: # sleep - tmpLog.info('Going to sleep for 30 sec') + tmpLog.info("Going to sleep for 30 sec") time.sleep(30) else: - tmpLog.info('Max attempts exceeded. Please try later') + tmpLog.info("Max attempts exceeded. Please try later") return False # retry return self.retry(taskID, newOpts=newOpts) - # retry @check_task_owner def retry(self, taskID, newOpts=None): @@ -230,23 +225,20 @@ def retry(self, taskID, newOpts=None): else: newOpts = copy.deepcopy(newOpts) # warning for PQ - site = newOpts.get('site', None) - excludedSite = newOpts.get('excludedSite', None) + site = newOpts.get("site", None) + excludedSite = newOpts.get("excludedSite", None) PsubUtils.get_warning_for_pq(site, excludedSite, tmpLog) # for JEDI - status,out = Client.retryTask( taskID, - verbose=self.verbose, - properErrorCode=True, - newParams=newOpts) + status, out = Client.retryTask(taskID, verbose=self.verbose, properErrorCode=True, newParams=newOpts) if status != 0: tmpLog.error(status) tmpLog.error(out) - tmpLog.error('Failed to retry TaskID=%s' % taskID) + tmpLog.error("Failed to retry TaskID=%s" % taskID) return False - tmpStat,tmpDiag = out - if (tmpStat not in [0,True] and newOpts == {}) or (newOpts != {} and tmpStat != 3): + tmpStat, tmpDiag = out + if (tmpStat not in [0, True] and newOpts == {}) or (newOpts != {} and tmpStat != 3): tmpLog.error(tmpDiag) - tmpLog.error('Failed to retry TaskID=%s' % taskID) + tmpLog.error("Failed to retry TaskID=%s" % taskID) return False tmpLog.info(tmpDiag) return True @@ -257,17 +249,17 @@ def recover_lost_files(self, taskID, test_mode=False): # get logger tmpLog = PLogger.getPandaLogger() # kill JEDI task - tmpLog.info('Sending recovery request ...') + tmpLog.info("Sending recovery request ...") status, output = Client.send_file_recovery_request(taskID, test_mode, self.verbose) # communication error if status != 0: tmpLog.error(output) - tmpLog.error('Communication failure with the server') + tmpLog.error("Communication failure with the server") return False tmpStat, tmpDiag = output if not tmpStat: tmpLog.error(tmpDiag) - tmpLog.error('request was not received') + tmpLog.error("request was not received") return False # done tmpLog.info(tmpDiag) @@ -278,15 +270,15 @@ def getUserJobMetadata(self, taskID, output_filename): # get logger tmpLog = PLogger.getPandaLogger() # get metadata - tmpLog.info('getting metadata') + tmpLog.info("getting metadata") status, metadata = Client.getUserJobMetadata(taskID, verbose=self.verbose) if status != 0: tmpLog.error(metadata) tmpLog.error("Failed to get metadata") return False - with open(output_filename, 'w') as f: + with open(output_filename, "w") as f: json.dump(metadata, f) - tmpLog.info('dumped to {0}'.format(output_filename)) + tmpLog.info("dumped to {0}".format(output_filename)) # return return True @@ -295,18 +287,34 @@ def get_active_tasks(self): """ get all reachable task specs of the user """ - active_superstatus_str = 
'|'.join(localSpecs.task_active_superstatus_list) - ts, url, data = queryPandaMonUtils.query_tasks(username=self.username, superstatus=active_superstatus_str, - verbose=self.verbose) + active_superstatus_str = "|".join(localSpecs.task_active_superstatus_list) + ts, url, data = queryPandaMonUtils.query_tasks( + username=self.username, + superstatus=active_superstatus_str, + verbose=self.verbose, + ) if isinstance(data, list) and list: - taskspec_list = [ localSpecs.LocalTaskSpec(task, source_url=url, timestamp=ts) for task in data ] + taskspec_list = [localSpecs.LocalTaskSpec(task, source_url=url, timestamp=ts) for task in data] return taskspec_list else: return None # show status - def show(self, some_ids=None, username=None, limit=1000, taskname=None, days=14, jeditaskid=None, - reqid=None, status=None, superstatus=None, metadata=False, sync=False, format='standard'): + def show( + self, + some_ids=None, + username=None, + limit=1000, + taskname=None, + days=14, + jeditaskid=None, + reqid=None, + status=None, + superstatus=None, + metadata=False, + sync=False, + format="standard", + ): # user name if username is None: username = self.username @@ -318,45 +326,57 @@ def show(self, some_ids=None, username=None, limit=1000, taskname=None, days=14, jeditaskid = str(some_ids) elif isinstance(some_ids, (list, tuple)) and some_ids: first_id = some_ids[0] - ids_str = '|'.join([str(x) for x in some_ids]) + ids_str = "|".join([str(x) for x in some_ids]) if first_id and isinstance(first_id, (int, long)) and is_reqid(first_id): reqid = ids_str else: jeditaskid = ids_str - elif some_ids == 'run': - superstatus = '|'.join(localSpecs.task_active_superstatus_list) - elif some_ids == 'fin': - superstatus = '|'.join(localSpecs.task_final_superstatus_list) + elif some_ids == "run": + superstatus = "|".join(localSpecs.task_active_superstatus_list) + elif some_ids == "fin": + superstatus = "|".join(localSpecs.task_final_superstatus_list) # print - if format != 'json': - sys.stderr.write('Showing only max {limit} tasks in last {days} days. One can set days=N to see tasks in last N days, and limit=M to see at most M latest tasks \n' - .format(days=days, limit=limit)) + if format != "json": + sys.stderr.write( + "Showing only max {limit} tasks in last {days} days. 
One can set days=N to see tasks in last N days, and limit=M to see at most M latest tasks \n".format( + days=days, limit=limit + ) + ) # query - ts, url, data = queryPandaMonUtils.query_tasks( username=username, limit=limit, reqid=reqid, - status=status, superstatus=superstatus, - taskname=taskname, days=days, jeditaskid=jeditaskid, - metadata=metadata, sync=sync, verbose=self.verbose) + ts, url, data = queryPandaMonUtils.query_tasks( + username=username, + limit=limit, + reqid=reqid, + status=status, + superstatus=superstatus, + taskname=taskname, + days=days, + jeditaskid=jeditaskid, + metadata=metadata, + sync=sync, + verbose=self.verbose, + ) # print header row _tmpts = localSpecs.LocalTaskSpec - if format in ['json', 'plain']: + if format in ["json", "plain"]: pass - elif format == 'long': - print(_tmpts.head_dict['long']) + elif format == "long": + print(_tmpts.head_dict["long"]) else: - print(_tmpts.head_dict['standard']) + print(_tmpts.head_dict["standard"]) # print tasks - if format == 'json': + if format == "json": return data - elif format == 'plain': + elif format == "plain": for task in data: taskspec = localSpecs.LocalTaskSpec(task, source_url=url, timestamp=ts) taskspec.print_plain() - elif format == 'long': + elif format == "long": i_count = 1 for task in data: taskspec = localSpecs.LocalTaskSpec(task, source_url=url, timestamp=ts) if i_count % 10 == 0: - print(_tmpts.head_dict['long']) + print(_tmpts.head_dict["long"]) taskspec.print_long() i_count += 1 else: @@ -367,9 +387,8 @@ def show(self, some_ids=None, username=None, limit=1000, taskname=None, days=14, # execute workflow command def execute_workflow_command(self, command_name, request_id): tmpLog = PLogger.getPandaLogger() - tmpLog.info('executing {}'.format(command_name)) - status, output = Client.call_idds_user_workflow_command(command_name, {'request_id': request_id}, - self.verbose) + tmpLog.info("executing {}".format(command_name)) + status, output = Client.call_idds_user_workflow_command(command_name, {"request_id": request_id}, self.verbose) if status != 0: tmpLog.error(output) tmpLog.error("Failed to execute {}".format(command_name)) @@ -385,10 +404,10 @@ def set_secret(self, key, value, is_file=False): # get logger tmpLog = PLogger.getPandaLogger() if is_file: - with open(value, 'rb') as f: + with open(value, "rb") as f: value = base64.b64encode(f.read()).decode() # add prefix - key = '___file___:'+format(key) + key = "___file___:" + format(key) size_limit = 1000 if value and len(value) > size_limit * 1024: tmpLog.error("The value length exceeds the limit ({0} kB)".format(size_limit)) @@ -418,22 +437,22 @@ def list_secrets(self, full=False): status, data = output if status: if data: - prefix = '^___[a-z]+___:' - msg = '\n' - keys = [re.sub(prefix, '', k) for k in data.keys()] + prefix = "^___[a-z]+___:" + msg = "\n" + keys = [re.sub(prefix, "", k) for k in data.keys()] big_key = len(max(keys, key=len)) - template = '{{:{}s}}: {{}}\n'.format(big_key+1) - msg += template.format('Key', 'Value') - msg += template.format('-'*big_key, '-'*20) + template = "{{:{}s}}: {{}}\n".format(big_key + 1) + msg += template.format("Key", "Value") + msg += template.format("-" * big_key, "-" * 20) keys.sort() max_len = 50 for k in data: value = data[k] # hide prefix if re.search(prefix, k): - k = re.sub(prefix, '', k) + k = re.sub(prefix, "", k) if not full and len(value) > max_len: - value = value[:max_len] + '...' + value = value[:max_len] + "..." 
msg += template.format(k, value) else: msg = "No secrets" @@ -448,22 +467,22 @@ def pause(self, task_id): # get logger tmpLog = PLogger.getPandaLogger() # pause JEDI task - tmpLog.info('Sending pause command ...') + tmpLog.info("Sending pause command ...") status, output = Client.pauseTask(task_id, self.verbose) # communication error if status != 0: tmpLog.error(output) - tmpLog.error('Failed to pause jediTaskID=%s' % task_id) + tmpLog.error("Failed to pause jediTaskID=%s" % task_id) return False tmpStat, tmpDiag = output if tmpStat != 0: print(tmpStat) tmpLog.error(tmpDiag) - tmpLog.error('Failed to pause jediTaskID=%s' % task_id) + tmpLog.error("Failed to pause jediTaskID=%s" % task_id) return False tmpLog.info(tmpDiag) # done - tmpLog.info('Done') + tmpLog.info("Done") return True # resume @@ -471,23 +490,41 @@ def resume(self, task_id): # get logger tmpLog = PLogger.getPandaLogger() # pause JEDI task - tmpLog.info('Sending resume command ...') + tmpLog.info("Sending resume command ...") status, output = Client.resumeTask(task_id, self.verbose) # communication error if status != 0: tmpLog.error(output) - tmpLog.error('Failed to resume jediTaskID=%s' % task_id) + tmpLog.error("Failed to resume jediTaskID=%s" % task_id) return False tmpStat, tmpDiag = output if tmpStat != 0: tmpLog.error(tmpDiag) - tmpLog.error('Failed to resume jediTaskID=%s' % task_id) + tmpLog.error("Failed to resume jediTaskID=%s" % task_id) return False tmpLog.info(tmpDiag) # done - tmpLog.info('Done') + tmpLog.info("Done") return True # generate_credential def generate_credential(self): return PsubUtils.check_proxy(self.verbose, None, generate_new=True) + + # reload input and then retry the task + def reload_input(self, task_id): + # get logger + tmp_log = PLogger.getPandaLogger() + # set + status, output = Client.reload_input(task_id, self.verbose) + if status != 0: + tmp_log.error(output) + tmp_log.error("Failed to reload input %s" % task_id) + return False + elif output[0] != 0: + tmp_log.error(output[-1]) + tmp_log.error("Failed to reload input %s" % task_id) + return False + # done + tmp_log.info("command is registered. will be executed in a few minutes") + return True diff --git a/pandaclient/PBookScript.py b/pandaclient/PBookScript.py index 92cdbf8e..e0cdc976 100644 --- a/pandaclient/PBookScript.py +++ b/pandaclient/PBookScript.py @@ -3,14 +3,15 @@ Import PBookCore instead. 
""" -import os -import sys -import code import atexit +import code +import os import signal +import sys import tempfile from pandaclient.MiscUtils import commands_get_output + try: long() except Exception: @@ -19,9 +20,12 @@ try: from concurrent.futures import ThreadPoolExecutor except ImportError: + def list_parallel_exec(func, array): - return [ func(x) for x in array ] + return [func(x) for x in array] + else: + def list_parallel_exec(func, array): with ThreadPoolExecutor(8) as thread_pool: dataIterator = thread_pool.map(func, array) @@ -29,21 +33,20 @@ def list_parallel_exec(func, array): import argparse -import readline import pydoc +import readline -from pandaclient import Client -from pandaclient import PandaToolsPkgInfo +from pandaclient import Client, PandaToolsPkgInfo # readline support -readline.parse_and_bind('tab: complete') -readline.parse_and_bind('set show-all-if-ambiguous On') +readline.parse_and_bind("tab: complete") +readline.parse_and_bind("set show-all-if-ambiguous On") # history support -pconfDir = os.path.expanduser(os.environ['PANDA_CONFIG_ROOT']) +pconfDir = os.path.expanduser(os.environ["PANDA_CONFIG_ROOT"]) if not os.path.exists(pconfDir): os.makedirs(pconfDir) -historyFile = '%s/.history' % pconfDir +historyFile = "%s/.history" % pconfDir # history file if os.path.exists(historyFile): try: @@ -54,8 +57,8 @@ def list_parallel_exec(func, array): readline.set_history_length(1024) # set dummy CMTSITE -if 'CMTSITE' not in os.environ: - os.environ['CMTSITE'] = '' +if "CMTSITE" not in os.environ: + os.environ["CMTSITE"] = "" # make tmp dir tmpDir = tempfile.mkdtemp() @@ -66,33 +69,34 @@ def list_parallel_exec(func, array): # fork PID fork_child_pid = None + # exit action -def _onExit(dirName,hFile): +def _onExit(dirName, hFile): # save history only for master process if fork_child_pid == 0: readline.write_history_file(hFile) # remove tmp dir - commands_get_output('rm -rf %s' % dirName) -atexit.register(_onExit,tmpDir,historyFile) + commands_get_output("rm -rf %s" % dirName) + + +atexit.register(_onExit, tmpDir, historyFile) # look for PandaTools package for path in sys.path: - if path == '': - path = '.' - if os.path.exists(path) and os.path.isdir(path) and 'pandaclient' in os.listdir(path) \ - and os.path.exists('%s/pandaclient/__init__.py' % path): + if path == "": + path = "." 
+ if os.path.exists(path) and os.path.isdir(path) and "pandaclient" in os.listdir(path) and os.path.exists("%s/pandaclient/__init__.py" % path): # make symlink for module name - os.symlink('%s/pandaclient' % path,'%s/taskbuffer' % tmpDir) + os.symlink("%s/pandaclient" % path, "%s/taskbuffer" % tmpDir) break -sys.path = [tmpDir]+sys.path +sys.path = [tmpDir] + sys.path -from pandaclient import PBookCore # noqa: E402 +from pandaclient import PBookCore # noqa: E402 # main for interactive session def intmain(pbookCore, comString, args_list): - # help def help(*arg): """ @@ -121,6 +125,7 @@ def help(*arg): kill_and_retry get_user_job_metadata recover_lost_files + reload_input show_workflow kill_workflow retry_workflow @@ -167,7 +172,7 @@ def showl(*args, **kwargs): >>> showl(12345678) >>> showl(taskname='my_task_name') """ - kwargs['format'] = 'long' + kwargs["format"] = "long" return pbookCore.show(*args, **kwargs) # kill @@ -180,16 +185,16 @@ def kill(taskIDs): >>> kill([123, 345, 567]) >>> kill('all') """ - if taskIDs == 'all': + if taskIDs == "all": # active tasks task_list = pbookCore.get_active_tasks() ret = list_parallel_exec(lambda task: pbookCore.kill(task.jeditaskid), task_list) elif isinstance(taskIDs, (list, tuple)): ret = list_parallel_exec(lambda taskID: pbookCore.kill(taskID), taskIDs) elif isinstance(taskIDs, (int, long)): - ret = [ pbookCore.kill(taskIDs) ] + ret = [pbookCore.kill(taskIDs)] else: - print('Error: Invalid argument') + print("Error: Invalid argument") ret = None return ret @@ -207,18 +212,19 @@ def finish(taskIDs, soft=False): >>> finish([123, 345, 567]) >>> finish('all') """ - if taskIDs == 'all': + if taskIDs == "all": # active tasks task_list = pbookCore.get_active_tasks() - ret = list_parallel_exec(lambda task: pbookCore.finish.original_func(pbookCore, - task.jeditaskid, soft=soft), - task_list) + ret = list_parallel_exec( + lambda task: pbookCore.finish.original_func(pbookCore, task.jeditaskid, soft=soft), + task_list, + ) elif isinstance(taskIDs, (list, tuple)): ret = list_parallel_exec(lambda taskID: pbookCore.finish(taskID, soft=soft), taskIDs) elif isinstance(taskIDs, (int, long)): - ret = [ pbookCore.finish(taskIDs, soft=soft) ] + ret = [pbookCore.finish(taskIDs, soft=soft)] else: - print('Error: Invalid argument') + print("Error: Invalid argument") ret = None return ret @@ -253,13 +259,14 @@ def retry(taskIDs, newOpts=None, days=14, limit=1000, **kwargs): ret = list_parallel_exec(lambda taskID: pbookCore.retry(taskID, newOpts=newOpts), taskIDs) elif isinstance(taskIDs, (int, long)): ret = [pbookCore.retry(taskIDs, newOpts=newOpts)] - elif taskIDs == 'all': - dataList = pbookCore.show(status='finished', days=days, limit=limit, sync=True, format='json') - ret = list_parallel_exec(lambda data: pbookCore.retry.original_func(pbookCore, data['jeditaskid'], - newOpts=newOpts), - dataList) + elif taskIDs == "all": + dataList = pbookCore.show(status="finished", days=days, limit=limit, sync=True, format="json") + ret = list_parallel_exec( + lambda data: pbookCore.retry.original_func(pbookCore, data["jeditaskid"], newOpts=newOpts), + dataList, + ) else: - print('Error: Invalid argument') + print("Error: Invalid argument") ret = None return ret @@ -280,9 +287,9 @@ def killAndRetry(taskIDs, newOpts=None): if isinstance(taskIDs, (list, tuple)): ret = list_parallel_exec(lambda taskID: pbookCore.killAndRetry(taskID, newOpts=newOpts), taskIDs) elif isinstance(taskIDs, (int, long)): - ret = [ pbookCore.killAndRetry(taskIDs, newOpts=newOpts) ] + ret = 
[pbookCore.killAndRetry(taskIDs, newOpts=newOpts)] else: - print('Error: Invalid argument') + print("Error: Invalid argument") ret = None return ret @@ -315,6 +322,17 @@ def get_user_job_metadata(taskID, outputFileName): """ getUserJobMetadata(taskID, outputFileName) + # reload input dataset and retry + def reload_input(task_id): + """ + Reload input dataset and retry the task with new contents. This is useful when input dataset contents are + changed after the task is submitted + + example: + >>> reload_input(123) + """ + pbookCore.reload_input(task_id) + # recover lost files def recover_lost_files(taskID, test_mode=False): """ @@ -332,7 +350,7 @@ def finish_workflow(request_id): Send a request to finish a workflow """ - status, output = pbookCore.execute_workflow_command('finish', request_id) + status, output = pbookCore.execute_workflow_command("finish", request_id) if output: print(output[0][-1]) @@ -342,7 +360,7 @@ def kill_workflow(request_id): Send a request to kill a workflow """ - status, output = pbookCore.execute_workflow_command('abort', request_id) + status, output = pbookCore.execute_workflow_command("abort", request_id) if output: print(output[0][-1]) @@ -352,7 +370,7 @@ def pause_workflow(request_id): Send a request to pause a workflow """ - status, output = pbookCore.execute_workflow_command('suspend', request_id) + status, output = pbookCore.execute_workflow_command("suspend", request_id) if output: print(output[0][-1]) @@ -362,7 +380,7 @@ def resume_workflow(request_id): Send a request to resume a workflow """ - status, output = pbookCore.execute_workflow_command('resume', request_id) + status, output = pbookCore.execute_workflow_command("resume", request_id) if output: print(output[0][-1]) @@ -372,7 +390,7 @@ def retry_workflow(request_id): Send a request to retry a workflow """ - status, output = pbookCore.execute_workflow_command('retry', request_id) + status, output = pbookCore.execute_workflow_command("retry", request_id) if output: print(output[0][-1]) @@ -382,7 +400,7 @@ def show_workflow(request_id): Show a workflow """ - status, output = pbookCore.execute_workflow_command('get_status', request_id) + status, output = pbookCore.execute_workflow_command("get_status", request_id) if output: print(output) @@ -429,7 +447,7 @@ def generate_credential(): main_locals = locals() # execute command in the batch mode - if comString != '': + if comString != "": pbookCore.init() exec(comString) in globals(), locals() # exit @@ -447,17 +465,17 @@ def generate_credential(): # convert arg string def _conv_str(some_string): - if ',' in some_string: + if "," in some_string: try: - return [int(s) for s in some_string.split(',')] + return [int(s) for s in some_string.split(",")] except Exception: - return some_string.split(',') + return some_string.split(",") else: - if some_string == 'None': + if some_string == "None": return None - if some_string == 'True': + if some_string == "True": return True - if some_string == 'False': + if some_string == "False": return False try: return int(some_string) @@ -468,13 +486,13 @@ def _conv_str(some_string): args = [] kwargs = {} for arg in args_list: - if '=' in arg: - k, v = arg.split('=') + if "=" in arg: + k, v = arg.split("=") kwargs[k] = _conv_str(v) else: args.append(_conv_str(arg)) # execute - if func_name not in ['help', 'generate_credential']: + if func_name not in ["help", "generate_credential"]: pbookCore.init(sanity_check=False) locals()[func_name](*args, **kwargs) @@ -486,16 +504,15 @@ def _conv_str(some_string): # go to interactive 
prompt pbookCore.init() - code.interact(banner="\nStart pBook %s" % PandaToolsPkgInfo.release_version, - local=locals()) + code.interact(banner="\nStart pBook %s" % PandaToolsPkgInfo.release_version, local=locals()) # kill whole process def catch_sig(sig, frame): # cleanup - _onExit(tmpDir,historyFile) + _onExit(tmpDir, historyFile) # kill - commands_get_output('kill -9 -- -%s' % os.getpgrp()) + commands_get_output("kill -9 -- -%s" % os.getpgrp()) # overall main @@ -545,21 +562,49 @@ def main(): $ pbook help command_name """ parser = argparse.ArgumentParser(conflict_handler="resolve", usage=usage) - parser.add_argument("-v",action="store_true",dest="verbose",default=False, - help="Verbose") - parser.add_argument('-c',action='store',dest='comString',default='',type=str, - help='Execute a python code snippet') - parser.add_argument("-3", action="store_true", dest="python3", default=False, - help="Use python3") - parser.add_argument('--version',action='store_const',const=True,dest='version',default=False, - help='Displays version') - parser.add_argument('--devSrv',action='store_const',const=True,dest='devSrv',default=False, - help=argparse.SUPPRESS) - parser.add_argument('--intrSrv',action='store_const',const=True, dest='intrSrv',default=False, - help=argparse.SUPPRESS) + parser.add_argument("-v", action="store_true", dest="verbose", default=False, help="Verbose") + parser.add_argument( + "-c", + action="store", + dest="comString", + default="", + type=str, + help="Execute a python code snippet", + ) + parser.add_argument("-3", action="store_true", dest="python3", default=False, help="Use python3") + parser.add_argument( + "--version", + action="store_const", + const=True, + dest="version", + default=False, + help="Displays version", + ) + parser.add_argument( + "--devSrv", + action="store_const", + const=True, + dest="devSrv", + default=False, + help=argparse.SUPPRESS, + ) + parser.add_argument( + "--intrSrv", + action="store_const", + const=True, + dest="intrSrv", + default=False, + help=argparse.SUPPRESS, + ) # option for jupyter notebook - parser.add_argument('--prompt_with_newline', action='store_const', const=True, dest='prompt_with_newline', - default=False, help=argparse.SUPPRESS) + parser.add_argument( + "--prompt_with_newline", + action="store_const", + const=True, + dest="prompt_with_newline", + default=False, + help=argparse.SUPPRESS, + ) options, args = parser.parse_known_args() @@ -596,7 +641,7 @@ def main(): # set handler signal.signal(signal.SIGINT, catch_sig) signal.signal(signal.SIGHUP, catch_sig) - signal.signal(signal.SIGTERM,catch_sig) + signal.signal(signal.SIGTERM, catch_sig) pid, status = os.wait() if os.WIFSIGNALED(status): sys.exit(-os.WTERMSIG(status)) diff --git a/pandaclient/PandaToolsPkgInfo.py b/pandaclient/PandaToolsPkgInfo.py index f956b533..866e8b14 100644 --- a/pandaclient/PandaToolsPkgInfo.py +++ b/pandaclient/PandaToolsPkgInfo.py @@ -1 +1 @@ -release_version = "1.5.66" +release_version = "1.5.67" diff --git a/pandaclient/PathenaScript.py b/pandaclient/PathenaScript.py index f88ca17a..295cc605 100644 --- a/pandaclient/PathenaScript.py +++ b/pandaclient/PathenaScript.py @@ -1,15 +1,16 @@ +import argparse +import atexit +import copy +import json import os +import pickle +import random import re -import sys -import copy import shutil -import atexit -import argparse -from pandaclient.Group_argparse import get_parser -import random -import pickle -import json +import sys import time + +from pandaclient.Group_argparse import get_parser from pandaclient.MiscUtils 
import parse_secondary_datasets_opt try: @@ -20,19 +21,19 @@ #################################################################### # error code -EC_Config = 10 -EC_CMT = 20 +EC_Config = 10 +EC_CMT = 20 EC_Extractor = 30 -EC_Dataset = 40 -EC_Post = 50 -EC_Archive = 60 -EC_Split = 70 -EC_MyProxy = 80 -EC_Submit = 90 +EC_Dataset = 40 +EC_Post = 50 +EC_Archive = 60 +EC_Split = 70 +EC_MyProxy = 80 +EC_Submit = 90 # tweak sys.argv sys.argv.pop(0) -sys.argv.insert(0, 'pathena') +sys.argv.insert(0, "pathena") usage = """pathena [options] [ [...]] @@ -45,453 +46,1318 @@ """ removedOpts = [ # list of deprecated options w.r.t version 0.4.9 - "--ara", - "--araOutFile", - "--ares", - "--blong", - "--burstSubmit", - "--cloud", - "--configJEM", - "--corCheck", - "--crossSite", - "--dbRunNumber", - "--disableRebrokerage", - "--enableJEM", - "--eventPickStagedDS", - "--individualOutDS", - "--libDS", - "--long", - "--mcData", - "--myproxy", - "--nCavPerJob", - "--nHighMinPerJob", - "--nLowMinPerJob", - "--nMinPerJob", - "--nSkipFiles", - "--noLock", - "--notUseTagLookup", - "--outputPath", - "--panda_cacheSrvURL", - "--panda_dbRelease", - "--panda_devidedByGUID", - "--panda_eventPickRunEvtDat", - "--panda_fullPathJobOs", - "--panda_inDS", - "--panda_inDSForEP", - "--panda_jobsetID", - "--panda_origFullExecString", - "--panda_parentJobsetID", - "--panda_runConfig", - "--panda_singleLine", - "--panda_srcName", - "--panda_srvURL", - "--panda_suppressMsg", - "--panda_tagParentFile", - "--panda_trf", - "--parentDS", - "--prestage", - "--provenanceID", - "--removeBurstLimit", - "--removeFileList", - "--removedDS", - "--seriesLabel", - "--skipScan", - "--splitWithNthFiledOfLFN", - "--tagQuery", - "--tagStreamRef", - "--transferredDS", - "--useAIDA", - "--useChirpServer", - "--useContElementBoundary", - "--useDirectIOSites", - "--useExperimental", - "--useGOForOutput", - "--useNthFieldForLFN", - "--useOldStyleOutput", - "--useShortLivedReplicas", - "--useSiteGroup", - "--useTagInTRF", - "-l" + "--ara", + "--araOutFile", + "--ares", + "--blong", + "--burstSubmit", + "--cloud", + "--configJEM", + "--corCheck", + "--crossSite", + "--dbRunNumber", + "--disableRebrokerage", + "--enableJEM", + "--eventPickStagedDS", + "--individualOutDS", + "--libDS", + "--long", + "--mcData", + "--myproxy", + "--nCavPerJob", + "--nHighMinPerJob", + "--nLowMinPerJob", + "--nMinPerJob", + "--nSkipFiles", + "--noLock", + "--notUseTagLookup", + "--outputPath", + "--panda_cacheSrvURL", + "--panda_dbRelease", + "--panda_devidedByGUID", + "--panda_eventPickRunEvtDat", + "--panda_fullPathJobOs", + "--panda_inDS", + "--panda_inDSForEP", + "--panda_jobsetID", + "--panda_origFullExecString", + "--panda_parentJobsetID", + "--panda_runConfig", + "--panda_singleLine", + "--panda_srcName", + "--panda_srvURL", + "--panda_suppressMsg", + "--panda_tagParentFile", + "--panda_trf", + "--parentDS", + "--prestage", + "--provenanceID", + "--removeBurstLimit", + "--removeFileList", + "--removedDS", + "--seriesLabel", + "--skipScan", + "--splitWithNthFiledOfLFN", + "--tagQuery", + "--tagStreamRef", + "--transferredDS", + "--useAIDA", + "--useChirpServer", + "--useContElementBoundary", + "--useDirectIOSites", + "--useExperimental", + "--useGOForOutput", + "--useNthFieldForLFN", + "--useOldStyleOutput", + "--useShortLivedReplicas", + "--useSiteGroup", + "--useTagInTRF", + "-l", ] optP = get_parser(usage=usage, conflict_handler="resolve") optP.set_examples(examples) # define option groups -group_print = optP.add_group('print', 'info print') -group_pathena = 
optP.add_group('pathena', 'about pathena itself') -group_config = optP.add_group('config', 'single configuration file to set multiple options') -group_input = optP.add_group('input', 'input dataset(s)/files/format/seed') -group_output = optP.add_group('output', 'output dataset/files') -group_job = optP.add_group('job', 'job running control on grid') -group_build = optP.add_group('build', 'build/compile the package and env setup') -group_submit = optP.add_group('submit', 'job submission/site/retry') -group_evtFilter = optP.add_group('evtFilter', 'event filter such as good run and event pick') -group_expert = optP.add_group('expert', 'for experts/developers only') - -usage_containerJob="""Visit the following wiki page for examples: +group_print = optP.add_group("print", "info print") +group_pathena = optP.add_group("pathena", "about pathena itself") +group_config = optP.add_group("config", "single configuration file to set multiple options") +group_input = optP.add_group("input", "input dataset(s)/files/format/seed") +group_output = optP.add_group("output", "output dataset/files") +group_job = optP.add_group("job", "job running control on grid") +group_build = optP.add_group("build", "build/compile the package and env setup") +group_submit = optP.add_group("submit", "job submission/site/retry") +group_evtFilter = optP.add_group("evtFilter", "event filter such as good run and event pick") +group_expert = optP.add_group("expert", "for experts/developers only") + +usage_containerJob = """Visit the following wiki page for examples: https://twiki.cern.ch/twiki/bin/view/PanDA/PandaRun#Run_user_containers_jobs Please test the job interactively first prior to submitting to the grid. Check the following on how to test container job interactively: https://twiki.cern.ch/twiki/bin/viewauth/AtlasComputing/SingularityInAtlas """ -group_containerJob = optP.add_group('containerJob', "For container-based jobs", usage=usage_containerJob) +group_containerJob = optP.add_group("containerJob", "For container-based jobs", usage=usage_containerJob) -optP.add_helpGroup(addHelp='Some options such as --inOutDsJson may SPAN several groups') +optP.add_helpGroup(addHelp="Some options such as --inOutDsJson may SPAN several groups") # special options -group_pathena.add_argument('--version',action='store_const',const=True,dest='version',default=False, - help='Displays version') -group_job.add_argument('--split', '--nJobs', metavar='nJobs', action='store', dest='split', default=-1, - type=int, help='Number of sub-jobs to be generated.') - -group_job.add_argument('--nFilesPerJob', action='store', dest='nFilesPerJob', default=-1, type=int, help='Number of files on which each sub-job runs') -group_job.add_argument('--nEventsPerJob', action='store', dest='nEventsPerJob', default=-1, - type=int, help='Number of events per subjob. This info is used mainly for job splitting. If you run on MC datasets, the total number of subjobs is nEventsPerFile*nFiles/nEventsPerJob. For data, the number of events for each file is retrieved from AMI and subjobs are created accordingly. Note that if you run transformations you need to explicitly specify maxEvents or something in --trf to set the number of events processed in each subjob. 
If you run normal jobOption files, evtMax and skipEvents in appMgr are automatically set on WN.') -action = group_job.add_argument('--nEventsPerFile', action='store', dest='nEventsPerFile', default=0, - type=int, help='Number of events per file') +group_pathena.add_argument( + "--version", + action="store_const", + const=True, + dest="version", + default=False, + help="Displays version", +) +group_job.add_argument( + "--split", + "--nJobs", + metavar="nJobs", + action="store", + dest="split", + default=-1, + type=int, + help="Number of sub-jobs to be generated.", +) + +group_job.add_argument( + "--nFilesPerJob", + action="store", + dest="nFilesPerJob", + default=-1, + type=int, + help="Number of files on which each sub-job runs", +) +group_job.add_argument( + "--nEventsPerJob", + action="store", + dest="nEventsPerJob", + default=-1, + type=int, + help="Number of events per subjob. This info is used mainly for job splitting. If you run on MC datasets, the total number of subjobs is nEventsPerFile*nFiles/nEventsPerJob. For data, the number of events for each file is retrieved from AMI and subjobs are created accordingly. Note that if you run transformations you need to explicitly specify maxEvents or something in --trf to set the number of events processed in each subjob. If you run normal jobOption files, evtMax and skipEvents in appMgr are automatically set on WN.", +) +action = group_job.add_argument( + "--nEventsPerFile", + action="store", + dest="nEventsPerFile", + default=0, + type=int, + help="Number of events per file", +) group_input.shareWithMe(action) -group_job.add_argument('--nGBPerJob',action='store',dest='nGBPerJob',default=-1, help='Instantiate one sub job per NGBPERJOB GB of input files. --nGBPerJob=MAX sets the size to the default maximum value') -group_job.add_argument('--nGBPerMergeJob', action='store', dest='nGBPerMergeJob', default="MAX", help='Instantiate one merge job per NGBPERMERGEJOB GB of pre-merged files') -group_submit.add_argument('--site', action='store', dest='site', default="AUTO", - type=str, help='Site name where jobs are sent. If omitted, jobs are automatically sent to sites where input is available. A comma-separated list of sites can be specified (e.g. siteA,siteB,siteC), so that best sites are chosen from the given site list. If AUTO is appended at the end of the list (e.g. siteA,siteB,siteC,AUTO), jobs are sent to any sites if input is not found in the previous sites') -group_build.add_argument('--athenaTag',action='store',dest='athenaTag',default='',type=str, - help='Use differnet version of Athena on remote WN. By defualt the same version which you are locally using is set up on WN. e.g., --athenaTag=AtlasProduction,14.2.24.3') -group_input.add_argument('--inDS', action='store', dest='inDS', default='', - type=str, help='Input dataset names. wildcard and/or comma can be used to concatenate multiple datasets') -group_input.add_argument('--notExpandInDS', action='store_const', const=True, dest='notExpandInDS',default=False, - help='Allow jobs to use files across dataset boundaries in input dataset container') -group_input.add_argument('--inDsTxt',action='store',dest='inDsTxt',default='', - type=str, help='a text file which contains the list of datasets to run over. newlines are replaced by commas and the result is set to --inDS. lines starting with # are ignored') -action = group_input.add_argument('--inOutDsJson', action='store', dest='inOutDsJson', default='', - help="A json file to specify input and output datasets for bulk submission. 
" - "It contains a json dump of [{'inDS': a comma-concatenated input dataset names, " - "'outDS': output dataset name}, ...]. " - "When this option is used --bulkSubmission is automatically set internally.") +group_job.add_argument( + "--nGBPerJob", + action="store", + dest="nGBPerJob", + default=-1, + help="Instantiate one sub job per NGBPERJOB GB of input files. --nGBPerJob=MAX sets the size to the default maximum value", +) +group_job.add_argument( + "--nGBPerMergeJob", + action="store", + dest="nGBPerMergeJob", + default="MAX", + help="Instantiate one merge job per NGBPERMERGEJOB GB of pre-merged files", +) +group_submit.add_argument( + "--site", + action="store", + dest="site", + default="AUTO", + type=str, + help="Site name where jobs are sent. If omitted, jobs are automatically sent to sites where input is available. A comma-separated list of sites can be specified (e.g. siteA,siteB,siteC), so that best sites are chosen from the given site list. If AUTO is appended at the end of the list (e.g. siteA,siteB,siteC,AUTO), jobs are sent to any sites if input is not found in the previous sites", +) +group_build.add_argument( + "--athenaTag", + action="store", + dest="athenaTag", + default="", + type=str, + help="Use differnet version of Athena on remote WN. By defualt the same version which you are locally using is set up on WN. e.g., --athenaTag=AtlasProduction,14.2.24.3", +) +group_input.add_argument( + "--inDS", + action="store", + dest="inDS", + default="", + type=str, + help="Input dataset names. wildcard and/or comma can be used to concatenate multiple datasets", +) +group_input.add_argument( + "--notExpandInDS", + action="store_const", + const=True, + dest="notExpandInDS", + default=False, + help="Allow jobs to use files across dataset boundaries in input dataset container", +) +group_input.add_argument( + "--inDsTxt", + action="store", + dest="inDsTxt", + default="", + type=str, + help="a text file which contains the list of datasets to run over. newlines are replaced by commas and the result is set to --inDS. lines starting with # are ignored", +) +action = group_input.add_argument( + "--inOutDsJson", + action="store", + dest="inOutDsJson", + default="", + help="A json file to specify input and output datasets for bulk submission. " + "It contains a json dump of [{'inDS': a comma-concatenated input dataset names, " + "'outDS': output dataset name}, ...]. " + "When this option is used --bulkSubmission is automatically set internally.", +) group_output.shareWithMe(action) -group_input.add_argument('--secondaryDSs', action='store', dest='secondaryDSs', default='', - help='A versatile option to specify arbitrary secondary inputs that takes a list of ' - 'secondary datasets. 
See PandaRun wiki page for detail') -group_input.add_argument('--notExpandSecDSs', action='store_const', const=True, dest='notExpandSecDSs', default=False, - help = 'Use files across dataset boundaries in secondary dataset containers') -group_input.add_argument('--minDS', action='store', dest='minDS', default='', - type=str, help='Dataset name for minimum bias stream') -group_job.add_argument('--nMin', action='store', dest='nMin', default=-1, - type=int, help='Number of minimum bias files per sub job') -group_input.add_argument('--notExpandMinDS', action='store_const', const=True, dest='notExpandMinDS',default=False, - help='Allow jobs to use files across dataset boundaries in minimum bias dataset container') -group_input.add_argument('--lowMinDS', action='store', dest='lowMinDS', default='', - type=str, help='Dataset name for low pT minimum bias stream') -group_job.add_argument('--nLowMin', action='store', dest='nLowMin', default=-1, - type=int, help='Number of low pT minimum bias files per job') -group_input.add_argument('--notExpandLowMinDS', action='store_const', const=True, dest='notExpandLowMinDS',default=False, - help='Allow jobs to use files across dataset boundaries in low minimum bias dataset container') -group_input.add_argument('--highMinDS', action='store', dest='highMinDS', default='', - type=str, help='Dataset name for high pT minimum bias stream') -group_job.add_argument('--nHighMin', action='store', dest='nHighMin', default=-1, - type=int, help='Number of high pT minimum bias files per job') -group_input.add_argument('--notExpandHighMinDS', action='store_const', const=True, dest='notExpandHighMinDS',default=False, - help='Allow jobs to use files across dataset boundaries in high minimum bias dataset container') -group_input.add_argument('--randomMin',action='store_const',const=True,dest='randomMin',default=False, - help='randomize files in minimum bias dataset') -group_input.add_argument('--cavDS', action='store', dest='cavDS', default='', - type=str, help='Dataset name for cavern stream') -group_job.add_argument('--nCav', action='store', dest='nCav', default=-1, - type=int, help='Number of cavern files per job') -group_input.add_argument('--notExpandCavDS', action='store_const', const=True, dest='notExpandCavDS',default=False, - help='Allow jobs to use files across dataset boundaries in cavern dataset container') -group_input.add_argument('--randomCav',action='store_const',const=True,dest='randomCav',default=False, - help='randomize files in cavern dataset') -group_evtFilter.add_argument('--goodRunListXML', action='store', dest='goodRunListXML', default='', - type=str, help='Good Run List XML which will be converted to datasets by AMI') -group_evtFilter.add_argument('--goodRunListDataType', action='store', dest='goodRunDataType', default='', - type=str, help='specify data type when converting Good Run List XML to datasets, e.g, AOD (default)') -group_evtFilter.add_argument('--goodRunListProdStep', action='store', dest='goodRunProdStep', default='', - type=str, help='specify production step when converting Good Run List to datasets, e.g, merge (default)') -action = group_evtFilter.add_argument('--goodRunListDS', action='store', dest='goodRunListDS', default='', - type=str, help='A comma-separated list of pattern strings. Datasets which are converted from Good Run List XML will be used when they match with one of the pattern strings. Either \ or "" is required when a wild-card is used. 
If this option is omitted all datasets will be used') +group_input.add_argument( + "--secondaryDSs", + action="store", + dest="secondaryDSs", + default="", + help="A versatile option to specify arbitrary secondary inputs that takes a list of " "secondary datasets. See PandaRun wiki page for detail", +) +group_input.add_argument( + "--notExpandSecDSs", + action="store_const", + const=True, + dest="notExpandSecDSs", + default=False, + help="Use files across dataset boundaries in secondary dataset containers", +) +group_input.add_argument( + "--minDS", + action="store", + dest="minDS", + default="", + type=str, + help="Dataset name for minimum bias stream", +) +group_job.add_argument( + "--nMin", + action="store", + dest="nMin", + default=-1, + type=int, + help="Number of minimum bias files per sub job", +) +group_input.add_argument( + "--notExpandMinDS", + action="store_const", + const=True, + dest="notExpandMinDS", + default=False, + help="Allow jobs to use files across dataset boundaries in minimum bias dataset container", +) +group_input.add_argument( + "--lowMinDS", + action="store", + dest="lowMinDS", + default="", + type=str, + help="Dataset name for low pT minimum bias stream", +) +group_job.add_argument( + "--nLowMin", + action="store", + dest="nLowMin", + default=-1, + type=int, + help="Number of low pT minimum bias files per job", +) +group_input.add_argument( + "--notExpandLowMinDS", + action="store_const", + const=True, + dest="notExpandLowMinDS", + default=False, + help="Allow jobs to use files across dataset boundaries in low minimum bias dataset container", +) +group_input.add_argument( + "--highMinDS", + action="store", + dest="highMinDS", + default="", + type=str, + help="Dataset name for high pT minimum bias stream", +) +group_job.add_argument( + "--nHighMin", + action="store", + dest="nHighMin", + default=-1, + type=int, + help="Number of high pT minimum bias files per job", +) +group_input.add_argument( + "--notExpandHighMinDS", + action="store_const", + const=True, + dest="notExpandHighMinDS", + default=False, + help="Allow jobs to use files across dataset boundaries in high minimum bias dataset container", +) +group_input.add_argument( + "--randomMin", + action="store_const", + const=True, + dest="randomMin", + default=False, + help="randomize files in minimum bias dataset", +) +group_input.add_argument( + "--cavDS", + action="store", + dest="cavDS", + default="", + type=str, + help="Dataset name for cavern stream", +) +group_job.add_argument( + "--nCav", + action="store", + dest="nCav", + default=-1, + type=int, + help="Number of cavern files per job", +) +group_input.add_argument( + "--notExpandCavDS", + action="store_const", + const=True, + dest="notExpandCavDS", + default=False, + help="Allow jobs to use files across dataset boundaries in cavern dataset container", +) +group_input.add_argument( + "--randomCav", + action="store_const", + const=True, + dest="randomCav", + default=False, + help="randomize files in cavern dataset", +) +group_evtFilter.add_argument( + "--goodRunListXML", + action="store", + dest="goodRunListXML", + default="", + type=str, + help="Good Run List XML which will be converted to datasets by AMI", +) +group_evtFilter.add_argument( + "--goodRunListDataType", + action="store", + dest="goodRunDataType", + default="", + type=str, + help="specify data type when converting Good Run List XML to datasets, e.g, AOD (default)", +) +group_evtFilter.add_argument( + "--goodRunListProdStep", + action="store", + dest="goodRunProdStep", + default="", + 
type=str, + help="specify production step when converting Good Run List to datasets, e.g, merge (default)", +) +action = group_evtFilter.add_argument( + "--goodRunListDS", + action="store", + dest="goodRunListDS", + default="", + type=str, + help='A comma-separated list of pattern strings. Datasets which are converted from Good Run List XML will be used when they match with one of the pattern strings. Either \ or "" is required when a wild-card is used. If this option is omitted all datasets will be used', +) group_input.shareWithMe(action) -group_evtFilter.add_argument('--eventPickEvtList',action='store',dest='eventPickEvtList',default='', - type=str, help='a file name which contains a list of runs/events for event picking') -group_evtFilter.add_argument('--eventPickDataType',action='store',dest='eventPickDataType',default='', - type=str, help='type of data for event picking. one of AOD,ESD,RAW') -group_evtFilter.add_argument('--ei_api',action='store',dest='ei_api',default='', - type=str, help='flag to signalise mc in event picking') -group_evtFilter.add_argument('--eventPickStreamName',action='store',dest='eventPickStreamName',default='', - type=str, help='stream name for event picking. e.g., physics_CosmicCaloEM') -action = group_evtFilter.add_argument('--eventPickDS',action='store',dest='eventPickDS',default='', - type=str, help='A comma-separated list of pattern strings. Datasets which are converted from the run/event list will be used when they match with one of the pattern strings. Either \ or "" is required when a wild-card is used. e.g., data\*') +group_evtFilter.add_argument( + "--eventPickEvtList", + action="store", + dest="eventPickEvtList", + default="", + type=str, + help="a file name which contains a list of runs/events for event picking", +) +group_evtFilter.add_argument( + "--eventPickDataType", + action="store", + dest="eventPickDataType", + default="", + type=str, + help="type of data for event picking. one of AOD,ESD,RAW", +) +group_evtFilter.add_argument( + "--ei_api", + action="store", + dest="ei_api", + default="", + type=str, + help="flag to signalise mc in event picking", +) +group_evtFilter.add_argument( + "--eventPickStreamName", + action="store", + dest="eventPickStreamName", + default="", + type=str, + help="stream name for event picking. e.g., physics_CosmicCaloEM", +) +action = group_evtFilter.add_argument( + "--eventPickDS", + action="store", + dest="eventPickDS", + default="", + type=str, + help='A comma-separated list of pattern strings. Datasets which are converted from the run/event list will be used when they match with one of the pattern strings. Either \ or "" is required when a wild-card is used. e.g., data\*', +) group_input.shareWithMe(action) -group_evtFilter.add_argument('--eventPickAmiTag',action='store',dest='eventPickAmiTag',default='', - type=str, help='AMI tag used to match TAG collections names. This option is required when you are interested in older data than the latest one. Either \ or "" is required when a wild-card is used. 
e.g., f2\*') -group_evtFilter.add_argument('--eventPickWithGUID',action='store_const',const=True,dest='eventPickWithGUID',default=False, - help='Using GUIDs together with run and event numbers in eventPickEvtList to skip event lookup') -group_submit.add_argument('--sameSecRetry', action='store_const',const=False,dest='sameSecRetry',default=True, - help="Use the same secondary input files when jobs are retried") -group_submit.add_argument('--express', action='store_const',const=True,dest='express',default=False, - help="Send the job using express quota to have higher priority. The number of express subjobs in the queue and the total execution time used by express subjobs are limited (a few subjobs and several hours per day, respectively). This option is intended to be used for quick tests before bulk submission. Note that buildXYZ is not included in quota calculation. If this option is used when quota has already exceeded, the panda server will ignore the option so that subjobs have normal priorities. Also, if you submit 1 buildXYZ and N runXYZ subjobs when you only have quota of M (M < N), only the first M runXYZ subjobs will have higher priorities") -group_print.add_argument('--debugMode', action='store_const',const=True,dest='debugMode',default=False, - help="Send the job with the debug mode on. If this option is specified the subjob will send stdout to the panda monitor every 5 min. The number of debug subjobs per user is limited. When this option is used and the quota has already exceeded, the panda server supresses the option so that subjobs will run without the debug mode. If you submit multiple subjobs in a single job, only the first subjob will set the debug mode on. Note that you can turn the debug mode on/off by using pbook after jobs are submitted" ) -group_output.add_argument('--addNthFieldOfInDSToLFN',action='store',dest='addNthFieldOfInDSToLFN',default='',type=str, - help="A middle name is added to LFNs of output files when they are produced from one dataset in the input container or input dataset list. The middle name is extracted from the dataset name. E.g., if --addNthFieldOfInDSToLFN=2 and the dataset name is data10_7TeV.00160387.physics_Muon..., 00160387 is extracted and LFN is something like user.hoge.TASKID.00160387.blah. Concatenate multiple field numbers with commas if necessary, e.g., --addNthFieldOfInDSToLFN=2,6.") -group_output.add_argument('--addNthFieldOfInFileToLFN',action='store',dest='addNthFieldOfInFileToLFN',default='',type=str, - help="A middle name is added to LFNs of output files similarly as --addNthFieldOfInDSToLFN, but strings are extracted from input file names") -group_job.add_argument('--useAMIEventLevelSplit',action='store_const',const=True,dest='useAMIEventLevelSplit',default=None, - help="retrive the number of events per file from AMI to split the job using --nEventsPerJob") -group_output.add_argument('--appendStrToExtStream',action='store_const',const=True,dest='appendStrToExtStream',default=False, - help='append the first part of filenames to extra stream names for --individualOutDS. E.g., if this option is used together with --individualOutDS, %%OUT.AOD.pool.root will be contained in an EXT0_AOD dataset instead of an EXT0 dataset') -group_output.add_argument('--mergeOutput', action='store_const', const=True, dest='mergeOutput', default=False, - help="merge output files") -group_output.add_argument('--mergeLog', action='store_const', const=True, dest='mergeLog', default=False, - help="merge log files. 
relevant only with --mergeOutput") -action = group_job.add_argument('--mergeScript',action='store',dest='mergeScript',default='',type=str, - help='Specify user-defied script or execution string for output merging') +group_evtFilter.add_argument( + "--eventPickAmiTag", + action="store", + dest="eventPickAmiTag", + default="", + type=str, + help='AMI tag used to match TAG collections names. This option is required when you are interested in older data than the latest one. Either \ or "" is required when a wild-card is used. e.g., f2\*', +) +group_evtFilter.add_argument( + "--eventPickWithGUID", + action="store_const", + const=True, + dest="eventPickWithGUID", + default=False, + help="Using GUIDs together with run and event numbers in eventPickEvtList to skip event lookup", +) +group_submit.add_argument( + "--sameSecRetry", + action="store_const", + const=False, + dest="sameSecRetry", + default=True, + help="Use the same secondary input files when jobs are retried", +) +group_submit.add_argument( + "--express", + action="store_const", + const=True, + dest="express", + default=False, + help="Send the job using express quota to have higher priority. The number of express subjobs in the queue and the total execution time used by express subjobs are limited (a few subjobs and several hours per day, respectively). This option is intended to be used for quick tests before bulk submission. Note that buildXYZ is not included in quota calculation. If this option is used when quota has already exceeded, the panda server will ignore the option so that subjobs have normal priorities. Also, if you submit 1 buildXYZ and N runXYZ subjobs when you only have quota of M (M < N), only the first M runXYZ subjobs will have higher priorities", +) +group_print.add_argument( + "--debugMode", + action="store_const", + const=True, + dest="debugMode", + default=False, + help="Send the job with the debug mode on. If this option is specified the subjob will send stdout to the panda monitor every 5 min. The number of debug subjobs per user is limited. When this option is used and the quota has already exceeded, the panda server supresses the option so that subjobs will run without the debug mode. If you submit multiple subjobs in a single job, only the first subjob will set the debug mode on. Note that you can turn the debug mode on/off by using pbook after jobs are submitted", +) +group_output.add_argument( + "--addNthFieldOfInDSToLFN", + action="store", + dest="addNthFieldOfInDSToLFN", + default="", + type=str, + help="A middle name is added to LFNs of output files when they are produced from one dataset in the input container or input dataset list. The middle name is extracted from the dataset name. E.g., if --addNthFieldOfInDSToLFN=2 and the dataset name is data10_7TeV.00160387.physics_Muon..., 00160387 is extracted and LFN is something like user.hoge.TASKID.00160387.blah. 
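Concatenate multiple field numbers with commas if necessary, e.g., --addNthFieldOfInDSToLFN=2,6.", +)

A minimal illustration of which part of the dataset name --addNthFieldOfInDSToLFN picks up, based on the example in the help text above; the dataset name below is a placeholder and field numbering starts at 1:

    # field 2 of a dot-separated dataset name, as in --addNthFieldOfInDSToLFN=2
    ds_name = "data10_7TeV.00160387.physics_Muon.recon.AOD"  # placeholder name
    middle_name = ds_name.split(".")[1]  # "00160387", added to the output LFN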
+group_output.add_argument( + "--addNthFieldOfInFileToLFN", + action="store", + dest="addNthFieldOfInFileToLFN", + default="", + type=str, + help="A middle name is added to LFNs of output files similarly as --addNthFieldOfInDSToLFN, but strings are extracted from input file names", +) +group_job.add_argument( + "--useAMIEventLevelSplit", + action="store_const", + const=True, + dest="useAMIEventLevelSplit", + default=None, + help="retrieve the number of events per file from AMI to split the job using --nEventsPerJob", +) +group_output.add_argument( + "--appendStrToExtStream", + action="store_const", + const=True, + dest="appendStrToExtStream", + default=False, + help="append the first part of filenames to extra stream names for --individualOutDS. E.g., if this option is used together with --individualOutDS, %%OUT.AOD.pool.root will be contained in an EXT0_AOD dataset instead of an EXT0 dataset", +) +group_output.add_argument( + "--mergeOutput", + action="store_const", + const=True, + dest="mergeOutput", + default=False, + help="merge output files", +) +group_output.add_argument( + "--mergeLog", + action="store_const", + const=True, + dest="mergeLog", + default=False, + help="merge log files. relevant only with --mergeOutput", +) +action = group_job.add_argument( + "--mergeScript", + action="store", + dest="mergeScript", + default="", + type=str, + help="Specify user-defined script or execution string for output merging", +) group_output.shareWithMe(action) -group_job.add_argument('--useCommonHalo', action='store_const', const=False, dest='useCommonHalo', default=True, - help="use an integrated DS for BeamHalo") -group_input.add_argument('--beamHaloDS', action='store', dest='beamHaloDS', default='', - type=str, help='Dataset name for beam halo') -group_input.add_argument('--beamHaloADS', action='store', dest='beamHaloADS', default='', - type=str, help='Dataset name for beam halo A-side') -group_input.add_argument('--beamHaloCDS', action='store', dest='beamHaloCDS', default='', - type=str, help='Dataset name for beam halo C-side') -group_job.add_argument('--nBeamHalo', action='store', dest='nBeamHalo', default=-1, - type=int, help='Number of beam halo files per sub job') -group_job.add_argument('--nBeamHaloA', action='store', dest='nBeamHaloA', default=-1, - type=int, help='Number of beam halo files for A-side per sub job') -group_job.add_argument('--nBeamHaloC', action='store', dest='nBeamHaloC', default=-1, - type=int, help='Number of beam halo files for C-side per sub job') -group_job.add_argument('--useCommonGas', action='store_const', const=False, dest='useCommonGas', default=True, - help="use an integrated DS for BeamGas") -group_input.add_argument('--beamGasDS', action='store', dest='beamGasDS', default='', - type=str, help='Dataset name for beam gas') -group_input.add_argument('--beamGasHDS', action='store', dest='beamGasHDS', default='', - type=str, help='Dataset name for beam gas Hydrogen') -group_input.add_argument('--beamGasCDS', action='store', dest='beamGasCDS', default='', - type=str, help='Dataset name for beam gas Carbon') -group_input.add_argument('--beamGasODS', action='store', dest='beamGasODS', default='', - type=str, help='Dataset name for beam gas Oxygen') -group_job.add_argument('--nBeamGas', action='store', dest='nBeamGas', default=-1, - type=int, help='Number of beam gas files per sub job') -group_job.add_argument('--nBeamGasH', action='store', dest='nBeamGasH', 
default=-1, - type=int, help='Number of beam gas files for Hydrogen per sub job') -group_job.add_argument('--nBeamGasC', action='store', dest='nBeamGasC', default=-1, - type=int, help='Number of beam gas files for Carbon per sub job') -group_job.add_argument('--nBeamGasO', action='store', dest='nBeamGasO', default=-1, - type=int, help='Number of beam gas files for Oxygen per sub job') -group_output.add_argument('--outDS', action='store', dest='outDS', default='', - type=str, help='Name of an output dataset. OUTDS will contain all output files') -group_output.add_argument('--destSE',action='store', dest='destSE',default='', - type=str, help='Destination strorage element') -group_input.add_argument('--nFiles', '--nfiles', action='store', dest='nfiles', default=0, - type=int, help='Use an limited number of files in the input dataset') -group_print.add_argument('-v', action='store_const', const=True, dest='verbose', default=False, - help='Verbose') -group_submit.add_argument('--noEmail', action='store_const', const=True, dest='noEmail', default=False, - help='Suppress email notification') -group_pathena.add_argument('--update', action='store_const', const=True, dest='update', default=False, - help='Update panda-client to the latest version') -group_build.add_argument('--noBuild', action='store_const', const=True, dest='noBuild', default=False, - help='Skip buildJob') -group_submit.add_argument('--bulkSubmission', action='store_const', const=True, dest='bulkSubmission', default=False, - help='Bulk submit tasks. When this option is used, --inOutDsJson is required while --inDS and --outDS are ignored. It is possible to use %%DATASET_IN and %%DATASET_OUT in --trf which are replaced with actual dataset names when tasks are submitted, and %%BULKSEQNUMBER which is replaced with a sequential number of tasks in the bulk submission') -group_build.add_argument('--noCompile', action='store_const',const=True,dest='noCompile',default=False, - help='Just upload a tarball in the build step to avoid the tighter size limit imposed by --noBuild. 
The tarball contains binaries compiled on your local computer, so that compilation is skipped in the build step on remote WN') -action = group_output.add_argument('--noOutput', action='store_const', const=True, dest='noOutput', default=False, - help='Send job even if there is no output file') +group_job.add_argument( + "--useCommonHalo", + action="store_const", + const=False, + dest="useCommonHalo", + default=True, + help="use an integrated DS for BeamHalo", +) +group_input.add_argument( + "--beamHaloDS", + action="store", + dest="beamHaloDS", + default="", + type=str, + help="Dataset name for beam halo", +) +group_input.add_argument( + "--beamHaloADS", + action="store", + dest="beamHaloADS", + default="", + type=str, + help="Dataset name for beam halo A-side", +) +group_input.add_argument( + "--beamHaloCDS", + action="store", + dest="beamHaloCDS", + default="", + type=str, + help="Dataset name for beam halo C-side", +) +group_job.add_argument( + "--nBeamHalo", + action="store", + dest="nBeamHalo", + default=-1, + type=int, + help="Number of beam halo files per sub job", +) +group_job.add_argument( + "--nBeamHaloA", + action="store", + dest="nBeamHaloA", + default=-1, + type=int, + help="Number of beam halo files for A-side per sub job", +) +group_job.add_argument( + "--nBeamHaloC", + action="store", + dest="nBeamHaloC", + default=-1, + type=int, + help="Number of beam halo files for C-side per sub job", +) +group_job.add_argument( + "--useCommonGas", + action="store_const", + const=False, + dest="useCommonGas", + default=True, + help="use an integrated DS for BeamGas", +) +group_input.add_argument( + "--beamGasDS", + action="store", + dest="beamGasDS", + default="", + type=str, + help="Dataset name for beam gas", +) +group_input.add_argument( + "--beamGasHDS", + action="store", + dest="beamGasHDS", + default="", + type=str, + help="Dataset name for beam gas Hydrogen", +) +group_input.add_argument( + "--beamGasCDS", + action="store", + dest="beamGasCDS", + default="", + type=str, + help="Dataset name for beam gas Carbon", +) +group_input.add_argument( + "--beamGasODS", + action="store", + dest="beamGasODS", + default="", + type=str, + help="Dataset name for beam gas Oxygen", +) +group_job.add_argument( + "--nBeamGas", + action="store", + dest="nBeamGas", + default=-1, + type=int, + help="Number of beam gas files per sub job", +) +group_job.add_argument( + "--nBeamGasH", + action="store", + dest="nBeamGasH", + default=-1, + type=int, + help="Number of beam gas files for Hydrogen per sub job", +) +group_job.add_argument( + "--nBeamGasC", + action="store", + dest="nBeamGasC", + default=-1, + type=int, + help="Number of beam gas files for Carbon per sub job", +) +group_job.add_argument( + "--nBeamGasO", + action="store", + dest="nBeamGasO", + default=-1, + type=int, + help="Number of beam gas files for Oxygen per sub job", +) +group_output.add_argument( + "--outDS", + action="store", + dest="outDS", + default="", + type=str, + help="Name of an output dataset. 
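OUTDS will contain all output files", +)

A minimal sketch of how --inDS and --outDS are typically combined when launching pathena, shown here via subprocess; the jobOption file, dataset names, and nickname are placeholders, and pathena is assumed to be set up in the environment:

    # submit a task reading inDS and writing to outDS (all names are placeholders)
    import subprocess

    subprocess.run(
        [
            "pathena", "MyAnalysisJobOptions.py",
            "--inDS", "mc23.001.recon.AOD",
            "--outDS", "user.somebody.test01",
            "--nFilesPerJob", "5",
        ],
        check=True,
    )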
+group_output.add_argument( + "--destSE", + action="store", + dest="destSE", + default="", + type=str, + help="Destination storage element", +) +group_input.add_argument( + "--nFiles", + "--nfiles", + action="store", + dest="nfiles", + default=0, + type=int, + help="Use a limited number of files in the input dataset", +) +group_print.add_argument( + "-v", + action="store_const", + const=True, + dest="verbose", + default=False, + help="Verbose", +) +group_submit.add_argument( + "--noEmail", + action="store_const", + const=True, + dest="noEmail", + default=False, + help="Suppress email notification", +) +group_pathena.add_argument( + "--update", + action="store_const", + const=True, + dest="update", + default=False, + help="Update panda-client to the latest version", +) +group_build.add_argument( + "--noBuild", + action="store_const", + const=True, + dest="noBuild", + default=False, + help="Skip buildJob", +) +group_submit.add_argument( + "--bulkSubmission", + action="store_const", + const=True, + dest="bulkSubmission", + default=False, + help="Bulk submit tasks. When this option is used, --inOutDsJson is required while --inDS and --outDS are ignored. It is possible to use %%DATASET_IN and %%DATASET_OUT in --trf which are replaced with actual dataset names when tasks are submitted, and %%BULKSEQNUMBER which is replaced with a sequential number of tasks in the bulk submission", +) +group_build.add_argument( + "--noCompile", + action="store_const", + const=True, + dest="noCompile", + default=False, + help="Just upload a tarball in the build step to avoid the tighter size limit imposed by --noBuild. The tarball contains binaries compiled on your local computer, so that compilation is skipped in the build step on remote WN", +) +action = group_output.add_argument( + "--noOutput", + action="store_const", + const=True, + dest="noOutput", + default=False, + help="Send job even if there is no output file", +) group_submit.shareWithMe(action) -group_input.add_argument('--noRandom', action='store_const', const=True, dest='norandom', default=False, - help='Enter random seeds manually') -group_job.add_argument('--useAMIAutoConf',action='store_const',const=True,dest='useAMIAutoConf',default=False, - help='Use AMI for AutoConfiguration') -group_submit.add_argument('--memory', action='store', dest='memory', default=-1, type=int, - help='Required memory size in MB per core. e.g., for 1GB per core --memory 1024') -group_submit.add_argument('--fixedRamCount', action='store_const', const=True, dest='fixedRamCount', default=False, - help='Use fixed memory size instead of estimated memory size') -group_submit.add_argument('--outDiskCount', action='store', dest='outDiskCount', default=None, type=int, - help="Expected output size in kB per 1 MB of input. The system automatically calculates this " - "value using successful jobs and the value contains a safety offset (100kB). " - "Use this option to disable it when jobs cannot have enough input files " - "due to the offset") -group_submit.add_argument('--nCore', action='store', dest='nCore', default=-1, - type=int, help='The number of CPU cores. Note that the system distinguishes only nCore=1 and nCore>1. This means that even if you set nCore=2 jobs can go to sites with nCore=8 and your application must use the 8 cores there. The number of available cores is defined in an environment variable, $ATHENA_PROC_NUMBER, on WNs. 
Your application must check the env variable when starting up to dynamically change the number of cores') -action = group_job.add_argument('--nThreads', action='store', dest='nThreads', default=-1, - type=int, help='The number of threads for AthenaMT. If this option is set to larger than 1, Athena is executed with --threads=$ATHENA_PROC_NUMBER at sites which have nCore>1. This means that even if you set nThreads=2 jobs can go to sites with nCore=8 and your application will use the 8 cores there') +group_input.add_argument( + "--noRandom", + action="store_const", + const=True, + dest="norandom", + default=False, + help="Enter random seeds manually", +) +group_job.add_argument( + "--useAMIAutoConf", + action="store_const", + const=True, + dest="useAMIAutoConf", + default=False, + help="Use AMI for AutoConfiguration", +) +group_submit.add_argument( + "--memory", + action="store", + dest="memory", + default=-1, + type=int, + help="Required memory size in MB per core. e.g., for 1GB per core --memory 1024", +) +group_submit.add_argument( + "--fixedRamCount", + action="store_const", + const=True, + dest="fixedRamCount", + default=False, + help="Use fixed memory size instead of estimated memory size", +) +group_submit.add_argument( + "--outDiskCount", + action="store", + dest="outDiskCount", + default=None, + type=int, + help="Expected output size in kB per 1 MB of input. The system automatically calculates this " + "value using successful jobs and the value contains a safety offset (100kB). " + "Use this option to disable it when jobs cannot have enough input files " + "due to the offset", +) +group_submit.add_argument( + "--nCore", + action="store", + dest="nCore", + default=-1, + type=int, + help="The number of CPU cores. Note that the system distinguishes only nCore=1 and nCore>1. This means that even if you set nCore=2 jobs can go to sites with nCore=8 and your application must use the 8 cores there. The number of available cores is defined in an environment variable, $ATHENA_PROC_NUMBER, on WNs. Your application must check the env variable when starting up to dynamically change the number of cores", +) +action = group_job.add_argument( + "--nThreads", + action="store", + dest="nThreads", + default=-1, + type=int, + help="The number of threads for AthenaMT. If this option is set to larger than 1, Athena is executed with --threads=$ATHENA_PROC_NUMBER at sites which have nCore>1. 
This means that even if you set nThreads=2 jobs can go to sites with nCore=8 and your application will use the 8 cores there", +) group_submit.shareWithMe(action) -group_input.add_argument('--forceStaged', action='store_const', const=True, dest='forceStaged', default=False, - help='Force files from primary DS to be staged to local disk, even if direct-access is possible') -group_input.add_argument('--avoidVP', action='store_const', const=True, dest='avoidVP', default=False, - help='Not to use sites where virtual placement is enabled') -group_submit.add_argument('--maxCpuCount', action='store', dest='maxCpuCount', default=0, type=int, - help=argparse.SUPPRESS) -group_expert.add_argument('--noLoopingCheck', action='store_const', const=True, dest='noLoopingCheck', default=False, - help="Disable looping job check") -group_output.add_argument('--official', action='store_const', const=True, dest='official', default=False, - help='Produce official dataset') -action = group_job.add_argument('--unlimitNumOutputs', action='store_const', const=True, dest='unlimitNumOutputs', default=False, - help='Remove the limit on the number of outputs. Note that having too many outputs per job causes a severe load on the system. You may be banned if you carelessly use this option') +group_input.add_argument( + "--forceStaged", + action="store_const", + const=True, + dest="forceStaged", + default=False, + help="Force files from primary DS to be staged to local disk, even if direct-access is possible", +) +group_input.add_argument( + "--avoidVP", + action="store_const", + const=True, + dest="avoidVP", + default=False, + help="Not to use sites where virtual placement is enabled", +) +group_submit.add_argument( + "--maxCpuCount", + action="store", + dest="maxCpuCount", + default=0, + type=int, + help=argparse.SUPPRESS, +) +group_expert.add_argument( + "--noLoopingCheck", + action="store_const", + const=True, + dest="noLoopingCheck", + default=False, + help="Disable looping job check", +) +group_output.add_argument( + "--official", + action="store_const", + const=True, + dest="official", + default=False, + help="Produce official dataset", +) +action = group_job.add_argument( + "--unlimitNumOutputs", + action="store_const", + const=True, + dest="unlimitNumOutputs", + default=False, + help="Remove the limit on the number of outputs. Note that having too many outputs per job causes a severe load on the system. You may be banned if you carelessly use this option", +) group_output.shareWithMe(action) -group_output.add_argument('--descriptionInLFN',action='store',dest='descriptionInLFN',default='', - help='LFN is user.nickname.jobsetID.something (e.g. user.harumaki.12345.AOD._00001.pool) by default. This option allows users to put a description string into LFN. i.e., user.nickname.jobsetID.description.something') -group_build.add_argument('--extFile', action='store', dest='extFile', default='', - help='pathena exports files with some special extensions (.C, .dat, .py .xml) in the current directory. If you want to add other files, specify their names, e.g., data1.root,data2.doc') -group_build.add_argument('--excludeFile',action='store',dest='excludeFile',default='', - help='specify a comma-separated string to exclude files and/or directories when gathering files in local working area. Either \ or "" is required when a wildcard is used. 
e.g., doc,\*.C') -group_output.add_argument('--extOutFile', action='store', dest='extOutFile', default='', - help='A comma-separated list of extra output files which cannot be extracted automatically. Either \ or "" is required when a wildcard is used. e.g., output1.txt,output2.dat,JiveXML_\*.xml') -group_output.add_argument('--supStream', action='store', dest='supStream', default='', - help='suppress some output streams. Either \ or "" is required when a wildcard is used. e.g., ESD,TAG,GLOBAL,StreamDESD\* ') -group_build.add_argument('--gluePackages', action='store', dest='gluePackages', default='', - help='list of glue packages which pathena cannot find due to empty i686-slc4-gcc34-opt. e.g., External/AtlasHepMC,External/Lhapdf') -action = group_job.add_argument('--allowNoOutput',action='store',dest='allowNoOutput',default='',type=str, - help='A comma-separated list of regexp patterns. Output files are allowed not to be produced if their filenames match with one of regexp patterns. Jobs go to finished even if they are not produced on WN') +group_output.add_argument( + "--descriptionInLFN", + action="store", + dest="descriptionInLFN", + default="", + help="LFN is user.nickname.jobsetID.something (e.g. user.harumaki.12345.AOD._00001.pool) by default. This option allows users to put a description string into LFN. i.e., user.nickname.jobsetID.description.something", +) +group_build.add_argument( + "--extFile", + action="store", + dest="extFile", + default="", + help="pathena exports files with some special extensions (.C, .dat, .py .xml) in the current directory. If you want to add other files, specify their names, e.g., data1.root,data2.doc", +) +group_build.add_argument( + "--excludeFile", + action="store", + dest="excludeFile", + default="", + help='specify a comma-separated string to exclude files and/or directories when gathering files in local working area. Either \ or "" is required when a wildcard is used. e.g., doc,\*.C', +) +group_output.add_argument( + "--extOutFile", + action="store", + dest="extOutFile", + default="", + help='A comma-separated list of extra output files which cannot be extracted automatically. Either \ or "" is required when a wildcard is used. e.g., output1.txt,output2.dat,JiveXML_\*.xml', +) +group_output.add_argument( + "--supStream", + action="store", + dest="supStream", + default="", + help='suppress some output streams. Either \ or "" is required when a wildcard is used. e.g., ESD,TAG,GLOBAL,StreamDESD\* ', +) +group_build.add_argument( + "--gluePackages", + action="store", + dest="gluePackages", + default="", + help="list of glue packages which pathena cannot find due to empty i686-slc4-gcc34-opt. e.g., External/AtlasHepMC,External/Lhapdf", +) +action = group_job.add_argument( + "--allowNoOutput", + action="store", + dest="allowNoOutput", + default="", + type=str, + help="A comma-separated list of regexp patterns. Output files are allowed not to be produced if their filenames match with one of regexp patterns. 
Jobs go to finished even if they are not produced on WN", +) group_output.shareWithMe(action) -group_submit.add_argument('--excludedSite', action='append', dest='excludedSite', default=[], - help="A comma-separated list of sites which are not used for site section, " - "e.g., ABC,OPQ*,XYZ which excludes ABC, XYZ, and OPQ due to the wildcard") -group_submit.add_argument('--noSubmit', action='store_const', const=True, dest='noSubmit', default=False, - help="Don't submit jobs") -group_submit.add_argument('--prodSourceLabel', action='store', dest='prodSourceLabel', default='', - help="set prodSourceLabel") -group_submit.add_argument('--processingType', action='store', dest='processingType', default='pathena', - help="set processingType") -group_submit.add_argument('--workingGroup', action='store', dest='workingGroup', default=None, - help="set workingGroup") -group_input.add_argument('--generalInput', action='store_const', const=True, dest='generalInput', default=False, - help='Read input files with general format except POOL,ROOT,ByteStream') -group_build.add_argument('--tmpDir', action='store', dest='tmpDir', default='', - type=str, help='Temporary directory in which an archive file is created') -group_input.add_argument('--shipInput', action='store_const', const=True, dest='shipinput', default=False, - help='Ship input files to remote WNs') -group_submit.add_argument('--disableAutoRetry',action='store_const',const=True,dest='disableAutoRetry',default=False, - help='disable automatic job retry on the server side') -group_input.add_argument('--fileList', action='store', dest='filelist', default='', - type=str, help=argparse.SUPPRESS) -group_build.add_argument('--dbRelease', action='store', dest='dbRelease', default='', - type=str, help='DBRelease or CDRelease (DatasetName:FileName). e.g., ddo.000001.Atlas.Ideal.DBRelease.v050101:DBRelease-5.1.1.tar.gz. If --dbRelease=LATEST, the latest DBRelease is used') -group_build.add_argument('--addPoolFC', action='store', dest='addPoolFC', default='', - help="file names to be inserted into PoolFileCatalog.xml except input files. e.g., MyCalib1.root,MyGeom2.root") -group_input.add_argument('--inputFileList', action='store', dest='inputFileList', default='', - type=str, help='A local file which specifies names of files to be used in the input dataset. ' - 'One filename per line in the the local file') -group_build.add_argument('--voms', action='store', dest='vomsRoles', default=None, type=str, - help="generate proxy with paticular roles. e.g., atlas:/atlas/ca/Role=production,atlas:/atlas/fr/Role=pilot") -group_job.add_argument('--useNextEvent', action='store_const', const=True, dest='useNextEvent', default=False, - help="Set this option if your jobO uses theApp.nextEvent(), e.g. for G4. Note that this option is not required when you run transformations using --trf") -group_job.add_argument('--trf', action='store', dest='trf', default=False, - help='run transformation, e.g. --trf "csc_atlfast_trf.py %%IN %%OUT.AOD.root %%OUT.ntuple.root -1 0"') -group_output.add_argument('--spaceToken', action='store', dest='spaceToken', default='', - type=str, help='spacetoken for outputs. e.g., ATLASLOCALGROUPDISK') -group_input.add_argument('--notSkipMissing', action='store_const', const=True, dest='notSkipMissing', default=False, - help='If input files are not read from SE, they will be skipped by default. 
This option disables the functionality') -group_input.add_argument('--forceDirectIO', action='store_const', const=True, dest='forceDirectIO', default=False, - help="Use directIO if directIO is available at the site ") -group_expert.add_argument('--expertOnly_skipScout', action='store_const',const=True,dest='skipScout',default=False, - help=argparse.SUPPRESS) -group_job.add_argument('--respectSplitRule', action='store_const',const=True,dest='respectSplitRule',default=False, - help="force scout jobs to follow split rules like nGBPerJob") -group_expert.add_argument('--devSrv', action='store_const', const=True, dest='devSrv', default=False, - help="Please don't use this option. Only for developers to use the dev panda server") -group_expert.add_argument('--intrSrv', action='store_const', const=True, dest='intrSrv', default=False, - help="Please don't use this option. Only for developers to use the intr panda server") -group_input.add_argument('--inputType', action='store', dest='inputType', default='', - type=str, help='A regular expression pattern. Only files matching with the pattern in input dataset are used') -group_build.add_argument('--outTarBall', action='store', dest='outTarBall', default='', - type=str, help='Save a gzipped tarball of local files which is the input to buildXYZ') -group_build.add_argument('--inTarBall', action='store', dest='inTarBall', default='', - type=str, help='Use a gzipped tarball of local files as input to buildXYZ. Generall the tarball is created by using --outTarBall') -group_config.add_argument('--outRunConfig', action='store', dest='outRunConfig', default='', - type=str, help='Save extracted config information to a local file') -group_config.add_argument('--inRunConfig', action='store', dest='inRunConfig', default='', - type=str, help='Use a saved config information to skip config extraction') -group_input.add_argument('--pfnList', action='store', dest='pfnList', default='', - type=str, help='Name of file which contains a list of input PFNs. Those files can be un-registered in DDM') -group_build.add_argument('--cmtConfig', action='store', dest='cmtConfig', default=None, - help='CMTCONFIG is extracted from local environment variables when tasks are submitted, ' - 'to set up the same environment on remote worker-nodes. ' - 'This option allows to set up another CMTCONFIG ' - 'remotely. e.g., --cmtConfig x86_64-slc5-gcc43-opt.') -group_output.add_argument('--allowTaskDuplication',action='store_const',const=True,dest='allowTaskDuplication',default=False, - help="As a general rule each task has a unique outDS and history of file usage is recorded per task. This option allows multiple tasks to contribute to the same outDS. Typically useful to submit a new task with the outDS which was used by another broken task. Use this option very carefully at your own risk, since file duplication happens when the second task runs on the same input which the first task successfully processed") -group_input.add_argument('--skipFilesUsedBy', action='store', dest='skipFilesUsedBy', default='', - type=str, help='A comma-separated list of TaskIDs. 
Files used by those tasks are skipped when running a new task') -group_submit.add_argument('--maxAttempt', action='store', dest='maxAttempt', default=-1, - type=int, help='Maximum number of reattempts for each job (3 by default and not larger than 50)') -group_containerJob.add_argument('--containerImage', action='store', dest='containerImage', default='', - type=str, help="Name of a container image") -group_containerJob.add_argument('--architecture', action='store', dest='architecture', default='', - help="Base OS platform, CPU, and/or GPU requirements. " - "The format is @base_platform#CPU_spec&GPU_spec " - "where base platform, CPU, or GPU spec can be omitted. " - "If base platform is not specified it is automatically taken from " - "$ALRB_USER_PLATFORM. " - "CPU_spec = architecture<-vendor<-instruction set>>, " - "GPU_spec = vendor<-model>. A wildcards can be used if there is no special " - "requirement for the attribute. E.g., #x86_64-*-avx2&nvidia to ask for x86_64 " - "CPU with avx2 support and nvidia GPU") -group_build.add_argument("-3", action="store_true", dest="python3", default=False, - help="Use python3") -group_input.add_argument('--respectLB', action='store_const', const=True, dest='respectLB', default=False, - help='To generate jobs repecting lumiblock boundaries') +group_submit.add_argument( + "--excludedSite", + action="append", + dest="excludedSite", + default=[], + help="A comma-separated list of sites which are not used for site section, " + "e.g., ABC,OPQ*,XYZ which excludes ABC, XYZ, and OPQ due to the wildcard", +) +group_submit.add_argument( + "--noSubmit", + action="store_const", + const=True, + dest="noSubmit", + default=False, + help="Don't submit jobs", +) +group_submit.add_argument( + "--prodSourceLabel", + action="store", + dest="prodSourceLabel", + default="", + help="set prodSourceLabel", +) +group_submit.add_argument( + "--processingType", + action="store", + dest="processingType", + default="pathena", + help="set processingType", +) +group_submit.add_argument( + "--workingGroup", + action="store", + dest="workingGroup", + default=None, + help="set workingGroup", +) +group_input.add_argument( + "--generalInput", + action="store_const", + const=True, + dest="generalInput", + default=False, + help="Read input files with general format except POOL,ROOT,ByteStream", +) +group_build.add_argument( + "--tmpDir", + action="store", + dest="tmpDir", + default="", + type=str, + help="Temporary directory in which an archive file is created", +) +group_input.add_argument( + "--shipInput", + action="store_const", + const=True, + dest="shipinput", + default=False, + help="Ship input files to remote WNs", +) +group_submit.add_argument( + "--disableAutoRetry", + action="store_const", + const=True, + dest="disableAutoRetry", + default=False, + help="disable automatic job retry on the server side", +) +group_input.add_argument( + "--fileList", + action="store", + dest="filelist", + default="", + type=str, + help=argparse.SUPPRESS, +) +group_build.add_argument( + "--dbRelease", + action="store", + dest="dbRelease", + default="", + type=str, + help="DBRelease or CDRelease (DatasetName:FileName). e.g., ddo.000001.Atlas.Ideal.DBRelease.v050101:DBRelease-5.1.1.tar.gz. If --dbRelease=LATEST, the latest DBRelease is used", +) +group_build.add_argument( + "--addPoolFC", + action="store", + dest="addPoolFC", + default="", + help="file names to be inserted into PoolFileCatalog.xml except input files. 
e.g., MyCalib1.root,MyGeom2.root", +) +group_input.add_argument( + "--inputFileList", + action="store", + dest="inputFileList", + default="", + type=str, + help="A local file which specifies names of files to be used in the input dataset. " "One filename per line in the the local file", +) +group_build.add_argument( + "--voms", + action="store", + dest="vomsRoles", + default=None, + type=str, + help="generate proxy with paticular roles. e.g., atlas:/atlas/ca/Role=production,atlas:/atlas/fr/Role=pilot", +) +group_job.add_argument( + "--useNextEvent", + action="store_const", + const=True, + dest="useNextEvent", + default=False, + help="Set this option if your jobO uses theApp.nextEvent(), e.g. for G4. Note that this option is not required when you run transformations using --trf", +) +group_job.add_argument( + "--trf", + action="store", + dest="trf", + default=False, + help='run transformation, e.g. --trf "csc_atlfast_trf.py %%IN %%OUT.AOD.root %%OUT.ntuple.root -1 0"', +) +group_output.add_argument( + "--spaceToken", + action="store", + dest="spaceToken", + default="", + type=str, + help="spacetoken for outputs. e.g., ATLASLOCALGROUPDISK", +) +group_input.add_argument( + "--notSkipMissing", + action="store_const", + const=True, + dest="notSkipMissing", + default=False, + help="If input files are not read from SE, they will be skipped by default. This option disables the functionality", +) +group_input.add_argument( + "--forceDirectIO", + action="store_const", + const=True, + dest="forceDirectIO", + default=False, + help="Use directIO if directIO is available at the site ", +) +group_expert.add_argument( + "--expertOnly_skipScout", + action="store_const", + const=True, + dest="skipScout", + default=False, + help=argparse.SUPPRESS, +) +group_job.add_argument( + "--respectSplitRule", + action="store_const", + const=True, + dest="respectSplitRule", + default=False, + help="force scout jobs to follow split rules like nGBPerJob", +) +group_expert.add_argument( + "--devSrv", + action="store_const", + const=True, + dest="devSrv", + default=False, + help="Please don't use this option. Only for developers to use the dev panda server", +) +group_expert.add_argument( + "--intrSrv", + action="store_const", + const=True, + dest="intrSrv", + default=False, + help="Please don't use this option. Only for developers to use the intr panda server", +) +group_input.add_argument( + "--inputType", + action="store", + dest="inputType", + default="", + type=str, + help="A regular expression pattern. Only files matching with the pattern in input dataset are used", +) +group_build.add_argument( + "--outTarBall", + action="store", + dest="outTarBall", + default="", + type=str, + help="Save a gzipped tarball of local files which is the input to buildXYZ", +) +group_build.add_argument( + "--inTarBall", + action="store", + dest="inTarBall", + default="", + type=str, + help="Use a gzipped tarball of local files as input to buildXYZ. Generall the tarball is created by using --outTarBall", +) +group_config.add_argument( + "--outRunConfig", + action="store", + dest="outRunConfig", + default="", + type=str, + help="Save extracted config information to a local file", +) +group_config.add_argument( + "--inRunConfig", + action="store", + dest="inRunConfig", + default="", + type=str, + help="Use a saved config information to skip config extraction", +) +group_input.add_argument( + "--pfnList", + action="store", + dest="pfnList", + default="", + type=str, + help="Name of file which contains a list of input PFNs. 
Those files can be un-registered in DDM", +) +group_build.add_argument( + "--cmtConfig", + action="store", + dest="cmtConfig", + default=None, + help="CMTCONFIG is extracted from local environment variables when tasks are submitted, " + "to set up the same environment on remote worker-nodes. " + "This option allows to set up another CMTCONFIG " + "remotely. e.g., --cmtConfig x86_64-slc5-gcc43-opt.", +) +group_output.add_argument( + "--allowTaskDuplication", + action="store_const", + const=True, + dest="allowTaskDuplication", + default=False, + help="As a general rule each task has a unique outDS and history of file usage is recorded per task. This option allows multiple tasks to contribute to the same outDS. Typically useful to submit a new task with the outDS which was used by another broken task. Use this option very carefully at your own risk, since file duplication happens when the second task runs on the same input which the first task successfully processed", +) +group_input.add_argument( + "--skipFilesUsedBy", + action="store", + dest="skipFilesUsedBy", + default="", + type=str, + help="A comma-separated list of TaskIDs. Files used by those tasks are skipped when running a new task", +) +group_submit.add_argument( + "--maxAttempt", + action="store", + dest="maxAttempt", + default=-1, + type=int, + help="Maximum number of reattempts for each job (3 by default and not larger than 50)", +) +group_submit.add_argument( + "-y", + action="store_true", + dest="is_confirmed", + default=False, + help="Answer yes for all questions", +) +group_containerJob.add_argument( + "--containerImage", + action="store", + dest="containerImage", + default="", + type=str, + help="Name of a container image", +) +group_containerJob.add_argument( + "--architecture", + action="store", + dest="architecture", + default="", + help="Base OS platform, CPU, and/or GPU requirements. " + "The format is @base_platform#CPU_spec&GPU_spec " + "where base platform, CPU, or GPU spec can be omitted. " + "If base platform is not specified it is automatically taken from " + "$ALRB_USER_PLATFORM. " + "CPU_spec = architecture<-vendor<-instruction set>>, " + "GPU_spec = vendor<-model>. A wildcards can be used if there is no special " + "requirement for the attribute. E.g., #x86_64-*-avx2&nvidia to ask for x86_64 " + "CPU with avx2 support and nvidia GPU", +) +group_build.add_argument("-3", action="store_true", dest="python3", default=False, help="Use python3") +group_input.add_argument( + "--respectLB", + action="store_const", + const=True, + dest="respectLB", + default=False, + help="To generate jobs repecting lumiblock boundaries", +) # athena options -group_job.add_argument('-c',action='store',dest='singleLine',type=str,default='',metavar='COMMAND', - help='One-liner, runs before any jobOs') -group_job.add_argument('-p',action='store',dest='preConfig',type=str,default='',metavar='BOOTSTRAP', - help='location of bootstrap file') -group_job.add_argument('-s',action='store_const',const=True,dest='codeTrace',default=False, - help='show printout of included files') - -group_expert.add_argument('--queueData', action='store', dest='queueData', default='', - type=str, help="Please don't use this option. Only for developers") -group_submit.add_argument('--useNewCode',action='store_const',const=True,dest='useNewCode',default=False, - help='When task are resubmitted with the same outDS, the original souce code is used to re-run on failed/unprocessed files. 
This option uploads new source code so that jobs will run with new binaries') -group_config.add_argument('--loadJson', action='store', dest='loadJson',default=None, - help="Read command-line parameters from a json file which contains a dict of {parameter: value}. Arguemnts for Athena can be specified as {'atehna_args': [...,]}") -group_config.add_argument('--dumpJson', action='store', dest='dumpJson', default=None, - help='Dump all command-line parameters and submission result such as returnCode, returnOut, jediTaskID, and bulkSeqNumber if --bulkSubmission is used, to a json file') -group_config.add_argument('--parentTaskID', '--parentTaskID', action='store', dest='parentTaskID', default=None, - type=int, - help='Set taskID of the paranet task to execute the task while the parent is still running') -group_submit.add_argument('--priority', action='store', dest='priority', default=None, type=int, - help='Set priority of the task (1000 by default). The value must be between 900 and 1100. ' \ - 'Note that priorities of tasks are relevant only in ' \ - "each user's share, i.e., your tasks cannot jump over other user's tasks " \ - 'even if you give higher priorities.') -group_submit.add_argument('--osMatching', action='store_const', const=True, dest='osMatching', default=False, - help='To let the brokerage choose sites which have the same OS as the local machine has.') -group_job.add_argument('--cpuTimePerEvent', action='store', dest='cpuTimePerEvent', default=-1, type=int, - help='Expected HS06 seconds per event (~= 10 * the expected duration per event in seconds)') -group_job.add_argument('--fixedCpuTime', action='store_const', const=True, dest='fixedCpuTime', default=False, - help='Use fixed cpuTime instead of estimated cpuTime') -group_job.add_argument('--maxWalltime', action='store', dest='maxWalltime', default=0, type=int, - help='Max walltime for each job in hours. Note that this option works only ' \ - 'when the nevents metadata of input files are available in rucio') +group_job.add_argument( + "-c", + action="store", + dest="singleLine", + type=str, + default="", + metavar="COMMAND", + help="One-liner, runs before any jobOs", +) +group_job.add_argument( + "-p", + action="store", + dest="preConfig", + type=str, + default="", + metavar="BOOTSTRAP", + help="location of bootstrap file", +) +group_job.add_argument( + "-s", + action="store_const", + const=True, + dest="codeTrace", + default=False, + help="show printout of included files", +) + +group_expert.add_argument( + "--queueData", + action="store", + dest="queueData", + default="", + type=str, + help="Please don't use this option. Only for developers", +) +group_submit.add_argument( + "--useNewCode", + action="store_const", + const=True, + dest="useNewCode", + default=False, + help="When tasks are resubmitted with the same outDS, the original source code is used to re-run on failed/unprocessed files. This option uploads new source code so that jobs will run with new binaries", +) +group_config.add_argument( + "--loadJson", + action="store", + dest="loadJson", + default=None, + help="Read command-line parameters from a json file which contains a dict of {parameter: value}.
Arguments for Athena can be specified as {'athena_args': [...,]}", +) +group_config.add_argument( + "--dumpJson", + action="store", + dest="dumpJson", + default=None, + help="Dump all command-line parameters and submission result such as returnCode, returnOut, jediTaskID, and bulkSeqNumber if --bulkSubmission is used, to a json file", +) +group_config.add_argument( + "--parentTaskID", + "--parentTaskID", + action="store", + dest="parentTaskID", + default=None, + type=int, + help="Set taskID of the parent task to execute the task while the parent is still running", +) +group_submit.add_argument( + "--priority", + action="store", + dest="priority", + default=None, + type=int, + help="Set priority of the task (1000 by default). The value must be between 900 and 1100. " + "Note that priorities of tasks are relevant only in " + "each user's share, i.e., your tasks cannot jump over other user's tasks " + "even if you give higher priorities.", +) +group_submit.add_argument( + "--osMatching", + action="store_const", + const=True, + dest="osMatching", + default=False, + help="To let the brokerage choose sites which have the same OS as the local machine has.", +) +group_job.add_argument( + "--cpuTimePerEvent", + action="store", + dest="cpuTimePerEvent", + default=-1, + type=int, + help="Expected HS06 seconds per event (~= 10 * the expected duration per event in seconds)", +) +group_job.add_argument( + "--fixedCpuTime", + action="store_const", + const=True, + dest="fixedCpuTime", + default=False, + help="Use fixed cpuTime instead of estimated cpuTime", +) +group_job.add_argument( + "--maxWalltime", + action="store", + dest="maxWalltime", + default=0, + type=int, + help="Max walltime for each job in hours. Note that this option works only " "when the nevents metadata of input files are available in rucio", +) from pandaclient import MiscUtils -from pandaclient.MiscUtils import commands_get_output, commands_get_status_output, commands_get_status_output_with_env +from pandaclient.MiscUtils import ( + commands_get_output, + commands_get_status_output, + commands_get_status_output_with_env, +) # parse options # check against the removed options first for arg in sys.argv[1:]: - optName = arg.split('=',1)[0] - if optName in removedOpts: - print("!!Warning!! option %s has been deprecated, pls dont use anymore\n" % optName) - sys.argv.remove(arg) + optName = arg.split("=", 1)[0] + if optName in removedOpts: + print("!!Warning!!
option %s has been deprecated, pls dont use anymore\n" % optName) + sys.argv.remove(arg) # using parse_known_args for passing arguments with - options, args = optP.parse_known_args() if options.verbose: print(options) - print('args={0}'.format(args)) - print('') + print("args={0}".format(args)) + print("") # load json -jsonExecStr = '' +jsonExecStr = "" if options.loadJson is not None: loadOpts = MiscUtils.decodeJSON(options.loadJson) for k in loadOpts: @@ -502,16 +1368,16 @@ except Exception: pass origK = k - if k == 'athena_args': + if k == "athena_args": args = v continue if not hasattr(options, k): print("ERROR: unknown parameter {0} in {1}".format(k, options.loadJson)) sys.exit(1) else: - setattr(options,k, v) + setattr(options, k, v) if v is True: - jsonExecStr += ' --{0}'.format(origK) + jsonExecStr += " --{0}".format(origK) else: if isinstance(v, (str, unicode)): jsonExecStr += " --{0}='{1}'".format(origK, v) @@ -520,18 +1386,16 @@ if options.verbose: print("options after loading json") print(options) - print('') + print("") # display version from pandaclient import PandaToolsPkgInfo + if options.version: print("Version: %s" % PandaToolsPkgInfo.release_version) sys.exit(0) -from pandaclient import Client -from pandaclient import PsubUtils -from pandaclient import AthenaUtils -from pandaclient import PLogger +from pandaclient import AthenaUtils, Client, PLogger, PsubUtils # update panda-client if options.update: @@ -551,9 +1415,9 @@ # check if unknown arg for tmp_arg in args: # separator - if tmp_arg == '--': + if tmp_arg == "--": break - if tmp_arg.startswith('-'): + if tmp_arg.startswith("-"): tmpLog.error("unrecognized argument: {0}".format(tmp_arg)) sys.exit(EC_Config) @@ -573,7 +1437,7 @@ options.noBuild = True # set noBuild for container -if options.containerImage != '': +if options.containerImage != "": options.noBuild = True # files to be deleted @@ -585,30 +1449,35 @@ # warning for PQ PsubUtils.get_warning_for_pq(options.site, options.excludedSite, tmpLog) +# warning for memory +is_confirmed = PsubUtils.get_warning_for_memory(options.memory, options.is_confirmed, tmpLog) +if not is_confirmed: + sys.exit(0) + # exclude sites if options.excludedSite != []: options.excludedSite = PsubUtils.splitCommaConcatenatedItems(options.excludedSite) # use certain sites includedSite = None -if re.search(',',options.site) is not None: +if re.search(",", options.site) is not None: includedSite = PsubUtils.splitCommaConcatenatedItems([options.site]) - options.site = 'AUTO' + options.site = "AUTO" # site specified siteSpecified = True -if options.site == 'AUTO': +if options.site == "AUTO": siteSpecified = False # list of output files which can be skipped -options.allowNoOutput = options.allowNoOutput.split(',') +options.allowNoOutput = options.allowNoOutput.split(",") # use outputPath as outDS -if not options.outDS.endswith('/'): - options.outDS = options.outDS + '/' +if not options.outDS.endswith("/"): + options.outDS = options.outDS + "/" # read datasets from file -if options.inDsTxt != '': +if options.inDsTxt != "": options.inDS = PsubUtils.readDsFromFile(options.inDsTxt) # not expand inDS when setting parent @@ -617,63 +1486,63 @@ # bulk submission ioList = [] -if options.inOutDsJson != '': +if options.inOutDsJson != "": options.bulkSubmission = True if options.bulkSubmission: - if options.inOutDsJson == '': + if options.inOutDsJson == "": tmpLog.error("--inOutDsJson is missing") sys.exit(EC_Config) - if options.eventPickEvtList != '': + if options.eventPickEvtList != "": 
tmpLog.error("cannnot use --eventPickEvtList and --inOutDsJson at the same time") sys.exit(EC_Config) ioList = MiscUtils.decodeJSON(options.inOutDsJson) for ioItem in ioList: - if not ioItem['outDS'].endswith('/'): - ioItem['outDS'] += '/' - options.inDS = ioList[0]['inDS'] - options.outDS = ioList[0]['outDS'] + if not ioItem["outDS"].endswith("/"): + ioItem["outDS"] += "/" + options.inDS = ioList[0]["inDS"] + options.outDS = ioList[0]["outDS"] else: - ioList = [{'inDS': options.inDS, 'outDS': options.outDS}] + ioList = [{"inDS": options.inDS, "outDS": options.outDS}] # error -if options.outDS == '': +if options.outDS == "": tmpLog.error("no outDS is given\n pathena [--inDS input] --outDS output myJobO.py") sys.exit(EC_Config) -if options.split < -1 : +if options.split < -1: tmpLog.error("Number of jobs should be a positive integer") sys.exit(EC_Config) -if options.pfnList != '': - if options.inDS != '': +if options.pfnList != "": + if options.inDS != "": tmpLog.error("--pfnList and --inDS cannot be used at the same time") sys.exit(EC_Config) if options.shipinput: tmpLog.error("--shipInput and --inDS cannot be used at the same time") sys.exit(EC_Config) - if options.site == 'AUTO': + if options.site == "AUTO": tmpLog.error("--site must be specified when --pfnList is used") sys.exit(EC_Config) # absolute path for PFN list usePfnList = False -if options.pfnList != '': +if options.pfnList != "": options.pfnList = os.path.realpath(options.pfnList) usePfnList = True # split options are mutually exclusive -if (options.nFilesPerJob > 0 and options.nEventsPerJob > 0 and options.nGBPerJob != -1): +if options.nFilesPerJob > 0 and options.nEventsPerJob > 0 and options.nGBPerJob != -1: tmpLog.error("split by files, split by events and split by file size can not be used simultaneously") sys.exit(EC_Config) # split options are mutually exclusive -if (options.nEventsPerJob > 0 and options.nGBPerJob != -1): +if options.nEventsPerJob > 0 and options.nGBPerJob != -1: tmpLog.error("split by events and split by file size can not be used simultaneously") sys.exit(EC_Config) # check nGBPerJob -if not options.nGBPerJob in [-1,'MAX']: +if not options.nGBPerJob in [-1, "MAX"]: # convert to int try: - if options.nGBPerJob != 'MAX': + if options.nGBPerJob != "MAX": options.nGBPerJob = int(options.nGBPerJob) except Exception: tmpLog.error("--nGBPerJob must be an integer or MAX") @@ -689,36 +1558,36 @@ # trf parameter if options.trf == False: - orig_trfStr = '' + orig_trfStr = "" else: orig_trfStr = options.trf # AMI event-level split if options.useAMIEventLevelSplit is None: - if options.inDS.startswith('data') or options.goodRunListXML != '': + if options.inDS.startswith("data") or options.goodRunListXML != "": # use AMI for real data since the number of events per file is not uniform options.useAMIEventLevelSplit = True else: options.useAMIEventLevelSplit = False # check DBRelease -if options.dbRelease != '' and (options.dbRelease.find(':') == -1 and options.dbRelease !='LATEST'): +if options.dbRelease != "" and (options.dbRelease.find(":") == -1 and options.dbRelease != "LATEST"): tmpLog.error("invalid argument for --dbRelease. 
Must be DatasetName:FileName or LATEST") sys.exit(EC_Config) # Good Run List -if options.goodRunListXML != '' and options.inDS != '': +if options.goodRunListXML != "" and options.inDS != "": tmpLog.error("cannnot use --goodRunListXML and --inDS at the same time") sys.exit(EC_Config) # event picking -if options.eventPickEvtList != '' and options.inDS != '': +if options.eventPickEvtList != "" and options.inDS != "": tmpLog.error("cannnot use --eventPickEvtList and --inDS at the same time") sys.exit(EC_Config) # param check for event picking -if options.eventPickEvtList != '': - if options.eventPickDataType == '': +if options.eventPickEvtList != "": + if options.eventPickDataType == "": tmpLog.error("--eventPickDataType must be specified") sys.exit(EC_Config) if options.trf != False: @@ -727,20 +1596,20 @@ # additional files -options.extFile = options.extFile.split(',') +options.extFile = options.extFile.split(",") try: - options.extFile.remove('') + options.extFile.remove("") except Exception: pass -options.extOutFile = re.sub(' ','',options.extOutFile) -options.extOutFile = options.extOutFile.split(',') +options.extOutFile = re.sub(" ", "", options.extOutFile) +options.extOutFile = options.extOutFile.split(",") try: - options.extOutFile.remove('') + options.extOutFile.remove("") except Exception: pass # user-specified merging script -if options.mergeScript != '': +if options.mergeScript != "": # enable merging options.mergeOutput = True # add it to extFile @@ -748,9 +1617,9 @@ options.extFile.append(options.mergeScript) # glue packages -options.gluePackages = options.gluePackages.split(',') +options.gluePackages = options.gluePackages.split(",") try: - options.gluePackages.remove('') + options.gluePackages.remove("") except Exception: pass @@ -762,29 +1631,29 @@ AthenaUtils.enableExtendedExtStreamName() # file list -tmpList = options.filelist.split(',') +tmpList = options.filelist.split(",") options.filelist = [] for tmpItem in tmpList: - if tmpItem == '': + if tmpItem == "": continue # wild card - tmpItem = tmpItem.replace('*','.*') + tmpItem = tmpItem.replace("*", ".*") # append options.filelist.append(tmpItem) # read file list from file -if options.inputFileList != '': +if options.inputFileList != "": rFile = open(options.inputFileList) for line in rFile: - line = re.sub('\n','',line) + line = re.sub("\n", "", line) line = line.strip() - if line != '': + if line != "": options.filelist.append(line) rFile.close() # suppressed streams -options.supStream = options.supStream.upper().split(',') +options.supStream = options.supStream.upper().split(",") try: - options.supStream.remove('') + options.supStream.remove("") except Exception: pass @@ -807,18 +1676,20 @@ options.nFilesPerJob = 1 # check -if options.inDS != '' and options.split > 0 and options.nFilesPerJob < 0 and options.nfiles == 0 and options.nEventsPerJob < 0: +if options.inDS != "" and options.split > 0 and options.nFilesPerJob < 0 and options.nfiles == 0 and options.nEventsPerJob < 0: tmpLog.error("--split requires --nFilesPerJob or --nFiles or --nEventsPerJob when --inDS is specified") sys.exit(EC_Config) # remove whitespaces -if options.inDS != '': - options.inDS = options.inDS.replace(' ', '') +if options.inDS != "": + options.inDS = options.inDS.replace(" ", "") # warning if options.nFilesPerJob > 0 and options.nFilesPerJob < 5: - tmpLog.warning("Very small --nFilesPerJob tends to generate so many short jobs which could send your task to exhausted state " - "after scouts are done, since short jobs are problematic for the 
grid. Please consider not to use the option.") + tmpLog.warning( + "Very small --nFilesPerJob tends to generate so many short jobs which could send your task to exhausted state " + "after scouts are done, since short jobs are problematic for the grid. Please consider not to use the option." + ) # check grid-proxy PsubUtils.check_proxy(options.verbose, options.vomsRoles) @@ -826,22 +1697,22 @@ # get nickname nickName = PsubUtils.getNickname(options.verbose) -if nickName == '': +if nickName == "": sys.exit(EC_Config) # set Rucio accounting -PsubUtils.setRucioAccount(nickName,'pathena',True) +PsubUtils.setRucioAccount(nickName, "pathena", True) # convert in/outTarBall to full path -if options.inTarBall != '': +if options.inTarBall != "": options.inTarBall = os.path.abspath(os.path.expanduser(options.inTarBall)) -if options.outTarBall != '': +if options.outTarBall != "": options.outTarBall = os.path.abspath(os.path.expanduser(options.outTarBall)) # convert n/outRunConfig to full path -if options.inRunConfig != '': +if options.inRunConfig != "": options.inRunConfig = os.path.abspath(os.path.expanduser(options.inRunConfig)) -if options.outRunConfig != '': +if options.outRunConfig != "": options.outRunConfig = os.path.abspath(os.path.expanduser(options.outRunConfig)) # check maxCpuCount @@ -850,102 +1721,112 @@ sys.exit(EC_Config) # create tmp dir -if options.tmpDir == '': - tmpDir = '%s/%s' % (currentDir,MiscUtils.wrappedUuidGen()) +if options.tmpDir == "": + tmpDir = "%s/%s" % (currentDir, MiscUtils.wrappedUuidGen()) else: - tmpDir = '%s/%s' % (os.path.abspath(options.tmpDir),MiscUtils.wrappedUuidGen()) + tmpDir = "%s/%s" % (os.path.abspath(options.tmpDir), MiscUtils.wrappedUuidGen()) os.makedirs(tmpDir) # set tmp dir in Client Client.setGlobalTmpDir(tmpDir) + # exit action def _onExit(dir, files, del_command): for tmpFile in files: - del_command('rm -rf %s' % tmpFile) - del_command('rm -rf %s' % dir) + del_command("rm -rf %s" % tmpFile) + del_command("rm -rf %s" % dir) atexit.register(_onExit, tmpDir, delFilesOnExit, commands_get_output) # get Athena versions -if options.verbose or options.containerImage == '': +if options.verbose or options.containerImage == "": cmt_verbose = True else: cmt_verbose = False -stA,retA = AthenaUtils.getAthenaVer(cmt_verbose) +stA, retA = AthenaUtils.getAthenaVer(cmt_verbose) # failed if not stA: - if options.containerImage == '': + if options.containerImage == "": sys.exit(EC_CMT) # disable Athena checks when using container image without Athena runtime env - retA = {'workArea': os.getcwd(), 'athenaVer': '', 'groupArea': '', 'cacheVer':'', 'nightVer': '', 'cmtConfig': ''} - -workArea = retA['workArea'] -if retA['athenaVer']: - athenaVer = 'Atlas-%s' % retA['athenaVer'] + retA = { + "workArea": os.getcwd(), + "athenaVer": "", + "groupArea": "", + "cacheVer": "", + "nightVer": "", + "cmtConfig": "", + } + +workArea = retA["workArea"] +if retA["athenaVer"]: + athenaVer = "Atlas-%s" % retA["athenaVer"] else: - athenaVer = '' -groupArea = retA['groupArea'] -cacheVer = retA['cacheVer'] -nightVer = retA['nightVer'] + athenaVer = "" +groupArea = retA["groupArea"] +cacheVer = retA["cacheVer"] +nightVer = retA["nightVer"] # overwrite with athenaTag -if options.athenaTag != '': +if options.athenaTag != "": athenaVer, cacheVer, nightVer = AthenaUtils.parse_athena_tag(options.athenaTag, options.verbose, tmpLog) # set CMTCONFIG -options.cmtConfig = AthenaUtils.getCmtConfig(athenaVer,cacheVer,nightVer,options.cmtConfig,verbose=options.verbose) +options.cmtConfig = 
AthenaUtils.getCmtConfig(athenaVer, cacheVer, nightVer, options.cmtConfig, verbose=options.verbose) # check CMTCONFIG -if not AthenaUtils.checkCmtConfig(retA['cmtConfig'],options.cmtConfig,options.noBuild): +if not AthenaUtils.checkCmtConfig(retA["cmtConfig"], options.cmtConfig, options.noBuild): sys.exit(EC_CMT) -tmpLog.info('using CMTCONFIG=%s' % options.cmtConfig) +tmpLog.info("using CMTCONFIG=%s" % options.cmtConfig) # get run directory # remove special characters -sString=re.sub('[\+]','.',workArea) -runDir = re.sub('^%s' % sString, '', currentDir) -if runDir == currentDir and not AthenaUtils.useCMake() and options.containerImage == '': - errMsg = "You need to run pathena in a directory under %s. " % workArea +sString = re.sub("[\+]", ".", workArea) +runDir = re.sub("^%s" % sString, "", currentDir) +if runDir == currentDir and not AthenaUtils.useCMake() and options.containerImage == "": + errMsg = "You need to run pathena in a directory under %s. " % workArea errMsg += "If '%s' is a read-only directory, perhaps you did setup Athena without --testarea or the 'here' tag of asetup." % workArea tmpLog.error(errMsg) sys.exit(EC_Config) -elif runDir == '': - runDir = '.' -elif runDir.startswith('/'): +elif runDir == "": + runDir = "." +elif runDir.startswith("/"): runDir = runDir[1:] -runDir = runDir+'/' +runDir = runDir + "/" # event picking -if options.eventPickEvtList != '': - epLockedBy = 'pathena' +if options.eventPickEvtList != "": + epLockedBy = "pathena" if not options.noSubmit: - epStat,epOutput = Client.requestEventPicking(options.eventPickEvtList, - options.eventPickDataType, - options.eventPickStreamName, - options.eventPickDS, - options.eventPickAmiTag, - options.filelist, - '', - options.outDS, - epLockedBy, - fullExecString, - 1, - options.eventPickWithGUID, - options.ei_api, - options.verbose) + epStat, epOutput = Client.requestEventPicking( + options.eventPickEvtList, + options.eventPickDataType, + options.eventPickStreamName, + options.eventPickDS, + options.eventPickAmiTag, + options.filelist, + "", + options.outDS, + epLockedBy, + fullExecString, + 1, + options.eventPickWithGUID, + options.ei_api, + options.verbose, + ) # set input dataset options.inDS = epOutput else: - options.inDS = 'dummy' - tmpLog.info('requested Event Picking service to stage input as %s' % options.inDS) + options.inDS = "dummy" + tmpLog.info("requested Event Picking service to stage input as %s" % options.inDS) # make run/event list file for event picking - eventPickRunEvtDat = '%s/ep_%s.dat' % (currentDir,MiscUtils.wrappedUuidGen()) + eventPickRunEvtDat = "%s/ep_%s.dat" % (currentDir, MiscUtils.wrappedUuidGen()) evI = open(options.eventPickEvtList) - evO = open(eventPickRunEvtDat,'w') + evO = open(eventPickRunEvtDat, "w") evO.write(evI.read()) # close evI.close() @@ -954,38 +1835,45 @@ def _onExit(dir, files, del_command): delFilesOnExit.append(eventPickRunEvtDat) # get job options -jobO = '' +jobO = "" if options.trf: # replace : to = for backward compatibility - for optArg in ['DB','RNDM']: - options.trf = re.sub('%'+optArg+':','%'+optArg+'=',options.trf) + for optArg in ["DB", "RNDM"]: + options.trf = re.sub("%" + optArg + ":", "%" + optArg + "=", options.trf) # use trf's parameters jobO = options.trf else: # get jobOs from command-line - if options.preConfig != '': - jobO += '-p %s ' % options.preConfig - if options.singleLine != '': - options.singleLine = options.singleLine.replace('"','\'') + if options.preConfig != "": + jobO += "-p %s " % options.preConfig + if options.singleLine != "": + 
options.singleLine = options.singleLine.replace('"', "'") for arg in args: - jobO += ' %s' % arg + jobO += " %s" % arg if jobO == "": tmpLog.error("no jobOptions is given\n pathena [--inDS input] --outDS output myJobO.py") sys.exit(EC_Config) -if options.inRunConfig == '': +if options.inRunConfig == "": # extract run configuration - tmpLog.info('extracting run configuration') + tmpLog.info("extracting run configuration") # run ConfigExtractor for normal jobO - ret, runConfig = AthenaUtils.extractRunConfig(jobO, options.supStream, options.shipinput, - options.trf, verbose=options.verbose, - useAMI=options.useAMIAutoConf, inDS=options.inDS, - tmpDir=tmpDir, one_liner=options.singleLine) + ret, runConfig = AthenaUtils.extractRunConfig( + jobO, + options.supStream, + options.shipinput, + options.trf, + verbose=options.verbose, + useAMI=options.useAMIAutoConf, + inDS=options.inDS, + tmpDir=tmpDir, + one_liner=options.singleLine, + ) # save runconfig - if options.outRunConfig != '': - cFile = open(options.outRunConfig,'w') - pickle.dump(runConfig,cFile) + if options.outRunConfig != "": + cFile = open(options.outRunConfig, "w") + pickle.dump(runConfig, cFile) cFile.close() else: # load from file @@ -1002,9 +1890,9 @@ def _onExit(dir, files, del_command): for fileName in runConfig.other.inputFiles: # append .root for tag files if runConfig.other.inColl: - match = re.search('\.root(\.\d+)*$',fileName) + match = re.search("\.root(\.\d+)*$", fileName) if match is None: - fileName = '%s.root' % fileName + fileName = "%s.root" % fileName # check ship files in the current dir if not os.path.exists(fileName): tmpLog.error("%s needs exist in the current directory when --shipInput is used" % fileName) @@ -1012,9 +1900,9 @@ def _onExit(dir, files, del_command): # append to extFile options.extFile.append(fileName) if not runConfig.input.shipFiles: - runConfig.input['shipFiles'] = [] - if fileName not in runConfig.input['shipFiles']: - runConfig.input['shipFiles'].append(fileName) + runConfig.input["shipFiles"] = [] + if fileName not in runConfig.input["shipFiles"]: + runConfig.input["shipFiles"].append(fileName) # generator files if runConfig.other.rndmGenFile: # append to extFile @@ -1030,7 +1918,7 @@ def _onExit(dir, files, del_command): options.addPoolFC += ",%s" % fileName # set default ref name if not runConfig.input.collRefName: - runConfig.input.collRefName = 'Token' + runConfig.input.collRefName = "Token" # check dupication in extOutFile if runConfig.output.alloutputs != False: if options.verbose: @@ -1044,76 +1932,76 @@ def _onExit(dir, files, del_command): else: # parse parameters for trf # AMI tag - newJobO = '' - for tmpString in jobO.split(';'): - match = re.search(' AMI=',tmpString) + newJobO = "" + for tmpString in jobO.split(";"): + match = re.search(" AMI=", tmpString) if match is None: # use original command - newJobO += (tmpString + ';') + newJobO += tmpString + ";" else: - tmpLog.info('getting configration from AMI') + tmpLog.info("getting configration from AMI") # get configration using GetCommand.py - com = 'GetCommand.py ' + re.sub('^[^ ]+ ','',tmpString.strip()) + com = "GetCommand.py " + re.sub("^[^ ]+ ", "", tmpString.strip()) if options.verbose: tmpLog.debug(com) - amiSt,amiOut = commands_get_status_output_with_env(com) + amiSt, amiOut = commands_get_status_output_with_env(com) amiSt %= 255 if amiSt != 0: tmpLog.error(amiOut) - errSt = 'Failed to get configuration from AMI. ' - errSt += 'Using AMI=tag in --trf is disallowed since it may overload the AMI server. 
' - errSt += 'Please use explicit configuration parameters in --trf' + errSt = "Failed to get configuration from AMI. " + errSt += "Using AMI=tag in --trf is disallowed since it may overload the AMI server. " + errSt += "Please use explicit configuration parameters in --trf" tmpLog.error(errSt) sys.exit(EC_Config) # get full command string - fullCommand = '' - for amiStr in amiOut.split('\n'): - if amiStr != '' and not amiStr.startswith('#') and not amiStr.startswith('*'): + fullCommand = "" + for amiStr in amiOut.split("\n"): + if amiStr != "" and not amiStr.startswith("#") and not amiStr.startswith("*"): fullCommand = amiStr # failed to extract configration - if fullCommand == '': + if fullCommand == "": tmpLog.error(amiOut) - errSt = "Failed to extract configuration from AMI's output" + errSt = "Failed to extract configuration from AMI's output" tmpLog.error(errSt) sys.exit(EC_Config) # replace - newJobO += (fullCommand + ';') + newJobO += fullCommand + ";" # remove redundant ; newJobO = newJobO[:-1] # replace - if newJobO != '': + if newJobO != "": jobO = newJobO if options.verbose: - tmpLog.debug('new jobO : '+jobO) + tmpLog.debug("new jobO : " + jobO) # output oneOut = False # replace ; for job sequence - tmpString = re.sub(';',' ',jobO) + tmpString = re.sub(";", " ", jobO) # look for --outputDAODFile and --reductionConf - match = re.search('--outputDAODFile[ =\"\']+([^ \"\',]+)',tmpString) + match = re.search("--outputDAODFile[ =\"']+([^ \"',]+)", tmpString) outputDAODFile = None if match is not None: outputDAODFile = match.group(1) # remove %OUT - outputDAODFile = re.sub(r'%OUT\.', '', outputDAODFile) - match = re.search(r'(--reductionConf|--formats)[ =\"\']+([^ \"\']+)', tmpString) + outputDAODFile = re.sub(r"%OUT\.", "", outputDAODFile) + match = re.search(r"(--reductionConf|--formats)[ =\"\']+([^ \"\']+)", tmpString) if match is not None: # remove %OUT from outputDAODFile - jobO = jobO.replace('%OUT.'+outputDAODFile, outputDAODFile) + jobO = jobO.replace("%OUT." + outputDAODFile, outputDAODFile) # loop over all configs reductionConf = match.group(2) - for reductionItem in reductionConf.split(','): + for reductionItem in reductionConf.split(","): reductionItem = reductionItem.strip() - if reductionItem == '': + if reductionItem == "": continue # make actual output names for derivation - tmpOutName = 'DAOD_{0}.{1}'.format(reductionItem, outputDAODFile) + tmpOutName = "DAOD_{0}.{1}".format(reductionItem, outputDAODFile) if tmpOutName not in options.extOutFile: options.extOutFile.append(tmpOutName) oneOut = True # look for %OUT for tmpItem in tmpString.split(): - match = re.search('\%OUT\.([^ \"\',]+)',tmpItem) + match = re.search("\%OUT\.([^ \"',]+)", tmpItem) if match: # append basenames to extOutFile tmpOutName = match.group(1) @@ -1128,25 +2016,29 @@ def _onExit(dir, files, del_command): tmpLog.warning("argument of --trf doesn't contain any %OUT") # check for maxEvents and skipEvents if options.nEventsPerJob > 0 and options.nEventsPerJob < options.nEventsPerFile: - if '%SKIPEVENTS' not in jobO: - tmpLog.warning("Argument of --trf doesn't contain %SKIPEVENTS. All jobs with the same input file " - "may process the same events unless first events are skipped by using a trf parameter " - "like skipEvents or something") - if 'maxEvents' not in jobO: - tmpLog.warning("Argument of --trf doesn't contain maxEvents or something equivalent. Each job may process all events " - "in the input file. Note that --nEventsPerJob doesn't automatically append maxEvents " - "to the argument. 
Please ignore this message if you limit the number of events " - "in each job by using another trf parameter") + if "%SKIPEVENTS" not in jobO: + tmpLog.warning( + "Argument of --trf doesn't contain %SKIPEVENTS. All jobs with the same input file " + "may process the same events unless first events are skipped by using a trf parameter " + "like skipEvents or something" + ) + if "maxEvents" not in jobO: + tmpLog.warning( + "Argument of --trf doesn't contain maxEvents or something equivalent. Each job may process all events " + "in the input file. Note that --nEventsPerJob doesn't automatically append maxEvents " + "to the argument. Please ignore this message if you limit the number of events " + "in each job by using another trf parameter" + ) # no output jobs tmpOutKeys = list(runConfig.output) -for tmpIgnorKey in ['outUserData','alloutputs']: +for tmpIgnorKey in ["outUserData", "alloutputs"]: try: tmpOutKeys.remove(tmpIgnorKey) except Exception: pass if tmpOutKeys == [] and options.extOutFile == [] and not options.noOutput: - errStr = "No output stream was extracted from jobOs or --trf. " + errStr = "No output stream was extracted from jobOs or --trf. " if not options.trf: errStr += "If your job defines an output without Athena framework " errStr += "(e.g., using ROOT.TFile.Open instead of THistSvc) " @@ -1160,7 +2052,7 @@ def _onExit(dir, files, del_command): # set extOutFile to runConfig if options.extOutFile != []: - runConfig.output['extOutFile'] = options.extOutFile + runConfig.output["extOutFile"] = options.extOutFile # check ship files in the current dir if not runConfig.input.shipFiles: @@ -1171,7 +2063,7 @@ def _onExit(dir, files, del_command): sys.exit(EC_Extractor) # get random number -runConfig.other['rndmNumbers'] = [] +runConfig.other["rndmNumbers"] = [] if not runConfig.other.rndmStream: runConfig.other.rndmStream = [] if len(runConfig.other.rndmStream) != 0: @@ -1180,7 +2072,7 @@ def _onExit(dir, files, del_command): print("Initial random seeds need to be defined.") print("Enter two numbers for each random stream.") print(" e.g., PYTHIA : 4789899 989240512") - print('') + print("") for stream in runConfig.other.rndmStream: if options.norandom: # enter manually @@ -1190,18 +2082,18 @@ def _onExit(dir, files, del_command): if len(num) == 2: break print(" Two numbers are needed") - runConfig.other.rndmNumbers.append([int(num[0]),int(num[1])]) + runConfig.other.rndmNumbers.append([int(num[0]), int(num[1])]) else: # automatic - runConfig.other.rndmNumbers.append([random.randint(1,5000000),random.randint(1,5000000)]) + runConfig.other.rndmNumbers.append([random.randint(1, 5000000), random.randint(1, 5000000)]) if options.norandom: - print('') + print("") if runConfig.other.G4RandomSeeds == True: if options.norandom: - print('') + print("") print("Initial G4 random seeds need to be defined.") print("Enter one positive number.") - print('') + print("") # enter manually while True: num = input("SimFlags.SeedsG4=") @@ -1212,88 +2104,71 @@ def _onExit(dir, files, del_command): break except Exception: pass - print('') + print("") else: # automatic - runConfig.other.G4RandomSeeds = random.randint(1,10000) + runConfig.other.G4RandomSeeds = random.randint(1, 10000) else: # set -1 to disable G4 Random Seeds runConfig.other.G4RandomSeeds = -1 - ##################################################################### # input datasets -if options.inDS != '' or options.shipinput or options.pfnList != '': +if options.inDS != "" or options.shipinput or options.pfnList != "": # minimum bias dataset - 
if options.trf and jobO.find('%MININ') != -1: + if options.trf and jobO.find("%MININ") != -1: runConfig.input.inMinBias = True if runConfig.input.inMinBias: - options.minDS,options.nMin = MiscUtils.getDatasetNameAndNumFiles(options.minDS, - options.nMin, - 'Minimum-Bias') + options.minDS, options.nMin = MiscUtils.getDatasetNameAndNumFiles(options.minDS, options.nMin, "Minimum-Bias") # low pT minimum bias dataset - if options.trf and jobO.find('%LOMBIN') != -1: + if options.trf and jobO.find("%LOMBIN") != -1: runConfig.input.inLoMinBias = True if runConfig.input.inLoMinBias: - options.lowMinDS,options.nLowMin = MiscUtils.getDatasetNameAndNumFiles(options.lowMinDS, - options.nLowMin, - 'Low pT Minimum-Bias') + options.lowMinDS, options.nLowMin = MiscUtils.getDatasetNameAndNumFiles(options.lowMinDS, options.nLowMin, "Low pT Minimum-Bias") # high pT minimum bias dataset - if options.trf and jobO.find('%HIMBIN') != -1: + if options.trf and jobO.find("%HIMBIN") != -1: runConfig.input.inHiMinBias = True if runConfig.input.inHiMinBias: - options.highMinDS,options.nHighMin = MiscUtils.getDatasetNameAndNumFiles(options.highMinDS, - options.nHighMin, - 'High pT Minimum-Bias') + options.highMinDS, options.nHighMin = MiscUtils.getDatasetNameAndNumFiles(options.highMinDS, options.nHighMin, "High pT Minimum-Bias") # cavern dataset - if options.trf and jobO.find('%CAVIN') != -1: + if options.trf and jobO.find("%CAVIN") != -1: runConfig.input.inCavern = True if runConfig.input.inCavern: - options.cavDS,options.nCav = MiscUtils.getDatasetNameAndNumFiles(options.cavDS, - options.nCav, - 'Cavern') + options.cavDS, options.nCav = MiscUtils.getDatasetNameAndNumFiles(options.cavDS, options.nCav, "Cavern") # beam halo dataset - if options.trf and jobO.find('%BHIN') != -1: + if options.trf and jobO.find("%BHIN") != -1: runConfig.input.inBeamHalo = True if runConfig.input.inBeamHalo: # use common DS if options.useCommonHalo: - options.beamHaloDS,options.nBeamHalo = MiscUtils.getDatasetNameAndNumFiles(options.beamHaloDS, - options.nBeamHalo, - 'BeamHalo') + options.beamHaloDS, options.nBeamHalo = MiscUtils.getDatasetNameAndNumFiles(options.beamHaloDS, options.nBeamHalo, "BeamHalo") else: # get DS for A-side - options.beamHaloADS,options.nBeamHaloA = MiscUtils.getDatasetNameAndNumFiles(options.beamHaloADS, - options.nBeamHaloA, - 'BeamHalo A-side') + ( + options.beamHaloADS, + options.nBeamHaloA, + ) = MiscUtils.getDatasetNameAndNumFiles(options.beamHaloADS, options.nBeamHaloA, "BeamHalo A-side") # get DS for C-side - options.beamHaloCDS,options.nBeamHaloC = MiscUtils.getDatasetNameAndNumFiles(options.beamHaloCDS, - options.nBeamHaloC, - 'BeamHalo C-side') + ( + options.beamHaloCDS, + options.nBeamHaloC, + ) = MiscUtils.getDatasetNameAndNumFiles(options.beamHaloCDS, options.nBeamHaloC, "BeamHalo C-side") # beam gas dataset - if options.trf and jobO.find('%BGIN') != -1: + if options.trf and jobO.find("%BGIN") != -1: runConfig.input.inBeamGas = True if runConfig.input.inBeamGas: # use common DS if options.useCommonGas: - options.beamGasDS,options.nBeamGas = MiscUtils.getDatasetNameAndNumFiles(options.beamGasDS, - options.nBeamGas, - 'BeamGas') + options.beamGasDS, options.nBeamGas = MiscUtils.getDatasetNameAndNumFiles(options.beamGasDS, options.nBeamGas, "BeamGas") else: # get DS for H - options.beamGasHDS,options.nBeamGasH = MiscUtils.getDatasetNameAndNumFiles(options.beamGasHDS, - options.nBeamGasH, - 'BeamGas Hydrogen') + options.beamGasHDS, options.nBeamGasH = MiscUtils.getDatasetNameAndNumFiles(options.beamGasHDS, 
options.nBeamGasH, "BeamGas Hydrogen") # get DS for C - options.beamGasCDS,options.nBeamGasC = MiscUtils.getDatasetNameAndNumFiles(options.beamGasCDS, - options.nBeamGasC, - 'BeamGas Carbon') + options.beamGasCDS, options.nBeamGasC = MiscUtils.getDatasetNameAndNumFiles(options.beamGasCDS, options.nBeamGasC, "BeamGas Carbon") # get DS for O - options.beamGasODS,options.nBeamGasO = MiscUtils.getDatasetNameAndNumFiles(options.beamGasODS, - options.nBeamGasO, - 'BeamGas Oxygen') + options.beamGasODS, options.nBeamGasO = MiscUtils.getDatasetNameAndNumFiles(options.beamGasODS, options.nBeamGasO, "BeamGas Oxygen") # general secondaries tmpStat, tmpOut = parse_secondary_datasets_opt(options.secondaryDSs) @@ -1308,13 +2183,13 @@ def _onExit(dir, files, del_command): # archive sources and send it to HTTP-reachable location if True: - if options.inTarBall == '': + if options.inTarBall == "": # extract jobOs with full pathnames for tmpItem in jobO.split(): - if re.search('^/.*\.py$',tmpItem) is not None: + if re.search("^/.*\.py$", tmpItem) is not None: # set random name to avoid overwriting - tmpName = tmpItem.split('/')[-1] - tmpName = '%s_%s' % (MiscUtils.wrappedUuidGen(),tmpName) + tmpName = tmpItem.split("/")[-1] + tmpName = "%s_%s" % (MiscUtils.wrappedUuidGen(), tmpName) # set AthenaUtils.fullPathJobOs[tmpItem] = tmpName @@ -1326,38 +2201,56 @@ def _onExit(dir, files, del_command): archiveName = "" if not (options.noBuild and not options.noCompile): - # archive with cpack + # archive with cpack if AthenaUtils.useCMake(): - archiveName,archiveFullName = AthenaUtils.archiveWithCpack(True,tmpDir,options.verbose) + archiveName, archiveFullName = AthenaUtils.archiveWithCpack(True, tmpDir, options.verbose) # archive sources - archiveName,archiveFullName = AthenaUtils.archiveSourceFiles(workArea,runDir,currentDir,tmpDir, - options.verbose,options.gluePackages, - archiveName=archiveName) + archiveName, archiveFullName = AthenaUtils.archiveSourceFiles( + workArea, + runDir, + currentDir, + tmpDir, + options.verbose, + options.gluePackages, + archiveName=archiveName, + ) else: - # archive with cpack + # archive with cpack if AthenaUtils.useCMake(): - archiveName,archiveFullName = AthenaUtils.archiveWithCpack(False,tmpDir,options.verbose) + archiveName, archiveFullName = AthenaUtils.archiveWithCpack(False, tmpDir, options.verbose) # archive jobO - archiveName,archiveFullName = AthenaUtils.archiveJobOFiles(workArea,runDir,currentDir, - tmpDir,options.verbose, - archiveName=archiveName) + archiveName, archiveFullName = AthenaUtils.archiveJobOFiles( + workArea, + runDir, + currentDir, + tmpDir, + options.verbose, + archiveName=archiveName, + ) # archive InstallArea - AthenaUtils.archiveInstallArea(workArea,groupArea,archiveName,archiveFullName, - tmpDir,options.noBuild,options.verbose) + AthenaUtils.archiveInstallArea( + workArea, + groupArea, + archiveName, + archiveFullName, + tmpDir, + options.noBuild, + options.verbose, + ) # back to tmp dir os.chdir(tmpDir) # remove some athena specific files AthenaUtils.deleteAthenaStuff(currentDir) if not os.path.exists(archiveName): - commands_get_status_output('tar -cf {0} -T /dev/null'.format(archiveName)) + commands_get_status_output("tar -cf {0} -T /dev/null".format(archiveName)) # compress - status,out = commands_get_status_output('gzip -f %s' % archiveName) + status, out = commands_get_status_output("gzip -f %s" % archiveName) if status != 0 or options.verbose: print(out) - archiveName += '.gz' + archiveName += ".gz" # check archive - status,out = 
commands_get_status_output('ls -l %s' % archiveName) + status, out = commands_get_status_output("ls -l %s" % archiveName) if status != 0: print(out) tmpLog.error("Failed to archive working area.\n If you see 'Disk quota exceeded', try '--tmpDir /tmp'") @@ -1366,7 +2259,7 @@ def _onExit(dir, files, del_command): # check symlinks tmpLog.info("checking sandbox") for _ in range(5): - status, out = commands_get_status_output('tar tvfz %s' % archiveName) + status, out = commands_get_status_output("tar tvfz %s" % archiveName) if status == 0: break time.sleep(5) @@ -1374,12 +2267,12 @@ def _onExit(dir, files, del_command): tmpLog.error("Failed to expand sandbox. {0}".format(out)) sys.exit(EC_Archive) symlinks = [] - for line in out.split('\n'): + for line in out.split("\n"): items = line.split() - if len(items) > 0 and items[0].startswith('l') and items[-1].startswith('/'): + if len(items) > 0 and items[0].startswith("l") and items[-1].startswith("/"): symlinks.append(line) if symlinks != []: - tmpStr = "Found some unresolved symlinks which may cause a problem\n" + tmpStr = "Found some unresolved symlinks which may cause a problem\n" tmpStr += " See, e.g., http://savannah.cern.ch/bugs/?43885\n" tmpStr += " Please ignore if you believe they are harmless" tmpLog.warning(tmpStr) @@ -1390,44 +2283,48 @@ def _onExit(dir, files, del_command): os.chdir(tmpDir) # use a saved copy if not (options.noBuild and not options.noCompile): - archiveName = 'sources.%s.tar' % MiscUtils.wrappedUuidGen() - archiveFullName = "%s/%s" % (tmpDir,archiveName) + archiveName = "sources.%s.tar" % MiscUtils.wrappedUuidGen() + archiveFullName = "%s/%s" % (tmpDir, archiveName) else: - archiveName = 'jobO.%s.tar' % MiscUtils.wrappedUuidGen() - archiveFullName = "%s/%s" % (tmpDir,archiveName) + archiveName = "jobO.%s.tar" % MiscUtils.wrappedUuidGen() + archiveFullName = "%s/%s" % (tmpDir, archiveName) # make copy to avoid name duplication - shutil.copy(options.inTarBall,archiveFullName) + shutil.copy(options.inTarBall, archiveFullName) # save - if options.outTarBall != '': - shutil.copy(archiveName,options.outTarBall) + if options.outTarBall != "": + shutil.copy(archiveName, options.outTarBall) # put sources/jobO via HTTP POST if not options.noSubmit: tmpLog.info("uploading sandbox") - status,out = Client.putFile(archiveName,options.verbose,useCacheSrv=True,reuseSandbox=True) - if out.startswith('NewFileName:'): + status, out = Client.putFile(archiveName, options.verbose, useCacheSrv=True, reuseSandbox=True) + if out.startswith("NewFileName:"): # found the same input sandbox to reuse - archiveName = out.split(':')[-1] - elif out != 'True': + archiveName = out.split(":")[-1] + elif out != "True": # failed print(out) tmpLog.error("Failed with %s" % status) sys.exit(EC_Post) # good run list - if options.goodRunListXML != '': - options.goodRunListXML = PsubUtils.uploadGzippedFile(options.goodRunListXML,currentDir,tmpLog,delFilesOnExit, - options.noSubmit,options.verbose) - - + if options.goodRunListXML != "": + options.goodRunListXML = PsubUtils.uploadGzippedFile( + options.goodRunListXML, + currentDir, + tmpLog, + delFilesOnExit, + options.noSubmit, + options.verbose, + ) # special handling -specialHandling = '' +specialHandling = "" if options.express: - specialHandling += 'express,' + specialHandling += "express," if options.debugMode: - specialHandling += 'debug,' + specialHandling += "debug," specialHandling = specialHandling[:-1] @@ -1435,184 +2332,197 @@ def _onExit(dir, files, del_command): # submit jobs # append tmpdir to 
import taskbuffer module -sys.path = [tmpDir]+sys.path +sys.path = [tmpDir] + sys.path # make task taskParamMap = {} -taskParamMap['taskName'] = options.outDS +taskParamMap["taskName"] = options.outDS if not options.allowTaskDuplication: - taskParamMap['uniqueTaskName'] = True -taskParamMap['vo'] = 'atlas' -if options.containerImage == '': - taskParamMap['architecture'] = AthenaUtils.getCmtConfigImg(athenaVer,cacheVer,nightVer,options.cmtConfig, - architecture=options.architecture) + taskParamMap["uniqueTaskName"] = True +taskParamMap["vo"] = "atlas" +if options.containerImage == "": + taskParamMap["architecture"] = AthenaUtils.getCmtConfigImg( + athenaVer, + cacheVer, + nightVer, + options.cmtConfig, + architecture=options.architecture, + ) else: - taskParamMap['architecture'] = options.architecture - taskParamMap['container_name'] = options.containerImage -taskParamMap['transUses'] = athenaVer -taskParamMap['transHome'] = 'AnalysisTransforms'+cacheVer+nightVer -taskParamMap['processingType'] = 'panda-client-{0}-jedi-athena'.format(PandaToolsPkgInfo.release_version) + taskParamMap["architecture"] = options.architecture + taskParamMap["container_name"] = options.containerImage +taskParamMap["transUses"] = athenaVer +taskParamMap["transHome"] = "AnalysisTransforms" + cacheVer + nightVer +taskParamMap["processingType"] = "panda-client-{0}-jedi-athena".format(PandaToolsPkgInfo.release_version) if options.trf: - taskParamMap['processingType'] += '-trf' -if options.eventPickEvtList != '': - taskParamMap['processingType'] += '-evp' - taskParamMap['waitInput'] = 1 -if options.goodRunListXML != '': - taskParamMap['processingType'] += '-grl' -if options.prodSourceLabel == '': - taskParamMap['prodSourceLabel'] = 'user' + taskParamMap["processingType"] += "-trf" +if options.eventPickEvtList != "": + taskParamMap["processingType"] += "-evp" + taskParamMap["waitInput"] = 1 +if options.goodRunListXML != "": + taskParamMap["processingType"] += "-grl" +if options.prodSourceLabel == "": + taskParamMap["prodSourceLabel"] = "user" else: - taskParamMap['prodSourceLabel'] = options.prodSourceLabel -if options.site != 'AUTO': - taskParamMap['site'] = options.site + taskParamMap["prodSourceLabel"] = options.prodSourceLabel +if options.site != "AUTO": + taskParamMap["site"] = options.site else: - taskParamMap['site'] = None -taskParamMap['excludedSite'] = options.excludedSite + taskParamMap["site"] = None +taskParamMap["excludedSite"] = options.excludedSite if includedSite is not None and includedSite != []: - taskParamMap['includedSite'] = includedSite + taskParamMap["includedSite"] = includedSite else: - taskParamMap['includedSite'] = None + taskParamMap["includedSite"] = None if options.priority is not None: - taskParamMap['currentPriority'] = options.priority + taskParamMap["currentPriority"] = options.priority if options.nfiles > 0: - taskParamMap['nFiles'] = options.nfiles + taskParamMap["nFiles"] = options.nfiles if options.nFilesPerJob > 0: - taskParamMap['nFilesPerJob'] = options.nFilesPerJob -if not options.nGBPerJob in [-1,'MAX']: + taskParamMap["nFilesPerJob"] = options.nFilesPerJob +if not options.nGBPerJob in [-1, "MAX"]: # don't set MAX since it is the defalt on the server side - taskParamMap['nGBPerJob'] = options.nGBPerJob + taskParamMap["nGBPerJob"] = options.nGBPerJob if options.nEventsPerJob > 0: - taskParamMap['nEventsPerJob'] = options.nEventsPerJob + taskParamMap["nEventsPerJob"] = options.nEventsPerJob if options.nEventsPerFile <= 0: - taskParamMap['useRealNumEvents'] = True + 
taskParamMap["useRealNumEvents"] = True if options.nEventsPerFile > 0: - taskParamMap['nEventsPerFile'] = options.nEventsPerFile + taskParamMap["nEventsPerFile"] = options.nEventsPerFile if options.split > 0 and options.nEventsPerJob > 0: - taskParamMap['nEvents'] = options.split*options.nEventsPerJob -taskParamMap['cliParams'] = fullExecString + taskParamMap["nEvents"] = options.split * options.nEventsPerJob +taskParamMap["cliParams"] = fullExecString if options.noEmail: - taskParamMap['noEmail'] = True + taskParamMap["noEmail"] = True if options.skipScout: - taskParamMap['skipScout'] = True + taskParamMap["skipScout"] = True if options.respectSplitRule: - taskParamMap['respectSplitRule'] = True + taskParamMap["respectSplitRule"] = True if options.disableAutoRetry: - taskParamMap['disableAutoRetry'] = 1 + taskParamMap["disableAutoRetry"] = 1 if options.workingGroup is not None: - taskParamMap['workingGroup'] = options.workingGroup + taskParamMap["workingGroup"] = options.workingGroup if options.official: - taskParamMap['official'] = True + taskParamMap["official"] = True if options.useNewCode: - taskParamMap['fixedSandbox'] = archiveName + taskParamMap["fixedSandbox"] = archiveName if options.maxCpuCount > 0: - taskParamMap['walltime'] = -options.maxCpuCount + taskParamMap["walltime"] = -options.maxCpuCount if options.noLoopingCheck: - taskParamMap['noLoopingCheck'] = True + taskParamMap["noLoopingCheck"] = True if options.maxWalltime > 0: - taskParamMap['maxWalltime'] = options.maxWalltime + taskParamMap["maxWalltime"] = options.maxWalltime if options.cpuTimePerEvent > 0: - taskParamMap['cpuTime'] = options.cpuTimePerEvent - taskParamMap['cpuTimeUnit'] = 'HS06sPerEvent' + taskParamMap["cpuTime"] = options.cpuTimePerEvent + taskParamMap["cpuTimeUnit"] = "HS06sPerEvent" if options.fixedCpuTime: - taskParamMap['cpuTimeUnit'] = 'HS06sPerEventFixed' + taskParamMap["cpuTimeUnit"] = "HS06sPerEventFixed" if options.memory > 0: - taskParamMap['ramCount'] = options.memory + taskParamMap["ramCount"] = options.memory if options.fixedRamCount: - taskParamMap['ramCountUnit'] = 'MBPerCoreFixed' + taskParamMap["ramCountUnit"] = "MBPerCoreFixed" else: - taskParamMap['ramCountUnit'] = 'MBPerCore' + taskParamMap["ramCountUnit"] = "MBPerCore" if options.outDiskCount is not None: - taskParamMap['outDiskCount'] = options.outDiskCount - taskParamMap['outDiskUnit'] = 'kBFixed' + taskParamMap["outDiskCount"] = options.outDiskCount + taskParamMap["outDiskUnit"] = "kBFixed" if options.nCore > 1: - taskParamMap['coreCount'] = options.nCore + taskParamMap["coreCount"] = options.nCore elif options.nThreads > 1: - taskParamMap['coreCount'] = options.nThreads -if options.skipFilesUsedBy != '': - taskParamMap['skipFilesUsedBy'] = options.skipFilesUsedBy -taskParamMap['respectSplitRule'] = True + taskParamMap["coreCount"] = options.nThreads +if options.skipFilesUsedBy != "": + taskParamMap["skipFilesUsedBy"] = options.skipFilesUsedBy +taskParamMap["respectSplitRule"] = True if options.respectLB: - taskParamMap['respectLB'] = True + taskParamMap["respectLB"] = True if options.maxAttempt > 0 and options.maxAttempt <= 50: - taskParamMap['maxAttempt'] = options.maxAttempt + taskParamMap["maxAttempt"] = options.maxAttempt if options.debugMode: - taskParamMap['debugMode'] = True + taskParamMap["debugMode"] = True if options.osMatching: - taskParamMap['osMatching'] = True -taskParamMap['osInfo'] = PsubUtils.get_os_information() + taskParamMap["osMatching"] = True +taskParamMap["osInfo"] = PsubUtils.get_os_information() if 
options.parentTaskID: - taskParamMap['noWaitParent'] = True + taskParamMap["noWaitParent"] = True # source URL -matchURL = re.search("(http.*://[^/]+)/",Client.baseURLCSRVSSL) +matchURL = re.search("(http.*://[^/]+)/", Client.baseURLCSRVSSL) if matchURL is not None: - taskParamMap['sourceURL'] = matchURL.group(1) + taskParamMap["sourceURL"] = matchURL.group(1) # middle name -if options.addNthFieldOfInFileToLFN != '': - taskParamMap['addNthFieldToLFN'] = options.addNthFieldOfInFileToLFN - taskParamMap['useFileAsSourceLFN'] = True -elif options.addNthFieldOfInDSToLFN != '': - taskParamMap['addNthFieldToLFN'] = options.addNthFieldOfInDSToLFN +if options.addNthFieldOfInFileToLFN != "": + taskParamMap["addNthFieldToLFN"] = options.addNthFieldOfInFileToLFN + taskParamMap["useFileAsSourceLFN"] = True +elif options.addNthFieldOfInDSToLFN != "": + taskParamMap["addNthFieldToLFN"] = options.addNthFieldOfInDSToLFN # dataset name -logDatasetName = re.sub('/$','.log/',options.outDS) +logDatasetName = re.sub("/$", ".log/", options.outDS) # log -taskParamMap['log'] = {'dataset': logDatasetName, - 'container': logDatasetName, - 'type':'template', - 'param_type':'log', - 'value':'{0}.$JEDITASKID.${{SN}}.log.tgz'.format(logDatasetName[:-1]) - } - -if options.addNthFieldOfInFileToLFN != '': - loglfn = '{0}.{1}'.format(*logDatasetName.split('.')[:2]) - loglfn += '${MIDDLENAME}.$JEDITASKID._${SN}.log.tgz' - taskParamMap['log']['value'] = loglfn -if options.spaceToken != '': - taskParamMap['log']['token'] = options.spaceToken +taskParamMap["log"] = { + "dataset": logDatasetName, + "container": logDatasetName, + "type": "template", + "param_type": "log", + "value": "{0}.$JEDITASKID.${{SN}}.log.tgz".format(logDatasetName[:-1]), +} + +if options.addNthFieldOfInFileToLFN != "": + loglfn = "{0}.{1}".format(*logDatasetName.split(".")[:2]) + loglfn += "${MIDDLENAME}.$JEDITASKID._${SN}.log.tgz" + taskParamMap["log"]["value"] = loglfn +if options.spaceToken != "": + taskParamMap["log"]["token"] = options.spaceToken if options.mergeOutput and options.mergeLog: # log merge - mLogDatasetName = re.sub(r'\.log/', r'.merge_log/', logDatasetName) - mLFN = re.sub(r'\.log\.tgz', r'.merge_log.tgz', taskParamMap['log']['value']) - data = copy.deepcopy(taskParamMap['log']) - data.update({'dataset': mLogDatasetName, - 'container': mLogDatasetName, - 'param_type': 'output', - 'mergeOnly': True, - 'value': mLFN}) - taskParamMap['log_merge'] = data + mLogDatasetName = re.sub(r"\.log/", r".merge_log/", logDatasetName) + mLFN = re.sub(r"\.log\.tgz", r".merge_log.tgz", taskParamMap["log"]["value"]) + data = copy.deepcopy(taskParamMap["log"]) + data.update( + { + "dataset": mLogDatasetName, + "container": mLogDatasetName, + "param_type": "output", + "mergeOnly": True, + "value": mLFN, + } + ) + taskParamMap["log_merge"] = data # make job parameters -taskParamMap['jobParameters'] = [] +taskParamMap["jobParameters"] = [] # build if options.noBuild and not options.noCompile: - taskParamMap['jobParameters'] += [ - {'type':'constant', - 'value': '-a {0}'.format(archiveName), - }, - ] + taskParamMap["jobParameters"] += [ + { + "type": "constant", + "value": "-a {0}".format(archiveName), + }, + ] else: - taskParamMap['jobParameters'] += [ - {'type':'constant', - 'value': '-l ${LIB}', - }, - ] + taskParamMap["jobParameters"] += [ + { + "type": "constant", + "value": "-l ${LIB}", + }, + ] # pre execution string -pStr1 = '' +pStr1 = "" if runConfig.other.rndmStream != []: pStr1 = "AtRndmGenSvc=Service('AtRndmGenSvc');AtRndmGenSvc.Seeds=[" for stream 
in runConfig.other.rndmStream: num = runConfig.other.rndmNumbers[runConfig.other.rndmStream.index(stream)] - pStr1 += "'%s ${RNDMSEED} %s'," % (stream,num[1]) + pStr1 += "'%s ${RNDMSEED} %s'," % (stream, num[1]) pStr1 += "]" - dictItem = {'type':'template', - 'param_type':'number', - 'value':'${RNDMSEED}', - 'hidden':True, - 'offset':runConfig.other.rndmStream[0][0], - } - taskParamMap['jobParameters'] += [dictItem] + dictItem = { + "type": "template", + "param_type": "number", + "value": "${RNDMSEED}", + "hidden": True, + "offset": runConfig.other.rndmStream[0][0], + } + taskParamMap["jobParameters"] += [dictItem] # split by event option was invoked -pStr2 = '' +pStr2 = "" if options.nEventsPerJob > 0 and (not options.trf): # @ Number of events to be processed per job param1 = "theApp.EvtMax=${MAXEVENTS}" @@ -1622,345 +2532,447 @@ def _onExit(dir, files, del_command): else: param2 = "EventSelector.SkipEvents=${SKIPEVENTS}" # @ Form a string to add to job parameters - pStr2 = '%s;%s' % (param1,param2) + pStr2 = "%s;%s" % (param1, param2) # set pre execution parameter -if pStr1 != '' or pStr2 != '': - if pStr1 == '' or pStr2 == '': - preStr = pStr1+pStr2 +if pStr1 != "" or pStr2 != "": + if pStr1 == "" or pStr2 == "": + preStr = pStr1 + pStr2 else: - preStr = "%s;%s" % (pStr1,pStr2) - taskParamMap['jobParameters'] += [ - {'type':'constant', - 'value': '-f "', - 'padding':False, - }, - ] - taskParamMap['jobParameters'] += PsubUtils.convertParamStrToJediParam(preStr,{},'', - False,False) - taskParamMap['jobParameters'] += [ - {'type':'constant', - 'value': '"', - }, - ] + preStr = "%s;%s" % (pStr1, pStr2) + taskParamMap["jobParameters"] += [ + { + "type": "constant", + "value": '-f "', + "padding": False, + }, + ] + taskParamMap["jobParameters"] += PsubUtils.convertParamStrToJediParam(preStr, {}, "", False, False) + taskParamMap["jobParameters"] += [ + { + "type": "constant", + "value": '"', + }, + ] # misc -param = '--sourceURL ${SURL} ' -param += '-r {0} '.format(runDir) +param = "--sourceURL ${SURL} " +param += "-r {0} ".format(runDir) # addPoolFC if options.addPoolFC != "": - param += '--addPoolFC %s ' % options.addPoolFC + param += "--addPoolFC %s " % options.addPoolFC # disable to skip missing files if options.notSkipMissing: - param += '--notSkipMissing ' + param += "--notSkipMissing " # given PFN -if options.pfnList != '': - param += '--givenPFN ' +if options.pfnList != "": + param += "--givenPFN " # run TRF if options.trf: - param += '--trf ' + param += "--trf " # general input format if options.generalInput: - param += '--generalInput ' + param += "--generalInput " # use theApp.nextEvent if options.useNextEvent: - param += '--useNextEvent ' + param += "--useNextEvent " # use CMake -if AthenaUtils.useCMake() or options.containerImage != '': +if AthenaUtils.useCMake() or options.containerImage != "": param += "--useCMake " # AthenaMT if options.nThreads > 1: param += "--useAthenaMT " # use code tracer if options.codeTrace: - param += '--codeTrace ' + param += "--codeTrace " # debug parameters -if options.queueData != '': +if options.queueData != "": param += "--overwriteQueuedata=%s " % options.queueData # read BS if runConfig.input.inBS: - param += '-b ' + param += "-b " # use back navigation if runConfig.input.backNavi: - param += '-e ' + param += "-e " # ship input if options.shipinput: - param += '--shipInput ' + param += "--shipInput " # event picking -if options.eventPickEvtList != '': - param += '--eventPickTxt=%s ' % eventPickRunEvtDat.split('/')[-1] +if 
options.eventPickEvtList != "": + param += "--eventPickTxt=%s " % eventPickRunEvtDat.split("/")[-1] # assign -if param != '': - taskParamMap['jobParameters'] += [ - {'type':'constant', - 'value': param, - }, - ] +if param != "": + taskParamMap["jobParameters"] += [ + { + "type": "constant", + "value": param, + }, + ] # input inputMap = {} -if options.inDS != '': - tmpDict = {'type':'template', - 'param_type':'input', - 'value':'-i "${IN/T}"', - 'dataset':options.inDS, - 'expand':True, - 'exclude':'\.log\.tgz(\.\d+)*$', - } +if options.inDS != "": + tmpDict = { + "type": "template", + "param_type": "input", + "value": '-i "${IN/T}"', + "dataset": options.inDS, + "expand": True, + "exclude": "\.log\.tgz(\.\d+)*$", + } if options.notExpandInDS: - del tmpDict['expand'] - tmpDict['consolidate'] = '.'.join(options.outDS.split('.')[:2]) + '.' + MiscUtils.wrappedUuidGen() + '/' - if options.inputType != '': - tmpDict['include'] = options.inputType + del tmpDict["expand"] + tmpDict["consolidate"] = ".".join(options.outDS.split(".")[:2]) + "." + MiscUtils.wrappedUuidGen() + "/" + if options.inputType != "": + tmpDict["include"] = options.inputType if options.filelist != []: - tmpDict['files'] = options.filelist - taskParamMap['jobParameters'].append(tmpDict) - taskParamMap['dsForIN'] = options.inDS - inputMap['IN'] = options.inDS -elif options.pfnList != '': - taskParamMap['pfnList'] = PsubUtils.getListPFN(options.pfnList) + tmpDict["files"] = options.filelist + taskParamMap["jobParameters"].append(tmpDict) + taskParamMap["dsForIN"] = options.inDS + inputMap["IN"] = options.inDS +elif options.pfnList != "": + taskParamMap["pfnList"] = PsubUtils.getListPFN(options.pfnList) # use noInput mecahism - taskParamMap['noInput'] = True + taskParamMap["noInput"] = True if options.nfiles == 0: - taskParamMap['nFiles'] = len(taskParamMap['pfnList']) - taskParamMap['jobParameters'] += [ - {'type':'constant', - 'value':'-i "${IN/T}"', - }, - ] -elif options.goodRunListXML != '': - tmpDict = {'type':'template', - 'param_type':'input', - 'value':'-i "${IN/T}"', - 'dataset':'%%INDS%%', - 'expand':True, - 'exclude':'\.log\.tgz(\.\d+)*$', - 'files':'%%INLFNLIST%%', - } - taskParamMap['jobParameters'].append(tmpDict) - taskParamMap['dsForIN'] = '%%INDS%%' - inputMap['IN'] ='%%INDS%%' + taskParamMap["nFiles"] = len(taskParamMap["pfnList"]) + taskParamMap["jobParameters"] += [ + { + "type": "constant", + "value": '-i "${IN/T}"', + }, + ] +elif options.goodRunListXML != "": + tmpDict = { + "type": "template", + "param_type": "input", + "value": '-i "${IN/T}"', + "dataset": "%%INDS%%", + "expand": True, + "exclude": "\.log\.tgz(\.\d+)*$", + "files": "%%INLFNLIST%%", + } + taskParamMap["jobParameters"].append(tmpDict) + taskParamMap["dsForIN"] = "%%INDS%%" + inputMap["IN"] = "%%INDS%%" else: # no input - taskParamMap['noInput'] = True + taskParamMap["noInput"] = True if options.nEventsPerJob > 0: - taskParamMap['nEventsPerJob'] = options.nEventsPerJob + taskParamMap["nEventsPerJob"] = options.nEventsPerJob else: - taskParamMap['nEventsPerJob'] = 1 + taskParamMap["nEventsPerJob"] = 1 if options.split > 0: - taskParamMap['nEvents'] = options.split + taskParamMap["nEvents"] = options.split else: - taskParamMap['nEvents'] = 1 - taskParamMap['nEvents'] *= taskParamMap['nEventsPerJob'] - taskParamMap['jobParameters'] += [ - {'type':'constant', - 'value': '-i "[]"', - }, - ] + taskParamMap["nEvents"] = 1 + taskParamMap["nEvents"] *= taskParamMap["nEventsPerJob"] + taskParamMap["jobParameters"] += [ + { + "type": "constant", + 
"value": '-i "[]"', + }, + ] # extract DBR for --trf dbrInTRF = False if options.trf: - tmpMatch = re.search('%DB=([^ \'\";]+)',jobO) + tmpMatch = re.search("%DB=([^ '\";]+)", jobO) if tmpMatch is not None: options.dbRelease = tmpMatch.group(1) dbrInTRF = True # param for DBR -if options.dbRelease != '': - dbrDS = options.dbRelease.split(':')[0] +if options.dbRelease != "": + dbrDS = options.dbRelease.split(":")[0] # change LATEST to DBR_LATEST - if dbrDS == 'LATEST': - dbrDS = 'DBR_LATEST' - dictItem = {'type':'template', - 'param_type':'input', - 'value':'--dbrFile=${DBR}', - 'dataset':dbrDS, - } - taskParamMap['jobParameters'] += [dictItem] + if dbrDS == "LATEST": + dbrDS = "DBR_LATEST" + dictItem = { + "type": "template", + "param_type": "input", + "value": "--dbrFile=${DBR}", + "dataset": dbrDS, + } + taskParamMap["jobParameters"] += [dictItem] # no expansion if dbrInTRF: - dictItem = {'type':'constant', - 'value':'--noExpandDBR', - } - taskParamMap['jobParameters'] += [dictItem] + dictItem = { + "type": "constant", + "value": "--noExpandDBR", + } + taskParamMap["jobParameters"] += [dictItem] # minimum bias -minBiasStream = '' -if options.minDS != '': +minBiasStream = "" +if options.minDS != "": if options.notExpandMinDS: expand_flag = False else: expand_flag = True - dictItem = MiscUtils.makeJediJobParam('${MININ}',options.minDS,'input',hidden=True, - expand=expand_flag,exclude='\.log\.tgz(\.\d+)*$', - nFilesPerJob=options.nMin,useNumFilesAsRatio=True, - randomAtt=options.randomMin, outDS=options.outDS) - taskParamMap['jobParameters'] += dictItem - inputMap['MININ'] = options.minDS - minBiasStream += 'MININ,' + dictItem = MiscUtils.makeJediJobParam( + "${MININ}", + options.minDS, + "input", + hidden=True, + expand=expand_flag, + exclude="\.log\.tgz(\.\d+)*$", + nFilesPerJob=options.nMin, + useNumFilesAsRatio=True, + randomAtt=options.randomMin, + outDS=options.outDS, + ) + taskParamMap["jobParameters"] += dictItem + inputMap["MININ"] = options.minDS + minBiasStream += "MININ," if options.sameSecRetry: - taskParamMap['reuseSecOnDemand'] = True -if options.lowMinDS != '': + taskParamMap["reuseSecOnDemand"] = True +if options.lowMinDS != "": if options.notExpandLowMinDS: expand_flag = False else: expand_flag = True - dictItem = MiscUtils.makeJediJobParam('${LOMBIN}',options.lowMinDS,'input',hidden=True, - expand=expand_flag,exclude='\.log\.tgz(\.\d+)*$', - nFilesPerJob=options.nLowMin,useNumFilesAsRatio=True, - randomAtt=options.randomMin, outDS=options.outDS) - taskParamMap['jobParameters'] += dictItem - inputMap['LOMBIN'] = options.lowMinDS - minBiasStream += 'LOMBIN,' + dictItem = MiscUtils.makeJediJobParam( + "${LOMBIN}", + options.lowMinDS, + "input", + hidden=True, + expand=expand_flag, + exclude="\.log\.tgz(\.\d+)*$", + nFilesPerJob=options.nLowMin, + useNumFilesAsRatio=True, + randomAtt=options.randomMin, + outDS=options.outDS, + ) + taskParamMap["jobParameters"] += dictItem + inputMap["LOMBIN"] = options.lowMinDS + minBiasStream += "LOMBIN," if options.sameSecRetry: - taskParamMap['reuseSecOnDemand'] = True -if options.highMinDS != '': + taskParamMap["reuseSecOnDemand"] = True +if options.highMinDS != "": if options.notExpandHighMinDS: expand_flag = False else: expand_flag = True - dictItem = MiscUtils.makeJediJobParam('${HIMBIN}',options.highMinDS,'input',hidden=True, - expand=expand_flag,exclude='\.log\.tgz(\.\d+)*$', - nFilesPerJob=options.nHighMin,useNumFilesAsRatio=True, - randomAtt=options.randomMin, outDS=options.outDS) - taskParamMap['jobParameters'] += dictItem - 
inputMap['HIMBIN'] = options.highMinDS - minBiasStream += 'HIMBIN,' + dictItem = MiscUtils.makeJediJobParam( + "${HIMBIN}", + options.highMinDS, + "input", + hidden=True, + expand=expand_flag, + exclude="\.log\.tgz(\.\d+)*$", + nFilesPerJob=options.nHighMin, + useNumFilesAsRatio=True, + randomAtt=options.randomMin, + outDS=options.outDS, + ) + taskParamMap["jobParameters"] += dictItem + inputMap["HIMBIN"] = options.highMinDS + minBiasStream += "HIMBIN," if options.sameSecRetry: - taskParamMap['reuseSecOnDemand'] = True + taskParamMap["reuseSecOnDemand"] = True minBiasStream = minBiasStream[:-1] -if minBiasStream != '': - dictItem = {'type':'constant', - 'value':'-m "${{{0}/T}}"'.format(minBiasStream), - } - taskParamMap['jobParameters'] += [dictItem] +if minBiasStream != "": + dictItem = { + "type": "constant", + "value": '-m "${{{0}/T}}"'.format(minBiasStream), + } + taskParamMap["jobParameters"] += [dictItem] # cavern -if options.cavDS != '': +if options.cavDS != "": if options.notExpandCavDS: expand_flag = False else: expand_flag = True - dictItem = MiscUtils.makeJediJobParam('-n "${CAVIN/T}"',options.cavDS,'input', - expand=expand_flag,exclude='\.log\.tgz(\.\d+)*$', - nFilesPerJob=options.nCav,useNumFilesAsRatio=True, - randomAtt=options.randomCav, outDS=options.outDS) - taskParamMap['jobParameters'] += dictItem - inputMap['CAVIN'] = options.cavDS + dictItem = MiscUtils.makeJediJobParam( + '-n "${CAVIN/T}"', + options.cavDS, + "input", + expand=expand_flag, + exclude="\.log\.tgz(\.\d+)*$", + nFilesPerJob=options.nCav, + useNumFilesAsRatio=True, + randomAtt=options.randomCav, + outDS=options.outDS, + ) + taskParamMap["jobParameters"] += dictItem + inputMap["CAVIN"] = options.cavDS if options.sameSecRetry: - taskParamMap['reuseSecOnDemand'] = True + taskParamMap["reuseSecOnDemand"] = True # beam halo -beamHaloStream = '' -if options.beamHaloDS != '': - dictItem = MiscUtils.makeJediJobParam('${BHIN}',options.beamHaloDS,'input',hidden=True, - expand=True,exclude='\.log\.tgz(\.\d+)*$', - nFilesPerJob=options.nBeamHalo,useNumFilesAsRatio=True) - taskParamMap['jobParameters'] += dictItem - inputMap['BHIN'] = options.beamHaloDS - beamHaloStream += 'BHIN,' +beamHaloStream = "" +if options.beamHaloDS != "": + dictItem = MiscUtils.makeJediJobParam( + "${BHIN}", + options.beamHaloDS, + "input", + hidden=True, + expand=True, + exclude="\.log\.tgz(\.\d+)*$", + nFilesPerJob=options.nBeamHalo, + useNumFilesAsRatio=True, + ) + taskParamMap["jobParameters"] += dictItem + inputMap["BHIN"] = options.beamHaloDS + beamHaloStream += "BHIN," if options.sameSecRetry: - taskParamMap['reuseSecOnDemand'] = True -if options.beamHaloADS != '': - dictItem = MiscUtils.makeJediJobParam('${BHAIN}',options.beamHaloADS,'input',hidden=True, - expand=True,exclude='\.log\.tgz(\.\d+)*$', - nFilesPerJob=options.nBeamHaloA,useNumFilesAsRatio=True) - taskParamMap['jobParameters'] += dictItem - inputMap['BHAIN'] = options.beamHaloADS - beamHaloStream += 'BHAIN,' + taskParamMap["reuseSecOnDemand"] = True +if options.beamHaloADS != "": + dictItem = MiscUtils.makeJediJobParam( + "${BHAIN}", + options.beamHaloADS, + "input", + hidden=True, + expand=True, + exclude="\.log\.tgz(\.\d+)*$", + nFilesPerJob=options.nBeamHaloA, + useNumFilesAsRatio=True, + ) + taskParamMap["jobParameters"] += dictItem + inputMap["BHAIN"] = options.beamHaloADS + beamHaloStream += "BHAIN," if options.sameSecRetry: - taskParamMap['reuseSecOnDemand'] = True -if options.beamHaloCDS != '': - dictItem = 
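Each secondary stream handled above (MININ, LOMBIN, HIMBIN, CAVIN, BHIN, ...) follows the same three-step pattern: build a hidden input template with MiscUtils.makeJediJobParam, record the stream-to-dataset mapping, and append the stream name to a comma-separated list that later becomes a single -m "${.../T}" constant. The sketch below only illustrates that pattern; make_stream_param is a stand-in, not the real makeJediJobParam, and the dataset names are made up. Note that each stream's inputMap entry is expected to point at its own dataset option.

def make_stream_param(stream, dataset):
    # stand-in for MiscUtils.makeJediJobParam: returns a list of job-parameter dicts
    return [{
        "type": "template",
        "param_type": "input",
        "value": "${%s}" % stream,
        "dataset": dataset,
        "hidden": True,
        "expand": True,
        "exclude": r"\.log\.tgz(\.\d+)*$",
    }]

job_params = []
input_map = {}
streams = ""
# illustrative datasets; in pathena these come from --minDS, --lowMinDS, etc.
for stream, ds in [("MININ", "mc.minbias.ds/"), ("LOMBIN", "mc.lowmu.ds/")]:
    job_params += make_stream_param(stream, ds)
    input_map[stream] = ds
    streams += stream + ","
streams = streams[:-1]
if streams:
    job_params.append({"type": "constant", "value": '-m "${{{0}/T}}"'.format(streams)})
print(streams)  # MININ,LOMBIN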
MiscUtils.makeJediJobParam('${BHCIN}',options.beamHaloCDS,'input',hidden=True, - expand=True,exclude='\.log\.tgz(\.\d+)*$', - nFilesPerJob=options.nBeamHaloC,useNumFilesAsRatio=True) - taskParamMap['jobParameters'] += dictItem - inputMap['BHCIN'] = options.beamHaloCDS - beamHaloStream += 'BHCIN,' + taskParamMap["reuseSecOnDemand"] = True +if options.beamHaloCDS != "": + dictItem = MiscUtils.makeJediJobParam( + "${BHCIN}", + options.beamHaloCDS, + "input", + hidden=True, + expand=True, + exclude="\.log\.tgz(\.\d+)*$", + nFilesPerJob=options.nBeamHaloC, + useNumFilesAsRatio=True, + ) + taskParamMap["jobParameters"] += dictItem + inputMap["BHCIN"] = options.beamHaloCDS + beamHaloStream += "BHCIN," if options.sameSecRetry: - taskParamMap['reuseSecOnDemand'] = True + taskParamMap["reuseSecOnDemand"] = True beamHaloStream = beamHaloStream[:-1] -if beamHaloStream != '': - dictItem = {'type':'constant', - 'value':'--beamHalo "${{{0}/T}}"'.format(beamHaloStream) - } - taskParamMap['jobParameters'] += [dictItem] +if beamHaloStream != "": + dictItem = { + "type": "constant", + "value": '--beamHalo "${{{0}/T}}"'.format(beamHaloStream), + } + taskParamMap["jobParameters"] += [dictItem] # beam gas -beamGasStream = '' -if options.beamGasDS != '': - dictItem = MiscUtils.makeJediJobParam('${BGIN}',options.beamGasDS,'input',hidden=True, - expand=True,exclude='\.log\.tgz(\.\d+)*$', - nFilesPerJob=options.nBeamGas,useNumFilesAsRatio=True) - taskParamMap['jobParameters'] += dictItem - inputMap['BGIN'] = options.beamGasDS - beamGasStream += 'BGIN,' -if options.beamGasHDS != '': - dictItem = MiscUtils.makeJediJobParam('${BGHIN}',options.beamGasHDS,'input',hidden=True, - expand=True,exclude='\.log\.tgz(\.\d+)*$', - nFilesPerJob=options.nBeamGasH,useNumFilesAsRatio=True) - taskParamMap['jobParameters'] += dictItem - inputMap['BGHIN'] = options.beamGasHDS - beamGasStream += 'BGHIN,' -if options.beamGasCDS != '': - dictItem = MiscUtils.makeJediJobParam('${BGCIN}',options.beamGasCDS,'input',hidden=True, - expand=True,exclude='\.log\.tgz(\.\d+)*$', - nFilesPerJob=options.nBeamGasC,useNumFilesAsRatio=True) - taskParamMap['jobParameters'] += dictItem - inputMap['BGCIN'] = options.beamGasHDS - beamGasStream += 'BGCIN,' -if options.beamGasODS != '': - dictItem = MiscUtils.makeJediJobParam('${BGOIN}',options.beamGasODS,'input',hidden=True, - expand=True,exclude='\.log\.tgz(\.\d+)*$', - nFilesPerJob=options.nBeamGasO,useNumFilesAsRatio=True) - taskParamMap['jobParameters'] += dictItem - inputMap['BGOIN'] = options.beamGasODS - beamGasStream += 'BGOIN,' +beamGasStream = "" +if options.beamGasDS != "": + dictItem = MiscUtils.makeJediJobParam( + "${BGIN}", + options.beamGasDS, + "input", + hidden=True, + expand=True, + exclude="\.log\.tgz(\.\d+)*$", + nFilesPerJob=options.nBeamGas, + useNumFilesAsRatio=True, + ) + taskParamMap["jobParameters"] += dictItem + inputMap["BGIN"] = options.beamGasDS + beamGasStream += "BGIN," +if options.beamGasHDS != "": + dictItem = MiscUtils.makeJediJobParam( + "${BGHIN}", + options.beamGasHDS, + "input", + hidden=True, + expand=True, + exclude="\.log\.tgz(\.\d+)*$", + nFilesPerJob=options.nBeamGasH, + useNumFilesAsRatio=True, + ) + taskParamMap["jobParameters"] += dictItem + inputMap["BGHIN"] = options.beamGasHDS + beamGasStream += "BGHIN," +if options.beamGasCDS != "": + dictItem = MiscUtils.makeJediJobParam( + "${BGCIN}", + options.beamGasCDS, + "input", + hidden=True, + expand=True, + exclude="\.log\.tgz(\.\d+)*$", + nFilesPerJob=options.nBeamGasC, + useNumFilesAsRatio=True, + ) + 
taskParamMap["jobParameters"] += dictItem + inputMap["BGCIN"] = options.beamGasHDS + beamGasStream += "BGCIN," +if options.beamGasODS != "": + dictItem = MiscUtils.makeJediJobParam( + "${BGOIN}", + options.beamGasODS, + "input", + hidden=True, + expand=True, + exclude="\.log\.tgz(\.\d+)*$", + nFilesPerJob=options.nBeamGasO, + useNumFilesAsRatio=True, + ) + taskParamMap["jobParameters"] += dictItem + inputMap["BGOIN"] = options.beamGasODS + beamGasStream += "BGOIN," beamGasStream = beamGasStream[:-1] -if beamGasStream != '': - dictItem = {'type':'constant', - 'value':'--beamGas "${{{0}/T}}"'.format(beamGasStream) - } - taskParamMap['jobParameters'] += [dictItem] +if beamGasStream != "": + dictItem = { + "type": "constant", + "value": '--beamGas "${{{0}/T}}"'.format(beamGasStream), + } + taskParamMap["jobParameters"] += [dictItem] # general secondaries if options.secondaryDSs: for tmpDsName in options.secondaryDSs: tmpMap = options.secondaryDSs[tmpDsName] # make template item - streamName = tmpMap['streamName'] + streamName = tmpMap["streamName"] if not options.notExpandSecDSs: expandFlag = True else: expandFlag = False - dictItem = MiscUtils.makeJediJobParam('${' + streamName + '}', tmpDsName, 'input', hidden=True, - expand=expandFlag, include=tmpMap['pattern'], offset=tmpMap['nSkip'], - nFilesPerJob=tmpMap['nFiles'], outDS=options.outDS, - file_list=tmpMap['files']) - taskParamMap['jobParameters'] += dictItem + dictItem = MiscUtils.makeJediJobParam( + "${" + streamName + "}", + tmpDsName, + "input", + hidden=True, + expand=expandFlag, + include=tmpMap["pattern"], + offset=tmpMap["nSkip"], + nFilesPerJob=tmpMap["nFiles"], + outDS=options.outDS, + file_list=tmpMap["files"], + ) + taskParamMap["jobParameters"] += dictItem inputMap[streamName] = tmpDsName - dictItem = {'type':'constant', - 'value':'-m "${{{0}/T}}"'.format( - ','.join([tmpMap['streamName'] for tmpMap in options.secondaryDSs.values()])) - } - taskParamMap['jobParameters'] += [dictItem] + dictItem = { + "type": "constant", + "value": '-m "${{{0}/T}}"'.format(",".join([tmpMap["streamName"] for tmpMap in options.secondaryDSs.values()])), + } + taskParamMap["jobParameters"] += [dictItem] # output -if options.addNthFieldOfInDSToLFN != '' or options.addNthFieldOfInFileToLFN != '': - descriptionInLFN = '${MIDDLENAME}' +if options.addNthFieldOfInDSToLFN != "" or options.addNthFieldOfInFileToLFN != "": + descriptionInLFN = "${MIDDLENAME}" else: - descriptionInLFN = '' -outMap,tmpParamList = AthenaUtils.convertConfToOutput(runConfig,options.extOutFile,options.outDS, - destination=options.destSE,spaceToken=options.spaceToken, - descriptionInLFN=descriptionInLFN, - allowNoOutput=options.allowNoOutput) -taskParamMap['jobParameters'] += [ - {'type':'constant', - 'value': '-o "%s" ' % outMap - }, - ] -taskParamMap['jobParameters'] += tmpParamList + descriptionInLFN = "" +outMap, tmpParamList = AthenaUtils.convertConfToOutput( + runConfig, + options.extOutFile, + options.outDS, + destination=options.destSE, + spaceToken=options.spaceToken, + descriptionInLFN=descriptionInLFN, + allowNoOutput=options.allowNoOutput, +) +taskParamMap["jobParameters"] += [ + {"type": "constant", "value": '-o "%s" ' % outMap}, +] +taskParamMap["jobParameters"] += tmpParamList # jobO parameter @@ -1969,131 +2981,141 @@ def _onExit(dir, files, del_command): # replace full-path jobOs for tmpFullName in AthenaUtils.fullPathJobOs: tmpLocalName = AthenaUtils.fullPathJobOs[tmpFullName] - tmpJobO = re.sub(tmpFullName,tmpLocalName,tmpJobO) + tmpJobO = re.sub(tmpFullName, 
tmpLocalName, tmpJobO) # modify one-liner for G4 random seeds if runConfig.other.G4RandomSeeds > 0: - if options.singleLine != '': - tmpJobO = re.sub('-c "%s" ' % options.singleLine, - '-c "%s;from G4AtlasApps.SimFlags import SimFlags;SimFlags.SeedsG4=${RNDMSEED}" ' \ - % options.singleLine,tmpJobO) + if options.singleLine != "": + tmpJobO = re.sub( + '-c "%s" ' % options.singleLine, + '-c "%s;from G4AtlasApps.SimFlags import SimFlags;SimFlags.SeedsG4=${RNDMSEED}" ' % options.singleLine, + tmpJobO, + ) else: tmpJobO = '-c "from G4AtlasApps.SimFlags import SimFlags;SimFlags.SeedsG4=${RNDMSEED}" ' + tmpJobO - dictItem = {'type':'template', - 'param_type':'number', - 'value':'${RNDMSEED}', - 'hidden':True, - 'offset':runConfig.other.G4RandomSeeds, - } - taskParamMap['jobParameters'] += [dictItem] + dictItem = { + "type": "template", + "param_type": "number", + "value": "${RNDMSEED}", + "hidden": True, + "offset": runConfig.other.G4RandomSeeds, + } + taskParamMap["jobParameters"] += [dictItem] else: # replace parameters for TRF tmpJobO = jobO # output : basenames are in outMap['IROOT'] trough extOutFile tmpOutMap = [] - if 'IROOT' in outMap: - for tmpName,tmpLFN in outMap['IROOT']: - tmpJobO = tmpJobO.replace('%OUT.' + tmpName,tmpName) + if "IROOT" in outMap: + for tmpName, tmpLFN in outMap["IROOT"]: + tmpJobO = tmpJobO.replace("%OUT." + tmpName, tmpName) # replace DBR - tmpJobO = re.sub('%DB=[^ \'\";]+','${DBR}',tmpJobO) + tmpJobO = re.sub("%DB=[^ '\";]+", "${DBR}", tmpJobO) # set jobO parameter -taskParamMap['jobParameters'] += [ - {'type':'constant', - 'value': '-j "', - 'padding':False, - }, - ] +taskParamMap["jobParameters"] += [ + { + "type": "constant", + "value": '-j "', + "padding": False, + }, +] if options.secondaryDSs: - extra_in_list = [tmpMap['streamName'] for tmpMap in options.secondaryDSs.values()] + extra_in_list = [tmpMap["streamName"] for tmpMap in options.secondaryDSs.values()] else: extra_in_list = [] -taskParamMap['jobParameters'] += PsubUtils.convertParamStrToJediParam( - tmpJobO, inputMap, options.outDS[:-1], - True, False, usePfnList, - extra_in_list=extra_in_list) -taskParamMap['jobParameters'] += [ - {'type':'constant', - 'value': '"', - }, - ] +taskParamMap["jobParameters"] += PsubUtils.convertParamStrToJediParam( + tmpJobO, + inputMap, + options.outDS[:-1], + True, + False, + usePfnList, + extra_in_list=extra_in_list, +) +taskParamMap["jobParameters"] += [ + { + "type": "constant", + "value": '"', + }, +] # use local IO for trf or BS if options.forceStaged or ((options.trf or runConfig.input.inBS) and not options.forceDirectIO): - taskParamMap['useLocalIO'] = 1 + taskParamMap["useLocalIO"] = 1 # use AMI to get the number of events per file if options.useAMIEventLevelSplit == True: - taskParamMap['getNumEventsInMetadata'] = True + taskParamMap["getNumEventsInMetadata"] = True # avoid VP if options.avoidVP: - taskParamMap['avoidVP'] = True + taskParamMap["avoidVP"] = True # build step if options.noBuild and not options.noCompile: pass else: - jobParameters = '-i ${IN} -o ${OUT} --sourceURL ${SURL} ' + jobParameters = "-i ${IN} -o ${OUT} --sourceURL ${SURL} " # no compile if options.noCompile: jobParameters += "--noCompile " # use CMake - if AthenaUtils.useCMake() or options.containerImage != '': + if AthenaUtils.useCMake() or options.containerImage != "": jobParameters += "--useCMake " # debug parameters - if options.queueData != '': + if options.queueData != "": jobParameters += "--overwriteQueuedata=%s " % options.queueData # set task param - 
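The job options string above is shipped as a quoted -j "..." argument assembled from several jobParameters entries; entries carrying "padding": False appear to be concatenated without a trailing separator, so the opening quote, the converted parameters, and the closing quote join into one token. The renderer below is only an illustration of that reading of the data structure, not PanDA's actual server-side serialization, and the payload value is hypothetical.

def render(job_params):
    # naive rendering of constant entries only; template entries are resolved by the server
    out = ""
    for item in job_params:
        if item.get("type") != "constant":
            continue
        out += item["value"]
        if item.get("padding", True):
            out += " "
    return out

params = [
    {"type": "constant", "value": '-j "', "padding": False},
    {"type": "constant", "value": "myJobOptions.py"},  # hypothetical converted payload
    {"type": "constant", "value": '"'},
]
# the opening -j " is glued to the payload because padding is False
print(render(params))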
taskParamMap['buildSpec'] = { - 'prodSourceLabel':'panda', - 'archiveName':archiveName, - 'jobParameters':jobParameters, - } - if options.prodSourceLabel != '': - taskParamMap['buildSpec']['prodSourceLabel'] = options.prodSourceLabel + taskParamMap["buildSpec"] = { + "prodSourceLabel": "panda", + "archiveName": archiveName, + "jobParameters": jobParameters, + } + if options.prodSourceLabel != "": + taskParamMap["buildSpec"]["prodSourceLabel"] = options.prodSourceLabel # preprocessing step # good run list -if options.goodRunListXML != '': +if options.goodRunListXML != "": jobParameters = "--goodRunListXML {0} ".format(options.goodRunListXML) - if options.goodRunDataType != '': + if options.goodRunDataType != "": jobParameters += "--goodRunListDataType {0} ".format(options.goodRunDataType) - if options.goodRunProdStep != '': + if options.goodRunProdStep != "": jobParameters += "--goodRunListProdStep {0} ".format(options.goodRunProdStep) - if options.goodRunListDS != '': + if options.goodRunListDS != "": jobParameters += "--goodRunListDS {0} ".format(options.goodRunListDS) jobParameters += "--sourceURL ${SURL} " # set task param - taskParamMap['preproSpec'] = { - 'prodSourceLabel':'panda', - 'jobParameters':jobParameters, - } - if options.prodSourceLabel != '': - taskParamMap['preproSpec']['prodSourceLabel'] = options.prodSourceLabel + taskParamMap["preproSpec"] = { + "prodSourceLabel": "panda", + "jobParameters": jobParameters, + } + if options.prodSourceLabel != "": + taskParamMap["preproSpec"]["prodSourceLabel"] = options.prodSourceLabel # merging if options.mergeOutput: - jobParameters = '-r {0} '.format(runDir) - if options.mergeScript != '': + jobParameters = "-r {0} ".format(runDir) + if options.mergeScript != "": jobParameters += '-j "{0}" '.format(options.mergeScript) if not options.noBuild: - jobParameters += '-l ${LIB} ' + jobParameters += "-l ${LIB} " else: - jobParameters += '-a {0} '.format(archiveName) + jobParameters += "-a {0} ".format(archiveName) jobParameters += "--sourceURL ${SURL} " jobParameters += "--useAthenaPackages " - if AthenaUtils.useCMake() or options.containerImage != '': + if AthenaUtils.useCMake() or options.containerImage != "": jobParameters += "--useCMake " - jobParameters += '${TRN_OUTPUT:OUTPUT} ' + jobParameters += "${TRN_OUTPUT:OUTPUT} " if options.mergeLog: - jobParameters += '${TRN_LOG_MERGE:LOG_MERGE}' + jobParameters += "${TRN_LOG_MERGE:LOG_MERGE}" else: - jobParameters += '${TRN_LOG:LOG}' - taskParamMap['mergeSpec'] = {} - taskParamMap['mergeSpec']['useLocalIO'] = 1 - taskParamMap['mergeSpec']['jobParameters'] = jobParameters - taskParamMap['mergeOutput'] = True - if options.nGBPerMergeJob != 'MAX': + jobParameters += "${TRN_LOG:LOG}" + taskParamMap["mergeSpec"] = {} + taskParamMap["mergeSpec"]["useLocalIO"] = 1 + taskParamMap["mergeSpec"]["jobParameters"] = jobParameters + taskParamMap["mergeOutput"] = True + if options.nGBPerMergeJob != "MAX": # convert to int try: options.nGBPerMergeJob = int(options.nGBPerMergeJob) @@ -2104,7 +3126,7 @@ def _onExit(dir, files, del_command): if options.nGBPerMergeJob <= 0: tmpLog.error("--nGBPerMergeJob must be positive") sys.exit(EC_Config) - taskParamMap['nGBPerMergeJob'] = options.nGBPerMergeJob + taskParamMap["nGBPerMergeJob"] = options.nGBPerMergeJob ##################################################################### @@ -2119,13 +3141,13 @@ def _onExit(dir, files, del_command): print("== parameters ==") print("Site : %s" % options.site) print("Athena : %s" % athenaVer) - if groupArea != '': + if 
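--nGBPerMergeJob stays as the string "MAX" unless the user overrides it, in which case the code above converts it to an int and rejects non-positive values before storing it in the task parameters. A small standalone version of that check is below; returning None for "MAX" and raising ValueError instead of calling tmpLog.error/sys.exit are my conventions for the sketch, not the tool's behaviour.

def parse_n_gb_per_merge_job(value):
    """Return None for 'MAX' (no explicit limit), otherwise a positive int."""
    if value == "MAX":
        return None
    try:
        value = int(value)
    except Exception:
        raise ValueError("--nGBPerMergeJob must be an integer or MAX")
    if value <= 0:
        raise ValueError("--nGBPerMergeJob must be positive")
    return value

print(parse_n_gb_per_merge_job("MAX"))  # None
print(parse_n_gb_per_merge_job("10"))   # 10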
groupArea != "": print("Group Area : %s" % groupArea) - if cacheVer != '': + if cacheVer != "": print("ProdCache : %s" % cacheVer[1:]) - if nightVer != '': + if nightVer != "": print("Nightly : %s" % nightVer[1:]) - print("cmtConfig : %s" % AthenaUtils.getCmtConfigImg(athenaVer,cacheVer,nightVer,options.cmtConfig)) + print("cmtConfig : %s" % AthenaUtils.getCmtConfigImg(athenaVer, cacheVer, nightVer, options.cmtConfig)) print("RunDir : %s" % runDir) print("jobO : %s" % jobO.lstrip()) @@ -2133,14 +3155,12 @@ def _onExit(dir, files, del_command): newTaskParamMap = taskParamMap else: # replace input and output - options.inDS = ioItem['inDS'] - options.outDS = ioItem['outDS'] - newTaskParamMap = PsubUtils.replaceInputOutput(taskParamMap, ioItem['inDS'], - ioItem['outDS'], iSubmission) + options.inDS = ioItem["inDS"] + options.outDS = ioItem["outDS"] + newTaskParamMap = PsubUtils.replaceInputOutput(taskParamMap, ioItem["inDS"], ioItem["outDS"], iSubmission) taskID = None # check outDS format - if not PsubUtils.checkOutDsName(options.outDS,options.official,nickName, - options.mergeOutput, options.verbose): + if not PsubUtils.checkOutDsName(options.outDS, options.official, nickName, options.mergeOutput, options.verbose): tmpLog.error("invalid output datasetname:%s" % options.outDS) sys.exit(EC_Config) # check task parameters @@ -2153,22 +3173,26 @@ def _onExit(dir, files, del_command): tmpKeys = list(newTaskParamMap) tmpKeys.sort() for tmpKey in tmpKeys: - print('%s : %s' % (tmpKey, newTaskParamMap[tmpKey])) + print("%s : %s" % (tmpKey, newTaskParamMap[tmpKey])) if not options.noSubmit and exitCode == 0: tmpLog.info("submit {0}".format(options.outDS)) - status,tmpOut = Client.insertTaskParams(newTaskParamMap, options.verbose, properErrorCode=True, - parent_tid=options.parentTaskID) + status, tmpOut = Client.insertTaskParams( + newTaskParamMap, + options.verbose, + properErrorCode=True, + parent_tid=options.parentTaskID, + ) # result if status != 0: tmpStr = "task submission failed with {0}".format(status) tmpLog.error(tmpStr) exitCode = EC_Submit else: - if tmpOut[0] in [0,3]: + if tmpOut[0] in [0, 3]: tmpStr = tmpOut[1] tmpLog.info(tmpStr) try: - m = re.search('jediTaskID=(\d+)', tmpStr) + m = re.search("jediTaskID=(\d+)", tmpStr) taskID = int(m.group(1)) except Exception: pass @@ -2177,18 +3201,18 @@ def _onExit(dir, files, del_command): tmpLog.error(tmpStr) exitCode = EC_Submit dumpItem = copy.deepcopy(vars(options)) - dumpItem['returnCode'] = exitCode - dumpItem['returnOut'] = tmpStr - dumpItem['jediTaskID'] = taskID + dumpItem["returnCode"] = exitCode + dumpItem["returnOut"] = tmpStr + dumpItem["jediTaskID"] = taskID if len(ioList) > 1: - dumpItem['bulkSeqNumber'] = iSubmission + dumpItem["bulkSeqNumber"] = iSubmission dumpList.append(dumpItem) # go back to current dir os.chdir(currentDir) # dump if options.dumpJson is not None: - with open(options.dumpJson, 'w') as f: + with open(options.dumpJson, "w") as f: json.dump(dumpList, f) # succeeded sys.exit(0) diff --git a/pandaclient/PrunScript.py b/pandaclient/PrunScript.py index 1a613af8..51f8797b 100644 --- a/pandaclient/PrunScript.py +++ b/pandaclient/PrunScript.py @@ -1,20 +1,27 @@ -import re +import argparse +import atexit import os -import sys +import re import shutil -import atexit -import argparse +import sys import time from pandaclient.Group_argparse import get_parser + try: from urllib import quote except ImportError: from urllib.parse import quote -import types -import json + import copy -from pandaclient.MiscUtils import 
commands_get_output, commands_get_status_output, parse_secondary_datasets_opt +import json +import types + +from pandaclient.MiscUtils import ( + commands_get_output, + commands_get_status_output, + parse_secondary_datasets_opt, +) try: unicode @@ -26,18 +33,17 @@ def main(get_taskparams=False, ext_args=None, dry_mode=False): # default cloud/site defaultCloud = None - defaultSite = 'AUTO' - + defaultSite = "AUTO" # error code - EC_Config = 10 - EC_Post = 50 - EC_Archive = 60 - EC_Submit = 90 + EC_Config = 10 + EC_Post = 50 + EC_Archive = 60 + EC_Submit = 90 # tweak sys.argv sys.argv.pop(0) - sys.argv.insert(0, 'prun') + sys.argv.insert(0, "prun") usage = """prun [options] @@ -50,414 +56,1148 @@ def main(get_taskparams=False, ext_args=None, dry_mode=False): """ removedOpts = [ # list of deprecated options w.r.t version 0.6.25 - "--buildInLastChunk", - "--cloud", - "--configJEM", - "--crossSite", - "--dbRunNumber", - "--disableRebrokerage", - "--enableJEM", - "--eventPickNumSites", - "--eventPickSkipDaTRI", - "--eventPickStagedDS", - "--individualOutDS", - "--libDS", - "--long", - "--manaVer", - "--myproxy", - "--outputPath", - "--provenanceID", - "--removedDS", - "--requireLFC", - "--safetySize", - "--seriesLabel", - "--skipScan", - "--transferredDS", - "--useChirpServer", - "--useContElementBoundary", - "--useGOForOutput", - "--useMana", - "--useOldStyleOutput", - "--useRucio", - "--useShortLivedReplicas", - "--useSiteGroup" + "--buildInLastChunk", + "--cloud", + "--configJEM", + "--crossSite", + "--dbRunNumber", + "--disableRebrokerage", + "--enableJEM", + "--eventPickNumSites", + "--eventPickSkipDaTRI", + "--eventPickStagedDS", + "--individualOutDS", + "--libDS", + "--long", + "--manaVer", + "--myproxy", + "--outputPath", + "--provenanceID", + "--removedDS", + "--requireLFC", + "--safetySize", + "--seriesLabel", + "--skipScan", + "--transferredDS", + "--useChirpServer", + "--useContElementBoundary", + "--useGOForOutput", + "--useMana", + "--useOldStyleOutput", + "--useRucio", + "--useShortLivedReplicas", + "--useSiteGroup", ] optP = get_parser(usage=usage, conflict_handler="resolve") optP.set_examples(examples) # command-line parameters - group_print = optP.add_group('print', 'info print') - group_prun = optP.add_group('prun', 'about prun itself') - group_config = optP.add_group('config', 'single configuration file to set multiple options') - group_input = optP.add_group('input', 'input dataset(s)/files/format') - group_output = optP.add_group('output', 'output dataset/files') - group_job = optP.add_group('job', 'job running control on grid') - group_build = optP.add_group('build', 'build/compile the package and env setup') - group_submit = optP.add_group('submit', 'job submission/site/retry') - group_evtFilter = optP.add_group('evtFilter', 'event filter such as good run and event pick') - group_expert = optP.add_group('expert', 'for experts/developers only') - - usage_containerJob="""Visit the following wiki page for examples: + group_print = optP.add_group("print", "info print") + group_prun = optP.add_group("prun", "about prun itself") + group_config = optP.add_group("config", "single configuration file to set multiple options") + group_input = optP.add_group("input", "input dataset(s)/files/format") + group_output = optP.add_group("output", "output dataset/files") + group_job = optP.add_group("job", "job running control on grid") + group_build = optP.add_group("build", "build/compile the package and env setup") + group_submit = optP.add_group("submit", "job submission/site/retry") + 
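prun builds its help from named option groups (print, prun, config, input, output, job, build, submit, evtFilter, expert, containerJob) through the project's Group_argparse wrapper, and shareWithMe additionally lists an option under a second group. The wrapper's internals are not shown in this diff, so the sketch below uses plain argparse argument groups purely to illustrate the grouping idea; it is not the actual get_parser/add_helpGroup/shareWithMe API, and the parsed values are made up.

import argparse

parser = argparse.ArgumentParser(prog="prun", conflict_handler="resolve")
group_input = parser.add_argument_group("input", "input dataset(s)/files/format")
group_output = parser.add_argument_group("output", "output dataset/files")

group_input.add_argument("--inDS", dest="inDS", default="",
                         help="Name of an input dataset or dataset container")
group_output.add_argument("--outDS", dest="outDS", default="",
                          help="Name of an output dataset")

args = parser.parse_args(["--inDS", "mc.something/", "--outDS", "user.jdoe.test/"])
print(args.inDS, args.outDS)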
group_evtFilter = optP.add_group("evtFilter", "event filter such as good run and event pick") + group_expert = optP.add_group("expert", "for experts/developers only") + + usage_containerJob = """Visit the following wiki page for examples: https://twiki.cern.ch/twiki/bin/view/PanDA/PandaRun#Run_user_containers_jobs Please test the job interactively first prior to submitting to the grid. Check the following on how to test container job interactively: https://twiki.cern.ch/twiki/bin/viewauth/AtlasComputing/SingularityInAtlas """ - group_containerJob = optP.add_group('containerJob', "For container-based jobs", usage=usage_containerJob) - - optP.add_helpGroup(addHelp='Some options such as --inOutDsJson may SPAN several groups') - - group_prun.add_argument('--version',action='store_const',const=True,dest='version',default=False, - help='Displays version') - group_input.add_argument('--inDS',action='store',dest='inDS',default='', - help='Name of an input dataset or dataset container') - group_input.add_argument('--notExpandInDS', action='store_const', const=True, dest='notExpandInDS',default=False, - help='Allow jobs to use files across dataset boundaries in input dataset container') - group_input.add_argument('--notExpandSecDSs', action='store_const', const=True, dest='notExpandSecDSs',default=False, - help='Use files across dataset boundaries in secondary dataset containers') - group_input.add_argument('--inDsTxt',action='store',dest='inDsTxt',default='', - help='A text file which contains the list of datasets to run over. Newlines are replaced by commas and the result is set to --inDS. Lines starting with # are ignored') - group_input.add_argument('--respectLB', action='store_const', const=True, dest='respectLB', default=False, - help='To generate jobs repecting lumiblock boundaries') - group_output.add_argument('--outDS',action='store',dest='outDS',default='', - help='Name of an output dataset. OUTDS will contain all output files') - group_output.add_argument('--outputs',action='store',dest='outputs',default='', - help='Names of output files. Comma separated. e.g., --outputs out1.dat,out2.txt. You can specify a suffix for each output container like :. e.g., --outputs AAA:out1.dat,BBB:out2.txt. In this case output container names are outDS_AAA/ and outDS_BBB/ instead of outDS_out1.dat/ and outDS_out2.txt/') - group_output.add_argument('--mergeOutput', action='store_const', const=True, dest='mergeOutput', default=False, - help="merge output files") - group_output.add_argument('--mergeLog', action='store_const', const=True, dest='mergeLog', default=False, - help="merge log files. 
relevant only with --mergeOutput") - group_output.add_argument('--destSE',action='store', dest='destSE',default='', - help='Destination strorage element') - group_output.add_argument('--noSeparateLog', action='store_const', const=True, dest='noSeparateLog', default=False, - help="Set this option when jobs don't produce log files") + group_containerJob = optP.add_group("containerJob", "For container-based jobs", usage=usage_containerJob) + + optP.add_helpGroup(addHelp="Some options such as --inOutDsJson may SPAN several groups") + + group_prun.add_argument( + "--version", + action="store_const", + const=True, + dest="version", + default=False, + help="Displays version", + ) + group_input.add_argument( + "--inDS", + action="store", + dest="inDS", + default="", + help="Name of an input dataset or dataset container", + ) + group_input.add_argument( + "--notExpandInDS", + action="store_const", + const=True, + dest="notExpandInDS", + default=False, + help="Allow jobs to use files across dataset boundaries in input dataset container", + ) + group_input.add_argument( + "--notExpandSecDSs", + action="store_const", + const=True, + dest="notExpandSecDSs", + default=False, + help="Use files across dataset boundaries in secondary dataset containers", + ) + group_input.add_argument( + "--inDsTxt", + action="store", + dest="inDsTxt", + default="", + help="A text file which contains the list of datasets to run over. Newlines are replaced by commas and the result is set to --inDS. Lines starting with # are ignored", + ) + group_input.add_argument( + "--respectLB", + action="store_const", + const=True, + dest="respectLB", + default=False, + help="To generate jobs repecting lumiblock boundaries", + ) + group_output.add_argument( + "--outDS", + action="store", + dest="outDS", + default="", + help="Name of an output dataset. OUTDS will contain all output files", + ) + group_output.add_argument( + "--outputs", + action="store", + dest="outputs", + default="", + help="Names of output files. Comma separated. e.g., --outputs out1.dat,out2.txt. You can specify a suffix for each output container like :. e.g., --outputs AAA:out1.dat,BBB:out2.txt. In this case output container names are outDS_AAA/ and outDS_BBB/ instead of outDS_out1.dat/ and outDS_out2.txt/", + ) + group_output.add_argument( + "--mergeOutput", + action="store_const", + const=True, + dest="mergeOutput", + default=False, + help="merge output files", + ) + group_output.add_argument( + "--mergeLog", + action="store_const", + const=True, + dest="mergeLog", + default=False, + help="merge log files. relevant only with --mergeOutput", + ) + group_output.add_argument( + "--destSE", + action="store", + dest="destSE", + default="", + help="Destination strorage element", + ) + group_output.add_argument( + "--noSeparateLog", + action="store_const", + const=True, + dest="noSeparateLog", + default=False, + help="Set this option when jobs don't produce log files", + ) # the option is shared by both groups, group_input and group_output - action = group_input.add_argument('--inOutDsJson', action='store', dest='inOutDsJson', default='', - help="A json file to specify input and output datasets for bulk submission. " - "It contains a json dump of [{'inDS': a comma-concatenated input dataset names, " - "'outDS': output dataset name}, ...]. 
" - "When this option is used --bulkSubmission is automatically set internally.") + action = group_input.add_argument( + "--inOutDsJson", + action="store", + dest="inOutDsJson", + default="", + help="A json file to specify input and output datasets for bulk submission. " + "It contains a json dump of [{'inDS': a comma-concatenated input dataset names, " + "'outDS': output dataset name}, ...]. " + "When this option is used --bulkSubmission is automatically set internally.", + ) group_output.shareWithMe(action) - group_evtFilter.add_argument('--goodRunListXML', action='store', dest='goodRunListXML', default='', - help='Good Run List XML which will be converted to datasets by AMI') - group_evtFilter.add_argument('--goodRunListDataType', action='store', dest='goodRunDataType', default='', - help='specify data type when converting Good Run List XML to datasets, e.g, AOD (default)') - group_evtFilter.add_argument('--goodRunListProdStep', action='store', dest='goodRunProdStep', default='', - help='specify production step when converting Good Run List to datasets, e.g, merge (default)') - action = group_evtFilter.add_argument('--goodRunListDS', action='store', dest='goodRunListDS', default='', - help='A comma-separated list of pattern strings. Datasets which are converted from Good Run List XML will be used when they match with one of the pattern strings. Either \ or "" is required when a wild-card is used. If this option is omitted all datasets will be used') + group_evtFilter.add_argument( + "--goodRunListXML", + action="store", + dest="goodRunListXML", + default="", + help="Good Run List XML which will be converted to datasets by AMI", + ) + group_evtFilter.add_argument( + "--goodRunListDataType", + action="store", + dest="goodRunDataType", + default="", + help="specify data type when converting Good Run List XML to datasets, e.g, AOD (default)", + ) + group_evtFilter.add_argument( + "--goodRunListProdStep", + action="store", + dest="goodRunProdStep", + default="", + help="specify production step when converting Good Run List to datasets, e.g, merge (default)", + ) + action = group_evtFilter.add_argument( + "--goodRunListDS", + action="store", + dest="goodRunListDS", + default="", + help='A comma-separated list of pattern strings. Datasets which are converted from Good Run List XML will be used when they match with one of the pattern strings. Either \ or "" is required when a wild-card is used. If this option is omitted all datasets will be used', + ) group_input.shareWithMe(action) - group_evtFilter.add_argument('--eventPickEvtList',action='store',dest='eventPickEvtList',default='', - help='a file name which contains a list of runs/events for event picking') - group_evtFilter.add_argument('--eventPickDataType',action='store',dest='eventPickDataType',default='', - help='type of data for event picking. one of AOD,ESD,RAW') - group_evtFilter.add_argument('--ei_api',action='store',dest='ei_api',default='', - help='flag to signalise mc in event picking') - group_evtFilter.add_argument('--eventPickStreamName',action='store',dest='eventPickStreamName',default='', - help='stream name for event picking. e.g., physics_CosmicCaloEM') - action = group_evtFilter.add_argument('--eventPickDS',action='store',dest='eventPickDS',default='', - help='A comma-separated list of pattern strings. Datasets which are converted from the run/event list will be used when they match with one of the pattern strings. Either \ or "" is required when a wild-card is used. 
e.g., data\*') + group_evtFilter.add_argument( + "--eventPickEvtList", + action="store", + dest="eventPickEvtList", + default="", + help="a file name which contains a list of runs/events for event picking", + ) + group_evtFilter.add_argument( + "--eventPickDataType", + action="store", + dest="eventPickDataType", + default="", + help="type of data for event picking. one of AOD,ESD,RAW", + ) + group_evtFilter.add_argument( + "--ei_api", + action="store", + dest="ei_api", + default="", + help="flag to signalise mc in event picking", + ) + group_evtFilter.add_argument( + "--eventPickStreamName", + action="store", + dest="eventPickStreamName", + default="", + help="stream name for event picking. e.g., physics_CosmicCaloEM", + ) + action = group_evtFilter.add_argument( + "--eventPickDS", + action="store", + dest="eventPickDS", + default="", + help='A comma-separated list of pattern strings. Datasets which are converted from the run/event list will be used when they match with one of the pattern strings. Either \ or "" is required when a wild-card is used. e.g., data\*', + ) group_input.shareWithMe(action) - group_evtFilter.add_argument('--eventPickAmiTag',action='store',dest='eventPickAmiTag',default='', - help='AMI tag used to match TAG collections names. This option is required when you are interested in older data than the latest one. Either \ or "" is required when a wild-card is used. e.g., f2\*') - group_evtFilter.add_argument('--eventPickWithGUID',action='store_const',const=True,dest='eventPickWithGUID',default=False, - help='Using GUIDs together with run and event numbers in eventPickEvtList to skip event lookup') - - group_submit.add_argument('--express', action='store_const',const=True,dest='express',default=False, - help="Send the job using express quota to have higher priority. The number of express subjobs in the queue and the total execution time used by express subjobs are limited (a few subjobs and several hours per day, respectively). This option is intended to be used for quick tests before large submission. Note that buildXYZ is not included in quota calculation. If this option is used when quota has already exceeded, the panda server will ignore the option so that subjobs have normal priorities. Also, if you submit 1 buildXYZ and N runXYZ subjobs when you only have quota of M (M < N), only the first M runXYZ subjobs will have higher priorities") - group_print.add_argument('--debugMode', action='store_const',const=True,dest='debugMode',default=False, - help="Send the job with the debug mode on. If this option is specified the subjob will send stdout to the panda monitor every 5 min. The number of debug subjobs per user is limited. When this option is used and the quota has already exceeded, the panda server supresses the option so that subjobs will run without the debug mode. If you submit multiple subjobs in a single job, only the first subjob will set the debug mode on. Note that you can turn the debug mode on/off by using pbook after jobs are submitted" ) - group_output.add_argument('--addNthFieldOfInDSToLFN',action='store',dest='addNthFieldOfInDSToLFN',default='', - help="A middle name is added to LFNs of output files when they are produced from one dataset in the input container or input dataset list. The middle name is extracted from the dataset name. E.g., if --addNthFieldOfInDSToLFN=2 and the dataset name is data10_7TeV.00160387.physics_Muon..., 00160387 is extracted and LFN is something like user.hoge.TASKID.00160387.blah. 
Concatenate multiple field numbers with commas if necessary, e.g., --addNthFieldOfInDSToLFN=2,6.") - group_output.add_argument('--addNthFieldOfInFileToLFN',action='store',dest='addNthFieldOfInFileToLFN',default='', - help="A middle name is added to LFNs of output files similarly as --addNthFieldOfInDSToLFN, but strings are extracted from input file names") - group_build.add_argument('--followLinks',action='store_const',const=True,dest='followLinks',default=False, - help="Resolve symlinks to directories when building the input tarball. This option requires python2.6 or higher") + group_evtFilter.add_argument( + "--eventPickAmiTag", + action="store", + dest="eventPickAmiTag", + default="", + help='AMI tag used to match TAG collections names. This option is required when you are interested in older data than the latest one. Either \ or "" is required when a wild-card is used. e.g., f2\*', + ) + group_evtFilter.add_argument( + "--eventPickWithGUID", + action="store_const", + const=True, + dest="eventPickWithGUID", + default=False, + help="Using GUIDs together with run and event numbers in eventPickEvtList to skip event lookup", + ) + + group_submit.add_argument( + "--express", + action="store_const", + const=True, + dest="express", + default=False, + help="Send the job using express quota to have higher priority. The number of express subjobs in the queue and the total execution time used by express subjobs are limited (a few subjobs and several hours per day, respectively). This option is intended to be used for quick tests before large submission. Note that buildXYZ is not included in quota calculation. If this option is used when quota has already exceeded, the panda server will ignore the option so that subjobs have normal priorities. Also, if you submit 1 buildXYZ and N runXYZ subjobs when you only have quota of M (M < N), only the first M runXYZ subjobs will have higher priorities", + ) + group_print.add_argument( + "--debugMode", + action="store_const", + const=True, + dest="debugMode", + default=False, + help="Send the job with the debug mode on. If this option is specified the subjob will send stdout to the panda monitor every 5 min. The number of debug subjobs per user is limited. When this option is used and the quota has already exceeded, the panda server supresses the option so that subjobs will run without the debug mode. If you submit multiple subjobs in a single job, only the first subjob will set the debug mode on. Note that you can turn the debug mode on/off by using pbook after jobs are submitted", + ) + group_output.add_argument( + "--addNthFieldOfInDSToLFN", + action="store", + dest="addNthFieldOfInDSToLFN", + default="", + help="A middle name is added to LFNs of output files when they are produced from one dataset in the input container or input dataset list. The middle name is extracted from the dataset name. E.g., if --addNthFieldOfInDSToLFN=2 and the dataset name is data10_7TeV.00160387.physics_Muon..., 00160387 is extracted and LFN is something like user.hoge.TASKID.00160387.blah. 
Concatenate multiple field numbers with commas if necessary, e.g., --addNthFieldOfInDSToLFN=2,6.", + ) + group_output.add_argument( + "--addNthFieldOfInFileToLFN", + action="store", + dest="addNthFieldOfInFileToLFN", + default="", + help="A middle name is added to LFNs of output files similarly as --addNthFieldOfInDSToLFN, but strings are extracted from input file names", + ) + group_build.add_argument( + "--followLinks", + action="store_const", + const=True, + dest="followLinks", + default=False, + help="Resolve symlinks to directories when building the input tarball. This option requires python2.6 or higher", + ) # I do not know which group "--useHomeDir" should go? - group_build.add_argument('--useHomeDir', action='store_const',const=True,dest='useHomeDir',default=False, - help='execute prun just under the HOME dir') - group_build.add_argument('--noBuild', action='store_const',const=True,dest='noBuild',default=False, - help='Skip buildGen') - group_submit.add_argument('--bulkSubmission', action='store_const', const=True, dest='bulkSubmission', default=False, - help='Bulk submit tasks. When this option is used, --inOutDsJson is required while --inDS and --outDS are ignored. It is possible to use %%DATASET_IN and %%DATASET_OUT in --exec which are replaced with actual dataset names when tasks are submitted, and %%BULKSEQNUMBER which is replaced with a sequential number of tasks in the bulk submission') - group_build.add_argument('--noCompile', action='store_const',const=True,dest='noCompile',default=False, - help='Just upload a tarball in the build step to avoid the tighter size limit imposed by --noBuild. The tarball contains binaries compiled on your local computer, so that compilation is skipped in the build step on remote WN') - group_input.add_argument('--secondaryDSs',action='store',dest='secondaryDSs',default='', - help='List of secondary datasets when the job requires multiple inputs. See PandaRun wiki page for detail') - group_input.add_argument('--reusableSecondary',action='store',dest='reusableSecondary',default='', - help='A comma-separated list of secondary streams which reuse files when all files are used') - group_submit.add_argument('--site',action='store',dest='site',default=defaultSite, - help='Site name where jobs are sent. If omitted, jobs are automatically sent to sites where input is available. A comma-separated list of sites can be specified (e.g. siteA,siteB,siteC), so that best sites are chosen from the given site list. If AUTO is appended at the end of the list (e.g. siteA,siteB,siteC,AUTO), jobs are sent to any sites if input is not found in the previous sites') - group_input.add_argument('--match',action='store',dest='match',default='', - help='Use only files matching with given pattern') - group_input.add_argument('--antiMatch',action='store',dest='antiMatch',default='', - help='Skip files matching with given pattern') - group_input.add_argument('--notSkipLog',action='store_const',const=True,dest='notSkipLog',default=False, - help="Don't skip log files in input datasets (obsolete. use --useLogAsInput instead)") - group_submit.add_argument('--memory',action='store',dest='memory',default=-1,type=int, - help='Required memory size in MB per core. 
e.g., for 1GB per core --memory 1024') - group_submit.add_argument('--fixedRamCount', action='store_const', const=True, dest='fixedRamCount', default=False, - help='Use fixed memory size instead of estimated memory size') - group_submit.add_argument('--nCore', action='store', dest='nCore', default=-1, type=int, - help='The number of CPU cores. Note that the system distinguishes only nCore=1 and nCore>1. This means that even if you set nCore=2 jobs can go to sites with nCore=8 and your application must use the 8 cores there. The number of available cores is defined in an environment variable, $ATHENA_PROC_NUMBER, on WNs. Your application must check the env variable when starting up to dynamically change the number of cores') - group_submit.add_argument('--maxCpuCount', action='store', dest='maxCpuCount', default=0, type=int, - help=argparse.SUPPRESS) - group_expert.add_argument('--noLoopingCheck', action='store_const', const=True, dest='noLoopingCheck', default=False, - help="Disable looping job check") - group_submit.add_argument('--useDirectIOSites', action='store_const', const=True, dest='useDirectIOSites', default=False, - help="Use only sites which use directIO to read input files") - group_submit.add_argument('--outDiskCount', action='store', dest='outDiskCount', default=None, type=int, - help="Expected output size in kB per 1 MB of input. The system automatically calculates this " - "value using successful jobs and the value contains a safety offset (100kB). " - "Use this option to disable it when jobs cannot have enough input files " - "due to the offset") - - group_output.add_argument('--official',action='store_const',const=True,dest='official',default=False, - help='Produce official dataset') - group_output.add_argument('--unlimitNumOutputs', action='store_const', const=True, dest='unlimitNumOutputs', default=False, - help='Remove the limit on the number of outputs. Note that having too many outputs per job causes a severe load on the system. You may be banned if you carelessly use this option') - group_output.add_argument('--descriptionInLFN',action='store',dest='descriptionInLFN',default='', - help='LFN is user.nickname.jobsetID.something (e.g. user.harumaki.12345.AOD._00001.pool) by default. This option allows users to put a description string into LFN. i.e., user.nickname.jobsetID.description.something') - group_build.add_argument('--useRootCore',action='store_const',const=True,dest='useRootCore',default=False, - help='Use RootCore. See PandaRun wiki page for detail') - group_build.add_argument('--useAthenaPackages',action='store_const',const=True,dest='useAthenaPackages',default=False, - help='Use Athena packages. See PandaRun wiki page for detail') - group_build.add_argument('--gluePackages', action='store', dest='gluePackages', default='', - help='list of glue packages which pathena cannot find due to empty i686-slc4-gcc34-opt. e.g., External/AtlasHepMC,External/Lhapdf') - group_input.add_argument('--nFiles',action='store',dest='nFiles',default=0,type=int, - help='Use a limited number of files in the input dataset') - group_input.add_argument('--nSkipFiles',action='store',dest='nSkipFiles',default=0,type=int, - help='Skip N files in the input dataset') - group_job.add_argument('--exec',action='store',dest='jobParams',default='', - help='execution string. 
e.g., --exec "./myscript arg1 arg2"') - group_output.add_argument('--execWithRealFileNames', action='store_const', const=True, dest='execWithRealFileNames', - default=False, - help='Run the execution string with real output filenames') - group_job.add_argument('--nFilesPerJob',action='store',dest='nFilesPerJob',default=None,type=int, - help='Number of files on which each sub-job runs (default 50). Note that this is the number of files per sub-job in the primary dataset even if --secondaryDSs is used') - group_job.add_argument('--nJobs',action='store',dest='nJobs',default=-1,type=int, - help='Maximum number of sub-jobs. If the number of input files (N_in) is less than nJobs*nFilesPerJob, only N_in/nFilesPerJob sub-jobs will be instantiated') - group_job.add_argument('--nEvents',action='store',dest='nEvents',default=-1,type=int, - help='The total number of events to be processed. This option is considered only when either --inDS or --pfnList is not used') - group_job.add_argument('--nEventsPerJob', action='store', dest='nEventsPerJob', default=-1, type=int, - help='Number of events per subjob. This is used mainly for job splitting. If you set nEventsPerFile, the total number of subjobs is nEventsPerFile*nFiles/nEventsPerJob. Otherwise, it gets from rucio the number of events in each input file and subjobs are created accordingly. Note that you need to explicitly specify in --exec some parameters like %%MAXEVENTS, %%SKIPEVENTS and %%FIRSTEVENT and your application needs to process only an event chunk accordingly, to avoid subjobs processing the same events. All parameters descibed in https://twiki.cern.ch/twiki/bin/view/PanDA/PandaAthena#example_8_How_to_run_production are available') - action = group_job.add_argument('--nEventsPerFile', action='store', dest='nEventsPerFile', default=0, type=int, - help='Number of events per file') + group_build.add_argument( + "--useHomeDir", + action="store_const", + const=True, + dest="useHomeDir", + default=False, + help="execute prun just under the HOME dir", + ) + group_build.add_argument( + "--noBuild", + action="store_const", + const=True, + dest="noBuild", + default=False, + help="Skip buildGen", + ) + group_submit.add_argument( + "--bulkSubmission", + action="store_const", + const=True, + dest="bulkSubmission", + default=False, + help="Bulk submit tasks. When this option is used, --inOutDsJson is required while --inDS and --outDS are ignored. It is possible to use %%DATASET_IN and %%DATASET_OUT in --exec which are replaced with actual dataset names when tasks are submitted, and %%BULKSEQNUMBER which is replaced with a sequential number of tasks in the bulk submission", + ) + group_build.add_argument( + "--noCompile", + action="store_const", + const=True, + dest="noCompile", + default=False, + help="Just upload a tarball in the build step to avoid the tighter size limit imposed by --noBuild. The tarball contains binaries compiled on your local computer, so that compilation is skipped in the build step on remote WN", + ) + group_input.add_argument( + "--secondaryDSs", + action="store", + dest="secondaryDSs", + default="", + help="List of secondary datasets when the job requires multiple inputs. 
See PandaRun wiki page for detail", + ) + group_input.add_argument( + "--reusableSecondary", + action="store", + dest="reusableSecondary", + default="", + help="A comma-separated list of secondary streams which reuse files when all files are used", + ) + group_submit.add_argument( + "--site", + action="store", + dest="site", + default=defaultSite, + help="Site name where jobs are sent. If omitted, jobs are automatically sent to sites where input is available. A comma-separated list of sites can be specified (e.g. siteA,siteB,siteC), so that best sites are chosen from the given site list. If AUTO is appended at the end of the list (e.g. siteA,siteB,siteC,AUTO), jobs are sent to any sites if input is not found in the previous sites", + ) + group_input.add_argument( + "--match", + action="store", + dest="match", + default="", + help="Use only files matching with given pattern", + ) + group_input.add_argument( + "--antiMatch", + action="store", + dest="antiMatch", + default="", + help="Skip files matching with given pattern", + ) + group_input.add_argument( + "--notSkipLog", + action="store_const", + const=True, + dest="notSkipLog", + default=False, + help="Don't skip log files in input datasets (obsolete. use --useLogAsInput instead)", + ) + group_submit.add_argument( + "--memory", + action="store", + dest="memory", + default=-1, + type=int, + help="Required memory size in MB per core. e.g., for 1GB per core --memory 1024", + ) + group_submit.add_argument( + "--fixedRamCount", + action="store_const", + const=True, + dest="fixedRamCount", + default=False, + help="Use fixed memory size instead of estimated memory size", + ) + group_submit.add_argument( + "--nCore", + action="store", + dest="nCore", + default=-1, + type=int, + help="The number of CPU cores. Note that the system distinguishes only nCore=1 and nCore>1. This means that even if you set nCore=2 jobs can go to sites with nCore=8 and your application must use the 8 cores there. The number of available cores is defined in an environment variable, $ATHENA_PROC_NUMBER, on WNs. Your application must check the env variable when starting up to dynamically change the number of cores", + ) + group_submit.add_argument( + "--maxCpuCount", + action="store", + dest="maxCpuCount", + default=0, + type=int, + help=argparse.SUPPRESS, + ) + group_expert.add_argument( + "--noLoopingCheck", + action="store_const", + const=True, + dest="noLoopingCheck", + default=False, + help="Disable looping job check", + ) + group_submit.add_argument( + "--useDirectIOSites", + action="store_const", + const=True, + dest="useDirectIOSites", + default=False, + help="Use only sites which use directIO to read input files", + ) + group_submit.add_argument( + "--outDiskCount", + action="store", + dest="outDiskCount", + default=None, + type=int, + help="Expected output size in kB per 1 MB of input. The system automatically calculates this " + "value using successful jobs and the value contains a safety offset (100kB). " + "Use this option to disable it when jobs cannot have enough input files " + "due to the offset", + ) + + group_output.add_argument( + "--official", + action="store_const", + const=True, + dest="official", + default=False, + help="Produce official dataset", + ) + group_output.add_argument( + "--unlimitNumOutputs", + action="store_const", + const=True, + dest="unlimitNumOutputs", + default=False, + help="Remove the limit on the number of outputs. Note that having too many outputs per job causes a severe load on the system. 
You may be banned if you carelessly use this option", + ) + group_output.add_argument( + "--descriptionInLFN", + action="store", + dest="descriptionInLFN", + default="", + help="LFN is user.nickname.jobsetID.something (e.g. user.harumaki.12345.AOD._00001.pool) by default. This option allows users to put a description string into LFN. i.e., user.nickname.jobsetID.description.something", + ) + group_build.add_argument( + "--useRootCore", + action="store_const", + const=True, + dest="useRootCore", + default=False, + help="Use RootCore. See PandaRun wiki page for detail", + ) + group_build.add_argument( + "--useAthenaPackages", + action="store_const", + const=True, + dest="useAthenaPackages", + default=False, + help="Use Athena packages. See PandaRun wiki page for detail", + ) + group_build.add_argument( + "--gluePackages", + action="store", + dest="gluePackages", + default="", + help="list of glue packages which pathena cannot find due to empty i686-slc4-gcc34-opt. e.g., External/AtlasHepMC,External/Lhapdf", + ) + group_input.add_argument( + "--nFiles", + action="store", + dest="nFiles", + default=0, + type=int, + help="Use a limited number of files in the input dataset", + ) + group_input.add_argument( + "--nSkipFiles", + action="store", + dest="nSkipFiles", + default=0, + type=int, + help="Skip N files in the input dataset", + ) + group_job.add_argument( + "--exec", + action="store", + dest="jobParams", + default="", + help='execution string. e.g., --exec "./myscript arg1 arg2"', + ) + group_output.add_argument( + "--execWithRealFileNames", + action="store_const", + const=True, + dest="execWithRealFileNames", + default=False, + help="Run the execution string with real output filenames", + ) + group_job.add_argument( + "--nFilesPerJob", + action="store", + dest="nFilesPerJob", + default=None, + type=int, + help="Number of files on which each sub-job runs (default 50). Note that this is the number of files per sub-job in the primary dataset even if --secondaryDSs is used", + ) + group_job.add_argument( + "--nJobs", + action="store", + dest="nJobs", + default=-1, + type=int, + help="Maximum number of sub-jobs. If the number of input files (N_in) is less than nJobs*nFilesPerJob, only N_in/nFilesPerJob sub-jobs will be instantiated", + ) + group_job.add_argument( + "--nEvents", + action="store", + dest="nEvents", + default=-1, + type=int, + help="The total number of events to be processed. This option is considered only when either --inDS or --pfnList is not used", + ) + group_job.add_argument( + "--nEventsPerJob", + action="store", + dest="nEventsPerJob", + default=-1, + type=int, + help="Number of events per subjob. This is used mainly for job splitting. If you set nEventsPerFile, the total number of subjobs is nEventsPerFile*nFiles/nEventsPerJob. Otherwise, it gets from rucio the number of events in each input file and subjobs are created accordingly. Note that you need to explicitly specify in --exec some parameters like %%MAXEVENTS, %%SKIPEVENTS and %%FIRSTEVENT and your application needs to process only an event chunk accordingly, to avoid subjobs processing the same events. 
All parameters descibed in https://twiki.cern.ch/twiki/bin/view/PanDA/PandaAthena#example_8_How_to_run_production are available", + ) + action = group_job.add_argument( + "--nEventsPerFile", + action="store", + dest="nEventsPerFile", + default=0, + type=int, + help="Number of events per file", + ) group_input.shareWithMe(action) - group_job.add_argument('--nEventsPerChunk',action='store',dest='nEventsPerChunk',default=-1,type=int, - help='Set granuarity to split events. The number of events per job is multiples of nEventsPerChunk. This option is considered only when --nEvents is used but --nJobs is not used. If this option is not set, nEvents/20 is used as nEventsPerChunk') - group_job.add_argument('--nGBPerJob',action='store',dest='nGBPerJob',default=-1, - help='Instantiate one sub job per NGBPERJOB GB of input files. --nGBPerJob=MAX sets the size to the default maximum value') - group_build.add_argument('--maxFileSize',action='store',dest='maxFileSize',default=1024*1024,type=int, - help='Maximum size of files to be sent to WNs (default 1024*1024B)') - group_build.add_argument('--athenaTag',action='store',dest='athenaTag',default='', - help='Tags to setup Athena on remote WNs, e.g., --athenaTag=AtlasProduction,14.2.24.3') - group_build.add_argument('--rootVer',action='store',dest='rootVer',default='', - help='Specify a ROOT version which is not included in Athena, e.g., --rootVer=5.28/00' ) - group_build.add_argument('--workDir',action='store',dest='workDir',default='.', - help='All files under WORKDIR will be transfered to WNs (default=./)') - group_build.add_argument('--extFile',action='store',dest='extFile',default='', - help='root or large files under WORKDIR are not sent to WNs by default. If you want to send some skipped files, specify their names, e.g., data.root,data.tgz') - group_build.add_argument('--excludeFile',action='store',dest='excludeFile',default='', - help='specify a comma-separated string to exclude files and/or directories when gathering files in local working area. Either \ or "" is required when a wildcard is used. e.g., doc,\*.C') - group_input.add_argument('--inputFileList', action='store', dest='inputFileListName', default='', - help='A local file which specifies names of files to be used in the input dataset. ' - 'One filename per line in the the local file') - action = group_job.add_argument('--allowNoOutput',action='store',dest='allowNoOutput',default='', - help='A comma-separated list of regexp patterns. Output files are allowed not to be produced if their filenames match with one of regexp patterns. Jobs go to finish even if they are not produced on WN') + group_job.add_argument( + "--nEventsPerChunk", + action="store", + dest="nEventsPerChunk", + default=-1, + type=int, + help="Set granuarity to split events. The number of events per job is multiples of nEventsPerChunk. This option is considered only when --nEvents is used but --nJobs is not used. If this option is not set, nEvents/20 is used as nEventsPerChunk", + ) + group_job.add_argument( + "--nGBPerJob", + action="store", + dest="nGBPerJob", + default=-1, + help="Instantiate one sub job per NGBPERJOB GB of input files. 
--nGBPerJob=MAX sets the size to the default maximum value", + ) + group_build.add_argument( + "--maxFileSize", + action="store", + dest="maxFileSize", + default=1024 * 1024, + type=int, + help="Maximum size of files to be sent to WNs (default 1024*1024B)", + ) + group_build.add_argument( + "--athenaTag", + action="store", + dest="athenaTag", + default="", + help="Tags to setup Athena on remote WNs, e.g., --athenaTag=AtlasProduction,14.2.24.3", + ) + group_build.add_argument( + "--rootVer", + action="store", + dest="rootVer", + default="", + help="Specify a ROOT version which is not included in Athena, e.g., --rootVer=5.28/00", + ) + group_build.add_argument( + "--workDir", + action="store", + dest="workDir", + default=".", + help="All files under WORKDIR will be transfered to WNs (default=./)", + ) + group_build.add_argument( + "--extFile", + action="store", + dest="extFile", + default="", + help="root or large files under WORKDIR are not sent to WNs by default. If you want to send some skipped files, specify their names, e.g., data.root,data.tgz", + ) + group_build.add_argument( + "--excludeFile", + action="store", + dest="excludeFile", + default="", + help='specify a comma-separated string to exclude files and/or directories when gathering files in local working area. Either \ or "" is required when a wildcard is used. e.g., doc,\*.C', + ) + group_input.add_argument( + "--inputFileList", + action="store", + dest="inputFileListName", + default="", + help="A local file which specifies names of files to be used in the input dataset. " "One filename per line in the the local file", + ) + action = group_job.add_argument( + "--allowNoOutput", + action="store", + dest="allowNoOutput", + default="", + help="A comma-separated list of regexp patterns. Output files are allowed not to be produced if their filenames match with one of regexp patterns. Jobs go to finish even if they are not produced on WN", + ) group_output.shareWithMe(action) - group_submit.add_argument('--excludedSite', action='append', dest='excludedSite', default=[], - help="A comma-separated list of sites which are not used for site section, " - "e.g., ABC,OPQ*,XYZ which excludes ABC, XYZ, and OPQ due to the wildcard") - group_input.add_argument('--useLogAsInput',action='store_const',const=True,dest='useLogAsInput',default=False, - help="log.tgz files in inDS are ignored by default. This option allows log files to be used as input") - group_submit.add_argument('--noSubmit',action='store_const',const=True,dest='noSubmit',default=False, - help="Don't submit jobs") - group_submit.add_argument('--prodSourceLabel', action='store', dest='prodSourceLabel', default='', - help="set prodSourceLabel") - group_submit.add_argument('--processingType', action='store', dest='processingType', default='prun', - help="set processingType") - group_submit.add_argument('--workingGroup', action='store', dest='workingGroup', default=None, - help="set workingGroup") - group_build.add_argument('--tmpDir',action='store',dest='tmpDir',default='', - help='Temporary directory where an archive file is created') - group_build.add_argument('--voms', action='store', dest='vomsRoles', default=None, - help="generate proxy with paticular roles. 
e.g., atlas:/atlas/ca/Role=production,atlas:/atlas/fr/Role=pilot") - group_build.add_argument('--vo', action='store', dest='vo', default=None, - help="virtual orgnaiztion name") - group_submit.add_argument('--noEmail', action='store_const', const=True, dest='noEmail', default=False, - help='Suppress email notification') - group_prun.add_argument('--update', action='store_const', const=True, dest='update', default=False, - help='Update panda-client to the latest version') - group_output.add_argument('--spaceToken',action='store',dest='spaceToken',default='', - help='spacetoken for outputs. e.g., ATLASLOCALGROUPDISK') - group_expert.add_argument('--expertOnly_skipScout', action='store_const',const=True,dest='skipScout',default=False, - help=argparse.SUPPRESS) - group_job.add_argument('--respectSplitRule', action='store_const',const=True,dest='respectSplitRule',default=False, - help="force scout jobs to follow split rules like nGBPerJob") - group_job.add_argument('--nGBPerMergeJob', action='store', dest='nGBPerMergeJob', default='MAX', - help='Instantiate one merge job per NGBPERMERGEJOB GB of pre-merged files') - group_expert.add_argument('--devSrv',action='store_const',const=True,dest='devSrv',default=False, - help="Please don't use this option. Only for developers to use the dev panda server") - group_expert.add_argument('--intrSrv', action='store_const', const=True, dest='intrSrv', default=False, - help="Please don't use this option. Only for developers to use the intr panda server") - group_expert.add_argument('--persistentFile', action='store', dest='persistentFile', default='', - help="Please don't use this option. Only for junction steps " - "to keep persistent information in workflows") - group_build.add_argument('--outTarBall', action='store', dest='outTarBall', default='', - help='Save a gzipped tarball of local files which is the input to buildXYZ') - group_build.add_argument('--inTarBall', action='store', dest='inTarBall', default='', - help='Use a gzipped tarball of local files as input to buildXYZ. Generall the tarball is created by using --outTarBall') - group_build.add_argument('--bexec',action='store',dest='bexec',default='', - help='execution string for build stage. e.g., --bexec "make"') - group_submit.add_argument('--disableAutoRetry',action='store_const',const=True,dest='disableAutoRetry',default=False, - help='disable automatic job retry on the server side') - group_job.add_argument('--maxNFilesPerJob',action='store',dest='maxNFilesPerJob',default=200,type=int, - help='The maximum number of files per job is 200 by default since too many input files result in a too long command-line argument on WN which crashes the job. This option relax the limit. In many cases it is better to use this option together with --writeInputToTxt') - group_input.add_argument('--writeInputToTxt',action='store',dest='writeInputToTxt',default='', - help='Write the input file list to a file so that your application gets the list from the file instead of stdin. The argument is a comma separated list of StreamName:FileName. e.g., IN:input1.txt,IN2:input2.txt') - group_build.add_argument('--dbRelease',action='store',dest='dbRelease',default='', - help='DBRelease or CDRelease (DatasetName:FileName). e.g., ddo.000001.Atlas.Ideal.DBRelease.v050101:DBRelease-5.1.1.tar.gz. If --dbRelease=LATEST, the latest DBRelease is used. 
Most likely the --useAthenaPackages or --athenaTag option is required to setup Athena runtime on WN') - group_build.add_argument('--notExpandDBR',action='store_const',const=True,dest='notExpandDBR',default=False, - help='By default, DBRelease.tar.gz is expanded on WN and gets deleted after changing environment variables accordingly. If you need tar.gz, use this option') - action = group_job.add_argument('--mergeScript',action='store',dest='mergeScript',default='', - help='Specify user-defied script execution string for output merging') + group_submit.add_argument( + "--excludedSite", + action="append", + dest="excludedSite", + default=[], + help="A comma-separated list of sites which are not used for site section, " + "e.g., ABC,OPQ*,XYZ which excludes ABC, XYZ, and OPQ due to the wildcard", + ) + group_input.add_argument( + "--useLogAsInput", + action="store_const", + const=True, + dest="useLogAsInput", + default=False, + help="log.tgz files in inDS are ignored by default. This option allows log files to be used as input", + ) + group_submit.add_argument( + "--noSubmit", + action="store_const", + const=True, + dest="noSubmit", + default=False, + help="Don't submit jobs", + ) + group_submit.add_argument( + "--prodSourceLabel", + action="store", + dest="prodSourceLabel", + default="", + help="set prodSourceLabel", + ) + group_submit.add_argument( + "--processingType", + action="store", + dest="processingType", + default="prun", + help="set processingType", + ) + group_submit.add_argument( + "--workingGroup", + action="store", + dest="workingGroup", + default=None, + help="set workingGroup", + ) + group_build.add_argument( + "--tmpDir", + action="store", + dest="tmpDir", + default="", + help="Temporary directory where an archive file is created", + ) + group_build.add_argument( + "--voms", + action="store", + dest="vomsRoles", + default=None, + help="generate proxy with paticular roles. e.g., atlas:/atlas/ca/Role=production,atlas:/atlas/fr/Role=pilot", + ) + group_build.add_argument("--vo", action="store", dest="vo", default=None, help="virtual orgnaiztion name") + group_submit.add_argument( + "--noEmail", + action="store_const", + const=True, + dest="noEmail", + default=False, + help="Suppress email notification", + ) + group_prun.add_argument( + "--update", + action="store_const", + const=True, + dest="update", + default=False, + help="Update panda-client to the latest version", + ) + group_output.add_argument( + "--spaceToken", + action="store", + dest="spaceToken", + default="", + help="spacetoken for outputs. e.g., ATLASLOCALGROUPDISK", + ) + group_expert.add_argument( + "--expertOnly_skipScout", + action="store_const", + const=True, + dest="skipScout", + default=False, + help=argparse.SUPPRESS, + ) + group_job.add_argument( + "--respectSplitRule", + action="store_const", + const=True, + dest="respectSplitRule", + default=False, + help="force scout jobs to follow split rules like nGBPerJob", + ) + group_job.add_argument( + "--nGBPerMergeJob", + action="store", + dest="nGBPerMergeJob", + default="MAX", + help="Instantiate one merge job per NGBPERMERGEJOB GB of pre-merged files", + ) + group_expert.add_argument( + "--devSrv", + action="store_const", + const=True, + dest="devSrv", + default=False, + help="Please don't use this option. Only for developers to use the dev panda server", + ) + group_expert.add_argument( + "--intrSrv", + action="store_const", + const=True, + dest="intrSrv", + default=False, + help="Please don't use this option. 
Only for developers to use the intr panda server", + ) + group_expert.add_argument( + "--persistentFile", + action="store", + dest="persistentFile", + default="", + help="Please don't use this option. Only for junction steps " "to keep persistent information in workflows", + ) + group_build.add_argument( + "--outTarBall", + action="store", + dest="outTarBall", + default="", + help="Save a gzipped tarball of local files which is the input to buildXYZ", + ) + group_build.add_argument( + "--inTarBall", + action="store", + dest="inTarBall", + default="", + help="Use a gzipped tarball of local files as input to buildXYZ. Generall the tarball is created by using --outTarBall", + ) + group_build.add_argument( + "--bexec", + action="store", + dest="bexec", + default="", + help='execution string for build stage. e.g., --bexec "make"', + ) + group_submit.add_argument( + "--disableAutoRetry", + action="store_const", + const=True, + dest="disableAutoRetry", + default=False, + help="disable automatic job retry on the server side", + ) + group_job.add_argument( + "--maxNFilesPerJob", + action="store", + dest="maxNFilesPerJob", + default=200, + type=int, + help="The maximum number of files per job is 200 by default since too many input files result in a too long command-line argument on WN which crashes the job. This option relax the limit. In many cases it is better to use this option together with --writeInputToTxt", + ) + group_input.add_argument( + "--writeInputToTxt", + action="store", + dest="writeInputToTxt", + default="", + help="Write the input file list to a file so that your application gets the list from the file instead of stdin. The argument is a comma separated list of StreamName:FileName. e.g., IN:input1.txt,IN2:input2.txt", + ) + group_build.add_argument( + "--dbRelease", + action="store", + dest="dbRelease", + default="", + help="DBRelease or CDRelease (DatasetName:FileName). e.g., ddo.000001.Atlas.Ideal.DBRelease.v050101:DBRelease-5.1.1.tar.gz. If --dbRelease=LATEST, the latest DBRelease is used. Most likely the --useAthenaPackages or --athenaTag option is required to setup Athena runtime on WN", + ) + group_build.add_argument( + "--notExpandDBR", + action="store_const", + const=True, + dest="notExpandDBR", + default=False, + help="By default, DBRelease.tar.gz is expanded on WN and gets deleted after changing environment variables accordingly. If you need tar.gz, use this option", + ) + action = group_job.add_argument( + "--mergeScript", + action="store", + dest="mergeScript", + default="", + help="Specify user-defied script execution string for output merging", + ) group_output.shareWithMe(action) - group_print.add_argument('-v', '--verbose', action='store_const',const=True,dest='verbose',default=False, - help='Verbose') - group_input.add_argument('--pfnList',action='store',dest='pfnList',default='', - help='Name of file which contains a list of input PFNs. Those files can be un-registered in DDM') - group_build.add_argument('--cmtConfig', action='store', dest='cmtConfig', default=None, - help='CMTCONFIG is extracted from local environment variables when tasks are submitted, ' - 'to set up the same environment on remote worker-nodes. ' - 'This option allows to set up another CMTCONFIG ' - 'remotely. 
e.g., --cmtConfig x86_64-slc5-gcc43-opt.') - group_config.add_argument('--loadXML',action='store',dest='loadXML',default=None, - help='Expert mode: load complete submission configuration from an XML file ') - group_config.add_argument('--loadJson', action='store', dest='loadJson',default=None, - help='Read command-line parameters from a json file which contains a dict of {parameter: value}') - group_config.add_argument('--dumpJson', action='store', dest='dumpJson', default=None, - help='Dump all command-line parameters and submission result such as returnCode, returnOut, jediTaskID, and bulkSeqNumber if --bulkSubmission is used, to a json file') - group_config.add_argument('--dumpTaskParams', action='store', dest='dumpTaskParams', default=None, - help='Dump task parameters to a json file') - group_config.add_argument('--parentTaskID', '--parentTaskID', action='store', dest='parentTaskID', default=None, - type=int, - help='Set taskID of the paranet task to execute the task while the parent is still running') - group_config.add_argument('--useSecrets', action='store_const', const=True, dest='useSecrets',default=False, - help='Use secrets') - group_input.add_argument('--forceStaged',action='store_const',const=True,dest='forceStaged',default=False, - help='Force files from primary DS to be staged to local disk, even if direct-access is possible') - group_input.add_argument('--forceStagedSecondary',action='store_const',const=True,dest='forceStagedSecondary',default=False, - help='Force files from secondary DSs to be staged to local disk, even if direct-access is possible') - group_input.add_argument('--avoidVP', action='store_const', const=True, dest='avoidVP', default=False, - help='Not to use sites where virtual placement is enabled') - group_expert.add_argument('--queueData', action='store', dest='queueData', default='', - help="Please don't use this option. Only for developers") - - group_submit.add_argument('--useNewCode',action='store_const',const=True,dest='useNewCode',default=False, - help='When task are resubmitted with the same outDS, the original souce code is used to re-run on failed/unprocessed files. This option uploads new source code so that jobs will run with new binaries') - group_output.add_argument('--allowTaskDuplication',action='store_const',const=True,dest='allowTaskDuplication',default=False, - help="As a general rule each task has a unique outDS and history of file usage is recorded per task. This option allows multiple tasks to contribute to the same outDS. Typically useful to submit a new task with the outDS which was used by another broken task. Use this option very carefully at your own risk, since file duplication happens when the second task runs on the same input which the first task successfully processed") - group_input.add_argument('--skipFilesUsedBy', action='store',dest='skipFilesUsedBy',default='', - help='A comma-separated list of TaskIDs. Files used by those tasks are skipped when running a new task') - group_submit.add_argument('--maxAttempt', action='store', dest='maxAttempt', default=-1, - type=int, help='Maximum number of reattempts for each job (3 by default and not larger than 50)') - group_containerJob.add_argument('--containerImage', action='store', dest='containerImage', default='', - help="Name of a container image") - group_containerJob.add_argument('--architecture', action='store', dest='architecture', default='', - help="Base OS platform, CPU, and/or GPU requirements. 
" - "The format is @base_platform#CPU_spec&GPU_spec " - "where base platform, CPU, or GPU spec can be omitted. " - "If base platform is not specified it is automatically taken from " - "$ALRB_USER_PLATFORM. " - "CPU_spec = architecture<-vendor<-instruction set>>, " - "GPU_spec = vendor<-model>. A wildcards can be used if there is no special " - "requirement for the attribute. E.g., #x86_64-*-avx2&nvidia to ask for x86_64 " - "CPU with avx2 support and nvidia GPU") - group_containerJob.add_argument('--ctrCvmfs', action='store_const', const=True, dest='ctrCvmfs', default=False, - help=argparse.SUPPRESS) - #help="Bind /cvmfs to the container, bool, default False") - group_containerJob.add_argument('--ctrNoX509', action='store_const', const=True, dest='ctrNoX509', default=False, - help=argparse.SUPPRESS) - #help="Unset X509 environment in the container, bool, default False") - group_containerJob.add_argument('--ctrDatadir', action='store', dest='ctrDatadir', default='', - help=argparse.SUPPRESS) - #help="Binds the job directory to datadir for I/O operations, string, default /ctrdata") - group_containerJob.add_argument('--ctrWorkdir', action='store', dest='ctrWorkdir', default='', - help=argparse.SUPPRESS) - #help="chdir to workdir in the container, string, default /ctrdata") - group_containerJob.add_argument('--ctrDebug', action='store_const', const=True, dest='ctrDebug', default=False, - help=argparse.SUPPRESS) - #help="Enable more verbose output from runcontainer, bool, default False") - group_containerJob.add_argument('--useSandbox', action='store_const', const=True, dest='useSandbox', default=False, - help=argparse.SUPPRESS) - #help='To send files in the run directory to remote sites which are not sent out by default ' \ - #'when --containerImage is used') - group_containerJob.add_argument('--useCentralRegistry', action='store_const', const=True, - dest='useCentralRegistry', default=False, - help=argparse.SUPPRESS) - #help="Use the central container registry when --containerImage is used") - group_containerJob.add_argument('--notUseCentralRegistry', action='store_const', const=True, - dest='notUseCentralRegistry', default=False, - help=argparse.SUPPRESS) - #help="Not use the central container registry when --containerImage is used") - group_containerJob.add_argument('--alrb', action='store_const', const=True, dest='alrb', default=True, - help='Use ALRB for container execution') - group_containerJob.add_argument('--wrapExecInContainer', action='store_const', const=False, - dest='directExecInContainer', default=True, - help='Execute the --exec string through runGen in the container') - group_containerJob.add_argument('--alrbArgs', action='store', dest='alrbArgs', default=None, - help='Additional arguments for ALRB to run the container. ' \ - '"setupATLAS -c --help" shows available ALRB arguments. For example, ' \ - '--alrbArgs "--nocvmfs --nohome" to skip mounting /cvmfs and $HOME. ' \ - 'This option is mainly for experts who know how the system and the container ' \ - 'communicates with each other and how additional ALRB arguments affect '\ - 'the consequence') - group_containerJob.add_argument('--oldContMode', action='store_const', const=True, dest='oldContMode', default=False, - help='Use runcontainer for container execution. Note that this option will be ' \ - 'deleted near future. 
Try the new ARLB scheme as soon as possible and report ' \ - 'if there is a problem') - group_submit.add_argument('--priority', action='store', dest='priority', default=None, type=int, - help='Set priority of the task (1000 by default). The value must be between 900 and 1100. ' \ - 'Note that priorities of tasks are relevant only in ' \ - "each user's share, i.e., your tasks cannot jump over other user's tasks " \ - 'even if you give higher priorities.') - group_submit.add_argument('--osMatching', action='store_const', const=True, dest='osMatching', default=False, - help='To let the brokerage choose sites which have the same OS as the local machine has.') - group_job.add_argument('--cpuTimePerEvent', action='store', dest='cpuTimePerEvent', default=-1, type=int, - help='Expected HS06 seconds per event (~= 10 * the expected duration per event in seconds)') - group_job.add_argument('--fixedCpuTime', action='store_const', const=True, dest='fixedCpuTime', default=False, - help='Use fixed cpuTime instead of estimated cpuTime') - group_job.add_argument('--maxWalltime', action='store', dest='maxWalltime', default=0, type=int, - help='Max walltime for each job in hours. Note that this option works only ' \ - 'when the nevents metadata of input files are available in rucio') - group_build.add_argument("-3", action="store_true", dest="python3", default=False, - help="Use python3") + group_print.add_argument( + "-v", + "--verbose", + action="store_const", + const=True, + dest="verbose", + default=False, + help="Verbose", + ) + group_input.add_argument( + "--pfnList", + action="store", + dest="pfnList", + default="", + help="Name of file which contains a list of input PFNs. Those files can be un-registered in DDM", + ) + group_build.add_argument( + "--cmtConfig", + action="store", + dest="cmtConfig", + default=None, + help="CMTCONFIG is extracted from local environment variables when tasks are submitted, " + "to set up the same environment on remote worker-nodes. " + "This option allows to set up another CMTCONFIG " + "remotely. 
e.g., --cmtConfig x86_64-slc5-gcc43-opt.", + ) + group_config.add_argument( + "--loadXML", + action="store", + dest="loadXML", + default=None, + help="Expert mode: load complete submission configuration from an XML file ", + ) + group_config.add_argument( + "--loadJson", + action="store", + dest="loadJson", + default=None, + help="Read command-line parameters from a json file which contains a dict of {parameter: value}", + ) + group_config.add_argument( + "--dumpJson", + action="store", + dest="dumpJson", + default=None, + help="Dump all command-line parameters and submission result such as returnCode, returnOut, jediTaskID, and bulkSeqNumber if --bulkSubmission is used, to a json file", + ) + group_config.add_argument( + "--dumpTaskParams", + action="store", + dest="dumpTaskParams", + default=None, + help="Dump task parameters to a json file", + ) + group_config.add_argument( + "--parentTaskID", + "--parentTaskID", + action="store", + dest="parentTaskID", + default=None, + type=int, + help="Set taskID of the paranet task to execute the task while the parent is still running", + ) + group_config.add_argument( + "--useSecrets", + action="store_const", + const=True, + dest="useSecrets", + default=False, + help="Use secrets", + ) + group_input.add_argument( + "--forceStaged", + action="store_const", + const=True, + dest="forceStaged", + default=False, + help="Force files from primary DS to be staged to local disk, even if direct-access is possible", + ) + group_input.add_argument( + "--forceStagedSecondary", + action="store_const", + const=True, + dest="forceStagedSecondary", + default=False, + help="Force files from secondary DSs to be staged to local disk, even if direct-access is possible", + ) + group_input.add_argument( + "--avoidVP", + action="store_const", + const=True, + dest="avoidVP", + default=False, + help="Not to use sites where virtual placement is enabled", + ) + group_expert.add_argument( + "--queueData", + action="store", + dest="queueData", + default="", + help="Please don't use this option. Only for developers", + ) + + group_submit.add_argument( + "--useNewCode", + action="store_const", + const=True, + dest="useNewCode", + default=False, + help="When task are resubmitted with the same outDS, the original souce code is used to re-run on failed/unprocessed files. This option uploads new source code so that jobs will run with new binaries", + ) + group_output.add_argument( + "--allowTaskDuplication", + action="store_const", + const=True, + dest="allowTaskDuplication", + default=False, + help="As a general rule each task has a unique outDS and history of file usage is recorded per task. This option allows multiple tasks to contribute to the same outDS. Typically useful to submit a new task with the outDS which was used by another broken task. Use this option very carefully at your own risk, since file duplication happens when the second task runs on the same input which the first task successfully processed", + ) + group_input.add_argument( + "--skipFilesUsedBy", + action="store", + dest="skipFilesUsedBy", + default="", + help="A comma-separated list of TaskIDs. 
Files used by those tasks are skipped when running a new task", + ) + group_submit.add_argument( + "--maxAttempt", + action="store", + dest="maxAttempt", + default=-1, + type=int, + help="Maximum number of reattempts for each job (3 by default and not larger than 50)", + ) + group_submit.add_argument( + "-y", + action="store_true", + dest="is_confirmed", + default=False, + help="Answer yes for all questions", + ) + group_containerJob.add_argument( + "--containerImage", + action="store", + dest="containerImage", + default="", + help="Name of a container image", + ) + group_containerJob.add_argument( + "--architecture", + action="store", + dest="architecture", + default="", + help="Base OS platform, CPU, and/or GPU requirements. " + "The format is @base_platform#CPU_spec&GPU_spec " + "where base platform, CPU, or GPU spec can be omitted. " + "If base platform is not specified it is automatically taken from " + "$ALRB_USER_PLATFORM. " + "CPU_spec = architecture<-vendor<-instruction set>>, " + "GPU_spec = vendor<-model>. A wildcards can be used if there is no special " + "requirement for the attribute. E.g., #x86_64-*-avx2&nvidia to ask for x86_64 " + "CPU with avx2 support and nvidia GPU", + ) + group_containerJob.add_argument( + "--ctrCvmfs", + action="store_const", + const=True, + dest="ctrCvmfs", + default=False, + help=argparse.SUPPRESS, + ) + # help="Bind /cvmfs to the container, bool, default False") + group_containerJob.add_argument( + "--ctrNoX509", + action="store_const", + const=True, + dest="ctrNoX509", + default=False, + help=argparse.SUPPRESS, + ) + # help="Unset X509 environment in the container, bool, default False") + group_containerJob.add_argument( + "--ctrDatadir", + action="store", + dest="ctrDatadir", + default="", + help=argparse.SUPPRESS, + ) + # help="Binds the job directory to datadir for I/O operations, string, default /ctrdata") + group_containerJob.add_argument( + "--ctrWorkdir", + action="store", + dest="ctrWorkdir", + default="", + help=argparse.SUPPRESS, + ) + # help="chdir to workdir in the container, string, default /ctrdata") + group_containerJob.add_argument( + "--ctrDebug", + action="store_const", + const=True, + dest="ctrDebug", + default=False, + help=argparse.SUPPRESS, + ) + # help="Enable more verbose output from runcontainer, bool, default False") + group_containerJob.add_argument( + "--useSandbox", + action="store_const", + const=True, + dest="useSandbox", + default=False, + help=argparse.SUPPRESS, + ) + # help='To send files in the run directory to remote sites which are not sent out by default ' \ + #'when --containerImage is used') + group_containerJob.add_argument( + "--useCentralRegistry", + action="store_const", + const=True, + dest="useCentralRegistry", + default=False, + help=argparse.SUPPRESS, + ) + # help="Use the central container registry when --containerImage is used") + group_containerJob.add_argument( + "--notUseCentralRegistry", + action="store_const", + const=True, + dest="notUseCentralRegistry", + default=False, + help=argparse.SUPPRESS, + ) + # help="Not use the central container registry when --containerImage is used") + group_containerJob.add_argument( + "--alrb", + action="store_const", + const=True, + dest="alrb", + default=True, + help="Use ALRB for container execution", + ) + group_containerJob.add_argument( + "--wrapExecInContainer", + action="store_const", + const=False, + dest="directExecInContainer", + default=True, + help="Execute the --exec string through runGen in the container", + ) + group_containerJob.add_argument( + 
"--alrbArgs", + action="store", + dest="alrbArgs", + default=None, + help="Additional arguments for ALRB to run the container. " + '"setupATLAS -c --help" shows available ALRB arguments. For example, ' + '--alrbArgs "--nocvmfs --nohome" to skip mounting /cvmfs and $HOME. ' + "This option is mainly for experts who know how the system and the container " + "communicates with each other and how additional ALRB arguments affect " + "the consequence", + ) + group_containerJob.add_argument( + "--oldContMode", + action="store_const", + const=True, + dest="oldContMode", + default=False, + help="Use runcontainer for container execution. Note that this option will be " + "deleted near future. Try the new ARLB scheme as soon as possible and report " + "if there is a problem", + ) + group_submit.add_argument( + "--priority", + action="store", + dest="priority", + default=None, + type=int, + help="Set priority of the task (1000 by default). The value must be between 900 and 1100. " + "Note that priorities of tasks are relevant only in " + "each user's share, i.e., your tasks cannot jump over other user's tasks " + "even if you give higher priorities.", + ) + group_submit.add_argument( + "--osMatching", + action="store_const", + const=True, + dest="osMatching", + default=False, + help="To let the brokerage choose sites which have the same OS as the local machine has.", + ) + group_job.add_argument( + "--cpuTimePerEvent", + action="store", + dest="cpuTimePerEvent", + default=-1, + type=int, + help="Expected HS06 seconds per event (~= 10 * the expected duration per event in seconds)", + ) + group_job.add_argument( + "--fixedCpuTime", + action="store_const", + const=True, + dest="fixedCpuTime", + default=False, + help="Use fixed cpuTime instead of estimated cpuTime", + ) + group_job.add_argument( + "--maxWalltime", + action="store", + dest="maxWalltime", + default=0, + type=int, + help="Max walltime for each job in hours. Note that this option works only " "when the nevents metadata of input files are available in rucio", + ) + group_build.add_argument("-3", action="store_true", dest="python3", default=False, help="Use python3") from pandaclient import MiscUtils # parse options - # check against the removed options first for arg in sys.argv[1:]: - optName = arg.split('=',1)[0] - if optName in removedOpts: - print("!!Warning!! option %s has been deprecated, pls dont use anymore\n" % optName) - sys.argv.remove(arg) + optName = arg.split("=", 1)[0] + if optName in removedOpts: + print("!!Warning!! 
option %s has been deprecated, pls dont use anymore\n" % optName) + sys.argv.remove(arg) # options, args = optP.parse_known_args() options = optP.parse_args(ext_args) if options.verbose: print(options) - print('') + print("") # load json - jsonExecStr = '' + jsonExecStr = "" if options.loadJson is not None: loadOpts = MiscUtils.decodeJSON(options.loadJson) for k in loadOpts: @@ -468,15 +1208,15 @@ def main(get_taskparams=False, ext_args=None, dry_mode=False): except Exception: pass origK = k - if k == 'exec': - k = 'jobParams' + if k == "exec": + k = "jobParams" if not hasattr(options, k): - print("ERROR: unknown parameter {0} in {1}".format(k, options.loadJson) ) + print("ERROR: unknown parameter {0} in {1}".format(k, options.loadJson)) sys.exit(0) else: - setattr(options,k, v) + setattr(options, k, v) if v is True: - jsonExecStr += ' --{0}'.format(origK) + jsonExecStr += " --{0}".format(origK) else: if isinstance(v, (str, unicode)): jsonExecStr += " --{0}='{1}'".format(origK, v) @@ -485,18 +1225,16 @@ def main(get_taskparams=False, ext_args=None, dry_mode=False): if options.verbose: print("options after loading json") print(options) - print('') + print("") # display version from pandaclient import PandaToolsPkgInfo + if options.version: print("Version: %s" % PandaToolsPkgInfo.release_version) sys.exit(0) - from pandaclient import Client - from pandaclient import PsubUtils - from pandaclient import AthenaUtils - from pandaclient import PLogger + from pandaclient import AthenaUtils, Client, PLogger, PsubUtils # update panda-client if options.update: @@ -510,8 +1248,8 @@ def main(get_taskparams=False, ext_args=None, dry_mode=False): fullExecString += jsonExecStr # set dummy CMTSITE - if 'CMTSITE' not in os.environ: - os.environ['CMTSITE'] = '' + if "CMTSITE" not in os.environ: + os.environ["CMTSITE"] = "" # get logger tmpLog = PLogger.getPandaLogger() @@ -541,13 +1279,15 @@ def main(get_taskparams=False, ext_args=None, dry_mode=False): options.directExecInContainer = True # container stuff - if options.containerImage != '': + if options.containerImage != "": options.noBuild = True if options.alrb: options.useSandbox = True if not options.useSandbox: - tmpLog.warning("Files in the run directory are not sent out by default when --containerImage is used. " - "Please use --useSandbox if you need those files on the grid.") + tmpLog.warning( + "Files in the run directory are not sent out by default when --containerImage is used. " + "Please use --useSandbox if you need those files on the grid." 
+ ) # files to be deleted delFilesOnExit = [] @@ -556,24 +1296,25 @@ def main(get_taskparams=False, ext_args=None, dry_mode=False): xconfig = None if options.loadXML is not None: from pandaclient import ParseJobXML + xconfig = ParseJobXML.dom_parser(options.loadXML) - tmpLog.info('dump XML config') + tmpLog.info("dump XML config") xconfig.dump(options.verbose) - if options.outDS=='': - options.outDS=xconfig.outDS() - options.outputs='all' - options.jobParams='${XML_EXESTR}' - options.inDS=xconfig.inDS() + if options.outDS == "": + options.outDS = xconfig.outDS() + options.outputs = "all" + options.jobParams = "${XML_EXESTR}" + options.inDS = xconfig.inDS() # check XML try: xconfig.files_in_DS(options.inDS) except Exception: - errtype,errvalue = sys.exc_info()[:2] + errtype, errvalue = sys.exc_info()[:2] print(errvalue) - tmpLog.error('verification of XML failed') + tmpLog.error("verification of XML failed") sys.exit(EC_Config) # inDS match and secondaryDS filter will be determined later from xconfig - options.match='' + options.match = "" options.secondaryDSs = xconfig.secondaryDSs_config(filter=False) # read XML xmlFH = open(options.loadXML) @@ -584,36 +1325,40 @@ def main(get_taskparams=False, ext_args=None, dry_mode=False): curDir = os.path.realpath(os.getcwd()) # remove whitespaces - if options.outputs != '': - options.outputs = re.sub(' ','',options.outputs) + if options.outputs != "": + options.outputs = re.sub(" ", "", options.outputs) # warning for PQ PsubUtils.get_warning_for_pq(options.site, options.excludedSite, tmpLog) + # warning for memory + is_confirmed = PsubUtils.get_warning_for_memory(options.memory, options.is_confirmed, tmpLog) + if not is_confirmed: + sys.exit(0) + # exclude sites if options.excludedSite != []: options.excludedSite = PsubUtils.splitCommaConcatenatedItems(options.excludedSite) # use certain sites includedSite = None - if re.search(',',options.site) is not None: + if re.search(",", options.site) is not None: includedSite = PsubUtils.splitCommaConcatenatedItems([options.site]) - options.site = 'AUTO' + options.site = "AUTO" # set maxNFilesPerJob PsubUtils.limit_maxNumInputs = options.maxNFilesPerJob # site specified siteSpecified = True - if options.site == 'AUTO': + if options.site == "AUTO": siteSpecified = False - # list of output files which can be skipped - options.allowNoOutput = options.allowNoOutput.split(',') + options.allowNoOutput = options.allowNoOutput.split(",") # read datasets from file - if options.inDsTxt != '': + if options.inDsTxt != "": options.inDS = PsubUtils.readDsFromFile(options.inDsTxt) # not expand inDS when setting parent @@ -622,146 +1367,155 @@ def main(get_taskparams=False, ext_args=None, dry_mode=False): # bulk submission ioList = [] - if options.inOutDsJson != '': + if options.inOutDsJson != "": options.bulkSubmission = True if options.bulkSubmission: - if options.inOutDsJson == '': + if options.inOutDsJson == "": tmpLog.error("--inOutDsJson is missing") sys.exit(EC_Config) - if options.eventPickEvtList != '': + if options.eventPickEvtList != "": tmpLog.error("cannnot use --eventPickEvtList and --inOutDsJson at the same time") sys.exit(EC_Config) ioList = MiscUtils.decodeJSON(options.inOutDsJson) for ioItem in ioList: - if not ioItem['outDS'].endswith('/'): - ioItem['outDS'] += '/' - options.inDS = ioList[0]['inDS'] - options.outDS = ioList[0]['outDS'] + if not ioItem["outDS"].endswith("/"): + ioItem["outDS"] += "/" + options.inDS = ioList[0]["inDS"] + options.outDS = ioList[0]["outDS"] else: - ioList = [{'inDS': 
options.inDS, 'outDS': options.outDS}] + ioList = [{"inDS": options.inDS, "outDS": options.outDS}] # enforce to use output dataset container - if not options.outDS.endswith('/'): - options.outDS = options.outDS + '/' + if not options.outDS.endswith("/"): + options.outDS = options.outDS + "/" # absolute path for PFN list - if options.pfnList != '': + if options.pfnList != "": options.pfnList = os.path.realpath(options.pfnList) # extract DBR from exec - tmpMatch = re.search('%DB:([^ \'\";]+)',options.jobParams) + tmpMatch = re.search("%DB:([^ '\";]+)", options.jobParams) if tmpMatch is not None: options.dbRelease = tmpMatch.group(1) options.notExpandDBR = True # check DBRelease - if options.dbRelease != '' and (options.dbRelease.find(':') == -1 and options.dbRelease !='LATEST'): + if options.dbRelease != "" and (options.dbRelease.find(":") == -1 and options.dbRelease != "LATEST"): tmpLog.error("invalid argument for --dbRelease. Must be DatasetName:FileName or LATEST") sys.exit(EC_Config) # Good Run List - if options.goodRunListXML != '' and options.inDS != '': + if options.goodRunListXML != "" and options.inDS != "": tmpLog.error("cannnot use --goodRunListXML and --inDS at the same time") sys.exit(EC_Config) # event picking - if options.eventPickEvtList != '' and options.inDS != '': + if options.eventPickEvtList != "" and options.inDS != "": tmpLog.error("cannnot use --eventPickEvtList and --inDS at the same time") sys.exit(EC_Config) # param check for event picking - if options.eventPickEvtList != '': - if options.eventPickDataType == '': + if options.eventPickEvtList != "": + if options.eventPickDataType == "": tmpLog.error("--eventPickDataType must be specified") sys.exit(EC_Config) # check rootVer - if options.rootVer != '': + if options.rootVer != "": if options.useAthenaPackages or options.athenaTag: - tmpLog.warning("--rootVer is ignored when --athenaTag or --useAthenaPackages is used, " - "not to break the runtime environment by superseding the root version") - options.rootVer = '' + tmpLog.warning( + "--rootVer is ignored when --athenaTag or --useAthenaPackages is used, " "not to break the runtime environment by superseding the root version" + ) + options.rootVer = "" else: # change / to . 
- options.rootVer = re.sub('/','.',options.rootVer) + options.rootVer = re.sub("/", ".", options.rootVer) # check writeInputToTxt - if options.writeInputToTxt != '': + if options.writeInputToTxt != "": # remove % - options.writeInputToTxt = options.writeInputToTxt.replace('%','') + options.writeInputToTxt = options.writeInputToTxt.replace("%", "") # loop over all StreamName:FileName - for tmpItem in options.writeInputToTxt.split(','): - tmpItems = tmpItem.split(':') + for tmpItem in options.writeInputToTxt.split(","): + tmpItems = tmpItem.split(":") if len(tmpItems) != 2: tmpLog.error("invalid StreamName:FileName in --writeInputToTxt : %s" % tmpItem) sys.exit(EC_Config) # read list of files to be used filesToBeUsed = [] - if options.inputFileListName != '': + if options.inputFileListName != "": rFile = open(options.inputFileListName) for line in rFile: - line = re.sub('\n','',line) + line = re.sub("\n", "", line) line = line.strip() - if line != '': + if line != "": filesToBeUsed.append(line) rFile.close() # remove whitespaces - if options.inDS != '': - options.inDS = options.inDS.replace(' ', '') + if options.inDS != "": + options.inDS = options.inDS.replace(" ", "") # persistent file if options.persistentFile: - options.persistentFile = '{0}:sources.{1}.__ow__'.format(options.persistentFile, MiscUtils.wrappedUuidGen()) + options.persistentFile = "{0}:sources.{1}.__ow__".format(options.persistentFile, MiscUtils.wrappedUuidGen()) # warning if options.nFilesPerJob is not None and options.nFilesPerJob > 0 and options.nFilesPerJob < 5: - tmpLog.warning("Very small --nFilesPerJob tends to generate so many short jobs which could send your task to exhausted state " - "after scouts are done, since short jobs are problematic for the grid. Please consider not to use the option.") + tmpLog.warning( + "Very small --nFilesPerJob tends to generate so many short jobs which could send your task to exhausted state " + "after scouts are done, since short jobs are problematic for the grid. Please consider not to use the option." + ) if options.maxNFilesPerJob < 5: - tmpLog.warning("Very small --maxNFilesPerJob tends to generate so many short jobs which could send your task to exhausted state " - "after scouts are done, since short jobs are problematic for the grid. Please consider not to use the option.") + tmpLog.warning( + "Very small --maxNFilesPerJob tends to generate so many short jobs which could send your task to exhausted state " + "after scouts are done, since short jobs are problematic for the grid. Please consider not to use the option." 
+ ) # check grid-proxy if not dry_mode: PsubUtils.check_proxy(options.verbose, options.vomsRoles) # convert in/outTarBall to full path - if options.inTarBall != '': + if options.inTarBall != "": options.inTarBall = os.path.abspath(os.path.expanduser(options.inTarBall)) - if options.outTarBall != '': + if options.outTarBall != "": options.outTarBall = os.path.abspath(os.path.expanduser(options.outTarBall)) # check working dir options.workDir = os.path.realpath(options.workDir) - if options.workDir != curDir and (not curDir.startswith(options.workDir+'/')): + if options.workDir != curDir and (not curDir.startswith(options.workDir + "/")): tmpLog.error("you need to run prun in a directory under %s" % options.workDir) sys.exit(EC_Config) # avoid gathering the home dir - if 'HOME' in os.environ and not options.useHomeDir and not options.useAthenaPackages \ - and os.path.realpath(os.path.expanduser(os.environ['HOME'])) == options.workDir \ - and not dry_mode: - tmpStr = 'prun is executed just under the HOME directoy ' - tmpStr += 'and is going to send all files under the dir including ~/Mail/* and ~/private/*. ' - tmpStr += 'Do you really want that? (Please use --useHomeDir if you want to skip this confirmation)' + if ( + "HOME" in os.environ + and not options.useHomeDir + and not options.useAthenaPackages + and os.path.realpath(os.path.expanduser(os.environ["HOME"])) == options.workDir + and not dry_mode + ): + tmpStr = "prun is executed just under the HOME directoy " + tmpStr += "and is going to send all files under the dir including ~/Mail/* and ~/private/*. " + tmpStr += "Do you really want that? (Please use --useHomeDir if you want to skip this confirmation)" tmpLog.warning(tmpStr) while True: - tmpAnswer = input('y/N: ') + tmpAnswer = input("y/N: ") tmpAnswer = tmpAnswer.strip() - if tmpAnswer in ['y','N']: + if tmpAnswer in ["y", "N"]: break - if tmpAnswer == 'N': + if tmpAnswer == "N": sys.exit(EC_Config) # run dir - runDir = '.' + runDir = "." 
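# Illustrative sketch, not part of the patch: how the block below derives the
# run directory from --workDir and the invocation directory. The paths are
# hypothetical; only the re.sub() calls mirror the code that follows.
#
#   workDir = "/home/alice/analysis"          # options.workDir after realpath
#   curDir  = "/home/alice/analysis/run1"     # os.path.realpath(os.getcwd())
#   wDirString = re.sub("[\+]", ".", workDir)             # a literal '+' must not act as a regex quantifier
#   runDir = re.sub("^" + wDirString + "/", "", curDir)   # -> "run1"
#
# When prun is started directly in workDir, curDir equals workDir and runDir
# keeps the "." assigned just above.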
if curDir != options.workDir: # remove special characters - wDirString=re.sub('[\+]','.',options.workDir) - runDir = re.sub('^'+wDirString+'/','',curDir) + wDirString = re.sub("[\+]", ".", options.workDir) + runDir = re.sub("^" + wDirString + "/", "", curDir) # check maxCpuCount if options.maxCpuCount > Client.maxCpuCountLimit: @@ -769,100 +1523,101 @@ def main(get_taskparams=False, ext_args=None, dry_mode=False): sys.exit(EC_Config) # create tmp dir - if options.tmpDir == '': - tmpDir = '%s/%s' % (curDir,MiscUtils.wrappedUuidGen()) + if options.tmpDir == "": + tmpDir = "%s/%s" % (curDir, MiscUtils.wrappedUuidGen()) else: - tmpDir = '%s/%s' % (os.path.abspath(options.tmpDir),MiscUtils.wrappedUuidGen()) + tmpDir = "%s/%s" % (os.path.abspath(options.tmpDir), MiscUtils.wrappedUuidGen()) os.makedirs(tmpDir) # exit action def _onExit(dir, files, del_command): for tmpFile in files: - del_command('rm -rf %s' % tmpFile) - del_command('rm -rf %s' % dir) - + del_command("rm -rf %s" % tmpFile) + del_command("rm -rf %s" % dir) atexit.register(_onExit, tmpDir, delFilesOnExit, commands_get_output) # parse tag - athenaVer = '' - cacheVer = '' - nightVer = '' - groupArea = '' - cmtConfig = '' + athenaVer = "" + cacheVer = "" + nightVer = "" + groupArea = "" + cmtConfig = "" if options.useAthenaPackages: # get Athena versions - stA,retA = AthenaUtils.getAthenaVer() + stA, retA = AthenaUtils.getAthenaVer() # failed if not stA: tmpLog.error("You need to setup Athena runtime to use --useAthenaPackages") sys.exit(EC_Config) - workArea = retA['workArea'] - athenaVer = 'Atlas-%s' % retA['athenaVer'] - groupArea = retA['groupArea'] - cacheVer = retA['cacheVer'] - nightVer = retA['nightVer'] - cmtConfig = retA['cmtConfig'] + workArea = retA["workArea"] + athenaVer = "Atlas-%s" % retA["athenaVer"] + groupArea = retA["groupArea"] + cacheVer = retA["cacheVer"] + nightVer = retA["nightVer"] + cmtConfig = retA["cmtConfig"] # override run directory - sString=re.sub('[\+]','.',workArea) - runDir = re.sub('^%s' % sString, '', curDir) + sString = re.sub("[\+]", ".", workArea) + runDir = re.sub("^%s" % sString, "", curDir) if runDir == curDir: - errMsg = "You need to run prun in a directory under %s. " % workArea + errMsg = "You need to run prun in a directory under %s. " % workArea errMsg += "If '%s' is a read-only directory, perhaps you did setup Athena without --testarea or the 'here' tag of asetup." % workArea tmpLog.error(errMsg) sys.exit(EC_Config) - elif runDir == '': - runDir = '.' - elif runDir.startswith('/'): + elif runDir == "": + runDir = "." 
+ elif runDir.startswith("/"): runDir = runDir[1:] - runDir = runDir+'/' - elif options.athenaTag != '': + runDir = runDir + "/" + elif options.athenaTag != "": athenaVer, cacheVer, nightVer = AthenaUtils.parse_athena_tag(options.athenaTag, options.verbose, tmpLog) # set CMTCONFIG options.cmtConfig = AthenaUtils.getCmtConfig(athenaVer, cacheVer, nightVer, options.cmtConfig, options.verbose) # check CMTCONFIG - if not AthenaUtils.checkCmtConfig(cmtConfig,options.cmtConfig,options.noBuild): + if not AthenaUtils.checkCmtConfig(cmtConfig, options.cmtConfig, options.noBuild): sys.exit(EC_Config) # event picking - if options.eventPickEvtList != '': - epLockedBy = 'prun' + if options.eventPickEvtList != "": + epLockedBy = "prun" if not options.noSubmit: # request event picking - epStat,epOutput = Client.requestEventPicking(options.eventPickEvtList, - options.eventPickDataType, - options.eventPickStreamName, - options.eventPickDS, - options.eventPickAmiTag, - [], - options.inputFileListName, - options.outDS, - epLockedBy, - fullExecString, - 1, - options.eventPickWithGUID, - options.ei_api, - options.verbose) + epStat, epOutput = Client.requestEventPicking( + options.eventPickEvtList, + options.eventPickDataType, + options.eventPickStreamName, + options.eventPickDS, + options.eventPickAmiTag, + [], + options.inputFileListName, + options.outDS, + epLockedBy, + fullExecString, + 1, + options.eventPickWithGUID, + options.ei_api, + options.verbose, + ) # set input dataset options.inDS = epOutput else: - options.inDS = 'dummy' - tmpLog.info('requested Event Picking service to stage input as %s' % options.inDS) + options.inDS = "dummy" + tmpLog.info("requested Event Picking service to stage input as %s" % options.inDS) # additional files - if options.extFile == '': + if options.extFile == "": options.extFile = [] else: - tmpItems = options.extFile.split(',') + tmpItems = options.extFile.split(",") options.extFile = [] # convert * to .* for tmpItem in tmpItems: - options.extFile.append(tmpItem.replace('*','.*')) + options.extFile.append(tmpItem.replace("*", ".*")) # user-specified merging script - if options.mergeScript != '': + if options.mergeScript != "": # enable merging options.mergeOutput = True # add it to extFile @@ -870,9 +1625,9 @@ def _onExit(dir, files, del_command): options.extFile.append(options.mergeScript) # glue packages - options.gluePackages = options.gluePackages.split(',') + options.gluePackages = options.gluePackages.split(",") try: - options.gluePackages.remove('') + options.gluePackages.remove("") except Exception: pass @@ -880,44 +1635,44 @@ def _onExit(dir, files, del_command): AthenaUtils.setExcludeFile(options.excludeFile) # LFN matching - if options.match != '': + if options.match != "": # convert . to \. - options.match = options.match.replace('.','\.') + options.match = options.match.replace(".", "\.") # convert * to .* - options.match = options.match.replace('*','.*') + options.match = options.match.replace("*", ".*") # LFN anti-matching - if options.antiMatch != '': + if options.antiMatch != "": # convert . to \. 
- options.antiMatch = options.antiMatch.replace('.','\.') + options.antiMatch = options.antiMatch.replace(".", "\.") # convert * to .* - options.antiMatch = options.antiMatch.replace('*','.*') + options.antiMatch = options.antiMatch.replace("*", ".*") # get job script - jobScript = '' - if options.jobParams == '': + jobScript = "" + if options.jobParams == "": tmpLog.error("you need to give --exec\n prun [--inDS inputdataset] --outDS outputdataset --exec 'myScript arg1 arg2 ...'") sys.exit(EC_Config) - orig_execStr = options.jobParams + orig_execStr = options.jobParams orig_bexecStr = options.bexec # replace : to = for backward compatibility - for optArg in ['RNDM']: - options.jobParams = re.sub('%'+optArg+':','%'+optArg+'=',options.jobParams) + for optArg in ["RNDM"]: + options.jobParams = re.sub("%" + optArg + ":", "%" + optArg + "=", options.jobParams) # check output dataset - if options.outDS == '': + if options.outDS == "": tmpLog.error("no outDS is given\n prun [--inDS inputdataset] --outDS outputdataset --exec 'myScript arg1 arg2 ...'") sys.exit(EC_Config) # avoid inDS+pfnList - if options.pfnList != '': + if options.pfnList != "": # don't use inDS - if options.inDS != '': + if options.inDS != "": tmpLog.error("--pfnList and --inDS cannot be used at the same time") sys.exit(EC_Config) # use site - if options.site == 'AUTO': + if options.site == "AUTO": tmpLog.error("--site must be specified when --pfnList is used") sys.exit(EC_Config) @@ -930,28 +1685,28 @@ def _onExit(dir, files, del_command): options.secondaryDSs = tmpOut # reusable secondary streams - if options.reusableSecondary == '': + if options.reusableSecondary == "": options.reusableSecondary = [] else: - options.reusableSecondary = options.reusableSecondary.split(',') + options.reusableSecondary = options.reusableSecondary.split(",") # get nickname if not dry_mode: nickName = PsubUtils.getNickname() else: - nickName = 'dummy' + nickName = "dummy" - if nickName == '': + if nickName == "": sys.exit(EC_Config) # set Rucio accounting - PsubUtils.setRucioAccount(nickName,'prun',True) + PsubUtils.setRucioAccount(nickName, "prun", True) # check nGBPerJob - if not options.nGBPerJob in [-1,'MAX']: + if not options.nGBPerJob in [-1, "MAX"]: # convert to int try: - if options.nGBPerJob != 'MAX': + if options.nGBPerJob != "MAX": options.nGBPerJob = int(options.nGBPerJob) except Exception: tmpLog.error("--nGBPerJob must be an integer or MAX") @@ -966,8 +1721,7 @@ def _onExit(dir, files, del_command): sys.exit(EC_Config) # split options are mutually exclusive - if options.nFilesPerJob is not None and options.nFilesPerJob > 0 \ - and options.nEventsPerJob > 0 and options.nGBPerJob != -1: + if options.nFilesPerJob is not None and options.nFilesPerJob > 0 and options.nEventsPerJob > 0 and options.nGBPerJob != -1: tmpLog.error("split by files, split by events and split by file size can not be used simultaneously") sys.exit(EC_Config) @@ -976,131 +1730,147 @@ def _onExit(dir, files, del_command): tmpLog.error("split by events and split by file size can not be used simultaneously") sys.exit(EC_Config) - - ##################################################################### # archive sources and send it to HTTP-reachable location # create archive archiveName = None - if (options.containerImage == '' or options.useSandbox) and not dry_mode: - if options.inTarBall == '': + if (options.containerImage == "" or options.useSandbox) and not dry_mode: + if options.inTarBall == "": # copy RootCore packages if options.useRootCore: # check $ROOTCOREDIR 
- if 'ROOTCOREDIR' not in os.environ: - tmpErrMsg = '$ROOTCOREDIR is not defined in your environment. ' - tmpErrMsg += 'Please setup RootCore runtime beforehand' + if "ROOTCOREDIR" not in os.environ: + tmpErrMsg = "$ROOTCOREDIR is not defined in your environment. " + tmpErrMsg += "Please setup RootCore runtime beforehand" tmpLog.error(tmpErrMsg) sys.exit(EC_Config) # check grid_submit.sh - rootCoreSubmitSh = os.environ['ROOTCOREDIR'] + '/scripts/grid_submit.sh' - rootCoreCompileSh = os.environ['ROOTCOREDIR'] + '/scripts/grid_compile.sh' - rootCoreRunSh = os.environ['ROOTCOREDIR'] + '/scripts/grid_run.sh' - rootCoreSubmitNbSh = os.environ['ROOTCOREDIR'] + '/scripts/grid_submit_nobuild.sh' - rootCoreCompileNbSh = os.environ['ROOTCOREDIR'] + '/scripts/grid_compile_nobuild.sh' - rootCoreShList = [rootCoreSubmitSh,rootCoreCompileSh,rootCoreRunSh] + rootCoreSubmitSh = os.environ["ROOTCOREDIR"] + "/scripts/grid_submit.sh" + rootCoreCompileSh = os.environ["ROOTCOREDIR"] + "/scripts/grid_compile.sh" + rootCoreRunSh = os.environ["ROOTCOREDIR"] + "/scripts/grid_run.sh" + rootCoreSubmitNbSh = os.environ["ROOTCOREDIR"] + "/scripts/grid_submit_nobuild.sh" + rootCoreCompileNbSh = os.environ["ROOTCOREDIR"] + "/scripts/grid_compile_nobuild.sh" + rootCoreShList = [rootCoreSubmitSh, rootCoreCompileSh, rootCoreRunSh] if options.noBuild: rootCoreShList.append(rootCoreSubmitNbSh) if options.noCompile: rootCoreShList.append(rootCoreCompileNbSh) for tmpShFile in rootCoreShList: if not os.path.exists(tmpShFile): - tmpErrMsg = "%s doesn't exist. Please use a newer version of RootCore" % tmpShFile + tmpErrMsg = "%s doesn't exist. Please use a newer version of RootCore" % tmpShFile tmpLog.error(tmpErrMsg) sys.exit(EC_Config) tmpLog.info("copy RootCore packages to current dir") # destination - pandaRootCoreWorkDirName = '__panda_rootCoreWorkDir' - rootCoreDestWorkDir = curDir + '/' + pandaRootCoreWorkDirName + pandaRootCoreWorkDirName = "__panda_rootCoreWorkDir" + rootCoreDestWorkDir = curDir + "/" + pandaRootCoreWorkDirName # add all files to extFile - options.extFile.append(pandaRootCoreWorkDirName + '/.*') + options.extFile.append(pandaRootCoreWorkDirName + "/.*") # add to be deleted on exit delFilesOnExit.append(rootCoreDestWorkDir) if not options.noBuild: - tmpStat = os.system('%s %s' % (rootCoreSubmitSh,rootCoreDestWorkDir)) + tmpStat = os.system("%s %s" % (rootCoreSubmitSh, rootCoreDestWorkDir)) else: - tmpStat = os.system('%s %s' % (rootCoreSubmitNbSh,rootCoreDestWorkDir)) + tmpStat = os.system("%s %s" % (rootCoreSubmitNbSh, rootCoreDestWorkDir)) tmpStat %= 255 if tmpStat != 0: - tmpErrMsg = "%s failed with %s" % (rootCoreSubmitSh,tmpStat) + tmpErrMsg = "%s failed with %s" % (rootCoreSubmitSh, tmpStat) tmpLog.error(tmpErrMsg) sys.exit(EC_Config) # copy build and run scripts - shutil.copy(rootCoreRunSh,rootCoreDestWorkDir) - shutil.copy(rootCoreCompileSh,rootCoreDestWorkDir) + shutil.copy(rootCoreRunSh, rootCoreDestWorkDir) + shutil.copy(rootCoreCompileSh, rootCoreDestWorkDir) if options.noCompile: - shutil.copy(rootCoreCompileNbSh,rootCoreDestWorkDir) + shutil.copy(rootCoreCompileNbSh, rootCoreDestWorkDir) # gather Athena packages archiveName = "" if options.useAthenaPackages: if AthenaUtils.useCMake(): # archive with cpack - archiveName,archiveFullName = AthenaUtils.archiveWithCpack(True,tmpDir,options.verbose) + archiveName, archiveFullName = AthenaUtils.archiveWithCpack(True, tmpDir, options.verbose) # set extFile AthenaUtils.setExtFile(options.extFile) if not options.noBuild: # archive sources - 
archiveName,archiveFullName = AthenaUtils.archiveSourceFiles(workArea,runDir,curDir,tmpDir, - options.verbose,options.gluePackages, - dereferenceSymLinks=options.followLinks, - archiveName=archiveName) + archiveName, archiveFullName = AthenaUtils.archiveSourceFiles( + workArea, + runDir, + curDir, + tmpDir, + options.verbose, + options.gluePackages, + dereferenceSymLinks=options.followLinks, + archiveName=archiveName, + ) else: # archive jobO - archiveName,archiveFullName = AthenaUtils.archiveJobOFiles(workArea,runDir,curDir, - tmpDir,options.verbose, - archiveName=archiveName) + archiveName, archiveFullName = AthenaUtils.archiveJobOFiles( + workArea, + runDir, + curDir, + tmpDir, + options.verbose, + archiveName=archiveName, + ) # archive InstallArea - AthenaUtils.archiveInstallArea(workArea,groupArea,archiveName,archiveFullName, - tmpDir,options.noBuild,options.verbose) + AthenaUtils.archiveInstallArea( + workArea, + groupArea, + archiveName, + archiveFullName, + tmpDir, + options.noBuild, + options.verbose, + ) # gather normal files if True: if options.useAthenaPackages: # go to workArea os.chdir(workArea) # gather files under work dir - tmpLog.info("gathering files under %s/%s" % (workArea,runDir)) + tmpLog.info("gathering files under %s/%s" % (workArea, runDir)) archStartDir = runDir - archStartDir = re.sub('/+$','',archStartDir) + archStartDir = re.sub("/+$", "", archStartDir) else: # go to work dir os.chdir(options.workDir) # gather files under work dir tmpLog.info("gathering files under %s" % options.workDir) - archStartDir = '.' + archStartDir = "." # get files in the working dir if options.noCompile: skippedExt = [] else: - skippedExt = ['.o','.a','.so'] - skippedFlag = False + skippedExt = [".o", ".a", ".so"] + skippedFlag = False workDirFiles = [] if options.followLinks: - osWalkList = os.walk(archStartDir,followlinks=True) + osWalkList = os.walk(archStartDir, followlinks=True) else: osWalkList = os.walk(archStartDir) - for tmpRoot,tmpDirs,tmpFiles in osWalkList: - emptyFlag = True + for tmpRoot, tmpDirs, tmpFiles in osWalkList: + emptyFlag = True for tmpFile in tmpFiles: if options.useAthenaPackages: if os.path.basename(tmpFile) == os.path.basename(archiveFullName): if options.verbose: - print('skip Athena archive %s' % tmpFile) + print("skip Athena archive %s" % tmpFile) continue - tmpPath = '%s/%s' % (tmpRoot,tmpFile) + tmpPath = "%s/%s" % (tmpRoot, tmpFile) # get size try: size = os.path.getsize(tmpPath) except Exception: # skip dead symlink if options.verbose: - type,value,traceBack = sys.exc_info() - print(" Ignore : %s:%s" % (type,value)) + type, value, traceBack = sys.exc_info() + print(" Ignore : %s:%s" % (type, value)) continue # check exclude files excludeFileFlag = False for tmpPatt in AthenaUtils.excludeFile: - if re.search(tmpPatt,tmpPath) is not None: + if re.search(tmpPatt, tmpPath) is not None: excludeFileFlag = True break if excludeFileFlag: @@ -1113,12 +1883,12 @@ def _onExit(dir, files, del_command): break # check root isRoot = False - if re.search('\.root(\.\d+)*$',tmpPath) is not None: + if re.search("\.root(\.\d+)*$", tmpPath) is not None: isRoot = True # extra files isExtra = False for tmpExt in options.extFile: - if re.search(tmpExt+'$',tmpPath) is not None: + if re.search(tmpExt + "$", tmpPath) is not None: isExtra = True break # regular files @@ -1127,7 +1897,7 @@ def _onExit(dir, files, del_command): emptyFlag = False # skipped extensions if isSkippedExt: - print(" skip %s %s" % (str(skippedExt),tmpPath)) + print(" skip %s %s" % (str(skippedExt), 
tmpPath)) skippedFlag = True continue # skip root @@ -1137,28 +1907,28 @@ def _onExit(dir, files, del_command): continue # check size if size > options.maxFileSize: - print(" skip large file %s:%sB>%sB" % (tmpPath,size,options.maxFileSize)) + print(" skip large file %s:%sB>%sB" % (tmpPath, size, options.maxFileSize)) skippedFlag = True continue # remove ./ - tmpPath = re.sub('^\./','',tmpPath) + tmpPath = re.sub("^\./", "", tmpPath) # append workDirFiles.append(tmpPath) if emptyFlag: emptyFlag = False # add empty directory - if emptyFlag and tmpDirs==[] and tmpFiles==[]: - tmpPath = re.sub('^\./','',tmpRoot) + if emptyFlag and tmpDirs == [] and tmpFiles == []: + tmpPath = re.sub("^\./", "", tmpRoot) # check exclude pattern excludePatFlag = False for tmpPatt in AthenaUtils.excludeFile: - if re.search(tmpPatt,tmpPath) is not None: + if re.search(tmpPatt, tmpPath) is not None: excludePatFlag = True break if excludePatFlag: continue # skip tmpDir - if tmpPath.split('/')[-1] == tmpDir.split('/')[-1]: + if tmpPath.split("/")[-1] == tmpDir.split("/")[-1]: continue # append workDirFiles.append(tmpPath) @@ -1169,41 +1939,41 @@ def _onExit(dir, files, del_command): # create archive if options.noBuild and not options.noCompile: # use 'jobO' for noBuild - archiveName = 'jobO.%s.tar' % MiscUtils.wrappedUuidGen() + archiveName = "jobO.%s.tar" % MiscUtils.wrappedUuidGen() else: # use 'sources' for normal build - archiveName = 'sources.%s.tar' % MiscUtils.wrappedUuidGen() - archiveFullName = "%s/%s" % (tmpDir,archiveName) + archiveName = "sources.%s.tar" % MiscUtils.wrappedUuidGen() + archiveFullName = "%s/%s" % (tmpDir, archiveName) # collect files for tmpFile in workDirFiles: # avoid self-archiving if os.path.basename(tmpFile) == os.path.basename(archiveFullName): if options.verbose: - print('skip self-archiving for %s' % tmpFile) + print("skip self-archiving for %s" % tmpFile) continue if os.path.islink(tmpFile): - status,out = commands_get_status_output("tar --exclude '.[a-zA-Z]*' -rh '%s' -f '%s'" % (tmpFile,archiveFullName)) + status, out = commands_get_status_output("tar --exclude '.[a-zA-Z]*' -rh '%s' -f '%s'" % (tmpFile, archiveFullName)) else: - status,out = commands_get_status_output("tar --exclude '.[a-zA-Z]*' -rf '%s' '%s'" % (archiveFullName,tmpFile)) + status, out = commands_get_status_output("tar --exclude '.[a-zA-Z]*' -rf '%s' '%s'" % (archiveFullName, tmpFile)) if options.verbose: print(tmpFile) - if status != 0 or out != '': + if status != 0 or out != "": print(out) # go to tmpdir os.chdir(tmpDir) # make empty if archive doesn't exist if not os.path.exists(archiveFullName): - commands_get_status_output('tar cvf %s --files-from /dev/null ' % archiveName) + commands_get_status_output("tar cvf %s --files-from /dev/null " % archiveName) # compress - status,out = commands_get_status_output('gzip %s' % archiveName) - archiveName += '.gz' - if status !=0 or options.verbose: + status, out = commands_get_status_output("gzip %s" % archiveName) + archiveName += ".gz" + if status != 0 or options.verbose: print(out) # check archive - status,out = commands_get_status_output('ls -l {0}'.format(archiveName)) + status, out = commands_get_status_output("ls -l {0}".format(archiveName)) if options.verbose: print(out) if status != 0: @@ -1214,7 +1984,7 @@ def _onExit(dir, files, del_command): if options.useAthenaPackages: tmpLog.info("checking sandbox") for _ in range(5): - status, out = commands_get_status_output('tar tvfz %s' % archiveName) + status, out = commands_get_status_output("tar tvfz %s" % 
archiveName) if status == 0: break time.sleep(5) @@ -1222,12 +1992,12 @@ def _onExit(dir, files, del_command): tmpLog.error("Failed to expand sandbox. {0}".format(out)) sys.exit(EC_Archive) symlinks = [] - for line in out.split('\n'): + for line in out.split("\n"): items = line.split() - if len(items) > 0 and items[0].startswith('l') and items[-1].startswith('/'): + if len(items) > 0 and items[0].startswith("l") and items[-1].startswith("/"): symlinks.append(line) if symlinks != []: - tmpStr = "Found some unresolved symlinks which may cause a problem\n" + tmpStr = "Found some unresolved symlinks which may cause a problem\n" tmpStr += " See, e.g., http://savannah.cern.ch/bugs/?43885\n" tmpStr += " Please ignore if you believe they are harmless" tmpLog.warning(tmpStr) @@ -1238,17 +2008,17 @@ def _onExit(dir, files, del_command): os.chdir(tmpDir) # use a saved copy if options.noCompile or not options.noBuild: - archiveName = 'sources.%s.tar' % MiscUtils.wrappedUuidGen() - archiveFullName = "%s/%s" % (tmpDir,archiveName) + archiveName = "sources.%s.tar" % MiscUtils.wrappedUuidGen() + archiveFullName = "%s/%s" % (tmpDir, archiveName) else: - archiveName = 'jobO.%s.tar' % MiscUtils.wrappedUuidGen() - archiveFullName = "%s/%s" % (tmpDir,archiveName) + archiveName = "jobO.%s.tar" % MiscUtils.wrappedUuidGen() + archiveFullName = "%s/%s" % (tmpDir, archiveName) # make copy to avoid name duplication - shutil.copy(options.inTarBall,archiveFullName) + shutil.copy(options.inTarBall, archiveFullName) # save - if options.outTarBall != '': - shutil.copy(archiveName,options.outTarBall) + if options.outTarBall != "": + shutil.copy(archiveName, options.outTarBall) # upload source files if not options.noSubmit: @@ -1258,257 +2028,281 @@ def _onExit(dir, files, del_command): use_cache_srv = True else: use_cache_srv = False - status,out = Client.putFile(archiveName,options.verbose,useCacheSrv=use_cache_srv,reuseSandbox=True) - if out.startswith('NewFileName:'): + status, out = Client.putFile( + archiveName, + options.verbose, + useCacheSrv=use_cache_srv, + reuseSandbox=True, + ) + if out.startswith("NewFileName:"): # found the same input sandbox to reuse - archiveName = out.split(':')[-1] - elif out != 'True': + archiveName = out.split(":")[-1] + elif out != "True": print(out) tmpLog.error("failed to upload sandbox with %s" % status) sys.exit(EC_Post) # good run list - if options.goodRunListXML != '': - options.goodRunListXML = PsubUtils.uploadGzippedFile(options.goodRunListXML,curDir,tmpLog,delFilesOnExit, - options.noSubmit,options.verbose) + if options.goodRunListXML != "": + options.goodRunListXML = PsubUtils.uploadGzippedFile( + options.goodRunListXML, + curDir, + tmpLog, + delFilesOnExit, + options.noSubmit, + options.verbose, + ) # special handling - specialHandling = '' + specialHandling = "" if options.express: - specialHandling += 'express,' + specialHandling += "express," if options.debugMode: - specialHandling += 'debug,' + specialHandling += "debug," specialHandling = specialHandling[:-1] - - ##################################################################### # task making # job name - jobName = 'prun.%s' % MiscUtils.wrappedUuidGen() + jobName = "prun.%s" % MiscUtils.wrappedUuidGen() # make task taskParamMap = {} - taskParamMap['taskName'] = options.outDS + taskParamMap["taskName"] = options.outDS if not options.allowTaskDuplication: - taskParamMap['uniqueTaskName'] = True + taskParamMap["uniqueTaskName"] = True if options.vo is None: - taskParamMap['vo'] = 'atlas' + taskParamMap["vo"] = "atlas" 
else: - taskParamMap['vo'] = options.vo - if options.containerImage != '' and options.alrb: - taskParamMap['architecture'] = options.architecture + taskParamMap["vo"] = options.vo + if options.containerImage != "" and options.alrb: + taskParamMap["architecture"] = options.architecture else: - taskParamMap['architecture'] = AthenaUtils.getCmtConfigImg(athenaVer,cacheVer,nightVer,options.cmtConfig, - architecture=options.architecture) - taskParamMap['transUses'] = athenaVer - if athenaVer != '': - taskParamMap['transHome'] = 'AnalysisTransforms'+cacheVer+nightVer + taskParamMap["architecture"] = AthenaUtils.getCmtConfigImg( + athenaVer, + cacheVer, + nightVer, + options.cmtConfig, + architecture=options.architecture, + ) + taskParamMap["transUses"] = athenaVer + if athenaVer != "": + taskParamMap["transHome"] = "AnalysisTransforms" + cacheVer + nightVer else: - taskParamMap['transHome'] = None - if options.containerImage != '' and not options.alrb: - taskParamMap['processingType'] = 'panda-client-{0}-jedi-cont'.format(PandaToolsPkgInfo.release_version) + taskParamMap["transHome"] = None + if options.containerImage != "" and not options.alrb: + taskParamMap["processingType"] = "panda-client-{0}-jedi-cont".format(PandaToolsPkgInfo.release_version) else: - taskParamMap['processingType'] = 'panda-client-{0}-jedi-run'.format(PandaToolsPkgInfo.release_version) - if options.eventPickEvtList != '': - taskParamMap['processingType'] += '-evp' - taskParamMap['waitInput'] = 1 - if options.goodRunListXML != '': - taskParamMap['processingType'] += '-grl' - if options.prodSourceLabel == '': - taskParamMap['prodSourceLabel'] = 'user' + taskParamMap["processingType"] = "panda-client-{0}-jedi-run".format(PandaToolsPkgInfo.release_version) + if options.eventPickEvtList != "": + taskParamMap["processingType"] += "-evp" + taskParamMap["waitInput"] = 1 + if options.goodRunListXML != "": + taskParamMap["processingType"] += "-grl" + if options.prodSourceLabel == "": + taskParamMap["prodSourceLabel"] = "user" else: - taskParamMap['prodSourceLabel'] = options.prodSourceLabel - if options.site != 'AUTO': - taskParamMap['site'] = options.site + taskParamMap["prodSourceLabel"] = options.prodSourceLabel + if options.site != "AUTO": + taskParamMap["site"] = options.site else: - taskParamMap['site'] = None - taskParamMap['excludedSite'] = options.excludedSite + taskParamMap["site"] = None + taskParamMap["excludedSite"] = options.excludedSite if includedSite is not None and includedSite != []: - taskParamMap['includedSite'] = includedSite + taskParamMap["includedSite"] = includedSite else: - taskParamMap['includedSite'] = None + taskParamMap["includedSite"] = None if options.priority is not None: - taskParamMap['currentPriority'] = options.priority + taskParamMap["currentPriority"] = options.priority if options.nFiles > 0: - taskParamMap['nFiles'] = options.nFiles + taskParamMap["nFiles"] = options.nFiles if options.nFilesPerJob is not None: - taskParamMap['nFilesPerJob'] = options.nFilesPerJob - if not options.nGBPerJob in [-1,'MAX']: + taskParamMap["nFilesPerJob"] = options.nFilesPerJob + if not options.nGBPerJob in [-1, "MAX"]: # don't set MAX since it is the defalt on the server side - taskParamMap['nGBPerJob'] = options.nGBPerJob + taskParamMap["nGBPerJob"] = options.nGBPerJob if options.nEventsPerJob > 0: - taskParamMap['nEventsPerJob'] = options.nEventsPerJob + taskParamMap["nEventsPerJob"] = options.nEventsPerJob if options.nEventsPerFile <= 0: - taskParamMap['useRealNumEvents'] = True + 
taskParamMap["useRealNumEvents"] = True else: - taskParamMap['nEventsPerFile'] = options.nEventsPerFile + taskParamMap["nEventsPerFile"] = options.nEventsPerFile if options.nJobs > 0 and options.nEvents < 0: - taskParamMap['nEvents'] = options.nJobs * options.nEventsPerJob - taskParamMap['cliParams'] = fullExecString + taskParamMap["nEvents"] = options.nJobs * options.nEventsPerJob + taskParamMap["cliParams"] = fullExecString if options.noEmail: - taskParamMap['noEmail'] = True + taskParamMap["noEmail"] = True if options.skipScout: - taskParamMap['skipScout'] = True + taskParamMap["skipScout"] = True if options.respectSplitRule: - taskParamMap['respectSplitRule'] = True + taskParamMap["respectSplitRule"] = True if options.respectLB: - taskParamMap['respectLB'] = True + taskParamMap["respectLB"] = True if options.osMatching: - taskParamMap['osMatching'] = True - taskParamMap['osInfo'] = PsubUtils.get_os_information() + taskParamMap["osMatching"] = True + taskParamMap["osInfo"] = PsubUtils.get_os_information() if options.parentTaskID: - taskParamMap['noWaitParent'] = True + taskParamMap["noWaitParent"] = True if options.disableAutoRetry: - taskParamMap['disableAutoRetry'] = 1 + taskParamMap["disableAutoRetry"] = 1 if options.workingGroup is not None: # remove role - taskParamMap['workingGroup'] = options.workingGroup.split('.')[0].split(':')[0] + taskParamMap["workingGroup"] = options.workingGroup.split(".")[0].split(":")[0] if options.official: - taskParamMap['official'] = True - taskParamMap['nMaxFilesPerJob'] = options.maxNFilesPerJob + taskParamMap["official"] = True + taskParamMap["nMaxFilesPerJob"] = options.maxNFilesPerJob if options.useNewCode: - taskParamMap['fixedSandbox'] = archiveName + taskParamMap["fixedSandbox"] = archiveName if options.maxCpuCount > 0: - taskParamMap['walltime'] = -options.maxCpuCount + taskParamMap["walltime"] = -options.maxCpuCount if options.noLoopingCheck: - taskParamMap['noLoopingCheck'] = True + taskParamMap["noLoopingCheck"] = True if options.maxWalltime > 0: - taskParamMap['maxWalltime'] = options.maxWalltime + taskParamMap["maxWalltime"] = options.maxWalltime if options.cpuTimePerEvent > 0: - taskParamMap['cpuTime'] = options.cpuTimePerEvent - taskParamMap['cpuTimeUnit'] = 'HS06sPerEvent' + taskParamMap["cpuTime"] = options.cpuTimePerEvent + taskParamMap["cpuTimeUnit"] = "HS06sPerEvent" if options.fixedCpuTime: - taskParamMap['cpuTimeUnit'] = 'HS06sPerEventFixed' + taskParamMap["cpuTimeUnit"] = "HS06sPerEventFixed" if options.memory > 0: - taskParamMap['ramCount'] = options.memory + taskParamMap["ramCount"] = options.memory if options.fixedRamCount: - taskParamMap['ramCountUnit'] = 'MBPerCoreFixed' + taskParamMap["ramCountUnit"] = "MBPerCoreFixed" else: - taskParamMap['ramCountUnit'] = 'MBPerCore' + taskParamMap["ramCountUnit"] = "MBPerCore" if options.outDiskCount is not None: - taskParamMap['outDiskCount'] = options.outDiskCount - taskParamMap['outDiskUnit'] = 'kBFixed' + taskParamMap["outDiskCount"] = options.outDiskCount + taskParamMap["outDiskUnit"] = "kBFixed" if options.nCore > 1: - taskParamMap['coreCount'] = options.nCore - if options.skipFilesUsedBy != '': - taskParamMap['skipFilesUsedBy'] = options.skipFilesUsedBy - taskParamMap['respectSplitRule'] = True - if options.maxAttempt >0 and options.maxAttempt <= 50: - taskParamMap['maxAttempt'] = options.maxAttempt + taskParamMap["coreCount"] = options.nCore + if options.skipFilesUsedBy != "": + taskParamMap["skipFilesUsedBy"] = options.skipFilesUsedBy + taskParamMap["respectSplitRule"] = True 
+ if options.maxAttempt > 0 and options.maxAttempt <= 50: + taskParamMap["maxAttempt"] = options.maxAttempt if options.useSecrets: - taskParamMap['useSecrets'] = True + taskParamMap["useSecrets"] = True if options.debugMode: - taskParamMap['debugMode'] = True + taskParamMap["debugMode"] = True # source URL if options.vo is None: - matchURL = re.search("(http.*://[^/]+)/",Client.baseURLCSRVSSL) + matchURL = re.search("(http.*://[^/]+)/", Client.baseURLCSRVSSL) else: matchURL = re.search("(http.*://[^/]+)/", Client.baseURLSSL) if matchURL is not None: - taskParamMap['sourceURL'] = matchURL.group(1) + taskParamMap["sourceURL"] = matchURL.group(1) # XML config if options.loadXML is not None: - taskParamMap['loadXML'] = options.loadXML + taskParamMap["loadXML"] = options.loadXML # middle name - if options.addNthFieldOfInFileToLFN != '': - taskParamMap['addNthFieldToLFN'] = options.addNthFieldOfInFileToLFN - taskParamMap['useFileAsSourceLFN'] = True - elif options.addNthFieldOfInDSToLFN != '': - taskParamMap['addNthFieldToLFN'] = options.addNthFieldOfInDSToLFN - if options.containerImage != '' and options.alrb: - taskParamMap['container_name'] = options.containerImage + if options.addNthFieldOfInFileToLFN != "": + taskParamMap["addNthFieldToLFN"] = options.addNthFieldOfInFileToLFN + taskParamMap["useFileAsSourceLFN"] = True + elif options.addNthFieldOfInDSToLFN != "": + taskParamMap["addNthFieldToLFN"] = options.addNthFieldOfInDSToLFN + if options.containerImage != "" and options.alrb: + taskParamMap["container_name"] = options.containerImage if options.directExecInContainer: - taskParamMap['multiStepExec'] = {'preprocess': {'command': '${TRF}', - 'args': '--preprocess ${TRF_ARGS}'}, - 'postprocess' : {'command': '${TRF}', - 'args': '--postprocess ${TRF_ARGS}'}, - 'containerOptions' : {'containerExec': 'echo "=== cat exec script ==="; ' - 'cat __run_main_exec.sh; ' - 'echo; ' - 'echo "=== exec script ==="; ' - '/bin/sh __run_main_exec.sh', - 'containerImage': options.containerImage} - } + taskParamMap["multiStepExec"] = { + "preprocess": {"command": "${TRF}", "args": "--preprocess ${TRF_ARGS}"}, + "postprocess": { + "command": "${TRF}", + "args": "--postprocess ${TRF_ARGS}", + }, + "containerOptions": { + "containerExec": 'echo "=== cat exec script ==="; ' + "cat __run_main_exec.sh; " + "echo; " + 'echo "=== exec script ==="; ' + "/bin/sh __run_main_exec.sh", + "containerImage": options.containerImage, + }, + } if options.alrbArgs is not None: - taskParamMap['multiStepExec']['containerOptions']['execArgs'] = options.alrbArgs + taskParamMap["multiStepExec"]["containerOptions"]["execArgs"] = options.alrbArgs outDatasetName = options.outDS - logDatasetName = re.sub('/$','.log/',options.outDS) + logDatasetName = re.sub("/$", ".log/", options.outDS) # log if not options.noSeparateLog: - taskParamMap['log'] = {'dataset': logDatasetName, - 'container': logDatasetName, - 'type':'template', - 'param_type':'log', - 'value':'{0}.$JEDITASKID.${{SN}}.log.tgz'.format(logDatasetName[:-1]) - } - if options.addNthFieldOfInFileToLFN != '': - loglfn = '{0}.{1}'.format(*logDatasetName.split('.')[:2]) - loglfn += '${MIDDLENAME}.$JEDITASKID._${SN}.log.tgz' - taskParamMap['log']['value'] = loglfn - if options.spaceToken != '': - taskParamMap['log']['token'] = options.spaceToken + taskParamMap["log"] = { + "dataset": logDatasetName, + "container": logDatasetName, + "type": "template", + "param_type": "log", + "value": "{0}.$JEDITASKID.${{SN}}.log.tgz".format(logDatasetName[:-1]), + } + if 
options.addNthFieldOfInFileToLFN != "": + loglfn = "{0}.{1}".format(*logDatasetName.split(".")[:2]) + loglfn += "${MIDDLENAME}.$JEDITASKID._${SN}.log.tgz" + taskParamMap["log"]["value"] = loglfn + if options.spaceToken != "": + taskParamMap["log"]["token"] = options.spaceToken if options.mergeOutput and options.mergeLog: # log merge - mLogDatasetName = re.sub(r'\.log/', r'.merge_log/', logDatasetName) - mLFN = re.sub(r'\.log\.tgz', '.merge_log.tgz', taskParamMap['log']['value']) - data = copy.deepcopy(taskParamMap['log']) - data.update({'dataset': mLogDatasetName, - 'container': mLogDatasetName, - 'param_type': 'output', - 'mergeOnly': True, - 'value': mLFN}) - taskParamMap['log_merge'] = data + mLogDatasetName = re.sub(r"\.log/", r".merge_log/", logDatasetName) + mLFN = re.sub(r"\.log\.tgz", ".merge_log.tgz", taskParamMap["log"]["value"]) + data = copy.deepcopy(taskParamMap["log"]) + data.update( + { + "dataset": mLogDatasetName, + "container": mLogDatasetName, + "param_type": "output", + "mergeOnly": True, + "value": mLFN, + } + ) + taskParamMap["log_merge"] = data # job parameters - taskParamMap['jobParameters'] = [ - {'type':'constant', - 'value': '-j "" --sourceURL ${SURL}', - }, - {'type':'constant', - 'value': '-r {0}'.format(runDir), - }, - ] + taskParamMap["jobParameters"] = [ + { + "type": "constant", + "value": '-j "" --sourceURL ${SURL}', + }, + { + "type": "constant", + "value": "-r {0}".format(runDir), + }, + ] # delimiter - taskParamMap['jobParameters'] += [ - {'type': 'constant', - 'value': '__delimiter__', - 'hidden': True - }, - ] + taskParamMap["jobParameters"] += [ + {"type": "constant", "value": "__delimiter__", "hidden": True}, + ] # build - if options.containerImage == '' or options.useSandbox: + if options.containerImage == "" or options.useSandbox: if options.noBuild and not options.noCompile: - taskParamMap['jobParameters'] += [ - {'type':'constant', - 'value': '-a {0}'.format(archiveName), - }, - ] + taskParamMap["jobParameters"] += [ + { + "type": "constant", + "value": "-a {0}".format(archiveName), + }, + ] else: - taskParamMap['jobParameters'] += [ - {'type':'constant', - 'value': '-l ${LIB}', - }, - ] + taskParamMap["jobParameters"] += [ + { + "type": "constant", + "value": "-l ${LIB}", + }, + ] # output - if options.outputs != '': + if options.outputs != "": outMap = {} dsSuffix = [] dsIndex = 0 - for tmpLFN in options.outputs.split(','): - tmpDsSuffix = '' - if ':' in tmpLFN: - tmpDsSuffix, tmpLFN = tmpLFN.split(':') + for tmpLFN in options.outputs.split(","): + tmpDsSuffix = "" + if ":" in tmpLFN: + tmpDsSuffix, tmpLFN = tmpLFN.split(":") if tmpDsSuffix in dsSuffix: tmpErrMsg = "dataset name suffix '%s' is used for multiple files in --outputs. " % tmpDsSuffix - tmpErrMsg += 'each output must have a unique suffix.' + tmpErrMsg += "each output must have a unique suffix." 
tmpLog.error(tmpErrMsg) sys.exit(EC_Config) dsSuffix.append(tmpDsSuffix) - if tmpLFN.startswith('regex|'): + if tmpLFN.startswith("regex|"): # regex lfn = tmpLFN if not tmpDsSuffix: @@ -1517,164 +2311,177 @@ def _onExit(dir, files, del_command): else: tmpNewLFN = tmpLFN # change * to XYZ and add .tgz - if '*' in tmpNewLFN: - tmpNewLFN = tmpNewLFN.replace('*','XYZ') - tmpNewLFN += '.tgz' - if len(outDatasetName.split('.')) > 2: - lfn = '{0}.{1}'.format(*outDatasetName.split('.')[:2]) + if "*" in tmpNewLFN: + tmpNewLFN = tmpNewLFN.replace("*", "XYZ") + tmpNewLFN += ".tgz" + if len(outDatasetName.split(".")) > 2: + lfn = "{0}.{1}".format(*outDatasetName.split(".")[:2]) else: lfn = outDatasetName[:-1] - if options.addNthFieldOfInDSToLFN != '' or options.addNthFieldOfInFileToLFN != '': - lfn += '${MIDDLENAME}' - lfn += '.$JEDITASKID._${{SN/P}}.{0}'.format(tmpNewLFN) - if tmpDsSuffix == '': + if options.addNthFieldOfInDSToLFN != "" or options.addNthFieldOfInFileToLFN != "": + lfn += "${MIDDLENAME}" + lfn += ".$JEDITASKID._${{SN/P}}.{0}".format(tmpNewLFN) + if tmpDsSuffix == "": tmpDsSuffix = tmpNewLFN - dataset = '{0}_{1}/'.format(outDatasetName[:-1],tmpDsSuffix) - taskParamMap['jobParameters'] += MiscUtils.makeJediJobParam(lfn,dataset,'output',hidden=True, - destination=options.destSE, - token=options.spaceToken, - allowNoOutput=options.allowNoOutput) + dataset = "{0}_{1}/".format(outDatasetName[:-1], tmpDsSuffix) + taskParamMap["jobParameters"] += MiscUtils.makeJediJobParam( + lfn, + dataset, + "output", + hidden=True, + destination=options.destSE, + token=options.spaceToken, + allowNoOutput=options.allowNoOutput, + ) outMap[tmpLFN] = lfn if options.loadXML: - taskParamMap['jobParameters'] += [ - {'type':'constant', - 'value': '-o "${XML_OUTMAP}"', - }, - ] + taskParamMap["jobParameters"] += [ + { + "type": "constant", + "value": '-o "${XML_OUTMAP}"', + }, + ] else: - taskParamMap['jobParameters'] += [ - {'type':'constant', - 'value': '-o "{0}"'.format(str(outMap)), - }, - ] + taskParamMap["jobParameters"] += [ + { + "type": "constant", + "value": '-o "{0}"'.format(str(outMap)), + }, + ] # input - if options.inDS != '': - tmpDict = {'type':'template', - 'param_type':'input', - 'value':'-i "${IN/T}"', - 'dataset':options.inDS, - 'exclude':'\.log\.tgz(\.\d+)*$', - } + if options.inDS != "": + tmpDict = { + "type": "template", + "param_type": "input", + "value": '-i "${IN/T}"', + "dataset": options.inDS, + "exclude": "\.log\.tgz(\.\d+)*$", + } if options.useLogAsInput: - del tmpDict['exclude'] + del tmpDict["exclude"] if options.loadXML is None and not options.notExpandInDS: - tmpDict['expand'] = True + tmpDict["expand"] = True if options.notExpandInDS: - tmpDict['consolidate'] = '.'.join(options.outDS.split('.')[:2]) + '.' + MiscUtils.wrappedUuidGen() + '/' + tmpDict["consolidate"] = ".".join(options.outDS.split(".")[:2]) + "." 
+ MiscUtils.wrappedUuidGen() + "/" if options.nSkipFiles != 0: - tmpDict['offset'] = options.nSkipFiles - if options.match != '': - tmpDict['include'] = options.match - if options.antiMatch != '': - if 'exclude' in tmpDict: - tmpDict['exclude'] += ','+options.antiMatch + tmpDict["offset"] = options.nSkipFiles + if options.match != "": + tmpDict["include"] = options.match + if options.antiMatch != "": + if "exclude" in tmpDict: + tmpDict["exclude"] += "," + options.antiMatch else: - tmpDict['exclude'] = options.antiMatch + tmpDict["exclude"] = options.antiMatch if filesToBeUsed != []: - tmpDict['files'] = filesToBeUsed - taskParamMap['jobParameters'].append(tmpDict) - taskParamMap['dsForIN'] = options.inDS - elif options.pfnList != '': - taskParamMap['pfnList'] = PsubUtils.getListPFN(options.pfnList) + tmpDict["files"] = filesToBeUsed + taskParamMap["jobParameters"].append(tmpDict) + taskParamMap["dsForIN"] = options.inDS + elif options.pfnList != "": + taskParamMap["pfnList"] = PsubUtils.getListPFN(options.pfnList) # use noInput - taskParamMap['noInput'] = True + taskParamMap["noInput"] = True if options.nFiles == 0: - taskParamMap['nFiles'] = len(taskParamMap['pfnList']) - taskParamMap['jobParameters'] += [ - {'type':'constant', - 'value':'-i "${IN/T}"', - }, - ] - elif options.goodRunListXML != '': - tmpDict = {'type':'template', - 'param_type':'input', - 'value':'-i "${IN/T}"', - 'dataset':'%%INDS%%', - 'expand':True, - 'exclude':'\.log\.tgz(\.\d+)*$', - 'files':'%%INLFNLIST%%', - } - taskParamMap['jobParameters'].append(tmpDict) - taskParamMap['dsForIN'] = '%%INDS%%' + taskParamMap["nFiles"] = len(taskParamMap["pfnList"]) + taskParamMap["jobParameters"] += [ + { + "type": "constant", + "value": '-i "${IN/T}"', + }, + ] + elif options.goodRunListXML != "": + tmpDict = { + "type": "template", + "param_type": "input", + "value": '-i "${IN/T}"', + "dataset": "%%INDS%%", + "expand": True, + "exclude": "\.log\.tgz(\.\d+)*$", + "files": "%%INLFNLIST%%", + } + taskParamMap["jobParameters"].append(tmpDict) + taskParamMap["dsForIN"] = "%%INDS%%" else: # no input - taskParamMap['noInput'] = True + taskParamMap["noInput"] = True if options.nEvents > 0: - taskParamMap['nEvents'] = options.nEvents + taskParamMap["nEvents"] = options.nEvents if options.nJobs > 0: - taskParamMap['nEventsPerJob'] = options.nEvents // options.nJobs + taskParamMap["nEventsPerJob"] = options.nEvents // options.nJobs else: # set granularity if options.nEventsPerChunk > 0: - taskParamMap['nEventsPerRange'] = options.nEventsPerChunk + taskParamMap["nEventsPerRange"] = options.nEventsPerChunk else: # use 1/20 by default - taskParamMap['nEventsPerRange'] = options.nEvents // 20 - if taskParamMap['nEventsPerRange'] <= 0: - taskParamMap['nEventsPerRange'] = 1 + taskParamMap["nEventsPerRange"] = options.nEvents // 20 + if taskParamMap["nEventsPerRange"] <= 0: + taskParamMap["nEventsPerRange"] = 1 elif options.nEventsPerJob > 0: - taskParamMap['nEvents'] = options.nEventsPerJob * max(1, options.nJobs) - taskParamMap['nEventsPerJob'] = options.nEventsPerJob + taskParamMap["nEvents"] = options.nEventsPerJob * max(1, options.nJobs) + taskParamMap["nEventsPerJob"] = options.nEventsPerJob else: if options.nJobs > 0: - taskParamMap['nEvents'] = options.nJobs + taskParamMap["nEvents"] = options.nJobs else: - taskParamMap['nEvents'] = 1 - taskParamMap['nEventsPerJob'] = 1 + taskParamMap["nEvents"] = 1 + taskParamMap["nEventsPerJob"] = 1 # exec string if options.loadXML is None: - taskParamMap['jobParameters'] += [ - {'type':'constant', - 
'value': '-p "', - 'padding':False, - }, - ] - taskParamMap['jobParameters'] += PsubUtils.convertParamStrToJediParam(options.jobParams,{},'', - True,False, - includeIO=False) - taskParamMap['jobParameters'] += [ - {'type':'constant', - 'value': '"', - }, - ] + taskParamMap["jobParameters"] += [ + { + "type": "constant", + "value": '-p "', + "padding": False, + }, + ] + taskParamMap["jobParameters"] += PsubUtils.convertParamStrToJediParam(options.jobParams, {}, "", True, False, includeIO=False) + taskParamMap["jobParameters"] += [ + { + "type": "constant", + "value": '"', + }, + ] else: - taskParamMap['jobParameters'] += [ - {'type':'constant', - 'value': '-p "{0}"'.format(options.jobParams), - }, - ] + taskParamMap["jobParameters"] += [ + { + "type": "constant", + "value": '-p "{0}"'.format(options.jobParams), + }, + ] # param for DBR - if options.dbRelease != '': - dbrDS = options.dbRelease.split(':')[0] + if options.dbRelease != "": + dbrDS = options.dbRelease.split(":")[0] # change LATEST to DBR_LATEST - if dbrDS == 'LATEST': - dbrDS = 'DBR_LATEST' - dictItem = {'type':'template', - 'param_type':'input', - 'value':'--dbrFile=${DBR}', - 'dataset':dbrDS, - } - taskParamMap['jobParameters'] += [dictItem] + if dbrDS == "LATEST": + dbrDS = "DBR_LATEST" + dictItem = { + "type": "template", + "param_type": "input", + "value": "--dbrFile=${DBR}", + "dataset": dbrDS, + } + taskParamMap["jobParameters"] += [dictItem] # no expansion if options.notExpandDBR: - dictItem = {'type':'constant', - 'value':'--noExpandDBR', - } - taskParamMap['jobParameters'] += [dictItem] + dictItem = { + "type": "constant", + "value": "--noExpandDBR", + } + taskParamMap["jobParameters"] += [dictItem] # secondary if options.secondaryDSs != {}: inMap = {} streamNames = [] - if options.inDS != '': - inMap['IN'] = 'tmp_IN' - streamNames.append('IN') + if options.inDS != "": + inMap["IN"] = "tmp_IN" + streamNames.append("IN") for tmpDsName in options.secondaryDSs: tmpMap = options.secondaryDSs[tmpDsName] # make template item - streamName = tmpMap['streamName'] + streamName = tmpMap["streamName"] if options.loadXML is None and not options.notExpandSecDSs: expandFlag = True else: @@ -1683,28 +2490,38 @@ def _onExit(dir, files, del_command): reusableAtt = False if streamName in options.reusableSecondary: reusableAtt = True - dictItem = MiscUtils.makeJediJobParam('${'+streamName+'}',tmpDsName,'input',hidden=True, - expand=expandFlag,include=tmpMap['pattern'],offset=tmpMap['nSkip'], - nFilesPerJob=tmpMap['nFiles'],reusableAtt=reusableAtt, - outDS=options.outDS, file_list=tmpMap['files']) - taskParamMap['jobParameters'] += dictItem - inMap[streamName] = 'tmp_'+streamName + dictItem = MiscUtils.makeJediJobParam( + "${" + streamName + "}", + tmpDsName, + "input", + hidden=True, + expand=expandFlag, + include=tmpMap["pattern"], + offset=tmpMap["nSkip"], + nFilesPerJob=tmpMap["nFiles"], + reusableAtt=reusableAtt, + outDS=options.outDS, + file_list=tmpMap["files"], + ) + taskParamMap["jobParameters"] += dictItem + inMap[streamName] = "tmp_" + streamName streamNames.append(streamName) # make constant item strInMap = str(inMap) # set placeholders for streamName in streamNames: - strInMap = strInMap.replace("'tmp_"+streamName+"'",'${'+streamName+'/T}') - dictItem = {'type':'constant', - 'value':'--inMap "%s"' % strInMap, - } - taskParamMap['jobParameters'] += [dictItem] + strInMap = strInMap.replace("'tmp_" + streamName + "'", "${" + streamName + "/T}") + dictItem = { + "type": "constant", + "value": '--inMap "%s"' % strInMap, + } + 
taskParamMap["jobParameters"] += [dictItem] # misc - jobParameters = '' + jobParameters = "" # given PFN - if options.pfnList != '': - jobParameters += '--givenPFN ' + if options.pfnList != "": + jobParameters += "--givenPFN " # use Athena packages if options.useAthenaPackages: jobParameters += "--useAthenaPackages " @@ -1715,30 +2532,30 @@ def _onExit(dir, files, del_command): if options.useRootCore: jobParameters += "--useRootCore " # root - if options.rootVer != '': + if options.rootVer != "": jobParameters += "--rootVer %s " % options.rootVer # cmt config - if options.cmtConfig not in ['', 'NULL', None]: + if options.cmtConfig not in ["", "NULL", None]: jobParameters += "--cmtConfig %s " % options.cmtConfig # write input to txt - if options.writeInputToTxt != '': + if options.writeInputToTxt != "": jobParameters += "--writeInputToTxt %s " % options.writeInputToTxt # debug parameters - if options.queueData != '': + if options.queueData != "": jobParameters += "--overwriteQueuedata=%s " % options.queueData # exec string with real output filenames if options.execWithRealFileNames: jobParameters += "--execWithRealFileNames " # container - if options.containerImage != '' and not options.alrb: + if options.containerImage != "" and not options.alrb: jobParameters += "--containerImage {0} ".format(options.containerImage) if options.ctrCvmfs: jobParameters += "--cvmfs " if options.ctrNoX509: jobParameters += "--noX509 " - if options.ctrDatadir != '': + if options.ctrDatadir != "": jobParameters += "--datadir {0} ".format(options.ctrDatadir) - if options.ctrWorkdir != '': + if options.ctrWorkdir != "": jobParameters += "--workdir {0} ".format(options.ctrWorkdir) if options.ctrDebug: jobParameters += "--debug " @@ -1750,29 +2567,30 @@ def _onExit(dir, files, del_command): if options.persistentFile: jobParameters += "--fileToSave={0} --fileToLoad={0} ".format(options.persistentFile) # set task param - if jobParameters != '': - taskParamMap['jobParameters'] += [ - {'type':'constant', - 'value': jobParameters, - }, - ] + if jobParameters != "": + taskParamMap["jobParameters"] += [ + { + "type": "constant", + "value": jobParameters, + }, + ] # force stage-in if options.forceStaged or options.forceStagedSecondary: - taskParamMap['useLocalIO'] = 1 + taskParamMap["useLocalIO"] = 1 # avoid VP if options.avoidVP: - taskParamMap['avoidVP'] = True + taskParamMap["avoidVP"] = True # build step if options.noBuild and not options.noCompile: pass else: - jobParameters = '-i ${IN} -o ${OUT} --sourceURL ${SURL} ' - jobParameters += '-r {0} '.format(runDir) + jobParameters = "-i ${IN} -o ${OUT} --sourceURL ${SURL} " + jobParameters += "-r {0} ".format(runDir) # exec - if options.bexec != '': + if options.bexec != "": jobParameters += '--bexec "{0}" '.format(quote(options.bexec)) # use Athena packages if options.useAthenaPackages: @@ -1787,66 +2605,65 @@ def _onExit(dir, files, del_command): if AthenaUtils.useCMake(): jobParameters += "--useCMake " # root - if options.rootVer != '': + if options.rootVer != "": jobParameters += "--rootVer %s " % options.rootVer # cmt config - if not options.cmtConfig in ['','NULL',None]: + if not options.cmtConfig in ["", "NULL", None]: jobParameters += "--cmtConfig %s " % options.cmtConfig # debug parameters - if options.queueData != '': + if options.queueData != "": jobParameters += "--overwriteQueuedata=%s " % options.queueData # container - if options.containerImage != '' and not options.alrb: + if options.containerImage != "" and not options.alrb: jobParameters += 
"--containerImage {0} ".format(options.containerImage) if options.ctrCvmfs: jobParameters += "--cvmfs " if options.ctrNoX509: jobParameters += "--noX509 " - if options.ctrDatadir != '': + if options.ctrDatadir != "": jobParameters += "--datadir {0} ".format(options.ctrDatadir) - if options.ctrWorkdir != '': + if options.ctrWorkdir != "": jobParameters += "--workdir {0} ".format(options.ctrWorkdir) if options.ctrDebug: jobParameters += "--debug " # set task param - taskParamMap['buildSpec'] = { - 'prodSourceLabel':'panda', - 'archiveName':archiveName, - 'jobParameters':jobParameters, - } - if options.prodSourceLabel != '': - taskParamMap['buildSpec']['prodSourceLabel'] = options.prodSourceLabel + taskParamMap["buildSpec"] = { + "prodSourceLabel": "panda", + "archiveName": archiveName, + "jobParameters": jobParameters, + } + if options.prodSourceLabel != "": + taskParamMap["buildSpec"]["prodSourceLabel"] = options.prodSourceLabel # preprocessing step # good run list - if options.goodRunListXML != '': + if options.goodRunListXML != "": jobParameters = "--goodRunListXML {0} ".format(options.goodRunListXML) - if options.goodRunDataType != '': + if options.goodRunDataType != "": jobParameters += "--goodRunListDataType {0} ".format(options.goodRunDataType) - if options.goodRunProdStep != '': + if options.goodRunProdStep != "": jobParameters += "--goodRunListProdStep {0} ".format(options.goodRunProdStep) - if options.goodRunListDS != '': + if options.goodRunListDS != "": jobParameters += "--goodRunListDS {0} ".format(options.goodRunListDS) jobParameters += "--sourceURL ${SURL} " # set task param - taskParamMap['preproSpec'] = { - 'prodSourceLabel':'panda', - 'jobParameters':jobParameters, - } - if options.prodSourceLabel != '': - taskParamMap['preproSpec']['prodSourceLabel'] = options.prodSourceLabel - + taskParamMap["preproSpec"] = { + "prodSourceLabel": "panda", + "jobParameters": jobParameters, + } + if options.prodSourceLabel != "": + taskParamMap["preproSpec"]["prodSourceLabel"] = options.prodSourceLabel # merging if options.mergeOutput: - jobParameters = '-r {0} '.format(runDir) - if options.mergeScript != '': + jobParameters = "-r {0} ".format(runDir) + if options.mergeScript != "": jobParameters += '-j "{0}" '.format(options.mergeScript) - if options.rootVer != '': + if options.rootVer != "": jobParameters += "--rootVer %s " % options.rootVer - if options.cmtConfig not in ['', 'NULL', None]: + if options.cmtConfig not in ["", "NULL", None]: jobParameters += "--cmtConfig %s " % options.cmtConfig if options.useAthenaPackages: jobParameters += "--useAthenaPackages " @@ -1854,36 +2671,36 @@ def _onExit(dir, files, del_command): jobParameters += "--useCMake " if options.useRootCore: jobParameters += "--useRootCore " - if options.containerImage != '' and not options.alrb: + if options.containerImage != "" and not options.alrb: jobParameters += "--containerImage {0} ".format(options.containerImage) if options.ctrCvmfs: jobParameters += "--cvmfs " if options.ctrNoX509: jobParameters += "--noX509 " - if options.ctrDatadir != '': + if options.ctrDatadir != "": jobParameters += "--datadir {0} ".format(options.ctrDatadir) - if options.ctrWorkdir != '': + if options.ctrWorkdir != "": jobParameters += "--workdir {0} ".format(options.ctrWorkdir) if options.ctrDebug: jobParameters += "--debug " else: if not (options.noBuild and not options.noCompile): - jobParameters += '-l ${LIB} ' + jobParameters += "-l ${LIB} " else: - jobParameters += '-a {0} '.format(archiveName) + jobParameters += "-a {0} 
".format(archiveName) jobParameters += "--sourceURL ${SURL} " - jobParameters += '${TRN_OUTPUT:OUTPUT} ' + jobParameters += "${TRN_OUTPUT:OUTPUT} " if options.mergeLog: - jobParameters += '${TRN_LOG_MERGE:LOG_MERGE}' + jobParameters += "${TRN_LOG_MERGE:LOG_MERGE}" else: - jobParameters += '${TRN_LOG:LOG}' - taskParamMap['mergeSpec'] = {} - taskParamMap['mergeSpec']['useLocalIO'] = 1 - taskParamMap['mergeSpec']['jobParameters'] = jobParameters - taskParamMap['mergeOutput'] = True + jobParameters += "${TRN_LOG:LOG}" + taskParamMap["mergeSpec"] = {} + taskParamMap["mergeSpec"]["useLocalIO"] = 1 + taskParamMap["mergeSpec"]["jobParameters"] = jobParameters + taskParamMap["mergeOutput"] = True # check nGBPerJob - if options.nGBPerMergeJob != 'MAX': + if options.nGBPerMergeJob != "MAX": # convert to int try: options.nGBPerMergeJob = int(options.nGBPerMergeJob) @@ -1894,8 +2711,7 @@ def _onExit(dir, files, del_command): if options.nGBPerMergeJob <= 0: tmpLog.error("--nGBPerMergeJob must be positive") sys.exit(EC_Config) - taskParamMap['nGBPerMergeJob'] = options.nGBPerMergeJob - + taskParamMap["nGBPerMergeJob"] = options.nGBPerMergeJob ##################################################################### # submission @@ -1909,11 +2725,11 @@ def _onExit(dir, files, del_command): print("== parameters ==") print("Site : %s" % options.site) print("Athena : %s" % athenaVer) - if groupArea != '': + if groupArea != "": print("Group Area : %s" % groupArea) - if cacheVer != '': + if cacheVer != "": print("Cache : %s" % cacheVer[1:]) - if nightVer != '': + if nightVer != "": print("Nightly : %s" % nightVer[1:]) print("RunDir : %s" % runDir) print("exec : %s" % options.jobParams) @@ -1922,16 +2738,20 @@ def _onExit(dir, files, del_command): newTaskParamMap = taskParamMap else: # replace input and output - options.inDS = ioItem['inDS'] - options.outDS = ioItem['outDS'] - newTaskParamMap = PsubUtils.replaceInputOutput(taskParamMap, ioItem['inDS'], - ioItem['outDS'], iSubmission) + options.inDS = ioItem["inDS"] + options.outDS = ioItem["outDS"] + newTaskParamMap = PsubUtils.replaceInputOutput(taskParamMap, ioItem["inDS"], ioItem["outDS"], iSubmission) exitCode = 0 - tmpStr = '' + tmpStr = "" taskID = None # check outDS format - if not dry_mode and not PsubUtils.checkOutDsName(options.outDS,options.official,nickName, - options.mergeOutput, options.verbose): + if not dry_mode and not PsubUtils.checkOutDsName( + options.outDS, + options.official, + nickName, + options.mergeOutput, + options.verbose, + ): tmpStr = "invalid output datasetname:%s" % options.outDS tmpLog.error(tmpStr) exitCode = EC_Config @@ -1946,9 +2766,9 @@ def _onExit(dir, files, del_command): tmpKeys = list(newTaskParamMap) tmpKeys.sort() for tmpKey in tmpKeys: - print('%s : %s' % (tmpKey, newTaskParamMap[tmpKey])) + print("%s : %s" % (tmpKey, newTaskParamMap[tmpKey])) if options.dumpTaskParams is not None: - with open(os.path.expanduser(options.dumpTaskParams), 'w') as f: + with open(os.path.expanduser(options.dumpTaskParams), "w") as f: json.dump(newTaskParamMap, f) if get_taskparams: os.chdir(curDir) @@ -1959,19 +2779,23 @@ def _onExit(dir, files, del_command): return newTaskParamMap if not options.noSubmit and exitCode == 0: tmpLog.info("submit {0}".format(options.outDS)) - status,tmpOut = Client.insertTaskParams(newTaskParamMap, options.verbose, properErrorCode=True, - parent_tid=options.parentTaskID) + status, tmpOut = Client.insertTaskParams( + newTaskParamMap, + options.verbose, + properErrorCode=True, + parent_tid=options.parentTaskID, + 
) # result if status != 0: tmpStr = "task submission failed with {0}".format(status) tmpLog.error(tmpStr) exitCode = EC_Submit else: - if tmpOut[0] in [0,3]: + if tmpOut[0] in [0, 3]: tmpStr = tmpOut[1] tmpLog.info(tmpStr) try: - m = re.search('jediTaskID=(\d+)', tmpStr) + m = re.search("jediTaskID=(\d+)", tmpStr) taskID = int(m.group(1)) except Exception: pass @@ -1980,18 +2804,18 @@ def _onExit(dir, files, del_command): tmpLog.error(tmpStr) exitCode = EC_Submit dumpItem = copy.deepcopy(vars(options)) - dumpItem['returnCode'] = exitCode - dumpItem['returnOut'] = tmpStr - dumpItem['jediTaskID'] = taskID + dumpItem["returnCode"] = exitCode + dumpItem["returnOut"] = tmpStr + dumpItem["jediTaskID"] = taskID if len(ioList) > 1: - dumpItem['bulkSeqNumber'] = iSubmission + dumpItem["bulkSeqNumber"] = iSubmission dumpList.append(dumpItem) # go back to current dir os.chdir(curDir) # dump if options.dumpJson is not None: - with open(os.path.expanduser(options.dumpJson), 'w') as f: + with open(os.path.expanduser(options.dumpJson), "w") as f: json.dump(dumpList, f) # succeeded sys.exit(exitCode) diff --git a/pandaclient/PsubUtils.py b/pandaclient/PsubUtils.py index c3acae86..9738708f 100644 --- a/pandaclient/PsubUtils.py +++ b/pandaclient/PsubUtils.py @@ -2,35 +2,39 @@ import re import sys import time + try: from urllib import quote except ImportError: from urllib.parse import quote + +import copy import datetime import gzip -import copy import platform -from . import Client -from . import MiscUtils -from . import PLogger +from . import Client, MiscUtils, PLogger +from .MiscUtils import ( + commands_get_output, + commands_get_status_output, + commands_get_status_output_with_env, +) -from .MiscUtils import commands_get_status_output, commands_get_output, commands_get_status_output_with_env try: long() except Exception: long = int # error code -EC_Config = 10 -EC_Post = 11 +EC_Config = 10 +EC_Post = 11 cacheProxyStatus = None cacheVomsStatus = None cacheActimeStatus = None -cacheVomsFQAN = '' -cacheActime = '' +cacheVomsFQAN = "" +cacheActime = "" cacheLastUpdate = None cacheVomsInfo = None @@ -45,13 +49,13 @@ def resetCacheValues(): global cacheLastUpdate global cacheVomsInfo timeNow = datetime.datetime.utcnow() - if cacheLastUpdate is None or (timeNow-cacheLastUpdate) > datetime.timedelta(minutes=60): + if cacheLastUpdate is None or (timeNow - cacheLastUpdate) > datetime.timedelta(minutes=60): cacheLastUpdate = timeNow cacheProxyStatus = None cacheVomsStatus = None cacheActimeStatus = None - cacheVomsFQAN = '' - cacheActime = '' + cacheVomsFQAN = "" + cacheActime = "" cacheVomsInfo = None @@ -62,25 +66,25 @@ def get_proxy_info(force, verbose): # get logger tmpLog = PLogger.getPandaLogger() if Client.use_x509_no_grid(): - if 'PANDA_NICKNAME' not in os.environ: + if "PANDA_NICKNAME" not in os.environ: status = 1 - nickname = '' - tmpLog.error('PANDA_NICKNAME is not defined') + nickname = "" + tmpLog.error("PANDA_NICKNAME is not defined") else: status = 0 - nickname = os.environ['PANDA_NICKNAME'] + nickname = os.environ["PANDA_NICKNAME"] cacheVomsInfo = (status, (nickname,)) elif not Client.use_oidc(): # check grid-proxy gridSrc = Client._getGridSrc() - com = '%s voms-proxy-info --all --e' % gridSrc + com = "%s voms-proxy-info --all --e" % gridSrc if verbose: tmpLog.debug(com) - status,out = commands_get_status_output_with_env(com) + status, out = commands_get_status_output_with_env(com) if verbose: tmpLog.debug(status % 255) tmpLog.debug(out) - cacheVomsInfo = status,out + cacheVomsInfo = status, out 
else: # OIDC uid, groups, nickname = Client.get_user_name_from_token() @@ -99,28 +103,29 @@ def check_proxy(verbose, voms_role, refresh_info=False, generate_new=True): if voms_role is None: return True # check role - for tmpItem in out.split('\n'): - if not tmpItem.startswith('attribute'): + for tmpItem in out.split("\n"): + if not tmpItem.startswith("attribute"): continue - role = voms_role.split(':')[-1] + role = voms_role.split(":")[-1] if role in tmpItem: return True if not generate_new or Client.use_oidc() or Client.use_x509_no_grid(): return False # generate proxy import getpass + tmpLog = PLogger.getPandaLogger() tmpLog.info("Need to generate a grid proxy") - gridPassPhrase = getpass.getpass('Enter GRID pass phrase for this identity:\n').replace('$', '\$').replace('"', r'\"') + gridPassPhrase = getpass.getpass("Enter GRID pass phrase for this identity:\n").replace("$", "\$").replace('"', r"\"") gridSrc = Client._getGridSrc() com = '%s echo "%s" | voms-proxy-init -pwstdin ' % (gridSrc, gridPassPhrase) com_msg = '%s echo "*****" | voms-proxy-init -pwstdin ' % gridSrc if voms_role is None: - com += '-voms atlas' - com_msg += '-voms atlas' + com += "-voms atlas" + com_msg += "-voms atlas" else: - com += '-voms %s' % voms_role - com_msg += '-voms %s' % voms_role + com += "-voms %s" % voms_role + com_msg += "-voms %s" % voms_role if verbose: tmpLog.debug(com_msg) status, output = commands_get_status_output_with_env(com) @@ -133,10 +138,9 @@ def check_proxy(verbose, voms_role, refresh_info=False, generate_new=True): return check_proxy(verbose, voms_role, refresh_info=True, generate_new=False) - # get nickname def getNickname(verbose=False): - nickName = '' + nickName = "" status, output = get_proxy_info(False, verbose) # OIDC if Client.use_oidc(): @@ -145,42 +149,67 @@ def getNickname(verbose=False): if Client.use_x509_no_grid(): return output[0] # X509 - for line in output.split('\n'): - if line.startswith('attribute'): - match = re.search('nickname =\s*([^\s]+)\s*\(.*\)',line) + for line in output.split("\n"): + if line.startswith("attribute"): + match = re.search("nickname =\s*([^\s]+)\s*\(.*\)", line) if match is not None: nickName = match.group(1) break # check - if nickName == '': + if nickName == "": # get logger tmpLog = PLogger.getPandaLogger() - wMessage = 'Could not get nickname by using voms-proxy-info which gave\n\n' + wMessage = "Could not get nickname by using voms-proxy-info which gave\n\n" wMessage += output - wMessage += '\nPlease register nickname to ATLAS VO via\n\n' - wMessage += ' https://lcg-voms2.cern.ch:8443/voms/atlas/vomrs\n' - wMessage += ' [Member Info] -> [Edit Personal Info]' - print('') + wMessage += "\nPlease register nickname to ATLAS VO via\n\n" + wMessage += " https://lcg-voms2.cern.ch:8443/voms/atlas/vomrs\n" + wMessage += " [Member Info] -> [Edit Personal Info]" + print("") tmpLog.warning(wMessage) - print('') + print("") return nickName # set Rucio accounting -def setRucioAccount(account,appid,forceSet): - if forceSet or 'RUCIO_ACCOUNT' not in os.environ: - os.environ['RUCIO_ACCOUNT'] = account - if forceSet or 'RUCIO_APPID' not in os.environ: - os.environ['RUCIO_APPID'] = appid +def setRucioAccount(account, appid, forceSet): + if forceSet or "RUCIO_ACCOUNT" not in os.environ: + os.environ["RUCIO_ACCOUNT"] = account + if forceSet or "RUCIO_APPID" not in os.environ: + os.environ["RUCIO_APPID"] = appid # check name of output dataset -def checkOutDsName(outDS,official,nickName='',mergeOutput=False,verbose=False): +def checkOutDsName(outDS, official, 
nickName="", mergeOutput=False, verbose=False): # get logger tmpLog = PLogger.getPandaLogger() # check NG chars for SE - for tmpChar in ['%','|',';','>','<','?','\'','"','(',')','$','@','*',':', - '=','&','^','#','\\','@','[',']','{','}','`']: + for tmpChar in [ + "%", + "|", + ";", + ">", + "<", + "?", + "'", + '"', + "(", + ")", + "$", + "@", + "*", + ":", + "=", + "&", + "^", + "#", + "\\", + "@", + "[", + "]", + "{", + "}", + "`", + ]: if tmpChar in outDS: errStr = 'invalid character "%s" is used in --outDS' % tmpChar tmpLog.error(errStr) @@ -192,53 +221,53 @@ def checkOutDsName(outDS,official,nickName='',mergeOutput=False,verbose=False): prodGroups = [] if Client.use_oidc(): for tmpLine in output[1]: - tmpItems = tmpLine.split('/') + tmpItems = tmpLine.split("/") if len(tmpItems) != 2: continue - tmpVO, tmpRole = tmpLine.split('/') - if tmpRole == 'production': + tmpVO, tmpRole = tmpLine.split("/") + if tmpRole == "production": prodGroups.append(tmpVO) else: - for tmpLine in output.split('\n'): - match = re.search('/([^/]+)/Role=production',tmpLine) + for tmpLine in output.split("\n"): + match = re.search("/([^/]+)/Role=production", tmpLine) if match is not None: # ignore atlas production role - if not match.group(1) in ['atlas']: + if not match.group(1) in ["atlas"]: prodGroups.append(match.group(1)) # no production role if prodGroups == []: - errStr = "The --official option requires production role. Please use the --voms option to set production role;\n" + errStr = "The --official option requires production role. Please use the --voms option to set production role;\n" errStr += " e.g., --voms atlas:/atlas/phys-higgs/Role=production\n" errStr += "If you don't have production role for the group please request it in ATLAS VO first" tmpLog.error(errStr) return False # loop over all prefixes - allowedPrefix = ['group'] + allowedPrefix = ["group"] for tmpPrefix in allowedPrefix: for tmpGroup in prodGroups: - tmpPattO = '^'+tmpPrefix+'\d{2}'+'\.'+tmpGroup+'\.' - tmpPattN = '^'+tmpPrefix+'\.'+tmpGroup+'\.' - if re.search(tmpPattO,outDS) is not None or re.search(tmpPattN,outDS) is not None: + tmpPattO = "^" + tmpPrefix + "\d{2}" + "\." + tmpGroup + "\." + tmpPattN = "^" + tmpPrefix + "\." + tmpGroup + "\." + if re.search(tmpPattO, outDS) is not None or re.search(tmpPattN, outDS) is not None: return True # didn't match - errStr = "Your proxy is allowed to produce official datasets\n" + errStr = "Your proxy is allowed to produce official datasets\n" errStr += " with the following prefix\n" for tmpPrefix in allowedPrefix: for tmpGroup in prodGroups: - tmpPattN = '%s.%s' % (tmpPrefix,tmpGroup) + tmpPattN = "%s.%s" % (tmpPrefix, tmpGroup) errStr += " %s\n" % tmpPattN errStr += "If you have production role for another group please use the --voms option to set the role\n" errStr += " e.g., --voms atlas:/atlas/phys-higgs/Role=production\n" tmpLog.error(errStr) return False # check output dataset format - matStrN = '^user\.'+nickName+'\.' - if nickName == '' or re.match(matStrN,outDS) is None: - if nickName == '': + matStrN = "^user\." + nickName + "\." 
+ if nickName == "" or re.match(matStrN, outDS) is None: + if nickName == "": errStr = "Could not get nickname from voms proxy\n" else: - outDsPrefixN = 'user.%s' % nickName - errStr = "outDS must be '%s.'\n" % outDsPrefixN + outDsPrefixN = "user.%s" % nickName + errStr = "outDS must be '%s.'\n" % outDsPrefixN errStr += " e.g., %s.test1234" % outDsPrefixN tmpLog.error(errStr) return False @@ -248,10 +277,10 @@ def checkOutDsName(outDS,official,nickName='',mergeOutput=False,verbose=False): maxLengthCont = 120 else: maxLengthCont = 132 - if outDS.endswith('/'): + if outDS.endswith("/"): # container if len(outDS) > maxLengthCont: - tmpErrStr = "The name of the output dataset container is too long (%s). " % len(outDS) + tmpErrStr = "The name of the output dataset container is too long (%s). " % len(outDS) tmpErrStr += "The length must be less than %s " % maxLengthCont if mergeOutput: tmpErrStr += "when --mergeOutput is used. " @@ -263,8 +292,7 @@ def checkOutDsName(outDS,official,nickName='',mergeOutput=False,verbose=False): else: # dataset if len(outDS) > maxLength: - tmpLog.error("output datasetname is too long (%s). The length must be less than %s" % \ - (len(outDS),maxLength)) + tmpLog.error("output datasetname is too long (%s). The length must be less than %s" % (len(outDS), maxLength)) return False return True @@ -274,24 +302,24 @@ def convSysArgv(argv=None): if argv is None: argv = sys.argv # job params - if 'PANDA_EXEC_STRING' in os.environ: - paramStr = os.environ['PANDA_EXEC_STRING'] + if "PANDA_EXEC_STRING" in os.environ: + paramStr = os.environ["PANDA_EXEC_STRING"] else: - paramStr = argv[0].split('/')[-1] + paramStr = argv[0].split("/")[-1] for item in argv[1:]: # remove option - match = re.search('(^-[^=]+=)(.+)',item) + match = re.search("(^-[^=]+=)(.+)", item) noSpace = False if match is not None: - paramStr += ' %s' % match.group(1) + paramStr += " %s" % match.group(1) item = match.group(2) noSpace = True if not noSpace: - paramStr += ' ' - match = re.search('(\*| |\')',item) + paramStr += " " + match = re.search("(\*| |')", item) if match is None: # normal parameters - paramStr += '%s' % item + paramStr += "%s" % item else: # quote string paramStr += '"%s"' % item @@ -303,18 +331,19 @@ def convSysArgv(argv=None): def isLatestVersion(latestVer): # extract local version numbers import PandaToolsPkgInfo - match = re.search('^(\d+)\.(\d+)\.(\d+)$',PandaToolsPkgInfo.release_version) + + match = re.search("^(\d+)\.(\d+)\.(\d+)$", PandaToolsPkgInfo.release_version) if match is None: return True - localMajorVer = int(match.group(1)) - localMinorVer = int(match.group(2)) + localMajorVer = int(match.group(1)) + localMinorVer = int(match.group(2)) localBugfixVer = int(match.group(3)) # extract local version numbers - match = re.search('^(\d+)\.(\d+)\.(\d+)$',latestVer) + match = re.search("^(\d+)\.(\d+)\.(\d+)$", latestVer) if match is None: return True - latestMajorVer = int(match.group(1)) - latestMinorVer = int(match.group(2)) + latestMajorVer = int(match.group(1)) + latestMinorVer = int(match.group(2)) latestBugfixVer = int(match.group(3)) # compare if latestMajorVer > localMajorVer: @@ -336,18 +365,18 @@ def checkPandaClientVer(verbose): # get logger tmpLog = PLogger.getPandaLogger() # get latest version number - vStatus,latestVer = Client.getPandaClientVer(verbose) + vStatus, latestVer = Client.getPandaClientVer(verbose) if vStatus == 0: # check version if not isLatestVersion(latestVer): warStr = "A newer version of panda-client is available at 
https://twiki.cern.ch/twiki/bin/view/Atlas/PandaTools." - if os.environ['PANDA_SYS'].startswith('/afs/cern.ch/atlas/offline/external/GRID/DA/panda-client'): + if os.environ["PANDA_SYS"].startswith("/afs/cern.ch/atlas/offline/external/GRID/DA/panda-client"): # if the user uses CERN AFS warStr += " Please execute 'source /afs/cern.ch/atlas/offline/external/GRID/DA/panda-client/latest/etc/panda/panda_setup.[c]sh" else: - warStr += " Please execute '%s --update' if you installed the package locally" % sys.argv[0].split('/')[-1] - print('') - tmpLog.warning(warStr+'\n') + warStr += " Please execute '%s --update' if you installed the package locally" % sys.argv[0].split("/")[-1] + print("") + tmpLog.warning(warStr + "\n") # function for path completion @@ -356,30 +385,29 @@ def completePathFunc(text, status): text = text.strip() # convert ~ useTilde = False - if text.startswith('~'): + if text.startswith("~"): useTilde = True # keep original origText = text # convert text = os.path.expanduser(text) # put / to directories - if (not text.endswith('/')) and os.path.isdir(text): - text += '/' + if (not text.endswith("/")) and os.path.isdir(text): + text += "/" # list dirs/files - lsStat,output = commands_get_status_output('ls -d %s*' % text) + lsStat, output = commands_get_status_output("ls -d %s*" % text) results = [] if lsStat == 0: - for tmpItem in output.split('\n'): + for tmpItem in output.split("\n"): # ignore current and parent dirs - if tmpItem in ['.','..']: + if tmpItem in [".", ".."]: continue # put / - if os.path.isdir(tmpItem) and not tmpItem.endswith('/'): - tmpItem += '/' + if os.path.isdir(tmpItem) and not tmpItem.endswith("/"): + tmpItem += "/" # recover ~ if useTilde: - tmpItem = re.sub('^%s' % os.path.expanduser(origText), - origText,tmpItem) + tmpItem = re.sub("^%s" % os.path.expanduser(origText), origText, tmpItem) # append results.append(tmpItem) # sort @@ -393,35 +421,37 @@ def updatePackage(verbose=False): # get logger tmpLog = PLogger.getPandaLogger() # get the latest version number - tmpLog.info('start version check') - status,output = Client.getPandaClientVer(verbose) + tmpLog.info("start version check") + status, output = Client.getPandaClientVer(verbose) if status != 0: tmpLog.error(output) - tmpLog.error('failed to get the latest version number : %s' % status) + tmpLog.error("failed to get the latest version number : %s" % status) return False # extract version latestVer = output # check version if isLatestVersion(latestVer): - tmpLog.info('you are already using the latest version') + tmpLog.info("you are already using the latest version") return True import PandaToolsPkgInfo - tmpLog.info('update to %s from %s' % (latestVer,PandaToolsPkgInfo.release_version)) + + tmpLog.info("update to %s from %s" % (latestVer, PandaToolsPkgInfo.release_version)) # set readline for auto-complete import readline + readline.parse_and_bind("tab: complete") readline.set_completer(completePathFunc) - readline.parse_and_bind('set show-all-if-ambiguous On') + readline.parse_and_bind("set show-all-if-ambiguous On") # remove +/~ from delimiters curDelimter = readline.get_completer_delims() - curDelimter = re.sub('\+|/|~','',curDelimter) + curDelimter = re.sub("\+|/|~", "", curDelimter) readline.set_completer_delims(curDelimter) # installation type rpmInstall = False - newPrefix = os.environ['PANDA_SYS'] - print('') + newPrefix = os.environ["PANDA_SYS"] + print("") print("Please specify type of installation") - print(" PANDA_SYS=%s" % os.environ['PANDA_SYS']) + print(" PANDA_SYS=%s" % 
os.environ["PANDA_SYS"]) print(" 1. Install to $PANDA_SYS") print(" all files in $PANDA_SYS will be erased first and new ones will") print(" be installed to the same dir") @@ -431,178 +461,197 @@ def updatePackage(verbose=False): print(" existing files in $PANDA_SYS will be patched with new ones") print(" 4. RPM installation") print(" install RPM. sudo is required") - print('') + print("") while True: str = input("Enter 1-4 : ") - if str == '1': + if str == "1": cleanInstall = True break - if str == '2': + if str == "2": cleanInstall = False + # set default def startupHookPath(): - defPath = os.environ['PANDA_SYS'] + defPath = os.environ["PANDA_SYS"] # remove / - defPath = re.sub('/+$','',defPath) + defPath = re.sub("/+$", "", defPath) # use one dir up - defPath = re.sub('/[^/]+$','',defPath) + defPath = re.sub("/[^/]+$", "", defPath) # add / - if not defPath.endswith('/'): - defPath += '/' + if not defPath.endswith("/"): + defPath += "/" # set readline.insert_text(defPath) + # set hook readline.set_startup_hook(startupHookPath) # get location while True: newPrefix = input("Enter new location (TAB for autocomplete): ") - if newPrefix != '': + if newPrefix != "": break # unset hook readline.set_startup_hook(None) break - if str == '3': + if str == "3": cleanInstall = False break - if str == '4': + if str == "4": rpmInstall = True break # get tarball - tmpLog.info('get panda-client-%s' % latestVer) + tmpLog.info("get panda-client-%s" % latestVer) if not rpmInstall: - packageName = 'panda-client-%s.tar.gz' % latestVer + packageName = "panda-client-%s.tar.gz" % latestVer else: - packageName = 'panda-client-%s-1.noarch.rpm' % latestVer - com = 'wget --no-check-certificate --timeout 120 https://atlpan.web.cern.ch/atlpan/panda-client/%s' \ - % packageName + packageName = "panda-client-%s-1.noarch.rpm" % latestVer + com = "wget --no-check-certificate --timeout 120 https://atlpan.web.cern.ch/atlpan/panda-client/%s" % packageName status = os.system(com) status %= 255 if status != 0: - tmpLog.error('failed to download tarball : %s' % status) + tmpLog.error("failed to download tarball : %s" % status) # delete tarball just in case - commands_get_output('rm %s' % packageName) + commands_get_output("rm %s" % packageName) return False # install if not rpmInstall: # expand - status,output = commands_get_status_output('tar xvfz %s' % packageName) + status, output = commands_get_status_output("tar xvfz %s" % packageName) status %= 255 if verbose: tmpLog.debug(status) tmpLog.debug(output) if status != 0: - tmpLog.error('failed to expand tarball : %s' % status) + tmpLog.error("failed to expand tarball : %s" % status) # delete dirs just in case - commands_get_output('rm -rf panda-client-%s' % latestVer) + commands_get_output("rm -rf panda-client-%s" % latestVer) return False # delete tarball - commands_get_output('rm %s' % packageName) + commands_get_output("rm %s" % packageName) # save current dir currentDir = os.path.realpath(os.getcwd()) # keep old release if cleanInstall: - tmpLog.info('keep old version in %s.back' % os.environ['PANDA_SYS']) - backUpDir = '%s.back' % os.environ['PANDA_SYS'] - status,output = commands_get_status_output('rm -rf %s; mv %s %s' % \ - (backUpDir,os.environ['PANDA_SYS'],backUpDir)) + tmpLog.info("keep old version in %s.back" % os.environ["PANDA_SYS"]) + backUpDir = "%s.back" % os.environ["PANDA_SYS"] + status, output = commands_get_status_output("rm -rf %s; mv %s %s" % (backUpDir, os.environ["PANDA_SYS"], backUpDir)) if status != 0: tmpLog.error(output) - tmpLog.error('failed to 
keep old version') + tmpLog.error("failed to keep old version") # delete dirs - commands_get_output('rm -rf panda-client-%s' % latestVer) + commands_get_output("rm -rf panda-client-%s" % latestVer) return False # install result = True - os.chdir('panda-client-%s' % latestVer) - status,output = commands_get_status_output('python setup.py install --prefix=%s' % newPrefix) + os.chdir("panda-client-%s" % latestVer) + status, output = commands_get_status_output("python setup.py install --prefix=%s" % newPrefix) if verbose: tmpLog.debug(output) tmpLog.debug(status) os.chdir(currentDir) status %= 255 if status != 0: - tmpLog.error('failed to install panda-client : %s' % status) + tmpLog.error("failed to install panda-client : %s" % status) # recover old one - commands_get_output('rm -rf %s' % os.environ['PANDA_SYS']) - commands_get_output('mv %s.back %s' % (os.environ['PANDA_SYS'],os.environ['PANDA_SYS'])) + commands_get_output("rm -rf %s" % os.environ["PANDA_SYS"]) + commands_get_output("mv %s.back %s" % (os.environ["PANDA_SYS"], os.environ["PANDA_SYS"])) result = False # cleanup - commands_get_output('rm -rf panda-client-%s' % latestVer) + commands_get_output("rm -rf panda-client-%s" % latestVer) else: # rpm install result = True - newPrefix = '' - com = 'sudo rpm -Uvh %s' % packageName + newPrefix = "" + com = "sudo rpm -Uvh %s" % packageName print(com) status = os.system(com) status %= 255 if status != 0: - tmpLog.error('failed to install rpm : %s' % status) + tmpLog.error("failed to install rpm : %s" % status) result = False # cleanup - commands_get_output('rm -rf %s' % packageName) + commands_get_output("rm -rf %s" % packageName) # return if result: - tmpLog.info('completed') + tmpLog.info("completed") tmpLog.info("please do 'source %s/etc/panda/panda_setup.[c]sh'" % newPrefix) return result # read dataset names from text def readDsFromFile(txtName): - dsList = '' + dsList = "" try: # read lines txt = open(txtName) for tmpLine in txt: # remove \n - tmpLine = re.sub('\n','',tmpLine) + tmpLine = re.sub("\n", "", tmpLine) # remove white spaces tmpLine = tmpLine.strip() # skip comment or empty - if tmpLine.startswith('#') or tmpLine == '': + if tmpLine.startswith("#") or tmpLine == "": continue # append - dsList += '%s,' % tmpLine + dsList += "%s," % tmpLine # close file txt.close() # remove the last comma dsList = dsList[:-1] except Exception: - errType,errValue = sys.exc_info()[:2] + errType, errValue = sys.exc_info()[:2] tmpLog = PLogger.getPandaLogger() - tmpLog.error('cannot read datasets from %s due to %s:%s' \ - % (txtName,errType,errValue)) + tmpLog.error("cannot read datasets from %s due to %s:%s" % (txtName, errType, errValue)) sys.exit(EC_Config) return dsList # convert param string to JEDI params -def convertParamStrToJediParam(encStr,inputMap,outNamePrefix,encode,padding,usePfnList=False,includeIO=True, - extra_in_list=None): +def convertParamStrToJediParam( + encStr, + inputMap, + outNamePrefix, + encode, + padding, + usePfnList=False, + includeIO=True, + extra_in_list=None, +): # list of placeholders for input - inList = ['IN','CAVIN','MININ','LOMBIN','HIMBIN','BHIN','BGIN','BGHIN','BGCIN','BGOIN'] + inList = [ + "IN", + "CAVIN", + "MININ", + "LOMBIN", + "HIMBIN", + "BHIN", + "BGIN", + "BGHIN", + "BGCIN", + "BGOIN", + ] if extra_in_list: inList += extra_in_list # placeholder for seq_number - seqHolder = 'SEQNUMBER' + seqHolder = "SEQNUMBER" # placeholder for output - outHolder = 'SN' + outHolder = "SN" # placeholders with extension - digExList = ['SEQNUMBER', 'FIRSTEVENT'] - 
allExList = digExList + ['DBR'] + digExList = ["SEQNUMBER", "FIRSTEVENT"] + allExList = digExList + ["DBR"] # mapping of client and JEDI placeholders - holders = {'SEQNUMBER' : 'RNDM', - 'DBR' : 'DB', - 'SKIPEVENTS': 'SKIPEVENTS', - 'FIRSTEVENT': None, - 'MAXEVENTS' : None, - 'SEGMENT_NAME': None - } + holders = { + "SEQNUMBER": "RNDM", + "DBR": "DB", + "SKIPEVENTS": "SKIPEVENTS", + "FIRSTEVENT": None, + "MAXEVENTS": None, + "SEGMENT_NAME": None, + } # replace %XYZ with ${XYZ} if includeIO: for tmpH in inList: - encStr = re.sub('%'+tmpH+r'\b', '${'+tmpH+'}', encStr) + encStr = re.sub("%" + tmpH + r"\b", "${" + tmpH + "}", encStr) # replace %XYZ with ${newXYZ} extensionMap = {} for newH in holders: @@ -610,90 +659,90 @@ def convertParamStrToJediParam(encStr,inputMap,outNamePrefix,encode,padding,useP # JEDI-only placeholders if oldH is None: oldH = newH - oldH = '%' + oldH + oldH = "%" + oldH # with extension if newH in allExList: if newH in digExList: - oldH += '(:|=)(\d+)%{0,1}' + oldH += "(:|=)(\d+)%{0,1}" else: - oldH += '(:|=)([^ \'\"\}]+)' + oldH += "(:|=)([^ '\"\}]+)" # look for extension - tmpM = re.search(oldH,encStr) + tmpM = re.search(oldH, encStr) if tmpM is not None: extensionMap[newH] = tmpM.group(2) - newH = '${' + newH + '}' + newH = "${" + newH + "}" else: - newH = '${' + newH + '}' - encStr = re.sub(oldH,newH,encStr) + newH = "${" + newH + "}" + encStr = re.sub(oldH, newH, encStr) # replace %OUT to outDS${SN} if includeIO: - encStr = re.sub('%OUT',outNamePrefix+'.${'+outHolder+'}',encStr) + encStr = re.sub("%OUT", outNamePrefix + ".${" + outHolder + "}", encStr) # make pattern for split - patS = "(" + patS = "(" allKeys = list(holders) if includeIO: allKeys += inList allKeys += [outHolder] for tmpH in allKeys: - patS += '[^=,\"\' \(\{;]*\$\{' + tmpH + '[^\}]*\}[^,\"\' \)\};]*|' - patS = patS[:-1] + patS += "[^=,\"' \(\{;]*\$\{" + tmpH + "[^\}]*\}[^,\"' \)\};]*|" + patS = patS[:-1] patS += ")" # split - tmpItems = re.split(patS,encStr) + tmpItems = re.split(patS, encStr) # make parameters jobParams = [] for tmpItem in tmpItems: # check if a placeholder - matchP = re.search('\$\{([^:\}]+)',tmpItem) - if re.search(patS,tmpItem) is not None and matchP is not None: + matchP = re.search("\$\{([^:\}]+)", tmpItem) + if re.search(patS, tmpItem) is not None and matchP is not None: tmpHolder = matchP.group(1) # set attributes if tmpHolder in inList: # use constant since it is templated in another option e.g., -i - tmpDict = {'type':'constant'} + tmpDict = {"type": "constant"} if encode: - tmpDict['value'] = '${' + tmpHolder + '/E}' + tmpDict["value"] = "${" + tmpHolder + "/E}" else: - tmpDict['value'] = tmpItem + tmpDict["value"] = tmpItem # set dataset if PFN list is not used or the stream is not primary - if not usePfnList or tmpHolder not in ['IN']: - tmpDict['param_type'] = 'input' - tmpDict['dataset'] = inputMap[tmpHolder] + if not usePfnList or tmpHolder not in ["IN"]: + tmpDict["param_type"] = "input" + tmpDict["dataset"] = inputMap[tmpHolder] elif tmpHolder == seqHolder: - tmpDict = {'type':'template'} - tmpDict['value'] = tmpItem - tmpDict['param_type'] = 'pseudo_input' - tmpDict['dataset'] = 'seq_number' + tmpDict = {"type": "template"} + tmpDict["value"] = tmpItem + tmpDict["param_type"] = "pseudo_input" + tmpDict["dataset"] = "seq_number" if tmpHolder in extensionMap: try: - tmpDict['offset'] = long(extensionMap[tmpHolder]) + tmpDict["offset"] = long(extensionMap[tmpHolder]) except Exception: pass elif tmpHolder == outHolder: - tmpDict = {'type':'template'} - 
tmpDict['value'] = tmpItem - tmpDict['param_type'] = 'output' - tmpDict['dataset'] = outNamePrefix + tmpItem.split('}')[-1] + '/' - tmpDict['container'] = tmpDict['dataset'] + tmpDict = {"type": "template"} + tmpDict["value"] = tmpItem + tmpDict["param_type"] = "output" + tmpDict["dataset"] = outNamePrefix + tmpItem.split("}")[-1] + "/" + tmpDict["container"] = tmpDict["dataset"] else: - tmpDict = {'type':'template'} - tmpDict['value'] = tmpItem - tmpDict['param_type'] = 'number' + tmpDict = {"type": "template"} + tmpDict["value"] = tmpItem + tmpDict["param_type"] = "number" if tmpHolder in extensionMap: try: - tmpDict['offset'] = long(extensionMap[tmpHolder]) + tmpDict["offset"] = long(extensionMap[tmpHolder]) except Exception: pass else: # constant - tmpDict = {'type':'constant'} + tmpDict = {"type": "constant"} if encode: - tmpDict['value'] = quote(tmpItem) + tmpDict["value"] = quote(tmpItem) else: - tmpDict['value'] = tmpItem + tmpDict["value"] = tmpItem # no padding if not padding: - tmpDict['padding'] = False + tmpDict["padding"] = False # append jobParams.append(tmpDict) # return @@ -706,11 +755,11 @@ def splitCommaConcatenatedItems(oldList): oldList = [oldList] newList = [] for oldItem in oldList: - temItems = oldItem.split(',') + temItems = oldItem.split(",") for tmpItem in temItems: tmpItem = tmpItem.strip() # remove empty - if tmpItem == '': + if tmpItem == "": continue if tmpItem not in newList: newList.append(tmpItem) @@ -718,28 +767,28 @@ def splitCommaConcatenatedItems(oldList): # upload gzipped file -def uploadGzippedFile(origFileName,currentDir,tmpLog,delFilesOnExit,nosubmit,verbose): +def uploadGzippedFile(origFileName, currentDir, tmpLog, delFilesOnExit, nosubmit, verbose): # open original file - if origFileName.startswith('/'): + if origFileName.startswith("/"): # absolute path - tmpIn = open(origFileName, 'rb') + tmpIn = open(origFileName, "rb") else: # relative path - tmpIn = open('%s/%s' % (currentDir,origFileName), 'rb') + tmpIn = open("%s/%s" % (currentDir, origFileName), "rb") # use unique name for gzip - newFileName = 'pre_%s.dat' % MiscUtils.wrappedUuidGen() - gzipFullPath = '%s/%s.gz' % (currentDir,newFileName) + newFileName = "pre_%s.dat" % MiscUtils.wrappedUuidGen() + gzipFullPath = "%s/%s.gz" % (currentDir, newFileName) delFilesOnExit.append(gzipFullPath) # make gzip - tmpOut = gzip.open(gzipFullPath,'wb') + tmpOut = gzip.open(gzipFullPath, "wb") tmpOut.writelines(tmpIn) tmpOut.close() tmpIn.close() # upload if not nosubmit: tmpLog.info("uploading data file for preprocessing") - status,out = Client.putFile(gzipFullPath,verbose,useCacheSrv=True,reuseSandbox=False) - if status != 0 or out != 'True': + status, out = Client.putFile(gzipFullPath, verbose, useCacheSrv=True, reuseSandbox=False) + if status != 0 or out != "True": # failed print(out) tmpLog.error("Failed with %s" % status) @@ -755,9 +804,9 @@ def getListPFN(pfnFile): rFile = open(pfnFile) inputFileList = [] for line in rFile: - line = re.sub('\n','',line) + line = re.sub("\n", "", line) line.strip() - if line != '' and not line.startswith('#'): + if line != "" and not line.startswith("#"): inputFileList.append(line) rFile.close() inputFileList.sort() @@ -770,87 +819,87 @@ def getListPFN(pfnFile): # check task parameters -def checkTaskParam(taskParamMap,unlimitNumOutputs): +def checkTaskParam(taskParamMap, unlimitNumOutputs): # check output dataset names maxLengthCont = 132 maxNumOutputs = 10 nOutputs = 0 - dict_list = taskParamMap['jobParameters']+[taskParamMap['log']] if 'log' in taskParamMap \ - 
else taskParamMap['jobParameters'] + dict_list = taskParamMap["jobParameters"] + [taskParamMap["log"]] if "log" in taskParamMap else taskParamMap["jobParameters"] for tmpDict in dict_list: - if tmpDict['type'] == 'template' and tmpDict['param_type'] in ['output','log']: - if tmpDict['param_type'] == 'output': + if tmpDict["type"] == "template" and tmpDict["param_type"] in ["output", "log"]: + if tmpDict["param_type"] == "output": nOutputs += 1 tmpErrStr = None # check length of dataset name - if len(tmpDict['dataset']) > maxLengthCont: - tmpErrStr = "The name of an output or log dataset container (%s) is too long (%s). " % (tmpDict['dataset'],len(tmpDict['dataset'])) + if len(tmpDict["dataset"]) > maxLengthCont: + tmpErrStr = "The name of an output or log dataset container (%s) is too long (%s). " % (tmpDict["dataset"], len(tmpDict["dataset"])) tmpErrStr += "The length must be less than %s following DDM definition. " % maxLengthCont tmpErrStr += "Please note that one dataset container is creted per output/log type and " tmpErrStr += "each name is _/ or .log/. " # check non-ascii characters if not tmpErrStr: try: - tmpDict['value'].encode('ascii') + tmpDict["value"].encode("ascii") except Exception: - tmpErrStr = "Output name {0} contains non-ascii charters that are forbidden since they screw up "\ - "the storage".format(tmpDict['value']) + tmpErrStr = "Output name {0} contains non-ascii charters that are forbidden since they screw up " "the storage".format(tmpDict["value"]) if not tmpErrStr: try: - tmpDict['dataset'].encode('ascii') + tmpDict["dataset"].encode("ascii") except Exception: - tmpErrStr = "Dataset name {0} contains non-ascii charters that are forbidden since they screw up "\ - "the storage".format(tmpDict['dataset']) + tmpErrStr = "Dataset name {0} contains non-ascii charters that are forbidden since they screw up " "the storage".format(tmpDict["dataset"]) if tmpErrStr: tmpLog = PLogger.getPandaLogger() tmpLog.error(tmpErrStr) return (EC_Config, tmpErrStr) if not unlimitNumOutputs and nOutputs > maxNumOutputs: - errStr ='Too many output files (=%s) per job. The default limit is %s. ' % (nOutputs,maxNumOutputs) - errStr += 'You can remove the constraint by using the --unlimitNumOutputs option. ' - errStr += 'But please note that having too many outputs per job causes a severe load on the system. ' - errStr += 'You may be banned if you carelessly use the option' + errStr = "Too many output files (=%s) per job. The default limit is %s. " % ( + nOutputs, + maxNumOutputs, + ) + errStr += "You can remove the constraint by using the --unlimitNumOutputs option. " + errStr += "But please note that having too many outputs per job causes a severe load on the system. 
" + errStr += "You may be banned if you carelessly use the option" tmpLog = PLogger.getPandaLogger() tmpLog.error(errStr) - return(EC_Config, errStr) + return (EC_Config, errStr) return (0, None) # replace input and output def replaceInputOutput(taskParamMap, inDS, outDS, seqNum): newTaskParamMap = copy.deepcopy(taskParamMap) - if inDS != '': - oldInDS = taskParamMap['dsForIN'] - subInDSbefore = quote('%DATASET_IN') + if inDS != "": + oldInDS = taskParamMap["dsForIN"] + subInDSbefore = quote("%DATASET_IN") subInDSafter = quote(inDS) - newTaskParamMap['dsForIN'] = inDS - for tmpDict in newTaskParamMap['jobParameters']: - if 'dataset' in tmpDict: - if tmpDict['dataset'] == oldInDS: - tmpDict['dataset'] = inDS - elif tmpDict['type'] == 'constant': - tmpDict['value'] = re.sub(subInDSbefore, subInDSafter, tmpDict['value']) - outDS = re.sub('/$', '', outDS) - oldOutDS = taskParamMap['taskName'] - oldOutDS = re.sub('/$', '', oldOutDS) - subOutDSbefore = quote('%DATASET_OUT') + newTaskParamMap["dsForIN"] = inDS + for tmpDict in newTaskParamMap["jobParameters"]: + if "dataset" in tmpDict: + if tmpDict["dataset"] == oldInDS: + tmpDict["dataset"] = inDS + elif tmpDict["type"] == "constant": + tmpDict["value"] = re.sub(subInDSbefore, subInDSafter, tmpDict["value"]) + outDS = re.sub("/$", "", outDS) + oldOutDS = taskParamMap["taskName"] + oldOutDS = re.sub("/$", "", oldOutDS) + subOutDSbefore = quote("%DATASET_OUT") subOutDSafter = quote(outDS) - subSeqBefore = quote('%BULKSEQNUMBER') + subSeqBefore = quote("%BULKSEQNUMBER") subSeqAfter = str(seqNum) - newTaskParamMap['taskName'] = outDS - newTaskParamMap['log']['dataset'] = re.sub(oldOutDS, outDS, taskParamMap['log']['dataset']) - newTaskParamMap['log']['container'] = re.sub(oldOutDS, outDS, taskParamMap['log']['container']) - newTaskParamMap['log']['value'] = re.sub(oldOutDS, outDS, taskParamMap['log']['value']) - for tmpDict in newTaskParamMap['jobParameters']: - if 'dataset' in tmpDict: - if tmpDict['dataset'].startswith(oldOutDS): - tmpDict['dataset'] = re.sub(oldOutDS, outDS, tmpDict['dataset']) - tmpDict['container'] = re.sub(oldOutDS, outDS, tmpDict['container']) - tmpDict['value'] = re.sub(oldOutDS, outDS, tmpDict['value']) - elif tmpDict['type'] == 'constant': - tmpDict['value'] = re.sub(subOutDSbefore, subOutDSafter, tmpDict['value']) - tmpDict['value'] = re.sub(subSeqBefore, subSeqAfter, tmpDict['value']) - tmpDict['value'] = re.sub(oldOutDS, outDS, tmpDict['value']) + newTaskParamMap["taskName"] = outDS + newTaskParamMap["log"]["dataset"] = re.sub(oldOutDS, outDS, taskParamMap["log"]["dataset"]) + newTaskParamMap["log"]["container"] = re.sub(oldOutDS, outDS, taskParamMap["log"]["container"]) + newTaskParamMap["log"]["value"] = re.sub(oldOutDS, outDS, taskParamMap["log"]["value"]) + for tmpDict in newTaskParamMap["jobParameters"]: + if "dataset" in tmpDict: + if tmpDict["dataset"].startswith(oldOutDS): + tmpDict["dataset"] = re.sub(oldOutDS, outDS, tmpDict["dataset"]) + tmpDict["container"] = re.sub(oldOutDS, outDS, tmpDict["container"]) + tmpDict["value"] = re.sub(oldOutDS, outDS, tmpDict["value"]) + elif tmpDict["type"] == "constant": + tmpDict["value"] = re.sub(subOutDSbefore, subOutDSafter, tmpDict["value"]) + tmpDict["value"] = re.sub(subSeqBefore, subSeqAfter, tmpDict["value"]) + tmpDict["value"] = re.sub(oldOutDS, outDS, tmpDict["value"]) return newTaskParamMap @@ -867,24 +916,40 @@ def extract_voms_proxy_username(): return output[0] if status != 0: return None - for line in output.split('\n'): - if line.startswith('subject'): - subj 
= line.split(':', 1)[-1].lstrip()
-            user_dn = re.sub(r'(/CN=\d+)+$', '', subj.replace('/CN=proxy', ''))
-            username = user_dn.split('=')[-1]
-            username = re.sub('[ |_]\d+', '', username)
-            username = re.sub("[()']", '', username)
+    for line in output.split("\n"):
+        if line.startswith("subject"):
+            subj = line.split(":", 1)[-1].lstrip()
+            user_dn = re.sub(r"(/CN=\d+)+$", "", subj.replace("/CN=proxy", ""))
+            username = user_dn.split("=")[-1]
+            username = re.sub("[ |_]\d+", "", username)
+            username = re.sub("[()']", "", username)
             break
-    name_wo_email = re.sub(r' [a-z][\w\.-]+@[\w\.-]+(?:\.\w+)+', '', username).strip()
-    if ' ' in name_wo_email:
+    name_wo_email = re.sub(r" [a-z][\w\.-]+@[\w\.-]+(?:\.\w+)+", "", username).strip()
+    if " " in name_wo_email:
         username = name_wo_email
     return username


 # warning message when PQ is specified
 def get_warning_for_pq(site, excluded_site, tmp_log):
-    if site not in ['AUTO', None] or excluded_site:
-        tmp_log.warning("The grid queue names could change due to consolidation, migration, etc. "
-                        "Please check with the command listAnalyPQ to use only online/valid queues "
-                        "when site and/or excludedSite options are specified.")
-    return ''
+    if site not in ["AUTO", None] or excluded_site:
+        tmp_log.warning(
+            "The grid queue names could change due to consolidation, migration, etc. "
+            "Please check with the command listAnalyPQ to use only online/valid queues "
+            "when site and/or excludedSite options are specified."
+        )
+    return ""
+
+
+# warning message for memory
+def get_warning_for_memory(memory, is_confirmed, tmp_log):
+    if memory > 4000:
+        tmp_log.warning(
+            "You are requesting {0} MB/core which severely restricts the available resources to run on. "
+            "Your task will take longer or may not run at all. Check if you really need this, "
+            "and maybe "
+            "improve the code.".format(memory)
+        )
+        if not is_confirmed:
+            return MiscUtils.query_yes_no("\nAre you sure with the memory requirement? ")
+    return True
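A minimal usage sketch for the new get_warning_for_memory helper (illustration only; the option wiring shown here is assumed and is not part of this change):

# Hypothetical call site in a submission script such as pathena/prun.
# Assumed: an option parser providing --memory (MB/core) and a -y flag that
# pre-confirms interactive questions; neither option is defined by this diff.
import sys
from argparse import ArgumentParser

from pandaclient import PLogger, PsubUtils

parser = ArgumentParser()
parser.add_argument("--memory", type=int, default=2000, help="requested memory in MB/core")
parser.add_argument("-y", dest="is_confirmed", action="store_true", help="answer yes to all questions")
options = parser.parse_args()

tmp_log = PLogger.getPandaLogger()
# get_warning_for_memory() logs a warning above 4000 MB/core and, unless -y was
# given, asks for confirmation via MiscUtils.query_yes_no(); it returns False
# only when the user declines.
if not PsubUtils.get_warning_for_memory(options.memory, options.is_confirmed, tmp_log):
    sys.exit(1)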