Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Full sim with CMSSWGeneration/generation #3

Open
wants to merge 15 commits into
base: master
Choose a base branch
from
Original file line number Diff line number Diff line change
Expand Up @@ -137,7 +137,7 @@


process.externalLHEProducer = cms.EDProducer("ExternalLHEProducer",
args = cms.vstring("..//eos/user/g/gpizzati/generation/genproductions/bin/MadGraph5_aMCatNLO/Zjj_cHB_LI_slc7_amd64_gcc700_CMSSW_10_6_0_tarball.tar.xz"),
args = cms.vstring("../Wewk_slc7_amd64_gcc700_CMSSW_10_6_19_tarball.tar.xz"),
nEvents = cms.untracked.uint32(2500),
numberOfParameters = cms.uint32(1),
outputFile = cms.string('cmsgrid_final.lhe'),
Expand Down
9 changes: 4 additions & 5 deletions Generate2018/submit.jdl
Original file line number Diff line number Diff line change
@@ -1,12 +1,11 @@
Universe = vanilla
Executable = wrapper.sh
arguments = $(proc) 2500
request_cpus = 8
arguments = $(proc) 2000
should_transfer_files = YES
Error = log/$(proc)/$(proc).err_$(Step)
Output = log/$(proc)/$(proc).out_$(Step)
Log = log/$(proc)/$(proc).log
transfer_input_files = /home/gpizzati/prova/genproductions/bin/MadGraph5_aMCatNLO/$(proc)_slc7_amd64_gcc700_CMSSW_10_6_19_tarball.tar.xz, /home/gpizzati/CMSSWGeneration/Generate2018/input/SMP-RunIIFall18wmLHEGS-00062_SM_1_cfg.py, /home/gpizzati/CMSSWGeneration/Generate2018/input/SMP-RunIIAutumn18DRPremix-00050_1_cfg.py, /home/gpizzati/CMSSWGeneration/Generate2018/input/SMP-RunIIAutumn18DRPremix-00050_2_cfg.py, /home/gpizzati/CMSSWGeneration/Generate2018/input/SMP-RunIIAutumn18MiniAOD-00050_1_cfg.py, /home/gpizzati/CMSSWGeneration/Generate2018/input/SMP-RunIIAutumn18NanoAODv7-00058_1_cfg.py, /home/gpizzati/CMSSWGeneration/Generate2018/input/CMSSW_10_2_6.tgz, /home/gpizzati/CMSSWGeneration/Generate2018/input/CMSSW_10_6_20.tgz
transfer_output_remaps = "SMP-RunIIAutumn18NanoAODv7-00058.root = /home/gpizzati/CMSSWGeneration/Generate2018/output/$(proc)/$(proc)_$(Cluster)_$(Step).root"
transfer_input_files = /home/abulla/CMSSWGeneration/$(proc)_slc7_amd64_gcc700_CMSSW_10_6_19_tarball.tar.xz, /home/abulla/CMSSWGeneration/Generate2018/input/SMP-RunIIFall18wmLHEGS-00062_SM_1_cfg.py, /home/abulla/CMSSWGeneration/Generate2018/input/SMP-RunIIAutumn18DRPremix-00050_1_cfg.py, /home/abulla/CMSSWGeneration/Generate2018/input/SMP-RunIIAutumn18DRPremix-00050_2_cfg.py, /home/abulla/CMSSWGeneration/Generate2018/input/SMP-RunIIAutumn18MiniAOD-00050_1_cfg.py, /home/abulla/CMSSWGeneration/Generate2018/input/SMP-RunIIAutumn18NanoAODv7-00058_1_cfg.py, /home/abulla/CMSSWGeneration/Generate2018/input/CMSSW_10_2_6.tgz, /home/abulla/CMSSWGeneration/Generate2018/input/CMSSW_10_6_20.tgz
transfer_output_remaps = "SMP-RunIIAutumn18NanoAODv7-00058.root = /home/abulla/CMSSWGeneration/Generate2018/output/$(proc)/$(proc)_$(Cluster)_$(Step).root"
when_to_transfer_output = ON_EXIT
Queue 40 proc in (Zjj_SM_5f_con_h)
Queue 3000 proc in (Wewk)
2 changes: 1 addition & 1 deletion Generate2018/submit.sh
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
#! /bin/bash
sed -i "12s/.*/Queue 40 proc in ($1)/g" submit.jdl
sed -i "12s/.*/Queue 3000 proc in ($1)/g" submit.jdl
mkdir -p output/$1
mkdir -p log/$1
chmod +x submit.jdl
Expand Down
5 changes: 5 additions & 0 deletions Generate2018/wrapper.sh
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,9 @@ cmssw_version2="CMSSW_10_6_20"
# set environment
source /cvmfs/cms.cern.ch/cmsset_default.sh
echo "Opening CMSSW_10_2_6"
ls
# cd /home/abulla/CMSSWGeneration/Generate2018/input
pwd
tar -xzvf $sandbox_name1
rm $sandbox_name1
cd $cmssw_version1/src/
Expand All @@ -31,6 +34,7 @@ echo $1
#sed -i "s/^.*tarball.tar.xz.*$/ args = cms.vstring(\'..\/$gridpack_name\'),/" -i $excute_file1
sed -i 's#^.*tarball.tar.xz.*$# args = cms.vstring(\"..\/'${gridpack_name}'\"),#' -i $excute_file1
# change the number of requested events
echo "OOOOOOOOOOOOOOOO nevent = "$2
sed -i "s/int32(2500)/int32($2)/g" -i $excute_file1
date
cmsRun $excute_file1
Expand All @@ -45,6 +49,7 @@ cmsRun $excute_file4
rm SMP-RunIIAutumn18DRPremix-00050.root
date

ls
rm -rf $cmssw_version1
tar -xzvf $sandbox_name2
rm $sandbox_name2
Expand Down
108 changes: 79 additions & 29 deletions generation/Downloader.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,10 @@
import json
import glob
import sys
import re



def download(path, link, run=True):
r = requests.get(link, verify=False)
if r.status_code!=200:
Expand All @@ -16,7 +20,8 @@ def download(path, link, run=True):
lines = r.text.split("\n")
begin = lines.index(list(filter(lambda k: 'export SCRAM_ARCH' in k, lines))[0])
scram = lines[begin].split("=")[-1]
end = lines.index(list(filter(lambda k: 'EndOfTestFile' == k, lines))[0])
# end = lines.index(list(filter(lambda k: 'EndOfTestFile' == k, lines))[0])
end = None
lines = lines[begin:end]
lines = list(map(lambda k: k+'\n', lines))
if not os.path.isdir(path):
Expand All @@ -40,11 +45,13 @@ def download(path, link, run=True):
name = fs[0].split("/")[-1]
process = subprocess.Popen("cd {}; cp {} ../; cd -".format(path, name),shell=True)
process.wait()

else:
name = list(map(lambda k: k.split("/")[-1], fs))
for n in name:
process = subprocess.Popen("cd {}; cp {} ../; cd -".format(path, n),shell=True)
process.wait()

fs = glob.glob(path+"/CMSSW*")
if len(fs)==1:
name = {"release": fs[0].split("/")[-1], "filename": name}
Expand All @@ -58,37 +65,80 @@ def download(path, link, run=True):
parser = argparse.ArgumentParser()
parser.add_argument("-y","--year", help="Year" , required=True)
parser.add_argument("-s","--steps", help="Step to download", nargs="+", required=True)
parser.add_argument("-f","--fix", help="If true fixes input and output file names of year specified, you should first have created files for the entire flow" , nargs='?', type=int, const=0, default=0)


args = parser.parse_args()

with open("Steps.json") as file:
Steps = json.load(file)
totalYears = ["2018", "2017", "2016"]
if args.year not in totalYears:
print("Year not valid")
print("Valid years are: {}".format(", ".join(totalYears)))
sys.exit(1)
totalSteps = ["lhe", "premix", "miniAOD", "nanoAOD"]
if len(list(filter(lambda k: k not in totalSteps, args.steps)))>0:
print("Steps not valid")
print("Valid steps are: {}".format(", ".join(totalSteps)))
sys.exit(1)
print("\n\nSteps to be performed for year {}".format(args.year))
for step in args.steps:
print(step)
print("\n\n")
for step in args.steps:
print("\n\nNow dowloading config for step: {} and year: {}\n\n".format(step, args.year))
scram,name = download("data/input_{}/{}".format(args.year, step),Steps[args.year][step]['link'])
print(name)
if isinstance(name, str) or isinstance(name, list):
Steps[args.year][step]['filename'] = name
else:
Steps[args.year][step]['filename'] = name["filename"]
Steps[args.year][step]['release'] = name["release"]
Steps[args.year][step]['SCRAM_ARCH'] = scram



with open("Steps.json", "w") as file:
json.dump(Steps,file,indent=4, sort_keys=True)
if args.fix==1:

totalSteps = Steps[args.year]['steps']
files = glob.glob("data/input_{}/*.py".format(args.year))
stepFiles = []
for step in totalSteps:
a = sorted(list(filter(lambda k: step.lower() in k.lower(), files)))
stepFiles.extend(a)
previousFilename = ""
for f in stepFiles:
if previousFilename!= "":
# replace this file's input file name with previousFilename
print("Should write {} as input for {}".format(previousFilename, f))
with open(f) as file:
txt = file.read()
pattern = re.compile('(process.source = cms.Source\("PoolSource",[\n\t ]*fileNames = cms.untracked.vstring\()([^)]*)\)')
r = re.sub(pattern, r"\1{})".format(previousFilename), txt)
with open(f, "w") as file:
file.write(r)


# read f and extract output filename
with open(f) as file:
txt = file.read()
pattern = re.compile('.*cms\.OutputModule\("PoolOutputModule",[\n\t a-zA-Z0-9()\',=.\-:_*]*fileName[^=]*=[^=]*cms.untracked.string\(([^)]*)\)')
a = pattern.search(txt)
if a:
previousFilename = a.group(1)
if "nanoaod" in f.lower():
with open(f,"r") as file:
txt= file.read()
pattern = re.compile("PoolOutputModule")
r = re.sub(pattern, "NanoAODOutputModule", txt)
pattern2 = re.compile("NANOEDMAODSIMoutput")
r2 = re.sub(pattern2, "NANOAODSIMoutput", r)
with open(f, "w") as file:
file.write(r2)

else:

totalYears = Steps.keys()
if args.year not in totalYears:
print("Year not valid")
print("Valid years are: {}".format(", ".join(totalYears)))
sys.exit(1)
totalSteps = Steps[args.year]["steps"]
if len(list(filter(lambda k: k not in totalSteps, args.steps)))>0:
print("Steps not valid")
print("Valid steps are: {}".format(", ".join(totalSteps)))
sys.exit(1)
print("\n\nSteps to be performed for year {}".format(args.year))
for step in args.steps:
print(step)
print("\n\n")
for step in args.steps:
print("\n\nNow dowloading config for step: {} and year: {}\n\n".format(step, args.year))
scram,name = download("data/input_{}/{}".format(args.year, step),Steps[args.year][step]['link'])
print(name)
if isinstance(name, str) or isinstance(name, list):
Steps[args.year][step]['filename'] = name
else:
Steps[args.year][step]['filename'] = name["filename"]
Steps[args.year][step]['release'] = name["release"]
Steps[args.year][step]['SCRAM_ARCH'] = scram



with open("Steps.json", "w") as file:
json.dump(Steps,file,indent=4, sort_keys=True)
Loading