Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

runner re-styling and other fixes #16

Open
wants to merge 3 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 6 additions & 7 deletions mkShapesRDF/shapeAnalysis/BatchSubmission.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
from pathlib import Path
import os
import shutil

from mkShapesRDF.shapeAnalysis.runner import RunAnalysis

class BatchSubmission:
@staticmethod
Expand Down Expand Up @@ -59,6 +59,7 @@ def __init__(
self.tag = tag

self.samples = samples
self.splitSamples = RunAnalysis.splitSamples(samples)
self.d = d
self.batchVars = batchVars
self.jdlconfigfile = jdlconfigfile
Expand All @@ -85,17 +86,15 @@ def createBatch(self, sample):
# python file

txtpy = "from collections import OrderedDict\n"

_samples = [sample]

txtpy += f"samples = {str(_samples)}\n"

## Need to assign dictionary with subset of files to each sample
self.d["samples"][sampleName]["name"] = [(sampleName, sample[1], sample[2])]
for var in self.batchVars:
_var = var
if not isinstance(var, str):
_var = var[0]

if _var == "samples":
txtpy += f"{_var} = {dict([(sampleName, self.d[_var][sampleName])])}\n"
continue
if isinstance(self.d[_var], int) or isinstance(self.d[_var], float):
txtpy += f"{_var} = {self.d[_var]}\n"
Expand All @@ -114,7 +113,7 @@ def createBatches(self):
except Exception as e:
print("Error removing directory", e)

for sample in self.samples:
for sample in self.splitSamples:
self.createBatch(sample)

def submit(self, dryRun=0, queue="workday"):
Expand Down
90 changes: 23 additions & 67 deletions mkShapesRDF/shapeAnalysis/histo_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,13 +27,7 @@ def fold(h, ifrom, ito):
shape = (h.GetNbinsX() + 2, h.GetNbinsY() + 2)
elif isinstance(h, ROOT.TH1):
shape = (h.GetNbinsX() + 2,)
if array.shape != shape:
slices = []
for axis, bins in enumerate(shape):
if array.shape[axis] == bins - 2:
slices.append(slice(1, -1))
elif array.shape[axis] == bins:
slices.append(slice(None))
sumw2 = sumw2.reshape(shape)

if h.GetDimension() == 1:
cont[ito] += cont[ifrom]
Expand Down Expand Up @@ -242,10 +236,10 @@ def postProcessNuisances(filename, samples, aliases, variables, cuts, nuisances)
or nuisances[nuisance].get("kind", "").endswith("square")
):
continue

nuisanceKind = nuisances[nuisance]["kind"].split("_")[1]
print("work for ", nuisance)
# categoriesmap = utils.flatten_cuts(cuts)
subsamplesmap = utils.flatten_samples(samples)

utils.update_variables_with_categories(variables, categoriesmap)
utils.update_nuisances_with_subsamples(nuisances, subsamplesmap)
utils.update_nuisances_with_categories(nuisances, categoriesmap)
Expand All @@ -255,88 +249,50 @@ def postProcessNuisances(filename, samples, aliases, variables, cuts, nuisances)
for variable in variables.keys():
f.cd(f"/{cut}/{variable}")
print("work in ", cut, variable)
histos = [k.GetName() for k in ROOT.gDirectory.GetListOfKeys()]
for sampleName in _samples:
limitSamples = nuisances[nuisance].get("samples", {})
if not (
len(limitSamples.keys()) == 0
or sampleName in limitSamples.keys()
):
continue
histosNameToProcess = list(
filter(
lambda k: k.startswith(
f"histo_{sampleName}_{nuisances[nuisance]['name']}_SPECIAL_NUIS"
),
histos,
)
)
histosToProcess = list(
map(
lambda k: ROOT.gDirectory.Get(k).Clone(),
histosNameToProcess,
)
)
if len(histosToProcess) == 0:
print(
f'No variations found for {sampleName} in {cut}/{variable} for nuisance {nuisances[nuisance]["name"]}',
file=sys.stderr,
)
continue

sys.exit(1)
hNominal = ROOT.gDirectory.Get(f"histo_{sampleName}").Clone()

weights = nuisances[nuisance]["samples"][sampleName]
histoNameToProcess = [f"histo_{sampleName}_{nuisances[nuisance]['name']}_SPECIAL_NUIS_{nuisanceKind}" + str(i) for i in range(0, len(weights))]
hNominal = ROOT.gDirectory.Get(f"histo_{sampleName}")
hName = f"histo_{sampleName}_{nuisances[nuisance]['name']}"
h_up = histosToProcess[0].Clone()
h_do = histosToProcess[0].Clone()
variations = np.empty(
(
len(histosToProcess),
histosToProcess[0].GetNbinsX() + 2,
),
dtype=float,
)
for i in range(len(histosToProcess)):
variations[i, :] = rnp_hist2array(
histosToProcess[i], include_overflow=True, copy=True
)
vnominal = rnp_hist2array(hNominal, include_overflow=True, copy=False)
variations = np.empty((len(weights), vnominal.size), dtype=vnominal.dtype)
arrup = 0
arrdo = 0
if nuisances[nuisance]["kind"].endswith("envelope"):
for ivar in range(len(weights)):
hVar = ROOT.gDirectory.Get(histoNameToProcess[ivar])
variations[ivar, :] = rnp_hist2array(hVar, include_overflow=True, copy=True)
if nuisanceKind == "envelope":
arrup = np.max(variations, axis=0)
arrdo = np.min(variations, axis=0)
elif nuisances[nuisance]["kind"].endswith("rms"):
vnominal = rnp_hist2array(
hNominal, include_overflow=True, copy=True
)
arrnom = np.tile(vnominal, (variations.shape[0], 1))
elif nuisanceKind == "rms":
arrnom = np.tile(vnominal.flat, (variations.shape[0], 1))
arrv = np.sqrt(np.mean(np.square(variations - arrnom), axis=0))
arrup = vnominal + arrv
arrdo = vnominal - arrv
elif nuisances[nuisance]["kind"].endswith("square"):
elif nuisanceKind == 'square':
vnominal = rnp_hist2array(
hNominal, include_overflow=True, copy=True
)
arrnom = np.tile(vnominal, (variations.shape[0], 1))
# up_is_up = variations > arrnom
arrv = np.sqrt(np.sum(np.square(variations - arrnom), axis=0))
arrup = vnominal + arrv
arrdo = vnominal - arrv
# arrup = np.where(up_is_up, vnominal + arrv, vnominal - arrv)
# arrdo = np.where(~up_is_up, vnominal - arrv, vnominal + arrv)
else:
continue
print(arrup)
print(arrdo)
for i in range(len(arrup)): # includes under/over flow
h_up.SetBinContent(i, arrup[i])
h_do.SetBinContent(i, arrdo[i])
print(hName)
h_up.SetName(hName + "Up")
h_up.Write()
h_do.SetName(hName + "Down")
h_do.Write()
for histo in histosNameToProcess:
ROOT.gDirectory.Delete(f"{histo};*")
histoNameUp = hName + "Up"
histoNameDown = hName + "Down"
outputHistoUp = hNominal.Clone(histoNameUp)
outputHistoDown = hNominal.Clone(histoNameDown)
rnp_array2hist(arrup, outputHistoUp)
rnp_array2hist(arrdo, outputHistoDown)
outputHistoUp.Write()
outputHistoDown.Write()
f.Close()
37 changes: 35 additions & 2 deletions mkShapesRDF/shapeAnalysis/latinos/LatinosUtils.py
Original file line number Diff line number Diff line change
Expand Up @@ -96,7 +96,6 @@ def update_variables_with_categories(variables, categoriesmap):
# otherwise we replace the cut with all the categories
cutspec.extend(categories)


def update_nuisances_with_subsamples(nuisances, subsamplesmap):
"""
Update nuisances dict with the flatten subsamples.
Expand All @@ -108,7 +107,7 @@ def update_nuisances_with_subsamples(nuisances, subsamplesmap):
subsamplesmap : list
subsamplesmap as returned by flatten_samples
"""
for nuisance in nuisances.items():
for _,nuisance in nuisances.items():
if "samples" not in nuisance:
continue

Expand Down Expand Up @@ -159,3 +158,37 @@ def update_nuisances_with_categories(nuisances, categoriesmap):

# otherwise we replace the cut with all the categories
cutspec.extend(categories)

def update_aliases_with_subsamples(aliases, subsamplesmap, samples):
    """
    Update aliases dict with the flattened subsamples.

    Every alias ends up with a ``"samples"`` list in which each parent sample
    listed in ``subsamplesmap`` has been replaced by its subsamples.

    Parameters
    ----------
    aliases : dict
        aliases dictionary, will be modified in place
    subsamplesmap : list
        subsamplesmap as returned by flatten_samples; iterated here as
        ``(parent_name, subsample_names)`` pairs
    samples : dict
        samples dictionary, used to assign its keys to aliases that do not
        specify any sample
    """
    for alias in aliases.values():
        if "samples" not in alias:
            # Materialise the keys: a dict view supports neither ``remove``
            # nor ``+=`` and would keep tracking later changes to ``samples``.
            alias["samples"] = list(samples)

        samplespec = alias["samples"]

        for sname, subsamples in subsamplesmap:
            if sname not in samplespec:
                continue

            # Replace the parent by its own subsamples only (not by the
            # subsamples of every parent), mutating the list in place so
            # that callers holding a reference to it see the update.
            samplespec.remove(sname)
            samplespec += subsamples

21 changes: 12 additions & 9 deletions mkShapesRDF/shapeAnalysis/mkShapesRDF.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,6 @@
headersPath = os.path.dirname(os.path.dirname(__file__)) + "/include/headers.hh"
ROOT.gInterpreter.Declare(f'#include "{headersPath}"')


def defaultParser():
parser = argparse.ArgumentParser()

Expand Down Expand Up @@ -196,8 +195,12 @@ def main():
else:
d = ConfigLib.loadLatestPickle(configsFolder, globals())

print(samples.keys())
print(d.keys())
samples = d["samples"]
aliases = d["aliases"]
variables = d["variables"]
cuts = d["cuts"]
nuisances = d["nuisances"]
lumi = d["lumi"]

print("\n\n", batchVars, "\n\n")

Expand Down Expand Up @@ -226,7 +229,10 @@ def main():

_results = {}
sys.path.append(os.path.dirname(runnerPath))
runnerModule = __import__(runnerFile.strip(".py"))
if runnerFile == "default":
runnerModule = __import__("runner")
else:
runnerModule = __import__(runnerFile.strip(".py"))
if not hasattr(runnerModule, "RunAnalysis"):
raise AttributeError(
f"Runner module {runnerFile} from {runnerPath} has no attribute RunAnalysis"
Expand All @@ -239,8 +245,6 @@ def main():
if doBatch == 1:
print("#" * 20, "\n\n", " Running on condor ", "\n\n", "#" * 20)

_samples = RunAnalysis.splitSamples(samples)

from mkShapesRDF.shapeAnalysis.BatchSubmission import BatchSubmission

batch = BatchSubmission(
Expand All @@ -250,7 +254,7 @@ def main():
headersPath,
runnerPath,
tag,
_samples,
samples,
d,
batchVars,
jdlconfigfile,
Expand All @@ -261,10 +265,9 @@ def main():
else:
print("#" * 20, "\n\n", " Running on local machine ", "\n\n", "#" * 20)

_samples = RunAnalysis.splitSamples(samples, False)

runner = RunAnalysis(
_samples,
samples,
aliases,
variables,
cuts,
Expand Down
2 changes: 1 addition & 1 deletion mkShapesRDF/shapeAnalysis/rnp.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ def rnp_array(array, copy=True):
converted numpy array
"""
if not isinstance(array, ROOT.TArrayD):
raise ("Cannot convert ", array, "to TArrayD")
raise ValueError("Cannot convert ", array, "to TArrayD")
dtype = np.double
nx = len(array)
arr = np.ndarray((nx,), dtype=dtype, buffer=array.GetArray())
Expand Down
Loading