Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

add true position and E to hits dataset #127

Closed
wants to merge 6 commits into from
Closed
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
109 changes: 72 additions & 37 deletions src/proto_nd_flow/reco/charge/calib_prompt_hits.py
Original file line number Diff line number Diff line change
Expand Up @@ -77,19 +77,6 @@ class CalibHitBuilder(H5FlowStage):
pedestal_mv=580
))

calib_hits_dtype = np.dtype([
('id', 'u4'),
('x', 'f8'),
('y', 'f8'),
('z', 'f8'),
('t_drift', 'f8'),
('ts_pps', 'u8'),
('io_group', 'u8'),
('io_channel', 'u8'),
('Q', 'f8'),
('E', 'f8')
])

def __init__(self, **params):
super(CalibHitBuilder, self).__init__(**params)

Expand All @@ -109,29 +96,42 @@ def init(self, source_name):
self.load_pedestals()
self.load_configurations()

self.hit_frac_dtype = np.dtype([
('fraction', f'({self.max_contrib_segments},)f8'),
('segment_id', f'({self.max_contrib_segments},)u8')
self.calib_hits_dtype = np.dtype([
('id', 'u4'),
('x', 'f8'),
('y', 'f8'),
('z', 'f8'),
('t_drift', 'f8'),
('ts_pps', 'u8'),
('io_group', 'u8'),
('io_channel', 'u8'),
('Q', 'f8'),
('E', 'f8')
])

# save all config info
self.data_manager.set_attrs(self.calib_hits_dset_name,
classname=self.classname,
class_version=self.class_version,
source_dset=source_name,
packets_dset=self.packets_dset_name,
t0_dset=self.t0_dset_name,
pedestal_file=self.pedestal_file,
configuration_file=self.configuration_file
)

# then set up new datasets
self.data_manager.create_dset(self.calib_hits_dset_name, dtype=self.calib_hits_dtype)
self.data_manager.create_dset(self.mc_hit_frac_dset_name, dtype=self.hit_frac_dtype)
self.data_manager.create_ref(source_name, self.calib_hits_dset_name)
self.data_manager.create_ref(self.calib_hits_dset_name, self.packets_dset_name)
self.data_manager.create_ref(self.events_dset_name, self.calib_hits_dset_name)
self.data_manager.create_ref(self.calib_hits_dset_name, self.mc_hit_frac_dset_name)
# self.hit_frac_dtype = np.dtype([
# ('fraction', f'({self.max_contrib_segments},)f8'),
# ('segment_id', f'({self.max_contrib_segments},)u8')
# ])
#
# # save all config info
# self.data_manager.set_attrs(self.calib_hits_dset_name,
# classname=self.classname,
# class_version=self.class_version,
# source_dset=source_name,
# packets_dset=self.packets_dset_name,
# t0_dset=self.t0_dset_name,
# pedestal_file=self.pedestal_file,
# configuration_file=self.configuration_file
# )
#
# # then set up new datasets
# self.data_manager.create_dset(self.calib_hits_dset_name, dtype=self.calib_hits_dtype)
# self.data_manager.create_dset(self.mc_hit_frac_dset_name, dtype=self.hit_frac_dtype)
# self.data_manager.create_ref(source_name, self.calib_hits_dset_name)
# self.data_manager.create_ref(self.calib_hits_dset_name, self.packets_dset_name)
# self.data_manager.create_ref(self.events_dset_name, self.calib_hits_dset_name)
# self.data_manager.create_ref(self.calib_hits_dset_name, self.mc_hit_frac_dset_name)

def run(self, source_name, source_slice, cache):
super(CalibHitBuilder, self).run(source_name, source_slice, cache)
Expand Down Expand Up @@ -161,6 +161,33 @@ def run(self, source_name, source_slice, cache):

has_mc_truth = packet_seg_bt is not None

if has_mc_truth:
self.calib_hits_dtype = np.dtype(self.calib_hits_dtype.descr + [('x_true_seg_t', 'f8'), ('E_true_recomb_elife', 'f8')])
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What's the rationale for this? I don't think we should be mixing truth and reco information in the same dataset. This is what the back tracking datasets are for, no? We should also try and keep the "reco" datasets consistent for mc and data..

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hits with the true t0 position are required for MLreco label making. Storing all contributions with the new commit (also corrected a unit issue). Given our setup, the output recombination (close to 0.7), lifetime (close to 1), and E values (~0.5) all make sense to me. The differences between x and x_true_seg_t, and between E and E_true_recomb_elife, also fit the expected scale and are consistent with how they are computed.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Truth information should go into the backtracking and truth datasets, not the reco datasets.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you make more concrete suggestions how to structure this? The current proposal makes sense to me in a way that these are extension of what calib_prompt_hits have. Especially E_true_recomb_elife is half true half readout. Putting in mc_truth requires backtracking, not straightforward matching. It doesn't affect the processing of the dataset and the name of the variable is clear about that it uses truth information.


self.hit_frac_dtype = np.dtype([
('fraction', f'({self.max_contrib_segments},)f8'),
('segment_id', f'({self.max_contrib_segments},)u8')
])

# save all config info
self.data_manager.set_attrs(self.calib_hits_dset_name,
classname=self.classname,
class_version=self.class_version,
source_dset=source_name,
packets_dset=self.packets_dset_name,
t0_dset=self.t0_dset_name,
pedestal_file=self.pedestal_file,
configuration_file=self.configuration_file
)

# then set up new datasets
self.data_manager.create_dset(self.calib_hits_dset_name, dtype=self.calib_hits_dtype)
self.data_manager.create_dset(self.mc_hit_frac_dset_name, dtype=self.hit_frac_dtype)
self.data_manager.create_ref(source_name, self.calib_hits_dset_name)
self.data_manager.create_ref(self.calib_hits_dset_name, self.packets_dset_name)
self.data_manager.create_ref(self.events_dset_name, self.calib_hits_dset_name)
self.data_manager.create_ref(self.calib_hits_dset_name, self.mc_hit_frac_dset_name)

# reserve new data
calib_hits_slice = self.data_manager.reserve_data(self.calib_hits_dset_name, n)
if has_mc_truth:
Expand Down Expand Up @@ -196,6 +223,11 @@ def run(self, source_name, source_slice, cache):
drift_d = drift_t * (resources['LArData'].v_drift * resources['RunData'].crs_ticks) / units.cm # convert mm -> cm
x = resources['Geometry'].get_drift_coordinate(packets_arr['io_group'],packets_arr['io_channel'],drift_d)

# true drift position pair
drift_t_true = packet_seg_bt_arr[:,0]['t']
drift_d_true = drift_t_true * (resources['LArData'].v_drift * resources['RunData'].crs_ticks) / units.cm # convert mm -> cm
x_true_seg_t = resources['Geometry'].get_drift_coordinate(packets_arr['io_group'],packets_arr['io_channel'],drift_d_true)

zy = resources['Geometry'].pixel_coordinates_2D[packets_arr['io_group'],
packets_arr['io_channel'], packets_arr['chip_id'], packets_arr['channel_id']]
tile_id = resources['Geometry'].tile_id[packets_arr['io_group'],packets_arr['io_channel']]
Expand All @@ -212,15 +244,17 @@ def run(self, source_name, source_slice, cache):
for unique_id in hit_uniqueid_str])
calib_hits_arr['id'] = calib_hits_slice.start + np.arange(n, dtype=int)
calib_hits_arr['x'] = x
calib_hits_arr['x_true_seg_t'] = x_true_seg_t
calib_hits_arr['y'] = zy[:,1]
calib_hits_arr['z'] = zy[:,0]
calib_hits_arr['ts_pps'] = raw_hits_arr['ts_pps']
calib_hits_arr['t_drift'] = drift_t
calib_hits_arr['io_group'] = packets_arr['io_group']
calib_hits_arr['io_channel'] = packets_arr['io_channel']
calib_hits_arr['Q'] = self.charge_from_dataword(packets_arr['dataword'],vref,vcm,ped)
#!!! hardcoding W_ion, R=0.7, and not accounting for finite electron lifetime
calib_hits_arr['E'] = self.charge_from_dataword(packets_arr['dataword'],vref,vcm,ped) * 23.6e-3 / 0.7
calib_hits_arr['Q'] = self.charge_from_dataword(packets_arr['dataword'],vref,vcm,ped) # ke-
#FIXME supply more realistic dEdx in the recombination; also apply measured electron lifetime
calib_hits_arr['E'] = self.charge_from_dataword(packets_arr['dataword'],vref,vcm,ped) / resources['LArData'].ionization_recombination(mode=2,dEdx=2) * (resources['LArData'].ionization_w / units.MeV) # MeV
calib_hits_arr['E_true_recomb_elife'] = self.charge_from_dataword(packets_arr['dataword'],vref,vcm,ped) / resources['LArData'].ionization_recombination(mode=2,dEdx=packet_seg_bt_arr['dEdx'][:,0]) / resources['LArData'].charge_reduction_lifetime(t_drift=drift_t_true) * (resources['LArData'].ionization_w / units.MeV) # MeV

# create truth-level backtracking dataset
if has_mc_truth:
Expand Down Expand Up @@ -270,3 +304,4 @@ def load_configurations(self):
with open(self.configuration_file, 'r') as infile:
for key, value in json.load(infile).items():
self.configuration[key] = value

37 changes: 29 additions & 8 deletions src/proto_nd_flow/resources/lar_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
import logging
import scipy.interpolate as interpolate
import os
import math

from h5flow.core import H5FlowResource, resources

Expand Down Expand Up @@ -50,6 +51,11 @@ class LArData(H5FlowResource):
default_electron_mobility_params = np.array([551.6, 7158.3, 4440.43, 4.29, 43.63, 0.2053])
default_electron_lifetime = 2.2e3 # us
default_electron_lifetime_file = None
default_box_alpha = 0.93
default_box_beta = 0.207 #0.3 (MeV/cm)^-1 * 1.383 (g/cm^3)* 0.5 (kV/cm), R. Acciarri et al JINST 8 (2013) P08005
default_birks_Ab = 0.800
default_birks_kb = 0.0486 # g/cm2/MeV Amoruso, et al NIM A 523 (2004) 275
default_W_ion = 23.6 # eV/e

electron_lifetime_data_dtype = np.dtype([
('unix_s', 'f8'),
Expand All @@ -64,6 +70,11 @@ def __init__(self, **params):
self.electron_mobility_params = np.array(params.get('electron_mobility_params', self.default_electron_mobility_params))
self._electron_lifetime = params.get('electron_lifetime', self.default_electron_lifetime)
self.electron_lifetime_file = params.get('electron_lifetime_file', self.default_electron_lifetime_file)
self.box_alpha = params.get('box_alpha', self.default_box_alpha)
self.box_beta = params.get('box_beta', self.default_box_beta)
self.birks_Ab = params.get('birks_Ab', self.default_birks_Ab)
self.birks_kb = params.get('birks_kb', self.default_birks_kb)
self.W_ion = params.get('W_ion', self.default_W_ion)

def init(self, source_name):
super(LArData, self).init(source_name)
Expand Down Expand Up @@ -171,13 +182,17 @@ def electron_lifetime(self, unix_ts):
self._electron_lifetime_upper_interp(unix_ts)
)

def charge_reduction_lifetime(self, t_drift):
    '''
    Return the fractional charge surviving after drifting for ``t_drift``,
    assuming exponential attachment losses: ``exp(-t_drift / tau)`` with
    ``tau = self._electron_lifetime``.

    :param t_drift: drift time(s); scalar or array (``np.exp`` broadcasts).
        NOTE(review): assumes ``t_drift`` is in the same units as
        ``_electron_lifetime`` (us per the class default) — TODO confirm the
        caller does not pass clock ticks.

    :returns: charge reduction factor in (0, 1], same shape as ``t_drift``

    Uses the fixed/default lifetime ``self._electron_lifetime``, not the
    time-interpolated ``electron_lifetime(unix_ts)`` lookup.
    '''
    lifetime_red = np.exp(-t_drift / self._electron_lifetime)
    return lifetime_red

@property
def ionization_w(self):
''' Ionization W-value in LAr in keV/e-. Fixed value of 0.0236 '''
if 'ionization_w' in self.data:
return self.data['ionization_w']

self.data['ionization_w'] = 23.6 * units.eV / units.e
self.data['ionization_w'] = self.W_ion * units.eV / units.e
return self.ionization_w

@property
Expand All @@ -189,7 +204,7 @@ def scintillation_w(self):
self.data['scintillation_w'] = 19.5 * units.eV
return self.scintillation_w

def ionization_recombination(self, dedx):
def ionization_recombination(self, mode, dEdx):
'''
Calculate charge recombination factor using Birks Model with parameters:

Expand All @@ -201,12 +216,18 @@ def ionization_recombination(self, dedx):
R_i = dQ * W_i / dE

'''
A = 0.8
K = (0.0486 * units.kV * units.g / units.MeV / (units.cm)**3)
eps = resources['RunData'].e_field * self.density

rv = A / (1 + (K / eps) * dedx)
return rv
# box
if mode == 1:
# Baller, 2013 JINST 8 P08005
csi = self.box_beta * dEdx / (resources['RunData'].e_field * self.density)
recomb = max(0, math.log(self.box_alpha + csi) / csi)

# birks
elif mode == 2:
# Amoruso, et al NIM A 523 (2004) 275
recomb = self.birks_Ab / (1 + self.birks_kb * dEdx / (resources['RunData'].e_field * self.density))

return recomb

def scintillation_recombination(self, dedx):
'''
Expand Down
12 changes: 11 additions & 1 deletion yamls/proto_nd_flow/resources/LArData.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -4,4 +4,14 @@ params:
path: 'lar_info'
# electron_lifetime_file: 'data/module0_flow/ElecLifetimeFit_Module0.npz'
# download link: https://portal.nersc.gov/project/dune/data/Module0/electronLifetime/ElecLifetimeFit_Module0.root
electron_lifetime: 2600 # us
electron_lifetime: 2200 # us
#: Recombination :math:`\alpha` constant for the Box model
box_alpha: 0.93
#: Recombination :math:`\beta` value for the Box model in :math:`(kV/cm)(g/cm^2)/MeV`
box_beta: 0.207 #0.3 (MeV/cm)^-1 * 1.383 (g/cm^3)* 0.5 (kV/cm), R. Acciarri et al JINST 8 (2013) P08005
#: Recombination :math:`A_b` value for the Birks Model
birks_Ab: 0.800
#: Recombination :math:`k_b` value for the Birks Model in :math:`(kV/cm)(g/cm^2)/MeV`
birks_kb: 0.0486 # g/cm2/MeV Amoruso, et al NIM A 523 (2004) 275
#: Average energy expended per ion pair in LAr in :math:`eV` from Phys. Rev. A 10, 1452
W_ion: 23.6
Loading