-
Notifications
You must be signed in to change notification settings - Fork 0
/
extract_data.py
286 lines (245 loc) · 12.3 KB
/
extract_data.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
'''
This file takes the output of "signal_selection.py" and applies a
clustering algorithm to the calorimeter data. It then writes a new tree
containing the cluster information.
'''
from ROOT import TFile, TTree
import array
import time
import numpy as np
from clustering import make_clusters_lists
import argparse
# This is Hit object
class Hit:
def __init__(self, sector, pad, layer, energy):
self.sector = sector
self.pad = pad
self.layer = layer
self.energy = energy
rho = 80.9 + 1.8 * pad
phi = np.pi / 24. * (13.5 - sector)
self.x = rho * np.cos(phi)
self.y = rho * np.sin(phi)
if layer == 0 or layer == 1:
self.seed = -1
def make_hits_lists(event):
"""
# Input: single event
# Output: 3 List of Hit objects for tracker1 tracker2, calorimeter
# in this event
# NOTE: in TB20 there is a different tracker.
# And the whole algorithm will be different anyway!
"""
# Create empty lists
hits_calorimeter = []
hits_tracker1 = []
hits_tracker2 = []
# Loop through hits in the event
for sector, pad, layer, energy in zip(event.sector, event.pad,
event.layer, event.energy):
# Geometrical cuts
if (pad < 20 or sector == 0 or sector == 3 or layer == 7):
continue
if layer == 0:
hits_tracker1.append(Hit(sector, pad, layer, energy))
elif layer == 1:
hits_tracker2.append(Hit(sector, pad, layer, energy))
else:
hits_calorimeter.append(Hit(sector, pad, layer, energy))
return hits_tracker1, hits_tracker2, hits_calorimeter
def align_data(hits_tr1, hits_tr2, hits_cal):
    """Correct the y coordinate of every hit for detector misalignment, in place.

    A fixed offset per detector is subtracted from ``hit.y``.  This is a crude
    correction: the calorimeter is shifted as a whole, whereas aligning each
    plane individually would be better.  Nothing is returned; the hit objects
    themselves are modified.

    Args:
        hits_tr1: hits of tracker 1.
        hits_tr2: hits of tracker 2.
        hits_cal: hits of the calorimeter.
    """
    # (hits, y offset) pairs, applied in the order tracker 1, tracker 2,
    # calorimeter.
    corrections = (
        (hits_tr1, -0.14156476251841354),
        (hits_tr2, 0.9273328597379873),
        (hits_cal, -0.785768097219659),
    )
    for detector_hits, offset in corrections:
        for hit in detector_hits:
            hit.y -= offset
# Per-object branch layout as (attribute name, array typecode) pairs, listed in
# the order the branches are created in the output tree.  Typecode 'i' becomes
# ROOT leaf type /I and 'f' becomes /F.
_HIT_FIELDS = (
    ('pad', 'i'), ('sector', 'i'), ('layer', 'i'),
    ('x', 'f'), ('y', 'f'), ('energy', 'f'),
)
_TRACKER_CLUSTER_FIELDS = (
    ('n_pads', 'i'), ('pad', 'f'), ('sector', 'f'),
    ('x', 'f'), ('y', 'f'), ('energy', 'f'),
)
_CAL_CLUSTER_FIELDS = (
    ('n_pads', 'i'), ('n_towers', 'i'), ('pad', 'f'), ('sector', 'f'),
    ('layer', 'f'), ('x', 'f'), ('y', 'f'), ('energy', 'f'),
)

# Fixed buffer sizes (maximum objects per event) for the output branches.
_TRACKER_CAPACITY = 128
_CAL_CAPACITY = 128 * 5


def _book_collection(tree, prefix, kind, fields, capacity):
    """Create the counter branch and the variable-length branches of one collection.

    Branches are named ``<prefix>_n_<kind>s`` (counter) and
    ``<prefix>_<kind>_<attr>`` (one per entry of ``fields``), each array branch
    being sized by the counter branch.

    Returns:
        ``(counter, buffers)`` where ``counter`` is a one-element int array and
        ``buffers`` is a list of ``(attr, array)`` pairs.  The caller must keep
        these alive while the tree is being filled, since the branches were
        given these arrays as their buffers.
    """
    counter_name = '{}_n_{}s'.format(prefix, kind)
    counter = array.array('i', [0])
    tree.Branch(counter_name, counter, counter_name + '/I')

    buffers = []
    for attr, typecode in fields:
        branch_name = '{}_{}_{}'.format(prefix, kind, attr)
        buf = array.array(typecode, [0]) * capacity
        leaflist = '{}[{}]/{}'.format(branch_name, counter_name, typecode.upper())
        tree.Branch(branch_name, buf, leaflist)
        buffers.append((attr, buf))
    return counter, buffers


def _fill_collection(collection, objects):
    """Copy the attributes of ``objects`` into the buffers of one collection.

    Raises:
        IndexError: if there are more objects than the buffers can hold.
    """
    counter, buffers = collection
    n_objects = len(objects)
    capacity = len(buffers[0][1])
    if n_objects > capacity:
        # Fail before touching the buffers instead of part-way through a copy.
        raise IndexError(
            'Event has {} objects but the output buffers hold only {}'.format(
                n_objects, capacity))

    counter[0] = n_objects
    for i, obj in enumerate(objects):
        for attr, buf in buffers:
            buf[i] = getattr(obj, attr)


def main(args):
    """Read events, build hits and clusters, and write them to a new ROOT tree.

    For every event of the ``data`` tree in ``args.path_to_file`` the hits are
    split per detector, aligned in y, clustered with ``make_clusters_lists``
    and stored in the tree ``data`` of ``./extracted_data_RENAME.root``.

    Args:
        args: parsed command-line arguments; only ``path_to_file`` is read.

    Raises:
        OSError: if the input file cannot be opened.
        IndexError: if an event has more hits or clusters than the fixed-size
            output buffers can hold.
    """
    start_time = time.time()

    input_file = TFile.Open(args.path_to_file, "READ")
    if not input_file or input_file.IsZombie():
        raise OSError('Cannot open input file: {}'.format(args.path_to_file))

    output_file = None
    try:
        input_tree = input_file.data
        n_events = input_tree.GetEntries()
        print("Total n events in loaded files: ", n_events)

        # The output file must be created BEFORE the tree; the original author
        # notes that this ordering prevents a memory leak.
        output_file = TFile('./extracted_data_RENAME.root', "RECREATE")
        output_tree = TTree('data', 'Extracted Data')

        # Booking order defines the branch order in the output tree.
        tr1_hits = _book_collection(
            output_tree, 'tr1', 'hit', _HIT_FIELDS, _TRACKER_CAPACITY)
        tr1_clusters = _book_collection(
            output_tree, 'tr1', 'cluster', _TRACKER_CLUSTER_FIELDS, _TRACKER_CAPACITY)
        tr2_hits = _book_collection(
            output_tree, 'tr2', 'hit', _HIT_FIELDS, _TRACKER_CAPACITY)
        tr2_clusters = _book_collection(
            output_tree, 'tr2', 'cluster', _TRACKER_CLUSTER_FIELDS, _TRACKER_CAPACITY)
        cal_hits = _book_collection(
            output_tree, 'cal', 'hit', _HIT_FIELDS, _CAL_CAPACITY)
        cal_clusters = _book_collection(
            output_tree, 'cal', 'cluster', _CAL_CLUSTER_FIELDS, _CAL_CAPACITY)

        for idx, event in enumerate(input_tree):
            # Progress report every 1000 events, from a single clock reading.
            if idx % 1000 == 0:
                time_min, time_sec = divmod(time.time() - start_time, 60)
                print('Event: {} out of {};'.format(idx, n_events), end=' ')
                print('{} min {} sec'.format(time_min, time_sec))

            # Build the per-detector hit lists and correct their y positions.
            hits_tr1, hits_tr2, hits_cal = make_hits_lists(event)
            align_data(hits_tr1, hits_tr2, hits_cal)

            # The clustering algorithm lives in "clustering.py".
            clusters_tr1, clusters_tr2, clusters_cal = make_clusters_lists(
                hits_tr1, hits_tr2, hits_cal)

            # Order tracker clusters by their y distance to the first
            # calorimeter cluster (expected to be the most energetic one).
            if len(clusters_cal) != 0:
                reference_y = clusters_cal[0].y
                clusters_tr1.sort(key=lambda cl: abs(cl.y - reference_y))
                clusters_tr2.sort(key=lambda cl: abs(cl.y - reference_y))

            # Copy everything into the branch buffers and store the event.
            _fill_collection(tr1_hits, hits_tr1)
            _fill_collection(tr1_clusters, clusters_tr1)
            _fill_collection(tr2_hits, hits_tr2)
            _fill_collection(tr2_clusters, clusters_tr2)
            _fill_collection(cal_hits, hits_cal)
            _fill_collection(cal_clusters, clusters_cal)
            output_tree.Fill()

        output_tree.Write()
    finally:
        # Release both files even if processing failed part-way through.
        if output_file is not None:
            output_file.Close()
        input_file.Close()

    print("Hooray, extracted tree file is ready, take it :3")
if __name__ == "__main__":
    # Script entry point: the only command-line argument is the path to the
    # input ROOT file, which is handed to main() as args.path_to_file.
    cli = argparse.ArgumentParser(
        description='Do selection and clustering of data',
    )
    cli.add_argument(
        'path_to_file',
        type=str,
        help='Provide path to the root file',
    )
    args = cli.parse_args()
    main(args)