diff --git a/apps/ta_dump.py b/apps/ta_dump.py deleted file mode 100644 index 8388dba..0000000 --- a/apps/ta_dump.py +++ /dev/null @@ -1,425 +0,0 @@ -""" -Display diagnostic information for TAs for a given -tpstream file. -""" - -import os -import sys -import argparse - -import numpy as np -import matplotlib.pyplot as plt -from matplotlib.backends.backend_pdf import PdfPages -from scipy import stats - -import trgtools - -TICK_TO_SEC_SCALE = 16e-9 # s per tick - -def window_length_hist(window_lengths, seconds=False): - """ - Plot a histogram of the TA window lengths. - - Optionally, use ticks or seconds for scaling. - """ - time_unit = 'Ticks' - if seconds: - window_lengths = window_lengths * TICK_TO_SEC_SCALE - time_unit = 's' - - plt.figure(figsize=(6,4)) - plt.hist(window_lengths, color='k') - - plt.title("TA Time Window Length Histogram") - plt.xlabel(f"Time Window Length ({time_unit})") - - plt.tight_layout() - plt.savefig("ta_window_length_histogram.svg") - plt.close() - -def num_tps_hist(num_tps): - """ - Plot the number of TPs for each TA as a histogram. - """ - plt.figure(figsize=(6,4)) - plt.hist(num_tps, bins=50, color='k') - - plt.title("Number of TPs Histogram") - plt.xlabel("Number of TPs") - - plt.tight_layout() - plt.savefig("ta_num_tps_histogram.svg") - plt.close() - -def time_start_plot(start_times, seconds=False): - """ - Plot TA start times vs TA index. - - Optionally, use ticks or seconds for scaling. - """ - time_unit = 'Ticks' - if seconds: - start_times = start_times * TICK_TO_SEC_SCALE - time_unit = 's' - - first_time = start_times[0] - total_time = start_times[-1] - start_times[0] - total_tas = start_times.shape[0] - avg_rate = total_tas / total_time - - plt.figure(figsize=(6,4)) - plt.plot(start_times - first_time, 'k', label=f"Average Rate: {avg_rate:.3} TA/{time_unit}") - - plt.title(f"TA Start Times: Shifted by {first_time:.4e} {time_unit}") - plt.xlabel("TA Order") - plt.ylabel(f"Start Time ({time_unit})") - - plt.legend() - - plt.tight_layout() - plt.savefig("ta_start_times.svg") - plt.close() - -def algorithm_hist(algorithms): - """ - Plot a histogram of the algorithm types for each TA. - """ - num_tas = algorithms.shape[0] - plt.figure(figsize=(12,8)) - plt.hist(algorithms, bins=np.arange(-0.5, 8, 1), range=(0,7), align='mid', color='k', label=f"Number of TAs: {num_tas}") - - plt.title("TA Algorithm Histogram") - plt.xticks(ticks=range(0,8), labels=("Unknown", - "Supernova", - "Prescale", - "ADCSimpleWindow", - "HorizontalMuon", - "MichelElectron", - "DBSCAN", - "PlaneCoincidence"), rotation=60) - - plt.legend() - - plt.tight_layout() - plt.savefig("ta_algorithm_histogram.svg") - plt.close() - -def det_type_hist(det_types): - """ - Plot a histogram of the detector type for the TAs. - """ - plt.figure(figsize=(12,8)) - plt.hist(det_types, bins=np.arange(-0.5, 3, 1), range=(0,2), align='mid', color='k') - - plt.title("TA Detector Type Histogram") - plt.xticks(ticks=range(0,3), labels=("Unknown", - "TPC", - "PDS"), rotation=60) - - plt.tight_layout() - plt.savefig("ta_det_types_histogram.svg") - plt.close() - -def adc_integral_hist(adc_integrals): - """ - Plot a histogram of the ADC integrals for the TAs. - """ - plt.figure(figsize=(6,4)) - plt.hist(adc_integrals, color='k') - - plt.title("TA ADC Integral Histogram") - plt.xlabel("ADC Integral") - - plt.tight_layout() - plt.savefig("ta_adc_integral_histogram.svg") - plt.close() - -def plot_time_window_summary(ta_data, tp_data, quiet=False, seconds=False): - """ - Plot summary statistics on time windows. - - This uses 2 definitions for time windows: - Direct Diff: max(tp.time_start) - ta.time_start, - Max Diff: max(tp.time_start + tp.time_over_threshold) - ta.time_start - """ - time_unit = 's' if seconds else 'Ticks' - - direct_diff = np.zeros(ta_data.shape) - max_diff = np.zeros(ta_data.shape) - - # Find the differences for each TA - for idx in np.arange(ta_data.shape[0]): - direct_diff[idx] = np.max(tp_data[idx]['time_start']) - ta_data[idx]['time_start'] - max_diff[idx] = np.max(tp_data[idx]['time_start'] + tp_data[idx]['time_over_threshold']) - ta_data[idx]['time_start'] - - if seconds: - direct_diff = direct_diff * TICK_TO_SEC_SCALE - max_diff = max_diff * TICK_TO_SEC_SCALE - - plt.figure(figsize=(6,4)) - plt.boxplot((direct_diff, max_diff), notch=True, vert=False, sym='+', labels=["Direct Difference", "Maximum Difference"]) - - plt.title("TA Time Windows Summary") - plt.xlabel(f"Time ({time_unit})") - - plt.tight_layout() - plt.savefig("ta_time_windows_summary.svg") - plt.close() - -def write_summary_stats(data, filename, title): - """ - Writes the given summary statistics to 'filename'. - """ - summary = stats.describe(data) - std = np.sqrt(summary.variance) - with open(filename, 'a') as out: - out.write(f"{title}\n") - out.write(f"Reference Statistics:\n" \ - f"\tTotal # TPs = {summary.nobs},\n" \ - f"\tMean = {summary.mean:.2f},\n" \ - f"\tStd = {std:.2f},\n" \ - f"\tMin = {summary.minmax[0]},\n" \ - f"\tMax = {summary.minmax[1]}.\n") - std3_count = np.sum(data > summary.mean + 3*std) \ - + np.sum(data < summary.mean - 3*std) - std2_count = np.sum(data > summary.mean + 2*std) \ - + np.sum(data < summary.mean - 2*std) - out.write(f"Anomalies:\n" \ - f"\t# of >3 Sigma TPs = {std3_count},\n" \ - f"\t# of >2 Sigma TPs = {std2_count}.\n") - out.write("\n\n") - -def plot_summary_stats(ta_data, no_anomaly=False, quiet=False): - """ - Plot summary statistics on various TA member data. - Displays as box plots on multiple pages of a PDF. - """ - # 'Sanity' titles _should_ all be the same value. - titles = { - 'adc_integral': "ADC Integral Summary", - 'adc_peak': "ADC Peak Summary", - 'algorithm': "Algorithm (Sanity) Summary", - 'channel_end': "Channel End Summary", - 'channel_peak': "Channel Peak Summary", - 'channel_start': "Channel Start Summary", - 'detid': "Detector ID (Sanity) Summary", - 'num_tps': "Number of TPs Summary", - 'time_end': "Time End Summary", - 'time_peak': "Time Peak Summary", - 'time_start': "Time Start Summary", - 'type': "Type (Sanity) Summary" - } - labels = { - 'adc_integral': "ADC Integral", - 'adc_peak': "ADC Count", - 'algorithm': "", - 'channel_end': "Channel Number", - 'channel_peak': "Channel Number", - 'channel_start': "Channel Number", - 'detid': "", - 'num_tps': "TP Count", - 'time_end': "Ticks", - 'time_peak': "Ticks", - 'time_start': "Ticks", - 'type': "" - } - - anomaly_filename = 'ta_anomaly_summary.txt' - - if not no_anomaly: - if not quiet: - print(f"Writing descriptive statistics to {anomaly_filename}.") - if os.path.isfile(anomaly_filename): - # Prepare a new ta_anomaly_summary.txt - os.remove(anomaly_filename) - - with PdfPages("ta_summary_stats.pdf") as pdf: - for ta_key, title in titles.items(): - # Extract only ta_key - plt.figure(figsize=(6,4)) - ax = plt.gca() - - plt.boxplot(ta_data[ta_key], notch=True, vert=False, sym='+') - plt.yticks([]) # Would just show '1'. - ax.xaxis.grid(True) - plt.xlabel(labels[ta_key]) - - plt.title(title) - - plt.tight_layout() - pdf.savefig() - plt.close() - - # Write anomalies to file. - if not no_anomaly: - if "Sanity" in title and np.all(ta_data[ta_key] == ta_data[ta_key][0]): - # Either passed check or all wrong in the same way. - continue - write_summary_stats(ta_data[ta_key], anomaly_filename, title) - - -def all_event_displays(tp_data, run_id, file_index, seconds=False): - """ - Plot all event_displays as pages in a PDF. - - Optionally, use ticks or seconds for scaling. - """ - time_unit = 's' if seconds else 'Ticks' - - with PdfPages(f"event_displays_{run_id}.{file_index:04}.pdf") as pdf: - for tadx, ta in enumerate(tp_data): - if seconds: - ta = ta * TICK_TO_SEC_SCALE - plt.figure(figsize=(6,4)) - - plt.scatter(ta['time_peak'], ta['channel'], c='k', s=2) - - # Auto limits were too wide; this narrows it. - max_time = np.max(ta['time_peak']) - min_time = np.min(ta['time_peak']) - time_diff = max_time - min_time - plt.xlim((min_time - 0.1*time_diff, max_time + 0.1*time_diff)) - - plt.title(f'Run {run_id}.{file_index:04} Event Display: {tadx:03}') - plt.xlabel(f"Peak Time ({time_unit})") - plt.ylabel("Channel") - - plt.tight_layout() - pdf.savefig() - plt.close() - -def time_diff_hist(start_times, end_times, seconds=False): - """ - Plot a histogram of the time differences. - - Optionally, use ticks or seconds for scaling. - """ - time_unit = 'Ticks' - if seconds: - start_times = start_times * TICK_TO_SEC_SCALE - end_times = end_times * TICK_TO_SEC_SCALE - time_unit = 's' - - # Difference between all the start times. - start_time_diff = (np.concatenate((start_times[1:], [0])) - start_times)[:-1] - # Difference between previous TA end time and current TA start time. - time_gaps = start_times[1:] - end_times[:-1] - - plt.figure(figsize=(6,4)) - - plt.hist(start_time_diff, bins=40, color='#63ACBE', label="Start Time Difference", alpha=0.2) - plt.hist(time_gaps, bins=40, color="#EE442F", label="TA Time Gap", alpha=0.2) - - plt.title("TA Timings Histogram") - plt.xlabel(f"Time ({time_unit})") - plt.legend() - - plt.tight_layout() - plt.savefig("ta_timings_histogram.svg") - plt.close() - -def event_display(peak_times, channels, idx, seconds=False): - """ - Plot an individual event display. - - Optionally, use ticks or seconds for scaling. - """ - time_unit = 'Ticks' - if seconds: - peak_times = peak_times * TICK_TO_SEC_SCALE - time_unit = 's' - - plt.figure(figsize=(6,4)) - - plt.scatter(peak_times, channels, c='k', s=2) - max_time = np.max(peak_times) - min_time = np.min(peak_times) - time_diff = max_time - min_time - - plt.xlim((min_time - 0.1*time_diff, max_time + 0.1*time_diff)) - - plt.title(f"Event Display: {idx:03}") - plt.xlabel(f"Peak Time ({time_unit})") - plt.ylabel("Channel") - - plt.tight_layout() - plt.savefig(f"./event_display_{idx:03}.svg") - plt.close() - -def parse(): - """ - Parses CLI input arguments. - """ - parser = argparse.ArgumentParser(description="Display diagnostic information for TAs for a given tpstream file.") - parser.add_argument("filename", help="Absolute path to tpstream file to display.") - parser.add_argument("--quiet", action="store_true", help="Stops the output of printed information. Default: False.") - parser.add_argument("--no-displays", action="store_true", help="Stops the processing of event displays.") - parser.add_argument("--start-frag", type=int, help="Starting fragment index to process from. Takes negative indexing. Default: -10.", default=-10) - parser.add_argument("--end-frag", type=int, help="Fragment index to stop processing (i.e. not inclusive). Takes negative indexing. Default: 0.", default=0) - parser.add_argument("--no-anomaly", action="store_true", help="Pass to not write 'ta_anomaly_summary.txt'. Default: False.") - parser.add_argument("--seconds", action="store_true", help="Pass to use seconds instead of time ticks. Default: False.") - parser.add_argument("--overwrite", action="store_true", help="Overwrite old outputs. Default: False.") - - return parser.parse_args() - -def main(): - """ - Drives the processing and plotting. - """ - ## Process Arguments & Data - args = parse() - filename = args.filename - quiet = args.quiet - no_displays = args.no_displays - start_frag = args.start_frag - end_frag = args.end_frag - no_anomaly = args.no_anomaly - seconds = args.seconds - overwrite = args.overwrite - - data = trgtools.TAData(filename, quiet) - - # Load all case. - if start_frag == 0 and end_frag == -1: - data.load_all_frags() # Has extra debug/warning info - else: # Only load some. - if end_frag != 0: # Python doesn't like [n:0] - frag_paths = data.get_ta_frag_paths()[start_frag:end_frag] - elif end_frag == 0: - frag_paths = data.get_ta_frag_paths()[start_frag:] - - # Does not count empty frags. - for path in frag_paths: - data.load_frag(path) - - # Try to find an empty plotting directory - plot_iter = 0 - plot_dir = f"{data.run_id}-{data.file_index}_figures_{plot_iter:04}" - while not overwrite and os.path.isdir(plot_dir): - plot_iter += 1 - plot_dir = f"{data.run_id}-{data.file_index}_figures_{plot_iter:04}" - print(f"Saving outputs to ./{plot_dir}/") - # If overwriting and it does exist, don't need to make it. - # So take the inverse to mkdir. - if not (overwrite and os.path.isdir(plot_dir)): - os.mkdir(plot_dir) - os.chdir(plot_dir) - - print(f"Number of TAs: {data.ta_data.shape[0]}") # Enforcing output for useful metric - - ## Plotting - num_tps_hist(data.ta_data["num_tps"]) - window_length_hist(data.ta_data["time_start"] - data.ta_data["time_end"]) - algorithm_hist(data.ta_data["algorithm"]) - det_type_hist(data.ta_data["type"]) - adc_integral_hist(data.ta_data["adc_integral"]) - time_start_plot(data.ta_data["time_start"], seconds) - time_diff_hist(data.ta_data["time_start"], data.ta_data["time_end"], seconds) - plot_time_window_summary(data.ta_data, data.tp_data, quiet, seconds) - plot_summary_stats(data.ta_data, no_anomaly, quiet) - - if (not no_displays): - all_event_displays(data.tp_data, data.run_id, data.file_index, seconds) - -if __name__ == "__main__": - main() diff --git a/docs/ta-dump.md b/docs/ta-dump.md index f294abc..574c75f 100644 --- a/docs/ta-dump.md +++ b/docs/ta-dump.md @@ -1,6 +1,10 @@ # Trigger Activity Dump Info -`ta_dump.py` is a plotting script that shows TA diagnostic information, such as: algorithms produced, number of TPs per TA, event displays, window length histogram, ADC integral histogram, and a plot of the time starts. Most of these plots are saved as an SVG. The event displays are plotted in a multi-page PDF. +`ta_dump.py` is a plotting script that shows TA diagnostic information, such as: algorithms produced, number of TPs per TA, event displays, ADC integral histogram, and a plot of the time starts. + +A new directory is created that identifies the HDF5 file the script was run on and increments based on preceding plots for the same file. One can overwrite the first set of plots generated by using `--overwrite`. Plots are saved in two multi-page PDFs: histograms for member data and event displays. + +There are two plotting options `--linear` and `--log` that set the y-scale for the plots. By default, plots use both scales with linear on the left y-axis and log on the right y-axis. There is an additional plotting option `--seconds` to produce time plots using seconds instead of ticks. While running, this prints warnings for empty fragments that are skipped in the given HDF5 file. These outputs can be suppressed with `--quiet`. @@ -8,7 +12,7 @@ One can specify which fragments to _attempt_ to load from with the `--start-frag Event displays are processed by default. If there are many TAs that were loaded, then this may take a while to plot. The `--no-display` options skips event display plotting. -A text file named `ta_anomaly_summary.txt` is generated that gives reference statistics for each TA data member and gives a count of data members that are at least 2 sigma and 3 sigma from the mean. One can use `--no-anomaly` to stop this file generation. +A text file named `ta_anomalies.txt` is generated that gives reference statistics for each TA data member and gives a count of data members that are at least 2 sigma and 3 sigma from the mean. One can use `--no-anomaly` to stop this file generation. ## Example ```bash @@ -18,7 +22,8 @@ python ta_dump.py file.hdf5 --quiet python ta_dump.py file.hdf5 --start-frag 50 --end-frag 100 # Attempts 50 fragments python ta_dump.py file.hdf5 --no-display python ta_dump.py file.hdf5 --no-anomaly +python ta_dump.py file.hdf5 --log +python ta_dump.py file.hdf5 --linear +python ta_dump.py file.hdf5 --seconds +python ta_dump.py file.hdf5 --overwrite ``` - -## Run Numbers & File Naming -For the moment, getting meaningful run numbers and sub-run numbers is read from the filename because accessing HDF5 run number and sub-run numbers is not available to python. [This](https://github.com/DUNE-DAQ/hdf5libs/pull/68) PR aims to solve that. diff --git a/docs/tp-dump.md b/docs/tp-dump.md index e7dddaa..d7af42d 100644 --- a/docs/tp-dump.md +++ b/docs/tp-dump.md @@ -2,6 +2,10 @@ `tp_dump.py` is a plotting script that shows TP diagnostic information, such as: TP channel histogram and channel vs time over threshold. Plots are saved as SVGs, PDFs, and PNGs. +A new directory is created that identifies the HDF5 file the script was run on and increments based on preceding plots for the same file. One can overwrite the first set of plots generated by using `--overwrite`. + +There are two plotting options `--linear` and `--log` that set the y-scale for the plots. By default, plots use both scales with linear on the left y-axis and log on the right y-axis. There is an additional plotting option `--seconds` to produce time plots using seconds instead of ticks. + While running, this script prints various loading information. These outputs can be suppressed with `--quiet`. One can specify which fragments to load from with the `--start-frag` option. This is -10 by default in order to get the last 10 fragments for the given file. One can also specify which fragment to end on (not inclusive) with `--end-frag` option. This is 0 by default (for the previously mentioned reason). @@ -15,4 +19,8 @@ python tp_dump.py file.hdf5 --help python tp_dump.py file.hdf5 --quiet python tp_dump.py file.hdf5 --start-frag 50 --end-frag 100 # Loads 50 fragments. python tp_dump.py file.hdf5 --no-anomaly +python ta_dump.py file.hdf5 --log +python ta_dump.py file.hdf5 --linear +python ta_dump.py file.hdf5 --seconds +python ta_dump.py file.hdf5 --overwrite ``` diff --git a/python/trgtools/TAData.py b/python/trgtools/TAData.py index 93648ad..9f39064 100644 --- a/python/trgtools/TAData.py +++ b/python/trgtools/TAData.py @@ -42,7 +42,8 @@ class TAData: ('time_end', np.uint64), ('time_peak', np.uint64), ('time_start', np.uint64), - ('type', np.uint8) + ('type', np.uint8), + ('version', np.uint16) ]) ## TP data type tp_dt = np.dtype([ @@ -132,7 +133,8 @@ def load_frag(self, frag_path, index=None) -> None: ta_datum.data.time_end, ta_datum.data.time_peak, ta_datum.data.time_start, - np.uint8(ta_datum.data.type))], + np.uint8(ta_datum.data.type), + np.uint16(ta_datum.data.version))], dtype=self.ta_dt) self.ta_data = np.hstack((self.ta_data, np_ta_datum)) byte_idx += ta_datum.sizeof() diff --git a/scripts/ta_dump.py b/scripts/ta_dump.py new file mode 100755 index 0000000..edaa831 --- /dev/null +++ b/scripts/ta_dump.py @@ -0,0 +1,442 @@ +#!/usr/bin/env python +""" +Display diagnostic information for TAs for a given +tpstream file. +""" + +import os +import sys +import argparse + +import numpy as np +import matplotlib.pyplot as plt +from matplotlib.backends.backend_pdf import PdfPages +from scipy import stats + +import trgtools + +TICK_TO_SEC_SCALE = 16e-9 # s per tick + +def time_start_plot(start_times, seconds=False): + """ + Plot TA start times vs TA index. + + Optionally, use ticks or seconds for scaling. + """ + time_unit = 'Ticks' + if seconds: + start_times = start_times * TICK_TO_SEC_SCALE + time_unit = 's' + + first_time = start_times[0] + total_time = start_times[-1] - start_times[0] + total_tas = start_times.shape[0] + avg_rate = total_tas / total_time + + plt.figure(figsize=(6,4)) + plt.plot(start_times - first_time, 'k', label=f"Average Rate: {avg_rate:.3} TA/{time_unit}") + + plt.title(f"TA Start Times: Shifted by {first_time:.4e} {time_unit}") + plt.xlabel("TA Order") + plt.ylabel(f"Start Time ({time_unit})") + + plt.legend() + + plt.tight_layout() + plt.savefig("ta_start_times.svg") + plt.close() + + +def plot_pdf_time_delta_histograms( + ta_data: np.ndarray, + tp_data: list[np.ndarray], + pdf: PdfPages, + time_label: str) -> None: + """ + Plot the different time delta histograms to a PdfPages. + + Parameters: + ta_data (np.ndarray): Array of TA data members. + tp_data (list[np.ndarray]): List of TPs per TA. tp_data[i] holds TP data for the i-th TA. + pdf (PdfPages): PdfPages object to append plot to. + time_label (str): Time label to plot with (ticks vs seconds). + + Returns: + Nothing. Mutates :pdf: with the new plot. + """ + direct_diff = ta_data['time_end'] - ta_data['time_start'] + last_tp_start_diff = [] + last_tp_peak_diff = [] + for idx, tp in enumerate(tp_data): + last_tp_start_diff.append(np.max(tp['time_start']) - ta_data[idx]['time_start']) + last_tp_peak_diff.append(np.max(tp['time_peak']) - ta_data[idx]['time_start']) + + last_tp_start_diff = np.array(last_tp_start_diff) + last_tp_peak_diff = np.array(last_tp_peak_diff) + + # Seconds case. + if "Ticks" not in time_label: + direct_diff = direct_diff * TICK_TO_SEC_SCALE + last_tp_start_diff = last_tp_start_diff * TICK_TO_SEC_SCALE + last_tp_peak_diff = last_tp_peak_diff * TICK_TO_SEC_SCALE + + bins = 40 + + plt.figure(figsize=(6, 4)) + + plt.hist( + (direct_diff, last_tp_start_diff, last_tp_peak_diff), + bins=bins, + label=( + "TA(End) - TA(Start)", + "Last TP(Start) - TA(Start)", + "Last TP(Peak) - TA(Start)" + ), + color=( + "#B2182B", + "#3BB27A", + "#2166AC" + ), + alpha=0.6 + ) + + plt.title("Time Difference Histograms") + plt.xlabel(time_label) + plt.legend(framealpha=0.4) + + plt.tight_layout() + pdf.savefig() + + plt.close() + return None + + +def write_summary_stats(data, filename, title): + """ + Writes the given summary statistics to 'filename'. + """ + # Algorithm, Det ID, etc. are not expected to vary. + # Check first that they don't vary, and move on if so. + if np.all(data == data[0]): + print(f"{title} data member is the same for all TAs. Skipping summary statistics.") + return + + summary = stats.describe(data) + std = np.sqrt(summary.variance) + with open(filename, 'a') as out: + out.write(f"{title}\n") + out.write(f"Reference Statistics:\n" \ + f"\tTotal # TPs = {summary.nobs},\n" \ + f"\tMean = {summary.mean:.2f},\n" \ + f"\tStd = {std:.2f},\n" \ + f"\tMin = {summary.minmax[0]},\n" \ + f"\tMax = {summary.minmax[1]}.\n") + std3_count = np.sum(data > summary.mean + 3*std) \ + + np.sum(data < summary.mean - 3*std) + std2_count = np.sum(data > summary.mean + 2*std) \ + + np.sum(data < summary.mean - 2*std) + out.write(f"Anomalies:\n" \ + f"\t# of >3 Sigma TPs = {std3_count},\n" \ + f"\t# of >2 Sigma TPs = {std2_count}.\n") + out.write("\n\n") + +def plot_pdf_histogram(data, plot_details_dict, pdf, linear=True, log=True): + """ + Plot a histogram for the given data to a PdfPage object. + """ + plt.figure(figsize=(6,4)) + ax = plt.gca() + bins = 100 + + # Custom xticks are for specific typing. Expect to see much + # smaller plots, so only do linear and use less bins. + if 'xticks' in plot_details_dict: + linear = True + log = False + plt.xticks(**plot_details_dict['xticks']) + if 'bins' in plot_details_dict: + bins = plot_details_dict['bins'] + + if linear and log: + ax.hist(data, bins=bins, color='#63ACBE', label='Linear', alpha=0.6) + ax.set_yscale('linear') + + ax2 = ax.twinx() + ax2.hist(data, bins=bins, color='#EE442F', label='Log', alpha=0.6) + ax2.set_yscale('log') + + # Setting the plot order + ax.set_zorder(2) + ax.patch.set_visible(False) + ax2.set_zorder(1) + + handles, labels = ax.get_legend_handles_labels() + handles2, labels2 = ax2.get_legend_handles_labels() + handles = handles + handles2 + labels = labels + labels2 + plt.legend(handles=handles, labels=labels) + else: + plt.hist(data, bins=bins, color='k') + if log: # Default to linear, so only change on log + plt.yscale('log') + + plt.title(plot_details_dict['title'] + " Histogram") + ax.set_xlabel(plot_details_dict['xlabel']) + if 'xlim' in plot_details_dict: + plt.xlim(plot_details_dict['xlim']) + + plt.tight_layout() + pdf.savefig() + plt.close() + +def all_event_displays(tp_data, run_id, file_index, seconds=False): + """ + Plot all event_displays as pages in a PDF. + + Optionally, use ticks or seconds for scaling. + """ + time_unit = 's' if seconds else 'Ticks' + + with PdfPages(f"event_displays_{run_id}.{file_index:04}.pdf") as pdf: + for tadx, ta in enumerate(tp_data): + if seconds: + ta = ta * TICK_TO_SEC_SCALE + plt.figure(figsize=(6,4)) + + plt.scatter(ta['time_peak'], ta['channel'], c='k', s=2) + + # Auto limits were too wide; this narrows it. + max_time = np.max(ta['time_peak']) + min_time = np.min(ta['time_peak']) + time_diff = max_time - min_time + plt.xlim((min_time - 0.1*time_diff, max_time + 0.1*time_diff)) + + plt.title(f'Run {run_id}.{file_index:04} Event Display: {tadx:03}') + plt.xlabel(f"Peak Time ({time_unit})") + plt.ylabel("Channel") + + plt.tight_layout() + pdf.savefig() + plt.close() + + +def event_display(peak_times, channels, idx, seconds=False): + """ + Plot an individual event display. + + Optionally, use ticks or seconds for scaling. + """ + time_unit = 'Ticks' + if seconds: + peak_times = peak_times * TICK_TO_SEC_SCALE + time_unit = 's' + + plt.figure(figsize=(6,4)) + + plt.scatter(peak_times, channels, c='k', s=2) + max_time = np.max(peak_times) + min_time = np.min(peak_times) + time_diff = max_time - min_time + + plt.xlim((min_time - 0.1*time_diff, max_time + 0.1*time_diff)) + + plt.title(f"Event Display: {idx:03}") + plt.xlabel(f"Peak Time ({time_unit})") + plt.ylabel("Channel") + + plt.tight_layout() + plt.savefig(f"./event_display_{idx:03}.svg") + plt.close() + +def parse(): + """ + Parses CLI input arguments. + """ + parser = argparse.ArgumentParser(description="Display diagnostic information for TAs for a given tpstream file.") + parser.add_argument("filename", help="Absolute path to tpstream file to display.") + parser.add_argument("--quiet", action="store_true", help="Stops the output of printed information. Default: False.") + parser.add_argument("--no-displays", action="store_true", help="Stops the processing of event displays.") + parser.add_argument("--start-frag", type=int, help="Starting fragment index to process from. Takes negative indexing. Default: -10.", default=-10) + parser.add_argument("--end-frag", type=int, help="Fragment index to stop processing (i.e. not inclusive). Takes negative indexing. Default: 0.", default=0) + parser.add_argument("--no-anomaly", action="store_true", help="Pass to not write 'ta_anomaly_summary.txt'. Default: False.") + parser.add_argument("--seconds", action="store_true", help="Pass to use seconds instead of time ticks. Default: False.") + parser.add_argument("--linear", action="store_true", help="Pass to use linear histogram scaling. Default: plots both linear and log.") + parser.add_argument("--log", action="store_true", help="Pass to use logarithmic histogram scaling. Default: plots both linear and log.") + parser.add_argument("--overwrite", action="store_true", help="Overwrite old outputs. Default: False.") + + return parser.parse_args() + +def main(): + """ + Drives the processing and plotting. + """ + ## Process Arguments & Data + args = parse() + filename = args.filename + quiet = args.quiet + no_displays = args.no_displays + start_frag = args.start_frag + end_frag = args.end_frag + no_anomaly = args.no_anomaly + seconds = args.seconds + overwrite = args.overwrite + + linear = args.linear + log = args.log + + # User didn't pass either flag, so default to both being true. + if (not linear) and (not log): + linear = True + log = True + + data = trgtools.TAData(filename, quiet) + + # Load all case. + if start_frag == 0 and end_frag == -1: + data.load_all_frags() # Has extra debug/warning info + else: # Only load some. + if end_frag != 0: # Python doesn't like [n:0] + frag_paths = data.get_ta_frag_paths()[start_frag:end_frag] + elif end_frag == 0: + frag_paths = data.get_ta_frag_paths()[start_frag:] + + # Does not count empty frags. + for path in frag_paths: + data.load_frag(path) + + # Try to find an empty plotting directory + plot_iter = 0 + plot_dir = f"{data.run_id}-{data.file_index}_figures_{plot_iter:04}" + while not overwrite and os.path.isdir(plot_dir): + plot_iter += 1 + plot_dir = f"{data.run_id}-{data.file_index}_figures_{plot_iter:04}" + print(f"Saving outputs to ./{plot_dir}/") + # If overwriting and it does exist, don't need to make it. + # So take the inverse to mkdir. + if not (overwrite and os.path.isdir(plot_dir)): + os.mkdir(plot_dir) + os.chdir(plot_dir) + + print(f"Number of TAs: {data.ta_data.shape[0]}") # Enforcing output for useful metric + + ## Plotting + # General Data Member Plots + time_label = "Time (s)" if seconds else "Time (Ticks)" + + # Dictionary containing unique title, xlabel, and xticks (only some) + plot_dict = { + 'adc_integral': { + 'title': "ADC Integral", + 'xlabel': "ADC Integral" + }, + 'adc_peak': { + 'title': "ADC Peak", + 'xlabel': "ADC Count" + }, + 'algorithm': { + 'title': "Algorithm", + 'xlabel': 'Algorithm Type', + 'bins': 8, + 'xlim': (-0.5, 7.5), + 'xticks': { + 'ticks': range(0, 8), # xticks to change + 'labels': ( + "Unknown", + "Supernova", + "Prescale", + "ADCSimpleWindow", + "HorizontalMuon", + "MichelElectron", + "DBSCAN", + "PlaneCoincidence" + ), + 'rotation': 60, + 'ha': 'right' # Horizontal alignment + } + }, + # TODO: Channel data members should bin on + # the available channels; however, this is + # inconsistent between detectors (APA/CRP). + # Requires loading channel maps. + 'channel_end': { + 'title': "Channel End", + 'xlabel': "Channel Number" + }, + 'channel_peak': { + 'title': "Channel Peak", + 'xlabel': "Channel Number" + }, + 'channel_start': { + 'title': "Channel Start", + 'xlabel': "Channel Number" + }, + 'detid': { + 'title': "Detector ID", + 'xlabel': "Detector IDs" + }, + 'num_tps': { + 'title': "Number of TPs per TA", + 'xlabel': "Number of TPs" + }, + 'time_activity': { + 'title': "Relative Time Activity", + 'xlabel': time_label + }, + 'time_end': { + 'title': "Relative Time End", + 'xlabel': time_label + }, + 'time_peak': { + 'title': "Relative Time Peak", + 'xlabel': time_label + }, + 'time_start': { + 'title': "Relative Time Start", + 'xlabel': time_label + }, + 'type': { + 'title': "Type", + 'xlabel': "Type", + 'bins': 3, + 'xlim': (-0.5, 2.5), + 'xticks': { + 'ticks': (0, 1, 2), # Ticks to change + 'labels': ('Unknown', 'TPC', 'PDS'), + 'rotation': 60, + 'ha': 'right' # Horizontal alignment + } + }, + 'version': { + 'title': "Version", + 'xlabel': "Versions" + } + } + if not no_anomaly: + anomaly_filename = "ta_anomalies.txt" + if not quiet: + print(f"Writing descriptive statistics to {anomaly_filename}.") + if os.path.isfile(anomaly_filename): + # Prepare a new ta_anomaly_summary.txt + os.remove(anomaly_filename) + + with PdfPages("ta_data_member_histograms.pdf") as pdf: + for ta_key in data.ta_data.dtype.names: + if not no_anomaly: + write_summary_stats(data.ta_data[ta_key], anomaly_filename, plot_dict[ta_key]['title']) + if 'time' in ta_key: + time = data.ta_data[ta_key] + if seconds: + time = time * TICK_TO_SEC_SCALE + min_time = np.min(time) # Prefer making the relative time change. + plot_pdf_histogram(time - min_time, plot_dict[ta_key], pdf, linear, log) + continue + plot_pdf_histogram(data.ta_data[ta_key], plot_dict[ta_key], pdf, linear, log) + + # Analysis Plots + plot_pdf_time_delta_histograms(data.ta_data, data.tp_data, pdf, time_label) + + if (not no_displays): + all_event_displays(data.tp_data, data.run_id, data.file_index, seconds) + +if __name__ == "__main__": + main() diff --git a/apps/tc_dump.py b/scripts/tc_dump.py old mode 100644 new mode 100755 similarity index 99% rename from apps/tc_dump.py rename to scripts/tc_dump.py index d52e2ad..c34d530 --- a/apps/tc_dump.py +++ b/scripts/tc_dump.py @@ -1,3 +1,4 @@ +#!/usr/bin/env python """ Display diagnostic information for TCs for a given tpstream file. diff --git a/apps/tp_dump.py b/scripts/tp_dump.py old mode 100644 new mode 100755 similarity index 55% rename from apps/tp_dump.py rename to scripts/tp_dump.py index ed6aa92..2a84c0a --- a/apps/tp_dump.py +++ b/scripts/tp_dump.py @@ -1,3 +1,4 @@ +#!/usr/bin/env python """ Display diagnostic information for TPs in a given tpstream file. @@ -66,194 +67,84 @@ def tp_percent_histogram(tp_data): plt.savefig("percent_total.svg") plt.close() -def plot_channel_histogram(channels, quiet=False): +def plot_adc_integral_vs_peak(tp_data): """ - Plot the TP channel histogram. + Plot the ADC Integral vs ADC Peak. """ - counts, bins = np.histogram(channels, bins=np.arange(0.5, 3072.5, 1)) - total_counts = np.sum(counts) - if (not quiet): - print("High TP Count Channels:", np.where(counts >= 500)) - print("Percentage Counts:", np.where(counts >= 0.01*total_counts)) - - plt.figure(figsize=(6,4)) + plt.figure(figsize=(6, 4), dpi=200) + plt.scatter(tp_data['adc_peak'], tp_data['adc_integral'], c='k', s=2, label='TP') + print("Number of ADC Integrals at Signed 16 Limit:", np.sum(tp_data['adc_integral'] == np.power(2, 15)-1)) + print("Total number of TPs:", len(tp_data['adc_peak'])) + high_integral_locs = np.where(tp_data['adc_integral'] == np.power(2, 15)-1) + plt.scatter(tp_data['adc_peak'][high_integral_locs], tp_data['adc_integral'][high_integral_locs], c='#63ACBE', s=2, marker='+', label=r'$2^{15}-1$') - plt.stairs(counts, bins, fill=True, color='k', label=f'TP Count: {total_counts}') - - plt.title("TP Channel Histogram") - plt.xlabel("Channel") + plt.title("ADC Integral vs ADC Peak") + plt.xlabel("ADC Peak") + plt.ylabel("ADC Integral") plt.legend() - plt.ylim((0, 800)) - - plt.tight_layout() - plt.savefig("tp_channel_histogram.svg") - plt.close() - -def plot_version_histogram(versions, quiet=False): - """ - Plot the TP versions histogram. - - Generally, this will be one value. - """ - plt.figure(figsize=(6,4)) - - plt.hist(versions, color='k') - - plt.title("TP Versions Histogram") - plt.xlabel("Versions") - - plt.tight_layout() - plt.savefig("tp_versions_histogram.svg") - plt.close() - -def plot_type_histogram(types, quiet=False): - """ - Plot the TP types histogram. - """ - plt.figure(figsize=(6,4)) - - plt.hist(types, bins=np.arange(-0.5, 3, 1), range=(0,2), align='mid', color='k') - - plt.title("TP Types Histogram") - plt.xlabel("Types") - plt.xticks([0,1,2], ["Unknown", "TPC", "PDS"]) # Taken from TriggerPrimitive.hpp - - plt.tight_layout() - plt.savefig("tp_types_histogram.svg") - plt.close() - -def plot_time_start_histogram(times, quiet=False): - """ - Plot the TP time starts histogram. - """ - plt.figure(figsize=(6,4)) - - plt.hist(times, color='k') - - plt.title("TP Time Starts Histogram") - plt.xlabel("Time Start (Ticks)") - - plt.tight_layout() - plt.savefig("tp_time_start_histogram.svg") - plt.close() - -def plot_time_peak_histogram(times, quiet=False): - """ - Plot the TP time peaks histogram. - """ - plt.figure(figsize=(6,4)) - - plt.hist(times, color='k') - - plt.title("TP Time Peaks Histogram") - plt.xlabel("Time Peak (Ticks)") - - plt.tight_layout() - plt.savefig("tp_time_peak_histogram.svg") - plt.close() - -def plot_time_over_threshold_histogram(times, quiet=False): - """ - Plot the TP time over threshold histogram. - """ - plt.figure(figsize=(6,4)) - - plt.hist(times, color='k') - - plt.title("TP Time Over Threshold Histogram") - plt.xlabel("Time Over Threshold (Ticks)") - - plt.tight_layout() - plt.savefig("tp_time_over_threshold_histogram.svg") - plt.close() - -def plot_flag_histogram(flags, quiet=False): - """ - Plot the TP flags histogram. - - Generally, something notable. Likely difficult to read as a histogram. - """ - plt.figure(figsize=(6,4)) - - plt.hist(flags, color='k') - - plt.title("TP Flags Histogram") - plt.xlabel("Flags") - - plt.tight_layout() - plt.savefig("tp_flags_histogram.svg") - plt.close() - -def plot_detid_histogram(det_ids, quiet=False): - """ - Plot the TP detector IDs histogram. - """ - plt.figure(figsize=(6,4)) - - plt.hist(det_ids, color='k') - # Uncertain what the values of the det_ids refer to, so they remain as numbers. - - plt.title("TP Detector IDs Histogram") - plt.xlabel("Detector IDs") - plt.tight_layout() - plt.savefig("tp_det_ids_histogram.svg") + plt.savefig("tp_adc_integral_vs_peak.png") # Many scatter plot points makes this a PNG plt.close() -def plot_algorithm_histogram(algorithms, quiet=False): +def plot_pdf_histogram(data, plot_details_dict, pdf, linear=True, log=True): """ - Plot the TP algoritm histogram. - - Generally, this should be a single histogram. + Plot a histogram for the given data to a PdfPage object. """ plt.figure(figsize=(6,4)) + ax = plt.gca() + bins = 100 + + # Custom xticks are for specific typing. Expect to see much + # smaller plots, so only do linear and use less bins. + if 'xticks' in plot_details_dict: + linear = True + log = False + plt.xticks(**plot_details_dict['xticks']) + if 'bins' in plot_details_dict: + bins = plot_details_dict['bins'] + + if linear and log: + ax.hist(data, bins=bins, color='#63ACBE', label='Linear', alpha=0.6) + ax.set_yscale('linear') + + ax2 = ax.twinx() + ax2.hist(data, bins=bins, color='#EE442F', label='Log', alpha=0.6) + ax2.set_yscale('log') + + # Setting the plot order + ax.set_zorder(2) + ax.patch.set_visible(False) + ax2.set_zorder(1) + + handles, labels = ax.get_legend_handles_labels() + handles2, labels2 = ax2.get_legend_handles_labels() + handles = handles + handles2 + labels = labels + labels2 + plt.legend(handles=handles, labels=labels) + else: + plt.hist(data, bins=bins, color='k') + if log: # Default to linear, so only change on log + plt.yscale('log') - plt.hist(algorithms, bins=np.arange(-0.5, 2, 1), range=(0,1), align='mid', color='k') - - plt.title("TP Algorithms Histogram") - plt.xlabel("Algorithm") - plt.xticks([0,1], ["Unknown", "TPCDefault"]) - # Values taken from TriggerPrimitive.hpp - - plt.tight_layout() - plt.savefig("tp_algorithms_histogram.svg") - plt.close() - -def plot_adc_peak_histogram(adc_peaks, quiet=False): - """ - Plot the TP peak histogram. - """ - plt.figure(figsize=(6,4)) - - plt.hist(adc_peaks, color='k') - - plt.title("TP ADC Peaks Histogram") - plt.xlabel("ADC Count") - - plt.tight_layout() - plt.savefig("tp_adc_peaks_histogram.svg") - plt.close() - -def plot_adc_integral_histogram(adc_integrals, quiet=False): - """ - Plot the TP ADC integral histogram. - """ - plt.figure(figsize=(6,4)) - - plt.hist(adc_integrals, color='k') - - plt.title("TP ADC Integrals Histogram") - plt.xlabel("ADC Integral") + plt.title(plot_details_dict['title'] + " Histogram") + ax.set_xlabel(plot_details_dict['xlabel']) + if 'xlim' in plot_details_dict: + plt.xlim(plot_details_dict['xlim']) plt.tight_layout() - plt.savefig("tp_adc_integral_histogram.svg") + pdf.savefig() plt.close() def write_summary_stats(data, filename, title): """ Writes the given summary statistics to 'filename'. """ + # Algorithm, Det ID, etc. are not expected to vary. + # Check first that they don't vary, and move on if so. + if np.all(data == data[0]): + print(f"{title} data member is the same for all TPs. Skipping summary statistics.") + return summary = stats.describe(data) std = np.sqrt(summary.variance) with open(filename, 'a') as out: @@ -346,6 +237,9 @@ def parse(): parser.add_argument("--start-frag", type=int, help="Fragment to start loading from (inclusive); can take negative integers. Default: -10", default=-10) parser.add_argument("--end-frag", type=int, help="Fragment to stop loading at (exclusive); can take negative integers. Default: 0", default=0) parser.add_argument("--no-anomaly", action="store_true", help="Pass to not write 'tp_anomaly_summary.txt'. Default: False.") + parser.add_argument("--seconds", action="store_true", help="Pass to use seconds instead of time ticks. Default: False.") + parser.add_argument("--linear", action="store_true", help="Pass to use linear histogram scaling. Default: plots both linear and log.") + parser.add_argument("--log", action="store_true", help="Pass to use logarithmic histogram scaling. Default: plots both linear and log.") parser.add_argument("--overwrite", action="store_true", help="Overwrite old outputs. Default: False.") return parser.parse_args() @@ -362,6 +256,14 @@ def main(): end_frag = args.end_frag no_anomaly = args.no_anomaly overwrite = args.overwrite + linear = args.linear + log = args.log + seconds = args.seconds + + # User didn't pass either flag, so default to both being true. + if (not linear) and (not log): + linear = True + log = True data = trgtools.TPData(filename, quiet) if end_frag == 0: # Ex: [-10:0] is bad. @@ -393,22 +295,94 @@ def main(): ## Plots with more involved analysis channel_tot(data.tp_data) tp_percent_histogram(data.tp_data) + plot_adc_integral_vs_peak(data.tp_data) ## Basic Plots: Histograms & Box Plots - # For the moment, none of these functions make use of 'quiet'. - plot_adc_integral_histogram(data.tp_data['adc_integral'], quiet) - plot_adc_peak_histogram(data.tp_data['adc_peak'], quiet) - plot_algorithm_histogram(data.tp_data['algorithm'], quiet) - plot_channel_histogram(data.tp_data['channel'], quiet) - plot_detid_histogram(data.tp_data['detid'], quiet) - plot_flag_histogram(data.tp_data['flag'], quiet) - plot_time_over_threshold_histogram(data.tp_data['time_over_threshold'], quiet) - plot_time_peak_histogram(data.tp_data['time_peak'], quiet) - plot_time_start_histogram(data.tp_data['time_start'], quiet) - plot_type_histogram(data.tp_data['type'], quiet) - plot_version_histogram(data.tp_data['version'], quiet) - - plot_summary_stats(data.tp_data, no_anomaly, quiet) + # Dictionary containing unique title, xlabel, and xticks (only some) + time_label = "Time (s)" if seconds else "Time (Ticks)" + + plot_dict = { + 'adc_integral': { + 'title': "ADC Integral", + 'xlabel': "ADC Integral" + }, + 'adc_peak': { + 'title': "ADC Peak", + 'xlabel': "ADC Count" + }, + 'algorithm': { + 'title': "Algorithm", + 'xlabel': 'Algorithm Type', + 'xlim': (-1, 2), + 'xticks': { + 'ticks': (0, 1), + 'labels': ("Unknown", "TPCDefault") + }, + 'bins': (-0.5, 0.5, 1.5) # TODO: Dangerous. Hides values outside of this range. + }, + # TODO: Channel should bin on the available + # channels; however, this is inconsistent + # between detectors (APA/CRP). + # Requires loading channel maps. + 'channel': { + 'title': "Channel", + 'xlabel': "Channel Number" + }, + 'detid': { + 'title': "Detector ID", + 'xlabel': "Detector IDs" + }, + 'flag': { + 'title': "Flag", + 'xlabel': "Flags" + }, + 'time_over_threshold': { + 'title': "Time Over Threshold", + 'xlabel': time_label + }, + 'time_peak': { + 'title': "Relative Time Peak", + 'xlabel': time_label + }, + 'time_start': { + 'title': "Relative Time Start", + 'xlabel': time_label + }, + 'type': { + 'title': "Type", + 'xlabel': "Type", + 'xlim': (-1, 3), + 'xticks': { + 'ticks': (0, 1, 2), + 'labels': ('Unknown', 'TPC', 'PDS') + }, + 'bins': (-0.5, 0.5, 1.5, 2.5) # TODO: Dangerous. Hides values outside of this range. + }, + 'version': { + 'title': "Version", + 'xlabel': "Versions" + } + } + if not no_anomaly: + anomaly_filename = "tp_anomalies.txt" + if not quiet: + print(f"Writing descriptive statistics to {anomaly_filename}.") + if os.path.isfile(anomaly_filename): + # Prepare a new tp_anomaly_summary.txt + os.remove(anomaly_filename) + + with PdfPages("tp_data_member_histograms.pdf") as pdf: + for tp_key in data.tp_data.dtype.names: + if not no_anomaly: + write_summary_stats(data.tp_data[tp_key], anomaly_filename, plot_dict[tp_key]['title']) + if tp_key == 'time_start' or tp_key == 'time_peak': + time = data.tp_data[tp_key] + if seconds: + time = time * TICK_TO_SEC_SCALE + min_time = np.min(time) + plot_pdf_histogram(time - min_time, plot_dict[tp_key], pdf, linear, log) + continue + plot_pdf_histogram(data.tp_data[tp_key], plot_dict[tp_key], pdf, linear, log) if __name__ == "__main__": main()