-
Notifications
You must be signed in to change notification settings - Fork 0
/
calculate_scaling_factor.py
169 lines (147 loc) · 5.84 KB
/
calculate_scaling_factor.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# vim:fenc=utf-8
#
# Copyright 2024 by Kris Alavattam
# Email: [email protected]
# Usage: python calculate_scaling_factor.py \
# --ip_main <no_ip_main_alignments> \
# --ip_spike_in <no_ip_spike_in_alignments> \
# --input_main <no_input_main_alignments> \
# --input_spike_in <no_input_spike_in_alignments>
# Description: This script calculates a spike-in-derived scaling factor for
# ChIP-seq datasets. It requires counts of "main" and spike-in alignments for
# corresponding IP and input samples. The script calculates the ratio of
# spike-in to main alignments for IP and input samples, and then computes a
# final scaling factor by dividing the input sample ratio by the IP sample
# ratio.
#
# Distributed under terms of the MIT license.
import argparse
import sys
def calculate_scaling_factor(ip_main, ip_spike_in, input_main, input_spike_in):
"""
Calculate the spike-in-derived scaling factor based on alignment counts.
This function computes two ratios: the ratio of spike-in to "main"
alignments for the IP sample and the same ratio for the input sample. It
then returns the scaling factor derived by dividing the input sample ratio
by the IP sample ratio.
Args:
ip_main (int): The number of "main" alignments in the IP sample.
ip_spike_in (int): The number of spike-in alignments in the IP sample.
input_main (int): The number of "main" alignments in the input sample.
input_spike_in (int): The number of spike-in alignments in the input
sample.
Returns:
float: The scaling factor calculated as the ratio of input sample
ratio to IP sample ratio.
Raises:
ValueError: If any of the counts are negative.
TypeError: If any of the counts are not integers.
ZeroDivisionError: If any of the main alignment counts are zero.
"""
# Validate that all inputs are integers
for count in [ip_main, ip_spike_in, input_main, input_spike_in]:
if not isinstance(count, int):
raise TypeError(f'Expected integer, got {type(count).__name__}')
# Validate that all inputs are non-negative
if any(
count < 0 for count in [
ip_main, ip_spike_in, input_main, input_spike_in
]
):
raise ValueError('Alignment counts cannot be negative.')
# Check for division by zero
if ip_main == 0 or input_main == 0:
raise ZeroDivisionError(
'Main alignment counts cannot be zero to avoid division by zero.'
)
ratio_ip = ip_spike_in / ip_main
ratio_input = input_spike_in / input_main
# Calculate the scaling factor
return ratio_input / ratio_ip
def main():
"""
Execute the primary control flow of the script.
This function facilitates the calculation of a spike-in-derived scaling
factor for ChIP-seq samples. It parses command-line arguments for the
number of alignments from both IP and input samples, calculates ratios of
spike-in to main alignments for both sample types, and then computes a
final scaling factor by dividing the input sample ratio by the IP sample
ratio.
Command-line Arguments:
-ip_m/--ip_main (int): The number of "main" alignments for the ChIP-seq
IP sample.
-ip_s/--ip_spike_in (int): The number of spike-in alignments for the
ChIP-seq IP sample.
-in_m/--input_main (int): The number of "main" alignments for the
corresponding ChIP-seq input sample.
-in_s/--input_spike_in (int): The number of spike-in alignments for the
corresponding ChIP-seq input sample.
The function will terminate early and print an error message if any of the
main alignment counts are zero, preventing division by zero errors.
Returns:
None, but outputs the final scaling factor to standard output, which is
the ratio of the input sample spike-in to main alignments ratio divided
by the IP sample spike-in to main alignments ratio.
"""
# Define arguments
parser = argparse.ArgumentParser(description=(
'Calculate spike-in-derived scaling factor for a ChIP-seq sample with '
'IP and input data.'
))
parser.add_argument(
'-ip_m',
'--ip_main',
type=int,
help='Number of "main" alignments for the ChIP-seq IP sample.'
)
parser.add_argument(
'-ip_s',
'--ip_spike_in',
type=int,
help='Number of spike-in alignments for the ChIP-seq IP sample.'
)
parser.add_argument(
'-in_m',
'--input_main',
type=int,
help=(
'Number of "main" alignments for the corresponding ChIP-seq input '
'sample.'
)
)
parser.add_argument(
'-in_s',
'--input_spike_in',
type=int,
help=(
'Number of spike-in alignments for the corresponding ChIP-seq '
'input sample.'
)
)
# Display help and exit if no arguments were provided
if len(sys.argv) == 1:
parser.print_help(sys.stderr)
sys.exit(1)
args = parser.parse_args()
# Calculate the scaling factor, handling exceptions as necessary
try:
result = calculate_scaling_factor(
args.ip_main,
args.ip_spike_in,
args.input_main,
args.input_spike_in
)
print(f'{result}')
except ValueError as e:
print(f'Error: {e}', file=sys.stderr)
sys.exit(1)
except TypeError as e:
print(f'Error: {e}', file=sys.stderr)
sys.exit(1)
except ZeroDivisionError as e:
print(f'Error: {e}', file=sys.stderr)
sys.exit(1)
if __name__ == "__main__":
main()