-
Notifications
You must be signed in to change notification settings - Fork 10
/
BLsmooth.py
executable file
·228 lines (201 loc) · 10.1 KB
/
BLsmooth.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# Copyright (C) 2020 - Francesco de Gasperin, Henrik Edler
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
# Usage: BLsmooth.py vis.MS [--options]
# Load a MS, smooth visibilities according to the baseline length,
# i.e. shorter BLs are averaged more, and write a new column to the MS
import os, sys
import optparse
import logging
import numpy as np
from scipy.ndimage import gaussian_filter1d as gfilter
import casacore.tables as pt
from lib_multiproc import multiprocManager
logging.basicConfig(level=logging.DEBUG, format='%(asctime)s - %(levelname)s: %(message)s')
logging.info('BL-based smoother - Francesco de Gasperin, Henrik Edler')
def addcol(ms, incol, outcol):
""" Add a new column to a MS. """
if outcol not in ms.colnames():
logging.info('Adding column: '+outcol)
coldmi = ms.getdminfo(incol)
coldmi['NAME'] = outcol
ms.addcols(pt.makecoldesc(outcol, ms.getcoldesc(incol)), coldmi)
if (outcol != incol):
# copy columns val
logging.info('Set '+outcol+'='+incol)
pt.taql("UPDATE $ms SET "+outcol+"="+incol)
def smooth_baseline(in_bl, data, weights, std_t, std_f, outQueue=None):
"""
Smooth one baseline.
Multiply every element of the data by the weights, convolve both the
scaled data and the weights, and then divide the convolved data by the
convolved weights (translating flagged data into weight=0).
That's basically the equivalent of a running weighted average with a
Gaussian window function.
see also: https://stackoverflow.com/questions/51728224/gaussian-filtering-image-with-a-cut-off-value-in-python
Parameters
----------
in_bl: boolean ndarray
Mask for everything not in this baseline.
data: ndarray
Data for one baseline that is to be smoothed.
weights: ndarray
Weight input.
std_t: float
Standard deviation in samples to use for time-smoothing.
std_f: float
Standard deviation in samples to use for freq-smoothing.
Returns
-------
in_bl: boolean ndarray
Mask for everything not in this baseline.
data: ndarray.
Smoothed ata for one baseline.
weights: ndarray
Weight output.
"""
data = np.nan_to_num(data * weights) # set bad data to 0 so nans don't propagate
if np.isnan(data).all():
return in_bl, data, weights # flagged ants
# smear weighted data and weights
if options.onlyamp: # smooth only amplitudes
dataAMP, dataPH = np.abs(data), np.angle(data)
if not options.notime:
dataAMP = gfilter(dataAMP, std_t, axis=0, truncate=3)
if not options.nofreq:
dataAMP = gfilter(dataAMP, std_f, axis=1, truncate=3)
data = dataAMP * (np.cos(dataPH) + 1j * np.sin(dataPH)) # recreate data
else:
dataR, dataI = np.real(data), np.imag(data)
if not options.notime:
dataR = gfilter(dataR, std_t, axis=0, truncate=3)
dataI = gfilter(dataI, std_t, axis=0, truncate=3)
if not options.nofreq:
dataR = gfilter(dataR, std_f, axis=1, truncate=3)
dataI = gfilter(dataI, std_f, axis=1, truncate=3)
data = dataR + 1j*dataI # recreate data
if not options.notime:
weights = gfilter(weights, std_t, axis=0, truncate=3)
if not options.nofreq:
weights = gfilter(weights, std_f, axis=1, truncate=3)
data[(weights != 0)] /= weights[(weights != 0)] # avoid divbyzero
# print( np.count_nonzero(data[~flags[in_bl]]), np.count_nonzero(data[flags[in_bl]]), 100*np.count_nonzero(data[flags[in_bl]])/np.count_nonzero(data))
# print( "NANs in flagged data: ", np.count_nonzero(np.isnan(data[flags[in_bl]])))
# print( "NANs in unflagged data: ", np.count_nonzero(np.isnan(data[~flags[in_bl]])))
# print( "NANs in weights: ", np.count_nonzero(np.isnan(weights)))
outQueue.put([in_bl, data, weights])
opt = optparse.OptionParser(usage="%prog [options] MS", version="%prog 3.0")
opt.add_option('-f', '--ionfactor', help='Gives an indication on how strong is the ionosphere [default: 0.01]', type='float', default=0.01)
opt.add_option('-s', '--bscalefactor', help='Gives an indication on how the smoothing varies with BL-lenght [default: 1.0]', type='float', default=1.0)
opt.add_option('-i', '--incol', help='Column name to smooth [default: DATA]', type='string', default='DATA')
opt.add_option('-o', '--outcol', help='Output column [default: SMOOTHED_DATA]', type="string", default='SMOOTHED_DATA')
opt.add_option('-w', '--weight', help='Save the newly computed WEIGHT_SPECTRUM, this action permanently modify the MS! [default: False]', action="store_true", default=False)
opt.add_option('-r', '--restore', help='If WEIGHT_SPECTRUM_ORIG exists then restore it before smoothing [default: False]', action="store_true", default=False)
opt.add_option('-b', '--nobackup', help='Do not backup the old WEIGHT_SPECTRUM in WEIGHT_SPECTRUM_ORIG [default: do backup if -w]', action="store_true", default=False)
opt.add_option('-a', '--onlyamp', help='Smooth only amplitudes [default: smooth real/imag]', action="store_true", default=False)
opt.add_option('-t', '--notime', help='Do not do smoothing in time [default: False]', action="store_true", default=False)
opt.add_option('-q', '--nofreq', help='Do not do smoothing in frequency [default: False]', action="store_true", default=False)
opt.add_option('-c', '--chunks', help='Split the I/O in n chunks. If you run out of memory, set this to a value > 2.', default=8, type='int')
opt.add_option('-n', '--ncpu', help='Number of cores', default=4, type='int')
(options, msfile) = opt.parse_args()
if msfile == []:
opt.print_help()
sys.exit(0)
msfile = msfile[0]
if not os.path.exists(msfile):
logging.error("Cannot find MS file {}.".format(msfile))
sys.exit(1)
# open input/output MS
ms = pt.table(msfile, readonly=False, ack=False)
with pt.table(msfile + '::SPECTRAL_WINDOW', ack=False) as freqtab:
freq = freqtab.getcol('REF_FREQUENCY')[0]
freqpersample = np.mean(freqtab.getcol('RESOLUTION'))
timepersample = ms.getcell('INTERVAL',0)
# get info on all baselines
with pt.taql("SELECT ANTENNA1,ANTENNA2,sqrt(sumsqr(UVW)),GCOUNT() FROM $ms GROUPBY ANTENNA1,ANTENNA2") as BL:
ants1, ants2 = BL.getcol('ANTENNA1'), BL.getcol('ANTENNA2')
dists = BL.getcol('Col_3')/1e3 # baseleline length in km
n_t = BL.getcol('Col_4')[0] # number of timesteps
n_bl = len(ants1)
# check if ms is time-ordered
times = ms.getcol('TIME_CENTROID')
if not all(np.diff(times) >= 0):
logging.critical('This code cannot handle MS that are not time-sorted.')
sys.exit(1)
del times
# create column to smooth
addcol(ms, options.incol, options.outcol)
# restore WEIGHT_SPECTRUM
if 'WEIGHT_SPECTRUM_ORIG' in ms.colnames() and options.restore:
addcol(ms, 'WEIGHT_SPECTRUM_ORIG', 'WEIGHT_SPECTRUM')
# backup WEIGHT_SPECTRUM
elif options.weight and not options.nobackup:
addcol(ms, 'WEIGHT_SPECTRUM', 'WEIGHT_SPECTRUM_ORIG')
# Iterate over chunks of baselines
for c, idx in enumerate(np.array_split(np.arange(n_bl), options.chunks)):
logging.debug('### Fetching chunk {}/{}'.format(c+1,options.chunks))
# get input data for this chunk
ants1_chunk, ants2_chunk = ants1[idx], ants2[idx]
chunk = pt.taql("SELECT FROM $ms WHERE any(ANTENNA1== $ants1_chunk && ANTENNA2==$ants2_chunk)")
data_chunk = chunk.getcol(options.incol)
weights_chunk = chunk.getcol('WEIGHT_SPECTRUM')
# flag NaNs and set weights to zero
flags = chunk.getcol('FLAG')
flags[np.isnan(data_chunk)] = True
weights_chunk[flags] = 0
del flags
# prepare output cols
smoothed_data = data_chunk.copy()
if options.weight:
new_weights = np.zeros_like(weights_chunk)
# Iterate on each baseline in this chunk
mpm = multiprocManager(options.ncpu, smooth_baseline)
for i_chunk, (ant1, ant2, dist) in enumerate(zip(ants1_chunk, ants2_chunk, dists[idx])):
if ant1 == ant2:
continue # skip autocorrelations
elif np.isnan(dist):
continue # fix for missing antennas
logging.debug('Working on baseline: {} - {} (dist = {:.2f}km)'.format(ant1, ant2, dist))
in_bl = slice(i_chunk, -1, len(ants1_chunk)) # All times for 1 BL
data, weights= data_chunk[in_bl], weights_chunk[in_bl]
std_t = options.ionfactor * (25.e3 / dist) ** options.bscalefactor * (freq / 60.e6) # in sec
std_t = std_t / timepersample # in samples
# TODO: for freq this is hardcoded, it should be thought better
# However, the limitation is probably smearing here
std_f = 1e6 / dist # Hz
std_f = std_f / freqpersample # in samples
logging.debug("-Time: sig={:.1f} samples ({:.1f}s) -Freq: sig={:.1f} samples ({:.2f}MHz)".format(
std_t, timepersample * std_t, std_f, freqpersample * std_f / 1e6))
if std_t < 0.5: continue # avoid very small smoothing and flagged ants
# fill queue
mpm.put([in_bl, data, weights, std_t, std_f])
mpm.wait() # run queue
# reconstruct chunk column
for in_bl, data, weights in mpm.get():
smoothed_data[in_bl] = data
if options.weight:
new_weights[in_bl] = weights
# write to ms
logging.info('Writing %s column.' % options.outcol)
chunk.putcol(options.outcol, smoothed_data)
if options.weight:
logging.warning('Writing WEIGHT_SPECTRUM column.')
chunk.putcol('WEIGHT_SPECTRUM', new_weights)
chunk.close()
ms.close()
logging.info("Done.")