xradio-0.0.55-py3-none-any.whl → xradio-0.0.58-py3-none-any.whl
This diff shows the changes between two publicly released versions of the package, as published to a supported registry. It is provided for informational purposes only.
- xradio/__init__.py +2 -2
- xradio/_utils/_casacore/casacore_from_casatools.py +1001 -0
- xradio/_utils/_casacore/tables.py +6 -1
- xradio/_utils/coord_math.py +22 -23
- xradio/_utils/dict_helpers.py +76 -11
- xradio/_utils/schema.py +5 -2
- xradio/_utils/zarr/common.py +1 -73
- xradio/image/_util/_casacore/common.py +11 -3
- xradio/image/_util/_casacore/xds_from_casacore.py +59 -35
- xradio/image/_util/_casacore/xds_to_casacore.py +47 -16
- xradio/image/_util/_fits/xds_from_fits.py +172 -77
- xradio/image/_util/casacore.py +9 -4
- xradio/image/_util/common.py +4 -4
- xradio/image/_util/image_factory.py +8 -8
- xradio/image/image.py +45 -5
- xradio/measurement_set/__init__.py +19 -9
- xradio/measurement_set/_utils/__init__.py +1 -3
- xradio/measurement_set/_utils/_msv2/__init__.py +0 -0
- xradio/measurement_set/_utils/_msv2/_tables/read.py +35 -90
- xradio/measurement_set/_utils/_msv2/_tables/read_main_table.py +6 -686
- xradio/measurement_set/_utils/_msv2/_tables/table_query.py +13 -3
- xradio/measurement_set/_utils/_msv2/conversion.py +129 -145
- xradio/measurement_set/_utils/_msv2/create_antenna_xds.py +9 -16
- xradio/measurement_set/_utils/_msv2/create_field_and_source_xds.py +125 -221
- xradio/measurement_set/_utils/_msv2/msv2_to_msv4_meta.py +1 -2
- xradio/measurement_set/_utils/_msv2/msv4_info_dicts.py +13 -8
- xradio/measurement_set/_utils/_msv2/msv4_sub_xdss.py +27 -72
- xradio/measurement_set/_utils/_msv2/partition_queries.py +5 -262
- xradio/measurement_set/_utils/_msv2/subtables.py +0 -107
- xradio/measurement_set/_utils/_utils/interpolate.py +60 -0
- xradio/measurement_set/_utils/_zarr/encoding.py +2 -7
- xradio/measurement_set/convert_msv2_to_processing_set.py +0 -2
- xradio/measurement_set/load_processing_set.py +2 -2
- xradio/measurement_set/measurement_set_xdt.py +14 -14
- xradio/measurement_set/open_processing_set.py +1 -3
- xradio/measurement_set/processing_set_xdt.py +41 -835
- xradio/measurement_set/schema.py +96 -123
- xradio/schema/check.py +91 -97
- xradio/schema/dataclass.py +159 -22
- xradio/schema/export.py +99 -0
- xradio/schema/metamodel.py +51 -16
- xradio/schema/typing.py +5 -5
- {xradio-0.0.55.dist-info → xradio-0.0.58.dist-info}/METADATA +43 -11
- xradio-0.0.58.dist-info/RECORD +65 -0
- {xradio-0.0.55.dist-info → xradio-0.0.58.dist-info}/WHEEL +1 -1
- xradio/image/_util/fits.py +0 -13
- xradio/measurement_set/_utils/_msv2/_tables/load.py +0 -63
- xradio/measurement_set/_utils/_msv2/_tables/load_main_table.py +0 -487
- xradio/measurement_set/_utils/_msv2/_tables/read_subtables.py +0 -395
- xradio/measurement_set/_utils/_msv2/_tables/write.py +0 -320
- xradio/measurement_set/_utils/_msv2/_tables/write_exp_api.py +0 -385
- xradio/measurement_set/_utils/_msv2/chunks.py +0 -115
- xradio/measurement_set/_utils/_msv2/descr.py +0 -165
- xradio/measurement_set/_utils/_msv2/msv2_msv3.py +0 -7
- xradio/measurement_set/_utils/_msv2/partitions.py +0 -392
- xradio/measurement_set/_utils/_utils/cds.py +0 -40
- xradio/measurement_set/_utils/_utils/xds_helper.py +0 -404
- xradio/measurement_set/_utils/_zarr/read.py +0 -263
- xradio/measurement_set/_utils/_zarr/write.py +0 -329
- xradio/measurement_set/_utils/msv2.py +0 -106
- xradio/measurement_set/_utils/zarr.py +0 -133
- xradio-0.0.55.dist-info/RECORD +0 -77
- {xradio-0.0.55.dist-info → xradio-0.0.58.dist-info}/licenses/LICENSE.txt +0 -0
- {xradio-0.0.55.dist-info → xradio-0.0.58.dist-info}/top_level.txt +0 -0
--- xradio/measurement_set/_utils/_msv2/_tables/write_exp_api.py
+++ /dev/null
@@ -1,385 +0,0 @@
-import os, time
-from typing import List, Optional, Union
-
-import dask
-import numpy as np
-import xarray as xr
-
-from ..._utils.xds_helper import flatten_xds, calc_optimal_ms_chunk_shape
-from .write import write_generic_table, write_main_table_slice
-from .write import create_table, revert_time
-
-from casacore import tables
-
-
-# TODO: this should be consolidated with the equivalent in read_main_table,
-# if we keep this mapping
-rename_to_msv2_cols = {
-    "antenna1_id": "ANTENNA1",
-    "antenna2_id": "ANTENNA2",
-    "feed1_id": "FEED1",
-    "feed2_id": "FEED2",
-    # optional cols:
-    # "WEIGHT": "WEIGHT_SPECTRUM",
-    "VIS_CORRECTED": "CORRECTED_DATA",
-    "VIS": "DATA",
-    "VIS_MODEL": "MODEL_DATA",
-    "AUTOCORR": "FLOAT_DATA",
-}
-# cols added in xds not in MSv2
-cols_not_in_msv2 = ["baseline_ant1_id", "baseline_ant2_id"]
-
-
-def cols_from_xds_to_ms(cols: List[str]) -> List[str]:
-    """
-    Translates between lowercase/uppercase convention
-    Rename some MS_colum_names <-> xds_data_var_names
-    Excludes the pointing_ vars that are in the xds but should not be written to MS
-    """
-    return {
-        rename_to_msv2_cols.get(col, col).upper(): col
-        for col in cols
-        if (col and col not in cols_not_in_msv2 and not col.startswith("pointing_"))
-    }
-
-
-def write_ms(
-    mxds: xr.Dataset,
-    outfile: str,
-    infile: str = None,
-    subtables: bool = False,
-    modcols: Union[List[str], None] = None,
-    verbose: bool = False,
-    execute: bool = True,
-) -> Optional[list]:
-    """
-    Write ms format xds contents back to casacore MS (CTDS - casacore Table Data System) format on disk
-
-    Parameters
-    ----------
-    mxds : xr.Dataset,
-        Source multi-xarray dataset (originally created by read_ms)
-    outfile : str
-        Destination filename
-    infile : Union[str, None] (Default value = None)
-        Source filename to copy subtables from. Generally faster than reading/writing through mxds via the subtables parameter. Default None
-        does not copy subtables to output.
-    subtables : bool (Default value = False)
-        Also write subtables from mxds. Default of False only writes mxds attributes that begin with xdsN to the MS main table.
-        Setting to True will write all other mxds attributes to subtables of the main table. This is probably going to be SLOW!
-        Use infile instead whenever possible.
-    modcols : Union[List[str], None] (Default value = None)
-        List of strings indicating what column(s) were modified (aka xds data_vars). Different logic can be applied to speed up processing when
-        a data_var has not been modified from the input. Default None assumes everything has been modified (SLOW)
-    verbose : bool (Default value = False)
-        Whether or not to print output progress. Since writes will typically execute the DAG, if something is
-        going to go wrong, it will be here. Default False
-    execute : bool (Default value = True)
-        Whether or not to actually execute the DAG, or just return it with write steps appended. Default True will execute it
-
-    Returns
-    -------
-    Optional[list]
-        delayed write functions
-    """
-    outfile = os.path.expanduser(outfile)
-    if verbose:
-        print("initializing output...")
-    start = time.time()
-
-    xds_list = [flatten_xds(xds) for _key, xds in mxds.partitions.items()]
-
-    cols = cols_from_xds_to_ms(
-        list(set([dv for dx in xds_list for dv in dx.data_vars]))
-    )
-    if modcols is None:
-        modcols = cols
-
-    # create an empty main table with enough space for all desired xds partitions
-    # the first selected xds partition will be passed to create_table to provide a definition of columns and table keywords
-    # we first need to add in additional keywords for the selected subtables that will be written as well
-    max_rows = np.sum([dx.row.shape[0] for dx in xds_list])
-    create_table(
-        outfile, xds_list[0], max_rows=max_rows, infile=infile, cols=cols, generic=False
-    )
-
-    # start a list of dask delayed writes to disk (to be executed later)
-    # the SPECTRAL_WINDOW, POLARIZATION, and DATA_DESCRIPTION tables must always be present and will always be written
-    delayed_writes = [
-        dask.delayed(write_generic_table)(
-            mxds.metainfo["spectral_window"], outfile, "SPECTRAL_WINDOW", cols=None
-        )
-    ]
-    delayed_writes += [
-        dask.delayed(write_generic_table)(
-            mxds.metainfo["polarization"], outfile, "POLARIZATION", cols=None
-        )
-    ]
-    # should data_description be kept somewhere (in attrs?) or rebuilt?
-    # delayed_writes += [
-    #     dask.delayed(write_generic_table)(
-    #         mxds.metainfo["data_description"], outfile, "DATA_DESCRIPTION", cols=None
-    #     )
-    # ]
-    if subtables:  # also write the rest of the subtables
-        for subtable in list(mxds.attrs.keys()):
-            if (
-                subtable.startswith("xds")
-                or (subtable in ["spectral_window", "polarization", "data_description"])
-                or not isinstance(subtable, xr.Dataset)
-            ):
-                continue
-
-            if verbose:
-                print("writing subtable %s..." % subtable)
-            delayed_writes += [
-                dask.delayed(write_generic_table)(
-                    mxds.attrs[subtable], outfile, subtable, cols=None
-                )
-            ]
-
-    ddi_row_start = 0  # output rows will be ordered by DDI
-    for xds in xds_list:
-        txds = xds.copy().unify_chunks()
-        # TODO: carry over or rebuild?
-        ddi = 0  # txds.data_desc_id[:1].values[0]
-
-        # serial write entire DDI column first so subsequent delayed writes can find their spot
-        if verbose:
-            print("setting up DDI %i..." % ddi)
-
-        # write each chunk of each modified data_var, triggering the DAG along the way
-        for col in modcols:
-            if col not in txds:
-                continue  # this can happen with bad_cols, should still be created in create_table()
-
-            if col in cols_not_in_msv2:
-                continue
-
-            chunks = txds[col].chunks
-            dims = txds[col].dims
-            for d0 in range(len(chunks[0])):
-                d0start = ([0] + list(np.cumsum(chunks[0][:-1])))[d0]
-
-                for d1 in range(len(chunks[1]) if len(chunks) > 1 else 1):
-                    d1start = (
-                        ([0] + list(np.cumsum(chunks[1][:-1])))[d1]
-                        if len(chunks) > 1
-                        else 0
-                    )
-
-                    for d2 in range(len(chunks[2]) if len(chunks) > 2 else 1):
-                        d2start = (
-                            ([0] + list(np.cumsum(chunks[2][:-1])))[d2]
-                            if len(chunks) > 2
-                            else 0
-                        )
-
-                        starts = [d0start, d1start, d2start]
-                        lengths = [
-                            chunks[0][d0],
-                            (chunks[1][d1] if len(chunks) > 1 else 0),
-                            (chunks[2][d2] if len(chunks) > 2 else 0),
-                        ]
-                        slices = [
-                            slice(starts[0], starts[0] + lengths[0]),
-                            slice(starts[1], starts[1] + lengths[1]),
-                            slice(starts[2], starts[2] + lengths[2]),
-                        ]
-                        txda = txds[col].isel(
-                            dict(zip(dims, slices)), missing_dims="ignore"
-                        )
-                        starts[0] = starts[0] + ddi_row_start  # offset to end of table
-                        delayed_writes += [
-                            dask.delayed(write_main_table_slice)(
-                                txda,
-                                outfile,
-                                ddi=ddi,
-                                col=col,
-                                full_shape=txds[col].shape[1:],
-                                starts=starts,
-                            )
-                        ]
-
-        # now write remaining data_vars from the xds that weren't modified
-        # this can be done faster by collapsing the chunking to maximum size (minimum #) possible
-        max_chunk_size = np.prod(
-            [txds.chunks[kk][0] for kk in txds.chunks if kk in ["row", "freq", "pol"]]
-        )
-        for col in list(np.setdiff1d(list(cols), modcols)):
-            if col not in txds:
-                continue  # this can happen with bad_cols, should still be created in create_table()
-
-            if col in cols_not_in_msv2:
-                continue
-
-            col_chunk_size = np.prod([kk[0] for kk in txds[col].chunks])
-            if max_chunk_size <= 0:
-                max_chunk_size = 19200
-            if col_chunk_size <= 0:
-                col_rows = max_chunk_size
-            else:
-                col_rows = (
-                    int(np.ceil(max_chunk_size / col_chunk_size))
-                    * txds[col].chunks[0][0]
-                )
-            for rr in range(0, txds[col].row.shape[0], col_rows):
-                txda = txds[col].isel(row=slice(rr, rr + col_rows))
-                delayed_writes += [
-                    dask.delayed(write_main_table_slice)(
-                        txda,
-                        outfile,
-                        ddi=ddi,
-                        col=rename_to_msv2_cols.get(col, col).upper(),
-                        full_shape=txda.shape[1:],
-                        starts=(rr + ddi_row_start,) + (0,) * (len(txda.shape) - 1),
-                    )
-                ]
-
-        ddi_row_start += txds.row.shape[0]  # next xds will be appended after this one
-
-    if execute:
-        if verbose:
-            print("triggering DAG...")
-        zs = dask.compute(delayed_writes)
-        if verbose:
-            print(
-                "execution time %0.2f sec. Compute result len: %d"
-                % ((time.time() - start), len(zs))
-            )
-    else:
-        if verbose:
-            print("returning delayed task list")
-        return delayed_writes
-
-
-def write_ms_serial(
-    mxds: xr.Dataset,
-    outfile: str,
-    infile: str = None,
-    subtables: bool = False,
-    verbose: bool = False,
-    execute: bool = True,
-    memory_available_in_bytes: int = 500000000000,
-):
-    """
-    Write ms format xds contents back to casacore table format on disk
-
-    Parameters
-    ----------
-    mxds : xr.Dataset
-        Source multi-xarray dataset (originally created by read_ms)
-    outfile : str
-        Destination filename
-    infile : str (Default value = None)
-        Source filename to copy subtables from. Generally faster than reading/writing through mxds via the subtables parameter. Default None
-        does not copy subtables to output.
-    subtables : bool (Default value = False)
-        Also write subtables from mxds. Default of False only writes mxds attributes that begin with xdsN to the MS main table.
-        Setting to True will write all other mxds attributes to subtables of the main table. This is probably going to be SLOW!
-        Use infile instead whenever possible.
-    verbose : bool (Default value = False)
-        Whether or not to print output progress. Since writes will typically execute the DAG, if something is
-        going to go wrong, it will be here. Default False
-
-    execute : bool (Default value = True)
-        Whether or not to actually execute the DAG, or just return it with write steps appended. Default True will execute it
-    memory_available_in_bytes : (Default value = 500000000000)
-
-    Returns
-    -------
-
-    """
-
-    print("*********************")
-    outfile = os.path.expanduser(outfile)
-    if verbose:
-        print("initializing output...")
-    # start = time.time()
-
-    xds_list = [flatten_xds(xds) for _key, xds in mxds.partitions.items()]
-    cols = list(set([dv for dx in xds_list for dv in dx.data_vars]))
-    cols = cols_from_xds_to_ms(list(np.atleast_1d(cols)))
-
-    # create an empty main table with enough space for all desired xds partitions
-    # the first selected xds partition will be passed to create_table to provide a definition of columns and table keywords
-    # we first need to add in additional keywords for the selected subtables that will be written as well
-    max_rows = np.sum([dx.row.shape[0] for dx in xds_list])
-    create_table(
-        outfile, xds_list[0], max_rows=max_rows, infile=infile, cols=cols, generic=False
-    )
-
-    # start a list of dask delayed writes to disk (to be executed later)
-    # the SPECTRAL_WINDOW, POLARIZATION, and DATA_DESCRIPTION tables must always be present and will always be written
-    write_generic_table(
-        mxds.metainfo["spectral_window"], outfile, "SPECTRAL_WINDOW", cols=None
-    )
-    write_generic_table(
-        mxds.metainfo["polarization"], outfile, "POLARIZATION", cols=None
-    )
-    # should data_description be kept somewhere (in attrs?) or rebuilt?
-    # write_generic_table(mxds.metainfo.data_description, outfile, "DATA_DESCRIPTION", cols=None)
-
-    if subtables:  # also write the rest of the subtables
-        # for subtable in list(mxds.attrs.keys()):
-        #'OBSERVATION','STATE'
-        # ['FEED','OBSERVATION','FIELD','ANTENNA','HISTORY','STATE']
-        # ['FEED','FIELD','ANTENNA','HISTORY']
-        # ,'FIELD','ANTENNA'
-        # for subtable in ['OBSERVATION']:
-        for subtable in list(mxds.metainfo.keys()):
-            if subtable.startswith("xds") or (
-                subtable in ["spectral_window", "polarization", "data_description"]
-            ):
-                continue
-            if verbose:
-                print("writing subtable %s..." % subtable)
-            # print(subtable)
-            # print(mxds.attrs[subtable])
-            try:
-                write_generic_table(
-                    mxds.metainfo[subtable], outfile, subtable.upper(), cols=None
-                )
-            except (RuntimeError, KeyError) as exc:
-                print(f"Exception writing subtable {subtable}: {exc}")
-
-    part_key0 = next(iter(mxds.partitions))
-    vis_data_shape = mxds.partitions[part_key0].VIS.shape
-    rows_chunk_size = calc_optimal_ms_chunk_shape(
-        memory_available_in_bytes, vis_data_shape, 16, "DATA"
-    )
-
-    # print(rows_chunk_size)
-    # rows_chunk_size = 200000000
-    # write each chunk of each modified data_var, triggering the DAG along the way
-    tbs = tables.table(
-        outfile, readonly=False, lockoptions={"option": "permanentwait"}, ack=True
-    )
-
-    start_main = time.time()
-    for col, var_name in cols.items():
-        xda = mxds.partitions[part_key0][var_name]
-        # print(col,xda.dtype)
-
-        for start_row in np.arange(0, vis_data_shape[0], rows_chunk_size):
-            end_row = start_row + rows_chunk_size
-            if end_row > vis_data_shape[0]:
-                end_row = vis_data_shape[0]
-
-            # start = time.time()
-            values = xda[start_row:end_row,].compute().values
-            if xda.dtype == "datetime64[ns]":
-                values = revert_time(values)
-            # print('1. Time', time.time()-start, values.shape)
-
-            # start = time.time()
-            try:
-                tbs.putcol(col, values, start_row, len(values))
-                # print('2. Time', time.time()-start)
-            except RuntimeError as exc:
-                print(f"Exception writing main table column {col}: {exc}")
-
-    print("3. Time", time.time() - start_main)
-
-    tbs.unlock()
-    tbs.close()
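For readers skimming the removed write path: its core pattern is to queue one dask.delayed task per chunk of each column and then trigger the whole graph with a single compute call. Below is a minimal, self-contained sketch of that pattern, assuming only dask and numpy are installed; write_slice is a hypothetical stand-in for write_main_table_slice, not xradio API.

import dask
import numpy as np


def write_slice(col: str, start: int, values: np.ndarray) -> int:
    # Stand-in for write_main_table_slice: pretend to write `values`
    # at row offset `start` and report how many rows were written.
    return len(values)


# Toy in-memory "columns" and a row chunk size.
data = {"DATA": np.arange(12).reshape(6, 2), "FLAG": np.zeros((6, 2), bool)}
row_chunk = 2

# Build the list of deferred per-chunk writes, as the removed write_ms did.
delayed_writes = []
for col, arr in data.items():
    for start in range(0, arr.shape[0], row_chunk):
        chunk = arr[start : start + row_chunk]
        delayed_writes.append(dask.delayed(write_slice)(col, start, chunk))

# One compute call executes every queued write, mirroring the
# execute=True branch of the removed function.
(results,) = dask.compute(delayed_writes)
print(sum(results), "rows written across", len(results), "tasks")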
--- xradio/measurement_set/_utils/_msv2/chunks.py
+++ /dev/null
@@ -1,115 +0,0 @@
-from pathlib import Path
-from typing import Dict, Tuple
-
-import xarray as xr
-
-
-from .subtables import subt_rename_ids
-from ._tables.read import load_generic_table
-from ._tables.read_subtables import read_delayed_pointing_table
-
-
-def read_spw_ddi_ant_pol(inpath: str) -> Tuple[xr.Dataset]:
-    """
-    Reads the four metainfo subtables needed to load data chunks into xdss.
-
-    Parameters
-    ----------
-    inpath : str
-        MS path (main table)
-
-    Returns
-    -------
-    Tuple[xr.Dataset]
-        tuple with antenna, ddi, spw, and polarization setup subtables info
-    """
-    spw_xds = load_generic_table(
-        inpath,
-        "SPECTRAL_WINDOW",
-        rename_ids=subt_rename_ids["SPECTRAL_WINDOW"],
-    )
-    ddi_xds = load_generic_table(inpath, "DATA_DESCRIPTION")
-    ant_xds = load_generic_table(
-        inpath, "ANTENNA", rename_ids=subt_rename_ids["ANTENNA"]
-    )
-    pol_xds = load_generic_table(
-        inpath, "POLARIZATION", rename_ids=subt_rename_ids["POLARIZATION"]
-    )
-    return ant_xds, ddi_xds, spw_xds, pol_xds
-
-
-def finalize_chunks(
-    infile: str, chunks: Dict[str, xr.Dataset], chunk_spec: Dict[str, slice]
-) -> Dict[Tuple[int, int], xr.Dataset]:
-    """
-    Adds pointing variables to a dictionary of chunk xdss. This is
-    intended to be added after reading chunks from an MS main table.
-
-    Parameters
-    ----------
-    infile : str
-        MS path (main table)
-    chunks : Dict[str, xr.Dataset]
-        chunk xdss
-    chunk_spec : Dict[str, slice]
-        specification of chunk to load
-
-    Returns
-    -------
-    Dict[Tuple[int, int], xr.Dataset]
-        dictionary of chunk xdss where every xds now has pointing
-        data variables
-    """
-    pnt_name = "POINTING"
-    pnt_path = Path(infile, pnt_name)
-    if "time" in chunk_spec:
-        time_slice = chunk_spec["time"]
-    else:
-        time_slice = None
-    pnt_xds = read_delayed_pointing_table(
-        str(pnt_path),
-        rename_ids=subt_rename_ids.get(pnt_name, None),
-        time_slice=time_slice,
-    )
-
-    if "time" not in pnt_xds.dims:
-        return xr.Dataset()
-
-    pnt_xds = pnt_xds.compute()
-
-    pnt_chunks = {
-        key: finalize_chunk_xds(infile, xds, pnt_xds)
-        for _idx, (key, xds) in enumerate(chunks.items())
-    }
-
-    return pnt_chunks
-
-
-def finalize_chunk_xds(
-    infile: str, chunk_xds: xr.Dataset, pointing_xds: xr.Dataset
-) -> xr.Dataset:
-    """
-    Adds pointing variables to one chunk xds.
-
-    Parameters
-    ----------
-    infile : str
-        MS path (main table)
-    xds_chunk : xr.Dataset
-        chunks xds
-    pointing_xds : xr.Dataset
-        pointing (sub)table xds
-
-    Returns
-    -------
-    xr.Dataset
-        chunk xds with pointing data variables interpolated form
-        the pointing (sub)table
-    """
-
-    interp_pnt = pointing_xds.interp(time=chunk_xds.time, method="nearest")
-
-    for var in interp_pnt.data_vars:
-        chunk_xds[f"pointing_{var}"] = interp_pnt[var]
-
-    return chunk_xds
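The pointing handling in the removed finalize_chunk_xds boils down to nearest-time interpolation of the POINTING subtable onto a chunk's time axis. A minimal sketch with plain xarray follows (Dataset.interp needs scipy installed); the toy datasets and variable names are illustrative, not xradio schema.

import numpy as np
import xarray as xr

# Pointing "subtable" sampled on its own, coarser time grid.
pointing_xds = xr.Dataset(
    {"direction": ("time", np.linspace(0.0, 1.0, 5))},
    coords={"time": np.array([0.0, 2.5, 5.0, 7.5, 10.0])},
)

# A main-table chunk with a denser time axis and no pointing info yet.
chunk_xds = xr.Dataset(coords={"time": np.linspace(0.0, 10.0, 11)})

# Nearest-neighbour interpolation onto the chunk's times, then copy each
# pointing variable in under a pointing_ prefix, as the deleted code did.
interp_pnt = pointing_xds.interp(time=chunk_xds.time, method="nearest")
for var in interp_pnt.data_vars:
    chunk_xds[f"pointing_{var}"] = interp_pnt[var]

print(chunk_xds.pointing_direction.values)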
--- xradio/measurement_set/_utils/_msv2/descr.py
+++ /dev/null
@@ -1,165 +0,0 @@
-import os
-from typing import Dict, Union
-
-import numpy as np
-import pandas as pd
-import xarray as xr
-
-from ._tables.read import load_generic_table, read_flat_col_chunk
-from ._tables.table_query import open_query, open_table_ro
-from xradio._utils.list_and_array import unique_1d
-
-
-def describe_ms(
-    infile: str, mode: str = "summary", rowmap: Union[dict, None] = None
-) -> Union[pd.DataFrame, Dict]:
-    """
-    Summarize the contents of an MS directory in casacore table format
-
-    Parameters
-    ----------
-    infile : str
-        input MS filename
-    mode : str (Default value = "summary")
-        type of information returned ('summary', 'flat', 'expanded').
-        'summary' returns a pandas dataframe that is nice for displaying in notebooks
-        etc. 'flat' returns a list of tuples of (ddi, row, chan, pol). 'expanded'
-        returns a list of tuples of (ddi, time, baseline, chan, pol). These latter two
-        are good for trying to determine chunk size for read_ms(expand=True/False). (Default value = "summary")
-    rowmap : Union[dict, None] (Default value = None)
-        dict of DDI to tuple of (row indices, channel indices). Returned
-        by ms_selection function. Default None ignores selections
-
-    Returns
-    -------
-    Union[pd.DataFrame, Dict]
-        summary as a pd dataframe
-    """
-    infile = os.path.expanduser(infile)  # does nothing if $HOME is unknown
-    if not os.path.isdir(infile):
-        raise ValueError(f"invalid input filename to describe_ms: {infile}")
-    if mode not in [
-        "summary",
-        "flat",
-        "expanded",
-    ]:
-        raise ValueError("invalid mode, must be summary, flat or expanded")
-
-    ddi_xds = load_generic_table(infile, "DATA_DESCRIPTION")
-    ddis = list(ddi_xds.row.values) if rowmap is None else list(rowmap.keys())
-    summary: Union[pd.DataFrame, Dict] = []
-    if mode == "summary":
-        summary = pd.DataFrame([])
-
-    all_sdf = []
-    with open_table_ro(infile) as tb_tool:
-        for ddi in ddis:
-            taql = f"select * from $tb_tool where DATA_DESC_ID = {ddi}"
-            with open_query(tb_tool, taql) as query_per_ddi:
-                sdf = populate_ms_descr(
-                    infile, mode, query_per_ddi, summary, ddi, ddi_xds
-                )
-                all_sdf.append(sdf)
-
-    if mode == "summary":
-        summary = pd.DataFrame(all_sdf)
-        summary = summary.set_index("ddi").sort_index()
-    else:
-        summary = dict(summary)
-    return summary
-
-
-def populate_ms_descr(
-    infile: str,
-    mode: str,
-    query_per_ddi,
-    summary: dict,
-    ddi: int,
-    ddi_xds: xr.Dataset,
-    rowmap: Union[Dict, None] = None,
-) -> pd.DataFrame:
-    """
-    Adds information from the time and baseline (antenna1+antenna2)
-    columns as well as channel and polarizations, based on a taql
-    query.
-
-    Parameters
-    ----------
-    infile : str
-        input table/MS path
-    mode : str
-        mode (as in describe_ms())
-    query_per_ddi :
-        a TaQL query with data per individual DDI
-    summary : Dict
-        summary dict being populated
-    ddi_xds : xr.Dataset
-        final summary object being populated from the invividual sdf's
-
-    Returns
-    -------
-    pd.DataFrame
-    """
-    spw_ids = ddi_xds.SPECTRAL_WINDOW_ID.values
-    pol_ids = ddi_xds.POLARIZATION_ID.values
-    sdf = {
-        "ddi": ddi,
-        "spw_id": spw_ids[ddi],
-        "pol_id": pol_ids[ddi],
-        "rows": query_per_ddi.nrows(),
-    }
-
-    # figure out characteristics of main table from select subtables (must all be present)
-    spw_xds = load_generic_table(infile, "SPECTRAL_WINDOW")
-    pol_xds = load_generic_table(infile, "POLARIZATION")
-
-    if mode in ["expanded", "summary"]:
-        times = (
-            query_per_ddi.getcol("TIME")
-            if rowmap is None
-            else read_flat_col_chunk(infile, "TIME", (1,), rowmap[ddi][0], 0, 0)
-        )
-        baselines = [
-            (
-                query_per_ddi.getcol(rr)[:, None]
-                if rowmap is None
-                else read_flat_col_chunk(infile, rr, (1,), rowmap[ddi][0], 0, 0)
-            )
-            for rr in ["ANTENNA1", "ANTENNA2"]
-        ]
-        sdf.update(
-            {
-                "times": len(unique_1d(times)),
-                "baselines": len(np.unique(np.hstack(baselines), axis=0)),
-            }
-        )
-
-    chans = spw_xds.NUM_CHAN.values
-    pols = pol_xds.NUM_CORR.values
-    sdf.update(
-        {
-            "chans": (
-                chans[spw_ids[ddi]]
-                if (rowmap is None) or (rowmap[ddi][1] is None)
-                else len(rowmap[ddi][1])
-            ),
-            "pols": pols[pol_ids[ddi]],
-        }
-    )
-    sdf["size_MB"] = np.ceil(
-        (sdf["rows"] * sdf["chans"] * sdf["pols"] * 10) / 1024**2
-    ).astype(int)
-
-    if rowmap is not None:
-        sdf["rows"] = len(rowmap[ddi][0])
-
-    if mode == "summary":
-        summary = pd.concat(
-            [summary, pd.DataFrame(sdf, index=[str(ddi)])], axis=0, sort=False
-        )
-    elif mode == "flat":
-        summary += [(ddi, (sdf["rows"], sdf["chans"], sdf["pols"]))]
-    else:
-        summary += [(ddi, sdf["times"], sdf["baselines"], sdf["chans"], sdf["pols"])]
-
-    return sdf
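The removed describe_ms summary mode amounts to building one dict of scalars per DDI and collecting them into a DataFrame indexed by ddi, including the same rows x chans x pols x 10-byte size estimate seen above. A minimal sketch with made-up numbers; in the real code these values came from the MS subtables and TaQL queries.

import numpy as np
import pandas as pd

# One dict of scalars per DDI, as populate_ms_descr produced.
per_ddi = [
    {"ddi": 0, "spw_id": 0, "pol_id": 0, "rows": 3780, "chans": 64, "pols": 4},
    {"ddi": 1, "spw_id": 1, "pol_id": 0, "rows": 3780, "chans": 128, "pols": 4},
]

# Same 10-bytes-per-visibility size estimate as the deleted code.
for sdf in per_ddi:
    sdf["size_MB"] = int(
        np.ceil(sdf["rows"] * sdf["chans"] * sdf["pols"] * 10 / 1024**2)
    )

# Collect into the notebook-friendly summary frame, indexed by ddi.
summary = pd.DataFrame(per_ddi).set_index("ddi").sort_index()
print(summary)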