xradio-0.0.56-py3-none-any.whl → xradio-0.0.58-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (62)
  1. xradio/__init__.py +2 -2
  2. xradio/_utils/_casacore/casacore_from_casatools.py +12 -2
  3. xradio/_utils/_casacore/tables.py +1 -0
  4. xradio/_utils/coord_math.py +22 -23
  5. xradio/_utils/dict_helpers.py +76 -11
  6. xradio/_utils/schema.py +5 -2
  7. xradio/_utils/zarr/common.py +1 -73
  8. xradio/image/_util/_casacore/xds_from_casacore.py +49 -33
  9. xradio/image/_util/_casacore/xds_to_casacore.py +41 -14
  10. xradio/image/_util/_fits/xds_from_fits.py +146 -35
  11. xradio/image/_util/casacore.py +4 -3
  12. xradio/image/_util/common.py +4 -4
  13. xradio/image/_util/image_factory.py +8 -8
  14. xradio/image/image.py +45 -5
  15. xradio/measurement_set/__init__.py +19 -9
  16. xradio/measurement_set/_utils/__init__.py +1 -3
  17. xradio/measurement_set/_utils/_msv2/__init__.py +0 -0
  18. xradio/measurement_set/_utils/_msv2/_tables/read.py +17 -76
  19. xradio/measurement_set/_utils/_msv2/_tables/read_main_table.py +2 -685
  20. xradio/measurement_set/_utils/_msv2/conversion.py +123 -145
  21. xradio/measurement_set/_utils/_msv2/create_antenna_xds.py +9 -16
  22. xradio/measurement_set/_utils/_msv2/create_field_and_source_xds.py +125 -221
  23. xradio/measurement_set/_utils/_msv2/msv2_to_msv4_meta.py +1 -2
  24. xradio/measurement_set/_utils/_msv2/msv4_info_dicts.py +8 -7
  25. xradio/measurement_set/_utils/_msv2/msv4_sub_xdss.py +27 -72
  26. xradio/measurement_set/_utils/_msv2/partition_queries.py +1 -261
  27. xradio/measurement_set/_utils/_msv2/subtables.py +0 -107
  28. xradio/measurement_set/_utils/_utils/interpolate.py +60 -0
  29. xradio/measurement_set/_utils/_zarr/encoding.py +2 -7
  30. xradio/measurement_set/convert_msv2_to_processing_set.py +0 -2
  31. xradio/measurement_set/load_processing_set.py +2 -2
  32. xradio/measurement_set/measurement_set_xdt.py +14 -14
  33. xradio/measurement_set/open_processing_set.py +1 -3
  34. xradio/measurement_set/processing_set_xdt.py +41 -835
  35. xradio/measurement_set/schema.py +95 -122
  36. xradio/schema/check.py +91 -97
  37. xradio/schema/dataclass.py +159 -22
  38. xradio/schema/export.py +99 -0
  39. xradio/schema/metamodel.py +51 -16
  40. xradio/schema/typing.py +5 -5
  41. {xradio-0.0.56.dist-info → xradio-0.0.58.dist-info}/METADATA +2 -1
  42. xradio-0.0.58.dist-info/RECORD +65 -0
  43. {xradio-0.0.56.dist-info → xradio-0.0.58.dist-info}/WHEEL +1 -1
  44. xradio/image/_util/fits.py +0 -13
  45. xradio/measurement_set/_utils/_msv2/_tables/load.py +0 -66
  46. xradio/measurement_set/_utils/_msv2/_tables/load_main_table.py +0 -490
  47. xradio/measurement_set/_utils/_msv2/_tables/read_subtables.py +0 -398
  48. xradio/measurement_set/_utils/_msv2/_tables/write.py +0 -323
  49. xradio/measurement_set/_utils/_msv2/_tables/write_exp_api.py +0 -388
  50. xradio/measurement_set/_utils/_msv2/chunks.py +0 -115
  51. xradio/measurement_set/_utils/_msv2/descr.py +0 -165
  52. xradio/measurement_set/_utils/_msv2/msv2_msv3.py +0 -7
  53. xradio/measurement_set/_utils/_msv2/partitions.py +0 -392
  54. xradio/measurement_set/_utils/_utils/cds.py +0 -40
  55. xradio/measurement_set/_utils/_utils/xds_helper.py +0 -404
  56. xradio/measurement_set/_utils/_zarr/read.py +0 -263
  57. xradio/measurement_set/_utils/_zarr/write.py +0 -329
  58. xradio/measurement_set/_utils/msv2.py +0 -106
  59. xradio/measurement_set/_utils/zarr.py +0 -133
  60. xradio-0.0.56.dist-info/RECORD +0 -78
  61. {xradio-0.0.56.dist-info → xradio-0.0.58.dist-info}/licenses/LICENSE.txt +0 -0
  62. {xradio-0.0.56.dist-info → xradio-0.0.58.dist-info}/top_level.txt +0 -0
xradio/measurement_set/_utils/_msv2/_tables/write_exp_api.py
@@ -1,388 +0,0 @@
- import os, time
- from typing import Dict, List, Optional, Union
-
- import dask
- import numpy as np
- import xarray as xr
-
- from ..._utils.xds_helper import flatten_xds, calc_optimal_ms_chunk_shape
- from .write import write_generic_table, write_main_table_slice
- from .write import create_table, revert_time
-
- try:
-     from casacore import tables
- except ImportError:
-     import xradio._utils._casacore.casacore_from_casatools as tables
-
-
- # TODO: this should be consolidated with the equivalent in read_main_table,
- # if we keep this mapping
- rename_to_msv2_cols = {
-     "antenna1_id": "ANTENNA1",
-     "antenna2_id": "ANTENNA2",
-     "feed1_id": "FEED1",
-     "feed2_id": "FEED2",
-     # optional cols:
-     # "WEIGHT": "WEIGHT_SPECTRUM",
-     "VIS_CORRECTED": "CORRECTED_DATA",
-     "VIS": "DATA",
-     "VIS_MODEL": "MODEL_DATA",
-     "AUTOCORR": "FLOAT_DATA",
- }
- # cols added in xds not in MSv2
- cols_not_in_msv2 = ["baseline_ant1_id", "baseline_ant2_id"]
-
-
- def cols_from_xds_to_ms(cols: List[str]) -> Dict[str, str]:
-     """
-     Translates between the lowercase/uppercase conventions and renames
-     some MS_column_names <-> xds_data_var_names.
-     Excludes the pointing_ vars that are in the xds but should not be written to the MS.
-     """
-     return {
-         rename_to_msv2_cols.get(col, col).upper(): col
-         for col in cols
-         if (col and col not in cols_not_in_msv2 and not col.startswith("pointing_"))
-     }
-
-
- def write_ms(
-     mxds: xr.Dataset,
-     outfile: str,
-     infile: str = None,
-     subtables: bool = False,
-     modcols: Union[List[str], None] = None,
-     verbose: bool = False,
-     execute: bool = True,
- ) -> Optional[list]:
-     """
-     Write MS format xds contents back to casacore MS (CTDS - casacore Table Data System) format on disk
-
-     Parameters
-     ----------
-     mxds : xr.Dataset
-         Source multi-xarray dataset (originally created by read_ms)
-     outfile : str
-         Destination filename
-     infile : Union[str, None] (Default value = None)
-         Source filename to copy subtables from. Generally faster than reading/writing through mxds via the subtables parameter. Default None
-         does not copy subtables to output.
-     subtables : bool (Default value = False)
-         Also write subtables from mxds. Default of False only writes mxds attributes that begin with xdsN to the MS main table.
-         Setting to True will write all other mxds attributes to subtables of the main table. This is probably going to be SLOW!
-         Use infile instead whenever possible.
-     modcols : Union[List[str], None] (Default value = None)
-         List of strings indicating which column(s) were modified (aka xds data_vars). Different logic can be applied to speed up processing when
-         a data_var has not been modified from the input. Default None assumes everything has been modified (SLOW)
-     verbose : bool (Default value = False)
-         Whether or not to print output progress. Since writes will typically execute the DAG, if something is
-         going to go wrong, it will be here. Default False
-     execute : bool (Default value = True)
-         Whether or not to actually execute the DAG, or just return it with write steps appended. Default True will execute it
-
-     Returns
-     -------
-     Optional[list]
-         delayed write functions
-     """
-     outfile = os.path.expanduser(outfile)
-     if verbose:
-         print("initializing output...")
-     start = time.time()
-
-     xds_list = [flatten_xds(xds) for _key, xds in mxds.partitions.items()]
-
-     cols = cols_from_xds_to_ms(
-         list(set([dv for dx in xds_list for dv in dx.data_vars]))
-     )
-     if modcols is None:
-         modcols = cols
-
-     # create an empty main table with enough space for all desired xds partitions
-     # the first selected xds partition will be passed to create_table to provide a definition of columns and table keywords
-     # we first need to add in additional keywords for the selected subtables that will be written as well
-     max_rows = np.sum([dx.row.shape[0] for dx in xds_list])
-     create_table(
-         outfile, xds_list[0], max_rows=max_rows, infile=infile, cols=cols, generic=False
-     )
-
-     # start a list of dask delayed writes to disk (to be executed later)
-     # the SPECTRAL_WINDOW, POLARIZATION, and DATA_DESCRIPTION tables must always be present and will always be written
-     delayed_writes = [
-         dask.delayed(write_generic_table)(
-             mxds.metainfo["spectral_window"], outfile, "SPECTRAL_WINDOW", cols=None
-         )
-     ]
-     delayed_writes += [
-         dask.delayed(write_generic_table)(
-             mxds.metainfo["polarization"], outfile, "POLARIZATION", cols=None
-         )
-     ]
-     # should data_description be kept somewhere (in attrs?) or rebuilt?
-     # delayed_writes += [
-     #     dask.delayed(write_generic_table)(
-     #         mxds.metainfo["data_description"], outfile, "DATA_DESCRIPTION", cols=None
-     #     )
-     # ]
-     if subtables:  # also write the rest of the subtables
-         for subtable in list(mxds.attrs.keys()):
-             if (
-                 subtable.startswith("xds")
-                 or (subtable in ["spectral_window", "polarization", "data_description"])
-                 or not isinstance(mxds.attrs[subtable], xr.Dataset)
-             ):
-                 continue
-
-             if verbose:
-                 print("writing subtable %s..." % subtable)
-             delayed_writes += [
-                 dask.delayed(write_generic_table)(
-                     mxds.attrs[subtable], outfile, subtable, cols=None
-                 )
-             ]
-
-     ddi_row_start = 0  # output rows will be ordered by DDI
-     for xds in xds_list:
-         txds = xds.copy().unify_chunks()
-         # TODO: carry over or rebuild?
-         ddi = 0  # txds.data_desc_id[:1].values[0]
-
-         # serial write entire DDI column first so subsequent delayed writes can find their spot
-         if verbose:
-             print("setting up DDI %i..." % ddi)
-
-         # write each chunk of each modified data_var, triggering the DAG along the way
-         for col in modcols:
-             if col not in txds:
-                 continue  # this can happen with bad_cols, should still be created in create_table()
-
-             if col in cols_not_in_msv2:
-                 continue
-
-             chunks = txds[col].chunks
-             dims = txds[col].dims
-             for d0 in range(len(chunks[0])):
-                 d0start = ([0] + list(np.cumsum(chunks[0][:-1])))[d0]
-
-                 for d1 in range(len(chunks[1]) if len(chunks) > 1 else 1):
-                     d1start = (
-                         ([0] + list(np.cumsum(chunks[1][:-1])))[d1]
-                         if len(chunks) > 1
-                         else 0
-                     )
-
-                     for d2 in range(len(chunks[2]) if len(chunks) > 2 else 1):
-                         d2start = (
-                             ([0] + list(np.cumsum(chunks[2][:-1])))[d2]
-                             if len(chunks) > 2
-                             else 0
-                         )
-
-                         starts = [d0start, d1start, d2start]
-                         lengths = [
-                             chunks[0][d0],
-                             (chunks[1][d1] if len(chunks) > 1 else 0),
-                             (chunks[2][d2] if len(chunks) > 2 else 0),
-                         ]
-                         slices = [
-                             slice(starts[0], starts[0] + lengths[0]),
-                             slice(starts[1], starts[1] + lengths[1]),
-                             slice(starts[2], starts[2] + lengths[2]),
-                         ]
-                         txda = txds[col].isel(
-                             dict(zip(dims, slices)), missing_dims="ignore"
-                         )
-                         starts[0] = starts[0] + ddi_row_start  # offset to end of table
-                         delayed_writes += [
-                             dask.delayed(write_main_table_slice)(
-                                 txda,
-                                 outfile,
-                                 ddi=ddi,
-                                 col=col,
-                                 full_shape=txds[col].shape[1:],
-                                 starts=starts,
-                             )
-                         ]
-
-         # now write remaining data_vars from the xds that weren't modified
-         # this can be done faster by collapsing the chunking to maximum size (minimum #) possible
-         max_chunk_size = np.prod(
-             [txds.chunks[kk][0] for kk in txds.chunks if kk in ["row", "freq", "pol"]]
-         )
-         for col in list(np.setdiff1d(list(cols), modcols)):
-             if col not in txds:
-                 continue  # this can happen with bad_cols, should still be created in create_table()
-
-             if col in cols_not_in_msv2:
-                 continue
-
-             col_chunk_size = np.prod([kk[0] for kk in txds[col].chunks])
-             if max_chunk_size <= 0:
-                 max_chunk_size = 19200
-             if col_chunk_size <= 0:
-                 col_rows = max_chunk_size
-             else:
-                 col_rows = (
-                     int(np.ceil(max_chunk_size / col_chunk_size))
-                     * txds[col].chunks[0][0]
-                 )
-             for rr in range(0, txds[col].row.shape[0], col_rows):
-                 txda = txds[col].isel(row=slice(rr, rr + col_rows))
-                 delayed_writes += [
-                     dask.delayed(write_main_table_slice)(
-                         txda,
-                         outfile,
-                         ddi=ddi,
-                         col=rename_to_msv2_cols.get(col, col).upper(),
-                         full_shape=txda.shape[1:],
-                         starts=(rr + ddi_row_start,) + (0,) * (len(txda.shape) - 1),
-                     )
-                 ]
-
-         ddi_row_start += txds.row.shape[0]  # next xds will be appended after this one
-
-     if execute:
-         if verbose:
-             print("triggering DAG...")
-         zs = dask.compute(delayed_writes)
-         if verbose:
-             print(
-                 "execution time %0.2f sec. Compute result len: %d"
-                 % ((time.time() - start), len(zs))
-             )
-     else:
-         if verbose:
-             print("returning delayed task list")
-         return delayed_writes
-
-
- def write_ms_serial(
-     mxds: xr.Dataset,
-     outfile: str,
-     infile: str = None,
-     subtables: bool = False,
-     verbose: bool = False,
-     execute: bool = True,
-     memory_available_in_bytes: int = 500000000000,
- ):
-     """
-     Write MS format xds contents back to casacore table format on disk
-
-     Parameters
-     ----------
-     mxds : xr.Dataset
-         Source multi-xarray dataset (originally created by read_ms)
-     outfile : str
-         Destination filename
-     infile : str (Default value = None)
-         Source filename to copy subtables from. Generally faster than reading/writing through mxds via the subtables parameter. Default None
-         does not copy subtables to output.
-     subtables : bool (Default value = False)
-         Also write subtables from mxds. Default of False only writes mxds attributes that begin with xdsN to the MS main table.
-         Setting to True will write all other mxds attributes to subtables of the main table. This is probably going to be SLOW!
-         Use infile instead whenever possible.
-     verbose : bool (Default value = False)
-         Whether or not to print output progress. Since writes happen immediately, if something is
-         going to go wrong, it will be here. Default False
-     execute : bool (Default value = True)
-         Whether or not to actually execute the writes, or just set them up. Default True will execute them
-     memory_available_in_bytes : int (Default value = 500000000000)
-         Memory budget used to compute how many rows are written per chunk
-
-     Returns
-     -------
-     None
-     """
-
-     print("*********************")
-     outfile = os.path.expanduser(outfile)
-     if verbose:
-         print("initializing output...")
-
-     xds_list = [flatten_xds(xds) for _key, xds in mxds.partitions.items()]
-     cols = list(set([dv for dx in xds_list for dv in dx.data_vars]))
-     cols = cols_from_xds_to_ms(list(np.atleast_1d(cols)))
-
-     # create an empty main table with enough space for all desired xds partitions
-     # the first selected xds partition will be passed to create_table to provide a definition of columns and table keywords
-     # we first need to add in additional keywords for the selected subtables that will be written as well
-     max_rows = np.sum([dx.row.shape[0] for dx in xds_list])
-     create_table(
-         outfile, xds_list[0], max_rows=max_rows, infile=infile, cols=cols, generic=False
-     )
-
-     # the SPECTRAL_WINDOW, POLARIZATION, and DATA_DESCRIPTION tables must always be present and will always be written
-     write_generic_table(
-         mxds.metainfo["spectral_window"], outfile, "SPECTRAL_WINDOW", cols=None
-     )
-     write_generic_table(
-         mxds.metainfo["polarization"], outfile, "POLARIZATION", cols=None
-     )
-     # should data_description be kept somewhere (in attrs?) or rebuilt?
-     # write_generic_table(mxds.metainfo.data_description, outfile, "DATA_DESCRIPTION", cols=None)
-
-     if subtables:  # also write the rest of the subtables
-         for subtable in list(mxds.metainfo.keys()):
-             if subtable.startswith("xds") or (
-                 subtable in ["spectral_window", "polarization", "data_description"]
-             ):
-                 continue
-             if verbose:
-                 print("writing subtable %s..." % subtable)
-             try:
-                 write_generic_table(
-                     mxds.metainfo[subtable], outfile, subtable.upper(), cols=None
-                 )
-             except (RuntimeError, KeyError) as exc:
-                 print(f"Exception writing subtable {subtable}: {exc}")
-
-     part_key0 = next(iter(mxds.partitions))
-     vis_data_shape = mxds.partitions[part_key0].VIS.shape
-     rows_chunk_size = calc_optimal_ms_chunk_shape(
-         memory_available_in_bytes, vis_data_shape, 16, "DATA"
-     )
-
-     # write each data_var column by column, in row chunks sized by the memory budget
-     tbs = tables.table(
-         outfile, readonly=False, lockoptions={"option": "permanentwait"}, ack=True
-     )
-
-     start_main = time.time()
-     for col, var_name in cols.items():
-         xda = mxds.partitions[part_key0][var_name]
-
-         for start_row in np.arange(0, vis_data_shape[0], rows_chunk_size):
-             end_row = start_row + rows_chunk_size
-             if end_row > vis_data_shape[0]:
-                 end_row = vis_data_shape[0]
-
-             values = xda[start_row:end_row,].compute().values
-             if xda.dtype == "datetime64[ns]":
-                 values = revert_time(values)
-
-             try:
-                 tbs.putcol(col, values, start_row, len(values))
-             except RuntimeError as exc:
-                 print(f"Exception writing main table column {col}: {exc}")
-
-     print("main table write time %0.2f sec" % (time.time() - start_main))
-
-     tbs.unlock()
-     tbs.close()
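
The module above exposed two writers: write_ms builds a list of dask.delayed write steps (optionally returning them unexecuted), while write_ms_serial writes column by column in row chunks sized by a memory budget. Below is a minimal sketch of driving the delayed path, assuming an mxds of the old multi-xds form (with .partitions and .metainfo) produced by the reader this module shipped with in 0.0.56; the .ms paths and the modcols choice are placeholders, not part of any current xradio API:

    import dask
    import xarray as xr

    # import path as of xradio 0.0.56; this module is removed in 0.0.58
    from xradio.measurement_set._utils._msv2._tables.write_exp_api import write_ms

    def flush_to_ms(mxds: xr.Dataset) -> None:
        # execute=False appends the write steps and returns them instead of computing
        delayed = write_ms(
            mxds, "out.ms", infile="in.ms", modcols=["VIS"], execute=False
        )
        # trigger all subtable and main-table writes in a single graph execution
        dask.compute(delayed)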
xradio/measurement_set/_utils/_msv2/chunks.py
@@ -1,115 +0,0 @@
- from pathlib import Path
- from typing import Dict, Tuple
-
- import xarray as xr
-
-
- from .subtables import subt_rename_ids
- from ._tables.read import load_generic_table
- from ._tables.read_subtables import read_delayed_pointing_table
-
-
- def read_spw_ddi_ant_pol(
-     inpath: str,
- ) -> Tuple[xr.Dataset, xr.Dataset, xr.Dataset, xr.Dataset]:
-     """
-     Reads the four metainfo subtables needed to load data chunks into xdss.
-
-     Parameters
-     ----------
-     inpath : str
-         MS path (main table)
-
-     Returns
-     -------
-     Tuple[xr.Dataset, xr.Dataset, xr.Dataset, xr.Dataset]
-         tuple with antenna, ddi, spw, and polarization setup subtables info
-     """
-     spw_xds = load_generic_table(
-         inpath,
-         "SPECTRAL_WINDOW",
-         rename_ids=subt_rename_ids["SPECTRAL_WINDOW"],
-     )
-     ddi_xds = load_generic_table(inpath, "DATA_DESCRIPTION")
-     ant_xds = load_generic_table(
-         inpath, "ANTENNA", rename_ids=subt_rename_ids["ANTENNA"]
-     )
-     pol_xds = load_generic_table(
-         inpath, "POLARIZATION", rename_ids=subt_rename_ids["POLARIZATION"]
-     )
-     return ant_xds, ddi_xds, spw_xds, pol_xds
-
-
- def finalize_chunks(
-     infile: str, chunks: Dict[str, xr.Dataset], chunk_spec: Dict[str, slice]
- ) -> Dict[Tuple[int, int], xr.Dataset]:
-     """
-     Adds pointing variables to a dictionary of chunk xdss. This is
-     intended to be used after reading chunks from an MS main table.
-
-     Parameters
-     ----------
-     infile : str
-         MS path (main table)
-     chunks : Dict[str, xr.Dataset]
-         chunk xdss
-     chunk_spec : Dict[str, slice]
-         specification of chunk to load
-
-     Returns
-     -------
-     Dict[Tuple[int, int], xr.Dataset]
-         dictionary of chunk xdss where every xds now has pointing
-         data variables
-     """
-     pnt_name = "POINTING"
-     pnt_path = Path(infile, pnt_name)
-     if "time" in chunk_spec:
-         time_slice = chunk_spec["time"]
-     else:
-         time_slice = None
-     pnt_xds = read_delayed_pointing_table(
-         str(pnt_path),
-         rename_ids=subt_rename_ids.get(pnt_name, None),
-         time_slice=time_slice,
-     )
-
-     if "time" not in pnt_xds.dims:
-         return {}
-
-     pnt_xds = pnt_xds.compute()
-
-     pnt_chunks = {
-         key: finalize_chunk_xds(infile, xds, pnt_xds)
-         for key, xds in chunks.items()
-     }
-
-     return pnt_chunks
-
-
- def finalize_chunk_xds(
-     infile: str, chunk_xds: xr.Dataset, pointing_xds: xr.Dataset
- ) -> xr.Dataset:
-     """
-     Adds pointing variables to one chunk xds.
-
-     Parameters
-     ----------
-     infile : str
-         MS path (main table)
-     chunk_xds : xr.Dataset
-         chunk xds
-     pointing_xds : xr.Dataset
-         pointing (sub)table xds
-
-     Returns
-     -------
-     xr.Dataset
-         chunk xds with pointing data variables interpolated from
-         the pointing (sub)table
-     """
-
-     interp_pnt = pointing_xds.interp(time=chunk_xds.time, method="nearest")
-
-     for var in interp_pnt.data_vars:
-         chunk_xds[f"pointing_{var}"] = interp_pnt[var]
-
-     return chunk_xds
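
finalize_chunk_xds boils down to nearest-neighbour sampling of the POINTING subtable onto each chunk's time axis. Below is a self-contained toy run of that same xarray pattern with synthetic data (the variable names and values are invented for illustration; Dataset.interp with method="nearest" requires scipy to be installed):

    import numpy as np
    import xarray as xr

    # coarsely sampled stand-in for the POINTING subtable
    pointing = xr.Dataset(
        {"DIRECTION": ("time", np.linspace(0.0, 1.0, 5))},
        coords={"time": np.linspace(0.0, 40.0, 5)},
    )
    # a data chunk with a finer time axis
    chunk = xr.Dataset(coords={"time": np.arange(0.0, 40.0, 2.5)})

    # sample pointing onto the chunk times, then attach as pointing_* data vars
    interp = pointing.interp(time=chunk.time, method="nearest")
    for var in interp.data_vars:
        chunk[f"pointing_{var}"] = interp[var]

    print(chunk["pointing_DIRECTION"].values)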
xradio/measurement_set/_utils/_msv2/descr.py
@@ -1,165 +0,0 @@
- import os
- from typing import Dict, Union
-
- import numpy as np
- import pandas as pd
- import xarray as xr
-
- from ._tables.read import load_generic_table, read_flat_col_chunk
- from ._tables.table_query import open_query, open_table_ro
- from xradio._utils.list_and_array import unique_1d
-
-
- def describe_ms(
-     infile: str, mode: str = "summary", rowmap: Union[dict, None] = None
- ) -> Union[pd.DataFrame, Dict]:
-     """
-     Summarize the contents of an MS directory in casacore table format
-
-     Parameters
-     ----------
-     infile : str
-         input MS filename
-     mode : str (Default value = "summary")
-         type of information returned ('summary', 'flat', 'expanded').
-         'summary' returns a pandas dataframe that is nice for displaying in notebooks
-         etc. 'flat' returns a list of tuples of (ddi, row, chan, pol). 'expanded'
-         returns a list of tuples of (ddi, time, baseline, chan, pol). These latter two
-         are good for trying to determine chunk size for read_ms(expand=True/False).
-     rowmap : Union[dict, None] (Default value = None)
-         dict of DDI to tuple of (row indices, channel indices). Returned
-         by ms_selection function. Default None ignores selections
-
-     Returns
-     -------
-     Union[pd.DataFrame, Dict]
-         summary as a pandas dataframe for mode 'summary', otherwise a dict
-     """
-     infile = os.path.expanduser(infile)  # does nothing if $HOME is unknown
-     if not os.path.isdir(infile):
-         raise ValueError(f"invalid input filename to describe_ms: {infile}")
-     if mode not in [
-         "summary",
-         "flat",
-         "expanded",
-     ]:
-         raise ValueError("invalid mode, must be summary, flat or expanded")
-
-     ddi_xds = load_generic_table(infile, "DATA_DESCRIPTION")
-     ddis = list(ddi_xds.row.values) if rowmap is None else list(rowmap.keys())
-     summary: Union[pd.DataFrame, Dict] = []
-     if mode == "summary":
-         summary = pd.DataFrame([])
-
-     all_sdf = []
-     with open_table_ro(infile) as tb_tool:
-         for ddi in ddis:
-             taql = f"select * from $tb_tool where DATA_DESC_ID = {ddi}"
-             with open_query(tb_tool, taql) as query_per_ddi:
-                 sdf = populate_ms_descr(
-                     infile, mode, query_per_ddi, summary, ddi, ddi_xds
-                 )
-                 all_sdf.append(sdf)
-
-     if mode == "summary":
-         summary = pd.DataFrame(all_sdf)
-         summary = summary.set_index("ddi").sort_index()
-     else:
-         summary = dict(summary)
-     return summary
-
-
- def populate_ms_descr(
-     infile: str,
-     mode: str,
-     query_per_ddi,
-     summary: dict,
-     ddi: int,
-     ddi_xds: xr.Dataset,
-     rowmap: Union[Dict, None] = None,
- ) -> pd.DataFrame:
-     """
-     Adds information from the time and baseline (antenna1+antenna2)
-     columns as well as channels and polarizations, based on a taql
-     query.
-
-     Parameters
-     ----------
-     infile : str
-         input table/MS path
-     mode : str
-         mode (as in describe_ms())
-     query_per_ddi :
-         a TaQL query with data per individual DDI
-     summary : dict
-         summary object being populated from the individual sdf's
-     ddi : int
-         DDI being described
-     ddi_xds : xr.Dataset
-         DATA_DESCRIPTION subtable as an xds
-     rowmap : Union[Dict, None] (Default value = None)
-         dict of DDI to tuple of (row indices, channel indices)
-
-     Returns
-     -------
-     pd.DataFrame
-     """
-     spw_ids = ddi_xds.SPECTRAL_WINDOW_ID.values
-     pol_ids = ddi_xds.POLARIZATION_ID.values
-     sdf = {
-         "ddi": ddi,
-         "spw_id": spw_ids[ddi],
-         "pol_id": pol_ids[ddi],
-         "rows": query_per_ddi.nrows(),
-     }
-
-     # figure out characteristics of main table from select subtables (must all be present)
-     spw_xds = load_generic_table(infile, "SPECTRAL_WINDOW")
-     pol_xds = load_generic_table(infile, "POLARIZATION")
-
-     if mode in ["expanded", "summary"]:
-         times = (
-             query_per_ddi.getcol("TIME")
-             if rowmap is None
-             else read_flat_col_chunk(infile, "TIME", (1,), rowmap[ddi][0], 0, 0)
-         )
-         baselines = [
-             (
-                 query_per_ddi.getcol(rr)[:, None]
-                 if rowmap is None
-                 else read_flat_col_chunk(infile, rr, (1,), rowmap[ddi][0], 0, 0)
-             )
-             for rr in ["ANTENNA1", "ANTENNA2"]
-         ]
-         sdf.update(
-             {
-                 "times": len(unique_1d(times)),
-                 "baselines": len(np.unique(np.hstack(baselines), axis=0)),
-             }
-         )
-
-     chans = spw_xds.NUM_CHAN.values
-     pols = pol_xds.NUM_CORR.values
-     sdf.update(
-         {
-             "chans": (
-                 chans[spw_ids[ddi]]
-                 if (rowmap is None) or (rowmap[ddi][1] is None)
-                 else len(rowmap[ddi][1])
-             ),
-             "pols": pols[pol_ids[ddi]],
-         }
-     )
-     sdf["size_MB"] = np.ceil(
-         (sdf["rows"] * sdf["chans"] * sdf["pols"] * 10) / 1024**2
-     ).astype(int)
-
-     if rowmap is not None:
-         sdf["rows"] = len(rowmap[ddi][0])
-
-     if mode == "summary":
-         summary = pd.concat(
-             [summary, pd.DataFrame(sdf, index=[str(ddi)])], axis=0, sort=False
-         )
-     elif mode == "flat":
-         summary += [(ddi, (sdf["rows"], sdf["chans"], sdf["pols"]))]
-     else:
-         summary += [(ddi, sdf["times"], sdf["baselines"], sdf["chans"], sdf["pols"])]
-
-     return sdf
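
For reference, a hedged sketch of how the removed describe_ms was invoked ("my.ms" is a placeholder path, not a shipped file). Per the code above, 'summary' yields a pandas DataFrame with one row per DDI, while 'flat' ends up as a dict keyed by DDI after the dict(summary) conversion:

    # import path as of xradio 0.0.56; descr.py is removed in 0.0.58
    from xradio.measurement_set._utils._msv2.descr import describe_ms

    summary = describe_ms("my.ms")            # pandas DataFrame indexed by ddi
    flat = describe_ms("my.ms", mode="flat")  # {ddi: (rows, chans, pols), ...}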
xradio/measurement_set/_utils/_msv2/msv2_msv3.py
@@ -1,7 +0,0 @@
- ignore_msv2_cols = [
-     "FLAG_ROW",
-     "FLAG_CATEGORY",
-     "TIME_EXTRA_PREC",
-     "DATA_DESC_ID",
-     "SIGMA",
- ]
- ]