xradio 0.0.55__py3-none-any.whl → 0.0.58__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (64)
  1. xradio/__init__.py +2 -2
  2. xradio/_utils/_casacore/casacore_from_casatools.py +1001 -0
  3. xradio/_utils/_casacore/tables.py +6 -1
  4. xradio/_utils/coord_math.py +22 -23
  5. xradio/_utils/dict_helpers.py +76 -11
  6. xradio/_utils/schema.py +5 -2
  7. xradio/_utils/zarr/common.py +1 -73
  8. xradio/image/_util/_casacore/common.py +11 -3
  9. xradio/image/_util/_casacore/xds_from_casacore.py +59 -35
  10. xradio/image/_util/_casacore/xds_to_casacore.py +47 -16
  11. xradio/image/_util/_fits/xds_from_fits.py +172 -77
  12. xradio/image/_util/casacore.py +9 -4
  13. xradio/image/_util/common.py +4 -4
  14. xradio/image/_util/image_factory.py +8 -8
  15. xradio/image/image.py +45 -5
  16. xradio/measurement_set/__init__.py +19 -9
  17. xradio/measurement_set/_utils/__init__.py +1 -3
  18. xradio/measurement_set/_utils/_msv2/__init__.py +0 -0
  19. xradio/measurement_set/_utils/_msv2/_tables/read.py +35 -90
  20. xradio/measurement_set/_utils/_msv2/_tables/read_main_table.py +6 -686
  21. xradio/measurement_set/_utils/_msv2/_tables/table_query.py +13 -3
  22. xradio/measurement_set/_utils/_msv2/conversion.py +129 -145
  23. xradio/measurement_set/_utils/_msv2/create_antenna_xds.py +9 -16
  24. xradio/measurement_set/_utils/_msv2/create_field_and_source_xds.py +125 -221
  25. xradio/measurement_set/_utils/_msv2/msv2_to_msv4_meta.py +1 -2
  26. xradio/measurement_set/_utils/_msv2/msv4_info_dicts.py +13 -8
  27. xradio/measurement_set/_utils/_msv2/msv4_sub_xdss.py +27 -72
  28. xradio/measurement_set/_utils/_msv2/partition_queries.py +5 -262
  29. xradio/measurement_set/_utils/_msv2/subtables.py +0 -107
  30. xradio/measurement_set/_utils/_utils/interpolate.py +60 -0
  31. xradio/measurement_set/_utils/_zarr/encoding.py +2 -7
  32. xradio/measurement_set/convert_msv2_to_processing_set.py +0 -2
  33. xradio/measurement_set/load_processing_set.py +2 -2
  34. xradio/measurement_set/measurement_set_xdt.py +14 -14
  35. xradio/measurement_set/open_processing_set.py +1 -3
  36. xradio/measurement_set/processing_set_xdt.py +41 -835
  37. xradio/measurement_set/schema.py +96 -123
  38. xradio/schema/check.py +91 -97
  39. xradio/schema/dataclass.py +159 -22
  40. xradio/schema/export.py +99 -0
  41. xradio/schema/metamodel.py +51 -16
  42. xradio/schema/typing.py +5 -5
  43. {xradio-0.0.55.dist-info → xradio-0.0.58.dist-info}/METADATA +43 -11
  44. xradio-0.0.58.dist-info/RECORD +65 -0
  45. {xradio-0.0.55.dist-info → xradio-0.0.58.dist-info}/WHEEL +1 -1
  46. xradio/image/_util/fits.py +0 -13
  47. xradio/measurement_set/_utils/_msv2/_tables/load.py +0 -63
  48. xradio/measurement_set/_utils/_msv2/_tables/load_main_table.py +0 -487
  49. xradio/measurement_set/_utils/_msv2/_tables/read_subtables.py +0 -395
  50. xradio/measurement_set/_utils/_msv2/_tables/write.py +0 -320
  51. xradio/measurement_set/_utils/_msv2/_tables/write_exp_api.py +0 -385
  52. xradio/measurement_set/_utils/_msv2/chunks.py +0 -115
  53. xradio/measurement_set/_utils/_msv2/descr.py +0 -165
  54. xradio/measurement_set/_utils/_msv2/msv2_msv3.py +0 -7
  55. xradio/measurement_set/_utils/_msv2/partitions.py +0 -392
  56. xradio/measurement_set/_utils/_utils/cds.py +0 -40
  57. xradio/measurement_set/_utils/_utils/xds_helper.py +0 -404
  58. xradio/measurement_set/_utils/_zarr/read.py +0 -263
  59. xradio/measurement_set/_utils/_zarr/write.py +0 -329
  60. xradio/measurement_set/_utils/msv2.py +0 -106
  61. xradio/measurement_set/_utils/zarr.py +0 -133
  62. xradio-0.0.55.dist-info/RECORD +0 -77
  63. {xradio-0.0.55.dist-info → xradio-0.0.58.dist-info}/licenses/LICENSE.txt +0 -0
  64. {xradio-0.0.55.dist-info → xradio-0.0.58.dist-info}/top_level.txt +0 -0
xradio/measurement_set/_utils/_msv2/_tables/load_main_table.py (deleted)
@@ -1,487 +0,0 @@
- import toolviper.utils.logger as logger
- from typing import Dict, List, Tuple, Union
-
- import pandas as pd
- import numpy as np
- import xarray as xr
-
- from casacore import tables
-
- from .load import load_col_chunk
- from .read_main_table import get_partition_ids, redim_id_data_vars, rename_vars
- from .read import add_units_measures, convert_casacore_time, extract_table_attributes
- from .write import revert_time
- from .table_query import open_query, open_table_ro
- from xradio.measurement_set._utils._ms._tables.read_main_table import (
-     get_baselines,
-     get_baseline_indices,
- )
- from xradio._utils.list_and_array import unique_1d
-
-
- def load_expanded_main_table_chunk(
-     infile: str,
-     ddi: int,
-     chunk: Dict[str, slice],
-     ignore_msv2_cols: Union[list, None] = None,
- ) -> xr.Dataset:
-     """
-     Load a chunk of data from main table into memory, with expanded
-     dims: (time, baseline, freq, pols)
-
-     Parameters
-     ----------
-     infile : str
-         Input MS path
-     ddi : int
-         DDI to load chunk from
-     chunk : Dict[str, slice]
-         specification of chunk to load
-     ignore_msv2_cols : Union[list, None] (Default value = None)
-         cols that should not be loaded (deprecated MSv2 or similar)
-
-     Returns
-     -------
-     xr.Dataset
-         Xarray dataset with the chunk of visibility data for one DDI (spw_id, pol_setup_id) pair
-     """
-
-     taql_where = f"where DATA_DESC_ID = {ddi}"
-     taql_ddi = f"select * from $mtable {taql_where}"
-
-     with open_table_ro(infile) as mtable:
-         with open_query(mtable, taql_ddi) as tb_tool:
-             if tb_tool.nrows() == 0:
-                 return xr.Dataset()
-
-             xds, part_ids, attrs = load_expanded_ddi_chunk(
-                 infile, tb_tool, taql_where, chunk, ignore_msv2_cols
-             )
-
-     return xds, part_ids, attrs
-
-
- def load_expanded_ddi_chunk(
-     infile: str,
-     tb_tool: tables.table,
-     taql_pre: str,
-     chunk: Dict[str, slice],
-     ignore_msv2_cols: Union[list, None] = None,
- ) -> xr.Dataset:
-     """
-     Helper function to effectively load the chunk and produce an
-     xr.Dataset from a DDI once the table and initial query(ies) have
-     been opened successfully.
-
-     Parameters
-     ----------
-     infile : str
-         Input MS path
-     tb_tool : tables.table
-         table query constrained to one DDI and chunk time range
-     taql_pre : str
-         TaQL query used for tb_tool, with some pre-selection of rows and columns
-     chunk : Dict[str, slice]
-         specification of data chunk to load
-     ignore_msv2_cols : Union[list, None] (Default value = None)
-         propagated from calling functions
-
-     Returns
-     -------
-     xr.Dataset
-         An Xarray dataset with data variables as plain numpy
-         arrays loaded directly from the MS columns
-     """
-
-     # read the specified chunk of data, figure out indices and lens
-     utimes, times = get_chunk_times(taql_pre, chunk)
-     baselines, blines = get_chunk_baselines(tb_tool, chunk)
-     tidxs, bidxs, didxs, taql_where_chunk = get_chunk_data_indices(
-         taql_pre, chunk, utimes, times, baselines, blines
-     )
-
-     ctlen = min(len(utimes), times[1] - times[0] + 1)
-     cblen = min(len(baselines), blines[1] - blines[0] + 1)
-     mvars = load_ddi_cols_chunk(
-         ctlen, cblen, tidxs, bidxs, didxs, tb_tool, chunk, ignore_msv2_cols
-     )
-
-     mcoords = {
-         "time": xr.DataArray(convert_casacore_time(utimes[:ctlen]), dims=["time"]),
-         "baseline": xr.DataArray(np.arange(cblen), dims=["baseline"]),
-     }
-
-     # add xds global attributes
-     cc_attrs = extract_table_attributes(infile)
-     attrs = {"other": {"msv2": {"ctds_attrs": cc_attrs, "bad_cols": ignore_msv2_cols}}}
-     # add per data var attributes
-     mvars = add_units_measures(mvars, cc_attrs)
-     mcoords = add_units_measures(mcoords, cc_attrs)
-
-     mvars = rename_vars(mvars)
-     mvars = redim_id_data_vars(mvars)
-     xds = xr.Dataset(mvars, coords=mcoords)
-
-     part_ids = get_partition_ids(tb_tool, taql_where_chunk)
-
-     # needs an ~equivalent to add_partition_attrs?
-     return xds, part_ids, attrs
-
-
- def load_ddi_cols_chunk(
-     ctlen: int,
-     cblen: int,
-     tidxs: np.ndarray,
-     bidxs: np.ndarray,
-     didxs: np.ndarray,
-     tb_tool: tables.table,
-     chunk: Dict[str, slice],
-     ignore_msv2_cols: Union[list, None] = None,
- ) -> Dict[str, np.ndarray]:
-     """
-     For a given chunk and DDI, load all the MSv2 columns
-
-     Parameters
-     ----------
-     ctlen : int
-         length of the time axis/dim of the chunk
-     cblen : int
-         length of the baseline axis of the chunk
-     tidxs : np.ndarray
-         time axis indices
-     bidxs : np.ndarray
-         baseline axis indices
-     didxs : np.ndarray
-         (effective) data indices, excluding missing baselines
-     tb_tool : tables.table
-         a table/TaQL query open and being used to load columns
-     chunk : Dict[str, slice]
-         data chunk specification
-     ignore_msv2_cols : Union[list, None] (Default value = None)
-         propagated from calling functions
-
-     Returns
-     -------
-     Dict[str, np.ndarray]
-         columns loaded into memory as np arrays
-
-     """
-     cols = tb_tool.colnames()
-
-     cshapes = [
-         np.array(tb_tool.getcell(col, 0)).shape
-         for col in cols
-         if tb_tool.iscelldefined(col, 0)
-     ]
-     # Assumes shapes are consistent across columns - MSv2
-     chan_cnt, pol_cnt = [(csh[0], csh[1]) for csh in cshapes if len(csh) == 2][0]
-
-     dims = ["time", "baseline", "freq", "pol"]
-     mvars = {}
-     # loop over each column and load data
-     for col in cols:
-         if (col in ignore_msv2_cols + ["TIME"]) or not tb_tool.iscelldefined(col, 0):
-             continue
-
-         cdata = tb_tool.getcol(col, 0, 1)[0]
-         cell_shape = cdata.shape
-         if len(cell_shape) == 0:
-             col_dims = dims[:2]
-             mvars[col] = xr.DataArray(
-                 load_col_chunk(
-                     tb_tool, col, (ctlen, cblen), tidxs, bidxs, didxs, None, None
-                 ),
-                 dims=col_dims,
-             )
-
-         elif col == "UVW":
-             col_dims = dims[:2] + ["uvw_coords"]
-             mvars[col] = xr.DataArray(
-                 load_col_chunk(
-                     tb_tool, col, (ctlen, cblen, 3), tidxs, bidxs, didxs, None, None
-                 ),
-                 dims=col_dims,
-             )
-
-         elif len(cell_shape) == 1:
-             pols, col_dims = get_col_1d_pols(cell_shape, dims, chan_cnt, pol_cnt, chunk)
-             cshape = (ctlen, cblen) + (pols[1] - pols[0] + 1,)
-             mvars[col] = xr.DataArray(
-                 load_col_chunk(tb_tool, col, cshape, tidxs, bidxs, didxs, pols, None),
-                 dims=col_dims,
-             )
-
-         elif len(cell_shape) == 2:
-             chans, pols = get_col_2d_chans_pols(cell_shape, chan_cnt, pol_cnt, chunk)
-             cshape = (ctlen, cblen) + (chans[1] - chans[0] + 1, pols[1] - pols[0] + 1)
-             col_dims = dims
-             mvars[col] = xr.DataArray(
-                 load_col_chunk(tb_tool, col, cshape, tidxs, bidxs, didxs, chans, pols),
-                 dims=col_dims,
-             )
-
-     return mvars
-
-
- def get_chunk_times(
-     taql_pre: str, chunk: Dict[str, slice]
- ) -> Tuple[np.ndarray, Tuple[int, int]]:
-     """
-     Produces time col/axis related values for a chunk: unique times,
-     start/stop times.
-
-     Parameters
-     ----------
-     taql_pre : str
-         TaQL query used for tb_tool, with some pre-selection
-         of rows and columns.
-     chunk : Dict[str, slice]
-         specification of data chunk to load
-
-     Returns
-     -------
-     Tuple[np.ndarray, Tuple[int, int]]
-         array of unique times + (first, last) time in the chunk
-     """
-
-     taql_utimes = f"select DISTINCT TIME from $mtable {taql_pre}"
-     with open_query(None, taql_utimes) as query_utimes:
-         utimes = unique_1d(query_utimes.getcol("TIME", 0, -1))
-         # add a tol around the time ranges returned by taql
-         if len(utimes) < 2:
-             tol = 1e-5
-         else:
-             tol = np.diff(utimes).min() / 4
-
-     if "time" in chunk:
-         time_slice = chunk["time"]
-         if (
-             type(time_slice.start) == pd.Timestamp
-             and type(time_slice.stop) == pd.Timestamp
-         ):
-             times = (
-                 revert_time(time_slice.start) - tol,
-                 revert_time(time_slice.stop) + tol,
-             )
-         elif (
-             int(time_slice.start) == time_slice.start
-             and int(time_slice.stop) == time_slice.stop
-         ):
-             # could be operator.index(time_slice.start):
-             nutimes = len(utimes)
-             times = (
-                 min(nutimes, int(time_slice.start)),
-                 min(nutimes, int(time_slice.stop)) - 1,
-             )
-         else:
-             raise ValueError(
-                 f"Invalid time type. Not a timestamp and cannot be used as"
-                 f" index: {time_slice.start} (type: {type(time_slice.start)})"
-             )
-     else:
-         times = (utimes[0], utimes[-1])
-
-     return utimes, times
-
-
- def get_chunk_baselines(
-     tb_tool: tables.table, chunk: Dict[str, slice]
- ) -> Tuple[np.ndarray, Tuple[int, int]]:
-     """
-     Produces the baseline col/axis related values for a chunk: an array of
-     baselines and the start/stop baseline indices.
-
-     Parameters
-     ----------
-     tb_tool : tables.table
-         table/query opened with previous selections (time)
-     chunk : Dict[str, slice]
-         specification of data chunk to load
-
-     Returns
-     -------
-     Tuple[np.ndarray, Tuple[int, int]]
-         array of baselines + (first, last) baseline in the chunk
-     """
-     baselines = get_baselines(tb_tool)
-
-     if "baseline" in chunk:
-         baseline_chunk = chunk["baseline"]
-         baseline_boundaries = (int(baseline_chunk.start), int(baseline_chunk.stop))
-     else:
-         baseline_boundaries = (baselines[0][0], baselines[-1][0] - 1)
-
-     return baselines, baseline_boundaries
-
-
- def get_chunk_data_indices(
-     taql_pre: str,
-     chunk: Dict[str, slice],
-     utimes: np.ndarray,
-     times: Tuple[int, int],
-     baselines: np.ndarray,
-     blines: Tuple[int, int],
- ) -> Tuple[np.ndarray, np.ndarray, np.ndarray, str]:
-     """
-     Produces indices to pass to the casacore getcol(slice) functions to load
-     the chunk of data. tidxs (time), bidxs (baseline), didxs (effective data
-     indices, considering present/absent baselines).
-
-     Time selection is added on top of that.
-
-     Parameters
-     ----------
-     taql_pre : str
-         TaQL query constraints to prepend/inject
-     chunk : Dict[str, slice]
-         specification of data chunk
-     utimes : np.ndarray
-         array of times in the chunk
-     times : Tuple[int, int]
-         start, stop time indices
-     baselines : np.ndarray
-         array of baselines in the chunk
-     blines : Tuple[int, int]
-         start, stop baseline indices
-
-     Returns
-     -------
-     Tuple[np.ndarray, np.ndarray, np.ndarray, str]
-         indices along the time, baseline and data (time/baseline)
-         axes + the full where... string defined for this chunk
-
-     """
-
-     taql_time = f"TIME BETWEEN {utimes[times[0]]} AND {utimes[times[1]]}"
-     taql_ant = f"ANTENNA1 BETWEEN {blines[0]} and {blines[1]}"
-     taql_where_chunk = f"{taql_pre} AND {taql_time} AND {taql_ant}"
-     taql_chunk = f"select * from $mtable {taql_where_chunk}"
-     with open_query(None, taql_chunk) as query_times_ants:
-         logger.debug(
-             f"Opened chunk query, with {query_times_ants.nrows()} rows. Query: {taql_chunk}"
-         )
-         tidxs = (
-             np.searchsorted(utimes, query_times_ants.getcol("TIME", 0, -1)) - times[0]
-         )
-         ts_ant1, ts_ant2 = (
-             query_times_ants.getcol("ANTENNA1", 0, -1),
-             query_times_ants.getcol("ANTENNA2", 0, -1),
-         )
-
-         ts_bases = np.column_stack((ts_ant1, ts_ant2))
-
-         bidxs = get_baseline_indices(baselines, ts_bases) - blines[0]
-
-         # some antenna 2's will be out of bounds for this chunk, store rows that are in bounds
-         didxs = np.where(
-             (bidxs >= 0)
-             & (bidxs < min(blines[1] - blines[0] + 1, len(baselines) - blines[0]))
-         )[0]
-
-     return tidxs, bidxs, didxs, taql_where_chunk
-
-
- def get_col_1d_pols(
-     cell_shape: Tuple[int],
-     dims: List[str],
-     chan_cnt: int,
-     pol_cnt: int,
-     chunk: Dict[str, slice],
- ) -> Tuple[Tuple[int, int], List[str]]:
-     """
-     For a column with 1d array values, calculate the start/stop
-     indices for the last dimension (either pol or freq).
-     It also produces the appropriate dimension names.
-
-     Parameters
-     ----------
-     cell_shape : Tuple[int]
-         shape of the column
-     dims : List[str]
-         full list of dataset dimensions
-     chan_cnt : int
-         number of channels
-     pol_cnt : int
-         number of pols
-     chunk : Dict[str, slice]
-         data chunk specification
-
-     Returns
-     -------
-     Tuple[Tuple[int, int], List[str]]
-         first and last pol/freq index of the chunk, and its
-         dimension names
-
-     """
-     if cell_shape == chan_cnt:
-         # chan/freq
-         col_dims = dims[:2] + ["freq"]
-         if "freq" in chunk:
-             pols = (
-                 min(chan_cnt, chunk["freq"].start),
-                 min(chan_cnt, chunk["freq"].stop) - 1,
-             )
-         else:
-             pols = (0, cell_shape[0])
-     else:
-         # pol
-         col_dims = dims[:2] + ["pol"]
-         if "pol" in chunk:
-             pols = (
-                 min(pol_cnt, chunk["pol"].start),
-                 min(pol_cnt, chunk["pol"].stop) - 1,
-             )
-         else:
-             pols = (0, cell_shape[0])
-
-     return pols, col_dims
-
-
- def get_col_2d_chans_pols(
-     cell_shape: Tuple[int],
-     chan_cnt: int,
-     pol_cnt: int,
-     chunk: Dict[str, slice],
- ) -> Tuple[Tuple[int, int], Tuple[int, int]]:
-     """
-     For a column with 2d array values (FLAG, DATA, WEIGHT_SPECTRUM,
-     etc.), calculate the start/stop indices for the last two
-     dimensions of the chunk (freq and pol).
-     The dimension names can be assumed to be the full list of dims in
-     visibilities (time, baseline, freq, pol).
-
-     Parameters
-     ----------
-     cell_shape : Tuple[int]
-         shape of the column
-     chan_cnt : int
-         number of channels
-     pol_cnt : int
-         number of pols
-     chunk : Dict[str, slice]
-         data chunk specification
-
-     Returns
-     -------
-     Tuple[Tuple[int, int], Tuple[int, int]]
-         first and last index for freq (channel) and pol axes of
-         the chunk
-
-     """
-     if "freq" in chunk:
-         chans = (
-             min(chan_cnt, chunk["freq"].start),
-             min(chan_cnt, chunk["freq"].stop) - 1,
-         )
-     else:
-         chans = (0, cell_shape[0])
-
-     if "pol" in chunk:
-         pols = (
-             min(pol_cnt, chunk["pol"].start),
-             min(pol_cnt, chunk["pol"].stop) - 1,
-         )
-     else:
-         pols = (0, cell_shape[1])
-
-     return chans, pols
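
Note on the removed indexing logic: get_chunk_data_indices above maps each MS row of the chunk to a (time, baseline) position by running np.searchsorted over the unique timestamps and looking each (ANTENNA1, ANTENNA2) pair up in the sorted baseline list, then keeps only the rows whose baseline index falls inside the requested range. The following is a minimal standalone sketch of that pattern, using made-up numpy arrays (row_times, ant1, ant2) in place of the casacore table columns; no xradio or casacore API is involved.

import numpy as np

# Hypothetical stand-ins for the TIME, ANTENNA1 and ANTENNA2 columns of one chunk.
row_times = np.array([10.0, 10.0, 20.0, 20.0, 30.0])
ant1 = np.array([0, 0, 0, 1, 0])
ant2 = np.array([1, 2, 1, 2, 2])

utimes = np.unique(row_times)  # unique timestamps, ascending
baselines = np.unique(np.column_stack((ant1, ant2)), axis=0)  # unique (ant1, ant2) pairs

# Row -> time index: position of each row's timestamp among the unique times.
tidxs = np.searchsorted(utimes, row_times)

# Row -> baseline index: locate each (ant1, ant2) pair in the sorted baseline list.
ts_bases = np.column_stack((ant1, ant2))
bidxs = np.array([np.flatnonzero((baselines == pair).all(axis=1))[0] for pair in ts_bases])

# Keep only rows whose baseline index is inside the chunk's baseline range.
didxs = np.flatnonzero((bidxs >= 0) & (bidxs < len(baselines)))

print(tidxs, bidxs, didxs)  # [0 0 1 1 2] [0 1 0 2 1] [0 1 2 3 4]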