xradio 0.0.56__py3-none-any.whl → 0.0.58__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (62)
  1. xradio/__init__.py +2 -2
  2. xradio/_utils/_casacore/casacore_from_casatools.py +12 -2
  3. xradio/_utils/_casacore/tables.py +1 -0
  4. xradio/_utils/coord_math.py +22 -23
  5. xradio/_utils/dict_helpers.py +76 -11
  6. xradio/_utils/schema.py +5 -2
  7. xradio/_utils/zarr/common.py +1 -73
  8. xradio/image/_util/_casacore/xds_from_casacore.py +49 -33
  9. xradio/image/_util/_casacore/xds_to_casacore.py +41 -14
  10. xradio/image/_util/_fits/xds_from_fits.py +146 -35
  11. xradio/image/_util/casacore.py +4 -3
  12. xradio/image/_util/common.py +4 -4
  13. xradio/image/_util/image_factory.py +8 -8
  14. xradio/image/image.py +45 -5
  15. xradio/measurement_set/__init__.py +19 -9
  16. xradio/measurement_set/_utils/__init__.py +1 -3
  17. xradio/measurement_set/_utils/_msv2/__init__.py +0 -0
  18. xradio/measurement_set/_utils/_msv2/_tables/read.py +17 -76
  19. xradio/measurement_set/_utils/_msv2/_tables/read_main_table.py +2 -685
  20. xradio/measurement_set/_utils/_msv2/conversion.py +123 -145
  21. xradio/measurement_set/_utils/_msv2/create_antenna_xds.py +9 -16
  22. xradio/measurement_set/_utils/_msv2/create_field_and_source_xds.py +125 -221
  23. xradio/measurement_set/_utils/_msv2/msv2_to_msv4_meta.py +1 -2
  24. xradio/measurement_set/_utils/_msv2/msv4_info_dicts.py +8 -7
  25. xradio/measurement_set/_utils/_msv2/msv4_sub_xdss.py +27 -72
  26. xradio/measurement_set/_utils/_msv2/partition_queries.py +1 -261
  27. xradio/measurement_set/_utils/_msv2/subtables.py +0 -107
  28. xradio/measurement_set/_utils/_utils/interpolate.py +60 -0
  29. xradio/measurement_set/_utils/_zarr/encoding.py +2 -7
  30. xradio/measurement_set/convert_msv2_to_processing_set.py +0 -2
  31. xradio/measurement_set/load_processing_set.py +2 -2
  32. xradio/measurement_set/measurement_set_xdt.py +14 -14
  33. xradio/measurement_set/open_processing_set.py +1 -3
  34. xradio/measurement_set/processing_set_xdt.py +41 -835
  35. xradio/measurement_set/schema.py +95 -122
  36. xradio/schema/check.py +91 -97
  37. xradio/schema/dataclass.py +159 -22
  38. xradio/schema/export.py +99 -0
  39. xradio/schema/metamodel.py +51 -16
  40. xradio/schema/typing.py +5 -5
  41. {xradio-0.0.56.dist-info → xradio-0.0.58.dist-info}/METADATA +2 -1
  42. xradio-0.0.58.dist-info/RECORD +65 -0
  43. {xradio-0.0.56.dist-info → xradio-0.0.58.dist-info}/WHEEL +1 -1
  44. xradio/image/_util/fits.py +0 -13
  45. xradio/measurement_set/_utils/_msv2/_tables/load.py +0 -66
  46. xradio/measurement_set/_utils/_msv2/_tables/load_main_table.py +0 -490
  47. xradio/measurement_set/_utils/_msv2/_tables/read_subtables.py +0 -398
  48. xradio/measurement_set/_utils/_msv2/_tables/write.py +0 -323
  49. xradio/measurement_set/_utils/_msv2/_tables/write_exp_api.py +0 -388
  50. xradio/measurement_set/_utils/_msv2/chunks.py +0 -115
  51. xradio/measurement_set/_utils/_msv2/descr.py +0 -165
  52. xradio/measurement_set/_utils/_msv2/msv2_msv3.py +0 -7
  53. xradio/measurement_set/_utils/_msv2/partitions.py +0 -392
  54. xradio/measurement_set/_utils/_utils/cds.py +0 -40
  55. xradio/measurement_set/_utils/_utils/xds_helper.py +0 -404
  56. xradio/measurement_set/_utils/_zarr/read.py +0 -263
  57. xradio/measurement_set/_utils/_zarr/write.py +0 -329
  58. xradio/measurement_set/_utils/msv2.py +0 -106
  59. xradio/measurement_set/_utils/zarr.py +0 -133
  60. xradio-0.0.56.dist-info/RECORD +0 -78
  61. {xradio-0.0.56.dist-info → xradio-0.0.58.dist-info}/licenses/LICENSE.txt +0 -0
  62. {xradio-0.0.56.dist-info → xradio-0.0.58.dist-info}/top_level.txt +0 -0
xradio/measurement_set/_utils/_msv2/_tables/load_main_table.py (deleted)
@@ -1,490 +0,0 @@
- import toolviper.utils.logger as logger
- from typing import Dict, List, Tuple, Union
-
- import pandas as pd
- import numpy as np
- import xarray as xr
-
- try:
- from casacore import tables
- except ImportError:
- import xradio._utils._casacore.casacore_from_casatools as tables
-
- from .load import load_col_chunk
- from .read_main_table import get_partition_ids, redim_id_data_vars, rename_vars
- from .read import add_units_measures, convert_casacore_time, extract_table_attributes
- from .write import revert_time
- from .table_query import open_query, open_table_ro
- from xradio.measurement_set._utils._ms._tables.read_main_table import (
- get_baselines,
- get_baseline_indices,
- )
- from xradio._utils.list_and_array import unique_1d
-
-
- def load_expanded_main_table_chunk(
- infile: str,
- ddi: int,
- chunk: Dict[str, slice],
- ignore_msv2_cols: Union[list, None] = None,
- ) -> xr.Dataset:
- """
- Load a chunk of data from main table into memory, with expanded
- dims: (time, baseline, freq, pols)
-
- Parameters
- ----------
- infile : str
- Input MS path
- ddi : int
- DDI to load chunk from
- chunk : Dict[str, slice]
- specification of chunk to load
- ignore_msv2_cols : Union[list, None] (Default value = None)
- cols that should not be loaded (deprecated MSv2 or similar)
-
- Returns
- -------
- xr.Dataset
- Xarray dataset with a chunk of visibility data for the given DDI (spw_id, pol_setup_id) pair
- """
-
- taql_where = f"where DATA_DESC_ID = {ddi}"
- taql_ddi = f"select * from $mtable {taql_where}"
-
- with open_table_ro(infile) as mtable:
- with open_query(mtable, taql_ddi) as tb_tool:
- if tb_tool.nrows() == 0:
- return xr.Dataset()
-
- xds, part_ids, attrs = load_expanded_ddi_chunk(
- infile, tb_tool, taql_where, chunk, ignore_msv2_cols
- )
-
- return xds, part_ids, attrs
-
-
- def load_expanded_ddi_chunk(
- infile: str,
- tb_tool: tables.table,
- taql_pre: str,
- chunk: Dict[str, slice],
- ignore_msv2_cols: Union[list, None] = None,
- ) -> xr.Dataset:
- """
- Helper function to effectively load the chunk and produce an
- xr.Dataset from a DDI once the table and initial query(ies) have
- been opened successfully.
-
- Parameters
- ----------
- infile : str
- Input MS path
- tb_tool : tables.table
- table query constrained to one DDI and chunk time range
- taql_pre : str
- TaQL query used for tb_tool, with some pre-selection of rows and columns
- chunk : Dict[str, slice]
- specification of data chunk to load
- ignore_msv2_cols : Union[list, None] (Default value = None)
- propagated from calling functions
-
- Returns
- -------
- xr.Dataset
- An Xarray dataset with data variables as plain numpy
- arrays loaded directly from the MS columns
- """
-
- # read the specified chunk of data, figure out indices and lens
- utimes, times = get_chunk_times(taql_pre, chunk)
- baselines, blines = get_chunk_baselines(tb_tool, chunk)
- tidxs, bidxs, didxs, taql_where_chunk = get_chunk_data_indices(
- taql_pre, chunk, utimes, times, baselines, blines
- )
-
- ctlen = min(len(utimes), times[1] - times[0] + 1)
- cblen = min(len(baselines), blines[1] - blines[0] + 1)
- mvars = load_ddi_cols_chunk(
- ctlen, cblen, tidxs, bidxs, didxs, tb_tool, chunk, ignore_msv2_cols
- )
-
- mcoords = {
- "time": xr.DataArray(convert_casacore_time(utimes[:ctlen]), dims=["time"]),
- "baseline": xr.DataArray(np.arange(cblen), dims=["baseline"]),
- }
-
- # add xds global attributes
- cc_attrs = extract_table_attributes(infile)
- attrs = {"other": {"msv2": {"ctds_attrs": cc_attrs, "bad_cols": ignore_msv2_cols}}}
- # add per data var attributes
- mvars = add_units_measures(mvars, cc_attrs)
- mcoords = add_units_measures(mcoords, cc_attrs)
-
- mvars = rename_vars(mvars)
- mvars = redim_id_data_vars(mvars)
- xds = xr.Dataset(mvars, coords=mcoords)
-
- part_ids = get_partition_ids(tb_tool, taql_where_chunk)
-
- # needs an ~equivalent to add_partition_attrs?
- return xds, part_ids, attrs
-
-
- def load_ddi_cols_chunk(
- ctlen: int,
- cblen: int,
- tidxs: np.ndarray,
- bidxs: np.ndarray,
- didxs: np.ndarray,
- tb_tool: tables.table,
- chunk: Dict[str, slice],
- ignore_msv2_cols: Union[list, None] = None,
- ) -> Dict[str, np.ndarray]:
- """
- For a given chunk and DDI, load all the MSv2 columns
-
- Parameters
- ----------
- ctlen : int
- length of the time axis/dim of the chunk
- cblen : int
- length of the baseline axis of the chunk
- tidxs : np.ndarray
- time axis indices
- bidxs : np.ndarray
- baseline axis indices
- didxs : np.ndarray
- (effective) data indices, excluding missing baselines
- tb_tool : tables.table
- a table/TaQL query open and being used to load columns
- chunk : Dict[str, slice]
- data chunk specification
- ignore_msv2_cols : Union[list, None] (Default value = None)
- propagated from calling functions
-
- Returns
- -------
- Dict[str, np.ndarray]
- columns loaded into memory as np arrays
-
- """
- cols = tb_tool.colnames()
-
- cshapes = [
- np.array(tb_tool.getcell(col, 0)).shape
- for col in cols
- if tb_tool.iscelldefined(col, 0)
- ]
- # Assumes shapes are consistent across columns - MSv2
- chan_cnt, pol_cnt = [(csh[0], csh[1]) for csh in cshapes if len(csh) == 2][0]
-
- dims = ["time", "baseline", "freq", "pol"]
- mvars = {}
- # loop over each column and load data
- for col in cols:
- if (col in ignore_msv2_cols + ["TIME"]) or not tb_tool.iscelldefined(col, 0):
- continue
-
- cdata = tb_tool.getcol(col, 0, 1)[0]
- cell_shape = cdata.shape
- if len(cell_shape) == 0:
- col_dims = dims[:2]
- mvars[col] = xr.DataArray(
- load_col_chunk(
- tb_tool, col, (ctlen, cblen), tidxs, bidxs, didxs, None, None
- ),
- dims=col_dims,
- )
-
- elif col == "UVW":
- col_dims = dims[:2] + ["uvw_coords"]
- mvars[col] = xr.DataArray(
- load_col_chunk(
- tb_tool, col, (ctlen, cblen, 3), tidxs, bidxs, didxs, None, None
- ),
- dims=col_dims,
- )
-
- elif len(cell_shape) == 1:
- pols, col_dims = get_col_1d_pols(cell_shape, dims, chan_cnt, pol_cnt, chunk)
- cshape = (ctlen, cblen) + (pols[1] - pols[0] + 1,)
- mvars[col] = xr.DataArray(
- load_col_chunk(tb_tool, col, cshape, tidxs, bidxs, didxs, pols, None),
- dims=col_dims,
- )
-
- elif len(cell_shape) == 2:
- chans, pols = get_col_2d_chans_pols(cell_shape, chan_cnt, pol_cnt, chunk)
- cshape = (ctlen, cblen) + (chans[1] - chans[0] + 1, pols[1] - pols[0] + 1)
- col_dims = dims
- mvars[col] = xr.DataArray(
- load_col_chunk(tb_tool, col, cshape, tidxs, bidxs, didxs, chans, pols),
- dims=col_dims,
- )
-
- return mvars
-
-
- def get_chunk_times(
- taql_pre: str, chunk: Dict[str, slice]
- ) -> Tuple[np.ndarray, Tuple[int, int]]:
- """
- Produces time col/axis related values for a chunk: unique times,
- start/stop times.
-
- Parameters
- ----------
- taql_pre : str
- TaQL query used for tb_tool, with some pre-selection
- of rows and columns.
- chunk : Dict[str, slice]
- specification of data chunk to load
-
- Returns
- -------
- Tuple[np.ndarray, Tuple[int, int]]
- array of unique times + (first, last) time in the chunk
- """
-
- taql_utimes = f"select DISTINCT TIME from $mtable {taql_pre}"
- with open_query(None, taql_utimes) as query_utimes:
- utimes = unique_1d(query_utimes.getcol("TIME", 0, -1))
- # add a tol around the time ranges returned by taql
- if len(utimes) < 2:
- tol = 1e-5
- else:
- tol = np.diff(utimes).min() / 4
-
- if "time" in chunk:
- time_slice = chunk["time"]
- if (
- type(time_slice.start) == pd.Timestamp
- and type(time_slice.stop) == pd.Timestamp
- ):
- times = (
- revert_time(time_slice.start) - tol,
- revert_time(time_slice.stop) + tol,
- )
- elif (
- int(time_slice.start) == time_slice.start
- and int(time_slice.stop) == time_slice.stop
- ):
- # could be operator.index(time_slice.start):
- nutimes = len(utimes)
- times = (
- min(nutimes, int(time_slice.start)),
- min(nutimes, int(time_slice.stop)) - 1,
- )
- else:
- raise ValueError(
- f"Invalid time type. Not a timestamp and cannot use as"
- f" index: {time_slice.start} (type: {type(time_slice.start)})"
- )
- else:
- times = (utimes[0], utimes[-1])
-
- return utimes, times
-
-
- def get_chunk_baselines(
- tb_tool: tables.table, chunk: Dict[str, slice]
- ) -> Tuple[np.ndarray, Tuple[int, int]]:
- """
- Produces the baseline col/axis related values for a chunk: an array of
- baselines and the start/stop baseline indices.
-
- Parameters
- ----------
- tb_tool : tables.table
- table/query opened with prev selections (time)
- chunk : Dict[str, slice]
- specification of data chunk to load
-
- Returns
- -------
- Tuple[np.ndarray, Tuple[int, int]]
- array of baselines + (first, last) baseline in the chunk
- """
- baselines = get_baselines(tb_tool)
-
- if "baseline" in chunk:
- baseline_chunk = chunk["baseline"]
- baseline_boundaries = (int(baseline_chunk.start), int(baseline_chunk.stop))
- else:
- baseline_boundaries = (baselines[0][0], baselines[-1][0] - 1)
-
- return baselines, baseline_boundaries
-
-
- def get_chunk_data_indices(
- taql_pre: str,
- chunk: Dict[str, slice],
- utimes: np.ndarray,
- times: Tuple[int, int],
- baselines: np.ndarray,
- blines: Tuple[int, int],
- ) -> Tuple[np.ndarray, np.ndarray, np.ndarray, str]:
- """
- Produces indices to pass to the casacore getcol(slice) functions to load
- the chunk of data. tidxs (time), bidxs (baseline), didxs (effective data
- indices, considering present/absent baselines).
-
- Time selection is added on top of that.
-
- Parameters
- ----------
- taql_pre : str
- TaQL query constraints to prepend/inject
- chunk : Dict[str, slice]
- specification of data chunk
- utimes : np.ndarray
- array of times in the chunk
- times : Tuple[int, int]
- start, stop time indices
- baselines : np.ndarray
- array of baselines in the chunk
- blines : Tuple[int, int]
- start, stop baseline indices
-
- Returns
- -------
- Tuple[np.ndarray, np.ndarray, np.ndarray]
- indices along the time, baseline and data (time/baseline)
- axes + the full where... string defined for this chunk
-
- """
-
- taql_time = f"TIME BETWEEN {utimes[times[0]]} AND {utimes[times[1]]}"
- taql_ant = f"ANTENNA1 BETWEEN {blines[0]} and {blines[1]}"
- taql_where_chunk = f"{taql_pre} AND {taql_time} AND {taql_ant}"
- taql_chunk = f"select * from $mtable {taql_where_chunk}"
- with open_query(None, taql_chunk) as query_times_ants:
- logger.debug(
- f"Opened chunk query, with {query_times_ants.nrows()} rows. Query: {taql_chunk}"
- )
- tidxs = (
- np.searchsorted(utimes, query_times_ants.getcol("TIME", 0, -1)) - times[0]
- )
- ts_ant1, ts_ant2 = (
- query_times_ants.getcol("ANTENNA1", 0, -1),
- query_times_ants.getcol("ANTENNA2", 0, -1),
- )
-
- ts_bases = np.column_stack((ts_ant1, ts_ant2))
-
- bidxs = get_baseline_indices(baselines, ts_bases) - blines[0]
-
- # some antenna 2's will be out of bounds for this chunk, store rows that are in bounds
- didxs = np.where(
- (bidxs >= 0)
- & (bidxs < min(blines[1] - blines[0] + 1, len(baselines) - blines[0]))
- )[0]
-
- return tidxs, bidxs, didxs, taql_where_chunk
-
-
- def get_col_1d_pols(
- cell_shape: Tuple[int],
- dims: List[str],
- chan_cnt: int,
- pol_cnt: int,
- chunk: Dict[str, slice],
- ) -> Tuple[Tuple[int, int], List[str]]:
- """
- For a column with 1d array values, calculate the start/stop
- indices for the last dimension (either pol or freq).
- It also produces the adequate dimension names.
-
- Parameters
- ----------
- cell_shape : Tuple[int]
- shape of the column
- dims : List[str]
- full list of dataset dimensions
- chan_cnt : int
- number of channels
- pol_cnt : int
- number of pols
- chunk : Dict[str, slice]
- data chunk specification
-
- Returns
- -------
- Tuple[Tuple[int, int], List[str]]
- first and last pol/freq index of the chunk, and its
- dimension names
-
- """
- if cell_shape == chan_cnt:
- # chan/freq
- col_dims = dims[:2] + ["freq"]
- if "freq" in chunk:
- pols = (
- min(chan_cnt, chunk["freq"].start),
- min(chan_cnt, chunk["freq"].stop) - 1,
- )
- else:
- pols = (0, cell_shape[0])
- else:
- # pol
- col_dims = dims[:2] + ["pol"]
- if "pol" in chunk:
- pols = (
- min(pol_cnt, chunk["pol"].start),
- min(pol_cnt, chunk["pol"].stop) - 1,
- )
- else:
- pols = (0, cell_shape[0])
-
- return pols, col_dims
-
-
- def get_col_2d_chans_pols(
- cell_shape: Tuple[int],
- chan_cnt: int,
- pol_cnt: int,
- chunk: Dict[str, slice],
- ) -> Tuple[Tuple[int, int], Tuple[int, int]]:
- """
- For a column with 2d array values (FLAG, DATA, WEIGHT_SPECTRUM,
- etc.), calculate the start/stop indices for the last two
- dimensions of the chunk (freq and pol).
- The dimension names can be assumed to be the full list of dims in
- visibilities (time, baseline, freq, pol).
-
- Parameters
- ----------
- cell_shape : Tuple[int]
- shape of the column
- chan_cnt : int
- number of channels
- pol_cnt : int
- number of pols
- chunk : Dict[str, slice]
- data chunk specification
-
- Returns
- -------
- Tuple[Tuple[int, int], Tuple[int, int]]
- first and last index for freq (channel) and pol axes of
- the chunk
-
- """
- if "freq" in chunk:
- chans = (
- min(chan_cnt, chunk["freq"].start),
- min(chan_cnt, chunk["freq"].stop) - 1,
- )
- else:
- chans = (0, cell_shape[0])
-
- if "pol" in chunk:
- pols = (
- min(pol_cnt, chunk["pol"].start),
- min(pol_cnt, chunk["pol"].stop) - 1,
- )
- else:
- pols = (0, cell_shape[1])
-
- return chans, pols
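
Note: the hunk above removes the chunked main-table loader in its entirety. For orientation, a minimal usage sketch of the removed entry point, as it could have been called against xradio 0.0.56, is shown below. The import path, function signature, and the chunk keys (time, baseline, freq, pol) come from the deleted file itself; the MS path, DDI number, and slice bounds are illustrative assumptions.

    from xradio.measurement_set._utils._msv2._tables.load_main_table import (
        load_expanded_main_table_chunk,
    )

    # chunk maps the expanded dims to slices, as described in the deleted docstrings
    chunk = {
        "time": slice(0, 10),      # first 10 unique integration times
        "baseline": slice(0, 50),  # first 50 baselines
        "freq": slice(0, 64),      # first 64 channels
        "pol": slice(0, 2),        # first 2 polarizations
    }

    # illustrative MS path and DDI; returns the chunk dataset, partition ids and attrs
    xds, part_ids, attrs = load_expanded_main_table_chunk(
        "my_data.ms", 0, chunk, ignore_msv2_cols=[]
    )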