xradio 0.0.55__py3-none-any.whl → 0.0.58__py3-none-any.whl

This diff represents changes between publicly available package versions released to one of the supported registries. It is provided for informational purposes only and reflects the packages as they appear in their respective public registries.
Files changed (64)
  1. xradio/__init__.py +2 -2
  2. xradio/_utils/_casacore/casacore_from_casatools.py +1001 -0
  3. xradio/_utils/_casacore/tables.py +6 -1
  4. xradio/_utils/coord_math.py +22 -23
  5. xradio/_utils/dict_helpers.py +76 -11
  6. xradio/_utils/schema.py +5 -2
  7. xradio/_utils/zarr/common.py +1 -73
  8. xradio/image/_util/_casacore/common.py +11 -3
  9. xradio/image/_util/_casacore/xds_from_casacore.py +59 -35
  10. xradio/image/_util/_casacore/xds_to_casacore.py +47 -16
  11. xradio/image/_util/_fits/xds_from_fits.py +172 -77
  12. xradio/image/_util/casacore.py +9 -4
  13. xradio/image/_util/common.py +4 -4
  14. xradio/image/_util/image_factory.py +8 -8
  15. xradio/image/image.py +45 -5
  16. xradio/measurement_set/__init__.py +19 -9
  17. xradio/measurement_set/_utils/__init__.py +1 -3
  18. xradio/measurement_set/_utils/_msv2/__init__.py +0 -0
  19. xradio/measurement_set/_utils/_msv2/_tables/read.py +35 -90
  20. xradio/measurement_set/_utils/_msv2/_tables/read_main_table.py +6 -686
  21. xradio/measurement_set/_utils/_msv2/_tables/table_query.py +13 -3
  22. xradio/measurement_set/_utils/_msv2/conversion.py +129 -145
  23. xradio/measurement_set/_utils/_msv2/create_antenna_xds.py +9 -16
  24. xradio/measurement_set/_utils/_msv2/create_field_and_source_xds.py +125 -221
  25. xradio/measurement_set/_utils/_msv2/msv2_to_msv4_meta.py +1 -2
  26. xradio/measurement_set/_utils/_msv2/msv4_info_dicts.py +13 -8
  27. xradio/measurement_set/_utils/_msv2/msv4_sub_xdss.py +27 -72
  28. xradio/measurement_set/_utils/_msv2/partition_queries.py +5 -262
  29. xradio/measurement_set/_utils/_msv2/subtables.py +0 -107
  30. xradio/measurement_set/_utils/_utils/interpolate.py +60 -0
  31. xradio/measurement_set/_utils/_zarr/encoding.py +2 -7
  32. xradio/measurement_set/convert_msv2_to_processing_set.py +0 -2
  33. xradio/measurement_set/load_processing_set.py +2 -2
  34. xradio/measurement_set/measurement_set_xdt.py +14 -14
  35. xradio/measurement_set/open_processing_set.py +1 -3
  36. xradio/measurement_set/processing_set_xdt.py +41 -835
  37. xradio/measurement_set/schema.py +96 -123
  38. xradio/schema/check.py +91 -97
  39. xradio/schema/dataclass.py +159 -22
  40. xradio/schema/export.py +99 -0
  41. xradio/schema/metamodel.py +51 -16
  42. xradio/schema/typing.py +5 -5
  43. {xradio-0.0.55.dist-info → xradio-0.0.58.dist-info}/METADATA +43 -11
  44. xradio-0.0.58.dist-info/RECORD +65 -0
  45. {xradio-0.0.55.dist-info → xradio-0.0.58.dist-info}/WHEEL +1 -1
  46. xradio/image/_util/fits.py +0 -13
  47. xradio/measurement_set/_utils/_msv2/_tables/load.py +0 -63
  48. xradio/measurement_set/_utils/_msv2/_tables/load_main_table.py +0 -487
  49. xradio/measurement_set/_utils/_msv2/_tables/read_subtables.py +0 -395
  50. xradio/measurement_set/_utils/_msv2/_tables/write.py +0 -320
  51. xradio/measurement_set/_utils/_msv2/_tables/write_exp_api.py +0 -385
  52. xradio/measurement_set/_utils/_msv2/chunks.py +0 -115
  53. xradio/measurement_set/_utils/_msv2/descr.py +0 -165
  54. xradio/measurement_set/_utils/_msv2/msv2_msv3.py +0 -7
  55. xradio/measurement_set/_utils/_msv2/partitions.py +0 -392
  56. xradio/measurement_set/_utils/_utils/cds.py +0 -40
  57. xradio/measurement_set/_utils/_utils/xds_helper.py +0 -404
  58. xradio/measurement_set/_utils/_zarr/read.py +0 -263
  59. xradio/measurement_set/_utils/_zarr/write.py +0 -329
  60. xradio/measurement_set/_utils/msv2.py +0 -106
  61. xradio/measurement_set/_utils/zarr.py +0 -133
  62. xradio-0.0.55.dist-info/RECORD +0 -77
  63. {xradio-0.0.55.dist-info → xradio-0.0.58.dist-info}/licenses/LICENSE.txt +0 -0
  64. {xradio-0.0.55.dist-info → xradio-0.0.58.dist-info}/top_level.txt +0 -0
xradio/measurement_set/_utils/_utils/xds_helper.py (deleted)
@@ -1,404 +0,0 @@
- from importlib.metadata import version
- import toolviper.utils.logger as logger, multiprocessing, psutil
- from typing import Any, Dict, List, Tuple, Union
-
- import numpy as np
- import xarray as xr
-
- from .cds import CASAVisSet
- from .stokes_types import stokes_types
- from xradio._utils.list_and_array import get_pad_value
-
-
- def make_coords(
-     xds: xr.Dataset, ddi: int, subtables: Tuple[xr.Dataset, ...]
- ) -> Dict[str, np.ndarray]:
-     """
-     Make the coords to be added to a partition or chunk, beyond the basic
-     (time, baseline) structure.
-
-     Grabs:
-     - channel (center) frequency values from the spw subtable
-     - pol idxs from the pol+ddi subtables -> pol names via the stokes_types
-     - antenna IDs from antenna subtable
-
-     Parameters
-     ----------
-     xds : xr.Dataset
-
-     ddi : int
-
-     subtables : Tuple[xr.Dataset, ...]
-
-     Returns
-     -------
-     Dict[str, np.ndarray]
-     """
-     ant_xds, ddi_xds, spw_xds, pol_xds = subtables
-     freq = spw_xds.CHAN_FREQ.values[
-         ddi_xds.SPECTRAL_WINDOW_ID.values[ddi], : xds.freq.shape[0]
-     ]
-     pol_ids = pol_xds.CORR_TYPE.values[
-         ddi_xds.POLARIZATION_ID.values[ddi], : xds.pol.shape[0]
-     ]
-     pol_names = np.vectorize(stokes_types.get)(pol_ids)
-     ant_id = ant_xds.antenna_id.values
-     coords = {
-         "freq": freq,
-         "pol": pol_names,
-         "antenna_id": ant_id,
-         # These will be metainfo in partitions
-         # "spw_id": [ddi_xds["spectral_window_id"].values[ddi]],
-         # "pol_id": [ddi_xds["polarization_id"].values[ddi]],
-     }
-     return coords
-
-
- def vis_xds_packager_cds(
-     subtables: List[Tuple[str, xr.Dataset]],
-     partitions: Dict[Any, xr.Dataset],
-     descr_add: str = "",
- ) -> CASAVisSet:
-     """
-     Takes a list of subtable xds datasets and a dictionary of data
-     partition xds datasets and packages them as a CASA vis dataset
-     (cds)
-
-     Parameters
-     ----------
-     subtables : List[Tuple[str, xr.Dataset]]
-         subtables as xds datasets
-     partitions : Dict[Any, xr.Dataset]
-         data partitions as xds datasets
-     descr_add : str (Default value = "")
-         substring to add to the short descr string of the cds
-
-     Returns
-     -------
-     CASAVisSet
-         A "cds" - container for the metainfo subtables and data partitions
-     """
-     vers = version("xradio")
-
-     return CASAVisSet(
-         subtables,
-         partitions,
-         f"CASA vis set produced by xradio {vers}/{descr_add}",
-     )
-
-
- def vis_xds_packager_mxds(
-     partitions: Dict[Any, xr.Dataset],
-     subtables: List[Tuple[str, xr.Dataset]],
-     add_global_coords: bool = True,
- ) -> xr.Dataset:
-     """
-     Takes a dictionary of data partition xds datasets and a list of
-     subtable xds datasets and packages them as a dataset of datasets
-     (mxds)
-
-     Parameters
-     ----------
-     partitions : Dict[Any, xr.Dataset]
-         data partitions as xds datasets
-     subtables : List[Tuple[str, xr.Dataset]]
-         subtables as xds datasets
-     add_global_coords : bool (Default value = True)
-         whether to add global coords to the output mxds
-
-     Returns
-     -------
-     xr.Dataset
-         An "mxds" - xr.Dataset of datasets
-     """
-     mxds = xr.Dataset(attrs={"metainfo": subtables, "partitions": partitions})
-
-     if add_global_coords:
-         mxds = mxds.assign_coords(make_global_coords(mxds))
-
-     return mxds
-
-
- def make_global_coords(mxds: xr.Dataset) -> Dict[str, xr.DataArray]:
-     coords = {}
-     metainfo = mxds.attrs["metainfo"]
-     if "antenna" in metainfo:
-         coords["antenna_ids"] = metainfo["antenna"].antenna_id.values
-         coords["antennas"] = xr.DataArray(
-             metainfo["antenna"].NAME.values, dims=["antenna_ids"]
-         )
-     if "field" in metainfo:
-         coords["field_ids"] = metainfo["field"].field_id.values
-         coords["fields"] = xr.DataArray(
-             metainfo["field"].NAME.values, dims=["field_ids"]
-         )
-     if "feed" in metainfo:
-         coords["feed_ids"] = metainfo["feed"].FEED_ID.values
-     if "observation" in metainfo:
-         coords["observation_ids"] = metainfo["observation"].observation_id.values
-         coords["observations"] = xr.DataArray(
-             metainfo["observation"].PROJECT.values, dims=["observation_ids"]
-         )
-     if "polarization" in metainfo:
-         coords["polarization_ids"] = metainfo["polarization"].pol_setup_id.values
-     if "source" in metainfo:
-         coords["source_ids"] = metainfo["source"].SOURCE_ID.values
-         coords["sources"] = xr.DataArray(
-             metainfo["source"].NAME.values, dims=["source_ids"]
-         )
-     if "spectral_window" in metainfo:
-         coords["spw_ids"] = metainfo["spectral_window"].spw_id.values
-     if "state" in metainfo:
-         coords["state_ids"] = metainfo["state"].STATE_ID.values
-
-     return coords
-
-
- def expand_xds(xds: xr.Dataset) -> xr.Dataset:
-     """
-     expand single (row) dimension of xds to (time, baseline)
-
-     Parameters
-     ----------
-     xds : xr.Dataset
-         "flat" dataset (with row dimension - without (time, baseline) dimensions)
-
-     Returns
-     -------
-     xr.Dataset
-         expanded dataset, with (time, baseline) dimensions
-     """
-     assert "baseline" not in xds.coords
-
-     txds = xds.copy()
-
-     unique_baselines, baselines = np.unique(
-         [txds.baseline_ant1_id.values, txds.baseline_ant2_id.values],
-         axis=1,
-         return_inverse=True,
-     )
-
-     txds["baseline"] = xr.DataArray(baselines.astype("int32"), dims=["row"])
-
-     try:
-         txds = (
-             txds.set_index(row=["time", "baseline"])
-             .unstack("row")
-             .transpose("time", "baseline", ...)
-         )
-         # unstack changes type to float when it needs to introduce NaNs, so
-         # we need to reset to the proper type. Avoid if possible, as the
-         # astype calls are costly
-         for dv in txds.data_vars:
-             if txds[dv].dtype != xds[dv].dtype:
-                 txds[dv] = txds[dv].astype(xds[dv].dtype)
-     except Exception as exc:
-         logger.warning(
-             f"WARNING: Cannot expand rows to (time, baseline), "
-             f"possibly duplicate values in (time, baseline). Exception: {exc}."
-             f"\nDataset: {txds=}"
-         )
-         txds = xds.copy()
-
-     return txds
-
-
- def flatten_xds(xds: xr.Dataset) -> xr.Dataset:
-     """
-     flatten (time, baseline) dimensions of xds back to single dimension (row)
-
-     Parameters
-     ----------
-     xds : xr.Dataset
-
-     Returns
-     -------
-     xr.Dataset
-         Dataset in flat form (back to 'row' dimension as read by casacore tables)
-     """
-     txds = xds.copy()
-
-     # flatten the time x baseline dimensions of main table
-     if ("time" in xds.sizes) and ("baseline" in xds.sizes):
-         txds = xds.stack({"row": ("time", "baseline")}).transpose("row", ...)
-         # compute for issue https://github.com/hainegroup/oceanspy/issues/332
-         # drop=True silently does compute (or at least used to)
-
-         fill_value_int32 = get_pad_value(np.int32)
-         txds = txds.where(
-             (
-                 (txds.STATE_ID != fill_value_int32)
-                 & (txds.FIELD_ID != fill_value_int32)
-             ).compute(),
-             drop=True,
-         )  # .unify_chunks()
-
-         # re-assigning (implicitly dropping index coords) one by one produces
-         # DeprecationWarnings: https://github.com/pydata/xarray/issues/6505
-         astyped_data_vars = dict(xds.data_vars)
-         for dv in list(txds.data_vars):
-             if txds[dv].dtype != xds[dv].dtype:
-                 astyped_data_vars[dv] = txds[dv].astype(xds[dv].dtype)
-             else:
-                 astyped_data_vars[dv] = txds[dv]
-
-         flat_xds = xr.Dataset(astyped_data_vars, coords=txds.coords, attrs=txds.attrs)
-         flat_xds = flat_xds.reset_index(["time", "baseline"])
-
-     else:
-         flat_xds = txds
-
-     return flat_xds
-
-
- ####################################
- # automatically compute best data chunking
- def optimal_chunking(
-     ndim: Union[int, None] = None,
-     didxs: Union[Tuple[int], List[int], None] = None,
-     chunk_size: str = "auto",
-     data_shape: Union[tuple, None] = None,
- ) -> tuple:
-     """
-     Determine the optimal chunk shape for reading an MS or Image based
-     on machine resources and intended operations
-
-     Parameters
-     ----------
-     ndim : Union[int, None] = None
-         number of dimensions to chunk. An MS is 3, an
-         expanded MS is 4. An image could be anywhere from 2 to 5. Not
-         needed if data_shape is given.
-     didxs : Union[Tuple[int], List[int], None] = None
-         dimension indices over which subsequent operations
-         will be performed. Values should be less than ndim. Tries to
-         reduce inter-process communication of data contents. Needs to
-         know the shape to do this well. Default None balances chunk size
-         across all dimensions.
-     chunk_size : str (Default value = "auto")
-         target chunk size ('large', 'small', 'auto').
-         Default 'auto' tries to guess by looking at CPU core count and
-         available memory.
-     data_shape : Union[tuple, None] = None
-         shape of the total MS DDI or Image data; helps to chunk well.
-         Default None does not optimize based on shape
-
-     Returns
-     -------
-     tuple
-         optimal chunking for reading the ms (row, chan, pol)
-     """
-     assert (ndim is not None) or (
-         data_shape is not None
-     ), "either ndim or data_shape must be given"
-     assert chunk_size in ["large", "small", "auto"], "invalid chunk_size parameter"
-     if ndim is None:
-         ndim = len(data_shape)
-
-     opt_dims = (
-         didxs if (didxs is not None) and (len(didxs) > 0) else np.arange(ndim)
-     )  # maximize these dim chunk sizes
-     nonopt_dims = np.setdiff1d(np.arange(ndim), opt_dims)  # at the expense of these
-
-     max_chunk_sizes = (
-         data_shape
-         if data_shape is not None
-         else [dd for ii, dd in enumerate([10000, 10000, 10000, 4, 10]) if ii < ndim]
-     )
-     min_chunk_sizes = (
-         np.ceil(np.array(data_shape) / 80).astype(int)
-         if data_shape is not None
-         else (
-             [1000, 1, 1]
-             if ndim == 3
-             else [dd for ii, dd in enumerate([10, 10, 1, 1, 1]) if ii < ndim]
-         )
-     )
-     target_size = 175 * 1024**2 / 8  # ~175 MB chunk worst case with 8-byte DATA column
-     bytes_per_core = int(
-         round(
-             ((psutil.virtual_memory().available * 0.10) / multiprocessing.cpu_count())
-         )
-     )
-     if data_shape is not None:
-         bytes_per_core = min(
-             bytes_per_core, np.prod(data_shape) * 8 / 2
-         )  # ensure at least two chunks
-     if chunk_size == "large":
-         target_size = target_size * 6  # ~1 GB
-     if chunk_size == "auto":
-         target_size = max(min(target_size * 6, bytes_per_core / 8), target_size)
-
-     # start by setting the optimized dims to their max size and non-optimized dims to their min size
-     chunks = np.zeros((ndim), dtype="int")
-     chunks[opt_dims] = np.array(max_chunk_sizes)[opt_dims]
-     chunks[nonopt_dims] = np.array(min_chunk_sizes)[nonopt_dims]
-
-     # iteratively walk towards an optimal chunk size
-     # iteration is needed because rounding to the nearest integer index can make a
-     # big difference (2x) in chunk size for small dimensions like pol
-     for ii in range(10):
-         # if the resulting size is too big, reduce the sizes of the optimized dimensions
-         if (np.prod(chunks) > target_size) and (len(opt_dims) > 0):
-             chunks[opt_dims] = np.round(
-                 chunks[opt_dims]
-                 * (target_size / np.prod(chunks)) ** (1 / len(opt_dims))
-             )
-         # else if the resulting size is too small, increase the sizes of the non-optimized dimensions
-         elif (np.prod(chunks) < target_size) and (len(nonopt_dims) > 0):
-             chunks[nonopt_dims] = np.round(
-                 chunks[nonopt_dims]
-                 * (target_size / np.prod(chunks)) ** (1 / len(nonopt_dims))
-             )
-         chunks = np.min((chunks, max_chunk_sizes), axis=0)
-         chunks = np.max((chunks, min_chunk_sizes), axis=0)
-
-     return tuple(chunks)
-
-
- def calc_optimal_ms_chunk_shape(
-     memory_available_in_bytes, shape, element_size_in_bytes, column_name
- ) -> int:
-     """
-     Calculates the max number of rows (1st dim in shape) of a variable
-     that can fit in the memory available to a thread.
-
-     Parameters
-     ----------
-     memory_available_in_bytes :
-         memory available to the thread, in bytes
-     shape :
-         shape of the column data, rows first
-     element_size_in_bytes :
-         size of one element of the column, in bytes
-     column_name :
-         name of the column (used in messages)
-
-     Returns
-     -------
-     int
-     """
-     factor = 0.8  # Account for memory used by other objects in thread.
-     # total_mem = np.prod(shape)*element_size_in_bytes
-     single_row_mem = np.prod(shape[1:]) * element_size_in_bytes
-
-     if not single_row_mem < factor * memory_available_in_bytes:
-         msg = (
-             "Not enough memory in a thread to contain a row of "
-             f"{column_name}. Need at least {single_row_mem / factor}"
-             " bytes."
-         )
-         raise RuntimeError(msg)
-
-     rows_chunk_size = int((factor * memory_available_in_bytes) / single_row_mem)
-
-     if rows_chunk_size > shape[0]:
-         rows_chunk_size = shape[0]
-
-     logger.debug(
-         "Number of rows in chunk for " + column_name + ": " + str(rows_chunk_size)
-     )
-
-     return rows_chunk_size
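
The central helpers removed here, expand_xds and flatten_xds, round-trip an MS between a flat row layout and a (time, baseline) cube. Below is a minimal sketch of that round trip, assuming a toy two-baseline dataset; every name in it (flat, cube, and the example values) is illustrative, not part of xradio.

    import numpy as np
    import xarray as xr

    # Toy "flat" dataset: 2 times x 2 baselines stored along a single row axis,
    # mirroring the layout the deleted expand_xds() consumed.
    times = np.repeat([1.0, 2.0], 2)
    ant1 = np.tile([0, 0], 2)
    ant2 = np.tile([1, 2], 2)
    flat = xr.Dataset(
        {"DATA": ("row", np.arange(4.0))},
        coords={
            "time": ("row", times),
            "baseline_ant1_id": ("row", ant1),
            "baseline_ant2_id": ("row", ant2),
        },
    )

    # Derive a baseline index from the (ant1, ant2) pairs, as expand_xds() did.
    _, baseline = np.unique([ant1, ant2], axis=1, return_inverse=True)
    flat["baseline"] = ("row", baseline.astype("int32"))

    # row -> (time, baseline): the core set_index/unstack idiom of expand_xds().
    cube = (
        flat.set_index(row=["time", "baseline"])
        .unstack("row")
        .transpose("time", "baseline", ...)
    )
    print(cube.DATA.shape)  # (2, 2)

    # (time, baseline) -> row again, the stack idiom of flatten_xds().
    flat_again = cube.stack({"row": ("time", "baseline")}).transpose("row", ...)
    print(flat_again.DATA.shape)  # (4,)

As in the deleted code, unstack will introduce NaNs (and a dtype change to float) wherever a (time, baseline) pair has no row, which is why expand_xds restored the original dtypes afterwards.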
xradio/measurement_set/_utils/_zarr/read.py (deleted)
@@ -1,263 +0,0 @@
- import toolviper.utils.logger as logger
- import os
- from pathlib import Path
- from typing import Dict, List, Tuple, Union
-
- import numpy as np
- import xarray as xr
-
-
- def read_part_keys(inpath: str) -> List[Tuple]:
-     """
-     Reads the partition keys from a Zarr-stored cds.
-
-     Parameters
-     ----------
-     inpath : str
-         path to read from
-
-     Returns
-     -------
-     List[Tuple]
-         partition keys from a cds
-     """
-
-     xds_keys = xr.open_zarr(
-         os.path.join(inpath, "partition_keys"),
-     )
-
-     spw_ids = xds_keys.coords["spw_ids"]
-     pol_setup_ids = xds_keys.coords["pol_setup_ids"]
-     intents = xds_keys.coords["intents"]
-
-     return list(zip(spw_ids.values, pol_setup_ids.values, intents.values))
-
-
- def read_subtables(inpath: str, asdm_subtables: bool) -> Dict[str, xr.Dataset]:
-     """
-     Reads the metainfo subtables from a Zarr-stored cds.
-
-     Parameters
-     ----------
-     inpath : str
-         path to read from
-     asdm_subtables : bool
-         whether to also read the ASDM_* subtables
-
-     Returns
-     -------
-     Dict[str, xr.Dataset]
-         metainfo subtables from a cds
-     """
-
-     metainfo = {}
-     metadir = Path(inpath, "metainfo")
-     for subt in sorted(metadir.iterdir()):
-         if subt.is_dir():
-             if not asdm_subtables and subt.name.startswith("ASDM_"):
-                 logger.debug(f"Not loading ASDM_ subtable {subt.name}...")
-                 continue
-
-             metainfo[subt.name] = read_xds(subt, consolidated=True)
-
-     return metainfo
-
-
- def read_partitions(inpath: str, part_keys: List[Tuple]) -> Dict[str, xr.Dataset]:
-     """
-     Reads all the data partitions of a Zarr-stored cds.
-
-     Parameters
-     ----------
-     inpath : str
-         path to read from
-     part_keys : List[Tuple]
-         partition keys, as returned by read_part_keys()
-
-     Returns
-     -------
-     Dict[str, xr.Dataset]
-         partitions from a cds
-     """
-
-     partitions = {}
-     partdir = Path(inpath, "partitions")
-     xds_cnt = 0
-     for part in sorted(partdir.iterdir()):
-         if part.is_dir() and part.name.startswith("xds_"):
-             xds = read_xds(part, consolidated=True)
-             partitions[part_keys[xds_cnt]] = xds
-             xds_cnt += 1
-
-     return partitions
-
-
- def read_xds(
-     inpath: str,
-     chunks: Union[Dict, None] = None,
-     consolidated: bool = True,
-     overwrite_encoded_chunks: bool = True,
- ) -> xr.Dataset:
-     """
-     Read single xds from zarr storage.
-
-     Parameters
-     ----------
-     inpath : str
-         path to read from
-     chunks : Union[Dict, None] (Default value = None)
-         set chunk size per dimension. Dict is in the form of
-         'dim': chunk_size, for example {'time': 100, 'baseline': 400, 'chan': 32, 'pol': 1}.
-         Default None uses the original chunking in the zarr input.
-     consolidated : bool (Default value = True)
-         use zarr consolidated metadata.
-     overwrite_encoded_chunks : bool (Default value = True)
-         drop the zarr chunks encoded for each variable
-         when a dataset is loaded with specified chunk sizes.
-
-     Returns
-     -------
-     xr.Dataset
-     """
-
-     xds = xr.open_zarr(
-         os.path.join(inpath),
-         chunks=chunks,
-         consolidated=consolidated,
-         overwrite_encoded_chunks=overwrite_encoded_chunks,
-     )
-
-     return xds
-
-
- def read_zarr(
-     infile: str,
-     sel_xds: Union[List, str] = None,
-     chunks: Dict = None,
-     consolidated: bool = True,
-     overwrite_encoded_chunks: bool = True,
-     **kwargs,
- ):
-     """
-     Note: old, initial cngi-io format. To be removed, most likely.
-     Read zarr format Visibility data from disk to an ngCASA visibilities dataset
-     object consisting of dictionaries of xarray Datasets.
-
-     Parameters
-     ----------
-     infile : str
-         input Visibility filename
-     sel_xds : string or list
-         Select the ddis to open, for example ['xds0', 'xds1'] will open the
-         first two ddis. Default None returns everything
-     chunks : dict
-         sets specified chunk size per dimension. Dict is in the form of
-         'dim': chunk_size, for example {'time': 100, 'baseline': 400, 'chan': 32, 'pol': 1}.
-         Default None uses the original zarr chunking.
-     consolidated : bool
-         use zarr consolidated metadata capability. Only works for stores that have
-         already been consolidated. Default True works with datasets produced by
-         convert_ms which automatically consolidates metadata.
-     overwrite_encoded_chunks : bool
-         drop the zarr chunks encoded for each variable when a dataset is loaded with
-         specified chunk sizes. Default True, only applies when chunks is not None.
-     **kwargs :
-
-     Returns
-     -------
-     xr.Dataset
-         an mxds with the global subtables and the selected partitions stored
-         in its attrs
-     """
-
-     if chunks is None:
-         chunks = "auto"
-     # overwrite_encoded_chunks = False
-     # print('overwrite_encoded_chunks', overwrite_encoded_chunks)
-
-     infile = os.path.expanduser(infile)
-     if sel_xds is None:
-         sel_xds = os.listdir(infile)
-     sel_xds = list(np.atleast_1d(sel_xds))
-
-     # print(os.path.join(infile, 'DDI_INDEX'))
-     mxds = xr.open_zarr(
-         os.path.join(infile, "DDI_INDEX"),
-         chunks=chunks,
-         consolidated=consolidated,
-         overwrite_encoded_chunks=overwrite_encoded_chunks,
-     )
-
-     for part in os.listdir(os.path.join(infile, "global")):
-         xds_temp = xr.open_zarr(
-             os.path.join(infile, "global/" + part),
-             chunks=chunks,
-             consolidated=consolidated,
-             overwrite_encoded_chunks=overwrite_encoded_chunks,
-         )
-         xds_temp = _fix_dict_for_ms(part, xds_temp)
-         mxds.attrs[part] = xds_temp.compute()
-
-     for part in os.listdir(infile):
-         if ("xds" in part) and (part in sel_xds):
-             xds_temp = xr.open_zarr(
-                 os.path.join(infile, part),
-                 chunks=chunks,
-                 consolidated=consolidated,
-                 overwrite_encoded_chunks=overwrite_encoded_chunks,
-             )
-             xds_temp = _fix_dict_for_ms(part, xds_temp)
-             mxds.attrs[part] = xds_temp
-
-     return mxds
-
-
- def _fix_dict_for_ms(name, xds):
-     # Used to be:
-     # xds.attrs["column_descriptions"] = xds.attrs["column_descriptions"][0]
-     # xds.attrs["info"] = xds.attrs["info"][0]
-
-     if "xds" in name:
-         xds.column_descriptions["UVW"]["shape"] = np.array(
-             xds.column_descriptions["UVW"]["shape"].split(",")
-         ).astype(int)
-
-     if "spectral_window" == name:
-         xds.column_descriptions["CHAN_FREQ"]["keywords"]["MEASINFO"]["TabRefCodes"] = np.array(
-             xds.column_descriptions["CHAN_FREQ"]["keywords"]["MEASINFO"][
-                 "TabRefCodes"
-             ].split(",")
-         ).astype(int)
-         xds.column_descriptions["REF_FREQUENCY"]["keywords"]["MEASINFO"][
-             "TabRefCodes"
-         ] = np.array(
-             xds.column_descriptions["REF_FREQUENCY"]["keywords"]["MEASINFO"][
-                 "TabRefCodes"
-             ].split(",")
-         ).astype(int)
-
-     if "antenna" == name:
-         xds.column_descriptions["OFFSET"]["shape"] = np.array(
-             xds.column_descriptions["OFFSET"]["shape"].split(",")
-         ).astype(int)
-         xds.column_descriptions["POSITION"]["shape"] = np.array(
-             xds.column_descriptions["POSITION"]["shape"].split(",")
-         ).astype(int)
-
-     if "feed" == name:
-         xds.column_descriptions["POSITION"]["shape"] = np.array(
-             xds.column_descriptions["POSITION"]["shape"].split(",")
-         ).astype(int)
-
-     if "observation" == name:
-         xds.column_descriptions["TIME_RANGE"]["shape"] = np.array(
-             xds.column_descriptions["TIME_RANGE"]["shape"].split(",")
-         ).astype(int)
-
-     return xds
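
The removed read.py was in essence a thin layer over xr.open_zarr with consolidated metadata. A minimal sketch of the equivalent call pattern follows; the store path vis_data.zarr and its layout are hypothetical placeholders, not paths the library guarantees.

    import xarray as xr

    # Hypothetical consolidated zarr store holding one partition.
    store = "vis_data.zarr/partitions/xds_0"

    # Essentially what the deleted read_xds() did with its defaults: open
    # lazily, keep the on-disk chunking (chunks=None), and use the store's
    # consolidated metadata to avoid listing it.
    xds = xr.open_zarr(
        store,
        chunks=None,
        consolidated=True,
        overwrite_encoded_chunks=True,
    )

    # Or re-chunk on open, as read_xds(chunks={...}) allowed; with explicit
    # chunks, overwrite_encoded_chunks=True drops the encoded zarr chunks.
    xds_rechunked = xr.open_zarr(store, chunks={"time": 100, "baseline": 400})

The higher-level read_zarr/read_subtables/read_partitions wrappers repeated this call per subdirectory of the store and assembled the results into a single mxds or cds, a role now served by the load_processing_set/open_processing_set modules listed in the files-changed table above.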