xradio 0.0.28__py3-none-any.whl → 0.0.30__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (48)
  1. xradio/__init__.py +5 -4
  2. xradio/_utils/array.py +90 -0
  3. xradio/_utils/zarr/common.py +48 -3
  4. xradio/image/_util/zarr.py +4 -1
  5. xradio/schema/__init__.py +24 -6
  6. xradio/schema/bases.py +440 -2
  7. xradio/schema/check.py +96 -55
  8. xradio/schema/dataclass.py +123 -27
  9. xradio/schema/metamodel.py +21 -4
  10. xradio/schema/typing.py +33 -18
  11. xradio/vis/__init__.py +5 -2
  12. xradio/vis/_processing_set.py +71 -32
  13. xradio/vis/_vis_utils/_ms/_tables/create_field_and_source_xds.py +710 -0
  14. xradio/vis/_vis_utils/_ms/_tables/load.py +23 -10
  15. xradio/vis/_vis_utils/_ms/_tables/load_main_table.py +145 -64
  16. xradio/vis/_vis_utils/_ms/_tables/read.py +747 -172
  17. xradio/vis/_vis_utils/_ms/_tables/read_main_table.py +173 -44
  18. xradio/vis/_vis_utils/_ms/_tables/read_subtables.py +79 -28
  19. xradio/vis/_vis_utils/_ms/_tables/write.py +102 -45
  20. xradio/vis/_vis_utils/_ms/_tables/write_exp_api.py +127 -65
  21. xradio/vis/_vis_utils/_ms/chunks.py +58 -21
  22. xradio/vis/_vis_utils/_ms/conversion.py +582 -102
  23. xradio/vis/_vis_utils/_ms/descr.py +52 -20
  24. xradio/vis/_vis_utils/_ms/msv2_to_msv4_meta.py +72 -35
  25. xradio/vis/_vis_utils/_ms/msv4_infos.py +0 -59
  26. xradio/vis/_vis_utils/_ms/msv4_sub_xdss.py +76 -9
  27. xradio/vis/_vis_utils/_ms/optimised_functions.py +0 -46
  28. xradio/vis/_vis_utils/_ms/partition_queries.py +308 -119
  29. xradio/vis/_vis_utils/_ms/partitions.py +82 -25
  30. xradio/vis/_vis_utils/_ms/subtables.py +32 -14
  31. xradio/vis/_vis_utils/_utils/partition_attrs.py +30 -11
  32. xradio/vis/_vis_utils/_utils/xds_helper.py +136 -45
  33. xradio/vis/_vis_utils/_zarr/read.py +60 -22
  34. xradio/vis/_vis_utils/_zarr/write.py +83 -9
  35. xradio/vis/_vis_utils/ms.py +48 -29
  36. xradio/vis/_vis_utils/zarr.py +44 -20
  37. xradio/vis/convert_msv2_to_processing_set.py +43 -32
  38. xradio/vis/load_processing_set.py +38 -61
  39. xradio/vis/read_processing_set.py +64 -96
  40. xradio/vis/schema.py +687 -0
  41. xradio/vis/vis_io.py +75 -43
  42. {xradio-0.0.28.dist-info → xradio-0.0.30.dist-info}/LICENSE.txt +6 -1
  43. {xradio-0.0.28.dist-info → xradio-0.0.30.dist-info}/METADATA +10 -5
  44. xradio-0.0.30.dist-info/RECORD +73 -0
  45. {xradio-0.0.28.dist-info → xradio-0.0.30.dist-info}/WHEEL +1 -1
  46. xradio/vis/model.py +0 -497
  47. xradio-0.0.28.dist-info/RECORD +0 -71
  48. {xradio-0.0.28.dist-info → xradio-0.0.30.dist-info}/top_level.txt +0 -0

xradio/vis/_vis_utils/_ms/subtables.py
@@ -13,11 +13,11 @@ from ._tables.read_subtables import read_ephemerides, read_delayed_pointing_table
 
 subt_rename_ids = {
     "ANTENNA": {"row": "antenna_id", "dim_1": "xyz"},
-    "FEED": {"dim_1": "xyz", "dim_2": "receptor", "dim_3": "receptor"},
+    "FEED": {"dim_1": "xyz", "dim_2": "receptor", "dim_3": "receptor2"},
     "FIELD": {"row": "field_id", "dim_1": "poly_id", "dim_2": "ra/dec"},
     "FREQ_OFFSET": {"antenna1": "antenna1_id", "antenna2": "antenna2_id"},
     "OBSERVATION": {"row": "observation_id", "dim_1": "start/end"},
-    "POINTING": {"dim_1": "n_polynomial", "dim_2": "ra/dec", "dim_3": "ra/dec"},
+    "POINTING": {"dim_1": "n_polynomial", "dim_3": "dir"},
     "POLARIZATION": {"row": "pol_setup_id", "dim_2": "product_id"},
     "PROCESSOR": {"row": "processor_id"},
     "SPECTRAL_WINDOW": {"row": "spectral_window_id", "dim_1": "chan"},
@@ -35,10 +35,20 @@ def read_ms_subtables(
     """
     Read MSv2 subtables (main table keywords) as xr.Dataset
 
-    :param infile: input MeasurementSet path
-    :param done_subt: Subtables that were already read, to skip them
-    :param asdm_subtables: Whether to also read ASDM_* subtables
-    :return: dict of xarray datasets read from subtables (metadata tables)
+    Parameters
+    ----------
+    infile : str
+        input MeasurementSet path
+    done_subt : List[str]
+        Subtables that were already read, to skip them
+    asdm_subtables : bool (Default value = False)
+        Whether to also read ASDM_* subtables
+
+    Returns
+    -------
+    Dict[str, xr.Dataset]
+        dict of xarray datasets read from subtables (metadata tables)
+
     """
     ignore_msv2_cols_subt = ["FLAG_CMD", "FLAG_ROW", "BEAM_ID"]
     skip_tables = ["SORTED_TABLE", "FLAG_CMD"] + done_subt
@@ -62,9 +72,8 @@ def read_ms_subtables(
 
         if subt_name == "POINTING":
             subt_path = Path(infile, subt_name)
-            xds = read_delayed_pointing_table(
-                str(subt_path), rename_ids=subt_rename_ids.get(subt_name, None)
-            )
+            rename_ids = {"dim_2": "n_polynomial", "dim_3": "dir"}
+            xds = read_delayed_pointing_table(str(subt_path), rename_ids=rename_ids)
         else:
             xds = read_generic_table(
                 infile,
@@ -88,14 +97,23 @@ def read_ms_subtables(
 def add_pointing_to_partition(
     xds_part: xr.Dataset, xds_pointing: xr.Dataset
 ) -> xr.Dataset:
-    """Take pointing variables from a (delayed) pointing dataset and
+    """
+    Take pointing variables from a (delayed) pointing dataset and
     transfer them to a main table partition dataset (interpolating into
     the destination time axis)
 
-    :param xds_part: a partition/sub-xds of the main table
-    :param xds_pointing: the xds read from the pointing subtable
-    :return: partition xds with pointing variables added/interpolated from the
-    pointing_xds into its time axis
+    Parameters
+    ----------
+    xds_part : xr.Dataset
+        a partition/sub-xds of the main table
+    xds_pointing : xr.Dataset
+        the xds read from the pointing subtable
+
+    Returns
+    -------
+    xr.Dataset
+        partition xds with pointing variables added/interpolated from the
+        pointing_xds into its time axis
 
     """
     interp_xds = xds_pointing.interp(time=xds_part.time, method="nearest")

xradio/vis/_vis_utils/_utils/partition_attrs.py
@@ -29,11 +29,19 @@ VisGroup = TypedDict(
 
 
 def make_vis_group_attr(xds: xr.Dataset) -> Dict:
-    """Add an attribute with the initial data/vis groups that have been
+    """
+    Add an attribute with the initial data/vis groups that have been
     read from the MS (DATA / CORRECTED_DATA / MODEL_DATA)
 
-    :param xds: dataset to make the vis_group depending on its data_vars
-    :return: vis_group derived form this dataset
+    Parameters
+    ----------
+    xds : xr.Dataset
+        dataset to make the vis_group depending on its data_vars
+
+    Returns
+    -------
+    Dict
+        vis_group derived form this dataset
     """
     msv2_extended_vis_vars = ["vis", "vis_corrected", "vis_model"]
     msv2_col_names = ["DATA", "CORRECTED_DATA", "MODEL_DATA"]
@@ -87,7 +95,8 @@ def add_partition_attrs(
     part_ids: PartitionIds,
     other_attrs: Dict,
 ) -> xr.Dataset:
-    """add attributes to the xr.Dataset:
+    """
+    add attributes to the xr.Dataset:
     - sub-dict of partition-id related ones
     - sub-dict of data/vis groups
     - sub-dict of attributes coming from the lower level read
@@ -96,13 +105,23 @@ def add_partition_attrs(
     Produces the partition IDs that can be retrieved from the DD subtable and also
     adds the ones passed in part_ids
 
-    :param xds: dataset partition
-    :param ddi: DDI of this partition
-    :param ddi_xds: dataset for the DATA_DESCRIPTION subtable
-    :param part_ids: partition id attrs
-    :param other_attrs: additional attributes produced by the read functions
-    :return: dataset with attributes added
-
+    Parameters
+    ----------
+    xds : xr.Dataset
+        dataset partition
+    ddi : int
+        DDI of this partition
+    ddi_xds : xr.Dataset
+        dataset for the DATA_DESCRIPTION subtable
+    part_ids : PartitionIds
+        partition id attrs
+    other_attrs : Dict
+        additional attributes produced by the read functions
+
+    Returns
+    -------
+    xr.Dataset
+        dataset with attributes added
     """
 
     xds = xds.assign_attrs(

xradio/vis/_vis_utils/_utils/xds_helper.py
@@ -12,7 +12,8 @@ from .stokes_types import stokes_types
 def make_coords(
     xds: xr.Dataset, ddi: int, subtables: Tuple[xr.Dataset, ...]
 ) -> Dict[str, np.ndarray]:
-    """Make the coords to be added to a partition or chunk (besides
+    """
+    Make the coords to be added to a partition or chunk (besides
     the time, baseline) basic structure
 
     Grabs:
@@ -20,7 +21,18 @@ def make_coords(
     - pol idxs from the pol+ddi subtables -> pol names via the stokes_types
     - antenna IDs from antenna subtable
 
-    :param: sub-xds as (ant_xds, ddi_xds, spw_xds, pol_xds)
+    Parameters
+    ----------
+    xds : xr.Dataset
+
+    ddi : int
+
+    subtables: Tuple[xr.Dataset, ...]
+
+
+    Returns
+    -------
+    Dict[str, np.ndarray]
     """
     ant_xds, ddi_xds, spw_xds, pol_xds = subtables
     freq = spw_xds.chan_freq.values[
@@ -46,15 +58,25 @@ def vis_xds_packager_cds(
     subtables: List[Tuple[str, xr.Dataset]],
     partitions: Dict[Any, xr.Dataset],
     descr_add: str = "",
-):
-    """Takes a a list of subtable xds datasets and a dictionary of data
+) -> CASAVisSet:
+    """
+    Takes a a list of subtable xds datasets and a dictionary of data
     partition xds datasets and and packages them as a CASA vis dataset
     (cds)
 
-    :param partitions: data partiions as xds datasets
-    :param subtables: subtables as xds datasets
-    :param descr_add: substring to add to the short descr string of the cds
-    :return: A "cds" - container for the metainfo subtables and data partitions
+    Parameters
+    ----------
+    partitions : List[Tuple[str, xr.Dataset]]
+        data partiions as xds datasets
+    subtables : Dict[Any, xr.Dataset]
+        subtables as xds datasets
+    descr_add : str (Default value = "")
+        substring to add to the short descr string of the cds
+
+    Returns
+    -------
+    CASAVisSet
+        A "cds" - container for the metainfo subtables and data partitions
     """
     vers = version("xradio")
 
@@ -70,14 +92,24 @@ def vis_xds_packager_mxds(
     subtables: List[Tuple[str, xr.Dataset]],
     add_global_coords: bool = True,
 ) -> xr.Dataset:
-    """Takes a dictionary of data partition xds datasets and a list of
+    """
+    Takes a dictionary of data partition xds datasets and a list of
     subtable xds datasets and packages them as a dataset of datasets
     (mxds)
 
-    :param partitions: data partiions as xds datasets
-    :param subtables: subtables as xds datasets
-    :add_global_coords: whether to add coords to the output mxds
-    :return: A "mxds" - xr.dataset of datasets
+    Parameters
+    ----------
+    partitions : Dict[Any, xr.Dataset]
+        data partiions as xds datasets
+    subtables : List[Tuple[str, xr.Dataset]]
+        subtables as xds datasets
+    :add_global_coords: whether to add coords to the output mxds
+    add_global_coords: bool (Default value = True)
+
+    Returns
+    -------
+    xr.Dataset
+        A "mxds" - xr.dataset of datasets
     """
     mxds = xr.Dataset(attrs={"metainfo": subtables, "partitions": partitions})
 
@@ -87,7 +119,7 @@ def vis_xds_packager_mxds(
     return mxds
 
 
-def make_global_coords(mxds: xr.Dataset):
+def make_global_coords(mxds: xr.Dataset) -> Dict[str, xr.DataArray]:
     coords = {}
     metainfo = mxds.attrs["metainfo"]
     if "antenna" in metainfo:
@@ -125,12 +157,24 @@ def make_global_coords(mxds: xr.Dataset):
 def expand_xds(xds: xr.Dataset) -> xr.Dataset:
     """
     expand single (row) dimension of xds to (time, baseline)
+
+    Parameters
+    ----------
+    xds : xr.Dataset
+        "flat" dataset (with row dimension - without (time, baseline) dimensions)
+
+    Returns
+    -------
+    xr.Dataset
+        expanded dataset, with (time, baseline) dimensions
     """
     assert "baseline" not in xds.coords
 
     txds = xds.copy()
     unique_baselines, baselines = np.unique(
-        [txds.antenna1.values, txds.antenna2.values], axis=1, return_inverse=True
+        [txds.baseline_ant1_id.values, txds.baseline_ant2_id.values],
+        axis=1,
+        return_inverse=True,
     )
     txds["baseline"] = xr.DataArray(baselines.astype("int32"), dims=["row"])
 
@@ -148,7 +192,7 @@ def expand_xds(xds: xr.Dataset) -> xr.Dataset:
             if txds[dv].dtype != xds[dv].dtype:
                 txds[dv] = txds[dv].astype(xds[dv].dtype)
     except Exception as exc:
-        print(
+        logger.warning(
             f"WARNING: Cannot expand rows to (time, baseline), "
             f"possibly duplicate values in (time, baseline). Exception: {exc}"
         )
@@ -160,20 +204,47 @@ def expand_xds(xds: xr.Dataset) -> xr.Dataset:
 def flatten_xds(xds: xr.Dataset) -> xr.Dataset:
     """
     flatten (time, baseline) dimensions of xds back to single dimension (row)
+
+    Parameters
+    ----------
+    xds : xr.Dataset
+
+
+    Returns
+    -------
+    xr.Dataset
     """
-    nan_int = np.array([np.nan]).astype("int32")[0]
+    # known invalid cast warning when casting to integer
+    with np.errstate(invalid="ignore"):
+        nan_int = np.array([np.nan]).astype("int32")[0]
     txds = xds.copy()
 
     # flatten the time x baseline dimensions of main table
     if ("time" in xds.sizes) and ("baseline" in xds.sizes):
         txds = xds.stack({"row": ("time", "baseline")}).transpose("row", ...)
+        # compute for issue https://github.com/hainegroup/oceanspy/issues/332
+        # drop=True silently does compute (or at least used to)
         txds = txds.where(
-            (txds.state_id != nan_int) & (txds.field_id != nan_int), drop=True
+            ((txds.state_id != nan_int) & (txds.field_id != nan_int)).compute(),
+            drop=True,
         )  # .unify_chunks()
-    for dv in list(xds.data_vars):
-        txds[dv] = txds[dv].astype(xds[dv].dtype)
 
-    return txds
+        # re-assigning (implicitly dropping index coords) one by one produces
+        # DeprecationWarnings: https://github.com/pydata/xarray/issues/6505
+        astyped_data_vars = dict(xds.data_vars)
+        for dv in list(txds.data_vars):
+            if txds[dv].dtype != xds[dv].dtype:
+                astyped_data_vars[dv] = txds[dv].astype(xds[dv].dtype)
+            else:
+                astyped_data_vars[dv] = txds[dv]
+
+        flat_xds = xr.Dataset(astyped_data_vars, coords=txds.coords, attrs=txds.attrs)
+        flat_xds = flat_xds.reset_index(["time", "baseline"])
+
+    else:
+        flat_xds = txds
+
+    return flat_xds
 
 
 ####################################
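
The rewritten body leans on two xarray operations: stack, which builds a MultiIndex "row" dimension out of (time, baseline), and reset_index, which demotes those index levels back to plain coordinates so the flat dataset can be serialized. A toy round trip:

    import numpy as np
    import xarray as xr

    grid = xr.Dataset(
        {"vis": (("time", "baseline"), np.arange(6.0).reshape(2, 3))},
        coords={"time": [0.0, 1.0], "baseline": [0, 1, 2]},
    )

    # (time, baseline) -> row, as in flatten_xds
    flat = grid.stack({"row": ("time", "baseline")}).transpose("row", ...)

    # drop the MultiIndex, keeping time/baseline as non-index coordinates
    flat = flat.reset_index(["time", "baseline"])
    print(flat["vis"].dims)  # ('row',)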
@@ -188,21 +259,30 @@ def optimal_chunking(
     Determine the optimal chunk shape for reading an MS or Image based
     on machine resources and intended operations
 
-    :param ndim: number of dimensions to chunk. An MS is 3, an
-    expanded MS is 4. An image could be anywhere from 2 to 5. Not
-    needed if data_shape is given.
-    :param didxs: dimension indices over which subsequent operations
-    will be performed. Values should be less than ndim. Tries to
-    reduce inter-process communication of data contents. Needs to
-    know the shape to do this well. Default None balances chunk size
-    across all dimensions.
-    :param chunk_size: target chunk size ('large', 'small', 'auto').
-    Default 'auto' tries to guess by looking at CPU core count and
-    available memory.
-    :param data_shape: shape of the total MS DDI or Image data. Helps
-    to know. Default None does not optimize based on shape
-
-    :return: optimal chunking for reading the ms (row, chan, pol)
+    Parameters
+    ----------
+    ndim : Union[int, None] = None
+        number of dimensions to chunk. An MS is 3, an
+        expanded MS is 4. An image could be anywhere from 2 to 5. Not
+        needed if data_shape is given.
+    didxs : Union[Tuple[int], List[int], None] = None
+        dimension indices over which subsequent operations
+        will be performed. Values should be less than ndim. Tries to
+        reduce inter-process communication of data contents. Needs to
+        know the shape to do this well. Default None balances chunk size
+        across all dimensions.
+    chunk_size : str (Default value = "auto")
+        target chunk size ('large', 'small', 'auto').
+        Default 'auto' tries to guess by looking at CPU core count and
+        available memory.
+    data_shape : Union[tuple, None] = None
+        shape of the total MS DDI or Image data. Helps
+        to know. Default None does not optimize based on shape
+
+    Returns
+    -------
+    tuple
+        optimal chunking for reading the ms (row, chan, pol)
     """
     assert (ndim is not None) or (
         data_shape is not None
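
Going by the parameters documented above, a call could look like the following. The values are hypothetical, the import path is inferred from this diff's xds_helper.py hunks, and the "auto" heuristics depend on local CPU count and memory:

    # Hypothetical usage of optimal_chunking.
    from xradio.vis._vis_utils._utils.xds_helper import optimal_chunking

    # 3-dim MS of known total shape, optimizing for operations along
    # the channel axis (dimension index 1).
    chunks = optimal_chunking(
        ndim=3,
        didxs=[1],
        chunk_size="auto",
        data_shape=(1_000_000, 64, 4),
    )
    # -> a (row, chan, pol) chunk-shape tuple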
@@ -278,22 +358,33 @@ def calc_optimal_ms_chunk_shape(
     """
     Calculates the max number of rows (1st dim in shape) of a variable
     that can be fit in the memory for a thread.
+
+    Parameters
+    ----------
+    memory_available_in_bytes :
+
+    shape :
+
+    element_size_in_bytes :
+
+    column_name :
+
+
+    Returns
+    -------
+    int
     """
     factor = 0.8  # Account for memory used by other objects in thread.
     # total_mem = np.prod(shape)*element_size_in_bytes
     single_row_mem = np.prod(shape[1:]) * element_size_in_bytes
 
-    try:
-        assert single_row_mem < factor * memory_available_in_bytes
-    except AssertionError as err:
-        logger.exception(
+    if not single_row_mem < factor * memory_available_in_bytes:
+        msg = (
             "Not engough memory in a thread to contain a row of "
-            + column_name
-            + ". Need at least "
-            + str(single_row_mem / factor)
-            + " bytes."
+            f"{column_name}. Need at least {single_row_mem / factor}"
+            " bytes."
         )
-        raise err
+        raise RuntimeError(msg)
 
     rows_chunk_size = int((factor * memory_available_in_bytes) / single_row_mem)
 
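
The arithmetic in the new branch is simple enough to re-derive standalone (this mirrors the function rather than importing it):

    import numpy as np

    def max_rows_per_thread(memory_available_in_bytes, shape, element_size_in_bytes):
        """Mirror of the row-count calculation in calc_optimal_ms_chunk_shape."""
        factor = 0.8  # leave headroom for other objects in the thread
        single_row_mem = np.prod(shape[1:]) * element_size_in_bytes
        if not single_row_mem < factor * memory_available_in_bytes:
            raise RuntimeError(
                f"Need at least {single_row_mem / factor} bytes per thread."
            )
        return int((factor * memory_available_in_bytes) / single_row_mem)

    # e.g. 2 GiB per thread, rows of shape (chan=64, pol=4), 8-byte elements
    print(max_rows_per_thread(2 * 1024**3, (10_000, 64, 4), 8))  # 838860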

xradio/vis/_vis_utils/_zarr/read.py
@@ -11,9 +11,16 @@ def read_part_keys(inpath: str) -> List[Tuple]:
     """
     Reads the partition keys from a Zarr-stored cds.
 
-    :param inpath: path to read from
+    Parameters
+    ----------
+    inpath : str
+        path to read from
+
+    Returns
+    -------
+    List[Tuple]
+        partition keys from a cds
 
-    :return: partition keys from a cds
     """
 
     xds_keys = xr.open_zarr(
@@ -31,9 +38,19 @@ def read_subtables(inpath: str, asdm_subtables: bool) -> Dict[str, xr.Dataset]:
     """
     Reads the metainfo subtables from a Zarr-stored cds.
 
-    :param inpath: path to read from
+    Parameters
+    ----------
+    inpath : str
+        path to read from
+
+    asdm_subtables : bool
+
+
+    Returns
+    -------
+    Dict[str, xr.Dataset]
+        metainfo subtables from a cds
 
-    :return: metainfo subtables from a cds
     """
 
     metainfo = {}
@@ -53,9 +70,18 @@ def read_partitions(inpath: str, part_keys: List[Tuple]) -> Dict[str, xr.Dataset]:
     """
     Reads all the data partitions a Zarr-stored cds.
 
-    :param inpath: path to read from
+    Parameters
+    ----------
+    inpath : str
+        path to read from
+    part_keys : List[Tuple]
+
+
+    Returns
+    -------
+    Dict[str, xr.Dataset]
+        partitions from a cds
 
-    :return: partitions from a cds
     """
 
     partitions = {}
@@ -79,13 +105,23 @@ def read_xds(
     """
     Read single xds from zarr storage.
 
-    :param inpath: path to read from
-    :param chunks: set chunk size per dimension. Dict is in the form of
-    'dim':chunk_size, for example {'time':100, 'baseline':400, 'chan':32, 'pol':1}.
-    Default None uses the original chunking in the zarr input.
-    :param consolidated: use zarr consolidated metadata.
-    :param overwrite_encoded_chunks: drop the zarr chunks encoded for each variable
-    when a dataset is loaded with specified chunk sizes.
+    Parameters
+    ----------
+    inpath : str
+        path to read from
+    chunks : Union[Dict, None] (Default value = None)
+        set chunk size per dimension. Dict is in the form of
+        'dim':chunk_size, for example {'time':100, 'baseline':400, 'chan':32, 'pol':1}.
+        Default None uses the original chunking in the zarr input.
+    consolidated : boold (Default value = True)
+        use zarr consolidated metadata.
+    overwrite_encoded_chunks : bool (Default value = True)
+        drop the zarr chunks encoded for each variable
+        when a dataset is loaded with specified chunk sizes.
+
+    Returns
+    -------
+    xr.Dataset
     """
 
     xds = xr.open_zarr(
@@ -99,11 +135,11 @@
 
 
 def read_zarr(
-    infile,
-    sel_xds=None,
-    chunks=None,
-    consolidated=True,
-    overwrite_encoded_chunks=True,
+    infile: str,
+    sel_xds: Union[List, str] = None,
+    chunks: Dict = None,
+    consolidated: bool = True,
+    overwrite_encoded_chunks: bool = True,
     **kwargs,
 ):
     """
@@ -128,11 +164,12 @@ def read_zarr(
     overwrite_encoded_chunks : bool
         drop the zarr chunks encoded for each variable when a dataset is loaded with
        specified chunk sizes. Default True, only applies when chunks is not None.
+    **kwargs :
+
 
     Returns
     -------
-    xarray.core.dataset.Dataset
-        New xarray Dataset of Visibility data contents
+
     """
 
     if chunks is None:
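
With the annotated signature above, a typical re-chunking call might look like this; the path is a placeholder and the chunk sizes are the example values from the read_xds docstring:

    cds = read_zarr(
        "vis.zarr",
        chunks={"time": 100, "baseline": 400, "chan": 32, "pol": 1},
        consolidated=True,
        overwrite_encoded_chunks=True,
    )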
@@ -178,8 +215,9 @@
 
 
 def _fix_dict_for_ms(name, xds):
-    xds.attrs["column_descriptions"] = xds.attrs["column_descriptions"][0]
-    xds.attrs["info"] = xds.attrs["info"][0]
+    # Used to be:
+    # xds.attrs["column_descriptions"] = xds.attrs["column_descriptions"][0]
+    # xds.attrs["info"] = xds.attrs["info"][0]
 
     if "xds" in name:
         xds.column_descriptions["UVW"]["shape"] = np.array(

xradio/vis/_vis_utils/_zarr/write.py
@@ -10,10 +10,21 @@ import zarr
 def write_part_keys(
     partitions: Dict[Any, xr.Dataset], outpath: str, compressor: numcodecs.abc.Codec
 ) -> None:
-    """Writes an xds with the partition keys.
+    """
+    Writes an xds with the partition keys.
+
+    Parameters
+    ----------
+    partitions : Dict[Any, xr.Dataset]
+        partitions from a cds
+    outpath : str
+        path to write a cds
+    compressor : numcodecs.abc.Codec
+        compressor used for the partition keys variable
+
+    Returns
+    -------
 
-    :param partitions: partitions from a cds
-    :param outpath: path to write a cds
     """
 
     spw_ids, pol_setup_ids, intents = map(list, zip(*partitions.keys()))
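
The final context line is a compact transpose of the partition-key tuples into three parallel lists; standalone, with made-up keys:

    # Keys of the partitions dict are (spw_id, pol_setup_id, intent) tuples.
    part_keys = [(0, 0, "intent_a"), (1, 0, "intent_b")]

    # zip(*keys) transposes the tuples; map(list, ...) gives three lists.
    spw_ids, pol_setup_ids, intents = map(list, zip(*part_keys))
    print(spw_ids)        # [0, 1]
    print(pol_setup_ids)  # [0, 0]
    print(intents)        # ['intent_a', 'intent_b']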
@@ -47,6 +58,23 @@ def write_metainfo(
 ) -> None:
     """
     Write all metainfo subtables from a cds to zarr storage
+
+    Parameters
+    ----------
+    outpath : str
+
+    metainfo : Dict[str, xr.Dataset]:
+
+    chunks_on_disk : Union[Dict, None] (Default value = None)
+
+    compressor : Union[numcodecs.abc.Codec, None) (Default value = None)
+
+    consolidated : bool (Default value = True)
+
+
+    Returns
+    -------
+
     """
     metadir = Path(outpath, "metainfo")
     os.mkdir(metadir)
@@ -67,6 +95,23 @@ def write_partitions(
 ) -> None:
     """
     Write all data partitions metainfo from a cds to zarr storage
+
+    Parameters
+    ----------
+    outpath : str :
+
+    partitions : Dict[str, xr.Dataset]
+
+    chunks_on_disk : Union[Dict, None] (Default value = None)
+
+    compressor : Union[numcodecs.abc.Codec, None] (Default value = True)
+
+    consolidated: bool (Default value = True)
+
+
+    Returns
+    -------
+
     """
 
     partdir = Path(outpath, "partitions")
@@ -92,11 +137,28 @@
     """
     Write one xr dataset from a cds (either metainfo or a partition).
 
-    :param xds: cds (sub)dataset
-    :param name: dataset name (example subtable name, or xds{i})
-    :param graph_name: the time taken to execute the graph and save the
-    dataset is measured and saved as an attribute in the zarr file.
-    The graph_name is the label for this timing information.
+    Parameters
+    ----------
+    xds : xr.Dataset
+        cds (sub)dataset
+    name : str
+        dataset name (example subtable name, or xds{i})
+    outpath: str :
+
+    chunks_on_disk : Union[Dict, None] (Default value = None)
+
+    compressor : Union[numcodecs.abc.Codec, None] (Default value = None)
+
+    consolidated : bool (Default value = True)
+
+    graph_name : str
+        the time taken to execute the graph and save the
+        dataset is measured and saved as an attribute in the zarr file.
+        The graph_name is the label for this timing information.
+
+    Returns
+    -------
+
     """
 
     xds_for_disk = xds
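
A minimal sketch of the write path these parameters feed into (rechunk for disk, one compressor per data variable, then xr.Dataset.to_zarr), using a toy dataset rather than the actual function body:

    import numpy as np
    import xarray as xr
    from numcodecs import Blosc

    xds = xr.Dataset({"vis": (("time", "chan"), np.zeros((100, 64), "complex64"))})

    chunks_on_disk = {"time": 50, "chan": 32}
    compressor = Blosc(cname="zstd", clevel=2)

    # Rechunk (needs dask), then set one compressor per data variable.
    xds_for_disk = xds.chunk(chunks_on_disk)
    encoding = {dv: {"compressor": compressor} for dv in xds_for_disk.data_vars}
    xds_for_disk.to_zarr("xds0.zarr", mode="w", encoding=encoding, consolidated=True)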
@@ -159,8 +221,20 @@
 
 
 def prepare_attrs_for_zarr(name: str, xds: xr.Dataset) -> xr.Dataset:
-    """Deal with types that cannot be serialized as they are in the
+    """
+    Deal with types that cannot be serialized as they are in the
     cds/xds (ndarray etc.)
+
+    Parameters
+    ----------
+    name : str
+
+    xds : xr.Dataset
+
+
+    Returns
+    -------
+
     """
     ctds_attrs = xds.attrs["other"]["msv2"]["ctds_attrs"]
     col_descrs = ctds_attrs["column_descriptions"]
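
Zarr stores attributes as JSON, so the "deal with types" step has to flatten things like ndarrays. A sketch of that kind of normalization, illustrative only; the real function follows the cds attribute layout shown above:

    import numpy as np

    def jsonify_attrs(attrs: dict) -> dict:
        """Recursively convert ndarray attribute values to plain lists."""
        out = {}
        for key, val in attrs.items():
            if isinstance(val, np.ndarray):
                out[key] = val.tolist()
            elif isinstance(val, dict):
                out[key] = jsonify_attrs(val)
            else:
                out[key] = val
        return out

    attrs = {"column_descriptions": {"UVW": {"shape": np.array([3])}}}
    print(jsonify_attrs(attrs))  # {'column_descriptions': {'UVW': {'shape': [3]}}}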