xradio 0.0.27__py3-none-any.whl → 0.0.29__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- xradio/__init__.py +5 -4
- xradio/_utils/array.py +90 -0
- xradio/_utils/zarr/common.py +48 -3
- xradio/image/_util/_fits/xds_from_fits.py +10 -5
- xradio/image/_util/_zarr/zarr_low_level.py +27 -24
- xradio/image/_util/common.py +4 -1
- xradio/image/_util/zarr.py +4 -1
- xradio/schema/__init__.py +24 -6
- xradio/schema/bases.py +440 -2
- xradio/schema/check.py +96 -55
- xradio/schema/dataclass.py +123 -27
- xradio/schema/metamodel.py +21 -4
- xradio/schema/typing.py +33 -18
- xradio/vis/__init__.py +5 -2
- xradio/vis/_processing_set.py +30 -9
- xradio/vis/_vis_utils/_ms/_tables/create_field_and_source_xds.py +710 -0
- xradio/vis/_vis_utils/_ms/_tables/load.py +23 -10
- xradio/vis/_vis_utils/_ms/_tables/load_main_table.py +145 -64
- xradio/vis/_vis_utils/_ms/_tables/read.py +782 -156
- xradio/vis/_vis_utils/_ms/_tables/read_main_table.py +176 -45
- xradio/vis/_vis_utils/_ms/_tables/read_subtables.py +79 -28
- xradio/vis/_vis_utils/_ms/_tables/write.py +102 -45
- xradio/vis/_vis_utils/_ms/_tables/write_exp_api.py +127 -65
- xradio/vis/_vis_utils/_ms/chunks.py +58 -21
- xradio/vis/_vis_utils/_ms/conversion.py +536 -67
- xradio/vis/_vis_utils/_ms/descr.py +52 -20
- xradio/vis/_vis_utils/_ms/msv2_to_msv4_meta.py +70 -35
- xradio/vis/_vis_utils/_ms/msv4_infos.py +0 -59
- xradio/vis/_vis_utils/_ms/msv4_sub_xdss.py +76 -9
- xradio/vis/_vis_utils/_ms/optimised_functions.py +0 -46
- xradio/vis/_vis_utils/_ms/partition_queries.py +308 -119
- xradio/vis/_vis_utils/_ms/partitions.py +82 -25
- xradio/vis/_vis_utils/_ms/subtables.py +32 -14
- xradio/vis/_vis_utils/_utils/partition_attrs.py +30 -11
- xradio/vis/_vis_utils/_utils/xds_helper.py +136 -45
- xradio/vis/_vis_utils/_zarr/read.py +60 -22
- xradio/vis/_vis_utils/_zarr/write.py +83 -9
- xradio/vis/_vis_utils/ms.py +48 -29
- xradio/vis/_vis_utils/zarr.py +44 -20
- xradio/vis/convert_msv2_to_processing_set.py +106 -32
- xradio/vis/load_processing_set.py +38 -61
- xradio/vis/read_processing_set.py +62 -96
- xradio/vis/schema.py +687 -0
- xradio/vis/vis_io.py +75 -43
- {xradio-0.0.27.dist-info → xradio-0.0.29.dist-info}/LICENSE.txt +6 -1
- {xradio-0.0.27.dist-info → xradio-0.0.29.dist-info}/METADATA +10 -5
- xradio-0.0.29.dist-info/RECORD +73 -0
- {xradio-0.0.27.dist-info → xradio-0.0.29.dist-info}/WHEEL +1 -1
- xradio/vis/model.py +0 -497
- xradio-0.0.27.dist-info/RECORD +0 -71
- {xradio-0.0.27.dist-info → xradio-0.0.29.dist-info}/top_level.txt +0 -0

xradio/vis/_vis_utils/_ms/subtables.py

@@ -13,11 +13,11 @@ from ._tables.read_subtables import read_ephemerides, read_delayed_pointing_table
 
 subt_rename_ids = {
     "ANTENNA": {"row": "antenna_id", "dim_1": "xyz"},
-    "FEED": {"dim_1": "xyz", "dim_2": "receptor", "dim_3": "
+    "FEED": {"dim_1": "xyz", "dim_2": "receptor", "dim_3": "receptor2"},
     "FIELD": {"row": "field_id", "dim_1": "poly_id", "dim_2": "ra/dec"},
     "FREQ_OFFSET": {"antenna1": "antenna1_id", "antenna2": "antenna2_id"},
     "OBSERVATION": {"row": "observation_id", "dim_1": "start/end"},
-    "POINTING": {"dim_1": "n_polynomial", "
+    "POINTING": {"dim_1": "n_polynomial", "dim_3": "dir"},
     "POLARIZATION": {"row": "pol_setup_id", "dim_2": "product_id"},
     "PROCESSOR": {"row": "processor_id"},
     "SPECTRAL_WINDOW": {"row": "spectral_window_id", "dim_1": "chan"},
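
A note on how these rename maps are used: they rename casacore's positional "dim_N" axes to meaningful dimension names when each subtable is read. A minimal sketch of that pattern, with a toy dataset and the "ANTENNA" entry (the real tables come from casacore reads):

    import numpy as np
    import xarray as xr

    # The "ANTENNA" entry from subt_rename_ids above
    rename_map = {"row": "antenna_id", "dim_1": "xyz"}
    antenna = xr.Dataset({"POSITION": (("row", "dim_1"), np.zeros((4, 3)))})
    renamed = antenna.rename(rename_map)
    assert renamed["POSITION"].dims == ("antenna_id", "xyz")
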
@@ -35,10 +35,20 @@ def read_ms_subtables(
     """
     Read MSv2 subtables (main table keywords) as xr.Dataset
 
-
-
-
-
+    Parameters
+    ----------
+    infile : str
+        input MeasurementSet path
+    done_subt : List[str]
+        Subtables that were already read, to skip them
+    asdm_subtables : bool (Default value = False)
+        Whether to also read ASDM_* subtables
+
+    Returns
+    -------
+    Dict[str, xr.Dataset]
+        dict of xarray datasets read from subtables (metadata tables)
+
     """
     ignore_msv2_cols_subt = ["FLAG_CMD", "FLAG_ROW", "BEAM_ID"]
     skip_tables = ["SORTED_TABLE", "FLAG_CMD"] + done_subt
@@ -62,9 +72,8 @@ def read_ms_subtables(
 
     if subt_name == "POINTING":
         subt_path = Path(infile, subt_name)
-
-
-            )
+        rename_ids = {"dim_2": "n_polynomial", "dim_3": "dir"}
+        xds = read_delayed_pointing_table(str(subt_path), rename_ids=rename_ids)
     else:
         xds = read_generic_table(
             infile,
@@ -88,14 +97,23 @@ def read_ms_subtables(
 def add_pointing_to_partition(
     xds_part: xr.Dataset, xds_pointing: xr.Dataset
 ) -> xr.Dataset:
-    """
+    """
+    Take pointing variables from a (delayed) pointing dataset and
     transfer them to a main table partition dataset (interpolating into
     the destination time axis)
 
-
-
-    :
-
+    Parameters
+    ----------
+    xds_part : xr.Dataset
+        a partition/sub-xds of the main table
+    xds_pointing : xr.Dataset
+        the xds read from the pointing subtable
+
+    Returns
+    -------
+    xr.Dataset
+        partition xds with pointing variables added/interpolated from the
+        pointing_xds into its time axis
 
     """
     interp_xds = xds_pointing.interp(time=xds_part.time, method="nearest")
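
The interpolation in the last context line is plain xarray machinery. A minimal sketch of the same pattern with toy datasets (values are made up; method="nearest" requires scipy):

    import numpy as np
    import xarray as xr

    # Pointing table sampled on its own time grid
    xds_pointing = xr.Dataset(
        {"DIRECTION": ("time", np.linspace(0.0, 1.0, 11))},
        coords={"time": np.linspace(0.0, 10.0, 11)},
    )
    # Main-table partition with a different (here coarser) time axis
    xds_part = xr.Dataset(coords={"time": [0.3, 4.9, 9.7]})

    # Nearest-neighbour interpolation onto the partition's time axis
    interp_xds = xds_pointing.interp(time=xds_part.time, method="nearest")
    print(interp_xds["DIRECTION"].values)  # [0.  0.5 1. ]
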
xradio/vis/_vis_utils/_utils/partition_attrs.py

@@ -29,11 +29,19 @@ VisGroup = TypedDict(
 
 
 def make_vis_group_attr(xds: xr.Dataset) -> Dict:
-    """
+    """
+    Add an attribute with the initial data/vis groups that have been
     read from the MS (DATA / CORRECTED_DATA / MODEL_DATA)
 
-
-
+    Parameters
+    ----------
+    xds : xr.Dataset
+        dataset to make the vis_group depending on its data_vars
+
+    Returns
+    -------
+    Dict
+        vis_group derived form this dataset
     """
     msv2_extended_vis_vars = ["vis", "vis_corrected", "vis_model"]
     msv2_col_names = ["DATA", "CORRECTED_DATA", "MODEL_DATA"]
@@ -87,7 +95,8 @@ def add_partition_attrs(
     part_ids: PartitionIds,
     other_attrs: Dict,
 ) -> xr.Dataset:
-    """
+    """
+    add attributes to the xr.Dataset:
     - sub-dict of partition-id related ones
     - sub-dict of data/vis groups
     - sub-dict of attributes coming from the lower level read
@@ -96,13 +105,23 @@ def add_partition_attrs(
     Produces the partition IDs that can be retrieved from the DD subtable and also
     adds the ones passed in part_ids
 
-
-
-
-
-
-
-
+    Parameters
+    ----------
+    xds : xr.Dataset
+        dataset partition
+    ddi : int
+        DDI of this partition
+    ddi_xds : xr.Dataset
+        dataset for the DATA_DESCRIPTION subtable
+    part_ids : PartitionIds
+        partition id attrs
+    other_attrs : Dict
+        additional attributes produced by the read functions
+
+    Returns
+    -------
+    xr.Dataset
+        dataset with attributes added
     """
 
     xds = xds.assign_attrs(
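
The assign_attrs call in the trailing context line attaches those sub-dicts to the dataset. A minimal sketch of the pattern; the attribute keys here are illustrative, not necessarily the exact names the function writes (though the "other"/"msv2"/"ctds_attrs" nesting does appear later in prepare_attrs_for_zarr):

    import xarray as xr

    xds = xr.Dataset()
    xds = xds.assign_attrs(
        {
            "partition_ids": {"spw_id": 0, "pol_setup_id": 0},  # partition-id sub-dict
            "vis_groups": {"vis": "DATA"},                      # data/vis groups sub-dict
            "other": {"msv2": {"ctds_attrs": {}}},              # lower-level read attrs
        }
    )
    print(xds.attrs["partition_ids"]["spw_id"])  # 0
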
xradio/vis/_vis_utils/_utils/xds_helper.py

@@ -12,7 +12,8 @@ from .stokes_types import stokes_types
 def make_coords(
     xds: xr.Dataset, ddi: int, subtables: Tuple[xr.Dataset, ...]
 ) -> Dict[str, np.ndarray]:
-    """
+    """
+    Make the coords to be added to a partition or chunk (besides
     the time, baseline) basic structure
 
     Grabs:
@@ -20,7 +21,18 @@ def make_coords(
     - pol idxs from the pol+ddi subtables -> pol names via the stokes_types
     - antenna IDs from antenna subtable
 
-
+    Parameters
+    ----------
+    xds : xr.Dataset
+
+    ddi : int
+
+    subtables: Tuple[xr.Dataset, ...]
+
+
+    Returns
+    -------
+    Dict[str, np.ndarray]
     """
     ant_xds, ddi_xds, spw_xds, pol_xds = subtables
     freq = spw_xds.chan_freq.values[
@@ -46,15 +58,25 @@ def vis_xds_packager_cds(
     subtables: List[Tuple[str, xr.Dataset]],
     partitions: Dict[Any, xr.Dataset],
     descr_add: str = "",
-):
-    """
+) -> CASAVisSet:
+    """
+    Takes a a list of subtable xds datasets and a dictionary of data
     partition xds datasets and and packages them as a CASA vis dataset
     (cds)
 
-
-
-
-
+    Parameters
+    ----------
+    partitions : List[Tuple[str, xr.Dataset]]
+        data partiions as xds datasets
+    subtables : Dict[Any, xr.Dataset]
+        subtables as xds datasets
+    descr_add : str (Default value = "")
+        substring to add to the short descr string of the cds
+
+    Returns
+    -------
+    CASAVisSet
+        A "cds" - container for the metainfo subtables and data partitions
     """
     vers = version("xradio")
 
@@ -70,14 +92,24 @@ def vis_xds_packager_mxds(
     subtables: List[Tuple[str, xr.Dataset]],
     add_global_coords: bool = True,
 ) -> xr.Dataset:
-    """
+    """
+    Takes a dictionary of data partition xds datasets and a list of
     subtable xds datasets and packages them as a dataset of datasets
     (mxds)
 
-
-
-    :
-
+    Parameters
+    ----------
+    partitions : Dict[Any, xr.Dataset]
+        data partiions as xds datasets
+    subtables : List[Tuple[str, xr.Dataset]]
+        subtables as xds datasets
+    :add_global_coords: whether to add coords to the output mxds
+    add_global_coords: bool (Default value = True)
+
+    Returns
+    -------
+    xr.Dataset
+        A "mxds" - xr.dataset of datasets
     """
     mxds = xr.Dataset(attrs={"metainfo": subtables, "partitions": partitions})
 
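
The mxds construction in the context line above is just an xr.Dataset whose attrs carry other datasets. A self-contained sketch of that "dataset of datasets" pattern (keys are illustrative):

    import xarray as xr

    subtables = {"antenna": xr.Dataset()}
    partitions = {(0, 0, "intent"): xr.Dataset()}

    mxds = xr.Dataset(attrs={"metainfo": subtables, "partitions": partitions})
    print(list(mxds.attrs["metainfo"]))    # ['antenna']
    print(list(mxds.attrs["partitions"]))  # [(0, 0, 'intent')]
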
@@ -87,7 +119,7 @@ def vis_xds_packager_mxds(
     return mxds
 
 
-def make_global_coords(mxds: xr.Dataset):
+def make_global_coords(mxds: xr.Dataset) -> Dict[str, xr.DataArray]:
     coords = {}
     metainfo = mxds.attrs["metainfo"]
     if "antenna" in metainfo:
@@ -125,12 +157,24 @@ def make_global_coords(mxds: xr.Dataset):
 def expand_xds(xds: xr.Dataset) -> xr.Dataset:
     """
     expand single (row) dimension of xds to (time, baseline)
+
+    Parameters
+    ----------
+    xds : xr.Dataset
+        "flat" dataset (with row dimension - without (time, baseline) dimensions)
+
+    Returns
+    -------
+    xr.Dataset
+        expanded dataset, with (time, baseline) dimensions
     """
     assert "baseline" not in xds.coords
 
     txds = xds.copy()
     unique_baselines, baselines = np.unique(
-        [txds.
+        [txds.baseline_ant1_id.values, txds.baseline_ant2_id.values],
+        axis=1,
+        return_inverse=True,
     )
     txds["baseline"] = xr.DataArray(baselines.astype("int32"), dims=["row"])
 
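
The np.unique call above is what maps per-row antenna pairs to dense baseline indices. A small standalone demonstration of the same call, with toy antenna ids:

    import numpy as np

    ant1 = np.array([0, 0, 1, 0, 0, 1])
    ant2 = np.array([1, 2, 2, 1, 2, 2])
    # Each column of the stacked array is an (ant1, ant2) pair; unique columns
    # become the baselines, and return_inverse maps every row to its baseline.
    unique_baselines, baselines = np.unique([ant1, ant2], axis=1, return_inverse=True)
    print(unique_baselines)  # [[0 0 1]
                             #  [1 2 2]]
    print(baselines)         # [0 1 2 0 1 2]
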
@@ -148,7 +192,7 @@ def expand_xds(xds: xr.Dataset) -> xr.Dataset:
         if txds[dv].dtype != xds[dv].dtype:
             txds[dv] = txds[dv].astype(xds[dv].dtype)
     except Exception as exc:
-
+        logger.warning(
             f"WARNING: Cannot expand rows to (time, baseline), "
             f"possibly duplicate values in (time, baseline). Exception: {exc}"
         )
@@ -160,20 +204,47 @@ def expand_xds(xds: xr.Dataset) -> xr.Dataset:
 def flatten_xds(xds: xr.Dataset) -> xr.Dataset:
     """
     flatten (time, baseline) dimensions of xds back to single dimension (row)
+
+    Parameters
+    ----------
+    xds : xr.Dataset
+
+
+    Returns
+    -------
+    xr.Dataset
     """
-
+    # known invalid cast warning when casting to integer
+    with np.errstate(invalid="ignore"):
+        nan_int = np.array([np.nan]).astype("int32")[0]
     txds = xds.copy()
 
     # flatten the time x baseline dimensions of main table
     if ("time" in xds.sizes) and ("baseline" in xds.sizes):
         txds = xds.stack({"row": ("time", "baseline")}).transpose("row", ...)
+        # compute for issue https://github.com/hainegroup/oceanspy/issues/332
+        # drop=True silently does compute (or at least used to)
         txds = txds.where(
-            (txds.state_id != nan_int) & (txds.field_id != nan_int),
+            ((txds.state_id != nan_int) & (txds.field_id != nan_int)).compute(),
+            drop=True,
         )  # .unify_chunks()
-        for dv in list(xds.data_vars):
-            txds[dv] = txds[dv].astype(xds[dv].dtype)
 
-
+        # re-assigning (implicitly dropping index coords) one by one produces
+        # DeprecationWarnings: https://github.com/pydata/xarray/issues/6505
+        astyped_data_vars = dict(xds.data_vars)
+        for dv in list(txds.data_vars):
+            if txds[dv].dtype != xds[dv].dtype:
+                astyped_data_vars[dv] = txds[dv].astype(xds[dv].dtype)
+            else:
+                astyped_data_vars[dv] = txds[dv]
+
+        flat_xds = xr.Dataset(astyped_data_vars, coords=txds.coords, attrs=txds.attrs)
+        flat_xds = flat_xds.reset_index(["time", "baseline"])
+
+    else:
+        flat_xds = txds
+
+    return flat_xds
 
 
 ####################################
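
A minimal sketch of the stack/where pattern used by flatten_xds, with a toy dataset (the int32 minimum is what np.nan casts to on most platforms, which is what the nan_int sentinel relies on):

    import numpy as np
    import xarray as xr

    with np.errstate(invalid="ignore"):
        nan_int = np.array([np.nan]).astype("int32")[0]  # typically -2147483648

    xds = xr.Dataset(
        {"field_id": (("time", "baseline"),
                      np.array([[0, 0], [0, nan_int]], dtype="int32"))},
        coords={"time": [0.0, 1.0], "baseline": [0, 1]},
    )

    txds = xds.stack({"row": ("time", "baseline")}).transpose("row", ...)
    # Computing the mask up front avoids re-evaluating it lazily; drop=True
    # removes the flagged rows. Note where() upcasts ints to float, which is
    # why the function re-applies the original dtypes afterwards.
    txds = txds.where((txds.field_id != nan_int).compute(), drop=True)
    print(txds.sizes["row"])  # 3
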
@@ -188,21 +259,30 @@ def optimal_chunking(
     Determine the optimal chunk shape for reading an MS or Image based
     on machine resources and intended operations
 
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+    Parameters
+    ----------
+    ndim : Union[int, None] = None
+        number of dimensions to chunk. An MS is 3, an
+        expanded MS is 4. An image could be anywhere from 2 to 5. Not
+        needed if data_shape is given.
+    didxs : Union[Tuple[int], List[int], None] = None
+        dimension indices over which subsequent operations
+        will be performed. Values should be less than ndim. Tries to
+        reduce inter-process communication of data contents. Needs to
+        know the shape to do this well. Default None balances chunk size
+        across all dimensions.
+    chunk_size : str (Default value = "auto")
+        target chunk size ('large', 'small', 'auto').
+        Default 'auto' tries to guess by looking at CPU core count and
+        available memory.
+    data_shape : Union[tuple, None] = None
+        shape of the total MS DDI or Image data. Helps
+        to know. Default None does not optimize based on shape
+
+    Returns
+    -------
+    tuple
+        optimal chunking for reading the ms (row, chan, pol)
     """
     assert (ndim is not None) or (
         data_shape is not None
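
For the default case (didxs=None, which "balances chunk size across all dimensions"), the underlying idea can be sketched as taking roughly the n-th root of a target element count per dimension. This is only an illustration of the balancing notion, not the function's actual algorithm:

    import numpy as np

    target_elems = 2**24                   # illustrative per-chunk element budget
    data_shape = (10000, 2016, 1024, 4)    # toy (time, baseline, chan, pol)
    per_dim = int(target_elems ** (1 / len(data_shape)))  # 64
    chunks = tuple(min(n, per_dim) for n in data_shape)
    print(chunks)  # (64, 64, 64, 4)
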
@@ -278,22 +358,33 @@ def calc_optimal_ms_chunk_shape(
     """
     Calculates the max number of rows (1st dim in shape) of a variable
     that can be fit in the memory for a thread.
+
+    Parameters
+    ----------
+    memory_available_in_bytes :
+
+    shape :
+
+    element_size_in_bytes :
+
+    column_name :
+
+
+    Returns
+    -------
+    int
     """
     factor = 0.8  # Account for memory used by other objects in thread.
     # total_mem = np.prod(shape)*element_size_in_bytes
     single_row_mem = np.prod(shape[1:]) * element_size_in_bytes
 
-
-
-    except AssertionError as err:
-        logger.exception(
+    if not single_row_mem < factor * memory_available_in_bytes:
+        msg = (
             "Not engough memory in a thread to contain a row of "
-
-
-            + str(single_row_mem / factor)
-            + " bytes."
+            f"{column_name}. Need at least {single_row_mem / factor}"
+            " bytes."
         )
-        raise
+        raise RuntimeError(msg)
 
     rows_chunk_size = int((factor * memory_available_in_bytes) / single_row_mem)
 
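
The arithmetic in this hunk is straightforward: a row's memory footprint is the product of the non-row dimensions times the element size, and the row chunk is however many of those fit in 80% of the thread's memory. Worked through with illustrative numbers:

    import numpy as np

    memory_available_in_bytes = 2 * 1024**3  # 2 GiB for the thread (illustrative)
    shape = (1_000_000, 1024, 4)             # (row, chan, pol), toy values
    element_size_in_bytes = 8                # e.g. a complex64 visibility

    factor = 0.8  # headroom for other objects in the thread
    single_row_mem = np.prod(shape[1:]) * element_size_in_bytes  # 32768 bytes/row
    rows_chunk_size = int((factor * memory_available_in_bytes) / single_row_mem)
    print(rows_chunk_size)  # 52428 rows per chunk
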
xradio/vis/_vis_utils/_zarr/read.py

@@ -11,9 +11,16 @@ def read_part_keys(inpath: str) -> List[Tuple]:
     """
     Reads the partition keys from a Zarr-stored cds.
 
-
+    Parameters
+    ----------
+    inpath : str
+        path to read from
+
+    Returns
+    -------
+    List[Tuple]
+        partition keys from a cds
 
-    :return: partition keys from a cds
     """
 
     xds_keys = xr.open_zarr(
@@ -31,9 +38,19 @@ def read_subtables(inpath: str, asdm_subtables: bool) -> Dict[str, xr.Dataset]:
     """
     Reads the metainfo subtables from a Zarr-stored cds.
 
-
+    Parameters
+    ----------
+    inpath : str
+        path to read from
+
+    asdm_subtables : bool
+
+
+    Returns
+    -------
+    Dict[str, xr.Dataset]
+        metainfo subtables from a cds
 
-    :return: metainfo subtables from a cds
     """
 
     metainfo = {}
@@ -53,9 +70,18 @@ def read_partitions(inpath: str, part_keys: List[Tuple]) -> Dict[str, xr.Dataset]:
     """
     Reads all the data partitions a Zarr-stored cds.
 
-
+    Parameters
+    ----------
+    inpath : str
+        path to read from
+    part_keys : List[Tuple]
+
+
+    Returns
+    -------
+    Dict[str, xr.Dataset]
+        partitions from a cds
 
-    :return: partitions from a cds
     """
 
     partitions = {}
@@ -79,13 +105,23 @@ def read_xds(
     """
     Read single xds from zarr storage.
 
-
-
-
-
-
-
-
+    Parameters
+    ----------
+    inpath : str
+        path to read from
+    chunks : Union[Dict, None] (Default value = None)
+        set chunk size per dimension. Dict is in the form of
+        'dim':chunk_size, for example {'time':100, 'baseline':400, 'chan':32, 'pol':1}.
+        Default None uses the original chunking in the zarr input.
+    consolidated : boold (Default value = True)
+        use zarr consolidated metadata.
+    overwrite_encoded_chunks : bool (Default value = True)
+        drop the zarr chunks encoded for each variable
+        when a dataset is loaded with specified chunk sizes.
+
+    Returns
+    -------
+    xr.Dataset
     """
 
     xds = xr.open_zarr(
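
The chunks/consolidated/overwrite_encoded_chunks parameters documented above appear to be passed through to xarray (the context line shows the xr.open_zarr call). A sketch of the equivalent direct call; the store path and chunk sizes are made up:

    import xarray as xr

    xds = xr.open_zarr(
        "mydata.vis.zarr/partitions/xds_0",  # hypothetical store
        chunks={"time": 100, "baseline": 400, "chan": 32, "pol": 1},
        consolidated=True,
        overwrite_encoded_chunks=True,
    )
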
@@ -99,11 +135,11 @@
 
 
 def read_zarr(
-    infile,
-    sel_xds=None,
-    chunks=None,
-    consolidated=True,
-    overwrite_encoded_chunks=True,
+    infile: str,
+    sel_xds: Union[List, str] = None,
+    chunks: Dict = None,
+    consolidated: bool = True,
+    overwrite_encoded_chunks: bool = True,
     **kwargs,
 ):
     """
@@ -128,11 +164,12 @@ def read_zarr(
     overwrite_encoded_chunks : bool
         drop the zarr chunks encoded for each variable when a dataset is loaded with
         specified chunk sizes. Default True, only applies when chunks is not None.
+    **kwargs :
+
 
     Returns
     -------
-
-        New xarray Dataset of Visibility data contents
+
     """
 
     if chunks is None:
@@ -178,8 +215,9 @@
 
 
 def _fix_dict_for_ms(name, xds):
-
-    xds.attrs["
+    # Used to be:
+    # xds.attrs["column_descriptions"] = xds.attrs["column_descriptions"][0]
+    # xds.attrs["info"] = xds.attrs["info"][0]
 
     if "xds" in name:
         xds.column_descriptions["UVW"]["shape"] = np.array(

xradio/vis/_vis_utils/_zarr/write.py

@@ -10,10 +10,21 @@ import zarr
 def write_part_keys(
     partitions: Dict[Any, xr.Dataset], outpath: str, compressor: numcodecs.abc.Codec
 ) -> None:
-    """
+    """
+    Writes an xds with the partition keys.
+
+    Parameters
+    ----------
+    partitions : Dict[Any, xr.Dataset]
+        partitions from a cds
+    outpath : str
+        path to write a cds
+    compressor : numcodecs.abc.Codec
+        compressor used for the partition keys variable
+
+    Returns
+    -------
 
-    :param partitions: partitions from a cds
-    :param outpath: path to write a cds
     """
 
     spw_ids, pol_setup_ids, intents = map(list, zip(*partitions.keys()))
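
Writing with an explicit numcodecs compressor goes through the per-variable encoding. A minimal sketch; the store path, variable name, and codec settings are illustrative:

    import numcodecs
    import numpy as np
    import xarray as xr

    compressor = numcodecs.Blosc(cname="zstd", clevel=2, shuffle=0)
    part_keys = xr.Dataset({"spw_ids": ("key", np.array([0, 1, 1]))})

    # The encoding dict carries the compressor into the zarr store per variable
    part_keys.to_zarr(
        "out.vis.zarr/part_keys",
        mode="w",
        encoding={"spw_ids": {"compressor": compressor}},
        consolidated=True,
    )
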
@@ -47,6 +58,23 @@ def write_metainfo(
 ) -> None:
     """
     Write all metainfo subtables from a cds to zarr storage
+
+    Parameters
+    ----------
+    outpath : str
+
+    metainfo : Dict[str, xr.Dataset]:
+
+    chunks_on_disk : Union[Dict, None] (Default value = None)
+
+    compressor : Union[numcodecs.abc.Codec, None) (Default value = None)
+
+    consolidated : bool (Default value = True)
+
+
+    Returns
+    -------
+
     """
     metadir = Path(outpath, "metainfo")
     os.mkdir(metadir)
@@ -67,6 +95,23 @@ def write_partitions(
 ) -> None:
     """
     Write all data partitions metainfo from a cds to zarr storage
+
+    Parameters
+    ----------
+    outpath : str :
+
+    partitions : Dict[str, xr.Dataset]
+
+    chunks_on_disk : Union[Dict, None] (Default value = None)
+
+    compressor : Union[numcodecs.abc.Codec, None] (Default value = True)
+
+    consolidated: bool (Default value = True)
+
+
+    Returns
+    -------
+
     """
 
     partdir = Path(outpath, "partitions")
@@ -92,11 +137,28 @@ def write_xds_to_zarr(
     """
     Write one xr dataset from a cds (either metainfo or a partition).
 
-
-
-
-
-
+    Parameters
+    ----------
+    xds : xr.Dataset
+        cds (sub)dataset
+    name : str
+        dataset name (example subtable name, or xds{i})
+    outpath: str :
+
+    chunks_on_disk : Union[Dict, None] (Default value = None)
+
+    compressor : Union[numcodecs.abc.Codec, None] (Default value = None)
+
+    consolidated : bool (Default value = True)
+
+    graph_name : str
+        the time taken to execute the graph and save the
+        dataset is measured and saved as an attribute in the zarr file.
+        The graph_name is the label for this timing information.
+
+    Returns
+    -------
+
     """
 
     xds_for_disk = xds
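
For the graph_name timing attribute described above, the general shape of the mechanism could look like this. A sketch under assumptions; the actual bookkeeping in write_xds_to_zarr may differ:

    import time
    import xarray as xr
    import zarr

    xds = xr.Dataset({"x": ("row", list(range(10)))})

    start = time.time()
    xds.to_zarr("timed.zarr", mode="w", consolidated=True)
    elapsed = time.time() - start

    # Record the elapsed time as a zarr attribute keyed by the graph name,
    # then re-consolidate so the consolidated metadata stays in sync.
    root = zarr.open_group("timed.zarr", mode="a")
    root.attrs["my_graph_name"] = elapsed
    zarr.consolidate_metadata("timed.zarr")
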
@@ -159,8 +221,20 @@ def write_xds_to_zarr(
 
 
 def prepare_attrs_for_zarr(name: str, xds: xr.Dataset) -> xr.Dataset:
-    """
+    """
+    Deal with types that cannot be serialized as they are in the
     cds/xds (ndarray etc.)
+
+    Parameters
+    ----------
+    name : str
+
+    xds : xr.Dataset
+
+
+    Returns
+    -------
+
     """
     ctds_attrs = xds.attrs["other"]["msv2"]["ctds_attrs"]
     col_descrs = ctds_attrs["column_descriptions"]
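
The serialization problem prepare_attrs_for_zarr deals with is that ndarray values inside nested attrs are not JSON-serializable. The general fix is a recursive conversion to plain lists; a sketch of the idea, not the function's exact implementation:

    import numpy as np

    def tolist_attrs(obj):
        """Recursively replace ndarrays with lists so attrs can be serialized."""
        if isinstance(obj, np.ndarray):
            return obj.tolist()
        if isinstance(obj, dict):
            return {key: tolist_attrs(val) for key, val in obj.items()}
        return obj

    col_descrs = {"UVW": {"shape": np.array([3])}}
    print(tolist_attrs(col_descrs))  # {'UVW': {'shape': [3]}}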