xradio 0.0.44__tar.gz → 0.0.46__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {xradio-0.0.44/src/xradio.egg-info → xradio-0.0.46}/PKG-INFO +1 -1
- {xradio-0.0.44 → xradio-0.0.46}/pyproject.toml +1 -1
- xradio-0.0.46/src/xradio/_utils/dict_helpers.py +14 -0
- {xradio-0.0.44 → xradio-0.0.46}/src/xradio/image/_util/_casacore/xds_from_casacore.py +4 -17
- {xradio-0.0.44 → xradio-0.0.46}/src/xradio/image/_util/_fits/xds_from_fits.py +7 -18
- {xradio-0.0.44 → xradio-0.0.46}/src/xradio/image/_util/common.py +3 -6
- {xradio-0.0.44 → xradio-0.0.46}/src/xradio/image/_util/image_factory.py +4 -9
- {xradio-0.0.44 → xradio-0.0.46}/src/xradio/measurement_set/__init__.py +5 -1
- {xradio-0.0.44 → xradio-0.0.46}/src/xradio/measurement_set/_utils/_msv2/conversion.py +236 -41
- {xradio-0.0.44 → xradio-0.0.46}/src/xradio/measurement_set/_utils/_msv2/create_antenna_xds.py +5 -22
- {xradio-0.0.44 → xradio-0.0.46}/src/xradio/measurement_set/_utils/_msv2/create_field_and_source_xds.py +8 -13
- {xradio-0.0.44 → xradio-0.0.46}/src/xradio/measurement_set/_utils/_msv2/msv4_sub_xdss.py +79 -23
- {xradio-0.0.44 → xradio-0.0.46}/src/xradio/measurement_set/_utils/_msv2/partition_queries.py +4 -5
- {xradio-0.0.44 → xradio-0.0.46}/src/xradio/measurement_set/convert_msv2_to_processing_set.py +44 -3
- {xradio-0.0.44 → xradio-0.0.46}/src/xradio/measurement_set/schema.py +47 -35
- {xradio-0.0.44 → xradio-0.0.46/src/xradio.egg-info}/PKG-INFO +1 -1
- {xradio-0.0.44 → xradio-0.0.46}/src/xradio.egg-info/SOURCES.txt +1 -0
- {xradio-0.0.44 → xradio-0.0.46}/LICENSE.txt +0 -0
- {xradio-0.0.44 → xradio-0.0.46}/MANIFEST.in +0 -0
- {xradio-0.0.44 → xradio-0.0.46}/README.md +0 -0
- {xradio-0.0.44 → xradio-0.0.46}/setup.cfg +0 -0
- {xradio-0.0.44 → xradio-0.0.46}/src/xradio/__init__.py +0 -0
- {xradio-0.0.44 → xradio-0.0.46}/src/xradio/_utils/__init__.py +0 -0
- {xradio-0.0.44 → xradio-0.0.46}/src/xradio/_utils/_casacore/tables.py +0 -0
- {xradio-0.0.44 → xradio-0.0.46}/src/xradio/_utils/coord_math.py +0 -0
- {xradio-0.0.44 → xradio-0.0.46}/src/xradio/_utils/list_and_array.py +0 -0
- {xradio-0.0.44 → xradio-0.0.46}/src/xradio/_utils/schema.py +0 -0
- {xradio-0.0.44 → xradio-0.0.46}/src/xradio/_utils/zarr/__init__.py +0 -0
- {xradio-0.0.44 → xradio-0.0.46}/src/xradio/_utils/zarr/common.py +0 -0
- {xradio-0.0.44 → xradio-0.0.46}/src/xradio/image/__init__.py +0 -0
- {xradio-0.0.44 → xradio-0.0.46}/src/xradio/image/_util/__init__.py +0 -0
- {xradio-0.0.44 → xradio-0.0.46}/src/xradio/image/_util/_casacore/__init__.py +0 -0
- {xradio-0.0.44 → xradio-0.0.46}/src/xradio/image/_util/_casacore/common.py +0 -0
- {xradio-0.0.44 → xradio-0.0.46}/src/xradio/image/_util/_casacore/xds_to_casacore.py +0 -0
- {xradio-0.0.44 → xradio-0.0.46}/src/xradio/image/_util/_zarr/common.py +0 -0
- {xradio-0.0.44 → xradio-0.0.46}/src/xradio/image/_util/_zarr/xds_from_zarr.py +0 -0
- {xradio-0.0.44 → xradio-0.0.46}/src/xradio/image/_util/_zarr/xds_to_zarr.py +0 -0
- {xradio-0.0.44 → xradio-0.0.46}/src/xradio/image/_util/_zarr/zarr_low_level.py +0 -0
- {xradio-0.0.44 → xradio-0.0.46}/src/xradio/image/_util/casacore.py +0 -0
- {xradio-0.0.44 → xradio-0.0.46}/src/xradio/image/_util/fits.py +0 -0
- {xradio-0.0.44 → xradio-0.0.46}/src/xradio/image/_util/zarr.py +0 -0
- {xradio-0.0.44 → xradio-0.0.46}/src/xradio/image/image.py +0 -0
- {xradio-0.0.44 → xradio-0.0.46}/src/xradio/measurement_set/_utils/__init__.py +0 -0
- {xradio-0.0.44 → xradio-0.0.46}/src/xradio/measurement_set/_utils/_msv2/_tables/load.py +0 -0
- {xradio-0.0.44 → xradio-0.0.46}/src/xradio/measurement_set/_utils/_msv2/_tables/load_main_table.py +0 -0
- {xradio-0.0.44 → xradio-0.0.46}/src/xradio/measurement_set/_utils/_msv2/_tables/read.py +0 -0
- {xradio-0.0.44 → xradio-0.0.46}/src/xradio/measurement_set/_utils/_msv2/_tables/read_main_table.py +0 -0
- {xradio-0.0.44 → xradio-0.0.46}/src/xradio/measurement_set/_utils/_msv2/_tables/read_subtables.py +0 -0
- {xradio-0.0.44 → xradio-0.0.46}/src/xradio/measurement_set/_utils/_msv2/_tables/table_query.py +0 -0
- {xradio-0.0.44 → xradio-0.0.46}/src/xradio/measurement_set/_utils/_msv2/_tables/write.py +0 -0
- {xradio-0.0.44 → xradio-0.0.46}/src/xradio/measurement_set/_utils/_msv2/_tables/write_exp_api.py +0 -0
- {xradio-0.0.44 → xradio-0.0.46}/src/xradio/measurement_set/_utils/_msv2/chunks.py +0 -0
- {xradio-0.0.44 → xradio-0.0.46}/src/xradio/measurement_set/_utils/_msv2/descr.py +0 -0
- {xradio-0.0.44 → xradio-0.0.46}/src/xradio/measurement_set/_utils/_msv2/msv2_msv3.py +0 -0
- {xradio-0.0.44 → xradio-0.0.46}/src/xradio/measurement_set/_utils/_msv2/msv2_to_msv4_meta.py +0 -0
- {xradio-0.0.44 → xradio-0.0.46}/src/xradio/measurement_set/_utils/_msv2/msv4_info_dicts.py +0 -0
- {xradio-0.0.44 → xradio-0.0.46}/src/xradio/measurement_set/_utils/_msv2/optimised_functions.py +0 -0
- {xradio-0.0.44 → xradio-0.0.46}/src/xradio/measurement_set/_utils/_msv2/partitions.py +0 -0
- {xradio-0.0.44 → xradio-0.0.46}/src/xradio/measurement_set/_utils/_msv2/subtables.py +0 -0
- {xradio-0.0.44 → xradio-0.0.46}/src/xradio/measurement_set/_utils/_utils/cds.py +0 -0
- {xradio-0.0.44 → xradio-0.0.46}/src/xradio/measurement_set/_utils/_utils/partition_attrs.py +0 -0
- {xradio-0.0.44 → xradio-0.0.46}/src/xradio/measurement_set/_utils/_utils/stokes_types.py +0 -0
- {xradio-0.0.44 → xradio-0.0.46}/src/xradio/measurement_set/_utils/_utils/xds_helper.py +0 -0
- {xradio-0.0.44 → xradio-0.0.46}/src/xradio/measurement_set/_utils/_zarr/encoding.py +0 -0
- {xradio-0.0.44 → xradio-0.0.46}/src/xradio/measurement_set/_utils/_zarr/read.py +0 -0
- {xradio-0.0.44 → xradio-0.0.46}/src/xradio/measurement_set/_utils/_zarr/write.py +0 -0
- {xradio-0.0.44 → xradio-0.0.46}/src/xradio/measurement_set/_utils/msv2.py +0 -0
- {xradio-0.0.44 → xradio-0.0.46}/src/xradio/measurement_set/_utils/zarr.py +0 -0
- {xradio-0.0.44 → xradio-0.0.46}/src/xradio/measurement_set/load_processing_set.py +0 -0
- {xradio-0.0.44 → xradio-0.0.46}/src/xradio/measurement_set/measurement_set_xds.py +0 -0
- {xradio-0.0.44 → xradio-0.0.46}/src/xradio/measurement_set/open_processing_set.py +0 -0
- {xradio-0.0.44 → xradio-0.0.46}/src/xradio/measurement_set/processing_set.py +0 -0
- {xradio-0.0.44 → xradio-0.0.46}/src/xradio/schema/__init__.py +0 -0
- {xradio-0.0.44 → xradio-0.0.46}/src/xradio/schema/bases.py +0 -0
- {xradio-0.0.44 → xradio-0.0.46}/src/xradio/schema/check.py +0 -0
- {xradio-0.0.44 → xradio-0.0.46}/src/xradio/schema/dataclass.py +0 -0
- {xradio-0.0.44 → xradio-0.0.46}/src/xradio/schema/metamodel.py +0 -0
- {xradio-0.0.44 → xradio-0.0.46}/src/xradio/schema/typing.py +0 -0
- {xradio-0.0.44 → xradio-0.0.46}/src/xradio/sphinx/__init__.py +0 -0
- {xradio-0.0.44 → xradio-0.0.46}/src/xradio/sphinx/schema_table.py +0 -0
- {xradio-0.0.44 → xradio-0.0.46}/src/xradio.egg-info/dependency_links.txt +0 -0
- {xradio-0.0.44 → xradio-0.0.46}/src/xradio.egg-info/requires.txt +0 -0
- {xradio-0.0.44 → xradio-0.0.46}/src/xradio.egg-info/top_level.txt +0 -0
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
def make_quantity(value, units: str) -> dict:
|
|
2
|
+
"""
|
|
3
|
+
create a quantity dictionary given value and units
|
|
4
|
+
Parameters
|
|
5
|
+
----------
|
|
6
|
+
value : numeric or array of numerics
|
|
7
|
+
Quantity value
|
|
8
|
+
units: str
|
|
9
|
+
Quantity units
|
|
10
|
+
Returns
|
|
11
|
+
-------
|
|
12
|
+
dict
|
|
13
|
+
"""
|
|
14
|
+
return {"value": value, "units": units, "type": "quantity"}
|
|
@@ -31,6 +31,7 @@ from ..common import (
|
|
|
31
31
|
)
|
|
32
32
|
from ...._utils._casacore.tables import extract_table_attributes, open_table_ro
|
|
33
33
|
from xradio._utils.coord_math import _deg_to_rad
|
|
34
|
+
from xradio._utils.dict_helpers import make_quantity
|
|
34
35
|
|
|
35
36
|
"""
|
|
36
37
|
def _add_coord_attrs(xds: xr.Dataset, icoords: dict, dir_axes: list) -> xr.Dataset:
|
|
@@ -62,13 +63,7 @@ def _add_freq_attrs(xds, coord_dict):
|
|
|
62
63
|
for k in coord_dict:
|
|
63
64
|
if k.startswith("spectral"):
|
|
64
65
|
sd = coord_dict[k]
|
|
65
|
-
|
|
66
|
-
meta["rest_frequency"] = {
|
|
67
|
-
"type": "quantity",
|
|
68
|
-
"units": "Hz",
|
|
69
|
-
"value": sd["restfreq"],
|
|
70
|
-
}
|
|
71
|
-
# meta["restfreqs"] = {'type': 'quantity', 'units': 'Hz', 'value': list(sd["restfreqs"])}
|
|
66
|
+
meta["rest_frequency"] = make_quantity(sd["restfreq"], "Hz")
|
|
72
67
|
meta["type"] = "frequency"
|
|
73
68
|
meta["units"] = sd["unit"]
|
|
74
69
|
meta["frame"] = sd["system"]
|
|
@@ -184,11 +179,7 @@ def _casa_image_to_xds_attrs(img_full_path: str, history: bool = True) -> dict:
|
|
|
184
179
|
k = "latpole"
|
|
185
180
|
if k in coord_dir_dict:
|
|
186
181
|
for j in (k, "longpole"):
|
|
187
|
-
dir_dict[j] =
|
|
188
|
-
"value": coord_dir_dict[j] * _deg_to_rad,
|
|
189
|
-
"units": "rad",
|
|
190
|
-
"type": "quantity",
|
|
191
|
-
}
|
|
182
|
+
dir_dict[j] = make_quantity(coord_dir_dict[j] * _deg_to_rad, "rad")
|
|
192
183
|
for j in ("pc", "projection_parameters", "projection"):
|
|
193
184
|
if j in coord_dir_dict:
|
|
194
185
|
dir_dict[j] = coord_dir_dict[j]
|
|
@@ -518,11 +509,7 @@ def _get_freq_values_attrs(
|
|
|
518
509
|
crpix=wcs["crpix"],
|
|
519
510
|
cdelt=wcs["cdelt"],
|
|
520
511
|
)
|
|
521
|
-
attrs["rest_frequency"] =
|
|
522
|
-
"type": "quantity",
|
|
523
|
-
"units": "Hz",
|
|
524
|
-
"value": sd["restfreq"],
|
|
525
|
-
}
|
|
512
|
+
attrs["rest_frequency"] = make_quantity(sd["restfreq"], "Hz")
|
|
526
513
|
attrs["type"] = "frequency"
|
|
527
514
|
attrs["units"] = sd["unit"]
|
|
528
515
|
attrs["frame"] = sd["system"]
|
|
@@ -16,6 +16,7 @@ from ..common import (
|
|
|
16
16
|
_l_m_attr_notes,
|
|
17
17
|
)
|
|
18
18
|
from xradio._utils.coord_math import _deg_to_rad
|
|
19
|
+
from xradio._utils.dict_helpers import make_quantity
|
|
19
20
|
import copy
|
|
20
21
|
import dask
|
|
21
22
|
import dask.array as da
|
|
@@ -77,11 +78,7 @@ def _add_freq_attrs(xds: xr.Dataset, helpers: dict) -> xr.Dataset:
|
|
|
77
78
|
freq_coord = xds.coords["frequency"]
|
|
78
79
|
meta = {}
|
|
79
80
|
if helpers["has_freq"]:
|
|
80
|
-
meta["rest_frequency"] =
|
|
81
|
-
"type": "quantity",
|
|
82
|
-
"units": "Hz",
|
|
83
|
-
"value": helpers["restfreq"],
|
|
84
|
-
}
|
|
81
|
+
meta["rest_frequency"] = make_quantity(helpers["restfreq"], "Hz")
|
|
85
82
|
meta["frame"] = helpers["specsys"]
|
|
86
83
|
meta["units"] = "Hz"
|
|
87
84
|
meta["type"] = "frequency"
|
|
@@ -184,16 +181,8 @@ def _xds_direction_attrs_from_header(helpers: dict, header) -> dict:
|
|
|
184
181
|
direction["reference"]["value"][i] = x.value
|
|
185
182
|
x = helpers["cdelt"][i] * u.Unit(_get_unit(helpers["cunit"][i]))
|
|
186
183
|
x = x.to("rad")
|
|
187
|
-
direction["latpole"] =
|
|
188
|
-
|
|
189
|
-
"units": "rad",
|
|
190
|
-
"type": "quantity",
|
|
191
|
-
}
|
|
192
|
-
direction["longpole"] = {
|
|
193
|
-
"value": header["LONPOLE"] * _deg_to_rad,
|
|
194
|
-
"units": "rad",
|
|
195
|
-
"type": "quantity",
|
|
196
|
-
}
|
|
184
|
+
direction["latpole"] = make_quantity(header["LATPOLE"] * _deg_to_rad, "rad")
|
|
185
|
+
direction["longpole"] = make_quantity(header["LONPOLE"] * _deg_to_rad, "rad")
|
|
197
186
|
pc = np.zeros([2, 2])
|
|
198
187
|
for i in (0, 1):
|
|
199
188
|
for j in (0, 1):
|
|
@@ -325,9 +314,9 @@ def _beam_attr_from_header(helpers: dict, header) -> Union[dict, str, None]:
|
|
|
325
314
|
if "BMAJ" in header:
|
|
326
315
|
# single global beam
|
|
327
316
|
beam = {
|
|
328
|
-
"bmaj":
|
|
329
|
-
"bmin":
|
|
330
|
-
"pa":
|
|
317
|
+
"bmaj": make_quantity(header["BMAJ"], "arcsec"),
|
|
318
|
+
"bmin": make_quantity(header["BMIN"], "arcsec"),
|
|
319
|
+
"pa": make_quantity(header["BPA"], "arcsec"),
|
|
331
320
|
}
|
|
332
321
|
return _convert_beam_to_rad(beam)
|
|
333
322
|
elif "CASAMBM" in header and header["CASAMBM"]:
|
|
@@ -6,6 +6,7 @@ import numpy as np
|
|
|
6
6
|
from typing import Dict, List
|
|
7
7
|
import xarray as xr
|
|
8
8
|
from xradio._utils.coord_math import _deg_to_rad
|
|
9
|
+
from xradio._utils.dict_helpers import make_quantity
|
|
9
10
|
|
|
10
11
|
_c = 2.99792458e08 * u.m / u.s
|
|
11
12
|
# OPTICAL = Z
|
|
@@ -39,7 +40,7 @@ def _convert_beam_to_rad(beam: dict) -> dict:
|
|
|
39
40
|
q = u.quantity.Quantity(f"{beam[k]['value']}{beam[k]['units']}")
|
|
40
41
|
q = q.to("rad")
|
|
41
42
|
j = "pa" if k == "positionangle" else k
|
|
42
|
-
mybeam[j] =
|
|
43
|
+
mybeam[j] = make_quantity(q.value, "rad")
|
|
43
44
|
return mybeam
|
|
44
45
|
|
|
45
46
|
|
|
@@ -102,11 +103,7 @@ def _numpy_arrayize_dv(xds: xr.Dataset) -> xr.Dataset:
|
|
|
102
103
|
|
|
103
104
|
def _default_freq_info() -> dict:
|
|
104
105
|
return {
|
|
105
|
-
"rest_frequency":
|
|
106
|
-
"value": 1420405751.7860003,
|
|
107
|
-
"units": "Hz",
|
|
108
|
-
"type": "quantity",
|
|
109
|
-
},
|
|
106
|
+
"rest_frequency": make_quantity(1420405751.7860003, "Hz"),
|
|
110
107
|
"type": "frequency",
|
|
111
108
|
"frame": "LSRK",
|
|
112
109
|
"units": "Hz",
|
|
@@ -4,6 +4,7 @@ import xarray as xr
|
|
|
4
4
|
from typing import List, Union
|
|
5
5
|
from .common import _c, _compute_world_sph_dims, _l_m_attr_notes
|
|
6
6
|
from xradio._utils.coord_math import _deg_to_rad
|
|
7
|
+
from xradio._utils.dict_helpers import make_quantity
|
|
7
8
|
|
|
8
9
|
|
|
9
10
|
def _input_checks(
|
|
@@ -46,11 +47,7 @@ def _add_common_attrs(
|
|
|
46
47
|
xds.time.attrs = {"format": "MJD", "scale": "UTC", "units": "d"}
|
|
47
48
|
freq_vals = np.array(xds.frequency)
|
|
48
49
|
xds.frequency.attrs = {
|
|
49
|
-
"rest_frequency":
|
|
50
|
-
"type": "quantity",
|
|
51
|
-
"units": "Hz",
|
|
52
|
-
"value": restfreq,
|
|
53
|
-
},
|
|
50
|
+
"rest_frequency": make_quantity(restfreq, "Hz"),
|
|
54
51
|
"frame": spectral_reference.upper(),
|
|
55
52
|
"units": "Hz",
|
|
56
53
|
"wave_unit": "mm",
|
|
@@ -69,8 +66,8 @@ def _add_common_attrs(
|
|
|
69
66
|
"value": list(phase_center),
|
|
70
67
|
"units": ["rad", "rad"],
|
|
71
68
|
},
|
|
72
|
-
"longpole":
|
|
73
|
-
"latpole":
|
|
69
|
+
"longpole": make_quantity(np.pi, "rad"),
|
|
70
|
+
"latpole": make_quantity(0.0, "rad"),
|
|
74
71
|
"pc": np.array([[1.0, 0.0], [0.0, 1.0]]),
|
|
75
72
|
"projection": projection,
|
|
76
73
|
"projection_parameters": [0.0, 0.0],
|
|
@@ -289,7 +286,6 @@ def _make_empty_lmuv_image(
|
|
|
289
286
|
"crval": 0.0,
|
|
290
287
|
"cdelt": -abs(sky_image_cell_size[0]),
|
|
291
288
|
"units": "rad",
|
|
292
|
-
"type": "quantity",
|
|
293
289
|
"note": attr_note["l"],
|
|
294
290
|
}
|
|
295
291
|
xds.m.attrs = {
|
|
@@ -297,7 +293,6 @@ def _make_empty_lmuv_image(
|
|
|
297
293
|
"crval": 0.0,
|
|
298
294
|
"cdelt": abs(sky_image_cell_size[1]),
|
|
299
295
|
"units": "rad",
|
|
300
|
-
"type": "quantity",
|
|
301
296
|
"note": attr_note["m"],
|
|
302
297
|
}
|
|
303
298
|
xds.u.attrs = {
|
|
@@ -1,7 +1,10 @@
|
|
|
1
1
|
from .processing_set import ProcessingSet
|
|
2
2
|
from .open_processing_set import open_processing_set
|
|
3
3
|
from .load_processing_set import load_processing_set, ProcessingSetIterator
|
|
4
|
-
from .convert_msv2_to_processing_set import
|
|
4
|
+
from .convert_msv2_to_processing_set import (
|
|
5
|
+
convert_msv2_to_processing_set,
|
|
6
|
+
estimate_conversion_memory_and_cores,
|
|
7
|
+
)
|
|
5
8
|
from .measurement_set_xds import MeasurementSetXds
|
|
6
9
|
|
|
7
10
|
from .schema import SpectrumXds, VisibilityXds
|
|
@@ -13,6 +16,7 @@ __all__ = [
|
|
|
13
16
|
"load_processing_set",
|
|
14
17
|
"ProcessingSetIterator",
|
|
15
18
|
"convert_msv2_to_processing_set",
|
|
19
|
+
"estimate_conversion_memory_and_cores",
|
|
16
20
|
"SpectrumXds",
|
|
17
21
|
"VisibilityXds",
|
|
18
22
|
]
|
|
@@ -679,7 +679,7 @@ def get_weight(
|
|
|
679
679
|
return xds
|
|
680
680
|
|
|
681
681
|
|
|
682
|
-
def
|
|
682
|
+
def create_taql_query_where(partition_info: dict):
|
|
683
683
|
main_par_table_cols = [
|
|
684
684
|
"DATA_DESC_ID",
|
|
685
685
|
"OBSERVATION_ID",
|
|
@@ -729,6 +729,192 @@ def fix_uvw_frame(
|
|
|
729
729
|
return xds
|
|
730
730
|
|
|
731
731
|
|
|
732
|
+
def estimate_memory_for_partition(in_file: str, partition: dict) -> float:
|
|
733
|
+
"""
|
|
734
|
+
Aim: given a partition description, estimates a safe maximum memory value, but avoiding overestimation
|
|
735
|
+
(at least not adding not well understood factors).
|
|
736
|
+
"""
|
|
737
|
+
|
|
738
|
+
def calculate_term_all_data(
|
|
739
|
+
tb_tool: tables.table, ntimes: float, nbaselines: float
|
|
740
|
+
) -> tuple[list[float], bool]:
|
|
741
|
+
"""
|
|
742
|
+
Size that DATA vars from MS will have in the MSv4, whether this MS has FLOAT_DATA
|
|
743
|
+
"""
|
|
744
|
+
sizes_all_data_vars = []
|
|
745
|
+
col_names = tb_tool.colnames()
|
|
746
|
+
for data_col in ["DATA", "CORRECTED_DATA", "MODEL_DATA", "FLOAT_DATA"]:
|
|
747
|
+
if data_col in col_names:
|
|
748
|
+
col_descr = tb_tool.getcoldesc(data_col)
|
|
749
|
+
if "shape" in col_descr and isinstance(col_descr["shape"], np.ndarray):
|
|
750
|
+
# example: "shape": array([15, 4]) => gives pols x channels
|
|
751
|
+
cells_in_row = col_descr["shape"].prod()
|
|
752
|
+
npols = col_descr["shape"][-1]
|
|
753
|
+
else:
|
|
754
|
+
first_row = np.array(tb_tool.col(data_col)[0])
|
|
755
|
+
cells_in_row = np.prod(first_row.shape)
|
|
756
|
+
npols = first_row.shape[-1]
|
|
757
|
+
|
|
758
|
+
if col_descr["valueType"] == "complex":
|
|
759
|
+
# Assume. Otherwise, read first column and get the itemsize:
|
|
760
|
+
# col_dtype = np.array(mtable.col(data_col)[0]).dtype
|
|
761
|
+
# cell_size = col_dtype.itemsize
|
|
762
|
+
cell_size = 4
|
|
763
|
+
if data_col != "FLOAT_DATA":
|
|
764
|
+
cell_size *= 2
|
|
765
|
+
elif col_descr["valueType"] == "float":
|
|
766
|
+
cell_size = 4
|
|
767
|
+
|
|
768
|
+
# cells_in_row should account for the polarization and frequency dims
|
|
769
|
+
size_data_var = ntimes * nbaselines * cells_in_row * cell_size
|
|
770
|
+
|
|
771
|
+
sizes_all_data_vars.append(size_data_var)
|
|
772
|
+
|
|
773
|
+
is_float_data = "FLOAT_DATA" in col_names
|
|
774
|
+
|
|
775
|
+
return sizes_all_data_vars, is_float_data
|
|
776
|
+
|
|
777
|
+
def calculate_term_weight_flag(size_largest_data, is_float_data) -> float:
|
|
778
|
+
"""
|
|
779
|
+
Size that WEIGHT and FLAG will have in the MSv4, derived from the size of the
|
|
780
|
+
MSv2 DATA col=> MSv4 VIS/SPECTRUM data var.
|
|
781
|
+
"""
|
|
782
|
+
# Factors of the relative "cell_size" wrt the DATA var
|
|
783
|
+
# WEIGHT_SPECTRUM size: DATA (IF), DATA/2 (SD)
|
|
784
|
+
factor_weight = 1.0 if is_float_data else 0.5
|
|
785
|
+
factor_flag = 1.0 / 4.0 if is_float_data else 1.0 / 8.0
|
|
786
|
+
|
|
787
|
+
return size_largest_data * (factor_weight + factor_flag)
|
|
788
|
+
|
|
789
|
+
def calculate_term_other_data_vars(
|
|
790
|
+
ntimes: int, nbaselines: int, is_float_data: bool
|
|
791
|
+
) -> float:
|
|
792
|
+
"""
|
|
793
|
+
Size all data vars other than the DATA (visibility/spectrum) vars will have in the MSv4
|
|
794
|
+
|
|
795
|
+
For the rest of columns, including indices/iteration columns and other
|
|
796
|
+
scalar columns could say approx ->5% of the (large) data cols
|
|
797
|
+
|
|
798
|
+
"""
|
|
799
|
+
# Small ones, but as they are loaded into data arrays, why not including,
|
|
800
|
+
# For example: UVW (3xscalar), EXPOSURE, TIME_CENTROID
|
|
801
|
+
# assuming float64 in output MSv4
|
|
802
|
+
item_size = 8
|
|
803
|
+
return ntimes * nbaselines * (3 + 1 + 1) * item_size
|
|
804
|
+
|
|
805
|
+
def calculate_term_calc_indx_for_row_split(msv2_nrows: int) -> float:
|
|
806
|
+
"""
|
|
807
|
+
Account for the indices produced in calc_indx_for_row_split():
|
|
808
|
+
the dominating ones are: tidxs, bidxs, didxs.
|
|
809
|
+
|
|
810
|
+
In terms of amount of memory represented by this term relative to the
|
|
811
|
+
total, it becomes relevant proportionally to the ratio between
|
|
812
|
+
nrows / (chans x pols)
|
|
813
|
+
- for example LOFAR long scans/partitions with few channels,
|
|
814
|
+
but its value is independent from # chans, pols.
|
|
815
|
+
"""
|
|
816
|
+
item_size = 8
|
|
817
|
+
# 3 are: tidxs, bidxs, didxs
|
|
818
|
+
return msv2_nrows * 3 * item_size
|
|
819
|
+
|
|
820
|
+
def calculate_term_other_msv2_indices(msv2_nrows: int) -> float:
|
|
821
|
+
"""
|
|
822
|
+
Account for the allocations to load ID, etc. columns from input MSv2.
|
|
823
|
+
The converter needs to load: OBSERVATION_ID, INTERVAL, SCAN_NUMBER.
|
|
824
|
+
These are loaded one after another (allocations do not stack up).
|
|
825
|
+
Also, in most memory profiles these allocations are released once we
|
|
826
|
+
get to create_data_variables(). As such, adding this term will most
|
|
827
|
+
likely lead to overestimation (but adding it for safety).
|
|
828
|
+
|
|
829
|
+
Simlarly as with calculate_term_calc_indx_for_row_split() this term
|
|
830
|
+
becomes relevant when the ratio 'nrows / (chans x pols)' is high.
|
|
831
|
+
"""
|
|
832
|
+
# assuming float64/int64 in input MSv2, which seems to be the case,
|
|
833
|
+
# except for OBSERVATION_ID (int32)
|
|
834
|
+
item_size = 8
|
|
835
|
+
return msv2_nrows * item_size
|
|
836
|
+
|
|
837
|
+
def calculate_term_attrs(size_estimate_main_xds: float) -> float:
|
|
838
|
+
"""Rough guess which seems to be more than enough"""
|
|
839
|
+
# could also account for info_dicts (which seem to require typically ~1 MB)
|
|
840
|
+
return 10 * 1024 * 1024
|
|
841
|
+
|
|
842
|
+
def calculate_term_sub_xds(size_estimate_main_xds: float) -> float:
|
|
843
|
+
"""
|
|
844
|
+
This is still very rough. Just seemingly working for now. Not taking into account the dims
|
|
845
|
+
of the sub-xdss, interpolation options used, etc.
|
|
846
|
+
"""
|
|
847
|
+
# Most cases so far 1% seems enough
|
|
848
|
+
return 0.015 * size_estimate_main_xds
|
|
849
|
+
|
|
850
|
+
def calculate_term_to_zarr(size_estimate_main_xds: float) -> float:
|
|
851
|
+
"""
|
|
852
|
+
The to_zarr call on the main_xds seems to allocate 10s or 100s of MBs, presumably for buffers.
|
|
853
|
+
That adds on top of the expected main_xds size.
|
|
854
|
+
This is currently a very rough extrapolation and is being (mis)used to give a safe up to 5-6%
|
|
855
|
+
overestimation. Perhaps we should drop this term once other sub-xdss are accounted for (and
|
|
856
|
+
this term could be replaced by a similar, smaller but still safe over-estimation percentage).
|
|
857
|
+
"""
|
|
858
|
+
return 0.05 * size_estimate_main_xds
|
|
859
|
+
|
|
860
|
+
taql_partition = create_taql_query_where(partition)
|
|
861
|
+
taql_main = f"select * from $mtable {taql_partition}"
|
|
862
|
+
with open_table_ro(in_file) as mtable:
|
|
863
|
+
col_names = mtable.colnames()
|
|
864
|
+
with open_query(mtable, taql_main) as tb_tool:
|
|
865
|
+
# Do not feel tempted to rely on nrows. nrows tends to underestimate memory when baselines are missing.
|
|
866
|
+
# For some EVN datasets that can easily underestimate by a 50%
|
|
867
|
+
utimes, _tol = get_utimes_tol(mtable, taql_partition)
|
|
868
|
+
ntimes = len(utimes)
|
|
869
|
+
nbaselines = len(get_baselines(tb_tool))
|
|
870
|
+
|
|
871
|
+
# Still, use nrwos for estimations related to sizes of input (MSv2)
|
|
872
|
+
# columns, not sizes of output (MSv4) data vars
|
|
873
|
+
msv2_nrows = tb_tool.nrows()
|
|
874
|
+
|
|
875
|
+
sizes_all_data, is_float_data = calculate_term_all_data(
|
|
876
|
+
tb_tool, ntimes, nbaselines
|
|
877
|
+
)
|
|
878
|
+
|
|
879
|
+
size_largest_data = np.max(sizes_all_data)
|
|
880
|
+
sum_sizes_data = np.sum(sizes_all_data)
|
|
881
|
+
estimate_main_xds = (
|
|
882
|
+
sum_sizes_data
|
|
883
|
+
+ calculate_term_weight_flag(size_largest_data, is_float_data)
|
|
884
|
+
+ calculate_term_other_data_vars(ntimes, nbaselines, is_float_data)
|
|
885
|
+
)
|
|
886
|
+
estimate = (
|
|
887
|
+
estimate_main_xds
|
|
888
|
+
+ calculate_term_calc_indx_for_row_split(msv2_nrows)
|
|
889
|
+
+ calculate_term_other_msv2_indices(msv2_nrows)
|
|
890
|
+
+ calculate_term_sub_xds(estimate_main_xds)
|
|
891
|
+
+ calculate_term_to_zarr(estimate_main_xds)
|
|
892
|
+
)
|
|
893
|
+
estimate /= GiBYTES_TO_BYTES
|
|
894
|
+
|
|
895
|
+
return estimate
|
|
896
|
+
|
|
897
|
+
|
|
898
|
+
def estimate_memory_and_cores_for_partitions(
|
|
899
|
+
in_file: str, partitions: list
|
|
900
|
+
) -> tuple[float, int, int]:
|
|
901
|
+
"""
|
|
902
|
+
Estimates approximate memory required to convert an MSv2 to MSv4, given
|
|
903
|
+
a predefined set of partitions.
|
|
904
|
+
"""
|
|
905
|
+
max_cores = len(partitions)
|
|
906
|
+
|
|
907
|
+
size_estimates = [
|
|
908
|
+
estimate_memory_for_partition(in_file, part_description)
|
|
909
|
+
for part_description in partitions
|
|
910
|
+
]
|
|
911
|
+
max_estimate = np.max(size_estimates) if size_estimates else 0.0
|
|
912
|
+
|
|
913
|
+
recommended_cores = np.ceil(max_cores / 4).astype("int")
|
|
914
|
+
|
|
915
|
+
return float(max_estimate), int(max_cores), int(recommended_cores)
|
|
916
|
+
|
|
917
|
+
|
|
732
918
|
def convert_and_write_partition(
|
|
733
919
|
in_file: str,
|
|
734
920
|
out_file: str,
|
|
@@ -790,7 +976,7 @@ def convert_and_write_partition(
|
|
|
790
976
|
_description_
|
|
791
977
|
"""
|
|
792
978
|
|
|
793
|
-
taql_where =
|
|
979
|
+
taql_where = create_taql_query_where(partition_info)
|
|
794
980
|
ddi = partition_info["DATA_DESC_ID"][0]
|
|
795
981
|
intents = str(partition_info["OBS_MODE"][0])
|
|
796
982
|
|
|
@@ -839,9 +1025,11 @@ def convert_and_write_partition(
|
|
|
839
1025
|
start = time.time()
|
|
840
1026
|
xds = xr.Dataset(
|
|
841
1027
|
attrs={
|
|
842
|
-
"creation_date": datetime.datetime.
|
|
1028
|
+
"creation_date": datetime.datetime.now(
|
|
1029
|
+
datetime.timezone.utc
|
|
1030
|
+
).isoformat(),
|
|
843
1031
|
"xradio_version": importlib.metadata.version("xradio"),
|
|
844
|
-
"schema_version": "4.0.-
|
|
1032
|
+
"schema_version": "4.0.-9991",
|
|
845
1033
|
"type": "visibility",
|
|
846
1034
|
}
|
|
847
1035
|
)
|
|
@@ -1085,6 +1273,8 @@ def convert_and_write_partition(
|
|
|
1085
1273
|
else:
|
|
1086
1274
|
xds.attrs["type"] = "visibility"
|
|
1087
1275
|
|
|
1276
|
+
import sys
|
|
1277
|
+
|
|
1088
1278
|
start = time.time()
|
|
1089
1279
|
if storage_backend == "zarr":
|
|
1090
1280
|
xds.to_zarr(store=os.path.join(file_name, "correlated_xds"), mode=mode)
|
|
@@ -1193,7 +1383,12 @@ def antenna_ids_to_names(
|
|
|
1193
1383
|
]
|
|
1194
1384
|
for unwanted_coord in unwanted_coords_from_ant_xds:
|
|
1195
1385
|
xds = xds.drop_vars(unwanted_coord)
|
|
1196
|
-
|
|
1386
|
+
|
|
1387
|
+
# Rename a dim coord started generating warnings (index not re-created). Swap dims, create coord
|
|
1388
|
+
# https://github.com/pydata/xarray/pull/6999
|
|
1389
|
+
xds = xds.swap_dims({"baseline_id": "antenna_name"})
|
|
1390
|
+
xds = xds.assign_coords({"antenna_name": xds["baseline_id"].data})
|
|
1391
|
+
xds = xds.drop_vars("baseline_id")
|
|
1197
1392
|
|
|
1198
1393
|
# drop more vars that seem unwanted in main_sd_xds, but there shouuld be a better way
|
|
1199
1394
|
# of not creating them in the first place
|
|
@@ -1204,58 +1399,58 @@ def antenna_ids_to_names(
|
|
|
1204
1399
|
return xds
|
|
1205
1400
|
|
|
1206
1401
|
|
|
1402
|
+
def add_group_to_data_groups(
|
|
1403
|
+
data_groups: dict, what_group: str, correlated_data_name: str, uvw: bool = True
|
|
1404
|
+
):
|
|
1405
|
+
"""
|
|
1406
|
+
Adds one correlated_data variable to the data_groups dict.
|
|
1407
|
+
A utility function to use when creating/updating data_groups from MSv2 data columns
|
|
1408
|
+
/ data variables.
|
|
1409
|
+
|
|
1410
|
+
Parameters
|
|
1411
|
+
----------
|
|
1412
|
+
data_groups: str
|
|
1413
|
+
The data_groups dict of an MSv4 xds. It is updated in-place
|
|
1414
|
+
what_group: str
|
|
1415
|
+
Name of the data group: "base", "corrected", "model", etc.
|
|
1416
|
+
correlated_data_name: str
|
|
1417
|
+
Name of the correlated_data var: "VISIBILITY", "VISIBILITY_CORRECTED", "SPECTRUM", etc.
|
|
1418
|
+
uvw: bool
|
|
1419
|
+
Whether to add a uvw field to the data group (assume True = interferometric data).
|
|
1420
|
+
"""
|
|
1421
|
+
data_groups[what_group] = {
|
|
1422
|
+
"correlated_data": correlated_data_name,
|
|
1423
|
+
"flag": "FLAG",
|
|
1424
|
+
"weight": "WEIGHT",
|
|
1425
|
+
}
|
|
1426
|
+
if uvw:
|
|
1427
|
+
data_groups[what_group]["uvw"] = "UVW"
|
|
1428
|
+
|
|
1429
|
+
|
|
1207
1430
|
def add_data_groups(xds):
|
|
1208
1431
|
xds.attrs["data_groups"] = {}
|
|
1432
|
+
|
|
1433
|
+
data_groups = xds.attrs["data_groups"]
|
|
1209
1434
|
if "VISIBILITY" in xds:
|
|
1210
|
-
|
|
1211
|
-
"correlated_data": "VISIBILITY",
|
|
1212
|
-
"flag": "FLAG",
|
|
1213
|
-
"weight": "WEIGHT",
|
|
1214
|
-
"uvw": "UVW",
|
|
1215
|
-
}
|
|
1435
|
+
add_group_to_data_groups(data_groups, "base", "VISIBILITY")
|
|
1216
1436
|
|
|
1217
1437
|
if "VISIBILITY_CORRECTED" in xds:
|
|
1218
|
-
|
|
1219
|
-
"correlated_data": "VISIBILITY_CORRECTED",
|
|
1220
|
-
"flag": "FLAG",
|
|
1221
|
-
"weight": "WEIGHT",
|
|
1222
|
-
"uvw": "UVW",
|
|
1223
|
-
}
|
|
1438
|
+
add_group_to_data_groups(data_groups, "corrected", "VISIBILITY_CORRECTED")
|
|
1224
1439
|
|
|
1225
1440
|
if "VISIBILITY_MODEL" in xds:
|
|
1226
|
-
|
|
1227
|
-
"correlated_data": "VISIBILITY_MODEL",
|
|
1228
|
-
"flag": "FLAG",
|
|
1229
|
-
"weight": "WEIGHT",
|
|
1230
|
-
"uvw": "UVW",
|
|
1231
|
-
}
|
|
1441
|
+
add_group_to_data_groups(data_groups, "model", "VISIBILITY_MODEL")
|
|
1232
1442
|
|
|
1233
1443
|
is_single_dish = False
|
|
1234
1444
|
if "SPECTRUM" in xds:
|
|
1235
|
-
|
|
1236
|
-
"correlated_data": "SPECTRUM",
|
|
1237
|
-
"flag": "FLAG",
|
|
1238
|
-
"weight": "WEIGHT",
|
|
1239
|
-
"uvw": "UVW",
|
|
1240
|
-
}
|
|
1445
|
+
add_group_to_data_groups(data_groups, "base", "SPECTRUM", False)
|
|
1241
1446
|
is_single_dish = True
|
|
1242
1447
|
|
|
1243
1448
|
if "SPECTRUM_MODEL" in xds:
|
|
1244
|
-
|
|
1245
|
-
"correlated_data": "SPECTRUM_MODEL",
|
|
1246
|
-
"flag": "FLAG",
|
|
1247
|
-
"weight": "WEIGHT",
|
|
1248
|
-
"uvw": "UVW",
|
|
1249
|
-
}
|
|
1449
|
+
add_group_to_data_groups(data_groups, "model", "SPECTRUM_MODEL", False)
|
|
1250
1450
|
is_single_dish = True
|
|
1251
1451
|
|
|
1252
1452
|
if "SPECTRUM_CORRECTED" in xds:
|
|
1253
|
-
|
|
1254
|
-
"correlated_data": "SPECTRUM_CORRECTED",
|
|
1255
|
-
"flag": "FLAG",
|
|
1256
|
-
"weight": "WEIGHT",
|
|
1257
|
-
"uvw": "UVW",
|
|
1258
|
-
}
|
|
1453
|
+
add_group_to_data_groups(data_groups, "corrected", "SPECTRUM_CORRECTED", False)
|
|
1259
1454
|
is_single_dish = True
|
|
1260
1455
|
|
|
1261
1456
|
return xds, is_single_dish
|
{xradio-0.0.44 → xradio-0.0.46}/src/xradio/measurement_set/_utils/_msv2/create_antenna_xds.py
RENAMED
|
@@ -15,7 +15,9 @@ from xradio.measurement_set._utils._msv2._tables.read import (
|
|
|
15
15
|
table_exists,
|
|
16
16
|
)
|
|
17
17
|
from xradio._utils.schema import convert_generic_xds_to_xradio_schema
|
|
18
|
-
from xradio.measurement_set._utils._msv2.msv4_sub_xdss import
|
|
18
|
+
from xradio.measurement_set._utils._msv2.msv4_sub_xdss import (
|
|
19
|
+
rename_and_interpolate_to_time,
|
|
20
|
+
)
|
|
19
21
|
|
|
20
22
|
from xradio._utils.list_and_array import (
|
|
21
23
|
check_if_consistent,
|
|
@@ -509,27 +511,8 @@ def create_phase_calibration_xds(
|
|
|
509
511
|
phase_cal_xds.time_phase_cal.astype("float64").astype("float64") / 10**9
|
|
510
512
|
)
|
|
511
513
|
|
|
512
|
-
phase_cal_xds =
|
|
513
|
-
phase_cal_xds,
|
|
514
|
-
phase_cal_interp_time,
|
|
515
|
-
"antenna_xds",
|
|
516
|
-
time_name="time_phase_cal",
|
|
514
|
+
phase_cal_xds = rename_and_interpolate_to_time(
|
|
515
|
+
phase_cal_xds, "time_phase_cal", phase_cal_interp_time, "phase_cal_xds"
|
|
517
516
|
)
|
|
518
517
|
|
|
519
|
-
time_coord_attrs = {
|
|
520
|
-
"type": "time",
|
|
521
|
-
"units": ["s"],
|
|
522
|
-
"scale": "utc",
|
|
523
|
-
"format": "unix",
|
|
524
|
-
}
|
|
525
|
-
|
|
526
|
-
# If we interpolate rename the time_phase_cal axis to time.
|
|
527
|
-
if phase_cal_interp_time is not None:
|
|
528
|
-
time_coord = {"time": ("time_phase_cal", phase_cal_interp_time.data)}
|
|
529
|
-
phase_cal_xds = phase_cal_xds.assign_coords(time_coord)
|
|
530
|
-
phase_cal_xds.coords["time"].attrs.update(time_coord_attrs)
|
|
531
|
-
phase_cal_xds = phase_cal_xds.swap_dims({"time_phase_cal": "time"}).drop_vars(
|
|
532
|
-
"time_phase_cal"
|
|
533
|
-
)
|
|
534
|
-
|
|
535
518
|
return phase_cal_xds
|
|
@@ -6,7 +6,9 @@ import numpy as np
|
|
|
6
6
|
import xarray as xr
|
|
7
7
|
|
|
8
8
|
import toolviper.utils.logger as logger
|
|
9
|
-
from xradio.measurement_set._utils._msv2.msv4_sub_xdss import
|
|
9
|
+
from xradio.measurement_set._utils._msv2.msv4_sub_xdss import (
|
|
10
|
+
rename_and_interpolate_to_time,
|
|
11
|
+
)
|
|
10
12
|
from xradio.measurement_set._utils._msv2.subtables import subt_rename_ids
|
|
11
13
|
from xradio.measurement_set._utils._msv2._tables.read import (
|
|
12
14
|
convert_casacore_time_to_mjd,
|
|
@@ -363,20 +365,13 @@ def extract_ephemeris_info(
|
|
|
363
365
|
}
|
|
364
366
|
temp_xds["time_ephemeris"].attrs.update(time_coord_attrs)
|
|
365
367
|
|
|
366
|
-
# Convert to si units
|
|
368
|
+
# Convert to si units
|
|
367
369
|
temp_xds = convert_to_si_units(temp_xds)
|
|
368
|
-
temp_xds = interpolate_to_time(
|
|
369
|
-
temp_xds, interp_time, "field_and_source_xds", time_name="time_ephemeris"
|
|
370
|
-
)
|
|
371
370
|
|
|
372
|
-
#
|
|
373
|
-
|
|
374
|
-
|
|
375
|
-
|
|
376
|
-
temp_xds.coords["time"].attrs.update(time_coord_attrs)
|
|
377
|
-
temp_xds = temp_xds.swap_dims({"time_ephemeris": "time"}).drop_vars(
|
|
378
|
-
"time_ephemeris"
|
|
379
|
-
)
|
|
371
|
+
# interpolate if ephemeris_interpolate/interp_time=True, and rename time_ephemeris=>time
|
|
372
|
+
temp_xds = rename_and_interpolate_to_time(
|
|
373
|
+
temp_xds, "time_ephemeris", interp_time, "field_and_source_xds"
|
|
374
|
+
)
|
|
380
375
|
|
|
381
376
|
xds = xr.merge([xds, temp_xds])
|
|
382
377
|
|