xradio 0.0.44__tar.gz → 0.0.46__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (83)
  1. {xradio-0.0.44/src/xradio.egg-info → xradio-0.0.46}/PKG-INFO +1 -1
  2. {xradio-0.0.44 → xradio-0.0.46}/pyproject.toml +1 -1
  3. xradio-0.0.46/src/xradio/_utils/dict_helpers.py +14 -0
  4. {xradio-0.0.44 → xradio-0.0.46}/src/xradio/image/_util/_casacore/xds_from_casacore.py +4 -17
  5. {xradio-0.0.44 → xradio-0.0.46}/src/xradio/image/_util/_fits/xds_from_fits.py +7 -18
  6. {xradio-0.0.44 → xradio-0.0.46}/src/xradio/image/_util/common.py +3 -6
  7. {xradio-0.0.44 → xradio-0.0.46}/src/xradio/image/_util/image_factory.py +4 -9
  8. {xradio-0.0.44 → xradio-0.0.46}/src/xradio/measurement_set/__init__.py +5 -1
  9. {xradio-0.0.44 → xradio-0.0.46}/src/xradio/measurement_set/_utils/_msv2/conversion.py +236 -41
  10. {xradio-0.0.44 → xradio-0.0.46}/src/xradio/measurement_set/_utils/_msv2/create_antenna_xds.py +5 -22
  11. {xradio-0.0.44 → xradio-0.0.46}/src/xradio/measurement_set/_utils/_msv2/create_field_and_source_xds.py +8 -13
  12. {xradio-0.0.44 → xradio-0.0.46}/src/xradio/measurement_set/_utils/_msv2/msv4_sub_xdss.py +79 -23
  13. {xradio-0.0.44 → xradio-0.0.46}/src/xradio/measurement_set/_utils/_msv2/partition_queries.py +4 -5
  14. {xradio-0.0.44 → xradio-0.0.46}/src/xradio/measurement_set/convert_msv2_to_processing_set.py +44 -3
  15. {xradio-0.0.44 → xradio-0.0.46}/src/xradio/measurement_set/schema.py +47 -35
  16. {xradio-0.0.44 → xradio-0.0.46/src/xradio.egg-info}/PKG-INFO +1 -1
  17. {xradio-0.0.44 → xradio-0.0.46}/src/xradio.egg-info/SOURCES.txt +1 -0
  18. {xradio-0.0.44 → xradio-0.0.46}/LICENSE.txt +0 -0
  19. {xradio-0.0.44 → xradio-0.0.46}/MANIFEST.in +0 -0
  20. {xradio-0.0.44 → xradio-0.0.46}/README.md +0 -0
  21. {xradio-0.0.44 → xradio-0.0.46}/setup.cfg +0 -0
  22. {xradio-0.0.44 → xradio-0.0.46}/src/xradio/__init__.py +0 -0
  23. {xradio-0.0.44 → xradio-0.0.46}/src/xradio/_utils/__init__.py +0 -0
  24. {xradio-0.0.44 → xradio-0.0.46}/src/xradio/_utils/_casacore/tables.py +0 -0
  25. {xradio-0.0.44 → xradio-0.0.46}/src/xradio/_utils/coord_math.py +0 -0
  26. {xradio-0.0.44 → xradio-0.0.46}/src/xradio/_utils/list_and_array.py +0 -0
  27. {xradio-0.0.44 → xradio-0.0.46}/src/xradio/_utils/schema.py +0 -0
  28. {xradio-0.0.44 → xradio-0.0.46}/src/xradio/_utils/zarr/__init__.py +0 -0
  29. {xradio-0.0.44 → xradio-0.0.46}/src/xradio/_utils/zarr/common.py +0 -0
  30. {xradio-0.0.44 → xradio-0.0.46}/src/xradio/image/__init__.py +0 -0
  31. {xradio-0.0.44 → xradio-0.0.46}/src/xradio/image/_util/__init__.py +0 -0
  32. {xradio-0.0.44 → xradio-0.0.46}/src/xradio/image/_util/_casacore/__init__.py +0 -0
  33. {xradio-0.0.44 → xradio-0.0.46}/src/xradio/image/_util/_casacore/common.py +0 -0
  34. {xradio-0.0.44 → xradio-0.0.46}/src/xradio/image/_util/_casacore/xds_to_casacore.py +0 -0
  35. {xradio-0.0.44 → xradio-0.0.46}/src/xradio/image/_util/_zarr/common.py +0 -0
  36. {xradio-0.0.44 → xradio-0.0.46}/src/xradio/image/_util/_zarr/xds_from_zarr.py +0 -0
  37. {xradio-0.0.44 → xradio-0.0.46}/src/xradio/image/_util/_zarr/xds_to_zarr.py +0 -0
  38. {xradio-0.0.44 → xradio-0.0.46}/src/xradio/image/_util/_zarr/zarr_low_level.py +0 -0
  39. {xradio-0.0.44 → xradio-0.0.46}/src/xradio/image/_util/casacore.py +0 -0
  40. {xradio-0.0.44 → xradio-0.0.46}/src/xradio/image/_util/fits.py +0 -0
  41. {xradio-0.0.44 → xradio-0.0.46}/src/xradio/image/_util/zarr.py +0 -0
  42. {xradio-0.0.44 → xradio-0.0.46}/src/xradio/image/image.py +0 -0
  43. {xradio-0.0.44 → xradio-0.0.46}/src/xradio/measurement_set/_utils/__init__.py +0 -0
  44. {xradio-0.0.44 → xradio-0.0.46}/src/xradio/measurement_set/_utils/_msv2/_tables/load.py +0 -0
  45. {xradio-0.0.44 → xradio-0.0.46}/src/xradio/measurement_set/_utils/_msv2/_tables/load_main_table.py +0 -0
  46. {xradio-0.0.44 → xradio-0.0.46}/src/xradio/measurement_set/_utils/_msv2/_tables/read.py +0 -0
  47. {xradio-0.0.44 → xradio-0.0.46}/src/xradio/measurement_set/_utils/_msv2/_tables/read_main_table.py +0 -0
  48. {xradio-0.0.44 → xradio-0.0.46}/src/xradio/measurement_set/_utils/_msv2/_tables/read_subtables.py +0 -0
  49. {xradio-0.0.44 → xradio-0.0.46}/src/xradio/measurement_set/_utils/_msv2/_tables/table_query.py +0 -0
  50. {xradio-0.0.44 → xradio-0.0.46}/src/xradio/measurement_set/_utils/_msv2/_tables/write.py +0 -0
  51. {xradio-0.0.44 → xradio-0.0.46}/src/xradio/measurement_set/_utils/_msv2/_tables/write_exp_api.py +0 -0
  52. {xradio-0.0.44 → xradio-0.0.46}/src/xradio/measurement_set/_utils/_msv2/chunks.py +0 -0
  53. {xradio-0.0.44 → xradio-0.0.46}/src/xradio/measurement_set/_utils/_msv2/descr.py +0 -0
  54. {xradio-0.0.44 → xradio-0.0.46}/src/xradio/measurement_set/_utils/_msv2/msv2_msv3.py +0 -0
  55. {xradio-0.0.44 → xradio-0.0.46}/src/xradio/measurement_set/_utils/_msv2/msv2_to_msv4_meta.py +0 -0
  56. {xradio-0.0.44 → xradio-0.0.46}/src/xradio/measurement_set/_utils/_msv2/msv4_info_dicts.py +0 -0
  57. {xradio-0.0.44 → xradio-0.0.46}/src/xradio/measurement_set/_utils/_msv2/optimised_functions.py +0 -0
  58. {xradio-0.0.44 → xradio-0.0.46}/src/xradio/measurement_set/_utils/_msv2/partitions.py +0 -0
  59. {xradio-0.0.44 → xradio-0.0.46}/src/xradio/measurement_set/_utils/_msv2/subtables.py +0 -0
  60. {xradio-0.0.44 → xradio-0.0.46}/src/xradio/measurement_set/_utils/_utils/cds.py +0 -0
  61. {xradio-0.0.44 → xradio-0.0.46}/src/xradio/measurement_set/_utils/_utils/partition_attrs.py +0 -0
  62. {xradio-0.0.44 → xradio-0.0.46}/src/xradio/measurement_set/_utils/_utils/stokes_types.py +0 -0
  63. {xradio-0.0.44 → xradio-0.0.46}/src/xradio/measurement_set/_utils/_utils/xds_helper.py +0 -0
  64. {xradio-0.0.44 → xradio-0.0.46}/src/xradio/measurement_set/_utils/_zarr/encoding.py +0 -0
  65. {xradio-0.0.44 → xradio-0.0.46}/src/xradio/measurement_set/_utils/_zarr/read.py +0 -0
  66. {xradio-0.0.44 → xradio-0.0.46}/src/xradio/measurement_set/_utils/_zarr/write.py +0 -0
  67. {xradio-0.0.44 → xradio-0.0.46}/src/xradio/measurement_set/_utils/msv2.py +0 -0
  68. {xradio-0.0.44 → xradio-0.0.46}/src/xradio/measurement_set/_utils/zarr.py +0 -0
  69. {xradio-0.0.44 → xradio-0.0.46}/src/xradio/measurement_set/load_processing_set.py +0 -0
  70. {xradio-0.0.44 → xradio-0.0.46}/src/xradio/measurement_set/measurement_set_xds.py +0 -0
  71. {xradio-0.0.44 → xradio-0.0.46}/src/xradio/measurement_set/open_processing_set.py +0 -0
  72. {xradio-0.0.44 → xradio-0.0.46}/src/xradio/measurement_set/processing_set.py +0 -0
  73. {xradio-0.0.44 → xradio-0.0.46}/src/xradio/schema/__init__.py +0 -0
  74. {xradio-0.0.44 → xradio-0.0.46}/src/xradio/schema/bases.py +0 -0
  75. {xradio-0.0.44 → xradio-0.0.46}/src/xradio/schema/check.py +0 -0
  76. {xradio-0.0.44 → xradio-0.0.46}/src/xradio/schema/dataclass.py +0 -0
  77. {xradio-0.0.44 → xradio-0.0.46}/src/xradio/schema/metamodel.py +0 -0
  78. {xradio-0.0.44 → xradio-0.0.46}/src/xradio/schema/typing.py +0 -0
  79. {xradio-0.0.44 → xradio-0.0.46}/src/xradio/sphinx/__init__.py +0 -0
  80. {xradio-0.0.44 → xradio-0.0.46}/src/xradio/sphinx/schema_table.py +0 -0
  81. {xradio-0.0.44 → xradio-0.0.46}/src/xradio.egg-info/dependency_links.txt +0 -0
  82. {xradio-0.0.44 → xradio-0.0.46}/src/xradio.egg-info/requires.txt +0 -0
  83. {xradio-0.0.44 → xradio-0.0.46}/src/xradio.egg-info/top_level.txt +0 -0
@@ -1,6 +1,6 @@
  Metadata-Version: 2.1
  Name: xradio
- Version: 0.0.44
+ Version: 0.0.46
  Summary: Xarray Radio Astronomy Data IO
  Author-email: Jan-Willem Steeb <jsteeb@nrao.edu>
  License: BSD 3-Clause License
@@ -1,6 +1,6 @@
  [project]
  name = "xradio"
- version = "0.0.44"
+ version = "0.0.46"
  description = " Xarray Radio Astronomy Data IO"
  authors = [
  {name = "Jan-Willem Steeb", email="jsteeb@nrao.edu"},
@@ -0,0 +1,14 @@
+ def make_quantity(value, units: str) -> dict:
+     """
+     Create a quantity dictionary given a value and units
+     Parameters
+     ----------
+     value : numeric or array of numerics
+         Quantity value
+     units : str
+         Quantity units
+     Returns
+     -------
+     dict
+     """
+     return {"value": value, "units": units, "type": "quantity"}
@@ -31,6 +31,7 @@ from ..common import (
  )
  from ...._utils._casacore.tables import extract_table_attributes, open_table_ro
  from xradio._utils.coord_math import _deg_to_rad
+ from xradio._utils.dict_helpers import make_quantity

  """
  def _add_coord_attrs(xds: xr.Dataset, icoords: dict, dir_axes: list) -> xr.Dataset:
@@ -62,13 +63,7 @@ def _add_freq_attrs(xds, coord_dict):
  for k in coord_dict:
  if k.startswith("spectral"):
  sd = coord_dict[k]
- # meta["native_type"] = _native_types[sd["nativeType"]]
- meta["rest_frequency"] = {
- "type": "quantity",
- "units": "Hz",
- "value": sd["restfreq"],
- }
- # meta["restfreqs"] = {'type': 'quantity', 'units': 'Hz', 'value': list(sd["restfreqs"])}
+ meta["rest_frequency"] = make_quantity(sd["restfreq"], "Hz")
  meta["type"] = "frequency"
  meta["units"] = sd["unit"]
  meta["frame"] = sd["system"]
@@ -184,11 +179,7 @@ def _casa_image_to_xds_attrs(img_full_path: str, history: bool = True) -> dict:
  k = "latpole"
  if k in coord_dir_dict:
  for j in (k, "longpole"):
- dir_dict[j] = {
- "value": coord_dir_dict[j] * _deg_to_rad,
- "units": "rad",
- "type": "quantity",
- }
+ dir_dict[j] = make_quantity(coord_dir_dict[j] * _deg_to_rad, "rad")
  for j in ("pc", "projection_parameters", "projection"):
  if j in coord_dir_dict:
  dir_dict[j] = coord_dir_dict[j]
@@ -518,11 +509,7 @@ def _get_freq_values_attrs(
  crpix=wcs["crpix"],
  cdelt=wcs["cdelt"],
  )
- attrs["rest_frequency"] = {
- "type": "quantity",
- "units": "Hz",
- "value": sd["restfreq"],
- }
+ attrs["rest_frequency"] = make_quantity(sd["restfreq"], "Hz")
  attrs["type"] = "frequency"
  attrs["units"] = sd["unit"]
  attrs["frame"] = sd["system"]
@@ -16,6 +16,7 @@ from ..common import (
  _l_m_attr_notes,
  )
  from xradio._utils.coord_math import _deg_to_rad
+ from xradio._utils.dict_helpers import make_quantity
  import copy
  import dask
  import dask.array as da
@@ -77,11 +78,7 @@ def _add_freq_attrs(xds: xr.Dataset, helpers: dict) -> xr.Dataset:
  freq_coord = xds.coords["frequency"]
  meta = {}
  if helpers["has_freq"]:
- meta["rest_frequency"] = {
- "type": "quantity",
- "units": "Hz",
- "value": helpers["restfreq"],
- }
+ meta["rest_frequency"] = make_quantity(helpers["restfreq"], "Hz")
  meta["frame"] = helpers["specsys"]
  meta["units"] = "Hz"
  meta["type"] = "frequency"
@@ -184,16 +181,8 @@ def _xds_direction_attrs_from_header(helpers: dict, header) -> dict:
  direction["reference"]["value"][i] = x.value
  x = helpers["cdelt"][i] * u.Unit(_get_unit(helpers["cunit"][i]))
  x = x.to("rad")
- direction["latpole"] = {
- "value": header["LATPOLE"] * _deg_to_rad,
- "units": "rad",
- "type": "quantity",
- }
- direction["longpole"] = {
- "value": header["LONPOLE"] * _deg_to_rad,
- "units": "rad",
- "type": "quantity",
- }
+ direction["latpole"] = make_quantity(header["LATPOLE"] * _deg_to_rad, "rad")
+ direction["longpole"] = make_quantity(header["LONPOLE"] * _deg_to_rad, "rad")
  pc = np.zeros([2, 2])
  for i in (0, 1):
  for j in (0, 1):
@@ -325,9 +314,9 @@ def _beam_attr_from_header(helpers: dict, header) -> Union[dict, str, None]:
  if "BMAJ" in header:
  # single global beam
  beam = {
- "bmaj": {"type": "quantity", "units": "arcsec", "value": header["BMAJ"]},
- "bmin": {"type": "quantity", "units": "arcsec", "value": header["BMIN"]},
- "pa": {"type": "quantity", "units": "arcsec", "value": header["BPA"]},
+ "bmaj": make_quantity(header["BMAJ"], "arcsec"),
+ "bmin": make_quantity(header["BMIN"], "arcsec"),
+ "pa": make_quantity(header["BPA"], "arcsec"),
  }
  return _convert_beam_to_rad(beam)
  elif "CASAMBM" in header and header["CASAMBM"]:
@@ -6,6 +6,7 @@ import numpy as np
  from typing import Dict, List
  import xarray as xr
  from xradio._utils.coord_math import _deg_to_rad
+ from xradio._utils.dict_helpers import make_quantity

  _c = 2.99792458e08 * u.m / u.s
  # OPTICAL = Z
@@ -39,7 +40,7 @@ def _convert_beam_to_rad(beam: dict) -> dict:
  q = u.quantity.Quantity(f"{beam[k]['value']}{beam[k]['units']}")
  q = q.to("rad")
  j = "pa" if k == "positionangle" else k
- mybeam[j] = {"type": "quantity", "value": q.value, "units": "rad"}
+ mybeam[j] = make_quantity(q.value, "rad")
  return mybeam


@@ -102,11 +103,7 @@ def _numpy_arrayize_dv(xds: xr.Dataset) -> xr.Dataset:

  def _default_freq_info() -> dict:
  return {
- "rest_frequency": {
- "value": 1420405751.7860003,
- "units": "Hz",
- "type": "quantity",
- },
+ "rest_frequency": make_quantity(1420405751.7860003, "Hz"),
  "type": "frequency",
  "frame": "LSRK",
  "units": "Hz",
@@ -4,6 +4,7 @@ import xarray as xr
  from typing import List, Union
  from .common import _c, _compute_world_sph_dims, _l_m_attr_notes
  from xradio._utils.coord_math import _deg_to_rad
+ from xradio._utils.dict_helpers import make_quantity


  def _input_checks(
@@ -46,11 +47,7 @@ def _add_common_attrs(
  xds.time.attrs = {"format": "MJD", "scale": "UTC", "units": "d"}
  freq_vals = np.array(xds.frequency)
  xds.frequency.attrs = {
- "rest_frequency": {
- "type": "quantity",
- "units": "Hz",
- "value": restfreq,
- },
+ "rest_frequency": make_quantity(restfreq, "Hz"),
  "frame": spectral_reference.upper(),
  "units": "Hz",
  "wave_unit": "mm",
@@ -69,8 +66,8 @@ def _add_common_attrs(
  "value": list(phase_center),
  "units": ["rad", "rad"],
  },
- "longpole": {"type": "quantity", "value": np.pi, "units": "rad"},
- "latpole": {"type": "quantity", "value": 0.0, "units": "rad"},
+ "longpole": make_quantity(np.pi, "rad"),
+ "latpole": make_quantity(0.0, "rad"),
  "pc": np.array([[1.0, 0.0], [0.0, 1.0]]),
  "projection": projection,
  "projection_parameters": [0.0, 0.0],
@@ -289,7 +286,6 @@ def _make_empty_lmuv_image(
  "crval": 0.0,
  "cdelt": -abs(sky_image_cell_size[0]),
  "units": "rad",
- "type": "quantity",
  "note": attr_note["l"],
  }
  xds.m.attrs = {
@@ -297,7 +293,6 @@ def _make_empty_lmuv_image(
  "crval": 0.0,
  "cdelt": abs(sky_image_cell_size[1]),
  "units": "rad",
- "type": "quantity",
  "note": attr_note["m"],
  }
  xds.u.attrs = {
@@ -1,7 +1,10 @@
  from .processing_set import ProcessingSet
  from .open_processing_set import open_processing_set
  from .load_processing_set import load_processing_set, ProcessingSetIterator
- from .convert_msv2_to_processing_set import convert_msv2_to_processing_set
+ from .convert_msv2_to_processing_set import (
+ convert_msv2_to_processing_set,
+ estimate_conversion_memory_and_cores,
+ )
  from .measurement_set_xds import MeasurementSetXds

  from .schema import SpectrumXds, VisibilityXds
@@ -13,6 +16,7 @@ __all__ = [
  "load_processing_set",
  "ProcessingSetIterator",
  "convert_msv2_to_processing_set",
+ "estimate_conversion_memory_and_cores",
  "SpectrumXds",
  "VisibilityXds",
  ]
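Alongside convert_msv2_to_processing_set, the new estimate_conversion_memory_and_cores helper is now part of the public xradio.measurement_set API. Its implementation lives in convert_msv2_to_processing_set.py, whose hunk is not shown here, so the call below is only a hedged sketch; the exact signature is an assumption based on the internal helper estimate_memory_and_cores_for_partitions(in_file, partitions) further down in this diff.

from xradio.measurement_set import estimate_conversion_memory_and_cores

# Assumed usage: given (at least) the path to the input MSv2, return
# (max_memory_estimate_in_GiB, maximum_cores, recommended_cores), mirroring
# what the internal estimate_memory_and_cores_for_partitions() returns.
mem_gib, max_cores, recommended_cores = estimate_conversion_memory_and_cores("my_data.ms")
print(f"~{mem_gib:.2f} GiB per core; up to {max_cores} cores, {recommended_cores} recommended")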
@@ -679,7 +679,7 @@ def get_weight(
  return xds


- def create_taql_query(partition_info):
+ def create_taql_query_where(partition_info: dict):
  main_par_table_cols = [
  "DATA_DESC_ID",
  "OBSERVATION_ID",
@@ -729,6 +729,192 @@ def fix_uvw_frame(
729
729
  return xds
730
730
 
731
731
 
732
+ def estimate_memory_for_partition(in_file: str, partition: dict) -> float:
733
+ """
734
+ Aim: given a partition description, estimates a safe maximum memory value, but avoiding overestimation
735
+ (at least not adding not well understood factors).
736
+ """
737
+
738
+ def calculate_term_all_data(
739
+ tb_tool: tables.table, ntimes: float, nbaselines: float
740
+ ) -> tuple[list[float], bool]:
741
+ """
742
+ Size that DATA vars from MS will have in the MSv4, whether this MS has FLOAT_DATA
743
+ """
744
+ sizes_all_data_vars = []
745
+ col_names = tb_tool.colnames()
746
+ for data_col in ["DATA", "CORRECTED_DATA", "MODEL_DATA", "FLOAT_DATA"]:
747
+ if data_col in col_names:
748
+ col_descr = tb_tool.getcoldesc(data_col)
749
+ if "shape" in col_descr and isinstance(col_descr["shape"], np.ndarray):
750
+ # example: "shape": array([15, 4]) => gives pols x channels
751
+ cells_in_row = col_descr["shape"].prod()
752
+ npols = col_descr["shape"][-1]
753
+ else:
754
+ first_row = np.array(tb_tool.col(data_col)[0])
755
+ cells_in_row = np.prod(first_row.shape)
756
+ npols = first_row.shape[-1]
757
+
758
+ if col_descr["valueType"] == "complex":
759
+ # Assume. Otherwise, read first column and get the itemsize:
760
+ # col_dtype = np.array(mtable.col(data_col)[0]).dtype
761
+ # cell_size = col_dtype.itemsize
762
+ cell_size = 4
763
+ if data_col != "FLOAT_DATA":
764
+ cell_size *= 2
765
+ elif col_descr["valueType"] == "float":
766
+ cell_size = 4
767
+
768
+ # cells_in_row should account for the polarization and frequency dims
769
+ size_data_var = ntimes * nbaselines * cells_in_row * cell_size
770
+
771
+ sizes_all_data_vars.append(size_data_var)
772
+
773
+ is_float_data = "FLOAT_DATA" in col_names
774
+
775
+ return sizes_all_data_vars, is_float_data
776
+
777
+ def calculate_term_weight_flag(size_largest_data, is_float_data) -> float:
778
+ """
779
+ Size that WEIGHT and FLAG will have in the MSv4, derived from the size of the
780
+ MSv2 DATA col=> MSv4 VIS/SPECTRUM data var.
781
+ """
782
+ # Factors of the relative "cell_size" wrt the DATA var
783
+ # WEIGHT_SPECTRUM size: DATA (IF), DATA/2 (SD)
784
+ factor_weight = 1.0 if is_float_data else 0.5
785
+ factor_flag = 1.0 / 4.0 if is_float_data else 1.0 / 8.0
786
+
787
+ return size_largest_data * (factor_weight + factor_flag)
788
+
789
+ def calculate_term_other_data_vars(
790
+ ntimes: int, nbaselines: int, is_float_data: bool
791
+ ) -> float:
792
+ """
793
+ Size all data vars other than the DATA (visibility/spectrum) vars will have in the MSv4
794
+
795
+ For the rest of columns, including indices/iteration columns and other
796
+ scalar columns could say approx ->5% of the (large) data cols
797
+
798
+ """
799
+ # Small ones, but as they are loaded into data arrays, why not including,
800
+ # For example: UVW (3xscalar), EXPOSURE, TIME_CENTROID
801
+ # assuming float64 in output MSv4
802
+ item_size = 8
803
+ return ntimes * nbaselines * (3 + 1 + 1) * item_size
804
+
805
+ def calculate_term_calc_indx_for_row_split(msv2_nrows: int) -> float:
806
+ """
807
+ Account for the indices produced in calc_indx_for_row_split():
808
+ the dominating ones are: tidxs, bidxs, didxs.
809
+
810
+ In terms of amount of memory represented by this term relative to the
811
+ total, it becomes relevant proportionally to the ratio between
812
+ nrows / (chans x pols)
813
+ - for example LOFAR long scans/partitions with few channels,
814
+ but its value is independent from # chans, pols.
815
+ """
816
+ item_size = 8
817
+ # 3 are: tidxs, bidxs, didxs
818
+ return msv2_nrows * 3 * item_size
819
+
820
+ def calculate_term_other_msv2_indices(msv2_nrows: int) -> float:
821
+ """
822
+ Account for the allocations to load ID, etc. columns from input MSv2.
823
+ The converter needs to load: OBSERVATION_ID, INTERVAL, SCAN_NUMBER.
824
+ These are loaded one after another (allocations do not stack up).
825
+ Also, in most memory profiles these allocations are released once we
826
+ get to create_data_variables(). As such, adding this term will most
827
+ likely lead to overestimation (but adding it for safety).
828
+
829
+ Simlarly as with calculate_term_calc_indx_for_row_split() this term
830
+ becomes relevant when the ratio 'nrows / (chans x pols)' is high.
831
+ """
832
+ # assuming float64/int64 in input MSv2, which seems to be the case,
833
+ # except for OBSERVATION_ID (int32)
834
+ item_size = 8
835
+ return msv2_nrows * item_size
836
+
837
+ def calculate_term_attrs(size_estimate_main_xds: float) -> float:
838
+ """Rough guess which seems to be more than enough"""
839
+ # could also account for info_dicts (which seem to require typically ~1 MB)
840
+ return 10 * 1024 * 1024
841
+
842
+ def calculate_term_sub_xds(size_estimate_main_xds: float) -> float:
843
+ """
844
+ This is still very rough. Just seemingly working for now. Not taking into account the dims
845
+ of the sub-xdss, interpolation options used, etc.
846
+ """
847
+ # Most cases so far 1% seems enough
848
+ return 0.015 * size_estimate_main_xds
849
+
850
+ def calculate_term_to_zarr(size_estimate_main_xds: float) -> float:
851
+ """
852
+ The to_zarr call on the main_xds seems to allocate 10s or 100s of MBs, presumably for buffers.
853
+ That adds on top of the expected main_xds size.
854
+ This is currently a very rough extrapolation and is being (mis)used to give a safe up to 5-6%
855
+ overestimation. Perhaps we should drop this term once other sub-xdss are accounted for (and
856
+ this term could be replaced by a similar, smaller but still safe over-estimation percentage).
857
+ """
858
+ return 0.05 * size_estimate_main_xds
859
+
860
+ taql_partition = create_taql_query_where(partition)
861
+ taql_main = f"select * from $mtable {taql_partition}"
862
+ with open_table_ro(in_file) as mtable:
863
+ col_names = mtable.colnames()
864
+ with open_query(mtable, taql_main) as tb_tool:
865
+ # Do not feel tempted to rely on nrows. nrows tends to underestimate memory when baselines are missing.
866
+ # For some EVN datasets that can easily underestimate by a 50%
867
+ utimes, _tol = get_utimes_tol(mtable, taql_partition)
868
+ ntimes = len(utimes)
869
+ nbaselines = len(get_baselines(tb_tool))
870
+
871
+ # Still, use nrwos for estimations related to sizes of input (MSv2)
872
+ # columns, not sizes of output (MSv4) data vars
873
+ msv2_nrows = tb_tool.nrows()
874
+
875
+ sizes_all_data, is_float_data = calculate_term_all_data(
876
+ tb_tool, ntimes, nbaselines
877
+ )
878
+
879
+ size_largest_data = np.max(sizes_all_data)
880
+ sum_sizes_data = np.sum(sizes_all_data)
881
+ estimate_main_xds = (
882
+ sum_sizes_data
883
+ + calculate_term_weight_flag(size_largest_data, is_float_data)
884
+ + calculate_term_other_data_vars(ntimes, nbaselines, is_float_data)
885
+ )
886
+ estimate = (
887
+ estimate_main_xds
888
+ + calculate_term_calc_indx_for_row_split(msv2_nrows)
889
+ + calculate_term_other_msv2_indices(msv2_nrows)
890
+ + calculate_term_sub_xds(estimate_main_xds)
891
+ + calculate_term_to_zarr(estimate_main_xds)
892
+ )
893
+ estimate /= GiBYTES_TO_BYTES
894
+
895
+ return estimate
896
+
897
+
898
+ def estimate_memory_and_cores_for_partitions(
899
+ in_file: str, partitions: list
900
+ ) -> tuple[float, int, int]:
901
+ """
902
+ Estimates approximate memory required to convert an MSv2 to MSv4, given
903
+ a predefined set of partitions.
904
+ """
905
+ max_cores = len(partitions)
906
+
907
+ size_estimates = [
908
+ estimate_memory_for_partition(in_file, part_description)
909
+ for part_description in partitions
910
+ ]
911
+ max_estimate = np.max(size_estimates) if size_estimates else 0.0
912
+
913
+ recommended_cores = np.ceil(max_cores / 4).astype("int")
914
+
915
+ return float(max_estimate), int(max_cores), int(recommended_cores)
916
+
917
+
732
918
  def convert_and_write_partition(
733
919
  in_file: str,
734
920
  out_file: str,
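The per-partition estimate is dominated by ntimes x nbaselines x channels x polarisations times the cell size of each MSv2 data column, plus the derived WEIGHT/FLAG and per-row terms. A rough worked example of the main-xds terms, for a hypothetical interferometric partition with a single complex DATA column; the constants mirror the code above:

import numpy as np

# Hypothetical partition: 1000 integrations, 351 baselines, 64 channels, 4 pols
ntimes, nbaselines, nchan, npol = 1000, 351, 64, 4
cell_size = 4 * 2  # complex visibilities: 4 bytes, doubled for complex
size_data = ntimes * nbaselines * nchan * npol * cell_size

# WEIGHT (0.5 x DATA) and FLAG (1/8 x DATA) for interferometric (non-FLOAT_DATA) input
size_weight_flag = size_data * (0.5 + 1.0 / 8.0)

# UVW (3 values) + EXPOSURE + TIME_CENTROID as float64
size_other = ntimes * nbaselines * (3 + 1 + 1) * 8

estimate_main_xds = size_data + size_weight_flag + size_other
print(f"~{estimate_main_xds / 1024**3:.2f} GiB for the main xds")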
@@ -790,7 +976,7 @@ def convert_and_write_partition(
790
976
  _description_
791
977
  """
792
978
 
793
- taql_where = create_taql_query(partition_info)
979
+ taql_where = create_taql_query_where(partition_info)
794
980
  ddi = partition_info["DATA_DESC_ID"][0]
795
981
  intents = str(partition_info["OBS_MODE"][0])
796
982
 
@@ -839,9 +1025,11 @@
  start = time.time()
  xds = xr.Dataset(
  attrs={
- "creation_date": datetime.datetime.utcnow().isoformat(),
+ "creation_date": datetime.datetime.now(
+ datetime.timezone.utc
+ ).isoformat(),
  "xradio_version": importlib.metadata.version("xradio"),
- "schema_version": "4.0.-9994",
+ "schema_version": "4.0.-9991",
  "type": "visibility",
  }
  )
@@ -1085,6 +1273,8 @@
  else:
  xds.attrs["type"] = "visibility"

+ import sys
+
  start = time.time()
  if storage_backend == "zarr":
  xds.to_zarr(store=os.path.join(file_name, "correlated_xds"), mode=mode)
@@ -1193,7 +1383,12 @@ def antenna_ids_to_names(
1193
1383
  ]
1194
1384
  for unwanted_coord in unwanted_coords_from_ant_xds:
1195
1385
  xds = xds.drop_vars(unwanted_coord)
1196
- xds = xds.rename({"baseline_id": "antenna_name"})
1386
+
1387
+ # Rename a dim coord started generating warnings (index not re-created). Swap dims, create coord
1388
+ # https://github.com/pydata/xarray/pull/6999
1389
+ xds = xds.swap_dims({"baseline_id": "antenna_name"})
1390
+ xds = xds.assign_coords({"antenna_name": xds["baseline_id"].data})
1391
+ xds = xds.drop_vars("baseline_id")
1197
1392
 
1198
1393
  # drop more vars that seem unwanted in main_sd_xds, but there shouuld be a better way
1199
1394
  # of not creating them in the first place
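The swap_dims/assign_coords/drop_vars sequence reproduces the effect of the old rename without triggering xarray's warning about renaming a dimension coordinate (see the xarray PR linked in the comment above). A minimal sketch of the same pattern on a toy dataset with made-up names:

import numpy as np
import xarray as xr

ds = xr.Dataset(
    {"GAIN": ("baseline_id", np.ones(3))},
    coords={"baseline_id": ["ea01", "ea02", "ea03"]},
)

# Equivalent of the old ds.rename({"baseline_id": "antenna_name"}), warning-free:
ds = ds.swap_dims({"baseline_id": "antenna_name"})       # dim becomes antenna_name
ds = ds.assign_coords({"antenna_name": ds["baseline_id"].data})  # new dim coord
ds = ds.drop_vars("baseline_id")                          # drop the old coord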
@@ -1204,58 +1399,58 @@ def antenna_ids_to_names(
1204
1399
  return xds
1205
1400
 
1206
1401
 
1402
+ def add_group_to_data_groups(
1403
+ data_groups: dict, what_group: str, correlated_data_name: str, uvw: bool = True
1404
+ ):
1405
+ """
1406
+ Adds one correlated_data variable to the data_groups dict.
1407
+ A utility function to use when creating/updating data_groups from MSv2 data columns
1408
+ / data variables.
1409
+
1410
+ Parameters
1411
+ ----------
1412
+ data_groups: str
1413
+ The data_groups dict of an MSv4 xds. It is updated in-place
1414
+ what_group: str
1415
+ Name of the data group: "base", "corrected", "model", etc.
1416
+ correlated_data_name: str
1417
+ Name of the correlated_data var: "VISIBILITY", "VISIBILITY_CORRECTED", "SPECTRUM", etc.
1418
+ uvw: bool
1419
+ Whether to add a uvw field to the data group (assume True = interferometric data).
1420
+ """
1421
+ data_groups[what_group] = {
1422
+ "correlated_data": correlated_data_name,
1423
+ "flag": "FLAG",
1424
+ "weight": "WEIGHT",
1425
+ }
1426
+ if uvw:
1427
+ data_groups[what_group]["uvw"] = "UVW"
1428
+
1429
+
1207
1430
  def add_data_groups(xds):
1208
1431
  xds.attrs["data_groups"] = {}
1432
+
1433
+ data_groups = xds.attrs["data_groups"]
1209
1434
  if "VISIBILITY" in xds:
1210
- xds.attrs["data_groups"]["base"] = {
1211
- "correlated_data": "VISIBILITY",
1212
- "flag": "FLAG",
1213
- "weight": "WEIGHT",
1214
- "uvw": "UVW",
1215
- }
1435
+ add_group_to_data_groups(data_groups, "base", "VISIBILITY")
1216
1436
 
1217
1437
  if "VISIBILITY_CORRECTED" in xds:
1218
- xds.attrs["data_groups"]["corrected"] = {
1219
- "correlated_data": "VISIBILITY_CORRECTED",
1220
- "flag": "FLAG",
1221
- "weight": "WEIGHT",
1222
- "uvw": "UVW",
1223
- }
1438
+ add_group_to_data_groups(data_groups, "corrected", "VISIBILITY_CORRECTED")
1224
1439
 
1225
1440
  if "VISIBILITY_MODEL" in xds:
1226
- xds.attrs["data_groups"]["model"] = {
1227
- "correlated_data": "VISIBILITY_MODEL",
1228
- "flag": "FLAG",
1229
- "weight": "WEIGHT",
1230
- "uvw": "UVW",
1231
- }
1441
+ add_group_to_data_groups(data_groups, "model", "VISIBILITY_MODEL")
1232
1442
 
1233
1443
  is_single_dish = False
1234
1444
  if "SPECTRUM" in xds:
1235
- xds.attrs["data_groups"]["base"] = {
1236
- "correlated_data": "SPECTRUM",
1237
- "flag": "FLAG",
1238
- "weight": "WEIGHT",
1239
- "uvw": "UVW",
1240
- }
1445
+ add_group_to_data_groups(data_groups, "base", "SPECTRUM", False)
1241
1446
  is_single_dish = True
1242
1447
 
1243
1448
  if "SPECTRUM_MODEL" in xds:
1244
- xds.attrs["data_groups"]["model"] = {
1245
- "correlated_data": "SPECTRUM_MODEL",
1246
- "flag": "FLAG",
1247
- "weight": "WEIGHT",
1248
- "uvw": "UVW",
1249
- }
1449
+ add_group_to_data_groups(data_groups, "model", "SPECTRUM_MODEL", False)
1250
1450
  is_single_dish = True
1251
1451
 
1252
1452
  if "SPECTRUM_CORRECTED" in xds:
1253
- xds.attrs["data_groups"]["corrected"] = {
1254
- "correlated_data": "SPECTRUM_CORRECTED",
1255
- "flag": "FLAG",
1256
- "weight": "WEIGHT",
1257
- "uvw": "UVW",
1258
- }
1453
+ add_group_to_data_groups(data_groups, "corrected", "SPECTRUM_CORRECTED", False)
1259
1454
  is_single_dish = True
1260
1455
 
1261
1456
  return xds, is_single_dish
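For reference, a sketch of the data_groups attribute these helpers produce, for a hypothetical interferometric MSv4 that contains VISIBILITY and VISIBILITY_CORRECTED; the structure follows directly from add_group_to_data_groups above:

# Expected xds.attrs["data_groups"] after add_data_groups(xds):
expected_data_groups = {
    "base": {
        "correlated_data": "VISIBILITY",
        "flag": "FLAG",
        "weight": "WEIGHT",
        "uvw": "UVW",
    },
    "corrected": {
        "correlated_data": "VISIBILITY_CORRECTED",
        "flag": "FLAG",
        "weight": "WEIGHT",
        "uvw": "UVW",
    },
}
# Single-dish groups built from SPECTRUM* are identical except that no "uvw" key is added.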
@@ -15,7 +15,9 @@ from xradio.measurement_set._utils._msv2._tables.read import (
  table_exists,
  )
  from xradio._utils.schema import convert_generic_xds_to_xradio_schema
- from xradio.measurement_set._utils._msv2.msv4_sub_xdss import interpolate_to_time
+ from xradio.measurement_set._utils._msv2.msv4_sub_xdss import (
+ rename_and_interpolate_to_time,
+ )

  from xradio._utils.list_and_array import (
  check_if_consistent,
@@ -509,27 +511,8 @@ def create_phase_calibration_xds(
  phase_cal_xds.time_phase_cal.astype("float64").astype("float64") / 10**9
  )

- phase_cal_xds = interpolate_to_time(
- phase_cal_xds,
- phase_cal_interp_time,
- "antenna_xds",
- time_name="time_phase_cal",
+ phase_cal_xds = rename_and_interpolate_to_time(
+ phase_cal_xds, "time_phase_cal", phase_cal_interp_time, "phase_cal_xds"
  )

- time_coord_attrs = {
- "type": "time",
- "units": ["s"],
- "scale": "utc",
- "format": "unix",
- }
-
- # If we interpolate rename the time_phase_cal axis to time.
- if phase_cal_interp_time is not None:
- time_coord = {"time": ("time_phase_cal", phase_cal_interp_time.data)}
- phase_cal_xds = phase_cal_xds.assign_coords(time_coord)
- phase_cal_xds.coords["time"].attrs.update(time_coord_attrs)
- phase_cal_xds = phase_cal_xds.swap_dims({"time_phase_cal": "time"}).drop_vars(
- "time_phase_cal"
- )
-
  return phase_cal_xds
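The interpolate-then-rename boilerplate removed above (and the equivalent block removed from extract_ephemeris_info below) is now centralised in rename_and_interpolate_to_time in msv4_sub_xdss.py, whose hunk is not shown in this diff. A hedged sketch of what such a helper presumably does, reconstructed from the removed inline code; the parameter names and exact signature are assumptions:

def rename_and_interpolate_to_time(xds, time_name, interp_time, message_prefix):
    """Sketch only: interpolate xds along `time_name` to `interp_time` (when given)
    and rename that dimension to "time"."""
    # interpolate_to_time is the pre-existing helper in msv4_sub_xdss.py
    xds = interpolate_to_time(xds, interp_time, message_prefix, time_name=time_name)
    if interp_time is None:
        return xds
    time_coord_attrs = {"type": "time", "units": ["s"], "scale": "utc", "format": "unix"}
    xds = xds.assign_coords({"time": (time_name, interp_time.data)})
    xds.coords["time"].attrs.update(time_coord_attrs)
    return xds.swap_dims({time_name: "time"}).drop_vars(time_name)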
@@ -6,7 +6,9 @@ import numpy as np
  import xarray as xr

  import toolviper.utils.logger as logger
- from xradio.measurement_set._utils._msv2.msv4_sub_xdss import interpolate_to_time
+ from xradio.measurement_set._utils._msv2.msv4_sub_xdss import (
+ rename_and_interpolate_to_time,
+ )
  from xradio.measurement_set._utils._msv2.subtables import subt_rename_ids
  from xradio.measurement_set._utils._msv2._tables.read import (
  convert_casacore_time_to_mjd,
@@ -363,20 +365,13 @@ def extract_ephemeris_info(
  }
  temp_xds["time_ephemeris"].attrs.update(time_coord_attrs)

- # Convert to si units and interpolate if ephemeris_interpolate=True:
+ # Convert to si units
  temp_xds = convert_to_si_units(temp_xds)
- temp_xds = interpolate_to_time(
- temp_xds, interp_time, "field_and_source_xds", time_name="time_ephemeris"
- )

- # If we interpolate rename the time_ephemeris axis to time.
- if interp_time is not None:
- time_coord = {"time": ("time_ephemeris", interp_time.data)}
- temp_xds = temp_xds.assign_coords(time_coord)
- temp_xds.coords["time"].attrs.update(time_coord_attrs)
- temp_xds = temp_xds.swap_dims({"time_ephemeris": "time"}).drop_vars(
- "time_ephemeris"
- )
+ # interpolate if ephemeris_interpolate/interp_time=True, and rename time_ephemeris=>time
+ temp_xds = rename_and_interpolate_to_time(
+ temp_xds, "time_ephemeris", interp_time, "field_and_source_xds"
+ )

  xds = xr.merge([xds, temp_xds])