xradio 0.0.44__tar.gz → 0.0.45__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (83) hide show
  1. {xradio-0.0.44/src/xradio.egg-info → xradio-0.0.45}/PKG-INFO +1 -1
  2. {xradio-0.0.44 → xradio-0.0.45}/pyproject.toml +1 -1
  3. xradio-0.0.45/src/xradio/_utils/dict_helpers.py +14 -0
  4. {xradio-0.0.44 → xradio-0.0.45}/src/xradio/image/_util/_casacore/xds_from_casacore.py +4 -17
  5. {xradio-0.0.44 → xradio-0.0.45}/src/xradio/image/_util/_fits/xds_from_fits.py +7 -18
  6. {xradio-0.0.44 → xradio-0.0.45}/src/xradio/image/_util/common.py +3 -6
  7. {xradio-0.0.44 → xradio-0.0.45}/src/xradio/image/_util/image_factory.py +4 -9
  8. {xradio-0.0.44 → xradio-0.0.45}/src/xradio/measurement_set/__init__.py +5 -1
  9. {xradio-0.0.44 → xradio-0.0.45}/src/xradio/measurement_set/_utils/_msv2/conversion.py +199 -4
  10. {xradio-0.0.44 → xradio-0.0.45}/src/xradio/measurement_set/_utils/_msv2/create_antenna_xds.py +5 -22
  11. {xradio-0.0.44 → xradio-0.0.45}/src/xradio/measurement_set/_utils/_msv2/create_field_and_source_xds.py +8 -13
  12. {xradio-0.0.44 → xradio-0.0.45}/src/xradio/measurement_set/_utils/_msv2/msv4_sub_xdss.py +79 -23
  13. {xradio-0.0.44 → xradio-0.0.45}/src/xradio/measurement_set/_utils/_msv2/partition_queries.py +4 -5
  14. {xradio-0.0.44 → xradio-0.0.45}/src/xradio/measurement_set/convert_msv2_to_processing_set.py +41 -1
  15. {xradio-0.0.44 → xradio-0.0.45/src/xradio.egg-info}/PKG-INFO +1 -1
  16. {xradio-0.0.44 → xradio-0.0.45}/src/xradio.egg-info/SOURCES.txt +1 -0
  17. {xradio-0.0.44 → xradio-0.0.45}/LICENSE.txt +0 -0
  18. {xradio-0.0.44 → xradio-0.0.45}/MANIFEST.in +0 -0
  19. {xradio-0.0.44 → xradio-0.0.45}/README.md +0 -0
  20. {xradio-0.0.44 → xradio-0.0.45}/setup.cfg +0 -0
  21. {xradio-0.0.44 → xradio-0.0.45}/src/xradio/__init__.py +0 -0
  22. {xradio-0.0.44 → xradio-0.0.45}/src/xradio/_utils/__init__.py +0 -0
  23. {xradio-0.0.44 → xradio-0.0.45}/src/xradio/_utils/_casacore/tables.py +0 -0
  24. {xradio-0.0.44 → xradio-0.0.45}/src/xradio/_utils/coord_math.py +0 -0
  25. {xradio-0.0.44 → xradio-0.0.45}/src/xradio/_utils/list_and_array.py +0 -0
  26. {xradio-0.0.44 → xradio-0.0.45}/src/xradio/_utils/schema.py +0 -0
  27. {xradio-0.0.44 → xradio-0.0.45}/src/xradio/_utils/zarr/__init__.py +0 -0
  28. {xradio-0.0.44 → xradio-0.0.45}/src/xradio/_utils/zarr/common.py +0 -0
  29. {xradio-0.0.44 → xradio-0.0.45}/src/xradio/image/__init__.py +0 -0
  30. {xradio-0.0.44 → xradio-0.0.45}/src/xradio/image/_util/__init__.py +0 -0
  31. {xradio-0.0.44 → xradio-0.0.45}/src/xradio/image/_util/_casacore/__init__.py +0 -0
  32. {xradio-0.0.44 → xradio-0.0.45}/src/xradio/image/_util/_casacore/common.py +0 -0
  33. {xradio-0.0.44 → xradio-0.0.45}/src/xradio/image/_util/_casacore/xds_to_casacore.py +0 -0
  34. {xradio-0.0.44 → xradio-0.0.45}/src/xradio/image/_util/_zarr/common.py +0 -0
  35. {xradio-0.0.44 → xradio-0.0.45}/src/xradio/image/_util/_zarr/xds_from_zarr.py +0 -0
  36. {xradio-0.0.44 → xradio-0.0.45}/src/xradio/image/_util/_zarr/xds_to_zarr.py +0 -0
  37. {xradio-0.0.44 → xradio-0.0.45}/src/xradio/image/_util/_zarr/zarr_low_level.py +0 -0
  38. {xradio-0.0.44 → xradio-0.0.45}/src/xradio/image/_util/casacore.py +0 -0
  39. {xradio-0.0.44 → xradio-0.0.45}/src/xradio/image/_util/fits.py +0 -0
  40. {xradio-0.0.44 → xradio-0.0.45}/src/xradio/image/_util/zarr.py +0 -0
  41. {xradio-0.0.44 → xradio-0.0.45}/src/xradio/image/image.py +0 -0
  42. {xradio-0.0.44 → xradio-0.0.45}/src/xradio/measurement_set/_utils/__init__.py +0 -0
  43. {xradio-0.0.44 → xradio-0.0.45}/src/xradio/measurement_set/_utils/_msv2/_tables/load.py +0 -0
  44. {xradio-0.0.44 → xradio-0.0.45}/src/xradio/measurement_set/_utils/_msv2/_tables/load_main_table.py +0 -0
  45. {xradio-0.0.44 → xradio-0.0.45}/src/xradio/measurement_set/_utils/_msv2/_tables/read.py +0 -0
  46. {xradio-0.0.44 → xradio-0.0.45}/src/xradio/measurement_set/_utils/_msv2/_tables/read_main_table.py +0 -0
  47. {xradio-0.0.44 → xradio-0.0.45}/src/xradio/measurement_set/_utils/_msv2/_tables/read_subtables.py +0 -0
  48. {xradio-0.0.44 → xradio-0.0.45}/src/xradio/measurement_set/_utils/_msv2/_tables/table_query.py +0 -0
  49. {xradio-0.0.44 → xradio-0.0.45}/src/xradio/measurement_set/_utils/_msv2/_tables/write.py +0 -0
  50. {xradio-0.0.44 → xradio-0.0.45}/src/xradio/measurement_set/_utils/_msv2/_tables/write_exp_api.py +0 -0
  51. {xradio-0.0.44 → xradio-0.0.45}/src/xradio/measurement_set/_utils/_msv2/chunks.py +0 -0
  52. {xradio-0.0.44 → xradio-0.0.45}/src/xradio/measurement_set/_utils/_msv2/descr.py +0 -0
  53. {xradio-0.0.44 → xradio-0.0.45}/src/xradio/measurement_set/_utils/_msv2/msv2_msv3.py +0 -0
  54. {xradio-0.0.44 → xradio-0.0.45}/src/xradio/measurement_set/_utils/_msv2/msv2_to_msv4_meta.py +0 -0
  55. {xradio-0.0.44 → xradio-0.0.45}/src/xradio/measurement_set/_utils/_msv2/msv4_info_dicts.py +0 -0
  56. {xradio-0.0.44 → xradio-0.0.45}/src/xradio/measurement_set/_utils/_msv2/optimised_functions.py +0 -0
  57. {xradio-0.0.44 → xradio-0.0.45}/src/xradio/measurement_set/_utils/_msv2/partitions.py +0 -0
  58. {xradio-0.0.44 → xradio-0.0.45}/src/xradio/measurement_set/_utils/_msv2/subtables.py +0 -0
  59. {xradio-0.0.44 → xradio-0.0.45}/src/xradio/measurement_set/_utils/_utils/cds.py +0 -0
  60. {xradio-0.0.44 → xradio-0.0.45}/src/xradio/measurement_set/_utils/_utils/partition_attrs.py +0 -0
  61. {xradio-0.0.44 → xradio-0.0.45}/src/xradio/measurement_set/_utils/_utils/stokes_types.py +0 -0
  62. {xradio-0.0.44 → xradio-0.0.45}/src/xradio/measurement_set/_utils/_utils/xds_helper.py +0 -0
  63. {xradio-0.0.44 → xradio-0.0.45}/src/xradio/measurement_set/_utils/_zarr/encoding.py +0 -0
  64. {xradio-0.0.44 → xradio-0.0.45}/src/xradio/measurement_set/_utils/_zarr/read.py +0 -0
  65. {xradio-0.0.44 → xradio-0.0.45}/src/xradio/measurement_set/_utils/_zarr/write.py +0 -0
  66. {xradio-0.0.44 → xradio-0.0.45}/src/xradio/measurement_set/_utils/msv2.py +0 -0
  67. {xradio-0.0.44 → xradio-0.0.45}/src/xradio/measurement_set/_utils/zarr.py +0 -0
  68. {xradio-0.0.44 → xradio-0.0.45}/src/xradio/measurement_set/load_processing_set.py +0 -0
  69. {xradio-0.0.44 → xradio-0.0.45}/src/xradio/measurement_set/measurement_set_xds.py +0 -0
  70. {xradio-0.0.44 → xradio-0.0.45}/src/xradio/measurement_set/open_processing_set.py +0 -0
  71. {xradio-0.0.44 → xradio-0.0.45}/src/xradio/measurement_set/processing_set.py +0 -0
  72. {xradio-0.0.44 → xradio-0.0.45}/src/xradio/measurement_set/schema.py +0 -0
  73. {xradio-0.0.44 → xradio-0.0.45}/src/xradio/schema/__init__.py +0 -0
  74. {xradio-0.0.44 → xradio-0.0.45}/src/xradio/schema/bases.py +0 -0
  75. {xradio-0.0.44 → xradio-0.0.45}/src/xradio/schema/check.py +0 -0
  76. {xradio-0.0.44 → xradio-0.0.45}/src/xradio/schema/dataclass.py +0 -0
  77. {xradio-0.0.44 → xradio-0.0.45}/src/xradio/schema/metamodel.py +0 -0
  78. {xradio-0.0.44 → xradio-0.0.45}/src/xradio/schema/typing.py +0 -0
  79. {xradio-0.0.44 → xradio-0.0.45}/src/xradio/sphinx/__init__.py +0 -0
  80. {xradio-0.0.44 → xradio-0.0.45}/src/xradio/sphinx/schema_table.py +0 -0
  81. {xradio-0.0.44 → xradio-0.0.45}/src/xradio.egg-info/dependency_links.txt +0 -0
  82. {xradio-0.0.44 → xradio-0.0.45}/src/xradio.egg-info/requires.txt +0 -0
  83. {xradio-0.0.44 → xradio-0.0.45}/src/xradio.egg-info/top_level.txt +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: xradio
3
- Version: 0.0.44
3
+ Version: 0.0.45
4
4
  Summary: Xarray Radio Astronomy Data IO
5
5
  Author-email: Jan-Willem Steeb <jsteeb@nrao.edu>
6
6
  License: BSD 3-Clause License
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "xradio"
3
- version = "0.0.44"
3
+ version = "0.0.45"
4
4
  description = " Xarray Radio Astronomy Data IO"
5
5
  authors = [
6
6
  {name = "Jan-Willem Steeb", email="jsteeb@nrao.edu"},
@@ -0,0 +1,14 @@
1
+ def make_quantity(value, units: str) -> dict:
2
+ """
3
+ create a quantity dictionary given value and units
4
+ Parameters
5
+ ----------
6
+ value : numeric or array of numerics
7
+ Quantity value
8
+ units: str
9
+ Quantity units
10
+ Returns
11
+ -------
12
+ dict
13
+ """
14
+ return {"value": value, "units": units, "type": "quantity"}
@@ -31,6 +31,7 @@ from ..common import (
31
31
  )
32
32
  from ...._utils._casacore.tables import extract_table_attributes, open_table_ro
33
33
  from xradio._utils.coord_math import _deg_to_rad
34
+ from xradio._utils.dict_helpers import make_quantity
34
35
 
35
36
  """
36
37
  def _add_coord_attrs(xds: xr.Dataset, icoords: dict, dir_axes: list) -> xr.Dataset:
@@ -62,13 +63,7 @@ def _add_freq_attrs(xds, coord_dict):
62
63
  for k in coord_dict:
63
64
  if k.startswith("spectral"):
64
65
  sd = coord_dict[k]
65
- # meta["native_type"] = _native_types[sd["nativeType"]]
66
- meta["rest_frequency"] = {
67
- "type": "quantity",
68
- "units": "Hz",
69
- "value": sd["restfreq"],
70
- }
71
- # meta["restfreqs"] = {'type': 'quantity', 'units': 'Hz', 'value': list(sd["restfreqs"])}
66
+ meta["rest_frequency"] = make_quantity(sd["restfreq"], "Hz")
72
67
  meta["type"] = "frequency"
73
68
  meta["units"] = sd["unit"]
74
69
  meta["frame"] = sd["system"]
@@ -184,11 +179,7 @@ def _casa_image_to_xds_attrs(img_full_path: str, history: bool = True) -> dict:
184
179
  k = "latpole"
185
180
  if k in coord_dir_dict:
186
181
  for j in (k, "longpole"):
187
- dir_dict[j] = {
188
- "value": coord_dir_dict[j] * _deg_to_rad,
189
- "units": "rad",
190
- "type": "quantity",
191
- }
182
+ dir_dict[j] = make_quantity(coord_dir_dict[j] * _deg_to_rad, "rad")
192
183
  for j in ("pc", "projection_parameters", "projection"):
193
184
  if j in coord_dir_dict:
194
185
  dir_dict[j] = coord_dir_dict[j]
@@ -518,11 +509,7 @@ def _get_freq_values_attrs(
518
509
  crpix=wcs["crpix"],
519
510
  cdelt=wcs["cdelt"],
520
511
  )
521
- attrs["rest_frequency"] = {
522
- "type": "quantity",
523
- "units": "Hz",
524
- "value": sd["restfreq"],
525
- }
512
+ attrs["rest_frequency"] = make_quantity(sd["restfreq"], "Hz")
526
513
  attrs["type"] = "frequency"
527
514
  attrs["units"] = sd["unit"]
528
515
  attrs["frame"] = sd["system"]
@@ -16,6 +16,7 @@ from ..common import (
16
16
  _l_m_attr_notes,
17
17
  )
18
18
  from xradio._utils.coord_math import _deg_to_rad
19
+ from xradio._utils.dict_helpers import make_quantity
19
20
  import copy
20
21
  import dask
21
22
  import dask.array as da
@@ -77,11 +78,7 @@ def _add_freq_attrs(xds: xr.Dataset, helpers: dict) -> xr.Dataset:
77
78
  freq_coord = xds.coords["frequency"]
78
79
  meta = {}
79
80
  if helpers["has_freq"]:
80
- meta["rest_frequency"] = {
81
- "type": "quantity",
82
- "units": "Hz",
83
- "value": helpers["restfreq"],
84
- }
81
+ meta["rest_frequency"] = make_quantity(helpers["restfreq"], "Hz")
85
82
  meta["frame"] = helpers["specsys"]
86
83
  meta["units"] = "Hz"
87
84
  meta["type"] = "frequency"
@@ -184,16 +181,8 @@ def _xds_direction_attrs_from_header(helpers: dict, header) -> dict:
184
181
  direction["reference"]["value"][i] = x.value
185
182
  x = helpers["cdelt"][i] * u.Unit(_get_unit(helpers["cunit"][i]))
186
183
  x = x.to("rad")
187
- direction["latpole"] = {
188
- "value": header["LATPOLE"] * _deg_to_rad,
189
- "units": "rad",
190
- "type": "quantity",
191
- }
192
- direction["longpole"] = {
193
- "value": header["LONPOLE"] * _deg_to_rad,
194
- "units": "rad",
195
- "type": "quantity",
196
- }
184
+ direction["latpole"] = make_quantity(header["LATPOLE"] * _deg_to_rad, "rad")
185
+ direction["longpole"] = make_quantity(header["LONPOLE"] * _deg_to_rad, "rad")
197
186
  pc = np.zeros([2, 2])
198
187
  for i in (0, 1):
199
188
  for j in (0, 1):
@@ -325,9 +314,9 @@ def _beam_attr_from_header(helpers: dict, header) -> Union[dict, str, None]:
325
314
  if "BMAJ" in header:
326
315
  # single global beam
327
316
  beam = {
328
- "bmaj": {"type": "quantity", "units": "arcsec", "value": header["BMAJ"]},
329
- "bmin": {"type": "quantity", "units": "arcsec", "value": header["BMIN"]},
330
- "pa": {"type": "quantity", "units": "arcsec", "value": header["BPA"]},
317
+ "bmaj": make_quantity(header["BMAJ"], "arcsec"),
318
+ "bmin": make_quantity(header["BMIN"], "arcsec"),
319
+ "pa": make_quantity(header["BPA"], "arcsec"),
331
320
  }
332
321
  return _convert_beam_to_rad(beam)
333
322
  elif "CASAMBM" in header and header["CASAMBM"]:
@@ -6,6 +6,7 @@ import numpy as np
6
6
  from typing import Dict, List
7
7
  import xarray as xr
8
8
  from xradio._utils.coord_math import _deg_to_rad
9
+ from xradio._utils.dict_helpers import make_quantity
9
10
 
10
11
  _c = 2.99792458e08 * u.m / u.s
11
12
  # OPTICAL = Z
@@ -39,7 +40,7 @@ def _convert_beam_to_rad(beam: dict) -> dict:
39
40
  q = u.quantity.Quantity(f"{beam[k]['value']}{beam[k]['units']}")
40
41
  q = q.to("rad")
41
42
  j = "pa" if k == "positionangle" else k
42
- mybeam[j] = {"type": "quantity", "value": q.value, "units": "rad"}
43
+ mybeam[j] = make_quantity(q.value, "rad")
43
44
  return mybeam
44
45
 
45
46
 
@@ -102,11 +103,7 @@ def _numpy_arrayize_dv(xds: xr.Dataset) -> xr.Dataset:
102
103
 
103
104
  def _default_freq_info() -> dict:
104
105
  return {
105
- "rest_frequency": {
106
- "value": 1420405751.7860003,
107
- "units": "Hz",
108
- "type": "quantity",
109
- },
106
+ "rest_frequency": make_quantity(1420405751.7860003, "Hz"),
110
107
  "type": "frequency",
111
108
  "frame": "LSRK",
112
109
  "units": "Hz",
@@ -4,6 +4,7 @@ import xarray as xr
4
4
  from typing import List, Union
5
5
  from .common import _c, _compute_world_sph_dims, _l_m_attr_notes
6
6
  from xradio._utils.coord_math import _deg_to_rad
7
+ from xradio._utils.dict_helpers import make_quantity
7
8
 
8
9
 
9
10
  def _input_checks(
@@ -46,11 +47,7 @@ def _add_common_attrs(
46
47
  xds.time.attrs = {"format": "MJD", "scale": "UTC", "units": "d"}
47
48
  freq_vals = np.array(xds.frequency)
48
49
  xds.frequency.attrs = {
49
- "rest_frequency": {
50
- "type": "quantity",
51
- "units": "Hz",
52
- "value": restfreq,
53
- },
50
+ "rest_frequency": make_quantity(restfreq, "Hz"),
54
51
  "frame": spectral_reference.upper(),
55
52
  "units": "Hz",
56
53
  "wave_unit": "mm",
@@ -69,8 +66,8 @@ def _add_common_attrs(
69
66
  "value": list(phase_center),
70
67
  "units": ["rad", "rad"],
71
68
  },
72
- "longpole": {"type": "quantity", "value": np.pi, "units": "rad"},
73
- "latpole": {"type": "quantity", "value": 0.0, "units": "rad"},
69
+ "longpole": make_quantity(np.pi, "rad"),
70
+ "latpole": make_quantity(0.0, "rad"),
74
71
  "pc": np.array([[1.0, 0.0], [0.0, 1.0]]),
75
72
  "projection": projection,
76
73
  "projection_parameters": [0.0, 0.0],
@@ -289,7 +286,6 @@ def _make_empty_lmuv_image(
289
286
  "crval": 0.0,
290
287
  "cdelt": -abs(sky_image_cell_size[0]),
291
288
  "units": "rad",
292
- "type": "quantity",
293
289
  "note": attr_note["l"],
294
290
  }
295
291
  xds.m.attrs = {
@@ -297,7 +293,6 @@ def _make_empty_lmuv_image(
297
293
  "crval": 0.0,
298
294
  "cdelt": abs(sky_image_cell_size[1]),
299
295
  "units": "rad",
300
- "type": "quantity",
301
296
  "note": attr_note["m"],
302
297
  }
303
298
  xds.u.attrs = {
@@ -1,7 +1,10 @@
1
1
  from .processing_set import ProcessingSet
2
2
  from .open_processing_set import open_processing_set
3
3
  from .load_processing_set import load_processing_set, ProcessingSetIterator
4
- from .convert_msv2_to_processing_set import convert_msv2_to_processing_set
4
+ from .convert_msv2_to_processing_set import (
5
+ convert_msv2_to_processing_set,
6
+ estimate_conversion_memory_and_cores,
7
+ )
5
8
  from .measurement_set_xds import MeasurementSetXds
6
9
 
7
10
  from .schema import SpectrumXds, VisibilityXds
@@ -13,6 +16,7 @@ __all__ = [
13
16
  "load_processing_set",
14
17
  "ProcessingSetIterator",
15
18
  "convert_msv2_to_processing_set",
19
+ "estimate_conversion_memory_and_cores",
16
20
  "SpectrumXds",
17
21
  "VisibilityXds",
18
22
  ]
@@ -679,7 +679,7 @@ def get_weight(
679
679
  return xds
680
680
 
681
681
 
682
- def create_taql_query(partition_info):
682
+ def create_taql_query_where(partition_info: dict):
683
683
  main_par_table_cols = [
684
684
  "DATA_DESC_ID",
685
685
  "OBSERVATION_ID",
@@ -729,6 +729,192 @@ def fix_uvw_frame(
729
729
  return xds
730
730
 
731
731
 
732
+ def estimate_memory_for_partition(in_file: str, partition: dict) -> float:
733
+ """
734
+ Aim: given a partition description, estimate a safe maximum memory value while avoiding overestimation
735
+ (at least, without adding factors that are not well understood).
736
+ """
737
+
738
+ def calculate_term_all_data(
739
+ tb_tool: tables.table, ntimes: float, nbaselines: float
740
+ ) -> tuple[list[float], bool]:
741
+ """
742
+ Size that DATA vars from MS will have in the MSv4, whether this MS has FLOAT_DATA
743
+ """
744
+ sizes_all_data_vars = []
745
+ col_names = tb_tool.colnames()
746
+ for data_col in ["DATA", "CORRECTED_DATA", "MODEL_DATA", "FLOAT_DATA"]:
747
+ if data_col in col_names:
748
+ col_descr = tb_tool.getcoldesc(data_col)
749
+ if "shape" in col_descr and isinstance(col_descr["shape"], np.ndarray):
750
+ # example: "shape": array([15, 4]) => gives pols x channels
751
+ cells_in_row = col_descr["shape"].prod()
752
+ npols = col_descr["shape"][-1]
753
+ else:
754
+ first_row = np.array(tb_tool.col(data_col)[0])
755
+ cells_in_row = np.prod(first_row.shape)
756
+ npols = first_row.shape[-1]
757
+
758
+ if col_descr["valueType"] == "complex":
759
+ # Assume. Otherwise, read first column and get the itemsize:
760
+ # col_dtype = np.array(mtable.col(data_col)[0]).dtype
761
+ # cell_size = col_dtype.itemsize
762
+ cell_size = 4
763
+ if data_col != "FLOAT_DATA":
764
+ cell_size *= 2
765
+ elif col_descr["valueType"] == "float":
766
+ cell_size = 4
767
+
768
+ # cells_in_row should account for the polarization and frequency dims
769
+ size_data_var = ntimes * nbaselines * cells_in_row * cell_size
770
+
771
+ sizes_all_data_vars.append(size_data_var)
772
+
773
+ is_float_data = "FLOAT_DATA" in col_names
774
+
775
+ return sizes_all_data_vars, is_float_data
776
+
777
+ def calculate_term_weight_flag(size_largest_data, is_float_data) -> float:
778
+ """
779
+ Size that WEIGHT and FLAG will have in the MSv4, derived from the size of the
780
+ MSv2 DATA col=> MSv4 VIS/SPECTRUM data var.
781
+ """
782
+ # Factors of the relative "cell_size" wrt the DATA var
783
+ # WEIGHT_SPECTRUM size: DATA (IF), DATA/2 (SD)
784
+ factor_weight = 1.0 if is_float_data else 0.5
785
+ factor_flag = 1.0 / 4.0 if is_float_data else 1.0 / 8.0
786
+
787
+ return size_largest_data * (factor_weight + factor_flag)
788
+
789
+ def calculate_term_other_data_vars(
790
+ ntimes: int, nbaselines: int, is_float_data: bool
791
+ ) -> float:
792
+ """
793
+ Size all data vars other than the DATA (visibility/spectrum) vars will have in the MSv4
794
+
795
+ For the rest of columns, including indices/iteration columns and other
796
+ scalar columns could say approx ->5% of the (large) data cols
797
+
798
+ """
799
+ # Small ones, but as they are loaded into data arrays, why not including,
800
+ # For example: UVW (3xscalar), EXPOSURE, TIME_CENTROID
801
+ # assuming float64 in output MSv4
802
+ item_size = 8
803
+ return ntimes * nbaselines * (3 + 1 + 1) * item_size
804
+
805
+ def calculate_term_calc_indx_for_row_split(msv2_nrows: int) -> float:
806
+ """
807
+ Account for the indices produced in calc_indx_for_row_split():
808
+ the dominating ones are: tidxs, bidxs, didxs.
809
+
810
+ In terms of amount of memory represented by this term relative to the
811
+ total, it becomes relevant proportionally to the ratio between
812
+ nrows / (chans x pols)
813
+ - for example LOFAR long scans/partitions with few channels,
814
+ but its value is independent from # chans, pols.
815
+ """
816
+ item_size = 8
817
+ # 3 are: tidxs, bidxs, didxs
818
+ return msv2_nrows * 3 * item_size
819
+
820
+ def calculate_term_other_msv2_indices(msv2_nrows: int) -> float:
821
+ """
822
+ Account for the allocations to load ID, etc. columns from input MSv2.
823
+ The converter needs to load: OBSERVATION_ID, INTERVAL, SCAN_NUMBER.
824
+ These are loaded one after another (allocations do not stack up).
825
+ Also, in most memory profiles these allocations are released once we
826
+ get to create_data_variables(). As such, adding this term will most
827
+ likely lead to overestimation (but adding it for safety).
828
+
829
+ Similarly to calculate_term_calc_indx_for_row_split(), this term
830
+ becomes relevant when the ratio 'nrows / (chans x pols)' is high.
831
+ """
832
+ # assuming float64/int64 in input MSv2, which seems to be the case,
833
+ # except for OBSERVATION_ID (int32)
834
+ item_size = 8
835
+ return msv2_nrows * item_size
836
+
837
+ def calculate_term_attrs(size_estimate_main_xds: float) -> float:
838
+ """Rough guess which seems to be more than enough"""
839
+ # could also account for info_dicts (which seem to require typically ~1 MB)
840
+ return 10 * 1024 * 1024
841
+
842
+ def calculate_term_sub_xds(size_estimate_main_xds: float) -> float:
843
+ """
844
+ This is still very rough. Just seemingly working for now. Not taking into account the dims
845
+ of the sub-xdss, interpolation options used, etc.
846
+ """
847
+ # In most cases so far 1% seems enough; the factor used below is 1.5%
848
+ return 0.015 * size_estimate_main_xds
849
+
850
+ def calculate_term_to_zarr(size_estimate_main_xds: float) -> float:
851
+ """
852
+ The to_zarr call on the main_xds seems to allocate 10s or 100s of MBs, presumably for buffers.
853
+ That adds on top of the expected main_xds size.
854
+ This is currently a very rough extrapolation and is being (mis)used to give a safe up to 5-6%
855
+ overestimation. Perhaps we should drop this term once other sub-xdss are accounted for (and
856
+ this term could be replaced by a similar, smaller but still safe over-estimation percentage).
857
+ """
858
+ return 0.05 * size_estimate_main_xds
859
+
860
+ taql_partition = create_taql_query_where(partition)
861
+ taql_main = f"select * from $mtable {taql_partition}"
862
+ with open_table_ro(in_file) as mtable:
863
+ col_names = mtable.colnames()
864
+ with open_query(mtable, taql_main) as tb_tool:
865
+ # Do not feel tempted to rely on nrows. nrows tends to underestimate memory when baselines are missing.
866
+ # For some EVN datasets that can easily underestimate by 50%
867
+ utimes, _tol = get_utimes_tol(mtable, taql_partition)
868
+ ntimes = len(utimes)
869
+ nbaselines = len(get_baselines(tb_tool))
870
+
871
+ # Still, use nrows for estimations related to sizes of input (MSv2)
872
+ # columns, not sizes of output (MSv4) data vars
873
+ msv2_nrows = tb_tool.nrows()
874
+
875
+ sizes_all_data, is_float_data = calculate_term_all_data(
876
+ tb_tool, ntimes, nbaselines
877
+ )
878
+
879
+ size_largest_data = np.max(sizes_all_data)
880
+ sum_sizes_data = np.sum(sizes_all_data)
881
+ estimate_main_xds = (
882
+ sum_sizes_data
883
+ + calculate_term_weight_flag(size_largest_data, is_float_data)
884
+ + calculate_term_other_data_vars(ntimes, nbaselines, is_float_data)
885
+ )
886
+ estimate = (
887
+ estimate_main_xds
888
+ + calculate_term_calc_indx_for_row_split(msv2_nrows)
889
+ + calculate_term_other_msv2_indices(msv2_nrows)
890
+ + calculate_term_sub_xds(estimate_main_xds)
891
+ + calculate_term_to_zarr(estimate_main_xds)
892
+ )
893
+ estimate /= GiBYTES_TO_BYTES
894
+
895
+ return estimate
896
+
897
+
898
+ def estimate_memory_and_cores_for_partitions(
899
+ in_file: str, partitions: list
900
+ ) -> tuple[float, int, int]:
901
+ """
902
+ Estimates approximate memory required to convert an MSv2 to MSv4, given
903
+ a predefined set of partitions.
904
+ """
905
+ max_cores = len(partitions)
906
+
907
+ size_estimates = [
908
+ estimate_memory_for_partition(in_file, part_description)
909
+ for part_description in partitions
910
+ ]
911
+ max_estimate = np.max(size_estimates) if size_estimates else 0.0
912
+
913
+ recommended_cores = np.ceil(max_cores / 4).astype("int")
914
+
915
+ return float(max_estimate), int(max_cores), int(recommended_cores)
916
+
917
+
732
918
  def convert_and_write_partition(
733
919
  in_file: str,
734
920
  out_file: str,
@@ -790,7 +976,7 @@ def convert_and_write_partition(
790
976
  _description_
791
977
  """
792
978
 
793
- taql_where = create_taql_query(partition_info)
979
+ taql_where = create_taql_query_where(partition_info)
794
980
  ddi = partition_info["DATA_DESC_ID"][0]
795
981
  intents = str(partition_info["OBS_MODE"][0])
796
982
 
@@ -839,7 +1025,9 @@ def convert_and_write_partition(
839
1025
  start = time.time()
840
1026
  xds = xr.Dataset(
841
1027
  attrs={
842
- "creation_date": datetime.datetime.utcnow().isoformat(),
1028
+ "creation_date": datetime.datetime.now(
1029
+ datetime.timezone.utc
1030
+ ).isoformat(),
843
1031
  "xradio_version": importlib.metadata.version("xradio"),
844
1032
  "schema_version": "4.0.-9994",
845
1033
  "type": "visibility",
@@ -1085,6 +1273,8 @@ def convert_and_write_partition(
1085
1273
  else:
1086
1274
  xds.attrs["type"] = "visibility"
1087
1275
 
1276
+ import sys
1277
+
1088
1278
  start = time.time()
1089
1279
  if storage_backend == "zarr":
1090
1280
  xds.to_zarr(store=os.path.join(file_name, "correlated_xds"), mode=mode)
@@ -1193,7 +1383,12 @@ def antenna_ids_to_names(
1193
1383
  ]
1194
1384
  for unwanted_coord in unwanted_coords_from_ant_xds:
1195
1385
  xds = xds.drop_vars(unwanted_coord)
1196
- xds = xds.rename({"baseline_id": "antenna_name"})
1386
+
1387
+ # Renaming a dim coord started generating warnings (index not re-created). Swap dims, create coord
1388
+ # https://github.com/pydata/xarray/pull/6999
1389
+ xds = xds.swap_dims({"baseline_id": "antenna_name"})
1390
+ xds = xds.assign_coords({"antenna_name": xds["baseline_id"].data})
1391
+ xds = xds.drop_vars("baseline_id")
1197
1392
 
1198
1393
  # drop more vars that seem unwanted in main_sd_xds, but there should be a better way
1199
1394
  # of not creating them in the first place
@@ -15,7 +15,9 @@ from xradio.measurement_set._utils._msv2._tables.read import (
15
15
  table_exists,
16
16
  )
17
17
  from xradio._utils.schema import convert_generic_xds_to_xradio_schema
18
- from xradio.measurement_set._utils._msv2.msv4_sub_xdss import interpolate_to_time
18
+ from xradio.measurement_set._utils._msv2.msv4_sub_xdss import (
19
+ rename_and_interpolate_to_time,
20
+ )
19
21
 
20
22
  from xradio._utils.list_and_array import (
21
23
  check_if_consistent,
@@ -509,27 +511,8 @@ def create_phase_calibration_xds(
509
511
  phase_cal_xds.time_phase_cal.astype("float64").astype("float64") / 10**9
510
512
  )
511
513
 
512
- phase_cal_xds = interpolate_to_time(
513
- phase_cal_xds,
514
- phase_cal_interp_time,
515
- "antenna_xds",
516
- time_name="time_phase_cal",
514
+ phase_cal_xds = rename_and_interpolate_to_time(
515
+ phase_cal_xds, "time_phase_cal", phase_cal_interp_time, "phase_cal_xds"
517
516
  )
518
517
 
519
- time_coord_attrs = {
520
- "type": "time",
521
- "units": ["s"],
522
- "scale": "utc",
523
- "format": "unix",
524
- }
525
-
526
- # If we interpolate rename the time_phase_cal axis to time.
527
- if phase_cal_interp_time is not None:
528
- time_coord = {"time": ("time_phase_cal", phase_cal_interp_time.data)}
529
- phase_cal_xds = phase_cal_xds.assign_coords(time_coord)
530
- phase_cal_xds.coords["time"].attrs.update(time_coord_attrs)
531
- phase_cal_xds = phase_cal_xds.swap_dims({"time_phase_cal": "time"}).drop_vars(
532
- "time_phase_cal"
533
- )
534
-
535
518
  return phase_cal_xds
@@ -6,7 +6,9 @@ import numpy as np
6
6
  import xarray as xr
7
7
 
8
8
  import toolviper.utils.logger as logger
9
- from xradio.measurement_set._utils._msv2.msv4_sub_xdss import interpolate_to_time
9
+ from xradio.measurement_set._utils._msv2.msv4_sub_xdss import (
10
+ rename_and_interpolate_to_time,
11
+ )
10
12
  from xradio.measurement_set._utils._msv2.subtables import subt_rename_ids
11
13
  from xradio.measurement_set._utils._msv2._tables.read import (
12
14
  convert_casacore_time_to_mjd,
@@ -363,20 +365,13 @@ def extract_ephemeris_info(
363
365
  }
364
366
  temp_xds["time_ephemeris"].attrs.update(time_coord_attrs)
365
367
 
366
- # Convert to si units and interpolate if ephemeris_interpolate=True:
368
+ # Convert to si units
367
369
  temp_xds = convert_to_si_units(temp_xds)
368
- temp_xds = interpolate_to_time(
369
- temp_xds, interp_time, "field_and_source_xds", time_name="time_ephemeris"
370
- )
371
370
 
372
- # If we interpolate rename the time_ephemeris axis to time.
373
- if interp_time is not None:
374
- time_coord = {"time": ("time_ephemeris", interp_time.data)}
375
- temp_xds = temp_xds.assign_coords(time_coord)
376
- temp_xds.coords["time"].attrs.update(time_coord_attrs)
377
- temp_xds = temp_xds.swap_dims({"time_ephemeris": "time"}).drop_vars(
378
- "time_ephemeris"
379
- )
371
+ # interpolate if interp_time is given (not None), and rename time_ephemeris=>time
372
+ temp_xds = rename_and_interpolate_to_time(
373
+ temp_xds, "time_ephemeris", interp_time, "field_and_source_xds"
374
+ )
380
375
 
381
376
  xds = xr.merge([xds, temp_xds])
382
377
 
@@ -20,6 +20,74 @@ from ._tables.read import (
20
20
  )
21
21
 
22
22
 
23
+ standard_time_coord_attrs = {
24
+ "type": "time",
25
+ "units": ["s"],
26
+ "scale": "utc",
27
+ "format": "unix",
28
+ }
29
+
30
+
31
+ def rename_and_interpolate_to_time(
32
+ xds: xr.Dataset,
33
+ time_initial_name: str,
34
+ interp_time: Union[xr.DataArray, None],
35
+ message_prefix: str,
36
+ ) -> xr.Dataset:
37
+ """
38
+ This function interpolates the time dimension and renames it:
39
+
40
+ - interpolates a time_* dimension to values given in interp_time (presumably the time
41
+ axis of the main xds)
42
+ - rename/replace that time_* dimension to "time", where time_* is a (sub)xds specific
43
+ time axis
44
+ (for example "time_pointing", "time_ephemeris", "time_syscal", "time_phase_cal").
45
+
46
+ If interp_time is None, this simply returns the input xds without modifications.
47
+ Uses interpolate_to_time() for interpolation.
48
+ ...
49
+
50
+ Parameters:
51
+ ----------
52
+ xds : xr.Dataset
53
+ Xarray dataset to interpolate (presumably a pointing_xds or an xds of
54
+ ephemeris variables)
55
+ time_initial_name: str
56
+ Name of time to be renamed+interpolated. Expected an existing time_* coordinate in the
57
+ dataset
58
+ interp_time:
59
+ Time axis to interpolate the dataset to (usually main MSv4 time)
60
+ message_prefix:
61
+ A prefix for info/debug/etc. messages about the specific xds being interpolated/
62
+ time-renamed
63
+
64
+ Returns:
65
+ -------
66
+ renamed_interpolated_xds : xr.Dataset
67
+ xarray dataset with time axis renamed to "time" (from time_initial_name, for example
68
+ "time_ephemeris") and interpolated to interp_time.
69
+ """
70
+ if interp_time is None:
71
+ return xds
72
+
73
+ interpolated_xds = interpolate_to_time(
74
+ xds,
75
+ interp_time,
76
+ message_prefix,
77
+ time_name=time_initial_name,
78
+ )
79
+
80
+ # rename the time_* axis to time.
81
+ time_coord = {"time": (time_initial_name, interp_time.data)}
82
+ renamed_time_xds = interpolated_xds.assign_coords(time_coord)
83
+ renamed_time_xds.coords["time"].attrs.update(standard_time_coord_attrs)
84
+ renamed_time_xds = renamed_time_xds.swap_dims({time_initial_name: "time"})
85
+ if time_initial_name != "time":
86
+ renamed_time_xds = renamed_time_xds.drop_vars(time_initial_name)
87
+
88
+ return renamed_time_xds
89
+
90
+
23
91
  def interpolate_to_time(
24
92
  xds: xr.Dataset,
25
93
  interp_time: Union[xr.DataArray, None],
@@ -56,7 +124,9 @@ def interpolate_to_time(
56
124
  method = "linear"
57
125
  else:
58
126
  method = "nearest"
59
- xds = xds.interp({time_name: interp_time}, method=method, assume_sorted=True)
127
+ xds = xds.interp(
128
+ {time_name: interp_time.data}, method=method, assume_sorted=True
129
+ )
60
130
  # scan_number sneaks in as a coordinate of the main time axis, drop it
61
131
  if "scan_number" in xds.coords:
62
132
  xds = xds.drop_vars("scan_number")
@@ -309,7 +379,7 @@ def create_pointing_xds(
309
379
  elif size == 0:
310
380
  generic_pointing_xds = generic_pointing_xds.drop_dims("n_polynomial")
311
381
 
312
- time_ant_dims = ["time", "antenna_name"]
382
+ time_ant_dims = ["time_pointing", "antenna_name"]
313
383
  time_ant_dir_dims = time_ant_dims + ["local_sky_dir_label"]
314
384
  to_new_data_variables = {
315
385
  "DIRECTION": ["POINTING_BEAM", time_ant_dir_dims],
@@ -318,7 +388,7 @@ def create_pointing_xds(
318
388
  }
319
389
 
320
390
  to_new_coords = {
321
- "TIME": ["time", ["time"]],
391
+ "TIME": ["time_pointing", ["time_pointing"]],
322
392
  "dim_2": ["local_sky_dir_label", ["local_sky_dir_label"]],
323
393
  }
324
394
 
@@ -337,7 +407,9 @@ def create_pointing_xds(
337
407
  generic_pointing_xds, pointing_xds, to_new_data_variables, to_new_coords
338
408
  )
339
409
 
340
- pointing_xds = interpolate_to_time(pointing_xds, interp_time, "pointing_xds")
410
+ pointing_xds = rename_and_interpolate_to_time(
411
+ pointing_xds, "time_pointing", interp_time, "pointing_xds"
412
+ )
341
413
 
342
414
  logger.debug(f"create_pointing_xds() execution time {time.time() - start:0.2f} s")
343
415
 
@@ -522,25 +594,9 @@ def create_system_calibration_xds(
522
594
  }
523
595
  sys_cal_xds.coords["frequency_cal"].attrs.update(frequency_measure)
524
596
 
525
- if sys_cal_interp_time is not None:
526
- sys_cal_xds = interpolate_to_time(
527
- sys_cal_xds,
528
- sys_cal_interp_time,
529
- "system_calibration_xds",
530
- time_name="time_cal",
531
- )
532
-
533
- time_coord_attrs = {
534
- "type": "time",
535
- "units": ["s"],
536
- "scale": "utc",
537
- "format": "unix",
538
- }
539
- # If interpolating time, rename time_cal => time
540
- time_coord = {"time": ("time_cal", sys_cal_interp_time.data)}
541
- sys_cal_xds = sys_cal_xds.assign_coords(time_coord)
542
- sys_cal_xds.coords["time"].attrs.update(time_coord_attrs)
543
- sys_cal_xds = sys_cal_xds.swap_dims({"time_cal": "time"}).drop_vars("time_cal")
597
+ sys_cal_xds = rename_and_interpolate_to_time(
598
+ sys_cal_xds, "time_cal", sys_cal_interp_time, "system_calibration_xds"
599
+ )
544
600
 
545
601
  # correct expected types
546
602
  for data_var in sys_cal_xds:
@@ -9,6 +9,7 @@ import xarray as xr
9
9
  from casacore import tables
10
10
 
11
11
  from ._tables.table_query import open_table_ro, open_query
12
+ from ._tables.read import table_exists
12
13
 
13
14
 
14
15
  def enumerated_product(*args):
@@ -17,7 +18,7 @@ def enumerated_product(*args):
17
18
  )
18
19
 
19
20
 
20
- def create_partitions(in_file: str, partition_scheme: list):
21
+ def create_partitions(in_file: str, partition_scheme: list) -> list[dict]:
21
22
  """Create a list of dictionaries with the partition information.
22
23
 
23
24
  Parameters
@@ -37,8 +38,6 @@ def create_partitions(in_file: str, partition_scheme: list):
37
38
  # vla_otf (bool, optional): The partitioning of VLA OTF (on the fly) mosaics needs a special partitioning scheme. Defaults to False.
38
39
 
39
40
  # Create partition table
40
- from casacore import tables
41
- import numpy as np
42
41
  import pandas as pd
43
42
  import os
44
43
 
@@ -67,7 +66,7 @@ def create_partitions(in_file: str, partition_scheme: list):
67
66
  # par_df["FIELD_NAME"] = np.array(field_tb.getcol("NAME"))[par_df["FIELD_ID"]]
68
67
 
69
68
  # Get source ids if available from source table.
70
- if os.path.isdir(os.path.join(os.path.join(in_file, "SOURCE"))):
69
+ if table_exists(os.path.join(os.path.join(in_file, "SOURCE"))):
71
70
  source_tb = tables.table(
72
71
  os.path.join(in_file, "SOURCE"),
73
72
  readonly=True,
@@ -82,7 +81,7 @@ def create_partitions(in_file: str, partition_scheme: list):
82
81
  # ]
83
82
 
84
83
  # Get intents and subscan numbers if available from state table.
85
- if os.path.isdir(os.path.join(in_file, "STATE")):
84
+ if table_exists(os.path.join(in_file, "STATE")):
86
85
  state_tb = tables.table(
87
86
  os.path.join(in_file, "STATE"),
88
87
  readonly=True,
@@ -5,7 +5,47 @@ from typing import Dict, Union
5
5
  import dask
6
6
 
7
7
  from xradio.measurement_set._utils._msv2.partition_queries import create_partitions
8
- from xradio.measurement_set._utils._msv2.conversion import convert_and_write_partition
8
+ from xradio.measurement_set._utils._msv2.conversion import (
9
+ convert_and_write_partition,
10
+ estimate_memory_and_cores_for_partitions,
11
+ )
12
+
13
+
14
+ def estimate_conversion_memory_and_cores(
15
+ in_file: str,
16
+ partition_scheme: list = ["FIELD_ID"],
17
+ ) -> tuple[float, int, int]:
18
+ """
19
+ Given an MSv2 and a partition_scheme to use when converting it to MSv4,
20
+ estimates:
21
+ - memory (in the sense of the amount expected to be enough to convert)
22
+ - cores (in the sense of the recommended/optimal number of cores to use to convert)
23
+
24
+ Note: this function does not currently try to estimate the memory required for
25
+ sub-xdss such as pointing_xds and system_calibration_xds, instead it uses a small
26
+ percentage of the main_xds to account for them. This can lead to underestimation
27
+ especially for MSv2s with small partitions but large pointing or syscal tables.
28
+ This should not typically be a concern for sufficiently large partitions
29
+ (a few GiB up to tens or hundreds of GiB).
30
+
31
+ Parameters
32
+ ----------
33
+ in_file: str
34
+ Input MS name.
35
+ partition_scheme: list
36
+ Partition scheme as used in the function convert_msv2_to_processing_set()
37
+
38
+ Returns
39
+ ----------
40
+ tuple
41
+ estimated maximum memory required for one partition,
42
+ maximum number of cores it makes sense to use (number of partitions),
43
+ suggested number of cores to use (maximum/4 as a rule of thumb)
44
+ """
45
+
46
+ partitions = create_partitions(in_file, partition_scheme=partition_scheme)
47
+
48
+ return estimate_memory_and_cores_for_partitions(in_file, partitions)
9
49
 
10
50
 
11
51
  def convert_msv2_to_processing_set(
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: xradio
3
- Version: 0.0.44
3
+ Version: 0.0.45
4
4
  Summary: Xarray Radio Astronomy Data IO
5
5
  Author-email: Jan-Willem Steeb <jsteeb@nrao.edu>
6
6
  License: BSD 3-Clause License
@@ -10,6 +10,7 @@ src/xradio.egg-info/requires.txt
10
10
  src/xradio.egg-info/top_level.txt
11
11
  src/xradio/_utils/__init__.py
12
12
  src/xradio/_utils/coord_math.py
13
+ src/xradio/_utils/dict_helpers.py
13
14
  src/xradio/_utils/list_and_array.py
14
15
  src/xradio/_utils/schema.py
15
16
  src/xradio/_utils/_casacore/tables.py
File without changes
File without changes
File without changes
File without changes
File without changes