PyPI - xradio - Versions diffs - 0.0.44__tar.gz → 0.0.45__tar.gz - Mend

xradio 0.0.44tar.gz → 0.0.45tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (83) hide show

{xradio-0.0.44/src/xradio.egg-info → xradio-0.0.45}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: xradio
-Version: 0.0.44
+Version: 0.0.45
 Summary:  Xarray Radio Astronomy Data IO
 Author-email: Jan-Willem Steeb <jsteeb@nrao.edu>
 License: BSD 3-Clause License

{xradio-0.0.44 → xradio-0.0.45}/pyproject.toml RENAMED Viewed

@@ -1,6 +1,6 @@
 [project]
 name = "xradio"
-version = "0.0.44"
+version = "0.0.45"
 description = " Xarray Radio Astronomy Data IO"
 authors = [
     {name = "Jan-Willem Steeb", email="jsteeb@nrao.edu"},

xradio-0.0.45/src/xradio/_utils/dict_helpers.py ADDED Viewed

@@ -0,0 +1,14 @@
+def make_quantity(value, units: str) -> dict:
+    """
+    create a quantity dictionary given value and units
+    Parameters
+    ----------
+    value : numeric or array of numerics
+        Quantity value
+    units: str
+        Quantity units
+    Returns
+    -------
+    dict
+    """
+    return {"value": value, "units": units, "type": "quantity"}

{xradio-0.0.44 → xradio-0.0.45}/src/xradio/image/_util/_casacore/xds_from_casacore.py RENAMED Viewed

@@ -31,6 +31,7 @@ from ..common import (
 )
 from ...._utils._casacore.tables import extract_table_attributes, open_table_ro
 from xradio._utils.coord_math import _deg_to_rad
+from xradio._utils.dict_helpers import make_quantity
 """
 def _add_coord_attrs(xds: xr.Dataset, icoords: dict, dir_axes: list) -> xr.Dataset:
@@ -62,13 +63,7 @@ def _add_freq_attrs(xds, coord_dict):
     for k in coord_dict:
         if k.startswith("spectral"):
             sd = coord_dict[k]
-            # meta["native_type"] = _native_types[sd["nativeType"]]
-            meta["rest_frequency"] = {
-                "type": "quantity",
-                "units": "Hz",
-                "value": sd["restfreq"],
-            }
-            # meta["restfreqs"] = {'type': 'quantity', 'units': 'Hz', 'value': list(sd["restfreqs"])}
+            meta["rest_frequency"] = make_quantity(sd["restfreq"], "Hz")
             meta["type"] = "frequency"
             meta["units"] = sd["unit"]
             meta["frame"] = sd["system"]
@@ -184,11 +179,7 @@ def _casa_image_to_xds_attrs(img_full_path: str, history: bool = True) -> dict:
         k = "latpole"
         if k in coord_dir_dict:
             for j in (k, "longpole"):
-                dir_dict[j] = {
-                    "value": coord_dir_dict[j] * _deg_to_rad,
-                    "units": "rad",
-                    "type": "quantity",
-                }
+                dir_dict[j] = make_quantity(coord_dir_dict[j] * _deg_to_rad, "rad")
         for j in ("pc", "projection_parameters", "projection"):
             if j in coord_dir_dict:
                 dir_dict[j] = coord_dir_dict[j]
@@ -518,11 +509,7 @@ def _get_freq_values_attrs(
                     crpix=wcs["crpix"],
                     cdelt=wcs["cdelt"],
                 )
-                attrs["rest_frequency"] = {
-                    "type": "quantity",
-                    "units": "Hz",
-                    "value": sd["restfreq"],
-                }
+                attrs["rest_frequency"] = make_quantity(sd["restfreq"], "Hz")
                 attrs["type"] = "frequency"
                 attrs["units"] = sd["unit"]
                 attrs["frame"] = sd["system"]

{xradio-0.0.44 → xradio-0.0.45}/src/xradio/image/_util/_fits/xds_from_fits.py RENAMED Viewed

@@ -16,6 +16,7 @@ from ..common import (
     _l_m_attr_notes,
 )
 from xradio._utils.coord_math import _deg_to_rad
+from xradio._utils.dict_helpers import make_quantity
 import copy
 import dask
 import dask.array as da
@@ -77,11 +78,7 @@ def _add_freq_attrs(xds: xr.Dataset, helpers: dict) -> xr.Dataset:
     freq_coord = xds.coords["frequency"]
     meta = {}
     if helpers["has_freq"]:
-        meta["rest_frequency"] = {
-            "type": "quantity",
-            "units": "Hz",
-            "value": helpers["restfreq"],
-        }
+        meta["rest_frequency"] = make_quantity(helpers["restfreq"], "Hz")
         meta["frame"] = helpers["specsys"]
         meta["units"] = "Hz"
         meta["type"] = "frequency"
@@ -184,16 +181,8 @@ def _xds_direction_attrs_from_header(helpers: dict, header) -> dict:
         direction["reference"]["value"][i] = x.value
         x = helpers["cdelt"][i] * u.Unit(_get_unit(helpers["cunit"][i]))
         x = x.to("rad")
-    direction["latpole"] = {
-        "value": header["LATPOLE"] * _deg_to_rad,
-        "units": "rad",
-        "type": "quantity",
-    }
-    direction["longpole"] = {
-        "value": header["LONPOLE"] * _deg_to_rad,
-        "units": "rad",
-        "type": "quantity",
-    }
+    direction["latpole"] = make_quantity(header["LATPOLE"] * _deg_to_rad, "rad")
+    direction["longpole"] = make_quantity(header["LONPOLE"] * _deg_to_rad, "rad")
     pc = np.zeros([2, 2])
     for i in (0, 1):
         for j in (0, 1):
@@ -325,9 +314,9 @@ def _beam_attr_from_header(helpers: dict, header) -> Union[dict, str, None]:
     if "BMAJ" in header:
         # single global beam
         beam = {
-            "bmaj": {"type": "quantity", "units": "arcsec", "value": header["BMAJ"]},
-            "bmin": {"type": "quantity", "units": "arcsec", "value": header["BMIN"]},
-            "pa": {"type": "quantity", "units": "arcsec", "value": header["BPA"]},
+            "bmaj": make_quantity(header["BMAJ"], "arcsec"),
+            "bmin": make_quantity(header["BMIN"], "arcsec"),
+            "pa": make_quantity(header["BPA"], "arcsec"),
         }
         return _convert_beam_to_rad(beam)
     elif "CASAMBM" in header and header["CASAMBM"]:

{xradio-0.0.44 → xradio-0.0.45}/src/xradio/image/_util/common.py RENAMED Viewed

@@ -6,6 +6,7 @@ import numpy as np
 from typing import Dict, List
 import xarray as xr
 from xradio._utils.coord_math import _deg_to_rad
+from xradio._utils.dict_helpers import make_quantity
 _c = 2.99792458e08 * u.m / u.s
 # OPTICAL = Z
@@ -39,7 +40,7 @@ def _convert_beam_to_rad(beam: dict) -> dict:
             q = u.quantity.Quantity(f"{beam[k]['value']}{beam[k]['units']}")
         q = q.to("rad")
         j = "pa" if k == "positionangle" else k
-        mybeam[j] = {"type": "quantity", "value": q.value, "units": "rad"}
+        mybeam[j] = make_quantity(q.value, "rad")
     return mybeam
@@ -102,11 +103,7 @@ def _numpy_arrayize_dv(xds: xr.Dataset) -> xr.Dataset:
 def _default_freq_info() -> dict:
     return {
-        "rest_frequency": {
-            "value": 1420405751.7860003,
-            "units": "Hz",
-            "type": "quantity",
-        },
+        "rest_frequency": make_quantity(1420405751.7860003, "Hz"),
         "type": "frequency",
         "frame": "LSRK",
         "units": "Hz",

{xradio-0.0.44 → xradio-0.0.45}/src/xradio/image/_util/image_factory.py RENAMED Viewed

@@ -4,6 +4,7 @@ import xarray as xr
 from typing import List, Union
 from .common import _c, _compute_world_sph_dims, _l_m_attr_notes
 from xradio._utils.coord_math import _deg_to_rad
+from xradio._utils.dict_helpers import make_quantity
 def _input_checks(
@@ -46,11 +47,7 @@ def _add_common_attrs(
     xds.time.attrs = {"format": "MJD", "scale": "UTC", "units": "d"}
     freq_vals = np.array(xds.frequency)
     xds.frequency.attrs = {
-        "rest_frequency": {
-            "type": "quantity",
-            "units": "Hz",
-            "value": restfreq,
-        },
+        "rest_frequency": make_quantity(restfreq, "Hz"),
         "frame": spectral_reference.upper(),
         "units": "Hz",
         "wave_unit": "mm",
@@ -69,8 +66,8 @@ def _add_common_attrs(
                 "value": list(phase_center),
                 "units": ["rad", "rad"],
             },
-            "longpole": {"type": "quantity", "value": np.pi, "units": "rad"},
-            "latpole": {"type": "quantity", "value": 0.0, "units": "rad"},
+            "longpole": make_quantity(np.pi, "rad"),
+            "latpole": make_quantity(0.0, "rad"),
             "pc": np.array([[1.0, 0.0], [0.0, 1.0]]),
             "projection": projection,
             "projection_parameters": [0.0, 0.0],
@@ -289,7 +286,6 @@ def _make_empty_lmuv_image(
         "crval": 0.0,
         "cdelt": -abs(sky_image_cell_size[0]),
         "units": "rad",
-        "type": "quantity",
         "note": attr_note["l"],
     }
     xds.m.attrs = {
@@ -297,7 +293,6 @@ def _make_empty_lmuv_image(
         "crval": 0.0,
         "cdelt": abs(sky_image_cell_size[1]),
         "units": "rad",
-        "type": "quantity",
         "note": attr_note["m"],
     }
     xds.u.attrs = {

{xradio-0.0.44 → xradio-0.0.45}/src/xradio/measurement_set/__init__.py RENAMED Viewed

@@ -1,7 +1,10 @@
 from .processing_set import ProcessingSet
 from .open_processing_set import open_processing_set
 from .load_processing_set import load_processing_set, ProcessingSetIterator
-from .convert_msv2_to_processing_set import convert_msv2_to_processing_set
+from .convert_msv2_to_processing_set import (
+    convert_msv2_to_processing_set,
+    estimate_conversion_memory_and_cores,
+)
 from .measurement_set_xds import MeasurementSetXds
 from .schema import SpectrumXds, VisibilityXds
@@ -13,6 +16,7 @@ __all__ = [
     "load_processing_set",
     "ProcessingSetIterator",
     "convert_msv2_to_processing_set",
+    "estimate_conversion_memory_and_cores",
     "SpectrumXds",
     "VisibilityXds",
 ]

{xradio-0.0.44 → xradio-0.0.45}/src/xradio/measurement_set/_utils/_msv2/conversion.py RENAMED Viewed

@@ -679,7 +679,7 @@ def get_weight(
     return xds
-def create_taql_query(partition_info):
+def create_taql_query_where(partition_info: dict):
     main_par_table_cols = [
         "DATA_DESC_ID",
         "OBSERVATION_ID",
@@ -729,6 +729,192 @@ def fix_uvw_frame(
     return xds
+def estimate_memory_for_partition(in_file: str, partition: dict) -> float:
+    """
+    Given a partition description, estimates a safe maximum memory value while avoiding overestimation
+    (at least, without adding factors that are not well understood).
+    """
+    def calculate_term_all_data(
+        tb_tool: tables.table, ntimes: float, nbaselines: float
+    ) -> tuple[list[float], bool]:
+        """
+        Sizes that the DATA vars from the MS will have in the MSv4, and whether this MS has FLOAT_DATA
+        """
+        sizes_all_data_vars = []
+        col_names = tb_tool.colnames()
+        for data_col in ["DATA", "CORRECTED_DATA", "MODEL_DATA", "FLOAT_DATA"]:
+            if data_col in col_names:
+                col_descr = tb_tool.getcoldesc(data_col)
+                if "shape" in col_descr and isinstance(col_descr["shape"], np.ndarray):
+                    # example: "shape": array([15,  4]) => gives pols x channels
+                    cells_in_row = col_descr["shape"].prod()
+                    npols = col_descr["shape"][-1]
+                else:
+                    first_row = np.array(tb_tool.col(data_col)[0])
+                    cells_in_row = np.prod(first_row.shape)
+                    npols = first_row.shape[-1]
+                if col_descr["valueType"] == "complex":
+                    # Assume. Otherwise, read first column and get the itemsize:
+                    # col_dtype = np.array(mtable.col(data_col)[0]).dtype
+                    # cell_size = col_dtype.itemsize
+                    cell_size = 4
+                    if data_col != "FLOAT_DATA":
+                        cell_size *= 2
+                elif col_descr["valueType"] == "float":
+                    cell_size = 4
+                # cells_in_row should account for the polarization and frequency dims
+                size_data_var = ntimes * nbaselines * cells_in_row * cell_size
+                sizes_all_data_vars.append(size_data_var)
+        is_float_data = "FLOAT_DATA" in col_names
+        return sizes_all_data_vars, is_float_data
+    def calculate_term_weight_flag(size_largest_data, is_float_data) -> float:
+        """
+        Size that WEIGHT and FLAG will have in the MSv4, derived from the size of the
+        MSv2 DATA col=> MSv4 VIS/SPECTRUM data var.
+        """
+        # Factors of the relative "cell_size" wrt the DATA var
+        # WEIGHT_SPECTRUM size: DATA (IF), DATA/2 (SD)
+        factor_weight = 1.0 if is_float_data else 0.5
+        factor_flag = 1.0 / 4.0 if is_float_data else 1.0 / 8.0
+        return size_largest_data * (factor_weight + factor_flag)
+    def calculate_term_other_data_vars(
+        ntimes: int, nbaselines: int, is_float_data: bool
+    ) -> float:
+        """
+        Size that all data vars other than the DATA (visibility/spectrum) vars will have in the MSv4
+        For the rest of columns, including indices/iteration columns and other
+        scalar columns could say approx ->5% of the (large) data cols
+        """
+        # Small ones, but as they are loaded into data arrays, they are included as well.
+        # For example: UVW (3xscalar), EXPOSURE, TIME_CENTROID
+        # assuming float64 in output MSv4
+        item_size = 8
+        return ntimes * nbaselines * (3 + 1 + 1) * item_size
+    def calculate_term_calc_indx_for_row_split(msv2_nrows: int) -> float:
+        """
+        Account for the indices produced in calc_indx_for_row_split():
+        the dominating ones are: tidxs, bidxs, didxs.
+        In terms of amount of memory represented by this term relative to the
+        total, it becomes relevant proportionally to the ratio between
+           nrows / (chans x pols)
+        - for example LOFAR long scans/partitions with few channels,
+        but its value is independent from # chans, pols.
+        """
+        item_size = 8
+        # 3 are: tidxs, bidxs, didxs
+        return msv2_nrows * 3 * item_size
+    def calculate_term_other_msv2_indices(msv2_nrows: int) -> float:
+        """
+        Account for the allocations to load ID, etc. columns from input MSv2.
+        The converter needs to load: OBSERVATION_ID, INTERVAL, SCAN_NUMBER.
+        These are loaded one after another (allocations do not stack up).
+        Also, in most memory profiles these allocations are released once we
+        get to create_data_variables(). As such, adding this term will most
+        likely lead to overestimation (but adding it for safety).
+        Similarly to calculate_term_calc_indx_for_row_split(), this term
+        becomes relevant when the ratio 'nrows / (chans x pols)' is high.
+        """
+        # assuming float64/int64 in input MSv2, which seems to be the case,
+        # except for OBSERVATION_ID (int32)
+        item_size = 8
+        return msv2_nrows * item_size
+    def calculate_term_attrs(size_estimate_main_xds: float) -> float:
+        """Rough guess which seems to be more than enough"""
+        # could also account for info_dicts (which seem to require typically ~1 MB)
+        return 10 * 1024 * 1024
+    def calculate_term_sub_xds(size_estimate_main_xds: float) -> float:
+        """
+        This is still very rough. Just seemingly working for now. Not taking into account the dims
+        of the sub-xdss, interpolation options used, etc.
+        """
+        # In most cases so far 1% seems enough (1.5% is used below)
+        return 0.015 * size_estimate_main_xds
+    def calculate_term_to_zarr(size_estimate_main_xds: float) -> float:
+        """
+        The to_zarr call on the main_xds seems to allocate 10s or 100s of MBs, presumably for buffers.
+        That adds on top of the expected main_xds size.
+        This is currently a very rough extrapolation and is being (mis)used to give a safe up to 5-6%
+        overestimation. Perhaps we should drop this term once other sub-xdss are accounted for (and
+        this term could be replaced by a similar, smaller but still safe over-estimation percentage).
+        """
+        return 0.05 * size_estimate_main_xds
+    taql_partition = create_taql_query_where(partition)
+    taql_main = f"select * from $mtable {taql_partition}"
+    with open_table_ro(in_file) as mtable:
+        col_names = mtable.colnames()
+        with open_query(mtable, taql_main) as tb_tool:
+            # Do not feel tempted to rely on nrows. nrows tends to underestimate memory when baselines are missing.
+            # For some EVN datasets that can easily underestimate by 50%
+            utimes, _tol = get_utimes_tol(mtable, taql_partition)
+            ntimes = len(utimes)
+            nbaselines = len(get_baselines(tb_tool))
+            # Still, use nrows for estimations related to sizes of input (MSv2)
+            # columns, not sizes of output (MSv4) data vars
+            msv2_nrows = tb_tool.nrows()
+            sizes_all_data, is_float_data = calculate_term_all_data(
+                tb_tool, ntimes, nbaselines
+            )
+    size_largest_data = np.max(sizes_all_data)
+    sum_sizes_data = np.sum(sizes_all_data)
+    estimate_main_xds = (
+        sum_sizes_data
+        + calculate_term_weight_flag(size_largest_data, is_float_data)
+        + calculate_term_other_data_vars(ntimes, nbaselines, is_float_data)
+    )
+    estimate = (
+        estimate_main_xds
+        + calculate_term_calc_indx_for_row_split(msv2_nrows)
+        + calculate_term_other_msv2_indices(msv2_nrows)
+        + calculate_term_sub_xds(estimate_main_xds)
+        + calculate_term_to_zarr(estimate_main_xds)
+    )
+    estimate /= GiBYTES_TO_BYTES
+    return estimate
+def estimate_memory_and_cores_for_partitions(
+    in_file: str, partitions: list
+) -> tuple[float, int, int]:
+    """
+    Estimates approximate memory required to convert an MSv2 to MSv4, given
+    a predefined set of partitions.
+    """
+    max_cores = len(partitions)
+    size_estimates = [
+        estimate_memory_for_partition(in_file, part_description)
+        for part_description in partitions
+    ]
+    max_estimate = np.max(size_estimates) if size_estimates else 0.0
+    recommended_cores = np.ceil(max_cores / 4).astype("int")
+    return float(max_estimate), int(max_cores), int(recommended_cores)
 def convert_and_write_partition(
     in_file: str,
     out_file: str,
@@ -790,7 +976,7 @@ def convert_and_write_partition(
         _description_
     """
-    taql_where = create_taql_query(partition_info)
+    taql_where = create_taql_query_where(partition_info)
     ddi = partition_info["DATA_DESC_ID"][0]
     intents = str(partition_info["OBS_MODE"][0])
@@ -839,7 +1025,9 @@ def convert_and_write_partition(
             start = time.time()
             xds = xr.Dataset(
                 attrs={
-                    "creation_date": datetime.datetime.utcnow().isoformat(),
+                    "creation_date": datetime.datetime.now(
+                        datetime.timezone.utc
+                    ).isoformat(),
                     "xradio_version": importlib.metadata.version("xradio"),
                     "schema_version": "4.0.-9994",
                     "type": "visibility",
@@ -1085,6 +1273,8 @@ def convert_and_write_partition(
                 else:
                     xds.attrs["type"] = "visibility"
+            import sys
             start = time.time()
             if storage_backend == "zarr":
                 xds.to_zarr(store=os.path.join(file_name, "correlated_xds"), mode=mode)
@@ -1193,7 +1383,12 @@ def antenna_ids_to_names(
         ]
         for unwanted_coord in unwanted_coords_from_ant_xds:
             xds = xds.drop_vars(unwanted_coord)
-        xds = xds.rename({"baseline_id": "antenna_name"})
+        # Renaming a dim coord started generating warnings (index not re-created). Instead, swap dims and create the coord
+        # https://github.com/pydata/xarray/pull/6999
+        xds = xds.swap_dims({"baseline_id": "antenna_name"})
+        xds = xds.assign_coords({"antenna_name": xds["baseline_id"].data})
+        xds = xds.drop_vars("baseline_id")
         # drop more vars that seem unwanted in main_sd_xds, but there should be a better way
         # of not creating them in the first place

{xradio-0.0.44 → xradio-0.0.45}/src/xradio/measurement_set/_utils/_msv2/create_antenna_xds.py RENAMED Viewed

@@ -15,7 +15,9 @@ from xradio.measurement_set._utils._msv2._tables.read import (
     table_exists,
 )
 from xradio._utils.schema import convert_generic_xds_to_xradio_schema
-from xradio.measurement_set._utils._msv2.msv4_sub_xdss import interpolate_to_time
+from xradio.measurement_set._utils._msv2.msv4_sub_xdss import (
+    rename_and_interpolate_to_time,
+)
 from xradio._utils.list_and_array import (
     check_if_consistent,
@@ -509,27 +511,8 @@ def create_phase_calibration_xds(
         phase_cal_xds.time_phase_cal.astype("float64").astype("float64") / 10**9
     )
-    phase_cal_xds = interpolate_to_time(
-        phase_cal_xds,
-        phase_cal_interp_time,
-        "antenna_xds",
-        time_name="time_phase_cal",
+    phase_cal_xds = rename_and_interpolate_to_time(
+        phase_cal_xds, "time_phase_cal", phase_cal_interp_time, "phase_cal_xds"
     )
-    time_coord_attrs = {
-        "type": "time",
-        "units": ["s"],
-        "scale": "utc",
-        "format": "unix",
-    }
-    # If we interpolate rename the time_phase_cal axis to time.
-    if phase_cal_interp_time is not None:
-        time_coord = {"time": ("time_phase_cal", phase_cal_interp_time.data)}
-        phase_cal_xds = phase_cal_xds.assign_coords(time_coord)
-        phase_cal_xds.coords["time"].attrs.update(time_coord_attrs)
-        phase_cal_xds = phase_cal_xds.swap_dims({"time_phase_cal": "time"}).drop_vars(
-            "time_phase_cal"
-        )
     return phase_cal_xds

{xradio-0.0.44 → xradio-0.0.45}/src/xradio/measurement_set/_utils/_msv2/create_field_and_source_xds.py RENAMED Viewed

@@ -6,7 +6,9 @@ import numpy as np
 import xarray as xr
 import toolviper.utils.logger as logger
-from xradio.measurement_set._utils._msv2.msv4_sub_xdss import interpolate_to_time
+from xradio.measurement_set._utils._msv2.msv4_sub_xdss import (
+    rename_and_interpolate_to_time,
+)
 from xradio.measurement_set._utils._msv2.subtables import subt_rename_ids
 from xradio.measurement_set._utils._msv2._tables.read import (
     convert_casacore_time_to_mjd,
@@ -363,20 +365,13 @@ def extract_ephemeris_info(
     }
     temp_xds["time_ephemeris"].attrs.update(time_coord_attrs)
-    # Convert to si units and interpolate if ephemeris_interpolate=True:
+    # Convert to si units
     temp_xds = convert_to_si_units(temp_xds)
-    temp_xds = interpolate_to_time(
-        temp_xds, interp_time, "field_and_source_xds", time_name="time_ephemeris"
-    )
-    # If we interpolate rename the time_ephemeris axis to time.
-    if interp_time is not None:
-        time_coord = {"time": ("time_ephemeris", interp_time.data)}
-        temp_xds = temp_xds.assign_coords(time_coord)
-        temp_xds.coords["time"].attrs.update(time_coord_attrs)
-        temp_xds = temp_xds.swap_dims({"time_ephemeris": "time"}).drop_vars(
-            "time_ephemeris"
-        )
+    # interpolate if ephemeris_interpolate/interp_time=True, and rename time_ephemeris=>time
+    temp_xds = rename_and_interpolate_to_time(
+        temp_xds, "time_ephemeris", interp_time, "field_and_source_xds"
+    )
     xds = xr.merge([xds, temp_xds])

{xradio-0.0.44 → xradio-0.0.45}/src/xradio/measurement_set/_utils/_msv2/msv4_sub_xdss.py RENAMED Viewed

@@ -20,6 +20,74 @@ from ._tables.read import (
 )
+standard_time_coord_attrs = {
+    "type": "time",
+    "units": ["s"],
+    "scale": "utc",
+    "format": "unix",
+}
+def rename_and_interpolate_to_time(
+    xds: xr.Dataset,
+    time_initial_name: str,
+    interp_time: Union[xr.DataArray, None],
+    message_prefix: str,
+) -> xr.Dataset:
+    """
+    This function interpolates the time dimension and renames it:
+    - interpolates a time_* dimension to values given in interp_time (presumably the time
+    axis of the main xds)
+    - rename/replace that time_* dimension to "time", where time_* is a (sub)xds specific
+    time axis
+    (for example "time_pointing", "time_ephemeris", "time_syscal", "time_phase_cal").
+    If interp_time is None this will simply return the input xds without modifications.
+    Uses interpolate_to_time() for interpolation.
+    ...
+    Parameters:
+    ----------
+    xds : xr.Dataset
+        Xarray dataset to interpolate (presumably a pointing_xds or an xds of
+        ephemeris variables)
+    time_initial_name: str
+        Name of time to be renamed+interpolated. Expected an existing time_* coordinate in the
+        dataset
+    interp_time:
+        Time axis to interpolate the dataset to (usually main MSv4 time)
+    message_prefix:
+        A prefix for info/debug/etc. messages about the specific xds being interpolated/
+        time-renamed
+    Returns:
+    -------
+    renamed_interpolated_xds : xr.Dataset
+        xarray dataset with time axis renamed to "time" (from time_initial_name, for example
+        "time_ephemeris") and interpolated to interp_time.
+    """
+    if interp_time is None:
+        return xds
+    interpolated_xds = interpolate_to_time(
+        xds,
+        interp_time,
+        message_prefix,
+        time_name=time_initial_name,
+    )
+    # rename the time_* axis to time.
+    time_coord = {"time": (time_initial_name, interp_time.data)}
+    renamed_time_xds = interpolated_xds.assign_coords(time_coord)
+    renamed_time_xds.coords["time"].attrs.update(standard_time_coord_attrs)
+    renamed_time_xds = renamed_time_xds.swap_dims({time_initial_name: "time"})
+    if time_initial_name != "time":
+        renamed_time_xds = renamed_time_xds.drop_vars(time_initial_name)
+    return renamed_time_xds
 def interpolate_to_time(
     xds: xr.Dataset,
     interp_time: Union[xr.DataArray, None],
@@ -56,7 +124,9 @@ def interpolate_to_time(
             method = "linear"
         else:
             method = "nearest"
-        xds = xds.interp({time_name: interp_time}, method=method, assume_sorted=True)
+        xds = xds.interp(
+            {time_name: interp_time.data}, method=method, assume_sorted=True
+        )
         # scan_number sneaks in as a coordinate of the main time axis, drop it
         if "scan_number" in xds.coords:
             xds = xds.drop_vars("scan_number")
@@ -309,7 +379,7 @@ def create_pointing_xds(
         elif size == 0:
             generic_pointing_xds = generic_pointing_xds.drop_dims("n_polynomial")
-    time_ant_dims = ["time", "antenna_name"]
+    time_ant_dims = ["time_pointing", "antenna_name"]
     time_ant_dir_dims = time_ant_dims + ["local_sky_dir_label"]
     to_new_data_variables = {
         "DIRECTION": ["POINTING_BEAM", time_ant_dir_dims],
@@ -318,7 +388,7 @@ def create_pointing_xds(
     }
     to_new_coords = {
-        "TIME": ["time", ["time"]],
+        "TIME": ["time_pointing", ["time_pointing"]],
         "dim_2": ["local_sky_dir_label", ["local_sky_dir_label"]],
     }
@@ -337,7 +407,9 @@ def create_pointing_xds(
         generic_pointing_xds, pointing_xds, to_new_data_variables, to_new_coords
     )
-    pointing_xds = interpolate_to_time(pointing_xds, interp_time, "pointing_xds")
+    pointing_xds = rename_and_interpolate_to_time(
+        pointing_xds, "time_pointing", interp_time, "pointing_xds"
+    )
     logger.debug(f"create_pointing_xds() execution time {time.time() - start:0.2f} s")
@@ -522,25 +594,9 @@ def create_system_calibration_xds(
         }
         sys_cal_xds.coords["frequency_cal"].attrs.update(frequency_measure)
-    if sys_cal_interp_time is not None:
-        sys_cal_xds = interpolate_to_time(
-            sys_cal_xds,
-            sys_cal_interp_time,
-            "system_calibration_xds",
-            time_name="time_cal",
-        )
-        time_coord_attrs = {
-            "type": "time",
-            "units": ["s"],
-            "scale": "utc",
-            "format": "unix",
-        }
-        # If interpolating time, rename time_cal => time
-        time_coord = {"time": ("time_cal", sys_cal_interp_time.data)}
-        sys_cal_xds = sys_cal_xds.assign_coords(time_coord)
-        sys_cal_xds.coords["time"].attrs.update(time_coord_attrs)
-        sys_cal_xds = sys_cal_xds.swap_dims({"time_cal": "time"}).drop_vars("time_cal")
+    sys_cal_xds = rename_and_interpolate_to_time(
+        sys_cal_xds, "time_cal", sys_cal_interp_time, "system_calibration_xds"
+    )
     # correct expected types
     for data_var in sys_cal_xds:

{xradio-0.0.44 → xradio-0.0.45}/src/xradio/measurement_set/_utils/_msv2/partition_queries.py RENAMED Viewed

@@ -9,6 +9,7 @@ import xarray as xr
 from casacore import tables
 from ._tables.table_query import open_table_ro, open_query
+from ._tables.read import table_exists
 def enumerated_product(*args):
@@ -17,7 +18,7 @@ def enumerated_product(*args):
     )
-def create_partitions(in_file: str, partition_scheme: list):
+def create_partitions(in_file: str, partition_scheme: list) -> list[dict]:
     """Create a list of dictionaries with the partition information.
     Parameters
@@ -37,8 +38,6 @@ def create_partitions(in_file: str, partition_scheme: list):
     # vla_otf (bool, optional):  The partitioning of VLA OTF (on the fly) mosaics needs a special partitioning scheme. Defaults to False.
     # Create partition table
-    from casacore import tables
-    import numpy as np
     import pandas as pd
     import os
@@ -67,7 +66,7 @@ def create_partitions(in_file: str, partition_scheme: list):
     #     par_df["FIELD_NAME"] = np.array(field_tb.getcol("NAME"))[par_df["FIELD_ID"]]
     # Get source ids if available from source table.
-    if os.path.isdir(os.path.join(os.path.join(in_file, "SOURCE"))):
+    if table_exists(os.path.join(os.path.join(in_file, "SOURCE"))):
         source_tb = tables.table(
             os.path.join(in_file, "SOURCE"),
             readonly=True,
@@ -82,7 +81,7 @@ def create_partitions(in_file: str, partition_scheme: list):
             #     ]
     # Get intents and subscan numbers if available from state table.
-    if os.path.isdir(os.path.join(in_file, "STATE")):
+    if table_exists(os.path.join(in_file, "STATE")):
         state_tb = tables.table(
             os.path.join(in_file, "STATE"),
             readonly=True,

{xradio-0.0.44 → xradio-0.0.45}/src/xradio/measurement_set/convert_msv2_to_processing_set.py RENAMED Viewed

@@ -5,7 +5,47 @@ from typing import Dict, Union
 import dask
 from xradio.measurement_set._utils._msv2.partition_queries import create_partitions
-from xradio.measurement_set._utils._msv2.conversion import convert_and_write_partition
+from xradio.measurement_set._utils._msv2.conversion import (
+    convert_and_write_partition,
+    estimate_memory_and_cores_for_partitions,
+)
+def estimate_conversion_memory_and_cores(
+    in_file: str,
+    partition_scheme: list = ["FIELD_ID"],
+) -> tuple[float, int, int]:
+    """
+    Given an MSv2 and a partition_scheme to use when converting it to MSv4,
+    estimates:
+    - memory (in the sense of the amount expected to be enough to convert)
+    - cores (in the sense of the recommended/optimal number of cores to use to convert)
+    Note: this function does not currently try to estimate the memory required for
+    sub-xdss such as pointing_xds and system_calibration_xds, instead it uses a small
+    percentage of the main_xds to account for them. This can lead to underestimation
+    especially for MSv2s with small partitions but large pointing or syscal tables.
+    This should not typically be a concern for sufficiently large partitions
+    (a few or 10s, 100s of GiBs).
+    Parameters
+    ----------
+    in_file: str
+        Input MS name.
+    partition_scheme: list
+        Partition scheme as used in the function convert_msv2_to_processing_set()
+    Returns
+    ----------
+    tuple
+        estimated maximum memory required for one partition,
+        maximum number of cores it makes sense to use (number of partitions),
+        suggested number of cores to use (maximum/4 as a rule of thumb)
+    """
+    partitions = create_partitions(in_file, partition_scheme=partition_scheme)
+    return estimate_memory_and_cores_for_partitions(in_file, partitions)
 def convert_msv2_to_processing_set(

{xradio-0.0.44 → xradio-0.0.45/src/xradio.egg-info}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: xradio
-Version: 0.0.44
+Version: 0.0.45
 Summary:  Xarray Radio Astronomy Data IO
 Author-email: Jan-Willem Steeb <jsteeb@nrao.edu>
 License: BSD 3-Clause License

{xradio-0.0.44 → xradio-0.0.45}/src/xradio.egg-info/SOURCES.txt RENAMED Viewed

@@ -10,6 +10,7 @@ src/xradio.egg-info/requires.txt
 src/xradio.egg-info/top_level.txt
 src/xradio/_utils/__init__.py
 src/xradio/_utils/coord_math.py
+src/xradio/_utils/dict_helpers.py
 src/xradio/_utils/list_and_array.py
 src/xradio/_utils/schema.py
 src/xradio/_utils/_casacore/tables.py