PyPI - xradio - Versions diffs - 0.0.27__py3-none-any.whl → 0.0.29__py3-none-any.whl - Mend

xradio 0.0.27__py3-none-any.whl → 0.0.29__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (51) hide show

xradio/__init__.py +5 -4
xradio/_utils/array.py +90 -0
xradio/_utils/zarr/common.py +48 -3
xradio/image/_util/_fits/xds_from_fits.py +10 -5
xradio/image/_util/_zarr/zarr_low_level.py +27 -24
xradio/image/_util/common.py +4 -1
xradio/image/_util/zarr.py +4 -1
xradio/schema/__init__.py +24 -6
xradio/schema/bases.py +440 -2
xradio/schema/check.py +96 -55
xradio/schema/dataclass.py +123 -27
xradio/schema/metamodel.py +21 -4
xradio/schema/typing.py +33 -18
xradio/vis/__init__.py +5 -2
xradio/vis/_processing_set.py +30 -9
xradio/vis/_vis_utils/_ms/_tables/create_field_and_source_xds.py +710 -0
xradio/vis/_vis_utils/_ms/_tables/load.py +23 -10
xradio/vis/_vis_utils/_ms/_tables/load_main_table.py +145 -64
xradio/vis/_vis_utils/_ms/_tables/read.py +782 -156
xradio/vis/_vis_utils/_ms/_tables/read_main_table.py +176 -45
xradio/vis/_vis_utils/_ms/_tables/read_subtables.py +79 -28
xradio/vis/_vis_utils/_ms/_tables/write.py +102 -45
xradio/vis/_vis_utils/_ms/_tables/write_exp_api.py +127 -65
xradio/vis/_vis_utils/_ms/chunks.py +58 -21
xradio/vis/_vis_utils/_ms/conversion.py +536 -67
xradio/vis/_vis_utils/_ms/descr.py +52 -20
xradio/vis/_vis_utils/_ms/msv2_to_msv4_meta.py +70 -35
xradio/vis/_vis_utils/_ms/msv4_infos.py +0 -59
xradio/vis/_vis_utils/_ms/msv4_sub_xdss.py +76 -9
xradio/vis/_vis_utils/_ms/optimised_functions.py +0 -46
xradio/vis/_vis_utils/_ms/partition_queries.py +308 -119
xradio/vis/_vis_utils/_ms/partitions.py +82 -25
xradio/vis/_vis_utils/_ms/subtables.py +32 -14
xradio/vis/_vis_utils/_utils/partition_attrs.py +30 -11
xradio/vis/_vis_utils/_utils/xds_helper.py +136 -45
xradio/vis/_vis_utils/_zarr/read.py +60 -22
xradio/vis/_vis_utils/_zarr/write.py +83 -9
xradio/vis/_vis_utils/ms.py +48 -29
xradio/vis/_vis_utils/zarr.py +44 -20
xradio/vis/convert_msv2_to_processing_set.py +106 -32
xradio/vis/load_processing_set.py +38 -61
xradio/vis/read_processing_set.py +62 -96
xradio/vis/schema.py +687 -0
xradio/vis/vis_io.py +75 -43
{xradio-0.0.27.dist-info → xradio-0.0.29.dist-info}/LICENSE.txt +6 -1
{xradio-0.0.27.dist-info → xradio-0.0.29.dist-info}/METADATA +10 -5
xradio-0.0.29.dist-info/RECORD +73 -0
{xradio-0.0.27.dist-info → xradio-0.0.29.dist-info}/WHEEL +1 -1
xradio/vis/model.py +0 -497
xradio-0.0.27.dist-info/RECORD +0 -71
{xradio-0.0.27.dist-info → xradio-0.0.29.dist-info}/top_level.txt +0 -0

xradio/vis/_vis_utils/_ms/conversion.py CHANGED Viewed

@@ -1,21 +1,26 @@
 import numcodecs
+import math
 import time
 from .._zarr.encoding import add_encoding
 from typing import Dict, Union
 import graphviper.utils.logger as logger
+import os
 import numpy as np
 import xarray as xr
-from .msv4_infos import create_field_info
+from casacore import tables
 from .msv4_sub_xdss import create_ant_xds, create_pointing_xds, create_weather_xds
+from xradio.vis._vis_utils._ms._tables.create_field_and_source_xds import (
+    create_field_and_source_xds,
+)
 from .msv2_to_msv4_meta import (
     column_description_casacore_to_msv4_measure,
     create_attribute_metadata,
     col_to_data_variable_names,
     col_dims,
 )
-from .partition_queries import create_taql_query_and_file_name
 from .subtables import subt_rename_ids
 from ._tables.table_query import open_table_ro, open_query
 from ._tables.read import (
@@ -26,28 +31,333 @@ from ._tables.read import (
 )
 from ._tables.read_main_table import get_baselines, get_baseline_indices, get_utimes_tol
 from .._utils.stokes_types import stokes_types
-from xradio.vis._vis_utils._ms.optimised_functions import unique_1d
+from xradio._utils.array import check_if_consistent, unique_1d
-def check_if_consistent(col, col_name):
-    """_summary_
+def parse_chunksize(
+    chunksize: Union[Dict, float, None], xds_type: str, xds: xr.Dataset
+) -> Dict[str, int]:
+    """
+    Normalize a user-given chunk size into a dict of per-dimension chunk sizes.
+    A dict is validated with check_chunksize() and returned as is, a number is taken
+    as a memory bound and converted with mem_chunksize_to_dict(), and None is passed
+    through unchanged.
+    Parameters
+    ----------
+    chunksize : Union[Dict, float, None]
+        Desired maximum size of the chunks, either as a dict of per-dimension sizes or as
+        an amount of memory. Integers are accepted as well as floats.
+    xds_type : str
+        whether to use chunking logic for main or pointing datasets
+    xds : xr.Dataset
+        dataset to calculate best chunking
+    Returns
+    -------
+    Dict[str, int]
+        dictionary of chunk sizes (as dim->size), or None if chunksize is None
+    Raises
+    ------
+    ValueError
+        If chunksize is not a dict, a number or None
+    """
+    if chunksize is None:
+        return None
+    if isinstance(chunksize, dict):
+        check_chunksize(chunksize, xds_type)
+        return chunksize
+    # bool is a subclass of int; a True/False memory bound can only be a caller mistake
+    if isinstance(chunksize, (int, float)) and not isinstance(chunksize, bool):
+        return mem_chunksize_to_dict(float(chunksize), xds_type, xds)
+    raise ValueError(
+        f"Chunk size expected as a dict or a float, got: "
+        f"{chunksize} (of type {type(chunksize)})"
+    )
+def check_chunksize(chunksize: dict, xds_type: str) -> None:
+    """
+    Rudimentary check of the chunksize parameters to catch obvious errors early before
+    more work is done.
+    Parameters
+    ----------
+    chunksize : dict
+        chunk sizes as dim->size; only the keys (dimension names) are checked
+    xds_type : str
+        type of dataset the chunk sizes are meant for: "main" or "pointing"
+    Raises
+    ------
+    ValueError
+        If xds_type is not known, or chunksize has a dimension that is not allowed
+        for that type of dataset
+    """
+    # perhaps start using some TypedDict or/and validator like pydantic?
+    allowed_dims_by_type = {
+        "main": [
+            "time",
+            "baseline_id",
+            "antenna_id",
+            "frequency",
+            "polarization",
+        ],
+        "pointing": ["time", "antenna"],
+    }
+    if xds_type not in allowed_dims_by_type:
+        # There is no list of allowed dims to check against for any other type
+        raise ValueError(f"Unexpected type: {xds_type=}")
+    allowed_dims = allowed_dims_by_type[xds_type]
+    msg = "".join(
+        f"dimension {dim} not allowed in {xds_type} dataset:\n"
+        for dim in chunksize
+        if dim not in allowed_dims
+    )
+    if msg:
+        raise ValueError(f"Wrong keys found in chunksize: {msg}")
+def mem_chunksize_to_dict(
+    chunksize: float, xds_type: str, xds: xr.Dataset
+) -> Dict[str, int]:
+    """
+    Given a desired 'chunksize' as amount of memory in GiB, calculate best chunk sizes
+    for every dimension of an xds. The calculation is delegated to
+    mem_chunksize_to_dict_pointing() or mem_chunksize_to_dict_main() depending on
+    'xds_type'; any other type raises a RuntimeError.
     Parameters
     ----------
-    col : _type_
-        _description_
-    col_name : _type_
-        _description_
+    chunksize : float
+        Desired maximum size of the chunks (GiB)
+    xds_type : str
+        whether to use chunking logic for main or pointing datasets
+        ("main" or "pointing")
+    xds : xr.Dataset
+        dataset to auto-calculate chunking of its dimensions
     Returns
     -------
-    _type_
-        _description_
+    Dict[str, int]
+        dictionary of chunk sizes (as dim->size)
+    """
+    if xds_type == "pointing":
+        sizes = mem_chunksize_to_dict_pointing(chunksize, xds)
+    elif xds_type == "main":
+        sizes = mem_chunksize_to_dict_main(chunksize, xds)
+    else:
+        raise RuntimeError(f"Unexpected type: {xds_type=}")
+    return sizes
+# Number of bytes in one GiB (2**30); divides byte counts to get sizes in GiB
+GiBYTES_TO_BYTES = 1024 * 1024 * 1024
+def mem_chunksize_to_dict_main(chunksize: float, xds: xr.Dataset) -> Dict[str, int]:
+    """
+    Calculate chunk sizes for the dimensions of a main xds, given a memory bound in GiB.
+    Checks the assumption that all polarizations can be held in memory, at least for one
+    data point (one time, one baseline/antenna, one frequency channel).
+    If the whole dataset fits in 'chunksize' every dimension is kept in a single chunk.
+    Otherwise it relies on the logic of mem_chunksize_to_dict_main_balanced() to find a
+    balanced list of dimension sizes for the chunks.
+    Assumes these relevant dims: (time, antenna_id/baseline_id, frequency,
+    polarization).
+    Parameters
+    ----------
+    chunksize : float
+        Desired maximum size of the chunks (GiB)
+    xds : xr.Dataset
+        main dataset to auto-calculate chunking of its dimensions
+    Returns
+    -------
+    Dict[str, int]
+        dictionary of chunk sizes (as dim->size)
+    Raises
+    ------
+    RuntimeError
+        If 'chunksize' is not enough to hold all the polarizations of one data point
+    """
+    sizeof_vis = itemsize_vis_spec(xds)
+    size_all_pols = sizeof_vis * xds.sizes["polarization"]
+    if size_all_pols / GiBYTES_TO_BYTES > chunksize:
+        # f-string, so that the offending memory bound is shown in the message
+        raise RuntimeError(
+            f"Cannot calculate chunk sizes when memory bound ({chunksize}) does not even allow all polarizations in one chunk"
+        )
+    baseline_or_antenna_id = find_baseline_or_antenna_var(xds)
+    total_size = calc_used_gb(xds.sizes, baseline_or_antenna_id, sizeof_vis)
+    ratio = chunksize / total_size
+    chunked_dims = ["time", baseline_or_antenna_id, "frequency", "polarization"]
+    # Full sizes of the chunked dims: the result as is when everything fits in one
+    # chunk, otherwise the starting point of the balanced calculation
+    xds_dim_sizes = {dim: xds.sizes[dim] for dim in chunked_dims}
+    if ratio >= 1:
+        logger.debug(
+            f"{chunksize=} GiB is enough to fully hold {total_size=} GiB (for {xds.sizes=}) in memory in one chunk"
+        )
+        return xds_dim_sizes
+    return mem_chunksize_to_dict_main_balanced(
+        chunksize, xds_dim_sizes, baseline_or_antenna_id, sizeof_vis
+    )
+def mem_chunksize_to_dict_main_balanced(
+    chunksize: float, xds_dim_sizes: dict, baseline_or_antenna_id: str, sizeof_vis: int
+) -> Dict[str, int]:
+    """
+    Assumes the ratio is <1 and all pols can fit in memory (from
+    mem_chunksize_to_dict_main()).
+    What is kept balanced is the fraction of the total size of every dimension included in a
+    chunk. For example, time: 10, baseline: 100, freq: 1000, if we can afford about 33% in
+    one chunk, the chunksize will be ~ time: 3, baseline: 33, freq: 333.
+    The polarization axis is excluded from the calculations.
+    Because this can leave a leftover (below or above the desired chunksize limit) an
+    adjustment is done to get the final memory use below but as close as possible to
+    'chunksize'. This adjustment alters the balance.
+    Parameters
+    ----------
+    chunksize : float
+        Desired maximum size of the chunks (GiB)
+    xds_dim_sizes : dict
+        Dataset dimension sizes as dim_name->size. Only the values are used, and they
+        are matched by position to (time, baseline_or_antenna_id, frequency,
+        polarization), so they need to be given in that order.
+    baseline_or_antenna_id : str
+        Name to use for the second dimension in the returned dict
+    sizeof_vis : int
+        Size in bytes of a data point (one visibility / spectrum value)
+    Returns
+    -------
+    Dict[str, int]
+        dictionary of chunk sizes (as dim->size)
+    """
+    # NOTE(review): dim_names is not used below
+    dim_names = [name for name in xds_dim_sizes.keys()]
+    dim_sizes = [size for size in xds_dim_sizes.values()]
+    # Fix fourth dim (polarization) to all (not free to auto-calculate)
+    free_dims_mask = np.array([True, True, True, False])
+    total_size = np.prod(dim_sizes) * sizeof_vis / GiBYTES_TO_BYTES
+    ratio = chunksize / total_size
+    dim_chunksizes = np.array(dim_sizes, dtype="int64")
+    # Same shrink factor for every free dim, so that their product shrinks by 'ratio'
+    factor = ratio ** (1 / np.sum(free_dims_mask))
+    # Never go below 1 element per dim (values are stored back into the int64 array)
+    dim_chunksizes[free_dims_mask] = np.maximum(
+        dim_chunksizes[free_dims_mask] * factor, 1
+    )
+    used = np.prod(dim_chunksizes) * sizeof_vis / GiBYTES_TO_BYTES
+    logger.debug(
+        f"Auto-calculating main chunk sizes. First order approximation {dim_chunksizes=}, used total: {used} GiB (with {chunksize=} GiB)"
+    )
+    # Iterate through the dims, starting from the dims with lower chunk size
+    #  (=bigger impact of a +1)
+    # Note the use of np.floor, this iteration can either increase or decrease sizes,
+    #  if increasing sizes we want to keep mem use below the upper limit, floor(2.3) = +2
+    #  if decreasing sizes we want to take mem use below the upper limit, floor(-2.3) = -3
+    # The indices from the free dims subset are valid for the full array because the
+    #  free dims are the leading entries of the mask
+    indices = np.argsort(dim_chunksizes[free_dims_mask])
+    for idx in indices:
+        # Memory (GiB) still available (>0) or to be given back (<0)
+        left = chunksize - used
+        other_dims_mask = np.ones(free_dims_mask.shape, dtype=bool)
+        other_dims_mask[idx] = False
+        # How many elements of dim 'idx' are equivalent to 'left', given that one
+        #  element takes as much as the product of all the other dims
+        delta = np.divide(
+            left,
+            np.prod(dim_chunksizes[other_dims_mask]) * sizeof_vis / GiBYTES_TO_BYTES,
+        )
+        int_delta = np.floor(delta)
+        # Apply only non-zero changes that keep the chunk size of this dim positive
+        if abs(int_delta) > 0 and int_delta + dim_chunksizes[idx] > 0:
+            dim_chunksizes[idx] += int_delta
+        used = np.prod(dim_chunksizes) * sizeof_vis / GiBYTES_TO_BYTES
+    chunked_dim_names = ["time", baseline_or_antenna_id, "frequency", "polarization"]
+    dim_chunksizes_int = [int(v) for v in dim_chunksizes]
+    result = dict(zip(chunked_dim_names, dim_chunksizes_int))
+    logger.debug(
+        f"Auto-calculated main chunk sizes with {chunksize=}, {total_size=} GiB (for {dim_sizes=}): {result=} which uses {used} GiB."
+    )
+    return result
+def mem_chunksize_to_dict_pointing(chunksize: float, xds: xr.Dataset) -> Dict[str, int]:
+    """
+    Equivalent to mem_chunksize_to_dict_main adapted to pointing xdss.
+    Assumes these relevant dims: (time, antenna, direction).
+    Parameters
+    ----------
+    chunksize : float
+        Desired maximum size of the chunks (GiB)
+    xds : xr.Dataset
+        pointing dataset to auto-calculate chunking of its dimensions
+    Returns
+    -------
+    Dict[str, int]
+        dictionary of chunk sizes (as dim->size) for all the dims of the xds, or an
+        empty dict if the xds has no dimensions
+    """
+    if not xds.sizes:
+        return {}
+    sizeof_pointing = itemsize_pointing_spec(xds)
+    chunked_dim_names = [name for name in xds.sizes.keys()]
+    dim_sizes = [size for size in xds.sizes.values()]
+    total_size = np.prod(dim_sizes) * sizeof_pointing / GiBYTES_TO_BYTES
+    # Fix third dim (direction) to all
+    # NOTE(review): the mask has 3 entries, so the balanced branch below presumably
+    #  expects an xds with exactly 3 dims, in (time, antenna, direction) order - confirm
+    free_dims_mask = np.array([True, True, False])
+    ratio = chunksize / total_size
+    if ratio >= 1:
+        logger.debug(
+            f"Pointing chunsize: {chunksize=} GiB is enough to fully hold {total_size=} GiB (for {xds.sizes=}) in memory in one chunk"
+        )
+        dim_chunksizes = dim_sizes
+    else:
+        # balanced
+        dim_chunksizes = np.array(dim_sizes, dtype="int")
+        # Same shrink factor for every free dim, so that their product shrinks by 'ratio'
+        factor = ratio ** (1 / np.sum(free_dims_mask))
+        # Never go below 1 element per dim
+        dim_chunksizes[free_dims_mask] = np.maximum(
+            dim_chunksizes[free_dims_mask] * factor, 1
+        )
+        used = np.prod(dim_chunksizes) * sizeof_pointing / GiBYTES_TO_BYTES
+        logger.debug(
+            f"Auto-calculating pointing chunk sizes. First order approximation: {dim_chunksizes=}, used total: {used=} GiB (with {chunksize=} GiB"
+        )
+        # Start from the dims with lower chunk size; the indices from the free dims
+        #  subset are valid for the full array because the free dims come first
+        indices = np.argsort(dim_chunksizes[free_dims_mask])
+        # refine dim_chunksizes
+        for idx in indices:
+            # Memory (GiB) still available (>0) or to be given back (<0)
+            left = chunksize - used
+            other_dims_mask = np.ones(free_dims_mask.shape, dtype=bool)
+            other_dims_mask[idx] = False
+            delta = np.divide(
+                left,
+                np.prod(dim_chunksizes[other_dims_mask])
+                * sizeof_pointing
+                / GiBYTES_TO_BYTES,
+            )
+            # floor: rounds towards staying below the limit, for both signs of delta
+            int_delta = np.floor(delta)
+            if abs(int_delta) > 0 and int_delta + dim_chunksizes[idx] > 0:
+                dim_chunksizes[idx] += int_delta
+            used = np.prod(dim_chunksizes) * sizeof_pointing / GiBYTES_TO_BYTES
+    dim_chunksizes_int = [int(v) for v in dim_chunksizes]
+    result = dict(zip(chunked_dim_names, dim_chunksizes_int))
+    # 'used' is only defined when the balanced branch was taken
+    if ratio < 1:
+        logger.debug(
+            f"Auto-calculated pointing chunk sizes with {chunksize=}, {total_size=} GiB (for {xds.sizes=}): {result=} which uses {used} GiB."
+        )
+    return result
+def find_baseline_or_antenna_var(xds: xr.Dataset) -> str:
+    """
+    Find which of the "baseline_id" / "antenna_id" coordinates is present in an xds.
+    Parameters
+    ----------
+    xds : xr.Dataset
+        dataset to look into (only its coords are checked)
+    Returns
+    -------
+    str
+        "baseline_id" if the xds has that coordinate, otherwise "antenna_id"
+    Raises
+    ------
+    ValueError
+        If the xds has neither of the two coordinates
+    """
+    # "baseline_id" takes precedence if both are present
+    for coord_name in ("baseline_id", "antenna_id"):
+        if coord_name in xds.coords:
+            return coord_name
+    raise ValueError(
+        "Neither 'baseline_id' nor 'antenna_id' found in the coordinates of the dataset"
+    )
+def itemsize_vis_spec(xds: xr.Dataset) -> int:
     """
+    Return the number of bytes taken by a single SPECTRUM or VISIBILITY value.
+    "SPECTRUM" is looked up first, then "VISIBILITY". When the xds has neither of
+    those data variables the result defaults to 8 bytes.
+    """
+    present = [name for name in ("SPECTRUM", "VISIBILITY") if name in xds.data_vars]
+    if not present:
+        return 8
+    return np.dtype(xds.data_vars[present[0]].dtype).itemsize
-    col_unique = unique_1d(col)
-    assert len(col_unique) == 1, col_name + " is not consistent."
-    return col_unique[0]
+def itemsize_pointing_spec(xds: xr.Dataset) -> int:
+    """
+    Return the number of bytes taken by a single BEAM_POINTING value, or 8 when the
+    xds has no such data variable.
+    """
+    if "BEAM_POINTING" in xds.data_vars:
+        return np.dtype(xds.data_vars["BEAM_POINTING"].dtype).itemsize
+    return 8
+def calc_used_gb(
+    chunksizes: dict, baseline_or_antenna_id: str, sizeof_vis: int
+) -> float:
+    """
+    Memory in GiB taken by time x baseline/antenna x frequency x polarization values
+    of 'sizeof_vis' bytes each, with the dimension sizes taken from 'chunksizes'.
+    """
+    n_values = (
+        chunksizes["time"]
+        * chunksizes[baseline_or_antenna_id]
+        * chunksizes["frequency"]
+        * chunksizes["polarization"]
+    )
+    return n_values * sizeof_vis / GiBYTES_TO_BYTES
 # TODO: if the didxs are not used in read_col_conversion, remove didxs from here (and convert_and_write_partition)
@@ -103,15 +413,15 @@ def create_coordinates(
     ddi_xds = read_generic_table(in_file, "DATA_DESCRIPTION").sel(row=ddi)
     pol_setup_id = ddi_xds.polarization_id.values
-    spw_id = ddi_xds.spectral_window_id.values
+    spectral_window_id = int(ddi_xds.spectral_window_id.values)
-    spw_xds = read_generic_table(
+    spectral_window_xds = read_generic_table(
         in_file,
         "SPECTRAL_WINDOW",
         rename_ids=subt_rename_ids["SPECTRAL_WINDOW"],
-    ).sel(spectral_window_id=spw_id)
-    coords["frequency"] = spw_xds["chan_freq"].data[
-        ~(np.isnan(spw_xds["chan_freq"].data))
+    ).sel(spectral_window_id=spectral_window_id)
+    coords["frequency"] = spectral_window_xds["chan_freq"].data[
+        ~(np.isnan(spectral_window_xds["chan_freq"].data))
     ]
     pol_xds = read_generic_table(
@@ -127,25 +437,27 @@ def create_coordinates(
     xds = xds.assign_coords(coords)
     ###### Create Frequency Coordinate ######
-    freq_column_description = spw_xds.attrs["other"]["msv2"]["ctds_attrs"][
+    freq_column_description = spectral_window_xds.attrs["other"]["msv2"]["ctds_attrs"][
         "column_descriptions"
     ]
     msv4_measure = column_description_casacore_to_msv4_measure(
-        freq_column_description["CHAN_FREQ"], ref_code=spw_xds["meas_freq_ref"].data
+        freq_column_description["CHAN_FREQ"],
+        ref_code=spectral_window_xds["meas_freq_ref"].data,
     )
     xds.frequency.attrs.update(msv4_measure)
-    xds.frequency.attrs["spectral_window_name"] = str(spw_xds.name.values)
+    xds.frequency.attrs["spectral_window_name"] = str(spectral_window_xds.name.values)
     msv4_measure = column_description_casacore_to_msv4_measure(
-        freq_column_description["REF_FREQUENCY"], ref_code=spw_xds["meas_freq_ref"].data
+        freq_column_description["REF_FREQUENCY"],
+        ref_code=spectral_window_xds["meas_freq_ref"].data,
     )
     xds.frequency.attrs["reference_frequency"] = {
-        "dims": "",
-        "data": float(spw_xds.ref_frequency.values),
+        "dims": [],
+        "data": float(spectral_window_xds.ref_frequency.values),
         "attrs": msv4_measure,
     }
-    xds.frequency.attrs["spw_id"] = spw_id
+    xds.frequency.attrs["spectral_window_id"] = spectral_window_id
     # xds.frequency.attrs["effective_channel_width"] = "EFFECTIVE_CHANNEL_WIDTH"
     # Add if doppler table is present
@@ -153,20 +465,23 @@ def create_coordinates(
     # xds.frequency.attrs["doppler_type"] =
     unique_chan_width = unique_1d(
-        spw_xds.chan_width.data[np.logical_not(np.isnan(spw_xds.chan_width.data))]
+        spectral_window_xds.chan_width.data[
+            np.logical_not(np.isnan(spectral_window_xds.chan_width.data))
+        ]
     )
-    # assert len(unique_chan_width) == 1, "Channel width varies for spw."
-    # xds.frequency.attrs["channel_width"] = spw_xds.chan_width.data[
-    #    ~(np.isnan(spw_xds.chan_width.data))
+    # assert len(unique_chan_width) == 1, "Channel width varies for spectral_window."
+    # xds.frequency.attrs["channel_width"] = spectral_window_xds.chan_width.data[
+    #    ~(np.isnan(spectral_window_xds.chan_width.data))
     # ]  # unique_chan_width[0]
     msv4_measure = column_description_casacore_to_msv4_measure(
-        freq_column_description["CHAN_WIDTH"], ref_code=spw_xds["meas_freq_ref"].data
+        freq_column_description["CHAN_WIDTH"],
+        ref_code=spectral_window_xds["meas_freq_ref"].data,
     )
     if not msv4_measure:
         msv4_measure["type"] = "quantity"
         msv4_measure["units"] = ["Hz"]
     xds.frequency.attrs["channel_width"] = {
-        "dims": "",
+        "dims": [],
         "data": np.abs(unique_chan_width[0]),
         "attrs": msv4_measure,
     }
@@ -186,7 +501,7 @@ def create_coordinates(
         msv4_measure["type"] = "quantity"
         msv4_measure["units"] = ["s"]
     xds.time.attrs["integration_time"] = {
-        "dims": "",
+        "dims": [],
         "data": interval,
         "attrs": msv4_measure,
     }
@@ -194,6 +509,34 @@ def create_coordinates(
     return xds
+def find_min_max_times(tb_tool: tables.table, taql_where: str) -> tuple:
+    """
+    Get the time range of a partition, widened by a tolerance at both ends, for
+    constraining pointing.
+    The tolerance is subtracted from the minimum and added to the maximum time so
+    that numerical comparison issues do not leave out times at the edges. Both the
+    times and the tolerance come from get_utimes_tol().
+    Parameters
+    ----------
+    tb_tool : tables.table
+        table (query) opened with the query of the partition
+    taql_where : str
+        TaQL where that defines the partition
+    Returns
+    -------
+    tuple
+        (min - tolerance, max + tolerance) of the times from get_utimes_tol()
+    """
+    unique_times, tolerance = get_utimes_tol(tb_tool, taql_where)
+    return (unique_times.min() - tolerance, unique_times.max() + tolerance)
 def create_data_variables(
     in_file, xds, tb_tool, time_baseline_shape, tidxs, bidxs, didxs
 ):
@@ -242,6 +585,7 @@ def create_data_variables(
                     )
             except:
                 # logger.debug("Could not load column",col)
+                # print("Could not load column", col)
                 continue
             xds[col_to_data_variable_names[col]].attrs.update(
@@ -249,15 +593,38 @@ def create_data_variables(
             )
+def create_taql_query(partition_info):
+    """
+    Build the TaQL WHERE clause that selects the main table rows of one partition.
+    Parameters
+    ----------
+    partition_info : dict
+        column name -> iterable of values. Only the main table columns known to this
+        function are used (DATA_DESC_ID, STATE_ID, FIELD_ID, SCAN_NUMBER), any other
+        key is ignored.
+    Returns
+    -------
+    str
+        "WHERE (COL IN [v1,v2,...]) AND (...)" with one condition per column present
+        in partition_info, or an empty string (no constraint) if none is present
+    """
+    # NOTE(review): "STATE_ID" used to be listed twice here, which only repeated the
+    #  same condition - confirm that no other column was meant
+    main_par_table_cols = [
+        "DATA_DESC_ID",
+        "STATE_ID",
+        "FIELD_ID",
+        "SCAN_NUMBER",
+    ]
+    conditions = [
+        f"({col_name} IN [{','.join(map(str, partition_info[col_name]))}])"
+        for col_name in main_par_table_cols
+        if col_name in partition_info
+    ]
+    if not conditions:
+        # A bare "WHERE" would not be valid TaQL
+        return ""
+    return "WHERE " + " AND ".join(conditions)
 def convert_and_write_partition(
     in_file: str,
     out_file: str,
-    intent: str,
-    ddi: int = 0,
-    state_ids=None,
-    field_id: int = None,
-    ignore_msv2_cols: Union[list, None] = None,
-    main_chunksize: Union[Dict, None] = None,
+    ms_v4_id: int,
+    partition_info: Dict,
+    partition_scheme: str = "ddi_intent_field",
+    main_chunksize: Union[Dict, float, None] = None,
+    with_pointing: bool = True,
+    pointing_chunksize: Union[Dict, float, None] = None,
+    pointing_interpolate: bool = False,
+    ephemeris_interpolate: bool = False,
     compressor: numcodecs.abc.Codec = numcodecs.Zstd(level=2),
     storage_backend="zarr",
     overwrite: bool = False,
@@ -278,9 +645,15 @@ def convert_and_write_partition(
         _description_, by default None
     field_id : int, optional
         _description_, by default None
-    ignore_msv2_cols : Union[list, None], optional
+    main_chunksize : Union[Dict, float, None], optional
         _description_, by default None
-    main_chunksize : Union[Dict, None], optional
+    with_pointing: bool, optional
+        _description_, by default True
+    pointing_chunksize : Union[Dict, float, None], optional
+        _description_, by default None
+    pointing_interpolate : bool, optional
+        _description_, by default None
+    ephemeris_interpolate : bool, optional
         _description_, by default None
     compressor : numcodecs.abc.Codec, optional
         _description_, by default numcodecs.Zstd(level=2)
@@ -294,17 +667,16 @@ def convert_and_write_partition(
     _type_
         _description_
     """
-    if ignore_msv2_cols is None:
-        ignore_msv2_cols = []
-    taql_where, file_name = create_taql_query_and_file_name(
-        out_file, intent, state_ids, field_id, ddi
-    )
+    taql_where = create_taql_query(partition_info)
+    ddi = partition_info["DATA_DESC_ID"][0]
+    intent = str(partition_info["INTENT"][0])
     start = time.time()
     with open_table_ro(in_file) as mtable:
         taql_main = f"select * from $mtable {taql_where}"
         with open_query(mtable, taql_main) as tb_tool:
             if tb_tool.nrows() == 0:
                 tb_tool.close()
                 mtable.close()
@@ -329,7 +701,7 @@ def convert_and_write_partition(
             interval_unique = unique_1d(interval)
             if len(interval_unique) > 1:
-                print(
+                logger.debug(
                     "Integration time (interval) not consitent in partition, using median."
                 )
                 interval = np.median(interval)
@@ -347,12 +719,6 @@ def convert_and_write_partition(
             )
             logger.debug("Time create data variables " + str(time.time() - start))
-            # Create field_info
-            start = time.time()
-            field_id = check_if_consistent(tb_tool.getcol("FIELD_ID"), "FIELD_ID")
-            field_info = create_field_info(in_file, field_id)
-            logger.debug("Time field info " + str(time.time() - start))
             # Create ant_xds
             start = time.time()
             ant_xds = create_ant_xds(in_file)
@@ -363,14 +729,30 @@ def convert_and_write_partition(
             weather_xds = create_weather_xds(in_file)
             logger.debug("Time weather " + str(time.time() - start))
-            start = time.time()
-            pointing_xds = create_pointing_xds(in_file)
-            logger.debug("Time pointing " + str(time.time() - start))
+            # To constrain the time range to load (in pointing, ephemerides data_vars)
+            time_min_max = find_min_max_times(tb_tool, taql_where)
+            if with_pointing:
+                start = time.time()
+                if pointing_interpolate:
+                    pointing_interp_time = xds.time
+                else:
+                    pointing_interp_time = None
+                pointing_xds = create_pointing_xds(
+                    in_file, time_min_max, pointing_interp_time
+                )
+                pointing_chunksize = parse_chunksize(
+                    pointing_chunksize, "pointing", pointing_xds
+                )
+                add_encoding(
+                    pointing_xds, compressor=compressor, chunks=pointing_chunksize
+                )
+                logger.debug(
+                    "Time pointing (with add compressor and chunking) "
+                    + str(time.time() - start)
+                )
             start = time.time()
-            # Fix UVW frame
-            # From CASA fixvis docs: clean and the im tool ignore the reference frame claimed by the UVW column (it is often mislabelled as ITRF when it is really FK5 (J2000)) and instead assume the (u, v, w)s are in the same frame as the phase tracking center. calcuvw does not yet force the UVW column and field centers to use the same reference frame! Blank = use the phase tracking frame of vis.
-            xds.UVW.attrs["frame"] = field_info["phase_direction"]["attrs"]["frame"]
             xds.attrs["intent"] = intent
             xds.attrs["ddi"] = ddi
@@ -391,7 +773,6 @@ def convert_and_write_partition(
                     "weight": "WEIGHT",
                     "uvw": "UVW",
                 }
-                xds.VISIBILITY.attrs["field_info"] = field_info
             if "VISIBILITY_CORRECTED" in xds:
                 xds.attrs["data_groups"]["corrected"] = {
@@ -400,8 +781,8 @@ def convert_and_write_partition(
                     "weight": "WEIGHT",
                     "uvw": "UVW",
                 }
-                xds.VISIBILITY_CORRECTED.attrs["field_info"] = field_info
+            is_single_dish = False
             if "SPECTRUM" in xds:
                 xds.attrs["data_groups"]["base"] = {
                     "spectrum": "SPECTRUM",
@@ -409,7 +790,7 @@ def convert_and_write_partition(
                     "weight": "WEIGHT",
                     "uvw": "UVW",
                 }
-                xds.SPECTRUM.attrs["field_info"] = field_info
+                is_single_dish = True
             if "SPECTRUM_CORRECTED" in xds:
                 xds.attrs["data_groups"]["corrected"] = {
@@ -418,23 +799,111 @@ def convert_and_write_partition(
                     "weight": "WEIGHT",
                     "uvw": "UVW",
                 }
-                xds.SPECTRUM_CORRECTED.attrs["field_info"] = field_info
+                is_single_dish = True
+            # Create field_and_source_xds (combines field, source and ephemeris data into one super dataset)
+            start = time.time()
+            if ephemeris_interpolate:
+                ephemeris_interp_time = xds.time
+            else:
+                ephemeris_interp_time = None
+            scan_id = np.full(time_baseline_shape, -42, dtype=int)
+            scan_id[tidxs, bidxs] = tb_tool.getcol("SCAN_NUMBER")
+            scan_id = np.max(scan_id, axis=1)
+            if (
+                partition_scheme == "ddi_intent_source"
+                or partition_scheme == "ddi_intent_scan"
+            ):
+                field_id = np.full(time_baseline_shape, -42, dtype=int)
+                field_id[tidxs, bidxs] = tb_tool.getcol("FIELD_ID")
+                field_id = np.max(field_id, axis=1)
+                field_times = utime
+            else:
+                field_id = check_if_consistent(tb_tool.getcol("FIELD_ID"), "FIELD_ID")
+                field_times = None
+            # col_unique = unique_1d(col)
+            # assert len(col_unique) == 1, col_name + " is not consistent."
+            # return col_unique[0]
+            field_and_source_xds = create_field_and_source_xds(
+                in_file,
+                field_id,
+                xds.frequency.attrs["spectral_window_id"],
+                field_times,
+                is_single_dish,
+                time_min_max,
+                ephemeris_interp_time,
+            )
+            logger.debug("Time field_and_source_xds " + str(time.time() - start))
+            # Fix UVW frame
+            # From CASA fixvis docs: clean and the im tool ignore the reference frame claimed by the UVW column (it is often mislabelled as ITRF when it is really FK5 (J2000)) and instead assume the (u, v, w)s are in the same frame as the phase tracking center. calcuvw does not yet force the UVW column and field centers to use the same reference frame! Blank = use the phase tracking frame of vis.
+            # print('##################',field_and_source_xds)
+            if is_single_dish:
+                xds.UVW.attrs["frame"] = field_and_source_xds[
+                    "FIELD_REFERENCE_CENTER"
+                ].attrs["frame"]
+            else:
+                xds.UVW.attrs["frame"] = field_and_source_xds[
+                    "FIELD_PHASE_CENTER"
+                ].attrs["frame"]
             if overwrite:
                 mode = "w"
             else:
                 mode = "w-"
+            main_chunksize = parse_chunksize(main_chunksize, "main", xds)
             add_encoding(xds, compressor=compressor, chunks=main_chunksize)
             logger.debug("Time add compressor and chunk " + str(time.time() - start))
+            file_name = os.path.join(
+                out_file,
+                out_file.replace(".vis.zarr", "").replace(".zarr", "").split("/")[-1]
+                + "_"
+                + str(ms_v4_id),
+            )
+            if isinstance(field_id, np.ndarray):
+                field_id = "OTF"
+            xds.attrs["partition_info"] = {
+                "spectral_window_id": xds.frequency.attrs["spectral_window_id"],
+                "spectral_window_name": xds.frequency.attrs["spectral_window_name"],
+                "field_id": field_id,
+                "field_name": field_and_source_xds.attrs["field_name"],
+                "source_id": field_and_source_xds.attrs["source_id"],
+                "source_name": field_and_source_xds.attrs["source_name"],
+                "polarization_setup": list(xds.polarization.values),
+                "intent": intent,
+                "taql": taql_where,
+            }
+            # print(xds)
             start = time.time()
             if storage_backend == "zarr":
-                xds.to_zarr(store=file_name + "/MAIN", mode=mode)
-                ant_xds.to_zarr(store=file_name + "/ANTENNA", mode=mode)
-                pointing_xds.to_zarr(store=file_name + "/POINTING", mode=mode)
+                xds.to_zarr(store=os.path.join(file_name, "MAIN"), mode=mode)
+                ant_xds.to_zarr(store=os.path.join(file_name, "ANTENNA"), mode=mode)
+                for group_name in xds.attrs["data_groups"]:
+                    field_and_source_xds.to_zarr(
+                        store=os.path.join(
+                            file_name, f"FIELD_AND_SOURCE_{group_name.upper()}"
+                        ),
+                        mode=mode,
+                    )
+                if with_pointing:
+                    pointing_xds.to_zarr(store=file_name + "/POINTING", mode=mode)
                 if weather_xds:
-                    weather_xds.to_zarr(store=file_name + "/WEATHER", mode=mode)
+                    weather_xds.to_zarr(
+                        store=os.path.join(file_name, "WEATHER"), mode=mode
+                    )
             elif storage_backend == "netcdf":
                 # xds.to_netcdf(path=file_name+"/MAIN", mode=mode) #Does not work
                 raise

xradio 0.0.27__py3-none-any.whl → 0.0.29__py3-none-any.whl

xradio 0.0.27__py3-none-any.whl → 0.0.29__py3-none-any.whl