xradio 0.0.55__py3-none-any.whl → 0.0.58__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (64)
  1. xradio/__init__.py +2 -2
  2. xradio/_utils/_casacore/casacore_from_casatools.py +1001 -0
  3. xradio/_utils/_casacore/tables.py +6 -1
  4. xradio/_utils/coord_math.py +22 -23
  5. xradio/_utils/dict_helpers.py +76 -11
  6. xradio/_utils/schema.py +5 -2
  7. xradio/_utils/zarr/common.py +1 -73
  8. xradio/image/_util/_casacore/common.py +11 -3
  9. xradio/image/_util/_casacore/xds_from_casacore.py +59 -35
  10. xradio/image/_util/_casacore/xds_to_casacore.py +47 -16
  11. xradio/image/_util/_fits/xds_from_fits.py +172 -77
  12. xradio/image/_util/casacore.py +9 -4
  13. xradio/image/_util/common.py +4 -4
  14. xradio/image/_util/image_factory.py +8 -8
  15. xradio/image/image.py +45 -5
  16. xradio/measurement_set/__init__.py +19 -9
  17. xradio/measurement_set/_utils/__init__.py +1 -3
  18. xradio/measurement_set/_utils/_msv2/__init__.py +0 -0
  19. xradio/measurement_set/_utils/_msv2/_tables/read.py +35 -90
  20. xradio/measurement_set/_utils/_msv2/_tables/read_main_table.py +6 -686
  21. xradio/measurement_set/_utils/_msv2/_tables/table_query.py +13 -3
  22. xradio/measurement_set/_utils/_msv2/conversion.py +129 -145
  23. xradio/measurement_set/_utils/_msv2/create_antenna_xds.py +9 -16
  24. xradio/measurement_set/_utils/_msv2/create_field_and_source_xds.py +125 -221
  25. xradio/measurement_set/_utils/_msv2/msv2_to_msv4_meta.py +1 -2
  26. xradio/measurement_set/_utils/_msv2/msv4_info_dicts.py +13 -8
  27. xradio/measurement_set/_utils/_msv2/msv4_sub_xdss.py +27 -72
  28. xradio/measurement_set/_utils/_msv2/partition_queries.py +5 -262
  29. xradio/measurement_set/_utils/_msv2/subtables.py +0 -107
  30. xradio/measurement_set/_utils/_utils/interpolate.py +60 -0
  31. xradio/measurement_set/_utils/_zarr/encoding.py +2 -7
  32. xradio/measurement_set/convert_msv2_to_processing_set.py +0 -2
  33. xradio/measurement_set/load_processing_set.py +2 -2
  34. xradio/measurement_set/measurement_set_xdt.py +14 -14
  35. xradio/measurement_set/open_processing_set.py +1 -3
  36. xradio/measurement_set/processing_set_xdt.py +41 -835
  37. xradio/measurement_set/schema.py +96 -123
  38. xradio/schema/check.py +91 -97
  39. xradio/schema/dataclass.py +159 -22
  40. xradio/schema/export.py +99 -0
  41. xradio/schema/metamodel.py +51 -16
  42. xradio/schema/typing.py +5 -5
  43. {xradio-0.0.55.dist-info → xradio-0.0.58.dist-info}/METADATA +43 -11
  44. xradio-0.0.58.dist-info/RECORD +65 -0
  45. {xradio-0.0.55.dist-info → xradio-0.0.58.dist-info}/WHEEL +1 -1
  46. xradio/image/_util/fits.py +0 -13
  47. xradio/measurement_set/_utils/_msv2/_tables/load.py +0 -63
  48. xradio/measurement_set/_utils/_msv2/_tables/load_main_table.py +0 -487
  49. xradio/measurement_set/_utils/_msv2/_tables/read_subtables.py +0 -395
  50. xradio/measurement_set/_utils/_msv2/_tables/write.py +0 -320
  51. xradio/measurement_set/_utils/_msv2/_tables/write_exp_api.py +0 -385
  52. xradio/measurement_set/_utils/_msv2/chunks.py +0 -115
  53. xradio/measurement_set/_utils/_msv2/descr.py +0 -165
  54. xradio/measurement_set/_utils/_msv2/msv2_msv3.py +0 -7
  55. xradio/measurement_set/_utils/_msv2/partitions.py +0 -392
  56. xradio/measurement_set/_utils/_utils/cds.py +0 -40
  57. xradio/measurement_set/_utils/_utils/xds_helper.py +0 -404
  58. xradio/measurement_set/_utils/_zarr/read.py +0 -263
  59. xradio/measurement_set/_utils/_zarr/write.py +0 -329
  60. xradio/measurement_set/_utils/msv2.py +0 -106
  61. xradio/measurement_set/_utils/zarr.py +0 -133
  62. xradio-0.0.55.dist-info/RECORD +0 -77
  63. {xradio-0.0.55.dist-info → xradio-0.0.58.dist-info}/licenses/LICENSE.txt +0 -0
  64. {xradio-0.0.55.dist-info → xradio-0.0.58.dist-info}/top_level.txt +0 -0
xradio/measurement_set/_utils/_msv2/_tables/load_main_table.py (deleted)
@@ -1,487 +0,0 @@
- import toolviper.utils.logger as logger
- from typing import Dict, List, Tuple, Union
-
- import pandas as pd
- import numpy as np
- import xarray as xr
-
- from casacore import tables
-
- from .load import load_col_chunk
- from .read_main_table import get_partition_ids, redim_id_data_vars, rename_vars
- from .read import add_units_measures, convert_casacore_time, extract_table_attributes
- from .write import revert_time
- from .table_query import open_query, open_table_ro
- from xradio.measurement_set._utils._ms._tables.read_main_table import (
-     get_baselines,
-     get_baseline_indices,
- )
- from xradio._utils.list_and_array import unique_1d
-
-
- def load_expanded_main_table_chunk(
-     infile: str,
-     ddi: int,
-     chunk: Dict[str, slice],
-     ignore_msv2_cols: Union[list, None] = None,
- ) -> xr.Dataset:
-     """
-     Load a chunk of data from main table into memory, with expanded
-     dims: (time, baseline, freq, pols)
-
-     Parameters
-     ----------
-     infile : str
-         Input MS path
-     ddi : int
-         DDI to load chunk from
-     chunk : Dict[str, slice]
-         specification of chunk to load
-     ignore_msv2_cols : Union[list, None] (Default value = None)
-         cols that should not be loaded (deprecated MSv2 or similar)
-
-     Returns
-     -------
-     xr.Dataset
-         Xarray dataset with the chunk of visibility data for one DDI (spw_id, pol_setup_id) pair
-     """
-
-     taql_where = f"where DATA_DESC_ID = {ddi}"
-     taql_ddi = f"select * from $mtable {taql_where}"
-
-     with open_table_ro(infile) as mtable:
-         with open_query(mtable, taql_ddi) as tb_tool:
-             if tb_tool.nrows() == 0:
-                 return xr.Dataset()
-
-             xds, part_ids, attrs = load_expanded_ddi_chunk(
-                 infile, tb_tool, taql_where, chunk, ignore_msv2_cols
-             )
-
-     return xds, part_ids, attrs
-
-
- def load_expanded_ddi_chunk(
-     infile: str,
-     tb_tool: tables.table,
-     taql_pre: str,
-     chunk: Dict[str, slice],
-     ignore_msv2_cols: Union[list, None] = None,
- ) -> xr.Dataset:
-     """
-     Helper function to effectively load the chunk and produce an
-     xr.Dataset from a DDI once the table and initial query(ies) have
-     been opened successfully.
-
-     Parameters
-     ----------
-     infile : str
-         Input MS path
-     tb_tool : tables.table
-         table query constrained to one DDI and chunk time range
-     taql_pre : str
-         TaQL query used for tb_tool, with some pre-selection of rows and columns
-     chunk : Dict[str, slice]
-         specification of data chunk to load
-     ignore_msv2_cols : Union[list, None] (Default value = None)
-         propagated from calling functions
-
-     Returns
-     -------
-     xr.Dataset
-         An Xarray dataset with data variables as plain numpy
-         arrays loaded directly from the MS columns
-     """
-
-     # read the specified chunk of data, figure out indices and lens
-     utimes, times = get_chunk_times(taql_pre, chunk)
-     baselines, blines = get_chunk_baselines(tb_tool, chunk)
-     tidxs, bidxs, didxs, taql_where_chunk = get_chunk_data_indices(
-         taql_pre, chunk, utimes, times, baselines, blines
-     )
-
-     ctlen = min(len(utimes), times[1] - times[0] + 1)
-     cblen = min(len(baselines), blines[1] - blines[0] + 1)
-     mvars = load_ddi_cols_chunk(
-         ctlen, cblen, tidxs, bidxs, didxs, tb_tool, chunk, ignore_msv2_cols
-     )
-
-     mcoords = {
-         "time": xr.DataArray(convert_casacore_time(utimes[:ctlen]), dims=["time"]),
-         "baseline": xr.DataArray(np.arange(cblen), dims=["baseline"]),
-     }
-
-     # add xds global attributes
-     cc_attrs = extract_table_attributes(infile)
-     attrs = {"other": {"msv2": {"ctds_attrs": cc_attrs, "bad_cols": ignore_msv2_cols}}}
-     # add per data var attributes
-     mvars = add_units_measures(mvars, cc_attrs)
-     mcoords = add_units_measures(mcoords, cc_attrs)
-
-     mvars = rename_vars(mvars)
-     mvars = redim_id_data_vars(mvars)
-     xds = xr.Dataset(mvars, coords=mcoords)
-
-     part_ids = get_partition_ids(tb_tool, taql_where_chunk)
-
-     # needs an ~equivalent to add_partition_attrs?
-     return xds, part_ids, attrs
-
-
- def load_ddi_cols_chunk(
-     ctlen: int,
-     cblen: int,
-     tidxs: np.ndarray,
-     bidxs: np.ndarray,
-     didxs: np.ndarray,
-     tb_tool: tables.table,
-     chunk: Dict[str, slice],
-     ignore_msv2_cols: Union[list, None] = None,
- ) -> Dict[str, np.ndarray]:
-     """
-     For a given chunk and DDI, load all the MSv2 columns
-
-     Parameters
-     ----------
-     ctlen : int
-         length of the time axis/dim of the chunk
-     cblen : int
-         length of the baseline axis of the chunk
-     tidxs : np.ndarray
-         time axis indices
-     bidxs : np.ndarray
-         baseline axis indices
-     didxs : np.ndarray
-         (effective) data indices, excluding missing baselines
-     tb_tool : tables.table
-         a table/TaQL query open and being used to load columns
-     chunk : Dict[str, slice]
-         data chunk specification
-     ignore_msv2_cols : Union[list, None] (Default value = None)
-         propagated from calling functions
-
-     Returns
-     -------
-     Dict[str, np.ndarray]
-         columns loaded into memory as np arrays
-
-     """
-     cols = tb_tool.colnames()
-
-     cshapes = [
-         np.array(tb_tool.getcell(col, 0)).shape
-         for col in cols
-         if tb_tool.iscelldefined(col, 0)
-     ]
-     # Assumes shapes are consistent across columns - MSv2
-     chan_cnt, pol_cnt = [(csh[0], csh[1]) for csh in cshapes if len(csh) == 2][0]
-
-     dims = ["time", "baseline", "freq", "pol"]
-     mvars = {}
-     # loop over each column and load data
-     for col in cols:
-         if (col in ignore_msv2_cols + ["TIME"]) or not tb_tool.iscelldefined(col, 0):
-             continue
-
-         cdata = tb_tool.getcol(col, 0, 1)[0]
-         cell_shape = cdata.shape
-         if len(cell_shape) == 0:
-             col_dims = dims[:2]
-             mvars[col] = xr.DataArray(
-                 load_col_chunk(
-                     tb_tool, col, (ctlen, cblen), tidxs, bidxs, didxs, None, None
-                 ),
-                 dims=col_dims,
-             )
-
-         elif col == "UVW":
-             col_dims = dims[:2] + ["uvw_coords"]
-             mvars[col] = xr.DataArray(
-                 load_col_chunk(
-                     tb_tool, col, (ctlen, cblen, 3), tidxs, bidxs, didxs, None, None
-                 ),
-                 dims=col_dims,
-             )
-
-         elif len(cell_shape) == 1:
-             pols, col_dims = get_col_1d_pols(cell_shape, dims, chan_cnt, pol_cnt, chunk)
-             cshape = (ctlen, cblen) + (pols[1] - pols[0] + 1,)
-             mvars[col] = xr.DataArray(
-                 load_col_chunk(tb_tool, col, cshape, tidxs, bidxs, didxs, pols, None),
-                 dims=col_dims,
-             )
-
-         elif len(cell_shape) == 2:
-             chans, pols = get_col_2d_chans_pols(cell_shape, chan_cnt, pol_cnt, chunk)
-             cshape = (ctlen, cblen) + (chans[1] - chans[0] + 1, pols[1] - pols[0] + 1)
-             col_dims = dims
-             mvars[col] = xr.DataArray(
-                 load_col_chunk(tb_tool, col, cshape, tidxs, bidxs, didxs, chans, pols),
-                 dims=col_dims,
-             )
-
-     return mvars
-
-
- def get_chunk_times(
-     taql_pre: str, chunk: Dict[str, slice]
- ) -> Tuple[np.ndarray, Tuple[int, int]]:
-     """
-     Produces time col/axis related values for a chunk: unique times,
-     start/stop times.
-
-     Parameters
-     ----------
-     taql_pre : str
-         TaQL query used for tb_tool, with some pre-selection
-         of rows and columns.
-     chunk : Dict[str, slice]
-         specification of data chunk to load
-
-     Returns
-     -------
-     Tuple[np.ndarray, Tuple[int, int]]
-         array of unique times + (first, last) time in the chunk
-     """
-
-     taql_utimes = f"select DISTINCT TIME from $mtable {taql_pre}"
-     with open_query(None, taql_utimes) as query_utimes:
-         utimes = unique_1d(query_utimes.getcol("TIME", 0, -1))
-         # add a tol around the time ranges returned by taql
-         if len(utimes) < 2:
-             tol = 1e-5
-         else:
-             tol = np.diff(utimes).min() / 4
-
-     if "time" in chunk:
-         time_slice = chunk["time"]
-         if (
-             type(time_slice.start) == pd.Timestamp
-             and type(time_slice.stop) == pd.Timestamp
-         ):
-             times = (
-                 revert_time(time_slice.start) - tol,
-                 revert_time(time_slice.stop) + tol,
-             )
-         elif (
-             int(time_slice.start) == time_slice.start
-             and int(time_slice.stop) == time_slice.stop
-         ):
-             # could be operator.index(time_slice.start):
-             nutimes = len(utimes)
-             times = (
-                 min(nutimes, int(time_slice.start)),
-                 min(nutimes, int(time_slice.stop)) - 1,
-             )
-         else:
-             raise ValueError(
-                 f"Invalid time type. Not a timestamp and cannot be used as"
-                 f" index: {time_slice.start} (type: {type(time_slice.start)})"
-             )
-     else:
-         times = (utimes[0], utimes[-1])
-
-     return utimes, times
-
-
- def get_chunk_baselines(
-     tb_tool: tables.table, chunk: Dict[str, slice]
- ) -> Tuple[np.ndarray, Tuple[int, int]]:
-     """
-     Produces the baseline col/axis related values for a chunk: an array of
-     baselines and the start/stop baseline indices.
-
-     Parameters
-     ----------
-     tb_tool : tables.table
-         table/query opened with previous selections (time)
-     chunk : Dict[str, slice]
-         specification of data chunk to load
-
-     Returns
-     -------
-     Tuple[np.ndarray, Tuple[int, int]]
-         array of baselines + (first, last) baseline in the chunk
-     """
-     baselines = get_baselines(tb_tool)
-
-     if "baseline" in chunk:
-         baseline_chunk = chunk["baseline"]
-         baseline_boundaries = (int(baseline_chunk.start), int(baseline_chunk.stop))
-     else:
-         baseline_boundaries = (baselines[0][0], baselines[-1][0] - 1)
-
-     return baselines, baseline_boundaries
-
-
- def get_chunk_data_indices(
-     taql_pre: str,
-     chunk: Dict[str, slice],
-     utimes: np.ndarray,
-     times: Tuple[int, int],
-     baselines: np.ndarray,
-     blines: Tuple[int, int],
- ) -> Tuple[np.ndarray, np.ndarray, np.ndarray, str]:
-     """
-     Produces indices to pass to the casacore getcol(slice) functions to load
-     the chunk of data. tidxs (time), bidxs (baseline), didxs (effective data
-     indices, considering present/absent baselines).
-
-     Time selection is added on top of that.
-
-     Parameters
-     ----------
-     taql_pre : str
-         TaQL query constraints to prepend/inject
-     chunk : Dict[str, slice]
-         specification of data chunk
-     utimes : np.ndarray
-         array of times in the chunk
-     times : Tuple[int, int]
-         start, stop time indices
-     baselines : np.ndarray
-         array of baselines in the chunk
-     blines : Tuple[int, int]
-         start, stop baseline indices
-
-     Returns
-     -------
-     Tuple[np.ndarray, np.ndarray, np.ndarray, str]
-         indices along the time, baseline and data (time/baseline)
-         axes + the full where... string defined for this chunk
-
-     """
-
-     taql_time = f"TIME BETWEEN {utimes[times[0]]} AND {utimes[times[1]]}"
-     taql_ant = f"ANTENNA1 BETWEEN {blines[0]} and {blines[1]}"
-     taql_where_chunk = f"{taql_pre} AND {taql_time} AND {taql_ant}"
-     taql_chunk = f"select * from $mtable {taql_where_chunk}"
-     with open_query(None, taql_chunk) as query_times_ants:
-         logger.debug(
-             f"Opened chunk query, with {query_times_ants.nrows()} rows. Query: {taql_chunk}"
-         )
-         tidxs = (
-             np.searchsorted(utimes, query_times_ants.getcol("TIME", 0, -1)) - times[0]
-         )
-         ts_ant1, ts_ant2 = (
-             query_times_ants.getcol("ANTENNA1", 0, -1),
-             query_times_ants.getcol("ANTENNA2", 0, -1),
-         )
-
-         ts_bases = np.column_stack((ts_ant1, ts_ant2))
-
-         bidxs = get_baseline_indices(baselines, ts_bases) - blines[0]
-
-         # some antenna 2's will be out of bounds for this chunk, store rows that are in bounds
-         didxs = np.where(
-             (bidxs >= 0)
-             & (bidxs < min(blines[1] - blines[0] + 1, len(baselines) - blines[0]))
-         )[0]
-
-     return tidxs, bidxs, didxs, taql_where_chunk
-
-
- def get_col_1d_pols(
-     cell_shape: Tuple[int],
-     dims: List[str],
-     chan_cnt: int,
-     pol_cnt: int,
-     chunk: Dict[str, slice],
- ) -> Tuple[Tuple[int, int], List[str]]:
-     """
-     For a column with 1d array values, calculate the start/stop
-     indices for the last dimension (either pol or freq).
-     It also produces the appropriate dimension names.
-
-     Parameters
-     ----------
-     cell_shape : Tuple[int]
-         shape of the column
-     dims : List[str]
-         full list of dataset dimensions
-     chan_cnt : int
-         number of channels
-     pol_cnt : int
-         number of pols
-     chunk : Dict[str, slice]
-         data chunk specification
-
-     Returns
-     -------
-     Tuple[Tuple[int, int], List[str]]
-         first and last pol/freq index of the chunk, and its
-         dimension names
-
-     """
-     if cell_shape == chan_cnt:
-         # chan/freq
-         col_dims = dims[:2] + ["freq"]
-         if "freq" in chunk:
-             pols = (
-                 min(chan_cnt, chunk["freq"].start),
-                 min(chan_cnt, chunk["freq"].stop) - 1,
-             )
-         else:
-             pols = (0, cell_shape[0])
-     else:
-         # pol
-         col_dims = dims[:2] + ["pol"]
-         if "pol" in chunk:
-             pols = (
-                 min(pol_cnt, chunk["pol"].start),
-                 min(pol_cnt, chunk["pol"].stop) - 1,
-             )
-         else:
-             pols = (0, cell_shape[0])
-
-     return pols, col_dims
-
-
- def get_col_2d_chans_pols(
-     cell_shape: Tuple[int],
-     chan_cnt: int,
-     pol_cnt: int,
-     chunk: Dict[str, slice],
- ) -> Tuple[Tuple[int, int], Tuple[int, int]]:
-     """
-     For a column with 2d array values (FLAG, DATA, WEIGHT_SPECTRUM,
-     etc.), calculate the start/stop indices for the last two
-     dimensions of the chunk (freq and pol).
-     The dimension names can be assumed to be the full list of dims in
-     visibilities (time, baseline, freq, pol).
-
-     Parameters
-     ----------
-     cell_shape : Tuple[int]
-         shape of the column
-     chan_cnt : int
-         number of channels
-     pol_cnt : int
-         number of pols
-     chunk : Dict[str, slice]
-         data chunk specification
-
-     Returns
-     -------
-     Tuple[Tuple[int, int], Tuple[int, int]]
-         first and last index for freq (channel) and pol axes of
-         the chunk
-
-     """
-     if "freq" in chunk:
-         chans = (
-             min(chan_cnt, chunk["freq"].start),
-             min(chan_cnt, chunk["freq"].stop) - 1,
-         )
-     else:
-         chans = (0, cell_shape[0])
-
-     if "pol" in chunk:
-         pols = (
-             min(pol_cnt, chunk["pol"].start),
-             min(pol_cnt, chunk["pol"].stop) - 1,
-         )
-     else:
-         pols = (0, cell_shape[1])
-
-     return chans, pols
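
Note on the removed indexing logic: get_chunk_data_indices above maps each MS row of the chunk to a (time, baseline) position by running np.searchsorted over the unique timestamps and looking each (ANTENNA1, ANTENNA2) pair up in the sorted baseline list, then keeps only the rows whose baseline index falls inside the requested range. The following is a minimal standalone sketch of that pattern, using made-up numpy arrays (row_times, ant1, ant2) in place of the casacore table columns; no xradio or casacore API is involved.

import numpy as np

# Hypothetical stand-ins for the TIME, ANTENNA1 and ANTENNA2 columns of one chunk.
row_times = np.array([10.0, 10.0, 20.0, 20.0, 30.0])
ant1 = np.array([0, 0, 0, 1, 0])
ant2 = np.array([1, 2, 1, 2, 2])

utimes = np.unique(row_times)  # unique timestamps, ascending
baselines = np.unique(np.column_stack((ant1, ant2)), axis=0)  # unique (ant1, ant2) pairs

# Row -> time index: position of each row's timestamp among the unique times.
tidxs = np.searchsorted(utimes, row_times)

# Row -> baseline index: locate each (ant1, ant2) pair in the sorted baseline list.
ts_bases = np.column_stack((ant1, ant2))
bidxs = np.array([np.flatnonzero((baselines == pair).all(axis=1))[0] for pair in ts_bases])

# Keep only rows whose baseline index is inside the chunk's baseline range.
didxs = np.flatnonzero((bidxs >= 0) & (bidxs < len(baselines)))

print(tidxs, bidxs, didxs)  # [0 0 1 1 2] [0 1 0 2 1] [0 1 2 3 4]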