PyPI - xradio - Versions diffs - 0.0.55__py3-none-any.whl → 0.0.58__py3-none-any.whl - Mend

xradio 0.0.55__py3-none-any.whl → 0.0.58__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (64) hide show

xradio/__init__.py +2 -2
xradio/_utils/_casacore/casacore_from_casatools.py +1001 -0
xradio/_utils/_casacore/tables.py +6 -1
xradio/_utils/coord_math.py +22 -23
xradio/_utils/dict_helpers.py +76 -11
xradio/_utils/schema.py +5 -2
xradio/_utils/zarr/common.py +1 -73
xradio/image/_util/_casacore/common.py +11 -3
xradio/image/_util/_casacore/xds_from_casacore.py +59 -35
xradio/image/_util/_casacore/xds_to_casacore.py +47 -16
xradio/image/_util/_fits/xds_from_fits.py +172 -77
xradio/image/_util/casacore.py +9 -4
xradio/image/_util/common.py +4 -4
xradio/image/_util/image_factory.py +8 -8
xradio/image/image.py +45 -5
xradio/measurement_set/__init__.py +19 -9
xradio/measurement_set/_utils/__init__.py +1 -3
xradio/measurement_set/_utils/_msv2/__init__.py +0 -0
xradio/measurement_set/_utils/_msv2/_tables/read.py +35 -90
xradio/measurement_set/_utils/_msv2/_tables/read_main_table.py +6 -686
xradio/measurement_set/_utils/_msv2/_tables/table_query.py +13 -3
xradio/measurement_set/_utils/_msv2/conversion.py +129 -145
xradio/measurement_set/_utils/_msv2/create_antenna_xds.py +9 -16
xradio/measurement_set/_utils/_msv2/create_field_and_source_xds.py +125 -221
xradio/measurement_set/_utils/_msv2/msv2_to_msv4_meta.py +1 -2
xradio/measurement_set/_utils/_msv2/msv4_info_dicts.py +13 -8
xradio/measurement_set/_utils/_msv2/msv4_sub_xdss.py +27 -72
xradio/measurement_set/_utils/_msv2/partition_queries.py +5 -262
xradio/measurement_set/_utils/_msv2/subtables.py +0 -107
xradio/measurement_set/_utils/_utils/interpolate.py +60 -0
xradio/measurement_set/_utils/_zarr/encoding.py +2 -7
xradio/measurement_set/convert_msv2_to_processing_set.py +0 -2
xradio/measurement_set/load_processing_set.py +2 -2
xradio/measurement_set/measurement_set_xdt.py +14 -14
xradio/measurement_set/open_processing_set.py +1 -3
xradio/measurement_set/processing_set_xdt.py +41 -835
xradio/measurement_set/schema.py +96 -123
xradio/schema/check.py +91 -97
xradio/schema/dataclass.py +159 -22
xradio/schema/export.py +99 -0
xradio/schema/metamodel.py +51 -16
xradio/schema/typing.py +5 -5
{xradio-0.0.55.dist-info → xradio-0.0.58.dist-info}/METADATA +43 -11
xradio-0.0.58.dist-info/RECORD +65 -0
{xradio-0.0.55.dist-info → xradio-0.0.58.dist-info}/WHEEL +1 -1
xradio/image/_util/fits.py +0 -13
xradio/measurement_set/_utils/_msv2/_tables/load.py +0 -63
xradio/measurement_set/_utils/_msv2/_tables/load_main_table.py +0 -487
xradio/measurement_set/_utils/_msv2/_tables/read_subtables.py +0 -395
xradio/measurement_set/_utils/_msv2/_tables/write.py +0 -320
xradio/measurement_set/_utils/_msv2/_tables/write_exp_api.py +0 -385
xradio/measurement_set/_utils/_msv2/chunks.py +0 -115
xradio/measurement_set/_utils/_msv2/descr.py +0 -165
xradio/measurement_set/_utils/_msv2/msv2_msv3.py +0 -7
xradio/measurement_set/_utils/_msv2/partitions.py +0 -392
xradio/measurement_set/_utils/_utils/cds.py +0 -40
xradio/measurement_set/_utils/_utils/xds_helper.py +0 -404
xradio/measurement_set/_utils/_zarr/read.py +0 -263
xradio/measurement_set/_utils/_zarr/write.py +0 -329
xradio/measurement_set/_utils/msv2.py +0 -106
xradio/measurement_set/_utils/zarr.py +0 -133
xradio-0.0.55.dist-info/RECORD +0 -77
{xradio-0.0.55.dist-info → xradio-0.0.58.dist-info}/licenses/LICENSE.txt +0 -0
{xradio-0.0.55.dist-info → xradio-0.0.58.dist-info}/top_level.txt +0 -0

xradio/image/image.py CHANGED Viewed

@@ -15,13 +15,15 @@ import xarray as xr
 # from .._utils.zarr.common import _load_no_dask_zarr
 from ._util.casacore import _load_casa_image_block, _xds_to_casa_image
-from ._util.fits import _read_fits_image
+# from ._util.fits import _read_fits_image
 from ._util.image_factory import (
     _make_empty_aperture_image,
     _make_empty_lmuv_image,
     _make_empty_sky_image,
 )
 from ._util.zarr import _load_image_from_zarr_no_dask, _xds_from_zarr, _xds_to_zarr
+from ._util._fits.xds_from_fits import _fits_image_to_xds
 warnings.filterwarnings("ignore", category=FutureWarning)
@@ -32,12 +34,37 @@ def read_image(
     verbose: bool = False,
     do_sky_coords: bool = True,
     selection: dict = {},
+    compute_mask: bool = True,
 ) -> xr.Dataset:
     """
     Convert CASA, FITS, or zarr image to xradio image xds format
     ngCASA image spec is located at
     https://docs.google.com/spreadsheets/d/1WW0Gl6z85cJVPgtdgW4dxucurHFa06OKGjgoK8OREFA/edit#gid=1719181934
+    Notes on FITS compatibility and memory mapping:
+    This function relies on Astropy's `memmap=True` to avoid loading full image data into memory.
+    However, not all FITS files support memory-mapped reads.
+    ⚠️ The following FITS types are incompatible with memory mapping:
+    1. Compressed images (`CompImageHDU`)
+        = Workaround: decompress the FITS using tools like `funpack`, `cfitsio`,
+          or Astropy's `.scale()`/`.copy()` workflows
+    2. Some scaled images (using BSCALE/BZERO headers)
+        ✅ Supported:
+            - Files with no BSCALE/BZERO headers (or BSCALE=1.0 and BZERO=0.0)
+            - Uncompressed, unscaled primary HDUs
+        ⚠️ Unsupported: Files with BSCALE ≠ 1.0 or BZERO ≠ 0.0
+            - These require data rescaling in memory, which disables lazy access
+            - Attempting to slice such arrays forces eager read of the full dataset
+            - Workaround: remove scaling with Astropy's
+                `HDU.data = HDU.data * BSCALE + BZERO` and save a new file
+    These cases will raise `RuntimeError` to prevent silent eager loads that can exhaust memory.
+    If you encounter such an error, consider preprocessing the file to make it memory-mappable.
     Parameters
     ----------
     infile : str
@@ -69,11 +96,19 @@ def read_image(
         the selection, and the end pixel is not. An empty dictionary (the
         default) indicates that the entire image should be returned. Currently
         only supported for images stored in zarr format.
+    compute_mask : bool, optional
+        If True (default), compute and attach valid data masks when converting from FITS to xds.
+        If False, skip mask computation entirely. This may improve performance if the mask
+        is not required for subsequent processing. It may, however, result in unpredictable behavior
+        for applications that are not designed to handle missing data. It is the user's responsibility,
+        not the software's, to ensure that the mask is computed if it is necessary. Currently only
+        implemented for FITS images.
     Returns
     -------
     xarray.Dataset
     """
+    # from ._util.casacore import _read_casa_image
+    # return _read_casa_image(infile, chunks, verbose, do_sky_coords)
     emsgs = []
     do_casa = True
     try:
@@ -92,9 +127,10 @@ def read_image(
         except Exception as e:
             emsgs.append(f"image format appears not to be casacore: {e.args}")
     # next statement is for debug, comment when done debugging
-    # return _read_fits_image(infile, chunks, verbose, do_sky_coords)
+    # return _fits_image_to_xds(infile, chunks, verbose, do_sky_coords, compute_mask)
     try:
-        return _read_fits_image(infile, chunks, verbose, do_sky_coords)
+        img_full_path = os.path.expanduser(infile)
+        return _fits_image_to_xds(infile, chunks, verbose, do_sky_coords, compute_mask)
     except Exception as e:
         emsgs.append(f"image format appears not to be fits {e.args}")
     # when done debugging comment out next line
@@ -111,7 +147,7 @@ def read_image(
     raise RuntimeError("\n".join(emsgs))
-def load_image(infile: str, block_des: dict = {}, do_sky_coords=True) -> xr.Dataset:
+def load_image(infile: str, block_des: dict = None, do_sky_coords=True) -> xr.Dataset:
     """
     Load an image or portion of an image (subimage) into memory with data variables
     being converted from dask to numpy arrays and coordinate arrays being converted
@@ -144,6 +180,10 @@ def load_image(infile: str, block_des: dict = {}, do_sky_coords=True) -> xr.Data
     """
     do_casa = True
     emsgs = []
+    if block_des is None:
+        block_des = {}
     selection = copy.deepcopy(block_des) if block_des else block_des
     if selection:
         for k, v in selection.items():

xradio/measurement_set/__init__.py CHANGED Viewed

@@ -4,13 +4,11 @@ convert, and retrieve information from Processing Set and Measurement Sets nodes
 Processing Set DataTree
 """
-from .processing_set_xdt import *
+import toolviper.utils.logger as _logger
+from .processing_set_xdt import ProcessingSetXdt
 from .open_processing_set import open_processing_set
-from .load_processing_set import load_processing_set  # , ProcessingSetIterator
-from .convert_msv2_to_processing_set import (
-    convert_msv2_to_processing_set,
-    estimate_conversion_memory_and_cores,
-)
+from .load_processing_set import load_processing_set
 from .measurement_set_xdt import MeasurementSetXdt
 from .schema import SpectrumXds, VisibilityXds
@@ -19,9 +17,21 @@ __all__ = [
     "MeasurementSetXdt",
     "open_processing_set",
     "load_processing_set",
-    "ProcessingSetIterator",
-    "convert_msv2_to_processing_set",
-    "estimate_conversion_memory_and_cores",
     "SpectrumXds",
     "VisibilityXds",
 ]
+try:
+    from .convert_msv2_to_processing_set import (
+        convert_msv2_to_processing_set,
+        estimate_conversion_memory_and_cores,
+    )
+except ModuleNotFoundError as exc:
+    _logger.warning(
+        "Could not import the function to convert from MSv2 to MSv4. "
+        f"That functionality will not be available. Details: {exc}"
+    )
+else:
+    __all__.extend(
+        ["convert_msv2_to_processing_set", "estimate_conversion_memory_and_cores"]
+    )

xradio/measurement_set/_utils/__init__.py CHANGED Viewed

@@ -1,5 +1,3 @@
-from . import msv2
-from . import zarr
 from . import _utils
-__all__ = ["msv2", "zarr", "_utils"]
+__all__ = ["_utils"]

xradio/measurement_set/_utils/_msv2/__init__.py ADDED Viewed

File without changes

xradio/measurement_set/_utils/_msv2/_tables/read.py CHANGED Viewed

@@ -10,7 +10,11 @@ import pandas as pd
 import xarray as xr
 import astropy.units
-from casacore import tables
+try:
+    from casacore import tables
+except ImportError:
+    import xradio._utils._casacore.casacore_from_casatools as tables
 from .table_query import open_query, open_table_ro, TableManager
 from xradio._utils.list_and_array import get_pad_value
@@ -42,16 +46,20 @@ def convert_casacore_time(
     rawtimes: np.ndarray, convert_to_datetime: bool = True
 ) -> np.ndarray:
     """
-    Read time columns to datetime format
-    pandas datetimes are referenced against a 0 of 1970-01-01
-    CASA's modified julian day reference time is (of course) 1858-11-17
+    Convert data from casacore time columns to a different format, either:
+    a) pandas style datetime,
+    b) simply seconds from 1970-01-01 00:00:00 UTC (as used in the Unix scale of
+       astropy).
+    Pandas datetimes and Unix times are referenced against a 0 of 1970-01-01.
+    CASA's (casacore) modified julian day reference time is (of course) 1858-11-17.
     This requires a correction of 3506716800 seconds which is hardcoded to save time
     Parameters
     ----------
     rawtimes : np.ndarray
-        times in casacore ref
+        time values wrt casacore reference
     convert_to_datetime : bool (Default value = True)
         whether to produce pandas style datetime
@@ -308,6 +316,8 @@ def add_units_measures(
                 ):  # Little fix for Meerkat data where the units are a string.
                     cc_units = [cc_units]
+                if isinstance(cc_units, np.ndarray):
+                    cc_units = cc_units.tolist()
                 if not isinstance(cc_units, list) or not cc_units:
                     logger.warning(
                         f"Invalid units found for column/variable {col}: {cc_units}"
@@ -345,70 +355,6 @@ def add_units_measures(
     return mvars
-def make_freq_attrs(spw_xds: xr.Dataset, spw_id: int) -> Dict[str, Any]:
-    """
-    Grab the units/measure metainfo for the xds.freq dimension of a
-    parttion from the SPECTRAL_WINDOW subtable CTDS attributes.
-    Has to read xds_spw.meas_freq_ref and use it as index in the CTDS
-    'VarRefCol' attrs of CHAN_FREQ and REF_FREQUENCY to give a
-    reference frame to xds_spw.ref_frequency and xds_spw.chan_freq
-    (then the ref frame from the second will be pulled to
-    xds.freq.attrs)
-    Parameters
-    ----------
-    spw_xds : xr.Dataset
-        (metainfo) SPECTRAL_WINDOW xds
-    spw_id : int
-        SPW id of a partition
-    Returns
-    -------
-    Dict[str, Any]
-        attributes (units/measure) for the freq dim of a partition
-    """
-    fallback_TabRefTypes = [
-        "REST",
-        "LSRK",
-        "LSRD",
-        "BARY",
-        "GEO",
-        "TOPO",
-        "GALACTO",
-        "LGROUP",
-        "CMB",
-    ]
-    ctds_cols = spw_xds.attrs["other"]["msv2"]["ctds_attrs"]["column_descriptions"]
-    cfreq = ctds_cols["CHAN_FREQ"]
-    cf_attrs = spw_xds.data_vars["CHAN_FREQ"].attrs
-    if "MEASINFO" in cfreq["keywords"] and "VarRefCol" in cfreq["keywords"]["MEASINFO"]:
-        fattrs = cfreq["keywords"]["MEASINFO"]
-        var_ref_col = fattrs["VarRefCol"]
-        # This should point to the SPW/MEAS_FREQ_REF col
-        meas_freq_ref_idx = spw_xds.data_vars[var_ref_col].values[spw_id]
-        if "TabRefCodes" not in fattrs or "TabRefTypes" not in fattrs:
-            # Datasets like vla/ic2233_1.ms say "VarRefCol" but "TabRefTypes" is missing
-            ref_frame = fallback_TabRefTypes[meas_freq_ref_idx]
-        else:
-            ref_type_code = fattrs["TabRefCodes"][meas_freq_ref_idx]
-            ref_frame = fattrs["TabRefTypes"][ref_type_code]
-        cf_attrs["measure"] = {
-            "type": fattrs["type"],
-            "ref_frame": ref_frame,
-        }
-        # Also set the 'VarRefCol' for CHAN_FREQ and REF_FREQUENCEY
-        spw_xds.data_vars["CHAN_FREQ"].attrs.update(cf_attrs)
-        spw_xds.data_vars["REF_FREQUENCY"].attrs.update(cf_attrs)
-    return cf_attrs
 def redimension_ms_subtable(xds: xr.Dataset, subt_name: str) -> xr.Dataset:
     """
     Expand a MeasurementSet subtable xds from single dimension (row)
@@ -545,8 +491,8 @@ def load_generic_table(
     tname : str
         (sub)table name, for example 'SOURCE' for myms.ms/SOURCE
     timecols : Union[List[str], None] (Default value = None)
-        column names to convert to numpy datetime format.
-        leaves times as their original casacore format.
+        Names of time column(s), to convert from casacore times to 1970-01-01 scale
+        An empty list leaves times as their original casacore format.
     ignore : Union[List[str], None] (Default value = None)
         list of column names to ignore and not try to read.
     rename_ids : Dict[str, str] (Default value = None)
@@ -742,7 +688,7 @@ def load_generic_cols(
     tb_tool : tables.table
         table to load the columns
     timecols : Union[List[str], None]
-        columns names to convert to datetime format
+        column names to convert from casacore time format
     ignore : Union[List[str], None]
         list of column names to skip and not try to load.
@@ -822,7 +768,7 @@ def load_fixed_size_cols(
     tb_tool : tables.table
         table to read the columns
     timecols : Union[List[str], None]
-        columns names to convert to datetime format
+        column names to convert from casacore time format
     ignore : Union[List[str], None]
         list of column names to skip and not try to load.
@@ -917,7 +863,8 @@ def raw_col_data_to_coords_vars(
     data: np.ndarray :
         column data
     timecols: Union[List[str], None]
-        columns to be treated as TIME-related
+        columns to be treated as TIME-related (they are coordinates and need conversion
+        from casacore time format).
     Returns
     -------
@@ -947,7 +894,7 @@ def raw_col_data_to_coords_vars(
             data = convert_mjd_time(data).astype("float64") / 1e9
         else:
             try:
-                data = convert_casacore_time(data)
+                data = convert_casacore_time(data, False)
             except pd.errors.OutOfBoundsDatetime as exc:
                 if inpath.endswith("WEATHER"):
                     # intentionally not calling logging.exception
@@ -987,7 +934,7 @@ def raw_col_data_to_coords_vars(
 def get_pad_value_in_tablerow_column(trows: tables.tablerow, col: str) -> object:
     """
-    Gets the pad value for the type of a column (IMPORTANTLY) as froun in the
+    Gets the pad value for the type of a column (IMPORTANTLY) as found in the
     type specified in the row / column value dict returned by tablerow.
     This can differ from the type of the column as given in the casacore
     column descriptions. See https://github.com/casangi/xradio/issues/242.
@@ -1189,7 +1136,7 @@ def read_col_chunk(
     np.ndarray
     """
     # TODO: consider calling load_col_chunk() from inside the withs
-    # for read_delayed_pointing_table and read_expanded_main_table
+    # for read_expanded_main_table
     with open_table_ro(infile) as mtable:
         with open_query(mtable, ts_taql) as query:
             if (len(cshape) == 2) or (col == "UVW"):  # all the scalars and UVW
@@ -1250,22 +1197,20 @@ def read_col_conversion_numpy(
         # Use casacore to get the shape of a row for this column
         #################################################################################
-        # getcolshapestring() only works on columns where a row element is an
-        # array ie. fails for TIME
-        # Assumes the RuntimeError is because the column is a scalar
-        try:
+        # getcolshapestring() only works for array-valued columns.
+        # For scalar columns (e.g., EXPOSURE, TIME_CENTROID), it raises a RuntimeError.
+        # So we first check if the column is scalar to avoid that.
+        if tb_tool.isscalarcol(col):
+            extra_dimensions = ()
+        else:
+            # Get the shape string for the first row of the column (e.g., "[4, 2]")
             shape_string = tb_tool.getcolshapestring(col)[0]
-            # Convert `shape_string` into a tuple that numpy understands
+            # Convert the shape string into a tuple of integers (e.g., (4, 2)) that numpy
+            # understands.
             extra_dimensions = tuple(
-                [
-                    int(idx)
-                    for idx in shape_string.replace("[", "")
-                    .replace("]", "")
-                    .split(", ")
-                ]
+                int(dim) for dim in shape_string.strip("[]").split(", ")
             )
-        except RuntimeError:
-            extra_dimensions = ()
         #################################################################################

xradio 0.0.55__py3-none-any.whl → 0.0.58__py3-none-any.whl

xradio 0.0.55__py3-none-any.whl → 0.0.58__py3-none-any.whl