PyPI - xradio - Versions diffs - 0.0.41__py3-none-any.whl → 0.0.43__py3-none-any.whl - Mend

xradio 0.0.41py3-none-any.whl → 0.0.43py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (62) hide show

xradio/{correlated_data/_utils/_ms → measurement_set/_utils/_msv2}/create_field_and_source_xds.py RENAMED Viewed

@@ -6,20 +6,27 @@ import numpy as np
 import xarray as xr
 import toolviper.utils.logger as logger
-from xradio.correlated_data._utils._ms.msv4_sub_xdss import interpolate_to_time
-from xradio.correlated_data._utils._ms.subtables import subt_rename_ids
-from xradio.correlated_data._utils._ms._tables.read import (
+from xradio.measurement_set._utils._msv2.msv4_sub_xdss import interpolate_to_time
+from xradio.measurement_set._utils._msv2.subtables import subt_rename_ids
+from xradio.measurement_set._utils._msv2._tables.read import (
     convert_casacore_time_to_mjd,
     make_taql_where_between_min_max,
     load_generic_table,
 )
-from xradio._utils.common import cast_to_str, convert_to_si_units, add_position_offsets
+from xradio._utils.list_and_array import cast_to_str
+from xradio._utils.coord_math import (
+    convert_to_si_units,
+    add_position_offsets,
+    wrap_to_pi,
+)
 from xradio._utils.list_and_array import (
     check_if_consistent,
     unique_1d,
     to_np_array,
 )
 from xradio._utils.schema import (
+    casacore_to_msv4_measure_type,
     column_description_casacore_to_msv4_measure,
     convert_generic_xds_to_xradio_schema,
 )
@@ -169,9 +176,21 @@ def extract_ephemeris_info(
     ), "Only geocentric observer ephemeris are supported."
     if "posrefsys" in ephemeris_meta:
-        sky_coord_frame = ephemeris_meta["posrefsys"].replace("ICRF/", "")
+        # Note the phase center can be given as "J2000" or "J2000.0"
+        ref_frame = (
+            ephemeris_meta["posrefsys"]
+            .replace("ICRF/", "", 1)
+            .replace("J2000.0", "J2000", 1)
+        )
+        if ref_frame in casacore_to_msv4_measure_type["direction"].get("Ref_map", {}):
+            ref_frame = casacore_to_msv4_measure_type["direction"]["Ref_map"][ref_frame]
+        else:
+            logger.debug(
+                f"Unrecognized casacore direction reference frame found in posrefsys: {ref_frame}"
+            )
+        sky_coord_frame = ref_frame.lower()
     else:
-        sky_coord_frame = "ICRS"  # We will have to just assume this.
+        sky_coord_frame = "icrs"  # We will have to just assume this.
     # Find out which keyword is used for units (UNIT/QuantumUnits)
     if "UNIT" in ephemeris_column_description["RA"]["keywords"]:
@@ -195,7 +214,7 @@ def extract_ephemeris_info(
             "type": "location",
             "units": ["deg", "deg", "m"],
             "data": observer_position,
-            "ellipsoid": "WGS84",
+            "frame": "WGS84",
             "origin_object_name": "Earth",
             "coordinate_system": ephemeris_meta["obsloc"].lower(),
         }
@@ -260,7 +279,7 @@ def extract_ephemeris_info(
                 }
             )
-    # Add optional data: SUB_OBSERVER_POSITION and SUB_SOLAR_POSITION
+    # Add optional data: SUB_OBSERVER_DIRECTION and SUB_SOLAR_POSITION
     if "DiskLong" in ephemeris_column_description:
         key_lon = "DiskLong"
         key_lat = "DiskLat"
@@ -283,7 +302,7 @@ def extract_ephemeris_info(
         temp_xds["SUB_OBSERVER_DIRECTION"].attrs.update(
             {
                 "type": "location",
-                "ellipsoid": "NA",
+                "frame": "Undefined",
                 "origin_object_name": ephemeris_meta["NAME"],
                 "coordinate_system": "planetodetic",
                 "units": [
@@ -312,7 +331,7 @@ def extract_ephemeris_info(
         temp_xds["SUB_SOLAR_POSITION"].attrs.update(
             {
                 "type": "location",
-                "ellipsoid": "NA",
+                "frame": "Undefined",
                 "origin_object_name": "Sun",
                 "coordinate_system": "planetodetic",
                 "units": [
@@ -339,8 +358,8 @@ def extract_ephemeris_info(
     time_coord_attrs = {
         "type": "time",
         "units": ["s"],
-        "scale": "UTC",
-        "format": "UNIX",
+        "scale": "utc",
+        "format": "unix",
     }
     temp_xds["time_ephemeris"].attrs.update(time_coord_attrs)
@@ -374,21 +393,28 @@ def extract_ephemeris_info(
             interp_time is not None
         ), 'ephemeris_interpolate must be True if there is ephemeris data and multiple fields (this will occur if "FIELD_ID" is not in partition_scheme).'
+        field_phase_center = wrap_to_pi(
+            xds[center_dv].values + xds["SOURCE_LOCATION"][:, 0:2].values
+        )
+        field_phase_center = np.column_stack(
+            (field_phase_center, np.zeros(xds[center_dv].values.shape[0]))
+        )
+        field_phase_center[:, -1] = (
+            field_phase_center[:, -1] + xds["SOURCE_LOCATION"][:, -1].values
+        )
         xds[center_dv] = xr.DataArray(
-            add_position_offsets(
-                np.column_stack(
-                    (xds[center_dv].values, np.zeros(xds[center_dv].values.shape[0]))
-                ),
-                xds["SOURCE_LOCATION"].values,
-            ),
+            field_phase_center,
             dims=[xds["SOURCE_LOCATION"].dims[0], "sky_pos_label"],
         )
     else:
+        field_phase_center = (
+            np.append(xds[center_dv].values, 0) + xds["SOURCE_LOCATION"].values
+        )
+        field_phase_center[:, 0:2] = wrap_to_pi(field_phase_center[:, 0:2])
         xds[center_dv] = xr.DataArray(
-            add_position_offsets(
-                np.append(xds[center_dv].values, 0),
-                xds["SOURCE_LOCATION"].values,
-            ),
+            field_phase_center,
             dims=[xds["SOURCE_LOCATION"].dims[0], "sky_pos_label"],
         )
@@ -460,6 +486,65 @@ def make_line_dims_and_coords(
     return line_dims, line_coords
+def pad_missing_sources(
+    source_xds: xr.Dataset, unique_source_ids: np.ndarray
+) -> xr.Dataset:
+    """
+    In some MSs there can be source IDs referenced from the field subtable which do not exist in
+    the source table: https://github.com/casangi/xradio/issues/266
+    This addresses the issue by padding/filling those IDs with "Unknown"/nan values. Produces a
+    source_xds that, in addition to the information loaded for the non-missing source IDs, has
+    one padding entry for each ID that is missing from the input MSv2 source table.
+    This function does not need to do anything when unique_source_ids is a single value
+    (partitioning by "FIELD_ID" or otherwise single field/source)
+    Parameters:
+    ----------
+    source_xds: xr.Dataset
+        source dataset to fix/pad missing sources
+    unique_source_ids: np.ndarray
+        IDs of the sources included in this partition
+    Returns:
+    -------
+    filled_source_xds : xr.Dataset
+        source dataset with padding in the originally missing sources, sorted by SOURCE_ID.
+        When unique_source_ids has at most one element the input is returned unchanged.
+    """
+    # Only fill gaps in multi-source xdss. If single source_id, no need to
+    if len(unique_source_ids) <= 1:
+        return source_xds
+    missing_source_ids = [
+        source_id
+        for source_id in unique_source_ids
+        if source_id not in source_xds.coords["SOURCE_ID"]
+    ]
+    # would like to use the new-ish xr.pad, but it creates issues with indices/coords and is
+    # also not free of overheads, as it for example changes all numeric types to float64
+    # Template for a single padded source: the first source entry, filled with nan
+    missing_source_xds = xr.full_like(source_xds.isel(SOURCE_ID=0), fill_value=np.nan)
+    pad_str = "Unknown"
+    pad_str_type = "<U9"
+    for var in missing_source_xds.data_vars:
+        if np.issubdtype(missing_source_xds.data_vars[var].dtype, np.str_):
+            # Avoid truncation to length of previously loaded strings
+            missing_source_xds[var] = missing_source_xds[var].astype(
+                np.dtype(pad_str_type)
+            )
+            missing_source_xds[var] = pad_str
+    concat_dim = "SOURCE_ID"
+    xdss_to_concat = [source_xds]
+    for missing_id in missing_source_ids:
+        # assign_coords returns a new dataset for every ID. Setting the coordinate in place
+        # on the shared template and appending that same object would leave every padded
+        # entry labelled with the last missing ID.
+        xdss_to_concat.append(
+            missing_source_xds.assign_coords({concat_dim: missing_id})
+        )
+    filled_source_xds = xr.concat(xdss_to_concat, concat_dim).sortby(concat_dim)
+    return filled_source_xds
 def extract_source_info(
     xds: xr.Dataset,
     path: str,
@@ -533,15 +618,22 @@ def extract_source_info(
     # This source table time is not the same as the time in the field_and_source_xds that is derived from the main MSv4 time axis.
     # The source_id maps to the time axis in the field_and_source_xds. That is why "if len(source_id) == 1" is used to check if there should be a time axis.
-    assert len(source_xds.TIME) <= len(
-        unique_source_id
-    ), "Can only process source table with a single time entry for a source_id and spectral_window_id."
+    # assert len(source_xds.TIME) <= len(
+    #     unique_source_id
+    # ), "Can only process source table with a single time entry for a source_id and spectral_window_id."
+    if len(source_xds.TIME) > len(unique_source_id):
+        logger.warning(
+            f"Source table has more than one time entry for a source_id and spectral_window_id. This is not currently supported. Only the first time entry will be used."
+        )
+        source_xds = source_xds.drop_duplicates("SOURCE_ID", keep="first")
     source_xds = source_xds.isel(TIME=0, SPECTRAL_WINDOW_ID=0, drop=True)
     source_column_description = source_xds.attrs["other"]["msv2"]["ctds_attrs"][
         "column_descriptions"
     ]
+    source_xds = pad_missing_sources(source_xds, unique_source_id)
     # Get source name (the time axis is optional and will probably be required if the partition scheme does not include 'FIELD_ID' or 'SOURCE_ID'.).
     # Note again that this optional time axis has nothing to do with the original time axis in the source table that we drop.
     if len(source_id) == 1:

xradio/{correlated_data/_utils/_ms → measurement_set/_utils/_msv2}/msv4_sub_xdss.py RENAMED Viewed

@@ -6,6 +6,7 @@ from typing import Tuple, Union
 import numpy as np
 import xarray as xr
+from xradio._utils.coord_math import convert_to_si_units
 from xradio._utils.schema import (
     column_description_casacore_to_msv4_measure,
     convert_generic_xds_to_xradio_schema,
@@ -144,7 +145,7 @@ def create_weather_xds(in_file: str, ant_xds_station_name_ids: xr.DataArray):
     }
     weather_xds = weather_xds.assign_coords(coords)
-    dims_station_time = ["station_name", "time"]
+    dims_station_time = ["station_name", "time_weather"]
     to_new_data_variables = {
         "H20": ["H2O", dims_station_time],
         "IONOS_ELECTRON": ["IONOS_ELECTRON", dims_station_time],
@@ -157,13 +158,23 @@ def create_weather_xds(in_file: str, ant_xds_station_name_ids: xr.DataArray):
     }
     to_new_coords = {
-        "TIME": ["time", ["time"]],
+        "TIME": ["time_weather", ["time_weather"]],
     }
     weather_xds = convert_generic_xds_to_xradio_schema(
         generic_weather_xds, weather_xds, to_new_data_variables, to_new_coords
     )
+    # TODO: option to interpolate to main time
+    # PRESSURE: hPa in MSv2 specs and some MSs => Pa
+    weather_xds = convert_to_si_units(weather_xds)
+    # correct expected types (for example "IONOS_ELECTRON", "PRESSURE" can be float32)
+    for data_var in weather_xds:
+        if weather_xds.data_vars[data_var].dtype != np.float64:
+            weather_xds[data_var] = weather_xds[data_var].astype(np.float64)
     return weather_xds
@@ -199,9 +210,8 @@ def correct_generic_pointing_xds(
     correct_pointing_xds = generic_pointing_xds.copy()
-    for key in generic_pointing_xds:
-        if key in to_new_data_variables:
-            data_var_name = to_new_data_variables[key]
+    for data_var_name in generic_pointing_xds:
+        if data_var_name in to_new_data_variables:
             # Corrects dim sizes of "empty cell" variables, such as empty DIRECTION, TARGET, etc.
             if (
                 "dim_2" in generic_pointing_xds.sizes
@@ -296,6 +306,8 @@ def create_pointing_xds(
         size = generic_pointing_xds.sizes["n_polynomial"]
         if size == 1:
             generic_pointing_xds = generic_pointing_xds.sel({"n_polynomial": 0})
+        elif size == 0:
+            generic_pointing_xds = generic_pointing_xds.drop_dims("n_polynomial")
     time_ant_dims = ["time", "antenna_name"]
     time_ant_dir_dims = time_ant_dims + ["local_sky_dir_label"]
@@ -341,8 +353,8 @@ def prepare_generic_sys_cal_xds(generic_sys_cal_xds: xr.Dataset) -> xr.Dataset:
     This function performs various preparation steps, such as:
     - filter out dimensions not needed for an individual MSv4 (SPW, FEED),
     - drop variables loaded from columns with all items set to empty array,
-    - transpose the dimensions frequency,receptor,
-    - fix dimension names when needed.
+    - transpose the dimensions frequency,receptor
+    - fix dimension names (and order) when needed.
     Parameters
     ----------
@@ -374,15 +386,38 @@ def prepare_generic_sys_cal_xds(generic_sys_cal_xds: xr.Dataset) -> xr.Dataset:
         "receptor" in generic_sys_cal_xds.sizes
         and "frequency" in generic_sys_cal_xds.sizes
     ):
+        # dim_3 can be created for example when the T*_SPECTRUM have varying # channels!
+        # more generally, could transpose with ... to avoid errors with additional spurious dimensions
+        if "dim_3" in generic_sys_cal_xds.dims:
+            generic_sys_cal_xds = generic_sys_cal_xds.drop_dims("dim_3")
         # From MSv2 tables we get (...,frequency, receptor)
         #  -> transpose to (...,receptor,frequency) ready for MSv4 sys_cal_xds
         generic_sys_cal_xds = generic_sys_cal_xds.transpose(
             "ANTENNA_ID", "TIME", "receptor", "frequency"
         )
-    else:
+    elif (
+        "frequency" in generic_sys_cal_xds.sizes
+        and not "dim_3" in generic_sys_cal_xds.sizes
+    ):
         # because order is (...,frequency,receptor), when frequency is missing
         # receptor can get wrongly labeled as frequency
         generic_sys_cal_xds = generic_sys_cal_xds.rename_dims({"frequency": "receptor"})
+    elif (
+        "frequency" not in generic_sys_cal_xds.sizes
+        and "receptor" in generic_sys_cal_xds.sizes
+        and "dim_3" in generic_sys_cal_xds.sizes
+    ):
+        # different *_SPECTRUM array sizes + some empty arrays can create an additional spurious
+        # generic dimension, which should have been "receptor"
+        generic_sys_cal_xds = generic_sys_cal_xds.rename_dims({"receptor": "frequency"})
+        generic_sys_cal_xds = generic_sys_cal_xds.rename_dims({"dim_3": "receptor"})
+        generic_sys_cal_xds = generic_sys_cal_xds.transpose(
+            "ANTENNA_ID", "TIME", "receptor", "frequency"
+        )
+    else:
+        raise RuntimeError(
+            "Cannot understand the arrangement of dimensions of {generic_sys_cal_xds=}"
+        )
     return generic_sys_cal_xds
@@ -462,7 +497,7 @@ def create_system_calibration_xds(
         "frequency": ["frequency_cal", ["frequency_cal"]],
     }
-    sys_cal_xds = xr.Dataset(attrs={"type": "sys_cal"})
+    sys_cal_xds = xr.Dataset(attrs={"type": "system_calibration"})
     coords = {
         "antenna_name": ant_xds_name_ids.sel(
             antenna_id=generic_sys_cal_xds["ANTENNA_ID"]
@@ -483,8 +518,7 @@ def create_system_calibration_xds(
         frequency_measure = {
             "type": main_xds_frequency.attrs["type"],
             "units": main_xds_frequency.attrs["units"],
-            "frame": main_xds_frequency.attrs["frame"],
-            "reference_value": main_xds_frequency.attrs["reference_frequency"],
+            "observer": main_xds_frequency.attrs["observer"],
         }
         sys_cal_xds.coords["frequency_cal"].attrs.update(frequency_measure)
@@ -499,8 +533,8 @@ def create_system_calibration_xds(
         time_coord_attrs = {
             "type": "time",
             "units": ["s"],
-            "scale": "UTC",
-            "format": "UNIX",
+            "scale": "utc",
+            "format": "unix",
         }
         # If interpolating time, rename time_cal => time
         time_coord = {"time": ("time_cal", sys_cal_interp_time.data)}

xradio/{correlated_data/_utils/_ms → measurement_set/_utils/_msv2}/partition_queries.py RENAMED Viewed

@@ -54,6 +54,7 @@ def create_partitions(in_file: str, partition_scheme: list):
     par_df["SCAN_NUMBER"] = main_tb.getcol("SCAN_NUMBER")
     par_df["STATE_ID"] = main_tb.getcol("STATE_ID")
     par_df["OBSERVATION_ID"] = main_tb.getcol("OBSERVATION_ID")
+    par_df["ANTENNA1"] = main_tb.getcol("ANTENNA1")
     par_df = par_df.drop_duplicates()
     field_tb = tables.table(
@@ -123,6 +124,9 @@ def create_partitions(in_file: str, partition_scheme: list):
         "OBS_MODE",
         "SUB_SCAN_NUMBER",
     ]
+    if "ANTENNA1" in partition_scheme:
+        partition_axis_names.append("ANTENNA1")
     for idx, pair in enumerated_partitions:
         query = ""
         for i, par in enumerate(partition_scheme_updated):

xradio/{correlated_data → measurement_set}/_utils/_utils/xds_helper.py RENAMED Viewed

@@ -7,7 +7,7 @@ import xarray as xr
 from .cds import CASAVisSet
 from .stokes_types import stokes_types
-from ...._utils.common import get_pad_value
+from xradio._utils.list_and_array import get_pad_value
 def make_coords(

xradio/{correlated_data/_utils/ms.py → measurement_set/_utils/msv2.py} RENAMED Viewed

@@ -2,14 +2,14 @@ import os
 import toolviper.utils.logger as logger
 from typing import List, Tuple, Union
-from ._utils.cds import CASAVisSet
-from ._ms.partitions import (
+from xradio.measurement_set._utils._utils.cds import CASAVisSet
+from xradio.measurement_set._utils._msv2.partitions import (
     finalize_partitions,
     read_ms_ddi_partitions,
     read_ms_scan_subscan_partitions,
 )
-from ._ms.subtables import read_ms_subtables
-from ._utils.xds_helper import vis_xds_packager_cds
+from xradio.measurement_set._utils._msv2.subtables import read_ms_subtables
+from xradio.measurement_set._utils._utils.xds_helper import vis_xds_packager_cds
 def read_ms(

xradio/{correlated_data → measurement_set}/convert_msv2_to_processing_set.py RENAMED Viewed

@@ -4,8 +4,8 @@ from typing import Dict, Union
 import dask
-from xradio.correlated_data._utils._ms.partition_queries import create_partitions
-from xradio.correlated_data._utils._ms.conversion import convert_and_write_partition
+from xradio.measurement_set._utils._msv2.partition_queries import create_partitions
+from xradio.measurement_set._utils._msv2.conversion import convert_and_write_partition
 def convert_msv2_to_processing_set(
@@ -82,6 +82,11 @@ def convert_msv2_to_processing_set(
             + str(partition_info["FIELD_ID"])
             + ", SCAN "
             + str(partition_info["SCAN_NUMBER"])
+            + (
+                ", ANTENNA " + str(partition_info["ANTENNA1"])
+                if "ANTENNA1" in partition_info
+                else ""
+            )
         )
         # prepend '0' to ms_v4_id as needed

xradio/{correlated_data → measurement_set}/load_processing_set.py RENAMED Viewed

@@ -1,5 +1,5 @@
 import os
-from xradio.correlated_data import ProcessingSet
+from xradio.measurement_set import ProcessingSet
 from typing import Dict, Union
@@ -43,10 +43,10 @@ def load_processing_set(
     ps = ProcessingSet()
     for ms_name, ms_xds_isel in sel_parms.items():
         ms_store = os.path.join(ps_store, ms_name)
-        ms_main_store = os.path.join(ms_store, "MAIN")
+        correlated_store = os.path.join(ms_store, "correlated_xds")
         xds = _open_dataset(
-            ms_main_store,
+            correlated_store,
             file_system,
             ms_xds_isel,
             data_variables,
@@ -55,7 +55,7 @@ def load_processing_set(
         data_groups = xds.attrs["data_groups"]
         if load_sub_datasets:
-            from xradio.correlated_data.open_processing_set import _open_sub_xds
+            from xradio.measurement_set.open_processing_set import _open_sub_xds
             sub_xds_dict, field_and_source_xds_dict = _open_sub_xds(
                 ms_store, file_system=file_system, load=True, data_groups=data_groups
@@ -76,7 +76,7 @@ def load_processing_set(
     return ps
-class processing_set_iterator:
+class ProcessingSetIterator:
     def __init__(
         self,
         sel_parms: dict,

xradio/measurement_set/measurement_set_xds.py ADDED Viewed

@@ -0,0 +1,110 @@
+import pandas as pd
+from xradio._utils.list_and_array import to_list
+import xarray as xr
+import numbers
+import os
+from collections.abc import Mapping, Iterable
+from typing import Any, Union
+class MeasurementSetXds(xr.Dataset):
+    """
+    xarray.Dataset subclass that adds Zarr storage (`to_store`) and selection of a data group
+    by name (`sel`). Instances are built from the data variables, coordinates and attributes
+    of an existing dataset.
+    """
+    __slots__ = ()
+    def __init__(self, xds):
+        super().__init__(xds.data_vars, xds.coords, xds.attrs)
+    def to_store(self, store, **kwargs):
+        """
+        Write the MeasurementSetXds to a Zarr store.
+        Does not write to cloud storage yet.
+        The dataset is split into several Zarr stores under `store`:
+        - `field_and_source_xds_<data group name>`: the `field_and_source_xds` attribute of the
+          correlated data variable of each data group,
+        - `<attribute name>`: every dataset attribute whose name contains "xds",
+        - `correlated_xds`: the remaining dataset, without the attributes listed above.
+        Args:
+            store (str): The path to the Zarr store.
+            **kwargs: Additional keyword arguments to be passed to `xarray.Dataset.to_zarr`. See https://docs.xarray.dev/en/latest/generated/xarray.Dataset.to_zarr.html for more information.
+        Returns:
+            None
+        """
+        copy_cor_xds = self.copy()  # No deep copy
+        # Remove field_and_source_xds from all correlated_data (VISIBILITY/SPECTRUM) data variables
+        # and save them as separate zarr files.
+        for data_group_name, data_group in self.attrs["data_groups"].items():
+            del copy_cor_xds[data_group["correlated_data"]].attrs[
+                "field_and_source_xds"
+            ]
+            xr.Dataset.to_zarr(
+                self[data_group["correlated_data"]].attrs["field_and_source_xds"],
+                os.path.join(store, "field_and_source_xds_" + data_group_name),
+                **kwargs,
+            )
+        # Remove xds attributes from copy_cor_xds and save xds attributes as separate zarr files.
+        for attrs_name in list(self.attrs):
+            if "xds" in attrs_name:
+                del copy_cor_xds.attrs[attrs_name]
+                # kwargs belong to to_zarr, not to os.path.join
+                xr.Dataset.to_zarr(
+                    self.attrs[attrs_name],
+                    os.path.join(store, attrs_name),
+                    **kwargs,
+                )
+        # Save copy_cor_xds as zarr file.
+        xr.Dataset.to_zarr(
+            copy_cor_xds, os.path.join(store, "correlated_xds"), **kwargs
+        )
+    def sel(
+        self,
+        indexers: Union[Mapping[Any, Any], None] = None,
+        method: Union[str, None] = None,
+        tolerance: Union[int, float, Iterable[Union[int, float]], None] = None,
+        drop: bool = False,
+        **indexers_kwargs: Any,
+    ):
+        """
+        Select data along dimension(s) by label. Overrides `xarray.Dataset.sel <https://xarray.pydata.org/en/stable/generated/xarray.Dataset.sel.html>`__ so that a data group can be selected by name by using the `data_group_name` parameter.
+        For more information on data groups see `Data Groups <https://xradio.readthedocs.io/en/latest/measurement_set_overview.html#Data-Groups>`__ section. See `xarray.Dataset.sel <https://xarray.pydata.org/en/stable/generated/xarray.Dataset.sel.html>`__ for parameter descriptions.
+        `data_group_name` can be given either as a keyword argument or as a key of `indexers`;
+        if given in both, the value in `indexers` is used. The `indexers` mapping passed by the
+        caller is not modified. When a data group is selected, the data variables that belong
+        only to other data groups are dropped from the result.
+        Returns:
+            MeasurementSetXds
+        Raises:
+            KeyError: if `data_group_name` is not a key of `attrs["data_groups"]`.
+        Examples
+        --------
+        >>> # Select data group 'corrected' and polarization 'XX'.
+        >>> selected_ms_xds = ms_xds.sel(data_group_name='corrected', polarization='XX')
+        >>> # Select data group 'corrected' and polarization 'XX' using a dict.
+        >>> selected_ms_xds = ms_xds.sel({'data_group_name':'corrected', 'polarization':'XX'})
+        """
+        # data_group_name is not a dimension: take it out before calling xarray's sel
+        data_group_name = indexers_kwargs.pop("data_group_name", None)
+        if (indexers is not None) and ("data_group_name" in indexers):
+            # Work on a copy so that the caller's mapping is left untouched
+            indexers = dict(indexers)
+            data_group_name = indexers.pop("data_group_name")
+            if not indexers:
+                # Nothing else to select positionally; avoids mixing an empty positional
+                # mapping with keyword indexers
+                indexers = None
+        selected_xds = super().sel(
+            indexers,
+            method=method,
+            tolerance=tolerance,
+            drop=drop,
+            **indexers_kwargs,
+        )
+        if data_group_name is None:
+            return MeasurementSetXds(selected_xds)
+        data_groups = self.attrs["data_groups"]
+        sel_data_group_set = set(data_groups[data_group_name].values())
+        # Variables referenced by any data group but not by the selected one
+        data_variables_to_drop = set()
+        for dg in data_groups.values():
+            data_variables_to_drop |= set(dg.values()) - sel_data_group_set
+        return MeasurementSetXds(selected_xds.drop_vars(list(data_variables_to_drop)))

xradio/{correlated_data → measurement_set}/open_processing_set.py RENAMED Viewed

@@ -1,6 +1,6 @@
 import os
-from xradio.correlated_data import ProcessingSet
+from xradio.measurement_set import ProcessingSet
 import toolviper.utils.logger as logger
 from xradio._utils.zarr.common import _open_dataset, _get_file_system_and_items
 import s3fs
@@ -25,6 +25,8 @@ def open_processing_set(
     processing_set
         Lazy representation of processing set (data is represented by Dask.arrays).
     """
+    from xradio.measurement_set import MeasurementSetXds
     file_system, ms_store_list = _get_file_system_and_items(ps_store)
     ps = ProcessingSet()
@@ -32,9 +34,9 @@ def open_processing_set(
     for ms_name in ms_store_list:
         # try:
         ms_store = os.path.join(ps_store, ms_name)
-        ms_main_store = os.path.join(ms_store, "MAIN")
+        correlated_store = os.path.join(ms_store, "correlated_xds")
-        xds = _open_dataset(ms_main_store, file_system)
+        xds = _open_dataset(correlated_store, file_system)
         data_groups = xds.attrs["data_groups"]
         if (intents is None) or (
@@ -54,7 +56,7 @@ def open_processing_set(
                     "field_and_source_xds"
                 ] = field_and_source_xds_dict[data_group_name]
-            ps[ms_name] = xds
+            ps[ms_name] = MeasurementSetXds(xds)
         # except Exception as e:
         #     logger.warning(f"Could not open {ms_name} due to {e}")
         #     continue
@@ -66,15 +68,6 @@ def _open_sub_xds(ms_store, file_system, data_groups, load=False):
     sub_xds_dict = {}
     field_and_source_xds_dict = {}
-    xds_names = {
-        "ANTENNA": "antenna_xds",
-        "POINTING": "pointing_xds",
-        "SYSCAL": "system_calibration_xds",
-        "GAIN_CURVE": "gain_curve_xds",
-        "PHASE_CAL": "phase_calibration_xds",
-        "WEATHER": "weather_xds",
-    }
     if isinstance(file_system, s3fs.core.S3FileSystem):
         file_names = [
             bd.split(sep="/")[-1] for bd in file_system.listdir(ms_store, detail=False)
@@ -83,9 +76,9 @@ def _open_sub_xds(ms_store, file_system, data_groups, load=False):
         file_names = file_system.listdir(ms_store)
     file_names = [item for item in file_names if not item.startswith(".")]
-    file_names.remove("MAIN")
+    file_names.remove("correlated_xds")
-    field_dict = {"FIELD_AND_SOURCE_" + key.upper(): key for key in data_groups.keys()}
+    field_dict = {"field_and_source_xds_" + key: key for key in data_groups.keys()}
     # field_and_source_xds_name_start = "FIELD"
     for n in file_names:
@@ -98,7 +91,7 @@ def _open_sub_xds(ms_store, file_system, data_groups, load=False):
         if n in field_dict.keys():
             field_and_source_xds_dict[field_dict[n]] = xds
         else:
-            sub_xds_dict[xds_names[n]] = xds
+            sub_xds_dict[n] = xds
     return sub_xds_dict, field_and_source_xds_dict

xradio 0.0.41__py3-none-any.whl → 0.0.43__py3-none-any.whl

xradio 0.0.41py3-none-any.whl → 0.0.43py3-none-any.whl