xradio 0.0.47__py3-none-any.whl → 0.0.49__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (38) hide show
  1. xradio/__init__.py +1 -0
  2. xradio/_utils/dict_helpers.py +69 -2
  3. xradio/_utils/list_and_array.py +3 -1
  4. xradio/_utils/schema.py +3 -1
  5. xradio/image/_util/__init__.py +0 -3
  6. xradio/image/_util/_casacore/common.py +0 -13
  7. xradio/image/_util/_casacore/xds_from_casacore.py +102 -97
  8. xradio/image/_util/_casacore/xds_to_casacore.py +36 -24
  9. xradio/image/_util/_fits/xds_from_fits.py +81 -36
  10. xradio/image/_util/_zarr/zarr_low_level.py +3 -3
  11. xradio/image/_util/casacore.py +7 -5
  12. xradio/image/_util/common.py +13 -26
  13. xradio/image/_util/image_factory.py +143 -191
  14. xradio/image/image.py +10 -59
  15. xradio/measurement_set/__init__.py +11 -6
  16. xradio/measurement_set/_utils/_msv2/_tables/read.py +187 -46
  17. xradio/measurement_set/_utils/_msv2/_tables/table_query.py +22 -0
  18. xradio/measurement_set/_utils/_msv2/conversion.py +347 -299
  19. xradio/measurement_set/_utils/_msv2/create_field_and_source_xds.py +233 -150
  20. xradio/measurement_set/_utils/_msv2/descr.py +1 -1
  21. xradio/measurement_set/_utils/_msv2/msv4_info_dicts.py +20 -13
  22. xradio/measurement_set/_utils/_msv2/msv4_sub_xdss.py +21 -22
  23. xradio/measurement_set/convert_msv2_to_processing_set.py +46 -6
  24. xradio/measurement_set/load_processing_set.py +100 -52
  25. xradio/measurement_set/measurement_set_xdt.py +197 -0
  26. xradio/measurement_set/open_processing_set.py +122 -86
  27. xradio/measurement_set/processing_set_xdt.py +1552 -0
  28. xradio/measurement_set/schema.py +375 -197
  29. xradio/schema/bases.py +5 -1
  30. xradio/schema/check.py +97 -5
  31. xradio/sphinx/schema_table.py +12 -0
  32. {xradio-0.0.47.dist-info → xradio-0.0.49.dist-info}/METADATA +4 -4
  33. {xradio-0.0.47.dist-info → xradio-0.0.49.dist-info}/RECORD +36 -36
  34. {xradio-0.0.47.dist-info → xradio-0.0.49.dist-info}/WHEEL +1 -1
  35. xradio/measurement_set/measurement_set_xds.py +0 -117
  36. xradio/measurement_set/processing_set.py +0 -777
  37. {xradio-0.0.47.dist-info → xradio-0.0.49.dist-info/licenses}/LICENSE.txt +0 -0
  38. {xradio-0.0.47.dist-info → xradio-0.0.49.dist-info}/top_level.txt +0 -0
@@ -127,9 +127,9 @@ def interpolate_to_time(
127
127
  xds = xds.interp(
128
128
  {time_name: interp_time.data}, method=method, assume_sorted=True
129
129
  )
130
- # scan_number sneaks in as a coordinate of the main time axis, drop it
131
- if "scan_number" in xds.coords:
132
- xds = xds.drop_vars("scan_number")
130
+ # scan_name sneaks in as a coordinate of the main time axis, drop it
131
+ if "scan_name" in xds.coords:
132
+ xds = xds.drop_vars("scan_name")
133
133
  points_after = xds[time_name].size
134
134
  logger.debug(
135
135
  f"{message_prefix}: interpolating the time coordinate "
@@ -497,7 +497,7 @@ def prepare_generic_sys_cal_xds(generic_sys_cal_xds: xr.Dataset) -> xr.Dataset:
497
497
  def create_system_calibration_xds(
498
498
  in_file: str,
499
499
  main_xds_frequency: xr.DataArray,
500
- ant_xds_name_ids: xr.DataArray,
500
+ ant_xds: xr.DataArray,
501
501
  sys_cal_interp_time: Union[xr.DataArray, None] = None,
502
502
  ):
503
503
  """
@@ -510,8 +510,8 @@ def create_system_calibration_xds(
510
510
  main_xds_frequency: xr.DataArray
511
511
  frequency array of the main xds (MSv4), containing among other things
512
512
  spectral_window_id and measures metadata
513
- ant_xds_name_ids : xr.Dataset
514
- antenna_name data array from antenna_xds, with name/id information
513
+ ant_xds : xr.Dataset
514
+ The antenna_xds that has information such as names, stations, etc., for coordinates
515
515
  sys_cal_interp_time: Union[xr.DataArray, None] = None,
516
516
  Time axis to interpolate the data vars to (usually main MSv4 time)
517
517
 
@@ -529,7 +529,7 @@ def create_system_calibration_xds(
529
529
  rename_ids=subt_rename_ids["SYSCAL"],
530
530
  taql_where=(
531
531
  f" where (SPECTRAL_WINDOW_ID = {spectral_window_id})"
532
- f" AND (ANTENNA_ID IN [{','.join(map(str, ant_xds_name_ids.antenna_id.values))}])"
532
+ f" AND (ANTENNA_ID IN [{','.join(map(str, ant_xds.antenna_id.values))}])"
533
533
  ),
534
534
  )
535
535
  except ValueError as _exc:
@@ -541,14 +541,14 @@ def create_system_calibration_xds(
541
541
 
542
542
  generic_sys_cal_xds = prepare_generic_sys_cal_xds(generic_sys_cal_xds)
543
543
 
544
- mandatory_dimensions = ["antenna_name", "time_cal", "receptor_label"]
544
+ mandatory_dimensions = ["antenna_name", "time_system_cal", "receptor_label"]
545
545
  if "frequency" not in generic_sys_cal_xds.sizes:
546
546
  dims_all = mandatory_dimensions
547
547
  else:
548
- dims_all = mandatory_dimensions + ["frequency_cal"]
548
+ dims_all = mandatory_dimensions + ["frequency_system_cal"]
549
549
 
550
550
  to_new_data_variables = {
551
- "PHASE_DIFF": ["PHASE_DIFFERENCE", ["antenna_name", "time_cal"]],
551
+ "PHASE_DIFF": ["PHASE_DIFFERENCE", ["antenna_name", "time_system_cal"]],
552
552
  "TCAL": ["TCAL", dims_all],
553
553
  "TCAL_SPECTRUM": ["TCAL", dims_all],
554
554
  "TRX": ["TRX", dims_all],
@@ -564,27 +564,26 @@ def create_system_calibration_xds(
564
564
  }
565
565
 
566
566
  to_new_coords = {
567
- "TIME": ["time_cal", ["time_cal"]],
567
+ "TIME": ["time_system_cal", ["time_system_cal"]],
568
568
  "receptor": ["receptor_label", ["receptor_label"]],
569
- "frequency": ["frequency_cal", ["frequency_cal"]],
569
+ "frequency": ["frequency_system_cal", ["frequency_system_cal"]],
570
570
  }
571
571
 
572
572
  sys_cal_xds = xr.Dataset(attrs={"type": "system_calibration"})
573
- coords = {
574
- "antenna_name": ant_xds_name_ids.sel(
575
- antenna_id=generic_sys_cal_xds["ANTENNA_ID"]
576
- ).data,
577
- "receptor_label": generic_sys_cal_xds.coords["receptor"].data,
573
+ ant_borrowed_coords = {
574
+ "antenna_name": ant_xds.coords["antenna_name"],
575
+ "receptor_label": ant_xds.coords["receptor_label"],
576
+ "polarization_type": ant_xds.coords["polarization_type"],
578
577
  }
579
- sys_cal_xds = sys_cal_xds.assign_coords(coords)
578
+ sys_cal_xds = sys_cal_xds.assign_coords(ant_borrowed_coords)
580
579
  sys_cal_xds = convert_generic_xds_to_xradio_schema(
581
580
  generic_sys_cal_xds, sys_cal_xds, to_new_data_variables, to_new_coords
582
581
  )
583
582
 
584
583
  # Add frequency coord and its measures data, if present
585
- if "frequency_cal" in dims_all:
584
+ if "frequency_system_cal" in dims_all:
586
585
  frequency_coord = {
587
- "frequency_cal": generic_sys_cal_xds.coords["frequency"].data
586
+ "frequency_system_cal": generic_sys_cal_xds.coords["frequency"].data
588
587
  }
589
588
  sys_cal_xds = sys_cal_xds.assign_coords(frequency_coord)
590
589
  frequency_measure = {
@@ -592,10 +591,10 @@ def create_system_calibration_xds(
592
591
  "units": main_xds_frequency.attrs["units"],
593
592
  "observer": main_xds_frequency.attrs["observer"],
594
593
  }
595
- sys_cal_xds.coords["frequency_cal"].attrs.update(frequency_measure)
594
+ sys_cal_xds.coords["frequency_system_cal"].attrs.update(frequency_measure)
596
595
 
597
596
  sys_cal_xds = rename_and_interpolate_to_time(
598
- sys_cal_xds, "time_cal", sys_cal_interp_time, "system_calibration_xds"
597
+ sys_cal_xds, "time_system_cal", sys_cal_interp_time, "system_calibration_xds"
599
598
  )
600
599
 
601
600
  # correct expected types
@@ -18,6 +18,7 @@ def estimate_conversion_memory_and_cores(
18
18
  """
19
19
  Given an MSv2 and a partition_scheme to use when converting it to MSv4,
20
20
  estimates:
21
+
21
22
  - memory (in the sense of the amount expected to be enough to convert)
22
23
  - cores (in the sense of the recommended/optimal number of cores to use to convert)
23
24
 
@@ -36,7 +37,7 @@ def estimate_conversion_memory_and_cores(
36
37
  Partition scheme as used in the function convert_msv2_to_processing_set()
37
38
 
38
39
  Returns
39
- ----------
40
+ -------
40
41
  tuple
41
42
  estimated maximum memory required for one partition,
42
43
  maximum number of cores it makes sense to use (number of partitions),
@@ -62,7 +63,7 @@ def convert_msv2_to_processing_set(
62
63
  use_table_iter: bool = False,
63
64
  compressor: numcodecs.abc.Codec = numcodecs.Zstd(level=2),
64
65
  storage_backend: str = "zarr",
65
- parallel: bool = False,
66
+ parallel_mode: str = "none",
66
67
  overwrite: bool = False,
67
68
  ):
68
69
  """Convert a Measurement Set v2 into a Processing Set of Measurement Set v4.
@@ -99,14 +100,45 @@ def convert_msv2_to_processing_set(
99
100
  The Blosc compressor to use when saving the converted data to disk using Zarr, by default numcodecs.Zstd(level=2).
100
101
  storage_backend : {"zarr", "netcdf"}, optional
101
102
  The on-disk format to use. "netcdf" is not yet implemented.
102
- parallel : bool, optional
103
- Makes use of Dask to execute conversion in parallel, by default False.
103
+ parallel_mode : {"none", "partition", "time"}, optional
104
+ Choose whether to use Dask to execute conversion in parallel, by default "none" and conversion occurs serially.
105
+ The option "partition" parallelises the conversion over partitions specified by `partition_scheme`. The option "time" can only be used for phased array interferometers where there are no partitions
106
+ in the MS v2; instead the MS v2 is parallelised along the time dimension and can be controlled by `main_chunksize`.
104
107
  overwrite : bool, optional
105
108
  Whether to overwrite an existing processing set, by default False.
106
109
  """
107
110
 
111
+ # Create empty data tree
112
+ import xarray as xr
113
+
114
+ ps_dt = xr.DataTree()
115
+
116
+ if not str(out_file).endswith("ps.zarr"):
117
+ out_file += ".ps.zarr"
118
+
119
+ print("Output file: ", out_file)
120
+
121
+ if overwrite:
122
+ ps_dt.to_zarr(store=out_file, mode="w")
123
+ else:
124
+ ps_dt.to_zarr(store=out_file, mode="w-")
125
+
126
+ # Check `parallel_mode` is valid
127
+ try:
128
+ assert parallel_mode in ["none", "partition", "time"]
129
+ except AssertionError:
130
+ logger.warning(
131
f"`parallel_mode` {parallel_mode} not recognized. Defaulting to 'none'."
132
+ )
133
+ parallel_mode = "none"
134
+
108
135
  partitions = create_partitions(in_file, partition_scheme=partition_scheme)
109
136
  logger.info("Number of partitions: " + str(len(partitions)))
137
+ if parallel_mode == "time":
138
+ assert (
139
+ len(partitions) == 1
140
+ ), "MS v2 contains more than one partition. `parallel_mode = 'time'` not valid."
141
+
110
142
  delayed_list = []
111
143
 
112
144
  for ms_v4_id, partition_info in enumerate(partitions):
@@ -132,7 +164,7 @@ def convert_msv2_to_processing_set(
132
164
 
133
165
  # prepend '0' to ms_v4_id as needed
134
166
  ms_v4_id = f"{ms_v4_id:0>{len(str(len(partitions) - 1))}}"
135
- if parallel:
167
+ if parallel_mode == "partition":
136
168
  delayed_list.append(
137
169
  dask.delayed(convert_and_write_partition)(
138
170
  in_file,
@@ -149,6 +181,7 @@ def convert_msv2_to_processing_set(
149
181
  phase_cal_interpolate=phase_cal_interpolate,
150
182
  sys_cal_interpolate=sys_cal_interpolate,
151
183
  compressor=compressor,
184
+ parallel_mode=parallel_mode,
152
185
  overwrite=overwrite,
153
186
  )
154
187
  )
@@ -168,8 +201,15 @@ def convert_msv2_to_processing_set(
168
201
  phase_cal_interpolate=phase_cal_interpolate,
169
202
  sys_cal_interpolate=sys_cal_interpolate,
170
203
  compressor=compressor,
204
+ parallel_mode=parallel_mode,
171
205
  overwrite=overwrite,
172
206
  )
173
207
 
174
- if parallel:
208
+ if parallel_mode == "partition":
175
209
  dask.compute(delayed_list)
210
+
211
+ import zarr
212
+
213
+ root_group = zarr.open(out_file, mode="r+") # Open in read/write mode
214
+ root_group.attrs["type"] = "processing_set" # Replace
215
+ zarr.convenience.consolidate_metadata(root_group.store)
@@ -1,79 +1,115 @@
1
1
  import os
2
- from xradio.measurement_set import ProcessingSet
3
2
  from typing import Dict, Union
3
+ import dask
4
+ import xarray as xr
5
+ import s3fs
4
6
 
5
7
 
6
8
  def load_processing_set(
7
9
  ps_store: str,
8
- sel_parms: dict,
9
- data_variables: Union[list, None] = None,
10
+ sel_parms: dict = None,
11
+ data_group_name: str = None,
12
+ include_variables: Union[list, None] = None,
13
+ drop_variables: Union[list, None] = None,
10
14
  load_sub_datasets: bool = True,
11
- ) -> ProcessingSet:
15
+ ) -> xr.DataTree:
12
16
  """Loads a processing set into memory.
13
17
 
14
18
  Parameters
15
19
  ----------
16
20
  ps_store : str
17
21
  String of the path and name of the processing set. For example '/users/user_1/uid___A002_Xf07bba_Xbe5c_target.lsrk.vis.zarr' for a file stored on a local file system, or 's3://viper-test-data/Antennae_North.cal.lsrk.split.vis.zarr/' for a file in AWS object storage.
18
- sel_parms : dict
19
- A dictionary where the keys are the names of the ms_xds's and the values are slice_dicts.
22
+ sel_parms : dict, optional
23
+ A dictionary where the keys are the names of the ms_xdt's (measurement set xarray data trees) and the values are slice_dicts.
20
24
  slice_dicts: A dictionary where the keys are the dimension names and the values are slices.
25
+
21
26
  For example::
22
27
 
23
28
  {
29
+
24
30
  'ms_v4_name_1': {'frequency': slice(0, 160, None),'time':slice(0,100)},
25
31
  ...
26
32
  'ms_v4_name_n': {'frequency': slice(0, 160, None),'time':slice(0,100)},
27
33
  }
28
34
 
29
- data_variables : Union[list, None], optional
35
+ By default None, which loads all ms_xdts.
36
+ data_group_name : str, optional
37
+ The name of the data group to select. By default None, which loads all data groups.
38
+ include_variables : Union[list, None], optional
30
39
  The list of data variables to load into memory for example ['VISIBILITY', 'WEIGHT, 'FLAGS']. By default None which will load all data variables into memory.
40
+ drop_variables : Union[list, None], optional
41
+ The list of data variables to drop from memory for example ['VISIBILITY', 'WEIGHT', 'FLAGS']. By default None which will not drop any data variables from memory.
31
42
  load_sub_datasets : bool, optional
32
43
  If true sub-datasets (for example weather_xds, antenna_xds, pointing_xds, system_calibration_xds ...) will be loaded into memory, by default True.
33
44
 
34
45
  Returns
35
46
  -------
36
- ProcessingSet
37
- In memory representation of processing set (data is represented by Dask.arrays).
47
+ xarray.DataTree
48
+ In memory representation of processing set using xr.DataTree.
38
49
  """
39
- from xradio._utils.zarr.common import _open_dataset, _get_file_system_and_items
50
+ from xradio._utils.zarr.common import _get_file_system_and_items
40
51
 
41
52
  file_system, ms_store_list = _get_file_system_and_items(ps_store)
42
53
 
43
- ps = ProcessingSet()
44
- for ms_name, ms_xds_isel in sel_parms.items():
45
- ms_store = os.path.join(ps_store, ms_name)
46
- correlated_store = os.path.join(ms_store, "correlated_xds")
47
-
48
- xds = _open_dataset(
49
- correlated_store,
50
- file_system,
51
- ms_xds_isel,
52
- data_variables,
53
- load=True,
54
- )
55
- data_groups = xds.attrs["data_groups"]
56
-
57
- if load_sub_datasets:
58
- from xradio.measurement_set.open_processing_set import _open_sub_xds
59
-
60
- sub_xds_dict, field_and_source_xds_dict = _open_sub_xds(
61
- ms_store, file_system=file_system, load=True, data_groups=data_groups
54
+ with dask.config.set(
55
+ scheduler="synchronous"
56
+ ): # serial scheduler, critical so that this can be used within delayed functions.
57
+ ps_xdt = xr.DataTree()
58
+
59
+ if sel_parms:
60
+ for ms_name, ms_xds_isel in sel_parms.items():
61
+ ms_store = os.path.join(ps_store, ms_name)
62
+
63
+ if isinstance(file_system, s3fs.core.S3FileSystem):
64
+ ms_store = s3fs.S3Map(root=ps_store, s3=file_system, check=False)
65
+
66
+ if ms_xds_isel:
67
+ ms_xdt = (
68
+ xr.open_datatree(
69
+ ms_store, engine="zarr", drop_variables=drop_variables
70
+ )
71
+ .isel(ms_xds_isel)
72
+ .xr_ms.sel(data_group_name=data_group_name)
73
+ )
74
+ else:
75
+ ms_xdt = xr.open_datatree(
76
+ ms_store, engine="zarr", drop_variables=drop_variables
77
+ ).xr_ms.sel(data_group_name=data_group_name)
78
+
79
+ if include_variables is not None:
80
+ for data_vars in ms_xdt.ds.data_vars:
81
+ if data_vars not in include_variables:
82
+ ms_xdt.ds = ms_xdt.ds.drop_vars(data_vars)
83
+
84
+ ps_xdt[ms_name] = ms_xdt
85
+
86
+ ps_xdt.attrs["type"] = "processing_set"
87
+ else:
88
+ ps_xdt = xr.open_datatree(
89
+ ps_store, engine="zarr", drop_variables=drop_variables
62
90
  )
63
91
 
64
- xds.attrs = {
65
- **xds.attrs,
66
- **sub_xds_dict,
67
- }
68
- for data_group_name, data_group_vals in data_groups.items():
92
+ if (include_variables is not None) or data_group_name:
93
+ for ms_name, ms_xdt in ps_xdt.items():
94
+
95
+ ms_xdt = ms_xdt.xr_ms.sel(data_group_name=data_group_name)
96
+
97
+ if include_variables is not None:
98
+ for data_vars in ms_xdt.ds.data_vars:
99
+ if data_vars not in include_variables:
100
+ ms_xdt.ds = ms_xdt.ds.drop_vars(data_vars)
101
+ ps_xdt[ms_name] = ms_xdt
69
102
 
70
- xds[data_group_vals["correlated_data"]].attrs[
71
- "field_and_source_xds"
72
- ] = field_and_source_xds_dict[data_group_name]
103
+ if not load_sub_datasets:
104
+ for ms_xdt in ps_xdt.children.values():
105
+ ms_xdt_names = list(ms_xdt.keys())
106
+ for sub_xds_name in ms_xdt_names:
107
+ if "xds" in sub_xds_name:
108
+ del ms_xdt[sub_xds_name]
73
109
 
74
- ps[ms_name] = xds
110
+ ps_xdt = ps_xdt.load()
75
111
 
76
- return ps
112
+ return ps_xdt
77
113
 
78
114
 
79
115
  class ProcessingSetIterator:
@@ -81,8 +117,10 @@ class ProcessingSetIterator:
81
117
  self,
82
118
  sel_parms: dict,
83
119
  input_data_store: str,
84
- input_data: Union[Dict, ProcessingSet, None] = None,
85
- data_variables: list = None,
120
+ input_data: Union[Dict, xr.DataTree, None] = None,
121
+ data_group_name: str = None,
122
+ include_variables: Union[list, None] = None,
123
+ drop_variables: Union[list, None] = None,
86
124
  load_sub_datasets: bool = True,
87
125
  ):
88
126
  """An iterator that will go through a processing set one MS v4 at a time.
@@ -101,10 +139,16 @@ class ProcessingSetIterator:
101
139
  }
102
140
  input_data_store : str
103
141
  String of the path and name of the processing set. For example '/users/user_1/uid___A002_Xf07bba_Xbe5c_target.lsrk.vis.zarr'.
104
- input_data : Union[Dict, processing_set, None], optional
142
+ input_data : Union[Dict, xr.DataTree, None], optional
105
143
  If the processing set is in memory already it can be supplied here. By default None which will make the iterator load data using the supplied input_data_store.
106
- data_variables : list, optional
144
+ data_group_name : str, optional
145
+ The name of the data group to select. By default None, which loads all data groups.
146
+ data_group_name : str, optional
147
+ The name of the data group to select. By default None, which loads all data groups.
148
+ include_variables : Union[list, None], optional
107
149
  The list of data variables to load into memory for example ['VISIBILITY', 'WEIGHT, 'FLAGS']. By default None which will load all data variables into memory.
150
+ drop_variables : Union[list, None], optional
151
+ The list of data variables to drop from memory for example ['VISIBILITY', 'WEIGHT', 'FLAGS']. By default None which will not drop any data variables from memory.
108
152
  load_sub_datasets : bool, optional
109
153
  If true sub-datasets (for example weather_xds, antenna_xds, pointing_xds, system_calibration_xds ...) will be loaded into memory, by default True.
110
154
  """
@@ -113,7 +157,9 @@ class ProcessingSetIterator:
113
157
  self.input_data_store = input_data_store
114
158
  self.sel_parms = sel_parms
115
159
  self.xds_name_iter = iter(sel_parms.keys())
116
- self.data_variables = data_variables
160
+ self.data_group_name = data_group_name
161
+ self.include_variables = include_variables
162
+ self.drop_variables = drop_variables
117
163
  self.load_sub_datasets = load_sub_datasets
118
164
 
119
165
  def __iter__(self):
@@ -121,20 +167,22 @@ class ProcessingSetIterator:
121
167
 
122
168
  def __next__(self):
123
169
  try:
124
- xds_name = next(self.xds_name_iter)
170
+ sub_xds_name = next(self.xds_name_iter)
125
171
  except Exception as e:
126
172
  raise StopIteration
127
173
 
128
174
  if self.input_data is None:
129
- slice_description = self.sel_parms[xds_name]
130
- ps = load_processing_set(
175
+ slice_description = self.sel_parms[sub_xds_name]
176
+ ps_xdt = load_processing_set(
131
177
  ps_store=self.input_data_store,
132
- sel_parms={xds_name: slice_description},
133
- data_variables=self.data_variables,
178
+ sel_parms={sub_xds_name: slice_description},
179
+ data_group_name=self.data_group_name,
180
+ include_variables=self.include_variables,
181
+ drop_variables=self.drop_variables,
134
182
  load_sub_datasets=self.load_sub_datasets,
135
183
  )
136
- xds = ps.get(0)
184
+ sub_xdt = ps_xdt.get(0)
137
185
  else:
138
- xds = self.input_data[xds_name] # In memory
186
+ sub_xdt = self.input_data[sub_xds_name] # In memory
139
187
 
140
- return xds
188
+ return sub_xdt
@@ -0,0 +1,197 @@
1
+ import pandas as pd
2
+ from xradio._utils.list_and_array import to_list
3
+ import xarray as xr
4
+ import numpy as np
5
+ import numbers
6
+ import os
7
+ from collections.abc import Mapping, Iterable
8
+ from typing import Any, Union
9
+
10
+ MS_DATASET_TYPES = {"visibility", "spectrum", "radiometer"}
11
+
12
+
13
+ class InvalidAccessorLocation(ValueError):
14
+ """
15
+ Raised by MeasurementSetXdt accessor functions called on a wrong DataTree node (not MSv4).
16
+ """
17
+
18
+ pass
19
+
20
+
21
+ @xr.register_datatree_accessor("xr_ms")
22
+ class MeasurementSetXdt:
23
+ """Accessor to the Measurement Set DataTree node. Provides MSv4 specific functionality
24
+ such as:
25
+
26
+ - get_partition_info(): produce an info dict with a general MSv4 description including
27
+ intents, SPW name, field and source names, etc.
28
+ - get_field_and_source_xds() to retrieve the field_and_source_xds for a given data
29
+ group.
30
+ - sel(): select data by dimension labels, for example by data group and polarization
31
+
32
+ """
33
+
34
+ _xdt: xr.DataTree
35
+
36
+ def __init__(self, datatree: xr.DataTree):
37
+ """
38
+ Initialize the MeasurementSetXdt instance.
39
+
40
+ Parameters
41
+ ----------
42
+ datatree: xarray.DataTree
43
+ The MSv4 DataTree node to construct a MeasurementSetXdt accessor.
44
+ """
45
+
46
+ self._xdt = datatree
47
+ self.meta = {"summary": {}}
48
+
49
+ def sel(
50
+ self,
51
+ indexers: Union[Mapping[Any, Any], None] = None,
52
+ method: Union[str, None] = None,
53
+ tolerance: Union[int, float, Iterable[Union[int, float]], None] = None,
54
+ drop: bool = False,
55
+ **indexers_kwargs: Any,
56
+ ) -> xr.DataTree:
57
+ """
58
+ Select data along dimension(s) by label. Alternative to `xarray.Dataset.sel <https://xarray.pydata.org/en/stable/generated/xarray.Dataset.sel.html>`__ so that a data group can be selected by name by using the `data_group_name` parameter.
59
+ For more information on data groups see `Data Groups <https://xradio.readthedocs.io/en/latest/measurement_set_overview.html#Data-Groups>`__ section. See `xarray.Dataset.sel <https://xarray.pydata.org/en/stable/generated/xarray.Dataset.sel.html>`__ for parameter descriptions.
60
+
61
+ Returns
62
+ -------
63
+ xarray.DataTree
64
+ xarray DataTree with MeasurementSetXdt accessors
65
+
66
+ Examples
67
+ --------
68
+ >>> # Select data group 'corrected' and polarization 'XX'.
69
+ >>> selected_ms_xdt = ms_xdt.xr_ms.sel(data_group_name='corrected', polarization='XX')
70
+
71
+ >>> # Select data group 'corrected' and polarization 'XX' using a dict.
72
+ >>> selected_ms_xdt = ms_xdt.xr_ms.sel({'data_group_name':'corrected', 'polarization':'XX'})
73
+ """
74
+
75
+ if self._xdt.attrs.get("type") not in MS_DATASET_TYPES:
76
+ raise InvalidAccessorLocation(f"{self._xdt.path} is not a MSv4 node.")
77
+
78
+ assert self._xdt.attrs["type"] in [
79
+ "visibility",
80
+ "spectrum",
81
+ "radiometer",
82
+ ], "The type of the xdt must be 'visibility', 'spectrum' or 'radiometer'."
83
+
84
+ if "data_group_name" in indexers_kwargs:
85
+ data_group_name = indexers_kwargs["data_group_name"]
86
+ del indexers_kwargs["data_group_name"]
87
+ elif (indexers is not None) and ("data_group_name" in indexers):
88
+ data_group_name = indexers["data_group_name"]
89
+ del indexers["data_group_name"]
90
+ else:
91
+ data_group_name = None
92
+
93
+ if data_group_name is not None:
94
+ sel_data_group_set = set(
95
+ self._xdt.attrs["data_groups"][data_group_name].values()
96
+ )
97
+
98
+ data_variables_to_drop = []
99
+ for dg in self._xdt.attrs["data_groups"].values():
100
+ temp_set = set(dg.values()) - sel_data_group_set
101
+ data_variables_to_drop.extend(list(temp_set))
102
+
103
+ data_variables_to_drop = list(set(data_variables_to_drop))
104
+
105
+ sel_ms_xdt = self._xdt
106
+
107
+ sel_corr_xds = self._xdt.ds.sel(
108
+ indexers, method, tolerance, drop, **indexers_kwargs
109
+ ).drop_vars(data_variables_to_drop)
110
+
111
+ sel_ms_xdt.ds = sel_corr_xds
112
+
113
+ sel_ms_xdt.attrs["data_groups"] = {
114
+ data_group_name: self._xdt.attrs["data_groups"][data_group_name]
115
+ }
116
+
117
+ return sel_ms_xdt
118
+ else:
119
+ return self._xdt.sel(indexers, method, tolerance, drop, **indexers_kwargs)
120
+
121
+ def get_field_and_source_xds(self, data_group_name: str = None) -> xr.Dataset:
122
+ """Get the field_and_source_xds associated with data group `data_group_name`.
123
+
124
+ Parameters
125
+ ----------
126
+ data_group_name : str, optional
127
+ The data group to process. Default is "base" or if not found to first data group.
128
+
129
+ Returns
130
+ -------
131
+ xarray.Dataset
132
+ field_and_source_xds associated with the data group.
133
+ """
134
+ if self._xdt.attrs.get("type") not in MS_DATASET_TYPES:
135
+ raise InvalidAccessorLocation(f"{self._xdt.path} is not a MSv4 node.")
136
+
137
+ if data_group_name is None:
138
+ if "base" in self._xdt.attrs["data_groups"].keys():
139
+ data_group_name = "base"
140
+ else:
141
+ data_group_name = list(self._xdt.attrs["data_groups"].keys())[0]
142
+
143
+ return self._xdt[f"field_and_source_xds_{data_group_name}"].ds
144
+
145
+ def get_partition_info(self, data_group_name: str = None) -> dict:
146
+ """
147
+ Generate a partition info dict for an MSv4, with general MSv4 description including
148
+ information such as field and source names, SPW name, scan name, the intents string,
149
+ etc.
150
+
151
+ The information is gathered from various coordinates, secondary datasets, and info
152
+ dicts of the MSv4. For example, the SPW name comes from the attributes of the
153
+ frequency coordinate, whereas field and source related information such as field and
154
+ source names come from the field_and_source_xds (base) dataset of the MSv4.
155
+
156
+ Parameters
157
+ ----------
158
+ data_group_name : str, optional
159
+ The data group to process. Default is "base" or if not found to first data group.
160
+
161
+ Returns
162
+ -------
163
+ dict
164
+ Partition info dict for the MSv4
165
+ """
166
+ if self._xdt.attrs.get("type") not in MS_DATASET_TYPES:
167
+ raise InvalidAccessorLocation(
168
f"{self._xdt.path} is not a MSv4 node (type {self._xdt.attrs.get('type')})."
169
+ )
170
+
171
+ if data_group_name is None:
172
+ if "base" in self._xdt.attrs["data_groups"].keys():
173
+ data_group_name = "base"
174
+ else:
175
+ data_group_name = list(self._xdt.attrs["data_groups"].keys())[0]
176
+
177
+ field_and_source_xds = self._xdt.xr_ms.get_field_and_source_xds(data_group_name)
178
+
179
+ if "line_name" in field_and_source_xds.coords:
180
+ line_name = to_list(
181
+ np.unique(np.ravel(field_and_source_xds.line_name.values))
182
+ )
183
+ else:
184
+ line_name = []
185
+
186
+ partition_info = {
187
+ "spectral_window_name": self._xdt.frequency.attrs["spectral_window_name"],
188
+ "field_name": to_list(np.unique(field_and_source_xds.field_name.values)),
189
+ "polarization_setup": to_list(self._xdt.polarization.values),
190
+ "scan_name": to_list(np.unique(self._xdt.scan_name.values)),
191
+ "source_name": to_list(np.unique(field_and_source_xds.source_name.values)),
192
+ "intents": self._xdt.observation_info["intents"],
193
+ "line_name": line_name,
194
+ "data_group_name": data_group_name,
195
+ }
196
+
197
+ return partition_info