PyPI - xradio - Versions diffs - 0.0.33__py3-none-any.whl → 0.0.36__py3-none-any.whl - Mend

xradio 0.0.33__py3-none-any.whl → 0.0.36__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (25) hide show

xradio/_utils/list_and_array.py +3 -1
xradio/_utils/schema.py +190 -0
xradio/_utils/zarr/common.py +11 -5
xradio/image/_util/_zarr/xds_from_zarr.py +15 -2
xradio/image/_util/_zarr/zarr_low_level.py +65 -14
xradio/schema/bases.py +37 -8
xradio/schema/check.py +15 -3
xradio/schema/dataclass.py +2 -2
xradio/vis/_processing_set.py +136 -10
xradio/vis/_vis_utils/_ms/_tables/read.py +9 -0
xradio/vis/_vis_utils/_ms/conversion.py +166 -116
xradio/vis/_vis_utils/_ms/create_antenna_xds.py +479 -0
xradio/vis/_vis_utils/_ms/create_field_and_source_xds.py +84 -42
xradio/vis/_vis_utils/_ms/msv2_to_msv4_meta.py +1 -105
xradio/vis/_vis_utils/_ms/msv4_sub_xdss.py +4 -224
xradio/vis/_vis_utils/_utils/xds_helper.py +10 -2
xradio/vis/convert_msv2_to_processing_set.py +6 -1
xradio/vis/load_processing_set.py +2 -2
xradio/vis/read_processing_set.py +5 -2
xradio/vis/schema.py +348 -112
{xradio-0.0.33.dist-info → xradio-0.0.36.dist-info}/METADATA +1 -1
{xradio-0.0.33.dist-info → xradio-0.0.36.dist-info}/RECORD +25 -23
{xradio-0.0.33.dist-info → xradio-0.0.36.dist-info}/WHEEL +1 -1
{xradio-0.0.33.dist-info → xradio-0.0.36.dist-info}/LICENSE.txt +0 -0
{xradio-0.0.33.dist-info → xradio-0.0.36.dist-info}/top_level.txt +0 -0

xradio/vis/_processing_set.py CHANGED Viewed

@@ -1,19 +1,57 @@
 import pandas as pd
+from xradio._utils.list_and_array import to_list
+import numbers
 class processing_set(dict):
+    """
+    A dictionary subclass representing a Processing Set (PS) that is a set of Measurement Sets v4 (MS).
+    This class extends the built-in `dict` class and provides additional methods for manipulating and selecting subsets of the Processing Set.
+    Attributes:
+        meta (dict): A dictionary containing metadata information about the Processing Set.
+    Methods:
+        summary(data_group="base"): Returns a summary of the Processing Set as a Pandas table.
+        get_ps_max_dims(): Returns the maximum dimension of all the MSs in the Processing Set.
+        get_ps_freq_axis(): Combines the frequency axis of all MSs.
+        sel(query:str=None, **kwargs): Selects a subset of the Processing Set based on column names and values or a Pandas query.
+        ms_sel(**kwargs): Selects a subset of the Processing Set by applying the `sel` method to each individual MS.
+        ms_isel(**kwargs): Selects a subset of the Processing Set by applying the `isel` method to each individual MS.
+    """
     def __init__(self, *args, **kwargs):
         super().__init__(*args, **kwargs)
         self.meta = {"summary": {}}
     def summary(self, data_group="base"):
+        """
+        Returns a summary of the Processing Set as a Pandas table.
+        Args:
+            data_group (str): The data group to summarize. Default is "base".
+        Returns:
+            pandas.DataFrame: A DataFrame containing the summary information.
+        """
         if data_group in self.meta["summary"]:
             return self.meta["summary"][data_group]
         else:
-            self.meta["summary"][data_group] = self._summary(data_group)
+            self.meta["summary"][data_group] = self._summary(data_group).sort_values(
+                by=["name"], ascending=True
+            )
             return self.meta["summary"][data_group]
     def get_ps_max_dims(self):
+        """
+        Returns the maximum dimension of all the MSs in the Processing Set.
+        For example, if the Processing Set contains two MSs with dimensions (50, 20, 30) and (10, 30, 40), the maximum dimensions will be (50, 30, 40).
+        Returns:
+            dict: A dictionary containing the maximum dimensions of the Processing Set.
+        """
         if "max_dims" in self.meta:
             return self.meta["max_dims"]
         else:
@@ -21,6 +59,12 @@ class processing_set(dict):
             return self.meta["max_dims"]
     def get_ps_freq_axis(self):
+        """
+        Combines the frequency axis of all MSs.
+        Returns:
+            xarray.DataArray: The frequency axis of the Processing Set.
+        """
         if "freq_axis" in self.meta:
             return self.meta["freq_axis"]
         else:
@@ -33,11 +77,14 @@ class processing_set(dict):
             "obs_mode": [],
             "shape": [],
             "polarization": [],
+            "scan_number": [],
             "spw_name": [],
             # "field_id": [],
             "field_name": [],
             # "source_id": [],
             "source_name": [],
+            # "num_lines": [],
+            "line_name": [],
             "field_coords": [],
             "start_frequency": [],
             "end_frequency": [],
@@ -52,6 +99,9 @@ class processing_set(dict):
                 value.attrs["partition_info"]["spectral_window_name"]
             )
             summary_data["polarization"].append(value.polarization.values)
+            summary_data["scan_number"].append(
+                value.attrs["partition_info"]["scan_number"]
+            )
             if "visibility" in value.attrs["data_groups"][data_group]:
                 data_name = value.attrs["data_groups"][data_group]["visibility"]
@@ -72,8 +122,14 @@ class processing_set(dict):
             summary_data["source_name"].append(
                 value.attrs["partition_info"]["source_name"]
             )
-            summary_data["start_frequency"].append(value["frequency"].values[0])
-            summary_data["end_frequency"].append(value["frequency"].values[-1])
+            summary_data["line_name"].append(value.attrs["partition_info"]["line_name"])
+            # summary_data["num_lines"].append(value.attrs["partition_info"]["num_lines"])
+            summary_data["start_frequency"].append(
+                to_list(value["frequency"].values)[0]
+            )
+            summary_data["end_frequency"].append(to_list(value["frequency"].values)[-1])
             if value[data_name].attrs["field_and_source_xds"].is_ephemeris:
                 summary_data["field_coords"].append("Ephemeris")
@@ -117,7 +173,7 @@ class processing_set(dict):
         for ms_xds in self.values():
             assert (
                 frame == ms_xds.frequency.attrs["frame"]
-            ), "Frequency reference frame not consistent in processing set."
+            ), "Frequency reference frame not consistent in Processing Set."
             if ms_xds.frequency.attrs["spectral_window_id"] not in spw_ids:
                 spw_ids.append(ms_xds.frequency.attrs["spectral_window_id"])
                 freq_axis_list.append(ms_xds.frequency)
@@ -142,19 +198,71 @@ class processing_set(dict):
     def get(self, id):
         return self[list(self.keys())[id]]
-    def sel(self, **kwargs):
+    def sel(self, string_exact_match: bool = True, query: str = None, **kwargs):
+        """
+        Selects a subset of the Processing Set based on column names and values or a Pandas query.
+        The following columns are supported: name, obs_mode, polarization, spw_name, field_name, source_name, field_coords, start_frequency, end_frequency.
+        This function will not apply any selection on the MS data so data will not be dropped for example if a MS has field_name=['field_0','field_10','field_08'] and ps.sel(field_name='field_0') is done the resulting MS will still have field_name=['field_0','field_10','field_08'].
+        Examples:
+            ps.sel(obs_mode='OBSERVE_TARGET#ON_SOURCE', polarization=['RR', 'LL']) # Select all MSs with obs_mode 'OBSERVE_TARGET#ON_SOURCE' and polarization 'RR' or 'LL'.
+            ps.sel(query='start_frequency > 100e9 AND end_frequency < 200e9') # Select all MSs with start_frequency greater than 100 GHz and less than 200 GHz.
+        Args:
+            query (str): A Pandas query string. Default is None.
+            string_exact_match (bool): If True, the selection will be an exact match for string and string list columns. Default is True.
+            **kwargs: Keyword arguments representing column names and values to filter the Processing Set.
+        Returns:
+            processing_set: The subset of the Processing Set.
+        """
         import numpy as np
+        # def select_rows(df, col, input_strings):
+        #     return df[df[col].apply(lambda x: any(i in x for i in input_strings))]
+        # def select_rows(df, col, sel, string_exact_match):
+        #     def check_selection(row_val):
+        #         if isinstance(row_val, numbers.Number) or string_exact_match:
+        #             return any(i == row_val for i in sel) #If values are numbers
+        #         return any(i in row_val for i in sel) #If values are strings
+        #     return df[df[col].apply(check_selection)]
+        def select_rows(df, col, sel_vals, string_exact_match):
+            def check_selection(row_val):
+                row_val = to_list(
+                    row_val
+                )  # make sure that it is a list so that we can iterate over it.
+                for rw in row_val:
+                    for s in sel_vals:
+                        if string_exact_match:
+                            if rw == s:
+                                return True
+                        else:
+                            if s in rw:
+                                return True
+                return False
+            return df[df[col].apply(check_selection)]
         summary_table = self.summary()
         for key, value in kwargs.items():
-            if isinstance(value, list) or isinstance(value, np.ndarray):
-                summary_table = summary_table[summary_table[key].isin(value)]
-            elif isinstance(value, slice):
+            value = to_list(value)  # make sure value is a list.
+            if len(value) == 1 and isinstance(value[0], slice):
                 summary_table = summary_table[
-                    summary_table[key].between(value.start, value.stop)
+                    summary_table[key].between(value[0].start, value[0].stop)
                 ]
             else:
-                summary_table = summary_table[summary_table[key] == value]
+                summary_table = select_rows(
+                    summary_table, key, value, string_exact_match
+                )
+        if query is not None:
+            summary_table = summary_table.query(query)
         sub_ps = processing_set()
         for key, val in self.items():
@@ -164,12 +272,30 @@ class processing_set(dict):
         return sub_ps
     def ms_sel(self, **kwargs):
+        """
+        Selects a subset of the Processing Set by applying the `sel` method to each MS.
+        Args:
+            **kwargs: Keyword arguments representing column names and values to filter the Processing Set.
+        Returns:
+            processing_set: The subset of the Processing Set.
+        """
         sub_ps = processing_set()
         for key, val in self.items():
             sub_ps[key] = val.sel(kwargs)
         return sub_ps
     def ms_isel(self, **kwargs):
+        """
+        Selects a subset of the Processing Set by applying the `isel` method to each MS.
+        Args:
+            **kwargs: Keyword arguments representing dimension names and indices to select from the Processing Set.
+        Returns:
+            processing_set: The subset of the Processing Set.
+        """
         sub_ps = processing_set()
         for key, val in self.items():
             sub_ps[key] = val.isel(kwargs)

xradio/vis/_vis_utils/_ms/_tables/read.py CHANGED Viewed

@@ -455,6 +455,9 @@ def redimension_ms_subtable(xds: xr.Dataset, subt_name: str) -> xr.Dataset:
         "SOURCE": ["SOURCE_ID", "TIME", "SPECTRAL_WINDOW_ID"],
         "SYSCAL": ["ANTENNA_ID", "FEED_ID", "SPECTRAL_WINDOW_ID", "TIME"],
         "WEATHER": ["ANTENNA_ID", "TIME"],
+        "PHASE_CAL": ["ANTENNA_ID", "TIME", "SPECTRAL_WINDOW_ID"],
+        "GAIN_CURVE": ["ANTENNA_ID", "TIME", "SPECTRAL_WINDOW_ID"],
+        "FEED": ["ANTENNA_ID", "SPECTRAL_WINDOW_ID"],
         # added tables (MSv3 but not preent in MSv2). Build it from "EPHEMi_... tables
         # Not clear what to do about 'time' var/dim:  , "time"],
         "EPHEMERIDES": ["ephemeris_row_id", "ephemeris_id"],
@@ -645,6 +648,9 @@ def load_generic_table(
         "SOURCE",
         "SYSCAL",
         "WEATHER",
+        "PHASE_CAL",
+        "GAIN_CURVE",
+        "FEED",
     ]:
         xds = redimension_ms_subtable(xds, tname)
@@ -944,6 +950,9 @@ def raw_col_data_to_coords_vars(
         "SOURCE",
         "SYSCAL",
         "WEATHER",
+        "PHASE_CAL",
+        "GAIN_CURVE",
+        "FEED",
     )
     dim_prefix = "dim"

xradio 0.0.33__py3-none-any.whl → 0.0.36__py3-none-any.whl

xradio 0.0.33__py3-none-any.whl → 0.0.36__py3-none-any.whl