xradio 0.0.48__py3-none-any.whl → 0.0.50__py3-none-any.whl

This diff shows the content changes between publicly released versions of the package as they appear in their public registry. It is provided for informational purposes only.
Files changed (32)
  1. xradio/__init__.py +1 -0
  2. xradio/_utils/dict_helpers.py +69 -2
  3. xradio/image/_util/__init__.py +0 -3
  4. xradio/image/_util/_casacore/common.py +0 -13
  5. xradio/image/_util/_casacore/xds_from_casacore.py +102 -97
  6. xradio/image/_util/_casacore/xds_to_casacore.py +36 -24
  7. xradio/image/_util/_fits/xds_from_fits.py +81 -36
  8. xradio/image/_util/_zarr/zarr_low_level.py +3 -3
  9. xradio/image/_util/casacore.py +7 -5
  10. xradio/image/_util/common.py +13 -26
  11. xradio/image/_util/image_factory.py +143 -191
  12. xradio/image/image.py +10 -59
  13. xradio/measurement_set/__init__.py +11 -6
  14. xradio/measurement_set/_utils/_msv2/_tables/read.py +187 -46
  15. xradio/measurement_set/_utils/_msv2/_tables/table_query.py +22 -0
  16. xradio/measurement_set/_utils/_msv2/conversion.py +352 -318
  17. xradio/measurement_set/_utils/_msv2/msv4_info_dicts.py +20 -17
  18. xradio/measurement_set/convert_msv2_to_processing_set.py +46 -6
  19. xradio/measurement_set/load_processing_set.py +100 -53
  20. xradio/measurement_set/measurement_set_xdt.py +319 -0
  21. xradio/measurement_set/open_processing_set.py +122 -86
  22. xradio/measurement_set/processing_set_xdt.py +1552 -0
  23. xradio/measurement_set/schema.py +201 -94
  24. xradio/schema/bases.py +5 -1
  25. xradio/schema/check.py +97 -5
  26. {xradio-0.0.48.dist-info → xradio-0.0.50.dist-info}/METADATA +5 -4
  27. {xradio-0.0.48.dist-info → xradio-0.0.50.dist-info}/RECORD +30 -30
  28. {xradio-0.0.48.dist-info → xradio-0.0.50.dist-info}/WHEEL +1 -1
  29. xradio/measurement_set/measurement_set_xds.py +0 -117
  30. xradio/measurement_set/processing_set.py +0 -803
  31. {xradio-0.0.48.dist-info → xradio-0.0.50.dist-info/licenses}/LICENSE.txt +0 -0
  32. {xradio-0.0.48.dist-info → xradio-0.0.50.dist-info}/top_level.txt +0 -0
xradio/measurement_set/_utils/_msv2/msv4_info_dicts.py

@@ -49,28 +49,31 @@ def create_info_dicts(
     line_name = []

     info_dicts = {}
-    info_dicts["partition_info"] = {
-        # "spectral_window_id": xds.frequency.attrs["spectral_window_id"],
-        "spectral_window_name": xds.frequency.attrs["spectral_window_name"],
-        # "field_id": to_list(unique_1d(field_id)),
-        "field_name": to_list(np.unique(field_and_source_xds.field_name.values)),
-        "polarization_setup": to_list(xds.polarization.values),
-        "scan_name": to_list(np.unique(partition_info_misc_fields["scan_name"])),
-        "source_name": to_list(np.unique(field_and_source_xds.source_name.values)),
-        # "source_id": to_list(unique_1d(source_id)),
-        "intents": partition_info_misc_fields["intents"].split(","),
-        "taql": partition_info_misc_fields["taql_where"],
-        "line_name": line_name,
-    }
-    if "antenna_name" in partition_info_misc_fields:
-        info_dicts["partition_info"]["antenna_name"] = partition_info_misc_fields[
-            "antenna_name"
-        ]
+    # info_dicts["partition_info"] = {
+    #     # "spectral_window_id": xds.frequency.attrs["spectral_window_id"],
+    #     "spectral_window_name": xds.frequency.attrs["spectral_window_name"],
+    #     # "field_id": to_list(unique_1d(field_id)),
+    #     "field_name": to_list(np.unique(field_and_source_xds.field_name.values)),
+    #     "polarization_setup": to_list(xds.polarization.values),
+    #     "scan_name": to_list(np.unique(partition_info_misc_fields["scan_name"])),
+    #     "source_name": to_list(np.unique(field_and_source_xds.source_name.values)),
+    #     # "source_id": to_list(unique_1d(source_id)),
+    #     "intents": partition_info_misc_fields["intents"].split(","),
+    #     "taql": partition_info_misc_fields["taql_where"],
+    #     "line_name": line_name,
+    # }
+    # if "antenna_name" in partition_info_misc_fields:
+    #     info_dicts["partition_info"]["antenna_name"] = partition_info_misc_fields[
+    #         "antenna_name"
+    #     ]

     observation_id = check_if_consistent(
         tb_tool.getcol("OBSERVATION_ID"), "OBSERVATION_ID"
     )
     info_dicts["observation_info"] = create_observation_info(in_file, observation_id)
+    info_dicts["observation_info"]["intents"] = partition_info_misc_fields[
+        "intents"
+    ].split(",")

     processor_id = check_if_consistent(tb_tool.getcol("PROCESSOR_ID"), "PROCESSOR_ID")
     info_dicts["processor_info"] = create_processor_info(in_file, processor_id)
xradio/measurement_set/convert_msv2_to_processing_set.py

@@ -18,6 +18,7 @@ def estimate_conversion_memory_and_cores(
     """
     Given an MSv2 and a partition_scheme to use when converting it to MSv4,
     estimates:
+
     - memory (in the sense of the amount expected to be enough to convert)
     - cores (in the sense of the recommended/optimal number of cores to use to convert)

@@ -36,7 +37,7 @@ def estimate_conversion_memory_and_cores(
         Partition scheme as used in the function convert_msv2_to_processing_set()

     Returns
-    ----------
+    -------
     tuple
         estimated maximum memory required for one partition,
         maximum number of cores it makes sense to use (number of partitions),
@@ -62,7 +63,7 @@ def convert_msv2_to_processing_set(
     use_table_iter: bool = False,
     compressor: numcodecs.abc.Codec = numcodecs.Zstd(level=2),
     storage_backend: str = "zarr",
-    parallel: bool = False,
+    parallel_mode: str = "none",
     overwrite: bool = False,
 ):
     """Convert a Measurement Set v2 into a Processing Set of Measurement Set v4.
@@ -99,14 +100,45 @@ def convert_msv2_to_processing_set(
         The Blosc compressor to use when saving the converted data to disk using Zarr, by default numcodecs.Zstd(level=2).
     storage_backend : {"zarr", "netcdf"}, optional
         The on-disk format to use. "netcdf" is not yet implemented.
-    parallel : bool, optional
-        Makes use of Dask to execute conversion in parallel, by default False.
+    parallel_mode : {"none", "partition", "time"}, optional
+        Whether to use Dask to execute the conversion in parallel; by default "none", and the conversion runs serially.
+        The option "partition" parallelises the conversion over the partitions specified by `partition_scheme`. The option "time" can only be used for phased array interferometers where there are no partitions
+        in the MS v2; instead the conversion is parallelised along the time dimension, which can be controlled by `main_chunksize`.
     overwrite : bool, optional
         Whether to overwrite an existing processing set, by default False.
     """

+    # Create empty data tree
+    import xarray as xr
+
+    ps_dt = xr.DataTree()
+
+    if not str(out_file).endswith("ps.zarr"):
+        out_file += ".ps.zarr"
+
+    print("Output file: ", out_file)
+
+    if overwrite:
+        ps_dt.to_zarr(store=out_file, mode="w")
+    else:
+        ps_dt.to_zarr(store=out_file, mode="w-")
+
+    # Check `parallel_mode` is valid
+    try:
+        assert parallel_mode in ["none", "partition", "time"]
+    except AssertionError:
+        logger.warning(
+            f"`parallel_mode` {parallel_mode} not recognised. Defaulting to 'none'."
+        )
+        parallel_mode = "none"
+
     partitions = create_partitions(in_file, partition_scheme=partition_scheme)
     logger.info("Number of partitions: " + str(len(partitions)))
+    if parallel_mode == "time":
+        assert (
+            len(partitions) == 1
+        ), "MS v2 contains more than one partition. `parallel_mode = 'time'` not valid."
+
     delayed_list = []

     for ms_v4_id, partition_info in enumerate(partitions):
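For callers, the visible consequence of the two hunks above is an API change: the boolean `parallel` argument is gone and `parallel_mode` takes a string. A hedged migration sketch; the input/output paths are hypothetical, and the import path follows the module layout shown in the file list:

```python
from xradio.measurement_set.convert_msv2_to_processing_set import (
    convert_msv2_to_processing_set,
)

# 0.0.48 style (no longer accepted):
#   convert_msv2_to_processing_set(in_file="obs.ms", out_file="obs", parallel=True)

# 0.0.50 style: parallel_mode is one of "none", "partition" or "time".
convert_msv2_to_processing_set(
    in_file="obs.ms",           # hypothetical input MS v2
    out_file="obs",             # ".ps.zarr" is appended automatically (see above)
    parallel_mode="partition",  # use Dask to convert partitions concurrently
    overwrite=True,
)
```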
@@ -132,7 +164,7 @@ def convert_msv2_to_processing_set(

         # prepend '0' to ms_v4_id as needed
         ms_v4_id = f"{ms_v4_id:0>{len(str(len(partitions) - 1))}}"
-        if parallel:
+        if parallel_mode == "partition":
             delayed_list.append(
                 dask.delayed(convert_and_write_partition)(
                     in_file,
@@ -149,6 +181,7 @@ def convert_msv2_to_processing_set(
                     phase_cal_interpolate=phase_cal_interpolate,
                     sys_cal_interpolate=sys_cal_interpolate,
                     compressor=compressor,
+                    parallel_mode=parallel_mode,
                     overwrite=overwrite,
                 )
             )
@@ -168,8 +201,15 @@ def convert_msv2_to_processing_set(
                 phase_cal_interpolate=phase_cal_interpolate,
                 sys_cal_interpolate=sys_cal_interpolate,
                 compressor=compressor,
+                parallel_mode=parallel_mode,
                 overwrite=overwrite,
             )

-    if parallel:
+    if parallel_mode == "partition":
         dask.compute(delayed_list)
+
+    import zarr
+
+    root_group = zarr.open(out_file, mode="r+")  # Open in read/write mode
+    root_group.attrs["type"] = "processing_set"  # Replace
+    zarr.convenience.consolidate_metadata(root_group.store)
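The final addition stamps the root Zarr group with a `type` attribute and consolidates metadata, so the whole tree can be listed with a single metadata read (useful on object storage). A small read-back sketch, under the assumption that the store above was written to `obs.ps.zarr` (a hypothetical path):

```python
import zarr

# Open the converted processing set read-only and check the root attribute
# written at the end of convert_msv2_to_processing_set.
root = zarr.open("obs.ps.zarr", mode="r")  # hypothetical path
assert root.attrs["type"] == "processing_set"

# Consolidated metadata avoids touching every .zattrs/.zgroup file:
consolidated = zarr.open_consolidated("obs.ps.zarr")
print(list(consolidated.group_keys()))  # the MS v4 groups in the set
```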
xradio/measurement_set/load_processing_set.py

@@ -1,80 +1,115 @@
 import os
-from xradio.measurement_set import ProcessingSet
 from typing import Dict, Union
+import dask
+import xarray as xr
+import s3fs


 def load_processing_set(
     ps_store: str,
-    sel_parms: dict,
-    data_variables: Union[list, None] = None,
+    sel_parms: dict = None,
+    data_group_name: str = None,
+    include_variables: Union[list, None] = None,
+    drop_variables: Union[list, None] = None,
     load_sub_datasets: bool = True,
-) -> ProcessingSet:
+) -> xr.DataTree:
     """Loads a processing set into memory.

     Parameters
     ----------
     ps_store : str
         String of the path and name of the processing set. For example '/users/user_1/uid___A002_Xf07bba_Xbe5c_target.lsrk.vis.zarr' for a file stored on a local file system, or 's3://viper-test-data/Antennae_North.cal.lsrk.split.vis.zarr/' for a file in AWS object storage.
-    sel_parms : dict
-        A dictionary where the keys are the names of the ms_xds's and the values are slice_dicts.
+    sel_parms : dict, optional
+        A dictionary where the keys are the names of the ms_xdt's (measurement set xarray data trees) and the values are slice_dicts.
         slice_dicts: A dictionary where the keys are the dimension names and the values are slices.
+
         For example::

             {
+
             'ms_v4_name_1': {'frequency': slice(0, 160, None),'time':slice(0,100)},
             ...
             'ms_v4_name_n': {'frequency': slice(0, 160, None),'time':slice(0,100)},
             }

-    data_variables : Union[list, None], optional
+        By default None, which loads all ms_xdts.
+    data_group_name : str, optional
+        The name of the data group to select. By default None, which loads all data groups.
+    include_variables : Union[list, None], optional
         The list of data variables to load into memory for example ['VISIBILITY', 'WEIGHT', 'FLAGS']. By default None which will load all data variables into memory.
+    drop_variables : Union[list, None], optional
+        The list of data variables to drop from memory for example ['VISIBILITY', 'WEIGHT', 'FLAGS']. By default None which will not drop any data variables from memory.
     load_sub_datasets : bool, optional
         If true sub-datasets (for example weather_xds, antenna_xds, pointing_xds, system_calibration_xds ...) will be loaded into memory, by default True.

     Returns
     -------
-    ProcessingSet
-        In memory representation of processing set (data is represented by Dask.arrays).
+    xarray.DataTree
+        In memory representation of processing set using xr.DataTree.
     """
-    from xradio._utils.zarr.common import _open_dataset, _get_file_system_and_items
-    from xradio.measurement_set import MeasurementSetXds
+    from xradio._utils.zarr.common import _get_file_system_and_items

     file_system, ms_store_list = _get_file_system_and_items(ps_store)

-    ps = ProcessingSet()
-    for ms_name, ms_xds_isel in sel_parms.items():
-        ms_store = os.path.join(ps_store, ms_name)
-        correlated_store = os.path.join(ms_store, "correlated_xds")
-
-        xds = _open_dataset(
-            correlated_store,
-            file_system,
-            ms_xds_isel,
-            data_variables,
-            load=True,
-        )
-        data_groups = xds.attrs["data_groups"]
-
-        if load_sub_datasets:
-            from xradio.measurement_set.open_processing_set import _open_sub_xds
-
-            sub_xds_dict, field_and_source_xds_dict = _open_sub_xds(
-                ms_store, file_system=file_system, load=True, data_groups=data_groups
+    with dask.config.set(
+        scheduler="synchronous"
+    ):  # serial scheduler, critical so that this can be used within delayed functions.
+        ps_xdt = xr.DataTree()
+
+        if sel_parms:
+            for ms_name, ms_xds_isel in sel_parms.items():
+                ms_store = os.path.join(ps_store, ms_name)
+
+                if isinstance(file_system, s3fs.core.S3FileSystem):
+                    ms_store = s3fs.S3Map(root=ps_store, s3=file_system, check=False)
+
+                if ms_xds_isel:
+                    ms_xdt = (
+                        xr.open_datatree(
+                            ms_store, engine="zarr", drop_variables=drop_variables
+                        )
+                        .isel(ms_xds_isel)
+                        .xr_ms.sel(data_group_name=data_group_name)
+                    )
+                else:
+                    ms_xdt = xr.open_datatree(
+                        ms_store, engine="zarr", drop_variables=drop_variables
+                    ).xr_ms.sel(data_group_name=data_group_name)
+
+                if include_variables is not None:
+                    for data_vars in ms_xdt.ds.data_vars:
+                        if data_vars not in include_variables:
+                            ms_xdt.ds = ms_xdt.ds.drop_vars(data_vars)
+
+                ps_xdt[ms_name] = ms_xdt
+
+            ps_xdt.attrs["type"] = "processing_set"
+        else:
+            ps_xdt = xr.open_datatree(
+                ps_store, engine="zarr", drop_variables=drop_variables
             )

-        xds.attrs = {
-            **xds.attrs,
-            **sub_xds_dict,
-        }
-        for data_group_name, data_group_vals in data_groups.items():
+            if (include_variables is not None) or data_group_name:
+                for ms_name, ms_xdt in ps_xdt.items():
+
+                    ms_xdt = ms_xdt.xr_ms.sel(data_group_name=data_group_name)
+
+                    if include_variables is not None:
+                        for data_vars in ms_xdt.ds.data_vars:
+                            if data_vars not in include_variables:
+                                ms_xdt.ds = ms_xdt.ds.drop_vars(data_vars)
+                    ps_xdt[ms_name] = ms_xdt

-            xds[data_group_vals["correlated_data"]].attrs[
-                "field_and_source_xds"
-            ] = field_and_source_xds_dict[data_group_name]
+        if not load_sub_datasets:
+            for ms_xdt in ps_xdt.children.values():
+                ms_xdt_names = list(ms_xdt.keys())
+                for sub_xds_name in ms_xdt_names:
+                    if "xds" in sub_xds_name:
+                        del ms_xdt[sub_xds_name]

-        ps[ms_name] = MeasurementSetXds(xds)
+        ps_xdt = ps_xdt.load()

-    return ps
+    return ps_xdt


 class ProcessingSetIterator:
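With this rewrite, `load_processing_set` returns an `xarray.DataTree` rather than the removed `ProcessingSet` class, and variable selection moves to `include_variables`/`drop_variables` plus a `data_group_name`. A usage sketch against the new signature; the store path, MS v4 name, and data group name below are hypothetical:

```python
from xradio.measurement_set.load_processing_set import load_processing_set

ps_xdt = load_processing_set(
    ps_store="obs.ps.zarr",  # hypothetical local processing set
    sel_parms={
        # load one MS v4, sliced along frequency and time
        "obs_01": {"frequency": slice(0, 160), "time": slice(0, 100)},
    },
    data_group_name="base",                    # hypothetical data group
    include_variables=["VISIBILITY", "FLAG"],  # drop everything else
    load_sub_datasets=False,                   # skip weather_xds, antenna_xds, ...
)

print(ps_xdt["obs_01"].ds.data_vars)  # only the included variables remain
```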
@@ -82,8 +117,10 @@ class ProcessingSetIterator:
         self,
         sel_parms: dict,
         input_data_store: str,
-        input_data: Union[Dict, ProcessingSet, None] = None,
-        data_variables: list = None,
+        input_data: Union[Dict, xr.DataTree, None] = None,
+        data_group_name: str = None,
+        include_variables: Union[list, None] = None,
+        drop_variables: Union[list, None] = None,
         load_sub_datasets: bool = True,
     ):
         """An iterator that will go through a processing set one MS v4 at a time.
@@ -102,10 +139,16 @@ class ProcessingSetIterator:
            }
        input_data_store : str
            String of the path and name of the processing set. For example '/users/user_1/uid___A002_Xf07bba_Xbe5c_target.lsrk.vis.zarr'.
-        input_data : Union[Dict, processing_set, None], optional
+        input_data : Union[Dict, xr.DataTree, None], optional
            If the processing set is in memory already it can be supplied here. By default None which will make the iterator load data using the supplied input_data_store.
-        data_variables : list, optional
+        data_group_name : str, optional
+            The name of the data group to select. By default None, which loads all data groups.
+        data_group_name : str, optional
+            The name of the data group to select. By default None, which loads all data groups.
+        include_variables : Union[list, None], optional
            The list of data variables to load into memory for example ['VISIBILITY', 'WEIGHT', 'FLAGS']. By default None which will load all data variables into memory.
+        drop_variables : Union[list, None], optional
+            The list of data variables to drop from memory for example ['VISIBILITY', 'WEIGHT', 'FLAGS']. By default None which will not drop any data variables from memory.
        load_sub_datasets : bool, optional
            If true sub-datasets (for example weather_xds, antenna_xds, pointing_xds, system_calibration_xds ...) will be loaded into memory, by default True.
        """
@@ -114,7 +157,9 @@ class ProcessingSetIterator:
         self.input_data_store = input_data_store
         self.sel_parms = sel_parms
         self.xds_name_iter = iter(sel_parms.keys())
-        self.data_variables = data_variables
+        self.data_group_name = data_group_name
+        self.include_variables = include_variables
+        self.drop_variables = drop_variables
         self.load_sub_datasets = load_sub_datasets

     def __iter__(self):
@@ -122,20 +167,22 @@ class ProcessingSetIterator:

     def __next__(self):
         try:
-            xds_name = next(self.xds_name_iter)
+            sub_xds_name = next(self.xds_name_iter)
         except Exception as e:
             raise StopIteration

         if self.input_data is None:
-            slice_description = self.sel_parms[xds_name]
-            ps = load_processing_set(
+            slice_description = self.sel_parms[sub_xds_name]
+            ps_xdt = load_processing_set(
                 ps_store=self.input_data_store,
-                sel_parms={xds_name: slice_description},
-                data_variables=self.data_variables,
+                sel_parms={sub_xds_name: slice_description},
+                data_group_name=self.data_group_name,
+                include_variables=self.include_variables,
+                drop_variables=self.drop_variables,
                 load_sub_datasets=self.load_sub_datasets,
             )
-            xds = ps.get(0)
+            sub_xdt = ps_xdt.get(0)
         else:
-            xds = self.input_data[xds_name]  # In memory
+            sub_xdt = self.input_data[sub_xds_name]  # In memory

-        return xds
+        return sub_xdt
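A matching sketch for the updated iterator. Supplying `input_data` (for example the `ps_xdt` loaded in the previous sketch) makes the iterator serve MS v4 data trees from memory instead of re-reading the store; all names below are hypothetical:

```python
from xradio.measurement_set.load_processing_set import ProcessingSetIterator

ps_iter = ProcessingSetIterator(
    sel_parms={"obs_01": {"frequency": slice(0, 160)}},  # hypothetical name/slice
    input_data_store="obs.ps.zarr",  # hypothetical store, read only if input_data is None
    input_data=ps_xdt,               # in-memory DataTree from the previous sketch
    include_variables=["VISIBILITY"],
    load_sub_datasets=False,
)

for ms_xdt in ps_iter:  # yields one MS v4 (xarray.DataTree) per iteration
    print(ms_xdt)
```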