xradio 0.0.27__py3-none-any.whl → 0.0.29__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- xradio/__init__.py +5 -4
- xradio/_utils/array.py +90 -0
- xradio/_utils/zarr/common.py +48 -3
- xradio/image/_util/_fits/xds_from_fits.py +10 -5
- xradio/image/_util/_zarr/zarr_low_level.py +27 -24
- xradio/image/_util/common.py +4 -1
- xradio/image/_util/zarr.py +4 -1
- xradio/schema/__init__.py +24 -6
- xradio/schema/bases.py +440 -2
- xradio/schema/check.py +96 -55
- xradio/schema/dataclass.py +123 -27
- xradio/schema/metamodel.py +21 -4
- xradio/schema/typing.py +33 -18
- xradio/vis/__init__.py +5 -2
- xradio/vis/_processing_set.py +30 -9
- xradio/vis/_vis_utils/_ms/_tables/create_field_and_source_xds.py +710 -0
- xradio/vis/_vis_utils/_ms/_tables/load.py +23 -10
- xradio/vis/_vis_utils/_ms/_tables/load_main_table.py +145 -64
- xradio/vis/_vis_utils/_ms/_tables/read.py +782 -156
- xradio/vis/_vis_utils/_ms/_tables/read_main_table.py +176 -45
- xradio/vis/_vis_utils/_ms/_tables/read_subtables.py +79 -28
- xradio/vis/_vis_utils/_ms/_tables/write.py +102 -45
- xradio/vis/_vis_utils/_ms/_tables/write_exp_api.py +127 -65
- xradio/vis/_vis_utils/_ms/chunks.py +58 -21
- xradio/vis/_vis_utils/_ms/conversion.py +536 -67
- xradio/vis/_vis_utils/_ms/descr.py +52 -20
- xradio/vis/_vis_utils/_ms/msv2_to_msv4_meta.py +70 -35
- xradio/vis/_vis_utils/_ms/msv4_infos.py +0 -59
- xradio/vis/_vis_utils/_ms/msv4_sub_xdss.py +76 -9
- xradio/vis/_vis_utils/_ms/optimised_functions.py +0 -46
- xradio/vis/_vis_utils/_ms/partition_queries.py +308 -119
- xradio/vis/_vis_utils/_ms/partitions.py +82 -25
- xradio/vis/_vis_utils/_ms/subtables.py +32 -14
- xradio/vis/_vis_utils/_utils/partition_attrs.py +30 -11
- xradio/vis/_vis_utils/_utils/xds_helper.py +136 -45
- xradio/vis/_vis_utils/_zarr/read.py +60 -22
- xradio/vis/_vis_utils/_zarr/write.py +83 -9
- xradio/vis/_vis_utils/ms.py +48 -29
- xradio/vis/_vis_utils/zarr.py +44 -20
- xradio/vis/convert_msv2_to_processing_set.py +106 -32
- xradio/vis/load_processing_set.py +38 -61
- xradio/vis/read_processing_set.py +62 -96
- xradio/vis/schema.py +687 -0
- xradio/vis/vis_io.py +75 -43
- {xradio-0.0.27.dist-info → xradio-0.0.29.dist-info}/LICENSE.txt +6 -1
- {xradio-0.0.27.dist-info → xradio-0.0.29.dist-info}/METADATA +10 -5
- xradio-0.0.29.dist-info/RECORD +73 -0
- {xradio-0.0.27.dist-info → xradio-0.0.29.dist-info}/WHEEL +1 -1
- xradio/vis/model.py +0 -497
- xradio-0.0.27.dist-info/RECORD +0 -71
- {xradio-0.0.27.dist-info → xradio-0.0.29.dist-info}/top_level.txt +0 -0
xradio/vis/_vis_utils/ms.py
CHANGED
```diff
@@ -24,7 +24,8 @@ def read_ms(
     expand: bool = False,
     **kwargs: str,
 ) -> CASAVisSet:
-    """Read a MeasurementSet (MSv2 format) into a next generation CASA
+    """
+    Read a MeasurementSet (MSv2 format) into a next generation CASA
     dataset (visibilities dataset as a set of Xarray datasets).
 
     The MS is partitioned into multiple sub- Xarray datasets (where the data variables are read as
@@ -33,28 +34,37 @@ def read_ms(
     and polarizations) and, subject to experimentation, by scan and subscan. This results in multiple
     partitions as xarray datasets (xds) contained within a main xds (mxds).
 
-
-
-
-
-    subtables
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+    Parameters
+    ----------
+    infile : str
+        Input MS filename
+    subtables : bool (Default value = True)
+        Also read and include subtables along with main table selection. Default False will
+        omit subtables (faster)
+    asdm_subtables : bool (Default value = False)
+        in addition to MeasurementSet subtables (if enabled), also read extension
+        subtables named "ASDM_*"
+    partition_scheme : str (Default value = "intent")
+        (experimenting) Whether to partition sub-xds datasets by scan/subscan
+        (in addition to DDI), or other alternative partitioning schemes. Accepted values: 'scan/subscan',
+        'scan', 'ddi', 'intent'. Default: 'intent'
+    chunks : Union[Tuple[int], List[int]] (Default value = None)
+        Can be used to set a specific chunk shape (with a tuple of ints), or to control the
+        optimization used for automatic chunking (with a list of ints). A tuple of ints in the form of (row,
+        chan, pol) will use a fixed chunk shape. A list or numpy array of ints in the form of [idx1, etc]
+        will trigger auto-chunking optimized for the given indices, with row=0, chan=1, pol=2. Default None
+        uses auto-chunking with a best fit across all dimensions (probably sub-optimal for most cases).
+    expand : bool (Default value = False)
+        (to be removed) Whether or not to return the original flat row structure of the MS (False)
+        or expand the rows to time x baseline dimensions (True). Expanding the rows allows for easier indexing
+        and parallelization across time and baseline dimensions, at the cost of some conversion time.
+    **kwargs : str
+
+
+    Returns
+    -------
+    CASAVisSet
+        Main xarray dataset of datasets for this visibility dataset
     """
 
     infile = os.path.expanduser(infile)
@@ -104,14 +114,23 @@ def load_vis_chunk(
     block_des: Dict[str, slice],
     partition_key: Tuple[int, int, str],
 ) -> Dict[Tuple[int, int], xr.Dataset]:
-    """Read a chunk of a MeasurementSet (MSv2 format) into an Xarray
+    """
+    Read a chunk of a MeasurementSet (MSv2 format) into an Xarray
     dataset, loading the data in memory.
 
-
-
-
-
-
+    Parameters
+    ----------
+    infile : str
+        Input MS filename
+    block_des : Dict[str, slice]
+        specification of chunk to load
+    partition_key : Tuple[int, int, str]
+
+    Returns
+    -------
+    Dict[Tuple[int, int], xr.Dataset]
+        Xarray datasets with chunk of visibility data, one per DDI
+        (spw_id, pol_setup_id pair)
     """
     infile = os.path.expanduser(infile)
 
```
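The rewritten docstring above fully documents the `read_ms` signature, so a usage sketch follows. It is illustrative only and not part of the diff: the MS path and chunk numbers are placeholders.

```python
# Illustrative sketch based on the docstring above; "my_observation.ms" and
# the chunk numbers are placeholder values.
from xradio.vis._vis_utils.ms import read_ms

# A (row, chan, pol) tuple fixes the chunk shape; a list such as [0, 1] would
# instead request auto-chunking optimized for the row and chan dimensions.
cds = read_ms(
    "my_observation.ms",
    subtables=True,              # also read subtables (slower, but complete)
    partition_scheme="intent",   # one of 'scan/subscan', 'scan', 'ddi', 'intent'
    chunks=(10000, 64, 2),       # fixed (row, chan, pol) chunk shape
)
```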
xradio/vis/_vis_utils/zarr.py
CHANGED
```diff
@@ -11,13 +11,19 @@ from ._zarr.read import read_part_keys, read_partitions, read_subtables
 from ._zarr.write import write_metainfo, write_part_keys, write_partitions
 
 
-def is_zarr_vis(inpath) -> bool:
+def is_zarr_vis(inpath: str) -> bool:
     """
     Check if a given path has a visibilities dataset in Zarr format
 
-
+    Parameters
+    ----------
+    inpath : str
+        path to a (possibly) Zarr vis dataset
 
-
+    Returns
+    -------
+    bool
+        whether zarr.open can open this path
     """
     try:
         with zarr.open(Path(inpath, "partition_keys"), mode="r"):
@@ -35,11 +41,19 @@ def read_vis(
     """
     Read a CASAVisSet stored in zarr format.
 
-
-
-
-
-
+    Parameters
+    ----------
+    inpath : str
+        Input Zarr path
+    subtables : bool (Default value = True)
+        Also read and include (metainformation) subtables along with main visibilities data.
+    asdm_subtables : bool (Default value = False)
+        Also read extension subtables named "ASDM_*"
+
+    Returns
+    -------
+    CASAVisSet
+        Main xarray dataset of datasets for this visibility dataset
     """
     inpath = os.path.expanduser(inpath)
     if not os.path.isdir(inpath):
@@ -59,7 +73,7 @@ def read_vis(
     all_time = time.time() - all_start
     logger.info(f"Time to read dataset from_zarr {inpath}: {all_time}")
 
-    vers =
+    vers = "version-WIP"
     descr_add = "read_vis from zarr"
     cds = CASAVisSet(
         metainfo=metainfo,
@@ -71,25 +85,35 @@
 
 
 def write_vis(
-    cds,
+    cds: CASAVisSet,
     outpath: str,
     chunks_on_disk: Union[Dict, None] = None,
     compressor: Union[numcodecs.abc.Codec, None] = None,
 ) -> None:
-    """Write CASA vis dataset to zarr format on disk. When
+    """
+    Write CASA vis dataset to zarr format on disk. When
     chunks_on_disk is not specified the chunking in the input dataset
     is used. When chunks_on_disk is specified that dataset is saved
     using that chunking.
 
-
-
-
-
-
-
-
-
-
+    Parameters
+    ----------
+    cds : CASAVisSet
+        CASA visibilities dataset to write to disk
+    outpath : str
+        output path, generally ends in .zarr
+    chunks_on_disk : Union[Dict, None] (Default value = None)
+        a dictionary with the chunk size that will
+        be used when writing to disk. For example {'time': 20, 'chan': 6}.
+        If chunks_on_disk is not specified the chunking of the dataset will
+        be used.
+    compressor : Union[numcodecs.abc.Codec, None] (Default value = None)
+        the blosc compressor to use when saving the
+        converted data to disk using zarr. If None the zstd compression
+        algorithm is used with compression level 2.
+
+    Returns
+    -------
     """
 
     if compressor is None:
```
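Together, the annotated `write_vis`/`read_vis` signatures above imply a simple round trip. The sketch below is illustrative only: it assumes `cds` is a `CASAVisSet` (for example from `read_ms`) and uses a placeholder output path.

```python
# Sketch of a write/read round trip using the signatures documented above;
# the output path is a placeholder.
import numcodecs

from xradio.vis._vis_utils.zarr import is_zarr_vis, read_vis, write_vis

# Re-chunk on disk and compress with Zstd level 2, which the docstring names
# as the default when compressor is None.
write_vis(
    cds,  # a CASAVisSet, e.g. returned by read_ms
    "my_observation.vis.zarr",
    chunks_on_disk={"time": 20, "chan": 6},
    compressor=numcodecs.Zstd(level=2),
)

if is_zarr_vis("my_observation.vis.zarr"):
    cds2 = read_vis("my_observation.vis.zarr", subtables=True)
```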
xradio/vis/convert_msv2_to_processing_set.py
CHANGED

```diff
@@ -4,10 +4,7 @@ from typing import Dict, Union
 
 import dask
 
-from xradio.vis._vis_utils._ms.
-from xradio.vis._vis_utils._ms.partition_queries import (
-    create_partition_enumerated_product,
-)
+from xradio.vis._vis_utils._ms.partition_queries import create_partitions
 from xradio.vis._vis_utils._ms.conversion import convert_and_write_partition
 
 
@@ -15,8 +12,11 @@ def convert_msv2_to_processing_set(
     in_file: str,
     out_file: str,
     partition_scheme: {"ddi_intent_field", "ddi_state_field"} = "ddi_intent_field",
-    main_chunksize: Union[Dict,
-
+    main_chunksize: Union[Dict, float, None] = None,
+    with_pointing: bool = True,
+    pointing_chunksize: Union[Dict, float, None] = None,
+    pointing_interpolate: bool = False,
+    ephemeris_interpolate: bool = False,
     compressor: numcodecs.abc.Codec = numcodecs.Zstd(level=2),
     storage_backend="zarr",
     parallel: bool = False,
@@ -34,10 +34,16 @@ def convert_msv2_to_processing_set(
     A MS v4 can only contain a single spectral window, polarization setup, intent, and field. Consequently, the MS v2 is partitioned when converting to MS v4.
     The partition_scheme "ddi_intent_field" gives the largest partition that meets the MS v4 specification. The partition_scheme "ddi_state_field" gives a finer granularity where the data is also partitioned by state (the state partitioning will ensure a single intent).
     By default, "ddi_intent_field".
-    main_chunksize : Union[Dict,
-
-
-
+    main_chunksize : Union[Dict, float, None], optional
+        Defines the chunk size of the main dataset. If given as a dictionary, defines the sizes of several dimensions, and acceptable keys are "time", "baseline_id", "antenna_id", "frequency", "polarization". If given as a float, gives the size of a chunk in GiB. By default, None.
+    with_pointing : bool, optional
+        Whether to convert the POINTING subtable into pointing sub-datasets
+    pointing_chunksize : Union[Dict, float, None], optional
+        Defines the chunk size of the pointing dataset. If given as a dictionary, defines the sizes of several dimensions, acceptable keys are "time" and "antenna_id". If given as a float, defines the size of a chunk in GiB. By default, None.
+    pointing_interpolate : bool, optional
+        Whether to interpolate the time axis of the pointing sub-dataset to the time axis of the main dataset
+    ephemeris_interpolate : bool, optional
+        Whether to interpolate the time axis of the ephemeris data variables (of the field_and_source sub-dataset) to the time axis of the main dataset
     compressor : numcodecs.abc.Codec, optional
         The Blosc compressor to use when saving the converted data to disk using Zarr, by default numcodecs.Zstd(level=2).
     storage_backend : {"zarr", "netcdf"}, optional
@@ -48,33 +54,36 @@ def convert_msv2_to_processing_set(
         Whether to overwrite an existing processing set, by default False.
     """
 
-
-
-    )
+    partitions = create_partitions(in_file, partition_scheme=partition_scheme)
+    logger.info("Number of partitions: " + str(len(partitions)))
 
     delayed_list = []
-
-
+    ms_v4_id = 0
+    for partition_info in partitions:
         logger.debug(
-            "DDI "
+            "DDI "
+            + str(partition_info["DATA_DESC_ID"])
+            + ", STATE "
+            + str(partition_info["STATE_ID"])
+            + ", FIELD "
+            + str(partition_info["FIELD_ID"])
+            + ", SCAN "
+            + str(partition_info["SCAN_NUMBER"])
         )
 
-        if partition_scheme == "ddi_intent_field":
-            intent = intents[idx[1]]
-        else:
-            intent = intents[idx[1]] + "_" + str(state_id)
-
         if parallel:
             delayed_list.append(
                 dask.delayed(convert_and_write_partition)(
                     in_file,
                     out_file,
-
-
-
-                    field_id,
-                    ignore_msv2_cols=ignore_msv2_cols,
+                    ms_v4_id,
+                    partition_info=partition_info,
+                    partition_scheme=partition_scheme,
                     main_chunksize=main_chunksize,
+                    with_pointing=with_pointing,
+                    pointing_chunksize=pointing_chunksize,
+                    pointing_interpolate=pointing_interpolate,
+                    ephemeris_interpolate=ephemeris_interpolate,
                     compressor=compressor,
                     overwrite=overwrite,
                 )
@@ -83,16 +92,81 @@ def convert_msv2_to_processing_set(
             convert_and_write_partition(
                 in_file,
                 out_file,
-
-
-
-                field_id,
-                ignore_msv2_cols=ignore_msv2_cols,
+                ms_v4_id,
+                partition_info=partition_info,
+                partition_scheme=partition_scheme,
                 main_chunksize=main_chunksize,
+                with_pointing=with_pointing,
+                pointing_chunksize=pointing_chunksize,
+                pointing_interpolate=pointing_interpolate,
+                ephemeris_interpolate=ephemeris_interpolate,
                 compressor=compressor,
-                storage_backend=storage_backend,
                 overwrite=overwrite,
             )
+        ms_v4_id = ms_v4_id + 1
 
     if parallel:
         dask.compute(delayed_list)
+
+    # delayed_list = []
+    # ms_v4_id = 0
+    # for idx, pair in partition_enumerated_product:
+    #     ddi, state_id, field_id, scan_id = pair
+    #     # logger.debug(
+    #     #     "DDI " + str(ddi) + ", STATE " + str(state_id) + ", FIELD " + str(field_id) + ", SCAN " + str(scan_id)
+    #     # )
+
+    #     # if scan_id == 67: #67
+    #     # logger.debug(
+    #     #     "DDI " + str(ddi) + ", STATE " + str(state_id) + ", FIELD " + str(field_id) + ", SCAN " + str(scan_id)
+    #     # )
+    #     if partition_scheme == "ddi_intent_field":
+    #         intent = intents[idx[1]]
+    #     else:
+    #         intent = intents[idx[1]] + "_" + str(state_id)
+
+    #     if parallel:
+    #         delayed_list.append(
+    #             dask.delayed(convert_and_write_partition)(
+    #                 in_file,
+    #                 out_file,
+    #                 intent,
+    #                 ms_v4_id,
+    #                 ddi,
+    #                 state_id,
+    #                 field_id,
+    #                 scan_id,
+    #                 partition_scheme,
+    #                 main_chunksize=main_chunksize,
+    #                 with_pointing=with_pointing,
+    #                 pointing_chunksize=pointing_chunksize,
+    #                 pointing_interpolate=pointing_interpolate,
+    #                 ephemeris_interpolate=ephemeris_interpolate,
+    #                 compressor=compressor,
+    #                 overwrite=overwrite,
+    #             )
+    #         )
+    #     else:
+    #         convert_and_write_partition(
+    #             in_file,
+    #             out_file,
+    #             intent,
+    #             ms_v4_id,
+    #             ddi,
+    #             state_id,
+    #             field_id,
+    #             scan_id,
+    #             partition_scheme,
+    #             main_chunksize=main_chunksize,
+    #             with_pointing=with_pointing,
+    #             pointing_chunksize=pointing_chunksize,
+    #             pointing_interpolate=pointing_interpolate,
+    #             ephemeris_interpolate=ephemeris_interpolate,
+    #             compressor=compressor,
+    #             storage_backend=storage_backend,
+    #             overwrite=overwrite,
+    #         )
+    #     ms_v4_id = ms_v4_id + 1
+
+    # if parallel:
+    #     dask.compute(delayed_list)
```
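As a reference for the new keyword arguments (not part of the diff), a conversion call might look like the following; the paths and chunk sizes are placeholders.

```python
# Illustrative call exercising the new keyword arguments from this diff;
# in_file and out_file are placeholder paths.
from xradio.vis.convert_msv2_to_processing_set import convert_msv2_to_processing_set

convert_msv2_to_processing_set(
    in_file="my_observation.ms",
    out_file="my_observation.vis.zarr",
    partition_scheme="ddi_intent_field",  # or "ddi_state_field" for finer partitions
    main_chunksize=0.5,                   # float: target chunk size in GiB
    with_pointing=True,
    pointing_chunksize={"time": 1000, "antenna_id": 4},
    pointing_interpolate=True,            # align pointing times with the main time axis
    ephemeris_interpolate=True,
    parallel=False,
    overwrite=True,
)
```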
xradio/vis/load_processing_set.py
CHANGED

```diff
@@ -39,74 +39,51 @@ def load_processing_set(
     processing_set
         In memory representation of processing set (data is represented by Dask.arrays).
     """
-    from xradio._utils.zarr.common import _open_dataset
-
-
+    from xradio._utils.zarr.common import _open_dataset, _get_ms_stores_and_file_system
+
+    file_system, ms_store_list = _get_ms_stores_and_file_system(ps_store)
 
-    s3 = None
     ps = processing_set()
-    for
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-        if load_sub_datasets:
-            from xradio.vis.read_processing_set import _read_sub_xds
-
-            xds.attrs = {
-                **xds.attrs,
-                **_read_sub_xds(
-                    os.path.join(ps_store, ms_dir_name), load=True, s3=s3
-                ),
-            }
-    else:
-        # fall back to the default case of assuming the files are on local disk
-        main_xds = os.path.join(ps_store, ms_dir_name, "MAIN")
-        xds = _open_dataset(main_xds, ms_xds_isel, data_variables, load=True)
-        if load_sub_datasets:
-            from xradio.vis.read_processing_set import _read_sub_xds
-
-            xds.attrs = {
-                **xds.attrs,
-                **_read_sub_xds(os.path.join(ps_store, ms_dir_name), load=True),
-            }
+    for ms_name, ms_xds_isel in sel_parms.items():
+        ms_store = os.path.join(ps_store, ms_name)
+        ms_main_store = os.path.join(ms_store, "MAIN")
+
+        xds = _open_dataset(
+            ms_main_store,
+            file_system,
+            ms_xds_isel,
+            data_variables,
+            load=True,
+        )
+        data_groups = xds.attrs["data_groups"]
+
+        if load_sub_datasets:
+            from xradio.vis.read_processing_set import _read_sub_xds
+
+            sub_xds_dict, field_and_source_xds_dict = _read_sub_xds(
+                ms_store, file_system=file_system, load=True, data_groups=data_groups
+            )
+
+            xds.attrs = {
+                **xds.attrs,
+                **sub_xds_dict,
+            }
+            for data_group_name, data_group_vals in data_groups.items():
+                if "visibility" in data_group_vals:
+                    xds[data_group_vals["visibility"]].attrs["field_and_source_xds"] = (
+                        field_and_source_xds_dict[data_group_name]
+                    )
+                elif "spectrum" in data_group_vals:
+                    xds[data_group_vals["spectrum"]].attrs["field_and_source_xds"] = (
+                        field_and_source_xds_dict[data_group_name]
+                    )
+
+        ps[ms_name] = xds
 
-    ps[ms_dir_name] = xds
     return ps
 
 
 class processing_set_iterator:
-
     def __init__(
         self,
         sel_parms: dict,
```