xradio 0.0.28__py3-none-any.whl → 0.0.30__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (48)
  1. xradio/__init__.py +5 -4
  2. xradio/_utils/array.py +90 -0
  3. xradio/_utils/zarr/common.py +48 -3
  4. xradio/image/_util/zarr.py +4 -1
  5. xradio/schema/__init__.py +24 -6
  6. xradio/schema/bases.py +440 -2
  7. xradio/schema/check.py +96 -55
  8. xradio/schema/dataclass.py +123 -27
  9. xradio/schema/metamodel.py +21 -4
  10. xradio/schema/typing.py +33 -18
  11. xradio/vis/__init__.py +5 -2
  12. xradio/vis/_processing_set.py +71 -32
  13. xradio/vis/_vis_utils/_ms/_tables/create_field_and_source_xds.py +710 -0
  14. xradio/vis/_vis_utils/_ms/_tables/load.py +23 -10
  15. xradio/vis/_vis_utils/_ms/_tables/load_main_table.py +145 -64
  16. xradio/vis/_vis_utils/_ms/_tables/read.py +747 -172
  17. xradio/vis/_vis_utils/_ms/_tables/read_main_table.py +173 -44
  18. xradio/vis/_vis_utils/_ms/_tables/read_subtables.py +79 -28
  19. xradio/vis/_vis_utils/_ms/_tables/write.py +102 -45
  20. xradio/vis/_vis_utils/_ms/_tables/write_exp_api.py +127 -65
  21. xradio/vis/_vis_utils/_ms/chunks.py +58 -21
  22. xradio/vis/_vis_utils/_ms/conversion.py +582 -102
  23. xradio/vis/_vis_utils/_ms/descr.py +52 -20
  24. xradio/vis/_vis_utils/_ms/msv2_to_msv4_meta.py +72 -35
  25. xradio/vis/_vis_utils/_ms/msv4_infos.py +0 -59
  26. xradio/vis/_vis_utils/_ms/msv4_sub_xdss.py +76 -9
  27. xradio/vis/_vis_utils/_ms/optimised_functions.py +0 -46
  28. xradio/vis/_vis_utils/_ms/partition_queries.py +308 -119
  29. xradio/vis/_vis_utils/_ms/partitions.py +82 -25
  30. xradio/vis/_vis_utils/_ms/subtables.py +32 -14
  31. xradio/vis/_vis_utils/_utils/partition_attrs.py +30 -11
  32. xradio/vis/_vis_utils/_utils/xds_helper.py +136 -45
  33. xradio/vis/_vis_utils/_zarr/read.py +60 -22
  34. xradio/vis/_vis_utils/_zarr/write.py +83 -9
  35. xradio/vis/_vis_utils/ms.py +48 -29
  36. xradio/vis/_vis_utils/zarr.py +44 -20
  37. xradio/vis/convert_msv2_to_processing_set.py +43 -32
  38. xradio/vis/load_processing_set.py +38 -61
  39. xradio/vis/read_processing_set.py +64 -96
  40. xradio/vis/schema.py +687 -0
  41. xradio/vis/vis_io.py +75 -43
  42. {xradio-0.0.28.dist-info → xradio-0.0.30.dist-info}/LICENSE.txt +6 -1
  43. {xradio-0.0.28.dist-info → xradio-0.0.30.dist-info}/METADATA +10 -5
  44. xradio-0.0.30.dist-info/RECORD +73 -0
  45. {xradio-0.0.28.dist-info → xradio-0.0.30.dist-info}/WHEEL +1 -1
  46. xradio/vis/model.py +0 -497
  47. xradio-0.0.28.dist-info/RECORD +0 -71
  48. {xradio-0.0.28.dist-info → xradio-0.0.30.dist-info}/top_level.txt +0 -0
@@ -24,7 +24,8 @@ def read_ms(
  expand: bool = False,
  **kwargs: str,
  ) -> CASAVisSet:
- """Read a MeasurementSet (MSv2 format) into a next generation CASA
+ """
+ Read a MeasurementSet (MSv2 format) into a next generation CASA
  dataset (visibilities dataset as a set of Xarray datasets).

  The MS is partitioned into multiple sub- Xarray datasets (where the data variables are read as
@@ -33,28 +34,37 @@ def read_ms(
  and polarizations) and, subject to experimentation, by scan and subscan. This results in multiple
  partitions as xarray datasets (xds) contained within a main xds (mxds).

- :param infile: Input MS filename
- :param subtables: Also read and include subtables along with main table selection. Default False will
- omit subtables (faster)
- :param asdm_subtables: in addition to MeasurementSet subtables (if enabled), also read extension
- subtables named "ASDM_*"
- :param partition_scheme: (experimenting) Whether to partition sub-xds datasets by scan/subscan
- (in addition to DDI), or other alternative partitioning schemes. Accepted values: 'scan/subscan',
- 'scan', 'ddi', 'intent'. Default: 'intent'
- :param chunks: Can be used to set a specific chunk shape (with a tuple of ints), or to control the
- optimization used for automatic chunking (with a list of ints). A tuple of ints in the form of (row,
- chan, pol) will use a fixed chunk shape. A list or numpy array of ints in the form of [idx1, etc]
- will trigger auto-chunking optimized for the given indices, with row=0, chan=1, pol=2. Default None
- uses auto-chunking with a best fit across all dimensions (probably sub-optimal for most cases).
- :param expand: (to be removed) Whether or not to return the original flat row structure of the MS (False)
- or expand the rows to time x baseline dimensions (True). Expanding the rows allows for easier indexing
- and parallelization across time and baseline dimensions, at the cost of some conversion time. Default
- False
- :param **kwargs: (to be removed?) Selection parameters from the standard way of making CASA MS
- selections. Supported keys are: spw, field, scan, baseline, time, scanintent, uvdist, polarization,
- array, observation. Values are strings.
-
- :return: Main xarray dataset of datasets for this visibility dataset
+ Parameters
+ ----------
+ infile : str
+ Input MS filename
+ subtables : bool (Default value = True)
+ Also read and include subtables along with main table selection. Default False will
+ omit subtables (faster)
+ asdm_subtables : bool (Default value = False)
+ in addition to MeasurementSet subtables (if enabled), also read extension
+ subtables named "ASDM_*"
+ partition_scheme : str (Default value = "intent")
+ experimenting) Whether to partition sub-xds datasets by scan/subscan
+ (in addition to DDI), or other alternative partitioning schemes. Accepted values: 'scan/subscan',
+ 'scan', 'ddi', 'intent'. Default: 'intent'
+ chunks : Union[Tuple[int], List[int]] (Default value = None)
+ Can be used to set a specific chunk shape (with a tuple of ints), or to control the
+ optimization used for automatic chunking (with a list of ints). A tuple of ints in the form of (row,
+ chan, pol) will use a fixed chunk shape. A list or numpy array of ints in the form of [idx1, etc]
+ will trigger auto-chunking optimized for the given indices, with row=0, chan=1, pol=2. Default None
+ uses auto-chunking with a best fit across all dimensions (probably sub-optimal for most cases).
+ expand : bool (Default value = False)
+ to be removed) Whether or not to return the original flat row structure of the MS (False)
+ or expand the rows to time x baseline dimensions (True). Expanding the rows allows for easier indexing
+ and parallelization across time and baseline dimensions, at the cost of some conversion time.
+ **kwargs: str :
+
+
+ Returns
+ -------
+ CASAVisSet
+ Main xarray dataset of datasets for this visibility dataset
  """

  infile = os.path.expanduser(infile)
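
The reworked read_ms docstring above documents two ways to use the chunks argument: a (row, chan, pol) tuple for a fixed chunk shape, or a list of dimension indices that auto-chunking should be optimised for. A minimal usage sketch, assuming read_ms is imported from the module this hunk belongs to (xradio/vis/_vis_utils/ms.py) and using an illustrative MS path:

# Sketch only: the import path and MS filename are assumptions, not part of this diff.
from xradio.vis._vis_utils.ms import read_ms

# Fixed chunk shape given as a (row, chan, pol) tuple.
cds = read_ms("my_data.ms", subtables=True, chunks=(10000, 64, 2))

# Auto-chunking optimised for the row (0) and chan (1) dimensions.
cds = read_ms("my_data.ms", partition_scheme="intent", chunks=[0, 1])
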
@@ -104,14 +114,23 @@ def load_vis_chunk(
  block_des: Dict[str, slice],
  partition_key: Tuple[int, int, str],
  ) -> Dict[Tuple[int, int], xr.Dataset]:
- """Read a chunk of a MeasurementSet (MSv2 format) into an Xarray
+ """
+ Read a chunk of a MeasurementSet (MSv2 format) into an Xarray
  dataset, loading the data in memory.

- :param infile: Input MS filename
- :param block_des: specification of chunk to load
-
- :return: Xarray datasets with chunk of visibility data, one per DDI
- (spw_id, pol_setup_id pair)
+ Parameters
+ ----------
+ infile : str
+ Input MS filename
+ block_des : Dict[str, slice]
+ specification of chunk to load
+ partition_key: partition_key: Tuple[int, int, str]
+
+ Returns
+ -------
+ Dict[Tuple[int, int], xr.Dataset]
+ Xarray datasets with chunk of visibility data, one per DDI
+ (spw_id, pol_setup_id pair)
  """
  infile = os.path.expanduser(infile)

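In load_vis_chunk above, block_des maps dimension names to slices and partition_key identifies which partition to load. A hedged call sketch; the dimension names, partition_key values, and import path are assumptions, since the hunk only shows the type hints:

# Sketch only: dimension names and partition_key values are illustrative.
from xradio.vis._vis_utils.ms import load_vis_chunk

block_des = {"time": slice(0, 100), "chan": slice(0, 64)}
chunk_xdss = load_vis_chunk("my_data.ms", block_des, partition_key=(0, 0, "scan_intent"))
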
@@ -11,13 +11,19 @@ from ._zarr.read import read_part_keys, read_partitions, read_subtables
  from ._zarr.write import write_metainfo, write_part_keys, write_partitions


- def is_zarr_vis(inpath) -> bool:
+ def is_zarr_vis(inpath: str) -> bool:
  """
  Check if a given path has a visibilities dataset in Zarr format

- :param inpath: path to a (possibly) Zarr vis dataset
+ Parameters
+ ----------
+ inpath : str
+ path to a (possibly) Zarr vis dataset

- :return: whether zarr.open can open this path
+ Returns
+ -------
+ bool
+ whether zarr.open can open this path
  """
  try:
  with zarr.open(Path(inpath, "partition_keys"), mode="r"):
@@ -35,11 +41,19 @@ def read_vis(
  """
  Read a CASAVisSet stored in zarr format.

- :param inpath: Input Zarr path
- :param subtables: Also read and (metainformation) subtables along with main visibilities data.
- :param asdm_subtables: Also read extension subtables named "ASDM_*"
-
- :return: Main xarray dataset of datasets for this visibility dataset
+ Parameters
+ ----------
+ inpath : str
+ Input Zarr path
+ subtables : bool (Default value = True)
+ Also read and (metainformation) subtables along with main visibilities data.
+ asdm_subtables : bool (Default value = False)
+ Also read extension subtables named "ASDM_*"
+
+ Returns
+ -------
+ CASAVisSet
+ Main xarray dataset of datasets for this visibility dataset
  """
  inpath = os.path.expanduser(inpath)
  if not os.path.isdir(inpath):
@@ -59,7 +73,7 @@ def read_vis(
  all_time = time.time() - all_start
  logger.info(f"Time to read dataset from_zarr {inpath}: {all_time}")

- vers = xradio.__version__
+ vers = "version-WIP"
  descr_add = "read_vis from zarr"
  cds = CASAVisSet(
  metainfo=metainfo,
@@ -71,25 +85,35 @@ def read_vis(


  def write_vis(
- cds,
+ cds: CASAVisSet,
  outpath: str,
  chunks_on_disk: Union[Dict, None] = None,
  compressor: Union[numcodecs.abc.Codec, None] = None,
  ) -> None:
- """Write CASA vis dataset to zarr format on disk. When
+ """
+ Write CASA vis dataset to zarr format on disk. When
  chunks_on_disk is not specified the chunking in the input dataset
  is used. When chunks_on_disk is specified that dataset is saved
  using that chunking.

- :param cds: CASA visibilities dataset to write to disk
- :param outpath: output path, generally ends in .zarr
- :param chunks_on_disk: a dictionary with the chunk size that will
- be used when writing to disk. For example {'time': 20, 'chan': 6}.
- If chunks_on_disk is not specified the chunking of dataset will
- be used.
- :param compressor: the blosc compressor to use when saving the
- converted data to disk using zarr. If None the zstd compression
- algorithm used with compression level 2.
+ Parameters
+ ----------
+ cds : CASAVisSet
+ CASA visibilities dataset to write to disk
+ outpath : str
+ output path, generally ends in .zarr
+ chunks_on_disk : Union[Dict, None] = None (Default value = None)
+ a dictionary with the chunk size that will
+ be used when writing to disk. For example {'time': 20, 'chan': 6}.
+ If chunks_on_disk is not specified the chunking of dataset will
+ be used.
+ compressor : Union[numcodecs.abc.Codec, None] (Default value = None)
+ the blosc compressor to use when saving the
+ converted data to disk using zarr. If None the zstd compression
+ algorithm used with compression level 2.
+
+ Returns
+ -------
  """

  if compressor is None:
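
A usage sketch of write_vis based on the docstring above, assuming both functions are imported from the module this hunk belongs to (xradio/vis/_vis_utils/zarr.py); the paths and chunk sizes are illustrative, and the compressor shown is the documented zstd level-2 default:

# Sketch only: cds would be a CASAVisSet (e.g. from read_vis); paths and chunks are illustrative.
import numcodecs
from xradio.vis._vis_utils.zarr import read_vis, write_vis

cds = read_vis("input.vis.zarr")
write_vis(
    cds,
    outpath="output.vis.zarr",
    chunks_on_disk={"time": 20, "chan": 6},  # example chunking from the docstring
    compressor=numcodecs.Zstd(level=2),      # same as the documented default
)
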
@@ -4,10 +4,7 @@ from typing import Dict, Union

  import dask

- from xradio.vis._vis_utils._ms.msv2_msv3 import ignore_msv2_cols
- from xradio.vis._vis_utils._ms.partition_queries import (
- create_partition_enumerated_product,
- )
+ from xradio.vis._vis_utils._ms.partition_queries import create_partitions
  from xradio.vis._vis_utils._ms.conversion import convert_and_write_partition


@@ -15,8 +12,11 @@ def convert_msv2_to_processing_set(
  in_file: str,
  out_file: str,
  partition_scheme: {"ddi_intent_field", "ddi_state_field"} = "ddi_intent_field",
- main_chunksize: Union[Dict, str, None] = None,
- pointing_chunksize: Union[Dict, str, None] = None,
+ main_chunksize: Union[Dict, float, None] = None,
+ with_pointing: bool = True,
+ pointing_chunksize: Union[Dict, float, None] = None,
+ pointing_interpolate: bool = False,
+ ephemeris_interpolate: bool = False,
  compressor: numcodecs.abc.Codec = numcodecs.Zstd(level=2),
  storage_backend="zarr",
  parallel: bool = False,
@@ -34,10 +34,16 @@ def convert_msv2_to_processing_set(
  A MS v4 can only contain a single spectral window, polarization setup, intent, and field. Consequently, the MS v2 is partitioned when converting to MS v4.
  The partition_scheme "ddi_intent_field" gives the largest partition that meets the MS v4 specification. The partition_scheme "ddi_state_field" gives a finer granularity where the data is also partitioned by state (the state partitioning will ensure a single intent).
  By default, "ddi_intent_field".
- main_chunksize : Union[Dict, str, None], optional
- A dictionary that defines the chunk size of the main dataset. Acceptable keys are "time", "baseline", "antenna", "frequency", "polarization". By default, None.
- pointing_chunksize : Union[Dict, str, None], optional
- A dictionary that defines the chunk size of the pointing dataset. Acceptable keys are "time", "antenna", "polarization". By default, None.
+ main_chunksize : Union[Dict, float, None], optional
+ Defines the chunk size of the main dataset. If given as a dictionary, defines the sizes of several dimensions, and acceptable keys are "time", "baseline_id", "antenna_id", "frequency", "polarization". If given as a float, gives the size of a chunk in GiB. By default, None.
+ with_pointing : bool, optional
+ Whether to convert the POINTING subtable into pointing sub-datasets
+ pointing_chunksize : Union[Dict, float, None], optional
+ Defines the chunk size of the pointing dataset. If given as a dictionary, defines the sizes of several dimensions, acceptable keys are "time" and "antenna_id". If given as a float, defines the size of a chunk in GiB. By default, None.
+ pointing_interpolate : bool, optional
+ Whether to interpolate the time axis of the pointing sub-dataset to the time axis of the main dataset
+ ephemeris_interpolate : bool, optional
+ Whether to interpolate the time axis of the ephemeris data variables (of the field_and_source sub-dataset) to the time axis of the main dataset
  compressor : numcodecs.abc.Codec, optional
  The Blosc compressor to use when saving the converted data to disk using Zarr, by default numcodecs.Zstd(level=2).
  storage_backend : {"zarr", "netcdf"}, optional
@@ -48,33 +54,36 @@ def convert_msv2_to_processing_set(
  Whether to overwrite an existing processing set, by default False.
  """

- partition_enumerated_product, intents = create_partition_enumerated_product(
- in_file, partition_scheme
- )
+ partitions = create_partitions(in_file, partition_scheme=partition_scheme)
+ logger.info("Number of partitions: " + str(len(partitions)))

  delayed_list = []
- for idx, pair in partition_enumerated_product:
- ddi, state_id, field_id = pair
+ ms_v4_id = 0
+ for partition_info in partitions:
  logger.debug(
- "DDI " + str(ddi) + ", STATE " + str(state_id) + ", FIELD " + str(field_id)
+ "DDI "
+ + str(partition_info["DATA_DESC_ID"])
+ + ", STATE "
+ + str(partition_info["STATE_ID"])
+ + ", FIELD "
+ + str(partition_info["FIELD_ID"])
+ + ", SCAN "
+ + str(partition_info["SCAN_NUMBER"])
  )

- if partition_scheme == "ddi_intent_field":
- intent = intents[idx[1]]
- else:
- intent = intents[idx[1]] + "_" + str(state_id)
-
  if parallel:
  delayed_list.append(
  dask.delayed(convert_and_write_partition)(
  in_file,
  out_file,
- intent,
- ddi,
- state_id,
- field_id,
- ignore_msv2_cols=ignore_msv2_cols,
+ ms_v4_id,
+ partition_info=partition_info,
+ partition_scheme=partition_scheme,
  main_chunksize=main_chunksize,
+ with_pointing=with_pointing,
+ pointing_chunksize=pointing_chunksize,
+ pointing_interpolate=pointing_interpolate,
+ ephemeris_interpolate=ephemeris_interpolate,
  compressor=compressor,
  overwrite=overwrite,
  )
@@ -83,16 +92,18 @@ def convert_msv2_to_processing_set(
  convert_and_write_partition(
  in_file,
  out_file,
- intent,
- ddi,
- state_id,
- field_id,
- ignore_msv2_cols=ignore_msv2_cols,
+ ms_v4_id,
+ partition_info=partition_info,
+ partition_scheme=partition_scheme,
  main_chunksize=main_chunksize,
+ with_pointing=with_pointing,
+ pointing_chunksize=pointing_chunksize,
+ pointing_interpolate=pointing_interpolate,
+ ephemeris_interpolate=ephemeris_interpolate,
  compressor=compressor,
- storage_backend=storage_backend,
  overwrite=overwrite,
  )
+ ms_v4_id = ms_v4_id + 1

  if parallel:
  dask.compute(delayed_list)
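
A call sketch reflecting the reworked partitioning loop and the new keyword arguments shown above; the input/output paths and chunk sizes are illustrative, and the top-level import path is an assumption:

# Sketch only: paths and chunk sizes are illustrative values, not taken from this diff.
from xradio.vis.convert_msv2_to_processing_set import convert_msv2_to_processing_set

convert_msv2_to_processing_set(
    in_file="my_data.ms",
    out_file="my_data.vis.zarr",
    partition_scheme="ddi_intent_field",
    main_chunksize=0.25,                 # a float is interpreted as chunk size in GiB
    with_pointing=True,
    pointing_chunksize={"time": 1000, "antenna_id": 4},
    pointing_interpolate=False,
    ephemeris_interpolate=False,
    parallel=False,
    overwrite=True,
)
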
@@ -39,74 +39,51 @@ def load_processing_set(
  processing_set
  In memory representation of processing set (data is represented by Dask.arrays).
  """
- from xradio._utils.zarr.common import _open_dataset
- import s3fs
- from botocore.exceptions import NoCredentialsError
+ from xradio._utils.zarr.common import _open_dataset, _get_ms_stores_and_file_system
+
+ file_system, ms_store_list = _get_ms_stores_and_file_system(ps_store)

- s3 = None
  ps = processing_set()
- for ms_dir_name, ms_xds_isel in sel_parms.items():
-
- # before the _open_dataset call, check if dealing with an S3 bucket URL
- if ps_store.startswith("s3"):
- if not ps_store.endswith("/"):
- # just for consistency, as there is no os.path equivalent in s3fs
- ps_store = ps_store + "/"
-
- try:
- s3 = s3fs.S3FileSystem(anon=False, requester_pays=False)
-
- main_xds = ps_store + ms_dir_name + "/MAIN"
- xds = _open_dataset(
- main_xds, ms_xds_isel, data_variables, load=True, s3=s3
- )
-
- if load_sub_datasets:
- from xradio.vis.read_processing_set import _read_sub_xds
-
- xds.attrs = {
- **xds.attrs,
- **_read_sub_xds(
- os.path.join(ps_store, ms_dir_name), load=True, s3=s3
- ),
- }
-
- except (NoCredentialsError, PermissionError) as e:
- # only public, read-only buckets will be accessible
- s3 = s3fs.S3FileSystem(anon=True)
-
- main_xds = ps_store + ms_dir_name + "/MAIN"
- xds = _open_dataset(
- main_xds, ms_xds_isel, data_variables, load=True, s3=s3
- )
-
- if load_sub_datasets:
- from xradio.vis.read_processing_set import _read_sub_xds
-
- xds.attrs = {
- **xds.attrs,
- **_read_sub_xds(
- os.path.join(ps_store, ms_dir_name), load=True, s3=s3
- ),
- }
- else:
- # fall back to the default case of assuming the files are on local disk
- main_xds = os.path.join(ps_store, ms_dir_name, "MAIN")
- xds = _open_dataset(main_xds, ms_xds_isel, data_variables, load=True)
- if load_sub_datasets:
- from xradio.vis.read_processing_set import _read_sub_xds
-
- xds.attrs = {
- **xds.attrs,
- **_read_sub_xds(os.path.join(ps_store, ms_dir_name), load=True),
- }
+ for ms_name, ms_xds_isel in sel_parms.items():
+ ms_store = os.path.join(ps_store, ms_name)
+ ms_main_store = os.path.join(ms_store, "MAIN")
+
+ xds = _open_dataset(
+ ms_main_store,
+ file_system,
+ ms_xds_isel,
+ data_variables,
+ load=True,
+ )
+ data_groups = xds.attrs["data_groups"]
+
+ if load_sub_datasets:
+ from xradio.vis.read_processing_set import _read_sub_xds
+
+ sub_xds_dict, field_and_source_xds_dict = _read_sub_xds(
+ ms_store, file_system=file_system, load=True, data_groups=data_groups
+ )
+
+ xds.attrs = {
+ **xds.attrs,
+ **sub_xds_dict,
+ }
+ for data_group_name, data_group_vals in data_groups.items():
+ if "visibility" in data_group_vals:
+ xds[data_group_vals["visibility"]].attrs["field_and_source_xds"] = (
+ field_and_source_xds_dict[data_group_name]
+ )
+ elif "spectrum" in data_group_vals:
+ xds[data_group_vals["spectrum"]].attrs["field_and_source_xds"] = (
+ field_and_source_xds_dict[data_group_name]
+ )
+
+ ps[ms_name] = xds

- ps[ms_dir_name] = xds
  return ps


  class processing_set_iterator:
-
  def __init__(
  self,
  sel_parms: dict,
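
The new load_processing_set loop above keys sel_parms by MS sub-directory name and applies the per-MS selection when opening each MAIN dataset. A hedged sketch of a call; only ps_store, sel_parms, data_variables, and load_sub_datasets appear in the hunk, so the exact signature, MS name, selection keys, and data variable names below are assumptions:

# Sketch only: the MS name, selection keys, and data variable names are illustrative.
from xradio.vis.load_processing_set import load_processing_set

sel_parms = {
    "my_data_ddi_0_intent_OBSERVE_TARGET_field_0": {"time": slice(0, 50)},
}
ps = load_processing_set(
    ps_store="my_data.vis.zarr",
    sel_parms=sel_parms,
    data_variables=["VISIBILITY", "FLAG"],
    load_sub_datasets=True,
)
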
@@ -2,13 +2,13 @@ import os
  import xarray as xr
  from ._processing_set import processing_set
  import graphviper.utils.logger as logger
- from xradio._utils.zarr.common import _open_dataset
+ from xradio._utils.zarr.common import _open_dataset, _get_ms_stores_and_file_system
  import s3fs
- from botocore.exceptions import NoCredentialsError


  def read_processing_set(
- ps_store: str, intents: list = None, fields: str = None
+ ps_store: str,
+ intents: list = None,
  ) -> processing_set:
  """Creates a lazy representation of a Processing Set (only meta-data is loaded into memory).

@@ -19,119 +19,87 @@ def read_processing_set(
  intents : list, optional
  A list of the intents to be read for example ['OBSERVE_TARGET#ON_SOURCE']. The intents in a processing set can be seem by calling processing_set.summary().
  By default None, which will read all intents.
- fields : str, optional
- The list of field names that will be read, by default None which will read all fields.

  Returns
  -------
  processing_set
  Lazy representation of processing set (data is represented by Dask.arrays).
  """
- s3 = None
- ps_store_is_s3dir = None
+ file_system, ms_store_list = _get_ms_stores_and_file_system(ps_store)

- if os.path.isdir(ps_store):
- ps_store_is_s3dir = False
- # default to assuming the data are accessible on local file system
- items = os.listdir(ps_store)
-
- elif ps_store.startswith("s3"):
- # only if not found locally, check if dealing with an S3 bucket URL
- ps_store_is_s3dir = True
- if not ps_store.endswith("/"):
- # just for consistency, as there is no os.path equivalent in s3fs
- ps_store = ps_store + "/"
-
- try:
- # initialize the S3 "file system", first attempting to use pre-configured credentials
- s3 = s3fs.S3FileSystem(anon=False, requester_pays=False)
-
- items = [bd.split(sep="/")[-1] for bd in s3.listdir(ps_store, detail=False)]
+ ps = processing_set()
+ data_group = "base"
+ for ms_name in ms_store_list:
+ # try:
+ ms_store = os.path.join(ps_store, ms_name)
+ ms_main_store = os.path.join(ms_store, "MAIN")

- except (NoCredentialsError, PermissionError) as e:
- # only public, read-only buckets will be accessible
- # we will want to add messaging and error handling here
- s3 = s3fs.S3FileSystem(anon=True)
+ xds = _open_dataset(ms_main_store, file_system)
+ data_groups = xds.attrs["data_groups"]

- items = [bd.split(sep="/")[-1] for bd in s3.listdir(ps_store, detail=False)]
+ print(xds)

- else:
- raise (
- FileNotFoundError,
- f"Could not find {ps_store} either locally or in the cloud.",
- )
+ if (intents is None) or (xds.attrs["partition_info"]["intent"] in intents):
+ sub_xds_dict, field_and_source_xds_dict = _read_sub_xds(
+ ms_store, file_system=file_system, data_groups=data_groups
+ )

- ms_xds = xr.Dataset()
- ps = processing_set()
- data_group = "base"
- for ms_dir_name in items:
- if "ddi" in ms_dir_name:
- if ps_store_is_s3dir:
- store_path = ps_store + ms_dir_name
- store_path_main = store_path + "/MAIN"
- else:
- store_path_main = os.path.join(ps_store, ms_dir_name, "MAIN")
- store_path = os.path.split(store_path_main)[0]
- if s3 is not None:
- xds = _open_dataset(store_path_main, s3=s3)
- else:
- xds = _open_dataset(store_path_main)
-
- if (intents is None) or (xds.attrs["intent"] in intents):
- data_name = _get_data_name(xds, data_group)
-
- if (fields is None) or (
- xds[data_name].attrs["field_info"]["name"] in fields
- ):
- if s3 is not None:
- xds.attrs = {
- **xds.attrs,
- **_read_sub_xds(store_path, s3=s3),
- }
- ps[ms_dir_name] = xds
- else:
- xds.attrs = {
- **xds.attrs,
- **_read_sub_xds(store_path),
- }
- ps[ms_dir_name] = xds
+ xds.attrs = {
+ **xds.attrs,
+ **sub_xds_dict,
+ }
+
+ for data_group_name, data_group_vals in data_groups.items():
+ if "visibility" in data_group_vals:
+ xds[data_group_vals["visibility"]].attrs["field_and_source_xds"] = (
+ field_and_source_xds_dict[data_group_name]
+ )
+ elif "spectrum" in data_group_vals:
+ xds[data_group_vals["spectrum"]].attrs["field_and_source_xds"] = (
+ field_and_source_xds_dict[data_group_name]
+ )
+
+ ps[ms_name] = xds
+ # except Exception as e:
+ # logger.warning(f"Could not read {ms_name} due to {e}")
+ # continue

  return ps


- def _read_sub_xds(ms_store, load=False, **kwargs):
+ def _read_sub_xds(ms_store, file_system, data_groups, load=False):
  sub_xds_dict = {}
+ field_and_source_xds_dict = {}

- sub_xds = {
- "antenna_xds": "ANTENNA",
+ xds_names = {
+ "ANTENNA": "antenna_xds",
+ "WEATHER": "weather_xds",
+ "POINTING": "pointing_xds",
  }
- for sub_xds_key, sub_xds_name in sub_xds.items():
- if "s3" in kwargs.keys():
- joined_store = ms_store + "/" + sub_xds_name
- sub_xds_dict[sub_xds_key] = _open_dataset(
- joined_store, load=load, s3=kwargs["s3"]
- )
+
+ if isinstance(file_system, s3fs.core.S3FileSystem):
+ file_names = [
+ bd.split(sep="/")[-1] for bd in file_system.listdir(ms_store, detail=False)
+ ]
+ else:
+ file_names = file_system.listdir(ms_store)
+ file_names = [item for item in file_names if not item.startswith(".")]
+
+ file_names.remove("MAIN")
+
+ field_dict = {"FIELD_AND_SOURCE_" + key.upper(): key for key in data_groups.keys()}
+
+ # field_and_source_xds_name_start = "FIELD"
+ for n in file_names:
+ xds = _open_dataset(
+ os.path.join(ms_store, n), load=load, file_system=file_system
+ )
+ if n in field_dict.keys():
+ field_and_source_xds_dict[field_dict[n]] = xds
  else:
- sub_xds_dict[sub_xds_key] = _open_dataset(
- os.path.join(ms_store, sub_xds_name), load=load
- )
+ sub_xds_dict[xds_names[n]] = xds

- optional_sub_xds = {
- "weather_xds": "WEATHER",
- "pointing_xds": "POINTING",
- }
- for sub_xds_key, sub_xds_name in optional_sub_xds.items():
- sub_xds_path = os.path.join(ms_store, sub_xds_name)
- if os.path.isdir(sub_xds_path):
- sub_xds_dict[sub_xds_key] = _open_dataset(sub_xds_path, load=load)
- elif "s3" in kwargs.keys():
- joined_store = ms_store + "/" + sub_xds_name
- if kwargs["s3"].isdir(joined_store):
- sub_xds_dict[sub_xds_key] = _open_dataset(
- joined_store, load=load, s3=kwargs["s3"]
- )
-
- return sub_xds_dict
+ return sub_xds_dict, field_and_source_xds_dict


  def _get_data_name(xds, data_group):
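
A usage sketch of the reworked lazy reader above, including how the new per-data-group field_and_source_xds attribute can be reached from each visibility or spectrum variable; the store path is illustrative, the intent string is the docstring's own example, and the dict-style iteration over the processing_set is an assumption:

# Sketch only: the store path is illustrative; dict-style iteration over ps is assumed.
from xradio.vis.read_processing_set import read_processing_set

ps = read_processing_set("my_data.vis.zarr", intents=["OBSERVE_TARGET#ON_SOURCE"])
for ms_name, xds in ps.items():
    for group_name, group_vals in xds.attrs["data_groups"].items():
        if "visibility" in group_vals:
            field_and_source = xds[group_vals["visibility"]].attrs["field_and_source_xds"]
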