xradio 0.0.22__tar.gz → 0.0.24__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (81) hide show
  1. {xradio-0.0.22/src/xradio.egg-info → xradio-0.0.24}/PKG-INFO +1 -1
  2. {xradio-0.0.22 → xradio-0.0.24}/pyproject.toml +1 -1
  3. xradio-0.0.24/src/xradio/_utils/zarr/__init__.py +0 -0
  4. xradio-0.0.24/src/xradio/_utils/zarr/common.py +113 -0
  5. {xradio-0.0.22 → xradio-0.0.24}/src/xradio/image/_util/_zarr/zarr_low_level.py +4 -8
  6. xradio-0.0.24/src/xradio/image/_util/zarr.py +50 -0
  7. {xradio-0.0.22 → xradio-0.0.24}/src/xradio/image/image.py +11 -4
  8. xradio-0.0.24/src/xradio/vis/_processing_set.py +103 -0
  9. xradio-0.0.24/src/xradio/vis/load_processing_set.py +127 -0
  10. xradio-0.0.24/src/xradio/vis/read_processing_set.py +85 -0
  11. {xradio-0.0.22 → xradio-0.0.24/src/xradio.egg-info}/PKG-INFO +1 -1
  12. {xradio-0.0.22 → xradio-0.0.24}/src/xradio.egg-info/SOURCES.txt +2 -0
  13. xradio-0.0.22/src/xradio/image/_util/zarr.py +0 -21
  14. xradio-0.0.22/src/xradio/vis/_processing_set.py +0 -45
  15. xradio-0.0.22/src/xradio/vis/load_processing_set.py +0 -189
  16. xradio-0.0.22/src/xradio/vis/read_processing_set.py +0 -43
  17. {xradio-0.0.22 → xradio-0.0.24}/LICENSE.txt +0 -0
  18. {xradio-0.0.22 → xradio-0.0.24}/MANIFEST.in +0 -0
  19. {xradio-0.0.22 → xradio-0.0.24}/README.md +0 -0
  20. {xradio-0.0.22 → xradio-0.0.24}/setup.cfg +0 -0
  21. {xradio-0.0.22 → xradio-0.0.24}/src/xradio/__init__.py +0 -0
  22. {xradio-0.0.22 → xradio-0.0.24}/src/xradio/_utils/__init__.py +0 -0
  23. {xradio-0.0.22 → xradio-0.0.24}/src/xradio/_utils/_casacore/tables.py +0 -0
  24. {xradio-0.0.22 → xradio-0.0.24}/src/xradio/_utils/common.py +0 -0
  25. {xradio-0.0.22 → xradio-0.0.24}/src/xradio/image/__init__.py +0 -0
  26. {xradio-0.0.22 → xradio-0.0.24}/src/xradio/image/_util/__init__.py +0 -0
  27. {xradio-0.0.22 → xradio-0.0.24}/src/xradio/image/_util/_casacore/__init__.py +0 -0
  28. {xradio-0.0.22 → xradio-0.0.24}/src/xradio/image/_util/_casacore/common.py +0 -0
  29. {xradio-0.0.22 → xradio-0.0.24}/src/xradio/image/_util/_casacore/xds_from_casacore.py +0 -0
  30. {xradio-0.0.22 → xradio-0.0.24}/src/xradio/image/_util/_casacore/xds_to_casacore.py +0 -0
  31. {xradio-0.0.22 → xradio-0.0.24}/src/xradio/image/_util/_fits/xds_from_fits.py +0 -0
  32. {xradio-0.0.22 → xradio-0.0.24}/src/xradio/image/_util/_zarr/common.py +0 -0
  33. {xradio-0.0.22 → xradio-0.0.24}/src/xradio/image/_util/_zarr/xds_from_zarr.py +0 -0
  34. {xradio-0.0.22 → xradio-0.0.24}/src/xradio/image/_util/_zarr/xds_to_zarr.py +0 -0
  35. {xradio-0.0.22 → xradio-0.0.24}/src/xradio/image/_util/casacore.py +0 -0
  36. {xradio-0.0.22 → xradio-0.0.24}/src/xradio/image/_util/common.py +0 -0
  37. {xradio-0.0.22 → xradio-0.0.24}/src/xradio/image/_util/fits.py +0 -0
  38. {xradio-0.0.22 → xradio-0.0.24}/src/xradio/image/_util/image_factory.py +0 -0
  39. {xradio-0.0.22 → xradio-0.0.24}/src/xradio/schema/__init__.py +0 -0
  40. {xradio-0.0.22 → xradio-0.0.24}/src/xradio/schema/bases.py +0 -0
  41. {xradio-0.0.22 → xradio-0.0.24}/src/xradio/schema/check.py +0 -0
  42. {xradio-0.0.22 → xradio-0.0.24}/src/xradio/schema/dataclass.py +0 -0
  43. {xradio-0.0.22 → xradio-0.0.24}/src/xradio/schema/metamodel.py +0 -0
  44. {xradio-0.0.22 → xradio-0.0.24}/src/xradio/schema/typing.py +0 -0
  45. {xradio-0.0.22 → xradio-0.0.24}/src/xradio/vis/__init__.py +0 -0
  46. {xradio-0.0.22 → xradio-0.0.24}/src/xradio/vis/_vis_utils/__init__.py +0 -0
  47. {xradio-0.0.22 → xradio-0.0.24}/src/xradio/vis/_vis_utils/_ms/_tables/load.py +0 -0
  48. {xradio-0.0.22 → xradio-0.0.24}/src/xradio/vis/_vis_utils/_ms/_tables/load_main_table.py +0 -0
  49. {xradio-0.0.22 → xradio-0.0.24}/src/xradio/vis/_vis_utils/_ms/_tables/read.py +0 -0
  50. {xradio-0.0.22 → xradio-0.0.24}/src/xradio/vis/_vis_utils/_ms/_tables/read_main_table.py +0 -0
  51. {xradio-0.0.22 → xradio-0.0.24}/src/xradio/vis/_vis_utils/_ms/_tables/read_subtables.py +0 -0
  52. {xradio-0.0.22 → xradio-0.0.24}/src/xradio/vis/_vis_utils/_ms/_tables/table_query.py +0 -0
  53. {xradio-0.0.22 → xradio-0.0.24}/src/xradio/vis/_vis_utils/_ms/_tables/write.py +0 -0
  54. {xradio-0.0.22 → xradio-0.0.24}/src/xradio/vis/_vis_utils/_ms/_tables/write_exp_api.py +0 -0
  55. {xradio-0.0.22 → xradio-0.0.24}/src/xradio/vis/_vis_utils/_ms/chunks.py +0 -0
  56. {xradio-0.0.22 → xradio-0.0.24}/src/xradio/vis/_vis_utils/_ms/conversion.py +0 -0
  57. {xradio-0.0.22 → xradio-0.0.24}/src/xradio/vis/_vis_utils/_ms/descr.py +0 -0
  58. {xradio-0.0.22 → xradio-0.0.24}/src/xradio/vis/_vis_utils/_ms/msv2_msv3.py +0 -0
  59. {xradio-0.0.22 → xradio-0.0.24}/src/xradio/vis/_vis_utils/_ms/msv2_to_msv4_meta.py +0 -0
  60. {xradio-0.0.22 → xradio-0.0.24}/src/xradio/vis/_vis_utils/_ms/msv4_infos.py +0 -0
  61. {xradio-0.0.22 → xradio-0.0.24}/src/xradio/vis/_vis_utils/_ms/msv4_sub_xdss.py +0 -0
  62. {xradio-0.0.22 → xradio-0.0.24}/src/xradio/vis/_vis_utils/_ms/optimised_functions.py +0 -0
  63. {xradio-0.0.22 → xradio-0.0.24}/src/xradio/vis/_vis_utils/_ms/partition_queries.py +0 -0
  64. {xradio-0.0.22 → xradio-0.0.24}/src/xradio/vis/_vis_utils/_ms/partitions.py +0 -0
  65. {xradio-0.0.22 → xradio-0.0.24}/src/xradio/vis/_vis_utils/_ms/subtables.py +0 -0
  66. {xradio-0.0.22 → xradio-0.0.24}/src/xradio/vis/_vis_utils/_utils/cds.py +0 -0
  67. {xradio-0.0.22 → xradio-0.0.24}/src/xradio/vis/_vis_utils/_utils/partition_attrs.py +0 -0
  68. {xradio-0.0.22 → xradio-0.0.24}/src/xradio/vis/_vis_utils/_utils/stokes_types.py +0 -0
  69. {xradio-0.0.22 → xradio-0.0.24}/src/xradio/vis/_vis_utils/_utils/xds_helper.py +0 -0
  70. {xradio-0.0.22 → xradio-0.0.24}/src/xradio/vis/_vis_utils/_zarr/encoding.py +0 -0
  71. {xradio-0.0.22 → xradio-0.0.24}/src/xradio/vis/_vis_utils/_zarr/read.py +0 -0
  72. {xradio-0.0.22 → xradio-0.0.24}/src/xradio/vis/_vis_utils/_zarr/write.py +0 -0
  73. {xradio-0.0.22 → xradio-0.0.24}/src/xradio/vis/_vis_utils/ms.py +0 -0
  74. {xradio-0.0.22 → xradio-0.0.24}/src/xradio/vis/_vis_utils/ms_column_descriptions_dicts.py +0 -0
  75. {xradio-0.0.22 → xradio-0.0.24}/src/xradio/vis/_vis_utils/zarr.py +0 -0
  76. {xradio-0.0.22 → xradio-0.0.24}/src/xradio/vis/convert_msv2_to_processing_set.py +0 -0
  77. {xradio-0.0.22 → xradio-0.0.24}/src/xradio/vis/model.py +0 -0
  78. {xradio-0.0.22 → xradio-0.0.24}/src/xradio/vis/vis_io.py +0 -0
  79. {xradio-0.0.22 → xradio-0.0.24}/src/xradio.egg-info/dependency_links.txt +0 -0
  80. {xradio-0.0.22 → xradio-0.0.24}/src/xradio.egg-info/requires.txt +0 -0
  81. {xradio-0.0.22 → xradio-0.0.24}/src/xradio.egg-info/top_level.txt +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: xradio
3
- Version: 0.0.22
3
+ Version: 0.0.24
4
4
  Summary: Xarray Radio Astronomy Data IO
5
5
  Author-email: Jan-Willem Steeb <jsteeb@nrao.edu>
6
6
  License: BSD 3-Clause License
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "xradio"
3
- version = "0.0.22"
3
+ version = "0.0.24"
4
4
  description = "Xarray Radio Astronomy Data IO "
5
5
  authors = [
6
6
  {name = "Jan-Willem Steeb", email="jsteeb@nrao.edu"},
File without changes
@@ -0,0 +1,113 @@
1
+ import copy
2
+ import xarray as xr
3
+ import zarr
4
+
5
+
6
def _open_dataset(store, xds_isel=None, data_variables=None, load=False):
    """
    Open a zarr store with ``xr.open_zarr`` and optionally sub-select and load it.

    Parameters
    ----------
    store : str or store-like
        Passed unchanged to ``xr.open_zarr``.
    xds_isel : dict, optional
        Index selection forwarded to ``Dataset.isel``.
        Example {'time':slice(0,10), 'frequency':slice(5,7)}, by default None
        (no selection is applied).
    data_variables : list, optional
        Names of the data variables to keep.
        Example ['VISIBILITY','WEIGHT'], by default None (all variables are kept).
    load : bool, optional
        If True, ``Dataset.load`` is called under the dask "synchronous"
        scheduler before returning, by default False.

    Returns
    -------
    xarray.Dataset
        The opened (and, if requested, sub-selected and loaded) dataset. The
        dataset attributes are carried over when ``data_variables`` is given.
    """

    import dask

    dataset = xr.open_zarr(store)

    if xds_isel is not None:
        dataset = dataset.isel(xds_isel)

    if data_variables is not None:
        # Rebuild a dataset holding only the requested variables.
        selected = xr.Dataset()
        for name in data_variables:
            selected[name] = dataset[name]
        selected.attrs = dataset.attrs
        dataset = selected

    if not load:
        return dataset

    with dask.config.set(scheduler="synchronous"):
        return dataset.load()
44
+
45
+
46
+ # Code to deprecate:
47
+ def _get_attrs(zarr_obj):
48
+ """
49
+ get attributes of zarr obj (groups or arrays)
50
+ """
51
+ return {k: v for k, v in zarr_obj.attrs.asdict().items() if not k.startswith("_NC")}
52
+
53
+
54
def _load_no_dask_zarr(zarr_name, slice_dict=None):
    """
    Alternative to xarray open_zarr where the arrays are not Dask Arrays.

    zarr_name: Store passed to ``zarr.open_group`` (opened read-only).
    slice_dict: A dictionary of slice objects for which values to read from a dimension.
        For example slice_dict={'time':slice(0,10)} would select the first 10 elements in the time dimension.
        If a dim is not specified all values are returned. By default None (no slicing at all).
        The dictionary passed in is never modified.
    return:
        xarray.Dataset()

    #Should go into general utils.
    """
    # Used by xarray to store array labeling info in zarr meta data.
    DIMENSION_KEY = "_ARRAY_DIMENSIONS"

    zarr_group = zarr.open_group(store=zarr_name, mode="r")
    group_attrs = _get_attrs(zarr_group)

    # Work on a private copy: missing dimensions are filled in below.
    slice_dict_complete = {} if slice_dict is None else copy.deepcopy(slice_dict)

    xds = xr.Dataset()
    for var_name, var in zarr_group.arrays():
        var_attrs = _get_attrs(var)
        # The dimension list is bookkeeping, not a user attribute: take it out.
        dims = var_attrs.pop(DIMENSION_KEY)

        for dim in dims:
            slice_dict_complete.setdefault(dim, slice(None))  # No slicing.

        # Length is checked first so zero-dimensional arrays do not fail on dims[0].
        if len(dims) == 1 and dims[0] == var_name:
            # Dimension coordinates.
            coord = var[slice_dict_complete[var_name]]
            xds = xds.assign_coords({var_name: coord})
        else:
            slicing_tuple = tuple(slice_dict_complete[dim] for dim in dims)
            xds[var_name] = xr.DataArray(var[slicing_tuple], dims=dims)
            var_attrs.pop("coordinates", None)

        xds[var_name].attrs = var_attrs

    xds.attrs = group_attrs

    return xds
@@ -70,7 +70,6 @@ image_data_variables_and_dims_single_precision = {
70
70
  }
71
71
 
72
72
 
73
-
74
73
  def pad_array_with_nans(input_array, output_shape, dtype):
75
74
  """
76
75
  Pad an integer array with NaN values to match the specified output shape.
@@ -93,7 +92,7 @@ def pad_array_with_nans(input_array, output_shape, dtype):
93
92
  padded_array[:] = np.nan
94
93
 
95
94
  # Copy the input array to the appropriate position within the padded array
96
- padded_array[: input_shape[0], : input_shape[1]] = input_array
95
+ padded_array[: input_shape[0], : input_shape[1], : input_shape[2]] = input_array
97
96
 
98
97
  return padded_array
99
98
 
@@ -277,8 +276,7 @@ def create_data_variable_meta_data_on_disk(
277
276
  return zarr_meta
278
277
 
279
278
 
280
-
281
- def write_chunk(img_xds,meta,parallel_dims_chunk_id,compressor,image_file):
279
+ def write_chunk(img_xds, meta, parallel_dims_chunk_id, compressor, image_file):
282
280
  dims = meta["dims"]
283
281
  dtype = meta["dtype"]
284
282
  data_varaible_name = meta["name"]
@@ -304,8 +302,6 @@ def write_chunk(img_xds,meta,parallel_dims_chunk_id,compressor,image_file):
304
302
 
305
303
  write_binary_blob_to_disk(
306
304
  array,
307
- file_path=os.path.join(
308
- image_file, data_varaible_name, chunk_name
309
- ),
305
+ file_path=os.path.join(image_file, data_varaible_name, chunk_name),
310
306
  compressor=compressor,
311
- )
307
+ )
@@ -0,0 +1,50 @@
1
+ from ._zarr.xds_to_zarr import _write_zarr
2
+ from ._zarr.xds_from_zarr import _read_zarr
3
+ import numpy as np
4
+ import os
5
+ import xarray as xr
6
+ from ..._utils.zarr.common import _open_dataset
7
+
8
+
9
def _xds_to_zarr(xds: xr.Dataset, zarr_store: str):
    """Write the dataset ``xds`` to ``zarr_store`` by delegating to ``_write_zarr``."""
    # Nothing is returned, whatever the helper itself returns.
    _write_zarr(xds, zarr_store)
    return None
11
+
12
+
13
def _xds_from_zarr(
    zarr_store: str, output: dict = None, selection: dict = None
) -> xr.Dataset:
    """
    Read an image dataset from ``zarr_store`` by delegating to ``_read_zarr``.

    ``output`` and ``selection`` default to a fresh empty dict per call, so no
    dictionary is shared between calls.

    Supported key/values in ``output`` are:
      "dv"
        what data variables should be returned as.
        "numpy": numpy arrays
        "dask": dask arrays
      "coords"
        what coordinates should be returned as
        "numpy": numpy arrays
    """
    output = {} if output is None else output
    selection = {} if selection is None else selection
    return _read_zarr(zarr_store, output, selection)
25
+
26
+
27
def _load_image_from_zarr_no_dask(zarr_file: str, selection: dict) -> xr.Dataset:
    """
    Load an image from ``zarr_file`` into memory via ``_open_dataset(..., load=True)``.

    ``selection`` is passed to ``_open_dataset`` as its index selection. If a
    "HISTORY" or "_attrs_xds_history" directory exists inside ``zarr_file``
    (checked in that order, first hit wins) it is loaded the same way and
    stored under ``attrs["history"]``. Finally ``_iter_dict`` is applied to the
    dataset attributes.
    """
    image_xds = _open_dataset(zarr_file, selection, load=True)

    # Lazy generator: directories are probed in order and probing stops at the first match.
    candidates = (
        os.sep.join([zarr_file, name]) for name in ["HISTORY", "_attrs_xds_history"]
    )
    history_dir = next((path for path in candidates if os.path.isdir(path)), None)
    if history_dir is not None:
        image_xds.attrs["history"] = _open_dataset(history_dir, load=True)

    _iter_dict(image_xds.attrs)
    return image_xds
36
+
37
+
38
+ def _iter_dict(d: dict) -> None:
39
+ for k, v in d.items():
40
+ if isinstance(v, dict):
41
+ keys = v.keys()
42
+ if (
43
+ len(keys) == 3
44
+ and "_dtype" in keys
45
+ and "_type" in keys
46
+ and "_value" in keys
47
+ ):
48
+ d[k] = np.array(v["_value"], dtype=v["_dtype"])
49
+ else:
50
+ _iter_dict(v)
@@ -9,6 +9,8 @@ import copy
9
9
  import numpy as np
10
10
  import xarray as xr
11
11
 
12
+ # from .._utils.zarr.common import _load_no_dask_zarr
13
+
12
14
  from ._util.casacore import _load_casa_image_block, _xds_to_casa_image
13
15
  from ._util.fits import _read_fits_image
14
16
  from ._util.image_factory import (
@@ -16,7 +18,7 @@ from ._util.image_factory import (
16
18
  _make_empty_lmuv_image,
17
19
  _make_empty_sky_image,
18
20
  )
19
- from ._util.zarr import _xds_to_zarr, _xds_from_zarr
21
+ from ._util.zarr import _load_image_from_zarr_no_dask, _xds_from_zarr, _xds_to_zarr
20
22
 
21
23
  warnings.filterwarnings("ignore", category=FutureWarning)
22
24
 
@@ -166,9 +168,11 @@ def load_image(infile: str, block_des: dict = {}, do_sky_coords=True) -> xr.Data
166
168
  emsgs.append(f'image format appears not to be fits {e.args}')
167
169
  """
168
170
  # when done debugging, comment out next line
171
+ # return _load_image_from_zarr_no_dask(infile, block_des)
169
172
  # return _xds_from_zarr(infile, {"dv": "numpy"}, selection)
170
173
  try:
171
- return _xds_from_zarr(infile, {"dv": "numpy", "coords": "numpy"}, selection)
174
+ return _load_image_from_zarr_no_dask(infile, block_des)
175
+ # return _xds_from_zarr(infile, {"dv": "numpy", "coords": "numpy"}, selection)
172
176
  except Exception as e:
173
177
  emsgs.append(f"image format appears not to be zarr {e.args}")
174
178
  emsgs.insert(
@@ -177,7 +181,9 @@ def load_image(infile: str, block_des: dict = {}, do_sky_coords=True) -> xr.Data
177
181
  raise RuntimeError("\n".join(emsgs))
178
182
 
179
183
 
180
- def write_image(xds: xr.Dataset, imagename: str, out_format: str = "casa", overwrite=False) -> None:
184
+ def write_image(
185
+ xds: xr.Dataset, imagename: str, out_format: str = "casa", overwrite=False
186
+ ) -> None:
181
187
  """
182
188
  Convert an xds image to CASA or zarr image.
183
189
  xds : xarray.Dataset
@@ -194,7 +200,8 @@ def write_image(xds: xr.Dataset, imagename: str, out_format: str = "casa", overw
194
200
 
195
201
  if overwrite:
196
202
  import os
197
- os.system('rm -rf ' + imagename)
203
+
204
+ os.system("rm -rf " + imagename)
198
205
 
199
206
  if my_format == "casa":
200
207
  _xds_to_casa_image(xds, imagename)
@@ -0,0 +1,103 @@
1
+ import pandas as pd
2
+
3
+
4
class processing_set(dict):
    """
    Dictionary of datasets keyed by name, with lazily computed, cached metadata.

    Derived metadata (summaries, maximum dimension sizes, combined frequency
    axis) is stored in ``self.meta`` the first time it is requested and reused
    afterwards; it is not refreshed when entries are added or removed later.
    """

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        # Cache: "summary" maps data_group -> DataFrame; "max_dims" and
        # "freq_axis" are added on first use.
        self.meta = {"summary": {}}

    def summary(self, data_group="base"):
        """Return the (cached) summary DataFrame for ``data_group``."""
        if data_group not in self.meta["summary"]:
            self.meta["summary"][data_group] = self._summary(data_group)
        return self.meta["summary"][data_group]

    def get_ps_max_dims(self):
        """Return the (cached) dict of the largest size of every dimension in the set."""
        if "max_dims" not in self.meta:
            self.meta["max_dims"] = self._get_ps_max_dims()
        return self.meta["max_dims"]

    def get_ps_freq_axis(self):
        """Return the (cached) combined, sorted frequency axis of the set."""
        if "freq_axis" not in self.meta:
            self.meta["freq_axis"] = self._get_ps_freq_axis()
        return self.meta["freq_axis"]

    def _summary(self, data_group="base"):
        """
        Build a pandas DataFrame with one row per entry of the set.

        Raises
        ------
        ValueError
            If an entry's ``data_group`` names neither a "visibility" nor a
            "spectrum" data variable.
        """
        summary_data = {
            "name": [],
            "ddi": [],
            "intent": [],
            "field_id": [],
            "field_name": [],
            "start_frequency": [],
            "end_frequency": [],
            "shape": [],
        }
        for key, value in self.items():
            summary_data["name"].append(key)
            summary_data["ddi"].append(value.attrs["ddi"])
            summary_data["intent"].append(value.attrs["intent"])

            group = value.attrs["data_groups"][data_group]
            # "spectrum" wins when both are present (unchanged precedence).
            if "spectrum" in group:
                data_name = group["spectrum"]
            elif "visibility" in group:
                data_name = group["visibility"]
            else:
                # Never fall through with the data_name of a previous entry.
                raise ValueError(
                    "No Visibility or Spectrum data variable found in data_group "
                    + str(data_group)
                    + " of "
                    + str(key)
                    + "."
                )

            data_var = value[data_name]
            field_info = data_var.attrs["field_info"]
            frequency_values = value["frequency"].values

            summary_data["shape"].append(data_var.shape)
            summary_data["field_id"].append(field_info["field_id"])
            summary_data["field_name"].append(field_info["name"])
            summary_data["start_frequency"].append(frequency_values[0])
            summary_data["end_frequency"].append(frequency_values[-1])
        return pd.DataFrame(summary_data)

    def _get_ps_freq_axis(self):
        """
        Concatenate the frequency coordinate of one dataset per ``spw_id`` and
        sort the result by frequency.

        Asserts that every dataset uses the same frequency ``frame`` as the
        first one.
        """
        import xarray as xr

        spw_ids = []
        freq_axis_list = []
        frame = self.get(0).frequency.attrs["frame"]
        for ms_xds in self.values():
            assert (
                frame == ms_xds.frequency.attrs["frame"]
            ), "Frequency reference frame not consistent in processing set."
            spw_id = ms_xds.frequency.attrs["spw_id"]
            if spw_id not in spw_ids:
                spw_ids.append(spw_id)
                freq_axis_list.append(ms_xds.frequency)

        return xr.concat(freq_axis_list, dim="frequency").sortby("frequency")

    def _get_ps_max_dims(self):
        """Return {dim_name: largest size over all entries}, or None for an empty set."""
        max_dims = None
        for ms_xds in self.values():
            if max_dims is None:
                max_dims = dict(ms_xds.sizes)
                continue
            for dim_name, size in ms_xds.sizes.items():
                max_dims[dim_name] = max(max_dims.get(dim_name, size), size)
        return max_dims

    def get(self, id):
        """
        Return the entry at position ``id`` in insertion order.

        Note: this replaces ``dict.get``; the argument is a position, not a key.
        """
        return self[list(self.keys())[id]]
@@ -0,0 +1,127 @@
1
+ import xarray as xr
2
+ import zarr
3
+ import copy
4
+ import os
5
+ from ._processing_set import processing_set
6
+ from typing import Dict, Union
7
+
8
+
9
def load_processing_set(
    ps_store: str,
    sel_parms: dict,
    data_variables: Union[list, None] = None,
    load_sub_datasets: bool = True,
) -> processing_set:
    """Loads a processing set into memory.

    Parameters
    ----------
    ps_store : str
        String of the path and name of the processing set. For example '/users/user_1/uid___A002_Xf07bba_Xbe5c_target.lsrk.vis.zarr'.
    sel_parms : dict
        A dictionary where the keys are the names of the ms_xds's and the values are slice_dicts.
        slice_dicts: A dictionary where the keys are the dimension names and the values are slices.
        For example::

            {
                'ms_v4_name_1': {'frequency': slice(0, 160, None),'time':slice(0,100)},
                ...
                'ms_v4_name_n': {'frequency': slice(0, 160, None),'time':slice(0,100)},
            }

        Only the ms_xds's named here are loaded.
    data_variables : Union[list, None], optional
        The list of data variables to load into memory for example ['VISIBILITY', 'WEIGHT', 'FLAGS']. By default None which will load all data variables into memory.
    load_sub_datasets : bool, optional
        If true sub-datasets (for example weather_xds, antenna_xds, pointing_xds, ...) will be loaded into memory and merged into the dataset attributes, by default True.

    Returns
    -------
    processing_set
        In memory representation of the processing set (every dataset is opened with ``load=True``).
    """
    from xradio._utils.zarr.common import _open_dataset

    loaded = processing_set()
    for ms_name, isel in sel_parms.items():
        ms_store = os.path.join(ps_store, ms_name)
        ms_xds = _open_dataset(
            os.path.join(ms_store, "MAIN"),
            isel,
            data_variables,
            load=True,
        )

        if load_sub_datasets:
            from xradio.vis.read_processing_set import _read_sub_xds

            sub_datasets = _read_sub_xds(ms_store, load=True)
            # Sub-datasets override attributes of the same name.
            ms_xds.attrs = {**ms_xds.attrs, **sub_datasets}

        loaded[ms_name] = ms_xds
    return loaded
63
+
64
+
65
class processing_set_iterator:

    def __init__(
        self,
        sel_parms: dict,
        input_data_store: str,
        input_data: Union[Dict, processing_set, None] = None,
        data_variables: list = None,
        load_sub_datasets: bool = True,
    ):
        """An iterator that will go through a processing set one MS v4 at a time.

        Parameters
        ----------
        sel_parms : dict
            A dictionary where the keys are the names of the ms_xds's and the values are slice_dicts.
            slice_dicts: A dictionary where the keys are the dimension names and the values are slices.
            For example::

                {
                    'ms_v4_name_1': {'frequency': slice(0, 160, None),'time':slice(0,100)},
                    ...
                    'ms_v4_name_n': {'frequency': slice(0, 160, None),'time':slice(0,100)},
                }
        input_data_store : str
            String of the path and name of the processing set. For example '/users/user_1/uid___A002_Xf07bba_Xbe5c_target.lsrk.vis.zarr'.
        input_data : Union[Dict, processing_set, None], optional
            If the processing set is in memory already it can be supplied here. By default None which will make the iterator load data using the supplied input_data_store.
        data_variables : list, optional
            The list of data variables to load into memory for example ['VISIBILITY', 'WEIGHT', 'FLAGS']. By default None which will load all data variables into memory.
        load_sub_datasets : bool, optional
            If true sub-datasets (for example weather_xds, antenna_xds, pointing_xds, ...) will be loaded into memory, by default True.
        """

        self.input_data = input_data
        self.input_data_store = input_data_store
        self.sel_parms = sel_parms
        # Iteration order is the key order of sel_parms.
        self.xds_name_iter = iter(sel_parms.keys())
        self.data_variables = data_variables
        self.load_sub_datasets = load_sub_datasets

    def __iter__(self):
        return self

    def __next__(self):
        """Return the next dataset named in ``sel_parms``.

        When ``input_data`` was supplied the dataset is taken from it as-is
        (the slices in ``sel_parms`` are not applied in that case); otherwise
        it is loaded from ``input_data_store`` with ``load_processing_set``.

        Raises
        ------
        StopIteration
            When all names in ``sel_parms`` have been visited.
        """
        # Exhaustion surfaces as StopIteration from next() itself; any other
        # error (e.g. sel_parms changed size during iteration) is not hidden.
        xds_name = next(self.xds_name_iter)

        if self.input_data is not None:
            return self.input_data[xds_name]  # In memory

        ps = load_processing_set(
            ps_store=self.input_data_store,
            sel_parms={xds_name: self.sel_parms[xds_name]},
            data_variables=self.data_variables,
            load_sub_datasets=self.load_sub_datasets,
        )
        return ps.get(0)
@@ -0,0 +1,85 @@
1
+ import os
2
+ import xarray as xr
3
+ from ._processing_set import processing_set
4
+ import graphviper.utils.logger as logger
5
+ from xradio._utils.zarr.common import _open_dataset
6
+
7
+
8
def read_processing_set(
    ps_store: str, intents: list = None, fields: str = None
) -> processing_set:
    """Creates a lazy representation of a Processing Set (only meta-data is loaded into memory).

    Parameters
    ----------
    ps_store : str
        String of the path and name of the processing set. For example '/users/user_1/uid___A002_Xf07bba_Xbe5c_target.lsrk.vis.zarr'.
    intents : list, optional
        A list of the intents to be read for example ['OBSERVE_TARGET#ON_SOURCE']. The intents in a processing set can be seen by calling processing_set.summary().
        By default None, which will read all intents.
    fields : str, optional
        The list of field names that will be read, by default None which will read all fields.
        Membership is tested with ``in``, so passing a plain string matches any
        field name that is a substring of it.

    Returns
    -------
    processing_set
        Lazy representation of processing set (data is represented by Dask.arrays).
        Entries are inserted in sorted directory-name order.
    """
    ps = processing_set()
    data_group = "base"
    # os.listdir returns names in arbitrary order; sort for a reproducible set.
    for ms_dir_name in sorted(os.listdir(ps_store)):
        if "ddi" not in ms_dir_name:
            continue

        ms_store = os.path.join(ps_store, ms_dir_name)
        xds = _open_dataset(os.path.join(ms_store, "MAIN"))

        if (intents is not None) and (xds.attrs["intent"] not in intents):
            continue

        data_name = _get_data_name(xds, data_group)
        if (fields is not None) and (
            xds[data_name].attrs["field_info"]["name"] not in fields
        ):
            continue

        # Sub-datasets are attached as attributes of the main dataset.
        xds.attrs = {
            **xds.attrs,
            **_read_sub_xds(ms_store),
        }
        ps[ms_dir_name] = xds
    return ps
47
+
48
+
49
def _read_sub_xds(ms_store, load=False):
    """
    Open the sub-datasets stored next to the main dataset in ``ms_store``.

    "ANTENNA" is always opened (key "antenna_xds"). "WEATHER" and "POINTING"
    (keys "weather_xds" and "pointing_xds") are opened only when the
    corresponding directory exists. ``load`` is forwarded to ``_open_dataset``.

    Returns a dict mapping the keys above to the opened datasets.
    """
    opened = {
        "antenna_xds": _open_dataset(os.path.join(ms_store, "ANTENNA"), load=load),
    }

    optional = (("weather_xds", "WEATHER"), ("pointing_xds", "POINTING"))
    for key, dir_name in optional:
        path = os.path.join(ms_store, dir_name)
        if not os.path.isdir(path):
            continue
        opened[key] = _open_dataset(path, load=load)

    return opened
70
+
71
+
72
+ def _get_data_name(xds, data_group):
73
+ if "visibility" in xds.attrs["data_groups"][data_group]:
74
+ data_name = xds.attrs["data_groups"][data_group]["visibility"]
75
+ elif "spectrum" in xds.attrs["data_groups"][data_group]:
76
+ data_name = xds.attrs["data_groups"][data_group]["spectrum"]
77
+ else:
78
+ error_message = (
79
+ "No Visibility or Spectrum data variable found in data_group "
80
+ + data_group
81
+ + "."
82
+ )
83
+ logger.exception(error_message)
84
+ raise ValueError(error_message)
85
+ return data_name
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: xradio
3
- Version: 0.0.22
3
+ Version: 0.0.24
4
4
  Summary: Xarray Radio Astronomy Data IO
5
5
  Author-email: Jan-Willem Steeb <jsteeb@nrao.edu>
6
6
  License: BSD 3-Clause License
@@ -11,6 +11,8 @@ src/xradio.egg-info/top_level.txt
11
11
  src/xradio/_utils/__init__.py
12
12
  src/xradio/_utils/common.py
13
13
  src/xradio/_utils/_casacore/tables.py
14
+ src/xradio/_utils/zarr/__init__.py
15
+ src/xradio/_utils/zarr/common.py
14
16
  src/xradio/image/__init__.py
15
17
  src/xradio/image/image.py
16
18
  src/xradio/image/_util/__init__.py
@@ -1,21 +0,0 @@
1
- from ._zarr.xds_to_zarr import _write_zarr
2
- from ._zarr.xds_from_zarr import _read_zarr
3
- import xarray as xr
4
-
5
-
6
- def _xds_to_zarr(xds: xr.Dataset, zarr_store: str):
7
- _write_zarr(xds, zarr_store)
8
-
9
-
10
- def _xds_from_zarr(
11
- zarr_store: str, output: dict = {}, selection: dict = {}
12
- ) -> xr.Dataset:
13
- # supported key/values in output are:
14
- # "dv"
15
- # what data variables should be returned as.
16
- # "numpy": numpy arrays
17
- # "dask": dask arrays
18
- # "coords"
19
- # what coordinates should be returned as
20
- # "numpy": numpy arrays
21
- return _read_zarr(zarr_store, output, selection)
@@ -1,45 +0,0 @@
1
- import pandas as pd
2
-
3
-
4
- class processing_set(dict):
5
- def __init__(self, *args, **kwargs):
6
- super().__init__(*args, **kwargs)
7
-
8
- def summary(self, data_group="base"):
9
- summary_data = {
10
- "name": [],
11
- "ddi": [],
12
- "intent": [],
13
- "field_id": [],
14
- "field_name": [],
15
- "start_frequency": [],
16
- "end_frequency": [],
17
- }
18
- for key, value in self.items():
19
- summary_data["name"].append(key)
20
- summary_data["ddi"].append(value.attrs["ddi"])
21
- summary_data["intent"].append(value.attrs["intent"])
22
-
23
- if "visibility" in value.attrs["data_groups"][data_group]:
24
- data_name = value.attrs["data_groups"][data_group]["visibility"]
25
-
26
- if "spectrum" in value.attrs["data_groups"][data_group]:
27
- data_name = value.attrs["data_groups"][data_group]["spectrum"]
28
-
29
- summary_data["field_id"].append(
30
- value[data_name].attrs[
31
- "field_info"
32
- ]["field_id"]
33
- )
34
- summary_data["field_name"].append(
35
- value[data_name].attrs[
36
- "field_info"
37
- ]["name"]
38
- )
39
- summary_data["start_frequency"].append(value["frequency"].values[0])
40
- summary_data["end_frequency"].append(value["frequency"].values[-1])
41
- summary_df = pd.DataFrame(summary_data)
42
- return summary_df
43
-
44
- def get(self, id):
45
- return self[list(self.keys())[id]]
@@ -1,189 +0,0 @@
1
- import xarray as xr
2
- import zarr
3
- import copy
4
- import os
5
- from ._processing_set import processing_set
6
-
7
- DIMENSION_KEY = "_ARRAY_DIMENSIONS" # Used by xarray to store array labeling info in zarr meta data.
8
- # from xradio._utils._logger import _get_logger
9
-
10
-
11
- def _get_attrs(zarr_obj):
12
- """
13
- get attributes of zarr obj (groups or arrays)
14
- """
15
- return {k: v for k, v in zarr_obj.attrs.asdict().items() if not k.startswith("_NC")}
16
-
17
-
18
- def _load_ms_xds(
19
- ps_name, ms_xds_name, slice_dict={}, cache_dir=None, chunk_id=None, date_time=""
20
- ):
21
- # logger = _get_logger()
22
- if cache_dir:
23
- xds_cached_name = (
24
- os.path.join(cache_dir, ms_xds_name) + "_" + str(chunk_id) + "_" + date_time
25
- )
26
-
27
- # Check if already chached:
28
- try:
29
- ms_xds = _load_ms_xds_core(
30
- ms_xds_name=xds_cached_name, slice_dict=slice_dict
31
- )
32
-
33
- # logger.debug(ms_xds_name + ' chunk ' + str(slice_dict) + ' was found in cache: ' + xds_cached)
34
- found_in_cache = True
35
- return xds, found_in_cache
36
- except:
37
- # logger.debug(xds_cached + ' chunk ' + str(slice_dict) + ' was not found in cache or failed to load. Retrieving chunk from ' + ms_xds_name + ' .')
38
- ms_xds = _load_ms_xds_core(
39
- ms_xds_name=os.path.join(ps_name, ms_xds_name), slice_dict=slice_dict
40
- )
41
- write_ms_xds(ms_xds, xds_cached_name)
42
-
43
- found_in_cache = False
44
- return xds, found_in_cache
45
- else:
46
- found_in_cache = None
47
- ms_xds = _load_ms_xds_core(
48
- ms_xds_name=os.path.join(ps_name, ms_xds_name), slice_dict=slice_dict
49
- )
50
- return ms_xds, found_in_cache
51
-
52
-
53
- def _write_ms_xds(ms_xds, ms_xds_name):
54
- ms_xds_temp = ms_xds
55
- xr.Dataset.to_zarr(
56
- ms_xds.attrs["ANTENNA"],
57
- os.path.join(xds_cached_name, "ANTENNA"),
58
- consolidated=True,
59
- )
60
- ms_xds_temp = ms_xds
61
- ms_xds_temp.attrs["ANTENNA"] = {}
62
- xr.Dataset.to_zarr(
63
- ms_xds_temp, os.path.join(xds_cached_name, "MAIN"), consolidated=True
64
- )
65
-
66
-
67
- def _load_ms_xds_core(ms_xds_name, slice_dict):
68
- ms_xds = _load_no_dask_zarr(
69
- zarr_name=os.path.join(ms_xds_name, "MAIN"), slice_dict=slice_dict
70
- )
71
- ms_xds.attrs["antenna_xds"] = _load_no_dask_zarr(
72
- zarr_name=os.path.join(ms_xds_name, "ANTENNA")
73
- )
74
- sub_xds = {
75
- "antenna_xds": "ANTENNA",
76
- }
77
- for sub_xds_key, sub_xds_name in sub_xds.items():
78
- ms_xds.attrs[sub_xds_key] = _load_no_dask_zarr(
79
- zarr_name=os.path.join(ms_xds_name, sub_xds_name)
80
- )
81
- optional_sub_xds = {
82
- "weather_xds": "WEATHER",
83
- "pointing_xds": "POINTING",
84
- }
85
- for sub_xds_key, sub_xds_name in sub_xds.items():
86
- sub_xds_path = os.path.join(ms_xds_name, sub_xds_name)
87
- if os.path.isdir(sub_xds_path):
88
- ms_xds.attrs[sub_xds_key] = _load_no_dask_zarr(
89
- zarr_name=os.path.join(ms_xds_name, sub_xds_name)
90
- )
91
-
92
- return ms_xds
93
-
94
-
95
- def _load_no_dask_zarr(zarr_name, slice_dict={}):
96
- """
97
- Alternative to xarray open_zarr where the arrays are not Dask Arrays.
98
-
99
- slice_dict: A dictionary of slice objects for which values to read form a dimension.
100
- For example silce_dict={'time':slice(0,10)} would select the first 10 elements in the time dimension.
101
- If a dim is not specified all values are retruned.
102
- return:
103
- xarray.Dataset()
104
-
105
- #Should go into general utils.
106
- """
107
-
108
- # logger = _get_logger()
109
- zarr_group = zarr.open_group(store=zarr_name, mode="r")
110
- group_attrs = _get_attrs(zarr_group)
111
-
112
- slice_dict_complete = copy.deepcopy(slice_dict)
113
- coords = {}
114
- xds = xr.Dataset()
115
- for var_name, var in zarr_group.arrays():
116
- var_attrs = _get_attrs(var)
117
-
118
- for dim in var_attrs[DIMENSION_KEY]:
119
- if dim not in slice_dict_complete:
120
- slice_dict_complete[dim] = slice(None) # No slicing.
121
-
122
- if (var_attrs[DIMENSION_KEY][0] == var_name) and (
123
- len(var_attrs[DIMENSION_KEY]) == 1
124
- ):
125
- coord = var[
126
- slice_dict_complete[var_attrs[DIMENSION_KEY][0]]
127
- ] # Dimension coordinates.
128
- del var_attrs["_ARRAY_DIMENSIONS"]
129
- xds = xds.assign_coords({var_name: coord})
130
- xds[var_name].attrs = var_attrs
131
- else:
132
- # Construct slicing
133
- slicing_list = []
134
- for dim in var_attrs[DIMENSION_KEY]:
135
- slicing_list.append(slice_dict_complete[dim])
136
- slicing_tuple = tuple(slicing_list)
137
- xds[var_name] = xr.DataArray(
138
- var[slicing_tuple], dims=var_attrs[DIMENSION_KEY]
139
- )
140
-
141
- if "coordinates" in var_attrs:
142
- del var_attrs["coordinates"]
143
- del var_attrs["_ARRAY_DIMENSIONS"]
144
- xds[var_name].attrs = var_attrs
145
-
146
- xds.attrs = group_attrs
147
-
148
- return xds
149
-
150
-
151
- def load_processing_set(ps_name, sel_parms):
152
- """
153
- sel_parms
154
- A dictionary where the keys are the names of the ms_xds's and the values are slice_dicts.
155
- slice_dicts: A dictionary where the keys are the dimension names and the values are slices.
156
- """
157
- ps = processing_set()
158
- for name_ms_xds, ms_xds_sel_parms in sel_parms.items():
159
- ps[name_ms_xds] = _load_ms_xds(ps_name, name_ms_xds, ms_xds_sel_parms)[0]
160
- return ps
161
-
162
- class processing_set_iterator:
163
-
164
- def __init__(self, data_selection, input_data_store, input_data=None):
165
- self.input_data = input_data
166
- self.input_data_store = input_data_store
167
- self.data_selection = data_selection
168
- self.xds_name_iter = iter(data_selection.keys())
169
-
170
- def __iter__(self):
171
- return self
172
-
173
- def __next__(self):
174
- try:
175
- xds_name = next(self.xds_name_iter)
176
- except Exception as e:
177
- raise StopIteration
178
-
179
- if self.input_data is None:
180
- slice_description = self.data_selection[xds_name]
181
- ps = load_processing_set(
182
- ps_name=self.input_data_store,
183
- sel_parms={xds_name: slice_description},
184
- )
185
- xds = ps.get(0)
186
- else:
187
- xds = self.input_data[xds_name] #In memory
188
-
189
- return xds
@@ -1,43 +0,0 @@
1
- import os
2
-
3
- import xarray as xr
4
-
5
- from ._processing_set import processing_set
6
-
7
-
8
- def read_processing_set(ps_name, intents=None, data_group='base', fields=None):
9
- items = os.listdir(ps_name)
10
- ms_xds = xr.Dataset()
11
- ps = processing_set()
12
- for i in items:
13
- if "ddi" in i:
14
- xds = xr.open_zarr(ps_name + "/" + i + "/MAIN")
15
-
16
- if (intents is None) or (xds.attrs["intent"] in intents):
17
-
18
- if "visibility" in xds.attrs["data_groups"][data_group]:
19
- data_name = xds.attrs["data_groups"][data_group]["visibility"]
20
-
21
- if "spectrum" in xds.attrs["data_groups"][data_group]:
22
- data_name = xds.attrs["data_groups"][data_group]["spectrum"]
23
-
24
- if (fields is None) or (xds[data_name].attrs["field_info"]["name"] in fields):
25
- ps[i] = xds
26
- sub_xds = {
27
- "antenna_xds": "ANTENNA",
28
- }
29
- for sub_xds_key, sub_xds_name in sub_xds.items():
30
- ps[i].attrs[sub_xds_key] = xr.open_zarr(
31
- ps_name + "/" + i + "/" + sub_xds_name
32
- )
33
-
34
- optional_sub_xds = {
35
- "weather_xds": "WEATHER",
36
- "pointing_xds": "POINTING",
37
- }
38
- for sub_xds_key, sub_xds_name in optional_sub_xds.items():
39
- sub_xds_path = ps_name + "/" + i + "/" + sub_xds_name
40
- if os.path.isdir(sub_xds_path):
41
- ps[i].attrs[sub_xds_key] = xr.open_zarr(sub_xds_path)
42
-
43
- return ps
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes