xradio 0.0.33__tar.gz → 0.0.36__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (80) hide show
  1. {xradio-0.0.33/src/xradio.egg-info → xradio-0.0.36}/PKG-INFO +1 -1
  2. {xradio-0.0.33 → xradio-0.0.36}/pyproject.toml +1 -1
  3. {xradio-0.0.33 → xradio-0.0.36}/src/xradio/_utils/list_and_array.py +3 -1
  4. xradio-0.0.33/src/xradio/vis/_vis_utils/_ms/msv2_to_msv4_meta.py → xradio-0.0.36/src/xradio/_utils/schema.py +128 -86
  5. {xradio-0.0.33 → xradio-0.0.36}/src/xradio/_utils/zarr/common.py +11 -5
  6. {xradio-0.0.33 → xradio-0.0.36}/src/xradio/image/_util/_zarr/xds_from_zarr.py +15 -2
  7. {xradio-0.0.33 → xradio-0.0.36}/src/xradio/image/_util/_zarr/zarr_low_level.py +65 -14
  8. {xradio-0.0.33 → xradio-0.0.36}/src/xradio/schema/bases.py +37 -8
  9. {xradio-0.0.33 → xradio-0.0.36}/src/xradio/schema/check.py +15 -3
  10. {xradio-0.0.33 → xradio-0.0.36}/src/xradio/schema/dataclass.py +2 -2
  11. xradio-0.0.36/src/xradio/vis/_processing_set.py +302 -0
  12. {xradio-0.0.33 → xradio-0.0.36}/src/xradio/vis/_vis_utils/_ms/_tables/read.py +9 -0
  13. {xradio-0.0.33 → xradio-0.0.36}/src/xradio/vis/_vis_utils/_ms/conversion.py +166 -116
  14. xradio-0.0.36/src/xradio/vis/_vis_utils/_ms/create_antenna_xds.py +479 -0
  15. {xradio-0.0.33 → xradio-0.0.36}/src/xradio/vis/_vis_utils/_ms/create_field_and_source_xds.py +84 -42
  16. xradio-0.0.36/src/xradio/vis/_vis_utils/_ms/msv2_to_msv4_meta.py +44 -0
  17. {xradio-0.0.33 → xradio-0.0.36}/src/xradio/vis/_vis_utils/_ms/msv4_sub_xdss.py +4 -224
  18. {xradio-0.0.33 → xradio-0.0.36}/src/xradio/vis/_vis_utils/_utils/xds_helper.py +10 -2
  19. {xradio-0.0.33 → xradio-0.0.36}/src/xradio/vis/convert_msv2_to_processing_set.py +6 -1
  20. {xradio-0.0.33 → xradio-0.0.36}/src/xradio/vis/load_processing_set.py +2 -2
  21. {xradio-0.0.33 → xradio-0.0.36}/src/xradio/vis/read_processing_set.py +5 -2
  22. {xradio-0.0.33 → xradio-0.0.36}/src/xradio/vis/schema.py +348 -112
  23. {xradio-0.0.33 → xradio-0.0.36/src/xradio.egg-info}/PKG-INFO +1 -1
  24. {xradio-0.0.33 → xradio-0.0.36}/src/xradio.egg-info/SOURCES.txt +2 -0
  25. xradio-0.0.33/src/xradio/vis/_processing_set.py +0 -176
  26. {xradio-0.0.33 → xradio-0.0.36}/LICENSE.txt +0 -0
  27. {xradio-0.0.33 → xradio-0.0.36}/MANIFEST.in +0 -0
  28. {xradio-0.0.33 → xradio-0.0.36}/README.md +0 -0
  29. {xradio-0.0.33 → xradio-0.0.36}/setup.cfg +0 -0
  30. {xradio-0.0.33 → xradio-0.0.36}/src/xradio/__init__.py +0 -0
  31. {xradio-0.0.33 → xradio-0.0.36}/src/xradio/_utils/__init__.py +0 -0
  32. {xradio-0.0.33 → xradio-0.0.36}/src/xradio/_utils/_casacore/tables.py +0 -0
  33. {xradio-0.0.33 → xradio-0.0.36}/src/xradio/_utils/common.py +0 -0
  34. {xradio-0.0.33 → xradio-0.0.36}/src/xradio/_utils/zarr/__init__.py +0 -0
  35. {xradio-0.0.33 → xradio-0.0.36}/src/xradio/image/__init__.py +0 -0
  36. {xradio-0.0.33 → xradio-0.0.36}/src/xradio/image/_util/__init__.py +0 -0
  37. {xradio-0.0.33 → xradio-0.0.36}/src/xradio/image/_util/_casacore/__init__.py +0 -0
  38. {xradio-0.0.33 → xradio-0.0.36}/src/xradio/image/_util/_casacore/common.py +0 -0
  39. {xradio-0.0.33 → xradio-0.0.36}/src/xradio/image/_util/_casacore/xds_from_casacore.py +0 -0
  40. {xradio-0.0.33 → xradio-0.0.36}/src/xradio/image/_util/_casacore/xds_to_casacore.py +0 -0
  41. {xradio-0.0.33 → xradio-0.0.36}/src/xradio/image/_util/_fits/xds_from_fits.py +0 -0
  42. {xradio-0.0.33 → xradio-0.0.36}/src/xradio/image/_util/_zarr/common.py +0 -0
  43. {xradio-0.0.33 → xradio-0.0.36}/src/xradio/image/_util/_zarr/xds_to_zarr.py +0 -0
  44. {xradio-0.0.33 → xradio-0.0.36}/src/xradio/image/_util/casacore.py +0 -0
  45. {xradio-0.0.33 → xradio-0.0.36}/src/xradio/image/_util/common.py +0 -0
  46. {xradio-0.0.33 → xradio-0.0.36}/src/xradio/image/_util/fits.py +0 -0
  47. {xradio-0.0.33 → xradio-0.0.36}/src/xradio/image/_util/image_factory.py +0 -0
  48. {xradio-0.0.33 → xradio-0.0.36}/src/xradio/image/_util/zarr.py +0 -0
  49. {xradio-0.0.33 → xradio-0.0.36}/src/xradio/image/image.py +0 -0
  50. {xradio-0.0.33 → xradio-0.0.36}/src/xradio/schema/__init__.py +0 -0
  51. {xradio-0.0.33 → xradio-0.0.36}/src/xradio/schema/metamodel.py +0 -0
  52. {xradio-0.0.33 → xradio-0.0.36}/src/xradio/schema/typing.py +0 -0
  53. {xradio-0.0.33 → xradio-0.0.36}/src/xradio/vis/__init__.py +0 -0
  54. {xradio-0.0.33 → xradio-0.0.36}/src/xradio/vis/_vis_utils/__init__.py +0 -0
  55. {xradio-0.0.33 → xradio-0.0.36}/src/xradio/vis/_vis_utils/_ms/_tables/load.py +0 -0
  56. {xradio-0.0.33 → xradio-0.0.36}/src/xradio/vis/_vis_utils/_ms/_tables/load_main_table.py +0 -0
  57. {xradio-0.0.33 → xradio-0.0.36}/src/xradio/vis/_vis_utils/_ms/_tables/read_main_table.py +0 -0
  58. {xradio-0.0.33 → xradio-0.0.36}/src/xradio/vis/_vis_utils/_ms/_tables/read_subtables.py +0 -0
  59. {xradio-0.0.33 → xradio-0.0.36}/src/xradio/vis/_vis_utils/_ms/_tables/table_query.py +0 -0
  60. {xradio-0.0.33 → xradio-0.0.36}/src/xradio/vis/_vis_utils/_ms/_tables/write.py +0 -0
  61. {xradio-0.0.33 → xradio-0.0.36}/src/xradio/vis/_vis_utils/_ms/_tables/write_exp_api.py +0 -0
  62. {xradio-0.0.33 → xradio-0.0.36}/src/xradio/vis/_vis_utils/_ms/chunks.py +0 -0
  63. {xradio-0.0.33 → xradio-0.0.36}/src/xradio/vis/_vis_utils/_ms/descr.py +0 -0
  64. {xradio-0.0.33 → xradio-0.0.36}/src/xradio/vis/_vis_utils/_ms/msv2_msv3.py +0 -0
  65. {xradio-0.0.33 → xradio-0.0.36}/src/xradio/vis/_vis_utils/_ms/msv4_infos.py +0 -0
  66. {xradio-0.0.33 → xradio-0.0.36}/src/xradio/vis/_vis_utils/_ms/optimised_functions.py +0 -0
  67. {xradio-0.0.33 → xradio-0.0.36}/src/xradio/vis/_vis_utils/_ms/partition_queries.py +0 -0
  68. {xradio-0.0.33 → xradio-0.0.36}/src/xradio/vis/_vis_utils/_ms/partitions.py +0 -0
  69. {xradio-0.0.33 → xradio-0.0.36}/src/xradio/vis/_vis_utils/_ms/subtables.py +0 -0
  70. {xradio-0.0.33 → xradio-0.0.36}/src/xradio/vis/_vis_utils/_utils/cds.py +0 -0
  71. {xradio-0.0.33 → xradio-0.0.36}/src/xradio/vis/_vis_utils/_utils/partition_attrs.py +0 -0
  72. {xradio-0.0.33 → xradio-0.0.36}/src/xradio/vis/_vis_utils/_utils/stokes_types.py +0 -0
  73. {xradio-0.0.33 → xradio-0.0.36}/src/xradio/vis/_vis_utils/_zarr/encoding.py +0 -0
  74. {xradio-0.0.33 → xradio-0.0.36}/src/xradio/vis/_vis_utils/_zarr/read.py +0 -0
  75. {xradio-0.0.33 → xradio-0.0.36}/src/xradio/vis/_vis_utils/_zarr/write.py +0 -0
  76. {xradio-0.0.33 → xradio-0.0.36}/src/xradio/vis/_vis_utils/ms.py +0 -0
  77. {xradio-0.0.33 → xradio-0.0.36}/src/xradio/vis/_vis_utils/zarr.py +0 -0
  78. {xradio-0.0.33 → xradio-0.0.36}/src/xradio.egg-info/dependency_links.txt +0 -0
  79. {xradio-0.0.33 → xradio-0.0.36}/src/xradio.egg-info/requires.txt +0 -0
  80. {xradio-0.0.33 → xradio-0.0.36}/src/xradio.egg-info/top_level.txt +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: xradio
3
- Version: 0.0.33
3
+ Version: 0.0.36
4
4
  Summary: Xarray Radio Astronomy Data IO
5
5
  Author-email: Jan-Willem Steeb <jsteeb@nrao.edu>
6
6
  License: BSD 3-Clause License
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "xradio"
3
- version = "0.0.33"
3
+ version = "0.0.36"
4
4
  description = " Xarray Radio Astronomy Data IO"
5
5
  authors = [
6
6
  {name = "Jan-Willem Steeb", email="jsteeb@nrao.edu"},
@@ -5,10 +5,12 @@ import xarray as xr
5
5
 
6
6
 
7
7
  def to_list(x):
8
- if isinstance(x, (list, np.ndarray)):
8
+ if isinstance(x, np.ndarray):
9
9
  if x.ndim == 0:
10
10
  return [x.item()]
11
11
  return list(x) # needed for json serialization
12
+ elif isinstance(x, list):
13
+ return x
12
14
  return [x]
13
15
 
14
16
 
@@ -1,81 +1,87 @@
1
1
  import graphviper.utils.logger as logger
2
-
3
- col_to_data_variable_names = {
4
- "FLOAT_DATA": "SPECTRUM",
5
- "DATA": "VISIBILITY",
6
- "CORRECTED_DATA": "VISIBILITY_CORRECTED",
7
- "MODEL_DATA": "VISIBILITY_MODEL",
8
- "WEIGHT_SPECTRUM": "WEIGHT",
9
- "WEIGHT": "WEIGHT",
10
- "FLAG": "FLAG",
11
- "UVW": "UVW",
12
- "TIME_CENTROID": "TIME_CENTROID",
13
- "EXPOSURE": "EFFECTIVE_INTEGRATION_TIME",
14
- }
15
- col_dims = {
16
- "DATA": ("time", "baseline_id", "frequency", "polarization"),
17
- "CORRECTED_DATA": ("time", "baseline_id", "frequency", "polarization"),
18
- "MODEL_DATA": ("time", "baseline_id", "frequency", "polarization"),
19
- "WEIGHT_SPECTRUM": ("time", "baseline_id", "frequency", "polarization"),
20
- "WEIGHT": ("time", "baseline_id", "frequency", "polarization"),
21
- "FLAG": ("time", "baseline_id", "frequency", "polarization"),
22
- "UVW": ("time", "baseline_id", "uvw_label"),
23
- "TIME_CENTROID": ("time", "baseline_id"),
24
- "EXPOSURE": ("time", "baseline_id"),
25
- "FLOAT_DATA": ("time", "baseline_id", "frequency", "polarization"),
26
- }
27
- col_to_coord_names = {
28
- "TIME": "time",
29
- "ANTENNA1": "baseline_ant1_id",
30
- "ANTENNA2": "baseline_ant2_id",
31
- }
32
-
33
- # Map casacore measures to astropy
34
- casacore_to_msv4_measure_type = {
35
- "quanta": {
36
- "type": "quantity",
37
- },
38
- "direction": {"type": "sky_coord", "Ref": "frame", "Ref_map": {"J2000": "fk5"}},
39
- "epoch": {"type": "time", "Ref": "scale", "Ref_map": {"UTC": "utc"}},
40
- "frequency": {
41
- "type": "spectral_coord",
42
- "Ref": "frame",
43
- "Ref_map": {
44
- "REST": "REST",
45
- "LSRK": "LSRK",
46
- "LSRD": "LSRD",
47
- "BARY": "BARY",
48
- "GEO": "GEO",
49
- "TOPO": "TOPO",
50
- "GALACTO": "GALACTO",
51
- "LGROUP": "LGROUP",
52
- "CMB": "CMB",
53
- "Undefined": "Undefined",
54
- },
55
- },
56
- "position": {
57
- "type": "earth_location",
58
- "Ref": "ellipsoid",
59
- "Ref_map": {"ITRF": "GRS80"},
60
- },
61
- "uvw": {"type": "uvw", "Ref": "frame", "Ref_map": {"ITRF": "GRS80"}},
62
- "radialvelocity": {"type": "quantity"},
63
- }
64
-
65
- casa_frequency_frames = [
66
- "REST",
67
- "LSRK",
68
- "LSRD",
69
- "BARY",
70
- "GEO",
71
- "TOPO",
72
- "GALACTO",
73
- "LGROUP",
74
- "CMB",
75
- "Undefined",
76
- ]
77
-
78
- casa_frequency_frames_codes = [0, 1, 2, 3, 4, 5, 6, 7, 8, 64]
2
+ import xarray as xr
3
+
4
+
5
+ def convert_generic_xds_to_xradio_schema(
6
+ generic_xds: xr.Dataset,
7
+ msv4_xds: xr.Dataset,
8
+ to_new_data_variables: dict,
9
+ to_new_coords: dict,
10
+ ) -> xr.Dataset:
11
+ """Converts a generic xarray Dataset to the xradio schema.
12
+
13
+ This function takes a generic xarray Dataset and converts it to an xradio schema
14
+ represented by the msv4_xds Dataset. It performs the conversion based on the provided
15
+ mappings in the to_new_data_variables and to_new_coords dictionaries.
16
+
17
+ Parameters
18
+ ----------
19
+ generic_xds : xr.Dataset
20
+ The generic xarray Dataset to be converted.
21
+ msv4_xds : xr.Dataset
22
+ The xradio schema represented by the msv4_xds Dataset.
23
+ to_new_data_variables : dict
24
+ A dictionary mapping the data variables/coordinates in the generic_xds Dataset to the new data variables
25
+ in the msv4_xds Dataset. The keys are the old data variables/coordinates and the values are a list of the new name and a list of the new dimension names.
26
+ to_new_coords : dict
27
+ A dictionary mapping data variables/coordinates in the generic_xds Dataset to the new coordinates
28
+ in the msv4_xds Dataset. The keys are the old data variables/coordinates and the values are a list of the new name and a list of the new dimension names.
29
+
30
+ Returns
31
+ -------
32
+ xr.Dataset
33
+ The converted xradio schema represented by the msv4_xds Dataset.
34
+
35
+ Notes
36
+ -----
37
+ Example to_new_data_variables:
38
+ to_new_data_variables = {
39
+ "POSITION": ["ANTENNA_POSITION",["name", "cartesian_pos_label"]],
40
+ "OFFSET": ["ANTENNA_FEED_OFFSET",["name", "cartesian_pos_label"]],
41
+ "DISH_DIAMETER": ["ANTENNA_DISH_DIAMETER",["name"]],
42
+ }
43
+
44
+ Example to_new_coords:
45
+ to_new_coords = {
46
+ "NAME": ["name",["name"]],
47
+ "STATION": ["station",["name"]],
48
+ "MOUNT": ["mount",["name"]],
49
+ "PHASED_ARRAY_ID": ["phased_array_id",["name"]],
50
+ "antenna_id": ["antenna_id",["name"]],
51
+ }
52
+ """
53
+
54
+ column_description = generic_xds.attrs["other"]["msv2"]["ctds_attrs"][
55
+ "column_descriptions"
56
+ ]
57
+ coords = {}
58
+
59
+ name_keys = list(generic_xds.data_vars.keys()) + list(generic_xds.coords.keys())
60
+
61
+ for key in name_keys:
62
+
63
+ if key in column_description:
64
+ msv4_measure = column_description_casacore_to_msv4_measure(
65
+ column_description[key]
66
+ )
67
+ else:
68
+ msv4_measure = None
69
+
70
+ if key in to_new_data_variables:
71
+ new_dv = to_new_data_variables[key]
72
+ msv4_xds[new_dv[0]] = xr.DataArray(generic_xds[key].data, dims=new_dv[1])
73
+
74
+ if msv4_measure:
75
+ msv4_xds[new_dv[0]].attrs.update(msv4_measure)
76
+
77
+ if key in to_new_coords:
78
+ new_coord = to_new_coords[key]
79
+ coords[new_coord[0]] = (
80
+ new_coord[1],
81
+ generic_xds[key].data,
82
+ )
83
+ msv4_xds = msv4_xds.assign_coords(coords)
84
+ return msv4_xds
79
85
 
80
86
 
81
87
  def column_description_casacore_to_msv4_measure(
@@ -136,13 +142,49 @@ def column_description_casacore_to_msv4_measure(
136
142
  return msv4_measure
137
143
 
138
144
 
139
- def create_attribute_metadata(col, main_column_descriptions):
140
- attrs_metadata = column_description_casacore_to_msv4_measure(
141
- main_column_descriptions[col]
142
- )
143
- if col in ["DATA", "CORRECTED_DATA", "WEIGHT"]:
144
- if not attrs_metadata:
145
- attrs_metadata["type"] = "quanta"
146
- attrs_metadata["units"] = ["unkown"]
145
+ # Map casacore measures to astropy
146
+ casacore_to_msv4_measure_type = {
147
+ "quanta": {
148
+ "type": "quantity",
149
+ },
150
+ "direction": {"type": "sky_coord", "Ref": "frame", "Ref_map": {"J2000": "fk5"}},
151
+ "epoch": {"type": "time", "Ref": "scale", "Ref_map": {"UTC": "utc"}},
152
+ "frequency": {
153
+ "type": "spectral_coord",
154
+ "Ref": "frame",
155
+ "Ref_map": {
156
+ "REST": "REST",
157
+ "LSRK": "LSRK",
158
+ "LSRD": "LSRD",
159
+ "BARY": "BARY",
160
+ "GEO": "GEO",
161
+ "TOPO": "TOPO",
162
+ "GALACTO": "GALACTO",
163
+ "LGROUP": "LGROUP",
164
+ "CMB": "CMB",
165
+ "Undefined": "Undefined",
166
+ },
167
+ },
168
+ "position": {
169
+ "type": "earth_location",
170
+ "Ref": "ellipsoid",
171
+ "Ref_map": {"ITRF": "GRS80"},
172
+ },
173
+ "uvw": {"type": "uvw", "Ref": "frame", "Ref_map": {"ITRF": "GRS80"}},
174
+ "radialvelocity": {"type": "quantity"},
175
+ }
176
+
177
+ casa_frequency_frames = [
178
+ "REST",
179
+ "LSRK",
180
+ "LSRD",
181
+ "BARY",
182
+ "GEO",
183
+ "TOPO",
184
+ "GALACTO",
185
+ "LGROUP",
186
+ "CMB",
187
+ "Undefined",
188
+ ]
147
189
 
148
- return attrs_metadata
190
+ casa_frequency_frames_codes = [0, 1, 2, 3, 4, 5, 6, 7, 8, 64]
@@ -5,11 +5,18 @@ import s3fs
5
5
  import os
6
6
  from botocore.exceptions import NoCredentialsError
7
7
 
8
+ # from xradio.vis._vis_utils._ms.msv2_to_msv4_meta import (
9
+ # column_description_casacore_to_msv4_measure,
10
+ # )
8
11
 
9
- def _get_ms_stores_and_file_system(ps_store: str):
10
12
 
13
+ def _get_file_system_and_items(ps_store: str):
14
+
15
+ # default to assuming the data are accessible on local file system
11
16
  if os.path.isdir(ps_store):
12
- # default to assuming the data are accessible on local file system
17
+ # handle a common shell convention
18
+ if ps_store.startswith("~"):
19
+ ps_store = os.path.expanduser(ps_store)
13
20
  items = os.listdir(ps_store)
14
21
  file_system = os
15
22
 
@@ -36,9 +43,8 @@ def _get_ms_stores_and_file_system(ps_store: str):
36
43
  for bd in file_system.listdir(ps_store, detail=False)
37
44
  ]
38
45
  else:
39
- raise (
40
- FileNotFoundError,
41
- f"Could not find {ps_store} either locally or in the cloud.",
46
+ raise FileNotFoundError(
47
+ f"Could not find {ps_store} either locally or in the cloud."
42
48
  )
43
49
 
44
50
  items = [
@@ -5,8 +5,10 @@ import dask.array as da
5
5
  import numpy as np
6
6
  import os
7
7
  import xarray as xr
8
+ import s3fs
8
9
  from .common import _np_types, _top_level_sub_xds
9
10
  from ..common import _coords_to_numpy, _dask_arrayize_dv, _numpy_arrayize_dv
11
+ from xradio._utils.zarr.common import _get_file_system_and_items
10
12
 
11
13
 
12
14
  def _read_zarr(
@@ -85,8 +87,18 @@ def _decode_dict(my_dict: dict, top_key: str) -> dict:
85
87
 
86
88
  def _decode_sub_xdses(zarr_store: str, output: dict) -> dict:
87
89
  sub_xdses = {}
88
- for root, dirs, files in os.walk(zarr_store):
89
- # top down walk
90
+
91
+ fs, store_contents = _get_file_system_and_items(zarr_store)
92
+
93
+ if isinstance(fs, s3fs.core.S3FileSystem):
94
+ # could we just use the items as returned from the helper function..?
95
+ store_tree = fs.walk(zarr_store, topdown=True)
96
+ prepend_s3 = "s3://"
97
+ else:
98
+ store_tree = os.walk(zarr_store, topdown=True)
99
+ prepend_s3 = ""
100
+
101
+ for root, dirs, files in store_tree:
90
102
  for d in dirs:
91
103
  if d.startswith(_top_level_sub_xds):
92
104
  xds = _read_zarr(os.sep.join([root, d]), output)
@@ -94,4 +106,5 @@ def _decode_sub_xdses(zarr_store: str, output: dict) -> dict:
94
106
  # xds = xds.drop_vars([k]).assign({k: v.compute()})
95
107
  ky = d[len(_top_level_sub_xds) + 1 :]
96
108
  sub_xdses[ky] = xds
109
+
97
110
  return sub_xdses
@@ -1,6 +1,9 @@
1
1
  import os
2
2
  import numpy as np
3
3
  import json
4
+ import zarr
5
+ import s3fs
6
+ from xradio._utils.zarr.common import _get_file_system_and_items
4
7
 
5
8
  from numcodecs.compat import (
6
9
  ensure_text,
@@ -246,19 +249,29 @@ def write_json_file(data, file_path):
246
249
  )
247
250
 
248
251
 
249
- def create_data_variable_meta_data_on_disk(
252
+ def create_data_variable_meta_data(
250
253
  zarr_group_name, data_variables_and_dims, xds_dims, parallel_coords, compressor
251
254
  ):
252
255
  zarr_meta = data_variables_and_dims
253
256
 
257
+ fs, items = _get_file_system_and_items(zarr_group_name)
258
+
254
259
  for data_variable_key, dims_dtype_name in data_variables_and_dims.items():
255
260
  # print(data_variable_key, dims_dtype_name)
256
261
 
257
262
  dims = dims_dtype_name["dims"]
258
263
  dtype = dims_dtype_name["dtype"]
259
264
  data_variable_name = dims_dtype_name["name"]
265
+
260
266
  data_variable_path = os.path.join(zarr_group_name, data_variable_name)
261
- os.system("mkdir " + data_variable_path)
267
+ if isinstance(fs, s3fs.core.S3FileSystem):
268
+ # N.b.,stateful "folder creation" is not a well defined concept for S3 objects and URIs
269
+ # see https://github.com/fsspec/s3fs/issues/401
270
+ # nor is a path specifier (cf. "URI")
271
+ fs.mkdir(data_variable_path)
272
+ else:
273
+ # default to assuming we can use the os module and mkdir system call
274
+ os.system("mkdir " + data_variable_path)
262
275
  # Create .zattrs
263
276
  zattrs = {
264
277
  "_ARRAY_DIMENSIONS": dims,
@@ -275,7 +288,23 @@ def create_data_variable_meta_data_on_disk(
275
288
  chunks.append(xds_dims[d])
276
289
 
277
290
  # print(chunks,shape)
278
- write_json_file(zattrs, os.path.join(data_variable_path, ".zattrs"))
291
+ # assuming data_variable_path has been set compatibly
292
+ zattrs_file = os.path.join(data_variable_path, ".zattrs")
293
+
294
+ if isinstance(fs, s3fs.core.S3FileSystem):
295
+ with fs.open(zattrs_file, "w") as file:
296
+ json.dump(
297
+ zattrs,
298
+ file,
299
+ indent=4,
300
+ sort_keys=True,
301
+ ensure_ascii=True,
302
+ separators=(",", ": "),
303
+ cls=NumberEncoder,
304
+ )
305
+ else:
306
+ # default to assuming we can use primitives
307
+ write_json_file(zattrs, zattrs_file)
279
308
 
280
309
  # Create .zarray
281
310
  from zarr import n5
@@ -311,36 +340,58 @@ def create_data_variable_meta_data_on_disk(
311
340
  zarr_meta[data_variable_key]["chunks"] = chunks
312
341
  zarr_meta[data_variable_key]["shape"] = shape
313
342
 
314
- write_json_file(zarray, os.path.join(data_variable_path, ".zarray"))
343
+ # again, assuming data_variable_path has been set compatibly
344
+ zarray_file = os.path.join(data_variable_path, ".zarray")
345
+
346
+ if isinstance(fs, s3fs.core.S3FileSystem):
347
+ with fs.open(zarray_file, "w") as file:
348
+ json.dump(
349
+ zarray,
350
+ file,
351
+ indent=4,
352
+ sort_keys=True,
353
+ ensure_ascii=True,
354
+ separators=(",", ": "),
355
+ cls=NumberEncoder,
356
+ )
357
+ else:
358
+ # default to assuming we can use primitives
359
+ write_json_file(zarray, zarray_file)
360
+
315
361
  return zarr_meta
316
362
 
317
363
 
318
364
  def write_chunk(img_xds, meta, parallel_dims_chunk_id, compressor, image_file):
319
365
  dims = meta["dims"]
320
366
  dtype = meta["dtype"]
321
- data_varaible_name = meta["name"]
367
+ data_variable_name = meta["name"]
322
368
  chunks = meta["chunks"]
323
369
  shape = meta["shape"]
324
370
  chunk_name = ""
325
- if data_varaible_name in img_xds:
326
- for d in img_xds[data_varaible_name].dims:
371
+ if data_variable_name in img_xds:
372
+ for d in img_xds[data_variable_name].dims:
327
373
  if d in parallel_dims_chunk_id:
328
374
  chunk_name = chunk_name + str(parallel_dims_chunk_id[d]) + "."
329
375
  else:
330
376
  chunk_name = chunk_name + "0."
331
377
  chunk_name = chunk_name[:-1]
332
378
 
333
- if list(img_xds[data_varaible_name].shape) != list(chunks):
379
+ if list(img_xds[data_variable_name].shape) != list(chunks):
334
380
  array = pad_array_with_nans(
335
- img_xds[data_varaible_name].values,
381
+ img_xds[data_variable_name].values,
336
382
  output_shape=chunks,
337
383
  dtype=dtype,
338
384
  )
339
385
  else:
340
- array = img_xds[data_varaible_name].values
341
-
342
- write_binary_blob_to_disk(
343
- array,
344
- file_path=os.path.join(image_file, data_varaible_name, chunk_name),
386
+ array = img_xds[data_variable_name].values
387
+
388
+ z_chunk = zarr.open(
389
+ os.path.join(image_file, data_variable_name, chunk_name),
390
+ mode="a",
391
+ shape=meta["shape"],
392
+ chunks=meta["chunks"],
393
+ dtype=meta["dtype"],
345
394
  compressor=compressor,
346
395
  )
396
+
397
+ return z_chunk
@@ -284,6 +284,22 @@ def _dataset_new(cls, *args, data_vars=None, coords=None, attrs=None, **kwargs):
284
284
  val = _np_convert(
285
285
  _set_parameter(coords.get(coord.name), mapping.arguments, coord), coord
286
286
  )
287
+ # Determine dimensions / convert to Variable
288
+ if (
289
+ val is not None
290
+ and not isinstance(val, xarray.DataArray)
291
+ and not isinstance(val, xarray.Variable)
292
+ and not isinstance(val, tuple)
293
+ ):
294
+ default_attrs = {
295
+ attr.name: attr.default
296
+ for attr in coord.attributes
297
+ if attr.default is not None
298
+ }
299
+ for dims in coord.dimensions:
300
+ if len(dims) == len(val.shape):
301
+ val = xarray.Variable(dims, val, default_attrs)
302
+ break
287
303
  if val is not None:
288
304
  coords[coord.name] = val
289
305
  for data_var in schema.data_vars:
@@ -291,7 +307,9 @@ def _dataset_new(cls, *args, data_vars=None, coords=None, attrs=None, **kwargs):
291
307
 
292
308
  # Determine dimensions / convert to Variable
293
309
  dims = None
294
- if isinstance(val, xarray.Variable):
310
+ if val is None:
311
+ dims = None
312
+ elif isinstance(val, xarray.Variable):
295
313
  dims = val.dims
296
314
  elif isinstance(val, xarray.DataArray):
297
315
  val = val.variable
@@ -316,18 +334,29 @@ def _dataset_new(cls, *args, data_vars=None, coords=None, attrs=None, **kwargs):
316
334
  f" expected {' or '.join(options)}!"
317
335
  )
318
336
 
337
+ # Get default attributes
338
+ default_attrs = {
339
+ attr.name: attr.default
340
+ for attr in data_var.attributes
341
+ if attr.default is not dataclasses.MISSING
342
+ }
343
+
319
344
  # Replace by variable
320
- val = xarray.Variable(dims, val)
345
+ val = xarray.Variable(dims, val, default_attrs)
321
346
 
322
347
  # Default coordinates used by this data variable to numpy arange. We
323
348
  # can only do this now because we need an example to determine the
324
349
  # intended size of the coordinate
325
- for coord in schema.coordinates:
326
- if coord.name in dims and coords.get(coord.name) is None:
327
- dim_ix = dims.index(coord.name)
328
- if dim_ix is not None and dim_ix < len(val.shape):
329
- dtype = coord.dtypes[0]
330
- coords[coord.name] = numpy.arange(val.shape[dim_ix], dtype=dtype)
350
+ if dims is not None:
351
+ for coord in schema.coordinates:
352
+ if coord.name in dims and coords.get(coord.name) is None:
353
+ dim_ix = dims.index(coord.name)
354
+ if dim_ix is not None and dim_ix < len(val.shape):
355
+ dtype = coord.dtypes[0]
356
+ if numpy.issubdtype(dtype, numpy.number):
357
+ coords[coord.name] = numpy.arange(
358
+ val.shape[dim_ix], dtype=dtype
359
+ )
331
360
 
332
361
  if val is not None:
333
362
  data_vars[data_var.name] = val
@@ -58,7 +58,10 @@ class SchemaIssue:
58
58
  err = f"Schema issue with {self.path_str()}: {self.message}"
59
59
  if self.expected is not None:
60
60
  options = " or ".join(repr(option) for option in self.expected)
61
- err += f" (expected: {options} found: {repr(self.found)})"
61
+ if self.found is not None:
62
+ err += f" (expected: {options} found: {repr(self.found)})"
63
+ else:
64
+ err += f" (expected: {options})"
62
65
  return err
63
66
 
64
67
 
@@ -255,7 +258,7 @@ def check_dimensions(
255
258
  if hint_remove and hint_add:
256
259
  message = f"Unexpected coordinates, replace {','.join(hint_remove)} by {','.join(hint_add)}?"
257
260
  elif hint_remove:
258
- message = f"Superflous coordinate {','.join(hint_remove)}?"
261
+ message = f"Superfluous coordinate {','.join(hint_remove)}?"
259
262
  elif hint_add:
260
263
  message = f"Missing dimension {','.join(hint_add)}!"
261
264
  else:
@@ -381,7 +384,8 @@ def check_data_vars(
381
384
  )
382
385
  else:
383
386
  message = (
384
- f"Required data variable '{data_var_schema.name}' is missing!"
387
+ f"Required data variable '{data_var_schema.name}' is missing "
388
+ f"(have {','.join(data_vars)})!"
385
389
  )
386
390
  issues.add(
387
391
  SchemaIssue(
@@ -440,6 +444,14 @@ def _check_value(val, ann):
440
444
  )
441
445
  ]
442
446
  )
447
+ except TypeError as e:
448
+ return SchemaIssues(
449
+ [
450
+ SchemaIssue(
451
+ path=[], message=str(e), expected=[ann], found=type(val)
452
+ )
453
+ ]
454
+ )
443
455
 
444
456
  if not isinstance(val, xarray.DataArray):
445
457
  # Fall through to plain type check
@@ -108,8 +108,8 @@ def extract_xarray_dataclass(klass):
108
108
  is_coord = False
109
109
  else:
110
110
  raise ValueError(
111
- f"Unexpected role in '{klass.__name__}',"
112
- f" field '{field.name}': {get_role(typ)}"
111
+ f"Expected field '{field.name}' in '{klass.__name__}' "
112
+ "to be annotated with either Coord, Data or Attr!"
113
113
  )
114
114
 
115
115
  # Defined using a dataclass, i.e. Coordof/Dataof?