xradio 0.0.34__py3-none-any.whl → 0.0.37__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -5,17 +5,10 @@ from typing import Tuple, Union
5
5
  import numpy as np
6
6
  import xarray as xr
7
7
 
8
- from .msv2_to_msv4_meta import column_description_casacore_to_msv4_measure
8
+ from xradio._utils.schema import column_description_casacore_to_msv4_measure
9
9
  from .subtables import subt_rename_ids
10
10
  from ._tables.read import make_taql_where_between_min_max, load_generic_table
11
11
 
12
- from xradio._utils.list_and_array import (
13
- check_if_consistent,
14
- unique_1d,
15
- to_list,
16
- to_np_array,
17
- )
18
-
19
12
 
20
13
  def interpolate_to_time(
21
14
  xds: xr.Dataset,
@@ -63,219 +56,6 @@ def interpolate_to_time(
63
56
  return xds
64
57
 
65
58
 
66
- def create_ant_xds(
67
- in_file: str,
68
- spectral_window_id: int,
69
- antenna_id: list,
70
- feed_id: list,
71
- telescope_name: str,
72
- ):
73
- """
74
- Creates an Antenna Xarray Dataset from a MS v2 ANTENNA table.
75
-
76
- Parameters
77
- ----------
78
- in_file : str
79
- Input MS name.
80
-
81
- Returns
82
- -------
83
- xr.Dataset
84
- Antenna Xarray Dataset.
85
- """
86
- # generic_obs_xds = load_generic_table(
87
- # in_file,
88
- # "OBSERVATION",
89
- # taql_where=f" where (ROWID() IN [{','.join(map(str,unique_antenna_id))}])", # order is not guaranteed
90
- # )
91
-
92
- # Dictionaries that define the conversion from MSv2 to MSv4:
93
- to_new_data_variable_names = {
94
- "POSITION": "ANTENNA_POSITION",
95
- "OFFSET": "ANTENNA_FEED_OFFSET",
96
- "DISH_DIAMETER": "ANTENNA_DISH_DIAMETER",
97
- }
98
- data_variable_dims = {
99
- "POSITION": ["antenna_id", "cartesian_pos_label"],
100
- "OFFSET": ["antenna_id", "cartesian_pos_label"],
101
- "DISH_DIAMETER": ["antenna_id"],
102
- }
103
- to_new_coord_names = {
104
- "NAME": "name",
105
- "STATION": "station",
106
- "MOUNT": "mount",
107
- "PHASED_ARRAY_ID": "phased_array_id",
108
- }
109
-
110
- # coord_dims = {
111
- # "name": ["antenna_id"],
112
- # "station": ["antenna_id"],
113
- # "mount": ["antenna_id"],
114
- # "phased_array_id": ["antenna_id"],
115
- # "POSITION": "POSITION",
116
- # "OFFSET": "FEED_OFFSET",
117
- # "DISH_DIAMETER": "DISH_DIAMETER",
118
- # }
119
- # data_variable_dims = {
120
- # "POSITION": ["antenna_id", "xyz_label"],
121
- # "OFFSET": ["antenna_id", "xyz_label"],
122
- # "DISH_DIAMETER": ["antenna_id"],
123
- # }
124
- # to_new_coord_names = {
125
- # "NAME": "name",
126
- # "STATION": "station",
127
- # "TYPE": "type",
128
- # "MOUNT": "mount",
129
- # "PHASED_ARRAY_ID": "phased_array_id",
130
- # }
131
- coord_dims = {
132
- "NAME": ["antenna_id"],
133
- "STATION": ["antenna_id"],
134
- "TYPE": ["antenna_id"],
135
- "MOUNT": ["antenna_id"],
136
- "PHASED_ARRAY_ID": ["antenna_id"],
137
- }
138
-
139
- # Read ANTENNA table into a Xarray Dataset.
140
- unique_antenna_id = unique_1d(
141
- antenna_id
142
- ) # Also ensures that it is sorted otherwise TaQL will give wrong results.
143
-
144
- generic_ant_xds = load_generic_table(
145
- in_file,
146
- "ANTENNA",
147
- rename_ids=subt_rename_ids["ANTENNA"],
148
- taql_where=f" where (ROWID() IN [{','.join(map(str,unique_antenna_id))}])", # order is not guaranteed
149
- )
150
- generic_ant_xds = generic_ant_xds.assign_coords({"antenna_id": unique_antenna_id})
151
- generic_ant_xds = generic_ant_xds.sel(
152
- antenna_id=antenna_id, drop=False
153
- ) # Make sure the antenna_id order is correct.
154
-
155
- ant_column_description = generic_ant_xds.attrs["other"]["msv2"]["ctds_attrs"][
156
- "column_descriptions"
157
- ]
158
-
159
- # ['OFFSET', 'POSITION', 'DISH_DIAMETER', 'FLAG_ROW', 'MOUNT', 'NAME', 'STATION']
160
- ant_xds = xr.Dataset()
161
- ant_xds = ant_xds.assign_coords(
162
- {"antenna_id": antenna_id, "cartesian_pos_label": ["x", "y", "z"]}
163
- )
164
-
165
- coords = {}
166
- for key in generic_ant_xds:
167
- msv4_measure = column_description_casacore_to_msv4_measure(
168
- ant_column_description[key.upper()]
169
- )
170
- if key in to_new_data_variable_names:
171
- ant_xds[to_new_data_variable_names[key]] = xr.DataArray(
172
- generic_ant_xds[key].data, dims=data_variable_dims[key]
173
- )
174
-
175
- if msv4_measure:
176
- ant_xds[to_new_data_variable_names[key]].attrs.update(msv4_measure)
177
-
178
- if key in ["DISH_DIAMETER"]:
179
- ant_xds[to_new_data_variable_names[key]].attrs.update(
180
- {"units": ["m"], "type": "quantity"}
181
- )
182
-
183
- if key in to_new_coord_names:
184
- coords[to_new_coord_names[key]] = (
185
- coord_dims[key],
186
- generic_ant_xds[key].data,
187
- )
188
-
189
- ant_xds["ANTENNA_FEED_OFFSET"].attrs["type"] = "earth_location_offset"
190
- ant_xds["ANTENNA_FEED_OFFSET"].attrs["coordinate_system"] = "geocentric"
191
- ant_xds["ANTENNA_POSITION"].attrs["coordinate_system"] = "geocentric"
192
-
193
- # Extract feed information
194
- generic_feed_xds = load_generic_table(
195
- in_file,
196
- "FEED",
197
- rename_ids=subt_rename_ids["FEED"],
198
- taql_where=f" where (ANTENNA_ID IN [{','.join(map(str, unique_antenna_id))}]) AND (FEED_ID IN [{','.join(map(str, feed_id))}])",
199
- ) # Some Lofar and MeerKAT data have the spw column set to -1 so we can't use '(SPECTRAL_WINDOW_ID = {spectral_window_id})'
200
-
201
- if "SPECTRAL_WINDOW_ID" in generic_feed_xds and not all(
202
- generic_feed_xds.SPECTRAL_WINDOW_ID == -1
203
- ):
204
- generic_feed_xds = generic_feed_xds.where(
205
- generic_feed_xds.SPECTRAL_WINDOW_ID == spectral_window_id, drop=True
206
- )
207
- if "row" in generic_feed_xds and len(generic_feed_xds.row) > 0:
208
- # Some times the feed table is empty (this is the case with ALMA spw WVR#NOMINAL).
209
- assert len(generic_feed_xds.ANTENNA_ID) == len(
210
- ant_xds.antenna_id
211
- ), "Can only process feed table with a single time entry for an antenna and spectral_window_id."
212
- generic_feed_xds = generic_feed_xds.set_xindex(
213
- "ANTENNA_ID"
214
- ) # Allows for non-dimension coordinate selection.
215
- generic_feed_xds = generic_feed_xds.sel(
216
- ANTENNA_ID=ant_xds.antenna_id
217
- ) # Make sure the antenna_id is in the same order as the xds.
218
-
219
- num_receptors = np.ravel(generic_feed_xds.NUM_RECEPTORS)
220
- num_receptors = unique_1d(num_receptors[~np.isnan(num_receptors)])
221
-
222
- assert (
223
- len(num_receptors) == 1
224
- ), "The number of receptors must be constant in feed table."
225
-
226
- feed_column_description = generic_feed_xds.attrs["other"]["msv2"]["ctds_attrs"][
227
- "column_descriptions"
228
- ]
229
-
230
- to_new_data_variable_names = {
231
- "BEAM_OFFSET": "BEAM_OFFSET",
232
- "RECEPTOR_ANGLE": "RECEPTOR_ANGLE",
233
- "POLARIZATION_TYPE": "POLARIZATION_TYPE",
234
- # "pol_response": "POLARIZATION_RESPONSE", ?repeated dim creates problems.
235
- "FOCUS_LENGTH": "FOCUS_LENGTH", # optional
236
- # "position": "ANTENNA_FEED_OFFSET" #Will be added to the existing position in ant_xds
237
- }
238
-
239
- data_variable_dims = {
240
- "BEAM_OFFSET": ["antenna_id", "receptor_name", "sky_dir_label"],
241
- "RECEPTOR_ANGLE": ["antenna_id", "receptor_name"],
242
- "POLARIZATION_TYPE": ["antenna_id", "receptor_name"],
243
- # "pol_response": ["antenna_id", "receptor_name", "receptor_name_"],
244
- "FOCUS_LENGTH": ["antenna_id"],
245
- # "position": ["antenna_id", "cartesian_pos_label"],
246
- }
247
-
248
- for key in generic_feed_xds:
249
- msv4_measure = column_description_casacore_to_msv4_measure(
250
- feed_column_description[key.upper()]
251
- )
252
- if key in to_new_data_variable_names:
253
- ant_xds[to_new_data_variable_names[key]] = xr.DataArray(
254
- generic_feed_xds[key].data, dims=data_variable_dims[key]
255
- )
256
-
257
- if msv4_measure:
258
- ant_xds[to_new_data_variable_names[key]].attrs.update(msv4_measure)
259
-
260
- if key in to_new_coord_names:
261
- coords[to_new_coord_names[key]] = (
262
- coord_dims[key],
263
- generic_feed_xds[key].data,
264
- )
265
-
266
- ant_xds["ANTENNA_FEED_OFFSET"] = (
267
- ant_xds["ANTENNA_FEED_OFFSET"] + generic_ant_xds["POSITION"].data
268
- )
269
-
270
- coords["receptor_name"] = np.arange(ant_xds.sizes["receptor_name"]).astype(str)
271
-
272
- ant_xds = ant_xds.assign_coords(coords)
273
-
274
- ant_xds.attrs["overall_telescope_name"] = telescope_name
275
-
276
- return ant_xds
277
-
278
-
279
59
  def create_weather_xds(in_file: str):
280
60
  """
281
61
  Creates a Weather Xarray Dataset from a MS v2 WEATHER table.
@@ -341,11 +121,13 @@ def create_weather_xds(in_file: str):
341
121
  # ['ANTENNA_ID', 'TIME', 'INTERVAL', 'H2O', 'IONOS_ELECTRON',
342
122
  # 'PRESSURE', 'REL_HUMIDITY', 'TEMPERATURE', 'DEW_POINT',
343
123
  # 'WIND_DIRECTION', 'WIND_SPEED']
344
- weather_xds = xr.Dataset()
345
-
124
+ weather_xds = xr.Dataset(attrs={"type": "weather"})
125
+ time_attrs = column_description_casacore_to_msv4_measure(
126
+ weather_column_description["TIME"]
127
+ )
346
128
  coords = {
347
129
  "station_id": generic_weather_xds["STATION_ID"].data,
348
- "time": generic_weather_xds["TIME"].data,
130
+ "time": ("time", generic_weather_xds["TIME"].data, time_attrs),
349
131
  }
350
132
  for key in generic_weather_xds:
351
133
  msv4_measure = column_description_casacore_to_msv4_measure(
@@ -401,6 +183,7 @@ def create_weather_xds(in_file: str):
401
183
 
402
184
  def create_pointing_xds(
403
185
  in_file: str,
186
+ ant_xds_name_ids: xr.DataArray,
404
187
  time_min_max: Union[Tuple[np.float64, np.float64], None],
405
188
  interp_time: Union[xr.DataArray, None] = None,
406
189
  ) -> xr.Dataset:
@@ -414,6 +197,8 @@ def create_pointing_xds(
414
197
  ----------
415
198
  in_file : str
416
199
  Input MS name.
200
+ ant_xds_name_ids : xr.Dataset
201
+ antenna_name data array from antenna_xds, with name/id information
417
202
  time_min_max : tuple
418
203
  min / max times values to constrain loading (from the TIME column)
419
204
  interp_time : Union[xr.DataArray, None] (Default value = None)
@@ -440,19 +225,19 @@ def create_pointing_xds(
440
225
  # "on_source": "ON_SOURCE", # removed
441
226
  "OVER_THE_TOP": "OVER_THE_TOP",
442
227
  }
443
- time_ant_dims = ["time", "antenna_id"]
444
- time_ant_dir_dims = time_ant_dims + ["direction"]
228
+ time_ant_dims = ["time", "antenna_name"]
229
+ time_ant_dir_dims = time_ant_dims + ["sky_dir_label"]
445
230
  data_variable_dims = {
446
- # "name": ["time", "antenna_id"], # removed
447
- # "time_origin": ["time", "antenna_id"], # removed?
231
+ # "name": ["time", "antenna_name"], # removed
232
+ # "time_origin": ["time", "antenna_name"], # removed?
448
233
  "DIRECTION": time_ant_dir_dims,
449
234
  "ENCODER": time_ant_dir_dims,
450
235
  "TARGET": time_ant_dir_dims,
451
236
  "POINTING_OFFSET": time_ant_dir_dims,
452
237
  "SOURCE_OFFSET": time_ant_dir_dims,
453
- # "pointing_model_id": ["time", "antenna_id"], # removed
454
- # "tracking": ["time", "antenna_id"], # => attribute
455
- # "on_source": ["time", "antenna_id"], # removed
238
+ # "pointing_model_id": ["time", "antenna_name"], # removed
239
+ # "tracking": ["time", "antenna_name"], # => attribute
240
+ # "on_source": ["time", "antenna_name"], # removed
456
241
  "OVER_THE_TOP": time_ant_dims,
457
242
  }
458
243
  # Unused here
@@ -483,7 +268,7 @@ def create_pointing_xds(
483
268
  "ctds_attrs"
484
269
  ]["column_descriptions"]
485
270
 
486
- pointing_xds = xr.Dataset()
271
+ pointing_xds = xr.Dataset(attrs={"type": "pointing"})
487
272
  for key in generic_pointing_xds:
488
273
  if key in to_new_data_variable_names:
489
274
  data_var_name = to_new_data_variable_names[key]
@@ -532,8 +317,10 @@ def create_pointing_xds(
532
317
 
533
318
  coords = {
534
319
  "time": generic_pointing_xds["TIME"].values,
535
- "antenna_id": np.arange(generic_pointing_xds.sizes["ANTENNA_ID"]),
536
- "direction": ["ra", "dec"],
320
+ "antenna_name": ant_xds_name_ids.sel(
321
+ antenna_id=generic_pointing_xds["ANTENNA_ID"]
322
+ ).data,
323
+ "sky_dir_label": ["ra", "dec"],
537
324
  }
538
325
  pointing_xds = pointing_xds.assign_coords(coords)
539
326
 
@@ -550,7 +337,7 @@ def create_pointing_xds(
550
337
  if move_target_as_attr:
551
338
  target = generic_pointing_xds.data_vars["TARGET"]
552
339
  pointing_xds.attrs["target"] = {
553
- "dims": ["direction"],
340
+ "dims": ["sky_dir_label"],
554
341
  "data": target.values[0, 0].tolist(),
555
342
  "attrs": column_description_casacore_to_msv4_measure(
556
343
  pointing_column_descriptions["TARGET"]
@@ -171,11 +171,13 @@ def expand_xds(xds: xr.Dataset) -> xr.Dataset:
171
171
  assert "baseline" not in xds.coords
172
172
 
173
173
  txds = xds.copy()
174
+
174
175
  unique_baselines, baselines = np.unique(
175
176
  [txds.baseline_ant1_id.values, txds.baseline_ant2_id.values],
176
177
  axis=1,
177
178
  return_inverse=True,
178
179
  )
180
+
179
181
  txds["baseline"] = xr.DataArray(baselines.astype("int32"), dims=["row"])
180
182
 
181
183
  try:
@@ -184,7 +186,6 @@ def expand_xds(xds: xr.Dataset) -> xr.Dataset:
184
186
  .unstack("row")
185
187
  .transpose("time", "baseline", ...)
186
188
  )
187
-
188
189
  # unstack changes type to float when it needs to introduce NaNs, so
189
190
  # we need to reset to the proper type. Avoid if possible, as the
190
191
  # astype are costly
@@ -224,8 +225,15 @@ def flatten_xds(xds: xr.Dataset) -> xr.Dataset:
224
225
  txds = xds.stack({"row": ("time", "baseline")}).transpose("row", ...)
225
226
  # compute for issue https://github.com/hainegroup/oceanspy/issues/332
226
227
  # drop=True silently does compute (or at least used to)
228
+
229
+ # Skip this step for now since on Mac nan_int=0. See issue https://github.com/casangi/xradio/issues/219
230
+ # txds = txds.where(
231
+ # ((txds.STATE_ID != nan_int) & (txds.FIELD_ID != nan_int)).compute(),
232
+ # drop=True,
233
+ # ) # .unify_chunks()
234
+
227
235
  txds = txds.where(
228
- ((txds.STATE_ID != nan_int) & (txds.FIELD_ID != nan_int)).compute(),
236
+ ~np.isnan(txds["EXPOSURE"]).compute(),
229
237
  drop=True,
230
238
  ) # .unify_chunks()
231
239
 
@@ -17,9 +17,10 @@ def convert_msv2_to_processing_set(
17
17
  pointing_chunksize: Union[Dict, float, None] = None,
18
18
  pointing_interpolate: bool = False,
19
19
  ephemeris_interpolate: bool = False,
20
+ phase_cal_interpolate: bool = False,
20
21
  use_table_iter: bool = False,
21
22
  compressor: numcodecs.abc.Codec = numcodecs.Zstd(level=2),
22
- storage_backend="zarr",
23
+ storage_backend: str = "zarr",
23
24
  parallel: bool = False,
24
25
  overwrite: bool = False,
25
26
  ):
@@ -78,6 +79,8 @@ def convert_msv2_to_processing_set(
78
79
  + str(partition_info["SCAN_NUMBER"])
79
80
  )
80
81
 
82
+ # prepend '0' to ms_v4_id as needed
83
+ ms_v4_id = f"{ms_v4_id:0>{len(str(len(partitions) - 1))}}"
81
84
  if parallel:
82
85
  delayed_list.append(
83
86
  dask.delayed(convert_and_write_partition)(
@@ -92,6 +95,7 @@ def convert_msv2_to_processing_set(
92
95
  pointing_chunksize=pointing_chunksize,
93
96
  pointing_interpolate=pointing_interpolate,
94
97
  ephemeris_interpolate=ephemeris_interpolate,
98
+ phase_cal_interpolate=phase_cal_interpolate,
95
99
  compressor=compressor,
96
100
  overwrite=overwrite,
97
101
  )
@@ -109,6 +113,7 @@ def convert_msv2_to_processing_set(
109
113
  pointing_chunksize=pointing_chunksize,
110
114
  pointing_interpolate=pointing_interpolate,
111
115
  ephemeris_interpolate=ephemeris_interpolate,
116
+ phase_cal_interpolate=phase_cal_interpolate,
112
117
  compressor=compressor,
113
118
  overwrite=overwrite,
114
119
  )
@@ -36,9 +36,9 @@ def load_processing_set(
36
36
  processing_set
37
37
  In memory representation of processing set (data is represented by Dask.arrays).
38
38
  """
39
- from xradio._utils.zarr.common import _open_dataset, _get_ms_stores_and_file_system
39
+ from xradio._utils.zarr.common import _open_dataset, _get_file_system_and_items
40
40
 
41
- file_system, ms_store_list = _get_ms_stores_and_file_system(ps_store)
41
+ file_system, ms_store_list = _get_file_system_and_items(ps_store)
42
42
 
43
43
  ps = processing_set()
44
44
  for ms_name, ms_xds_isel in sel_parms.items():
@@ -2,7 +2,7 @@ import os
2
2
 
3
3
  from ._processing_set import processing_set
4
4
  import graphviper.utils.logger as logger
5
- from xradio._utils.zarr.common import _open_dataset, _get_ms_stores_and_file_system
5
+ from xradio._utils.zarr.common import _open_dataset, _get_file_system_and_items
6
6
  import s3fs
7
7
 
8
8
 
@@ -25,7 +25,7 @@ def read_processing_set(
25
25
  processing_set
26
26
  Lazy representation of processing set (data is represented by Dask.arrays).
27
27
  """
28
- file_system, ms_store_list = _get_ms_stores_and_file_system(ps_store)
28
+ file_system, ms_store_list = _get_file_system_and_items(ps_store)
29
29
 
30
30
  ps = processing_set()
31
31
  data_group = "base"
@@ -38,7 +38,7 @@ def read_processing_set(
38
38
  data_groups = xds.attrs["data_groups"]
39
39
 
40
40
  if (obs_modes is None) or (
41
- xds.attrs["partition_info"]["obs_mode"] in obs_modes
41
+ bool(set(xds.attrs["partition_info"]["obs_mode"]).intersection(obs_modes))
42
42
  ):
43
43
  sub_xds_dict, field_and_source_xds_dict = _read_sub_xds(
44
44
  ms_store, file_system=file_system, data_groups=data_groups
@@ -94,6 +94,9 @@ def _read_sub_xds(ms_store, file_system, data_groups, load=False):
94
94
  xds = _open_dataset(
95
95
  os.path.join(ms_store, n), load=load, file_system=file_system
96
96
  )
97
+ # Skip empty tables
98
+ if not xds.coords and not xds.data_vars:
99
+ continue
97
100
  if n in field_dict.keys():
98
101
  field_and_source_xds_dict[field_dict[n]] = xds
99
102
  else: