xradio 0.0.41__py3-none-any.whl → 0.0.43__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (62) hide show
  1. xradio/_utils/coord_math.py +100 -0
  2. xradio/_utils/list_and_array.py +49 -4
  3. xradio/_utils/schema.py +36 -16
  4. xradio/image/_util/_casacore/xds_from_casacore.py +5 -5
  5. xradio/image/_util/_casacore/xds_to_casacore.py +12 -11
  6. xradio/image/_util/_fits/xds_from_fits.py +18 -17
  7. xradio/image/_util/_zarr/zarr_low_level.py +29 -12
  8. xradio/image/_util/common.py +1 -1
  9. xradio/image/_util/image_factory.py +1 -1
  10. xradio/{correlated_data → measurement_set}/__init__.py +7 -4
  11. xradio/measurement_set/_utils/__init__.py +5 -0
  12. xradio/{correlated_data/_utils/_ms → measurement_set/_utils/_msv2}/_tables/load_main_table.py +1 -1
  13. xradio/{correlated_data/_utils/_ms → measurement_set/_utils/_msv2}/_tables/read.py +1 -1
  14. xradio/{correlated_data/_utils/_ms → measurement_set/_utils/_msv2}/conversion.py +115 -37
  15. xradio/{correlated_data/_utils/_ms → measurement_set/_utils/_msv2}/create_antenna_xds.py +62 -37
  16. xradio/{correlated_data/_utils/_ms → measurement_set/_utils/_msv2}/create_field_and_source_xds.py +117 -25
  17. xradio/{correlated_data/_utils/_ms → measurement_set/_utils/_msv2}/msv4_sub_xdss.py +47 -13
  18. xradio/{correlated_data/_utils/_ms → measurement_set/_utils/_msv2}/partition_queries.py +4 -0
  19. xradio/{correlated_data → measurement_set}/_utils/_utils/xds_helper.py +1 -1
  20. xradio/{correlated_data/_utils/ms.py → measurement_set/_utils/msv2.py} +4 -4
  21. xradio/{correlated_data → measurement_set}/convert_msv2_to_processing_set.py +7 -2
  22. xradio/{correlated_data → measurement_set}/load_processing_set.py +5 -5
  23. xradio/measurement_set/measurement_set_xds.py +110 -0
  24. xradio/{correlated_data → measurement_set}/open_processing_set.py +9 -16
  25. xradio/measurement_set/processing_set.py +777 -0
  26. xradio/{correlated_data → measurement_set}/schema.py +1110 -586
  27. xradio/schema/check.py +42 -22
  28. xradio/schema/dataclass.py +56 -6
  29. xradio/sphinx/__init__.py +12 -0
  30. xradio/sphinx/schema_table.py +351 -0
  31. {xradio-0.0.41.dist-info → xradio-0.0.43.dist-info}/METADATA +9 -6
  32. xradio-0.0.43.dist-info/RECORD +76 -0
  33. {xradio-0.0.41.dist-info → xradio-0.0.43.dist-info}/WHEEL +1 -1
  34. xradio/_utils/common.py +0 -101
  35. xradio/correlated_data/_utils/__init__.py +0 -5
  36. xradio/correlated_data/correlated_xds.py +0 -13
  37. xradio/correlated_data/processing_set.py +0 -301
  38. xradio/correlated_data/test__processing_set.py +0 -74
  39. xradio-0.0.41.dist-info/RECORD +0 -75
  40. /xradio/{correlated_data/_utils/_ms → measurement_set/_utils/_msv2}/_tables/load.py +0 -0
  41. /xradio/{correlated_data/_utils/_ms → measurement_set/_utils/_msv2}/_tables/read_main_table.py +0 -0
  42. /xradio/{correlated_data/_utils/_ms → measurement_set/_utils/_msv2}/_tables/read_subtables.py +0 -0
  43. /xradio/{correlated_data/_utils/_ms → measurement_set/_utils/_msv2}/_tables/table_query.py +0 -0
  44. /xradio/{correlated_data/_utils/_ms → measurement_set/_utils/_msv2}/_tables/write.py +0 -0
  45. /xradio/{correlated_data/_utils/_ms → measurement_set/_utils/_msv2}/_tables/write_exp_api.py +0 -0
  46. /xradio/{correlated_data/_utils/_ms → measurement_set/_utils/_msv2}/chunks.py +0 -0
  47. /xradio/{correlated_data/_utils/_ms → measurement_set/_utils/_msv2}/descr.py +0 -0
  48. /xradio/{correlated_data/_utils/_ms → measurement_set/_utils/_msv2}/msv2_msv3.py +0 -0
  49. /xradio/{correlated_data/_utils/_ms → measurement_set/_utils/_msv2}/msv2_to_msv4_meta.py +0 -0
  50. /xradio/{correlated_data/_utils/_ms → measurement_set/_utils/_msv2}/msv4_info_dicts.py +0 -0
  51. /xradio/{correlated_data/_utils/_ms → measurement_set/_utils/_msv2}/optimised_functions.py +0 -0
  52. /xradio/{correlated_data/_utils/_ms → measurement_set/_utils/_msv2}/partitions.py +0 -0
  53. /xradio/{correlated_data/_utils/_ms → measurement_set/_utils/_msv2}/subtables.py +0 -0
  54. /xradio/{correlated_data → measurement_set}/_utils/_utils/cds.py +0 -0
  55. /xradio/{correlated_data → measurement_set}/_utils/_utils/partition_attrs.py +0 -0
  56. /xradio/{correlated_data → measurement_set}/_utils/_utils/stokes_types.py +0 -0
  57. /xradio/{correlated_data → measurement_set}/_utils/_zarr/encoding.py +0 -0
  58. /xradio/{correlated_data → measurement_set}/_utils/_zarr/read.py +0 -0
  59. /xradio/{correlated_data → measurement_set}/_utils/_zarr/write.py +0 -0
  60. /xradio/{correlated_data → measurement_set}/_utils/zarr.py +0 -0
  61. {xradio-0.0.41.dist-info → xradio-0.0.43.dist-info}/LICENSE.txt +0 -0
  62. {xradio-0.0.41.dist-info → xradio-0.0.43.dist-info}/top_level.txt +0 -0
@@ -6,20 +6,27 @@ import numpy as np
6
6
  import xarray as xr
7
7
 
8
8
  import toolviper.utils.logger as logger
9
- from xradio.correlated_data._utils._ms.msv4_sub_xdss import interpolate_to_time
10
- from xradio.correlated_data._utils._ms.subtables import subt_rename_ids
11
- from xradio.correlated_data._utils._ms._tables.read import (
9
+ from xradio.measurement_set._utils._msv2.msv4_sub_xdss import interpolate_to_time
10
+ from xradio.measurement_set._utils._msv2.subtables import subt_rename_ids
11
+ from xradio.measurement_set._utils._msv2._tables.read import (
12
12
  convert_casacore_time_to_mjd,
13
13
  make_taql_where_between_min_max,
14
14
  load_generic_table,
15
15
  )
16
- from xradio._utils.common import cast_to_str, convert_to_si_units, add_position_offsets
16
+ from xradio._utils.list_and_array import cast_to_str
17
+ from xradio._utils.coord_math import (
18
+ convert_to_si_units,
19
+ add_position_offsets,
20
+ wrap_to_pi,
21
+ )
22
+
17
23
  from xradio._utils.list_and_array import (
18
24
  check_if_consistent,
19
25
  unique_1d,
20
26
  to_np_array,
21
27
  )
22
28
  from xradio._utils.schema import (
29
+ casacore_to_msv4_measure_type,
23
30
  column_description_casacore_to_msv4_measure,
24
31
  convert_generic_xds_to_xradio_schema,
25
32
  )
@@ -169,9 +176,21 @@ def extract_ephemeris_info(
169
176
  ), "Only geocentric observer ephemeris are supported."
170
177
 
171
178
  if "posrefsys" in ephemeris_meta:
172
- sky_coord_frame = ephemeris_meta["posrefsys"].replace("ICRF/", "")
179
+ # Note the phase center can be given as "J2000" or "J2000.0"
180
+ ref_frame = (
181
+ ephemeris_meta["posrefsys"]
182
+ .replace("ICRF/", "", 1)
183
+ .replace("J2000.0", "J2000", 1)
184
+ )
185
+ if ref_frame in casacore_to_msv4_measure_type["direction"].get("Ref_map", {}):
186
+ ref_frame = casacore_to_msv4_measure_type["direction"]["Ref_map"][ref_frame]
187
+ else:
188
+ logger.debug(
189
+ f"Unrecognized casacore direction reference frame found in posrefsys: {ref_frame}"
190
+ )
191
+ sky_coord_frame = ref_frame.lower()
173
192
  else:
174
- sky_coord_frame = "ICRS" # We will have to just assume this.
193
+ sky_coord_frame = "icrs" # We will have to just assume this.
175
194
 
176
195
  # Find out which keyword is used for units (UNIT/QuantumUnits)
177
196
  if "UNIT" in ephemeris_column_description["RA"]["keywords"]:
@@ -195,7 +214,7 @@ def extract_ephemeris_info(
195
214
  "type": "location",
196
215
  "units": ["deg", "deg", "m"],
197
216
  "data": observer_position,
198
- "ellipsoid": "WGS84",
217
+ "frame": "WGS84",
199
218
  "origin_object_name": "Earth",
200
219
  "coordinate_system": ephemeris_meta["obsloc"].lower(),
201
220
  }
@@ -260,7 +279,7 @@ def extract_ephemeris_info(
260
279
  }
261
280
  )
262
281
 
263
- # Add optional data: SUB_OBSERVER_POSITION and SUB_SOLAR_POSITION
282
+ # Add optional data: SUB_OBSERVER_DIRECTION and SUB_SOLAR_POSITION
264
283
  if "DiskLong" in ephemeris_column_description:
265
284
  key_lon = "DiskLong"
266
285
  key_lat = "DiskLat"
@@ -283,7 +302,7 @@ def extract_ephemeris_info(
283
302
  temp_xds["SUB_OBSERVER_DIRECTION"].attrs.update(
284
303
  {
285
304
  "type": "location",
286
- "ellipsoid": "NA",
305
+ "frame": "Undefined",
287
306
  "origin_object_name": ephemeris_meta["NAME"],
288
307
  "coordinate_system": "planetodetic",
289
308
  "units": [
@@ -312,7 +331,7 @@ def extract_ephemeris_info(
312
331
  temp_xds["SUB_SOLAR_POSITION"].attrs.update(
313
332
  {
314
333
  "type": "location",
315
- "ellipsoid": "NA",
334
+ "frame": "Undefined",
316
335
  "origin_object_name": "Sun",
317
336
  "coordinate_system": "planetodetic",
318
337
  "units": [
@@ -339,8 +358,8 @@ def extract_ephemeris_info(
339
358
  time_coord_attrs = {
340
359
  "type": "time",
341
360
  "units": ["s"],
342
- "scale": "UTC",
343
- "format": "UNIX",
361
+ "scale": "utc",
362
+ "format": "unix",
344
363
  }
345
364
  temp_xds["time_ephemeris"].attrs.update(time_coord_attrs)
346
365
 
@@ -374,21 +393,28 @@ def extract_ephemeris_info(
374
393
  interp_time is not None
375
394
  ), 'ephemeris_interpolate must be True if there is ephemeris data and multiple fields (this will occur if "FIELD_ID" is not in partition_scheme).'
376
395
 
396
+ field_phase_center = wrap_to_pi(
397
+ xds[center_dv].values + xds["SOURCE_LOCATION"][:, 0:2].values
398
+ )
399
+ field_phase_center = np.column_stack(
400
+ (field_phase_center, np.zeros(xds[center_dv].values.shape[0]))
401
+ )
402
+ field_phase_center[:, -1] = (
403
+ field_phase_center[:, -1] + xds["SOURCE_LOCATION"][:, -1].values
404
+ )
405
+
377
406
  xds[center_dv] = xr.DataArray(
378
- add_position_offsets(
379
- np.column_stack(
380
- (xds[center_dv].values, np.zeros(xds[center_dv].values.shape[0]))
381
- ),
382
- xds["SOURCE_LOCATION"].values,
383
- ),
407
+ field_phase_center,
384
408
  dims=[xds["SOURCE_LOCATION"].dims[0], "sky_pos_label"],
385
409
  )
386
410
  else:
411
+ field_phase_center = (
412
+ np.append(xds[center_dv].values, 0) + xds["SOURCE_LOCATION"].values
413
+ )
414
+ field_phase_center[:, 0:2] = wrap_to_pi(field_phase_center[:, 0:2])
415
+
387
416
  xds[center_dv] = xr.DataArray(
388
- add_position_offsets(
389
- np.append(xds[center_dv].values, 0),
390
- xds["SOURCE_LOCATION"].values,
391
- ),
417
+ field_phase_center,
392
418
  dims=[xds["SOURCE_LOCATION"].dims[0], "sky_pos_label"],
393
419
  )
394
420
 
@@ -460,6 +486,65 @@ def make_line_dims_and_coords(
460
486
  return line_dims, line_coords
461
487
 
462
488
 
489
def pad_missing_sources(
    source_xds: xr.Dataset, unique_source_ids: np.array
) -> xr.Dataset:
    """
    Pad a source dataset with placeholder entries for missing source IDs.

    In some MSs there can be source IDs referenced from the field subtable which do not exist in
    the source table: https://github.com/casangi/xradio/issues/266

    This addresses the issue by padding/filling those IDs with "Unknown"/nan values. Produces a
    source_xds that, in addition to the information loaded for the non-missing source IDs, has
    padding for the IDs that are missing from the input MSv2 source table.
    This function does not need to do anything when unique_source_ids is a single value
    (partitioning by "FIELD_ID" or otherwise single field/source).

    Parameters
    ----------
    source_xds : xr.Dataset
        source dataset to fix/pad missing sources. It is indexed with a "SOURCE_ID"
        dimension/coordinate.
    unique_source_ids : np.array
        IDs of the sources included in this partition

    Returns
    -------
    filled_source_xds : xr.Dataset
        source dataset with padding in the originally missing sources, sorted by
        "SOURCE_ID". The input is returned untouched when unique_source_ids has at most
        one element.
    """
    # Only fill gaps in multi-source xdss. If single source_id, no need to
    if len(unique_source_ids) <= 1:
        return source_xds

    concat_dim = "SOURCE_ID"
    known_source_ids = source_xds.coords[concat_dim]
    missing_source_ids = [
        source_id
        for source_id in unique_source_ids
        if source_id not in known_source_ids
    ]

    # would like to use the new-ish xr.pad, but it creates issues with indices/coords and is
    # also not free of overheads, as it for example changes all numeric types to float64
    template_xds = xr.full_like(source_xds.isel(SOURCE_ID=0), fill_value=np.nan)
    pad_str = "Unknown"
    pad_str_type = "<U9"
    # Iterate over a snapshot of the names: the loop body reassigns variables.
    for var in list(template_xds.data_vars):
        if np.issubdtype(template_xds.data_vars[var].dtype, np.str_):
            # Avoid truncation to length of previously loaded strings
            template_xds[var] = template_xds[var].astype(np.dtype(pad_str_type))
            template_xds[var] = pad_str

    xdss_to_concat = [source_xds]
    for missing_id in missing_source_ids:
        # Label a fresh (shallow) copy for every missing ID. Assigning into the shared
        # template and appending it repeatedly would leave all list entries pointing to
        # one dataset that only carries the last missing ID.
        padded_xds = template_xds.copy()
        padded_xds[concat_dim] = missing_id
        xdss_to_concat.append(padded_xds)

    filled_source_xds = xr.concat(xdss_to_concat, concat_dim).sortby(concat_dim)

    return filled_source_xds
546
+
547
+
463
548
  def extract_source_info(
464
549
  xds: xr.Dataset,
465
550
  path: str,
@@ -533,15 +618,22 @@ def extract_source_info(
533
618
 
534
619
  # This source table time is not the same as the time in the field_and_source_xds that is derived from the main MSv4 time axis.
535
620
  # The source_id maps to the time axis in the field_and_source_xds. That is why "if len(source_id) == 1" is used to check if there should be a time axis.
536
- assert len(source_xds.TIME) <= len(
537
- unique_source_id
538
- ), "Can only process source table with a single time entry for a source_id and spectral_window_id."
621
+ # assert len(source_xds.TIME) <= len(
622
+ # unique_source_id
623
+ # ), "Can only process source table with a single time entry for a source_id and spectral_window_id."
624
+ if len(source_xds.TIME) > len(unique_source_id):
625
+ logger.warning(
626
+ f"Source table has more than one time entry for a source_id and spectral_window_id. This is not currently supported. Only the first time entry will be used."
627
+ )
628
+ source_xds = source_xds.drop_duplicates("SOURCE_ID", keep="first")
539
629
 
540
630
  source_xds = source_xds.isel(TIME=0, SPECTRAL_WINDOW_ID=0, drop=True)
541
631
  source_column_description = source_xds.attrs["other"]["msv2"]["ctds_attrs"][
542
632
  "column_descriptions"
543
633
  ]
544
634
 
635
+ source_xds = pad_missing_sources(source_xds, unique_source_id)
636
+
545
637
  # Get source name (the time axis is optional and will probably be required if the partition scheme does not include 'FIELD_ID' or 'SOURCE_ID'.).
546
638
  # Note again that this optional time axis has nothing to do with the original time axis in the source table that we drop.
547
639
  if len(source_id) == 1:
@@ -6,6 +6,7 @@ from typing import Tuple, Union
6
6
  import numpy as np
7
7
  import xarray as xr
8
8
 
9
+ from xradio._utils.coord_math import convert_to_si_units
9
10
  from xradio._utils.schema import (
10
11
  column_description_casacore_to_msv4_measure,
11
12
  convert_generic_xds_to_xradio_schema,
@@ -144,7 +145,7 @@ def create_weather_xds(in_file: str, ant_xds_station_name_ids: xr.DataArray):
144
145
  }
145
146
  weather_xds = weather_xds.assign_coords(coords)
146
147
 
147
- dims_station_time = ["station_name", "time"]
148
+ dims_station_time = ["station_name", "time_weather"]
148
149
  to_new_data_variables = {
149
150
  "H20": ["H2O", dims_station_time],
150
151
  "IONOS_ELECTRON": ["IONOS_ELECTRON", dims_station_time],
@@ -157,13 +158,23 @@ def create_weather_xds(in_file: str, ant_xds_station_name_ids: xr.DataArray):
157
158
  }
158
159
 
159
160
  to_new_coords = {
160
- "TIME": ["time", ["time"]],
161
+ "TIME": ["time_weather", ["time_weather"]],
161
162
  }
162
163
 
163
164
  weather_xds = convert_generic_xds_to_xradio_schema(
164
165
  generic_weather_xds, weather_xds, to_new_data_variables, to_new_coords
165
166
  )
166
167
 
168
+ # TODO: option to interpolate to main time
169
+
170
+ # PRESSURE: hPa in MSv2 specs and some MSs => Pa
171
+ weather_xds = convert_to_si_units(weather_xds)
172
+
173
+ # correct expected types (for example "IONOS_ELECTRON", "PRESSURE" can be float32)
174
+ for data_var in weather_xds:
175
+ if weather_xds.data_vars[data_var].dtype != np.float64:
176
+ weather_xds[data_var] = weather_xds[data_var].astype(np.float64)
177
+
167
178
  return weather_xds
168
179
 
169
180
 
@@ -199,9 +210,8 @@ def correct_generic_pointing_xds(
199
210
 
200
211
  correct_pointing_xds = generic_pointing_xds.copy()
201
212
 
202
- for key in generic_pointing_xds:
203
- if key in to_new_data_variables:
204
- data_var_name = to_new_data_variables[key]
213
+ for data_var_name in generic_pointing_xds:
214
+ if data_var_name in to_new_data_variables:
205
215
  # Corrects dim sizes of "empty cell" variables, such as empty DIRECTION, TARGET, etc.
206
216
  if (
207
217
  "dim_2" in generic_pointing_xds.sizes
@@ -296,6 +306,8 @@ def create_pointing_xds(
296
306
  size = generic_pointing_xds.sizes["n_polynomial"]
297
307
  if size == 1:
298
308
  generic_pointing_xds = generic_pointing_xds.sel({"n_polynomial": 0})
309
+ elif size == 0:
310
+ generic_pointing_xds = generic_pointing_xds.drop_dims("n_polynomial")
299
311
 
300
312
  time_ant_dims = ["time", "antenna_name"]
301
313
  time_ant_dir_dims = time_ant_dims + ["local_sky_dir_label"]
@@ -341,8 +353,8 @@ def prepare_generic_sys_cal_xds(generic_sys_cal_xds: xr.Dataset) -> xr.Dataset:
341
353
  This function performs various preparation steps, such as:
342
354
  - filter out dimensions not needed for an individual MSv4 (SPW, FEED),
343
355
  - drop variables loaded from columns with all items set to empty array,
344
- - transpose the dimensions frequency,receptor,
345
- - fix dimension names when needed.
356
+ - transpose the dimensions frequency,receptor
357
+ - fix dimension names (and order) when needed.
346
358
 
347
359
  Parameters
348
360
  ----------
@@ -374,15 +386,38 @@ def prepare_generic_sys_cal_xds(generic_sys_cal_xds: xr.Dataset) -> xr.Dataset:
374
386
  "receptor" in generic_sys_cal_xds.sizes
375
387
  and "frequency" in generic_sys_cal_xds.sizes
376
388
  ):
389
+ # dim_3 can be created for example when the T*_SPECTRUM have varying # channels!
390
+ # more generally, could transpose with ... to avoid errors with additional spurious dimensions
391
+ if "dim_3" in generic_sys_cal_xds.dims:
392
+ generic_sys_cal_xds = generic_sys_cal_xds.drop_dims("dim_3")
377
393
  # From MSv2 tables we get (...,frequency, receptor)
378
394
  # -> transpose to (...,receptor,frequency) ready for MSv4 sys_cal_xds
379
395
  generic_sys_cal_xds = generic_sys_cal_xds.transpose(
380
396
  "ANTENNA_ID", "TIME", "receptor", "frequency"
381
397
  )
382
- else:
398
+ elif (
399
+ "frequency" in generic_sys_cal_xds.sizes
400
+ and not "dim_3" in generic_sys_cal_xds.sizes
401
+ ):
383
402
  # because order is (...,frequency,receptor), when frequency is missing
384
403
  # receptor can get wrongly labeled as frequency
385
404
  generic_sys_cal_xds = generic_sys_cal_xds.rename_dims({"frequency": "receptor"})
405
+ elif (
406
+ "frequency" not in generic_sys_cal_xds.sizes
407
+ and "receptor" in generic_sys_cal_xds.sizes
408
+ and "dim_3" in generic_sys_cal_xds.sizes
409
+ ):
410
+ # different *_SPECTRUM array sizes + some empty arrays can create an additional spurious
411
+ # generic dimension, which should have been "receptor"
412
+ generic_sys_cal_xds = generic_sys_cal_xds.rename_dims({"receptor": "frequency"})
413
+ generic_sys_cal_xds = generic_sys_cal_xds.rename_dims({"dim_3": "receptor"})
414
+ generic_sys_cal_xds = generic_sys_cal_xds.transpose(
415
+ "ANTENNA_ID", "TIME", "receptor", "frequency"
416
+ )
417
+ else:
418
+ raise RuntimeError(
419
+ "Cannot understand the arrangement of dimensions of {generic_sys_cal_xds=}"
420
+ )
386
421
 
387
422
  return generic_sys_cal_xds
388
423
 
@@ -462,7 +497,7 @@ def create_system_calibration_xds(
462
497
  "frequency": ["frequency_cal", ["frequency_cal"]],
463
498
  }
464
499
 
465
- sys_cal_xds = xr.Dataset(attrs={"type": "sys_cal"})
500
+ sys_cal_xds = xr.Dataset(attrs={"type": "system_calibration"})
466
501
  coords = {
467
502
  "antenna_name": ant_xds_name_ids.sel(
468
503
  antenna_id=generic_sys_cal_xds["ANTENNA_ID"]
@@ -483,8 +518,7 @@ def create_system_calibration_xds(
483
518
  frequency_measure = {
484
519
  "type": main_xds_frequency.attrs["type"],
485
520
  "units": main_xds_frequency.attrs["units"],
486
- "frame": main_xds_frequency.attrs["frame"],
487
- "reference_value": main_xds_frequency.attrs["reference_frequency"],
521
+ "observer": main_xds_frequency.attrs["observer"],
488
522
  }
489
523
  sys_cal_xds.coords["frequency_cal"].attrs.update(frequency_measure)
490
524
 
@@ -499,8 +533,8 @@ def create_system_calibration_xds(
499
533
  time_coord_attrs = {
500
534
  "type": "time",
501
535
  "units": ["s"],
502
- "scale": "UTC",
503
- "format": "UNIX",
536
+ "scale": "utc",
537
+ "format": "unix",
504
538
  }
505
539
  # If interpolating time, rename time_cal => time
506
540
  time_coord = {"time": ("time_cal", sys_cal_interp_time.data)}
@@ -54,6 +54,7 @@ def create_partitions(in_file: str, partition_scheme: list):
54
54
  par_df["SCAN_NUMBER"] = main_tb.getcol("SCAN_NUMBER")
55
55
  par_df["STATE_ID"] = main_tb.getcol("STATE_ID")
56
56
  par_df["OBSERVATION_ID"] = main_tb.getcol("OBSERVATION_ID")
57
+ par_df["ANTENNA1"] = main_tb.getcol("ANTENNA1")
57
58
  par_df = par_df.drop_duplicates()
58
59
 
59
60
  field_tb = tables.table(
@@ -123,6 +124,9 @@ def create_partitions(in_file: str, partition_scheme: list):
123
124
  "OBS_MODE",
124
125
  "SUB_SCAN_NUMBER",
125
126
  ]
127
+ if "ANTENNA1" in partition_scheme:
128
+ partition_axis_names.append("ANTENNA1")
129
+
126
130
  for idx, pair in enumerated_partitions:
127
131
  query = ""
128
132
  for i, par in enumerate(partition_scheme_updated):
@@ -7,7 +7,7 @@ import xarray as xr
7
7
 
8
8
  from .cds import CASAVisSet
9
9
  from .stokes_types import stokes_types
10
- from ...._utils.common import get_pad_value
10
+ from xradio._utils.list_and_array import get_pad_value
11
11
 
12
12
 
13
13
  def make_coords(
@@ -2,14 +2,14 @@ import os
2
2
  import toolviper.utils.logger as logger
3
3
  from typing import List, Tuple, Union
4
4
 
5
- from ._utils.cds import CASAVisSet
6
- from ._ms.partitions import (
5
+ from xradio.measurement_set._utils._utils.cds import CASAVisSet
6
+ from xradio.measurement_set._utils._msv2.partitions import (
7
7
  finalize_partitions,
8
8
  read_ms_ddi_partitions,
9
9
  read_ms_scan_subscan_partitions,
10
10
  )
11
- from ._ms.subtables import read_ms_subtables
12
- from ._utils.xds_helper import vis_xds_packager_cds
11
+ from xradio.measurement_set._utils._msv2.subtables import read_ms_subtables
12
+ from xradio.measurement_set._utils._utils.xds_helper import vis_xds_packager_cds
13
13
 
14
14
 
15
15
  def read_ms(
@@ -4,8 +4,8 @@ from typing import Dict, Union
4
4
 
5
5
  import dask
6
6
 
7
- from xradio.correlated_data._utils._ms.partition_queries import create_partitions
8
- from xradio.correlated_data._utils._ms.conversion import convert_and_write_partition
7
+ from xradio.measurement_set._utils._msv2.partition_queries import create_partitions
8
+ from xradio.measurement_set._utils._msv2.conversion import convert_and_write_partition
9
9
 
10
10
 
11
11
  def convert_msv2_to_processing_set(
@@ -82,6 +82,11 @@ def convert_msv2_to_processing_set(
82
82
  + str(partition_info["FIELD_ID"])
83
83
  + ", SCAN "
84
84
  + str(partition_info["SCAN_NUMBER"])
85
+ + (
86
+ ", ANTENNA " + str(partition_info["ANTENNA1"])
87
+ if "ANTENNA1" in partition_info
88
+ else ""
89
+ )
85
90
  )
86
91
 
87
92
  # prepend '0' to ms_v4_id as needed
@@ -1,5 +1,5 @@
1
1
  import os
2
- from xradio.correlated_data import ProcessingSet
2
+ from xradio.measurement_set import ProcessingSet
3
3
  from typing import Dict, Union
4
4
 
5
5
 
@@ -43,10 +43,10 @@ def load_processing_set(
43
43
  ps = ProcessingSet()
44
44
  for ms_name, ms_xds_isel in sel_parms.items():
45
45
  ms_store = os.path.join(ps_store, ms_name)
46
- ms_main_store = os.path.join(ms_store, "MAIN")
46
+ correlated_store = os.path.join(ms_store, "correlated_xds")
47
47
 
48
48
  xds = _open_dataset(
49
- ms_main_store,
49
+ correlated_store,
50
50
  file_system,
51
51
  ms_xds_isel,
52
52
  data_variables,
@@ -55,7 +55,7 @@ def load_processing_set(
55
55
  data_groups = xds.attrs["data_groups"]
56
56
 
57
57
  if load_sub_datasets:
58
- from xradio.correlated_data.open_processing_set import _open_sub_xds
58
+ from xradio.measurement_set.open_processing_set import _open_sub_xds
59
59
 
60
60
  sub_xds_dict, field_and_source_xds_dict = _open_sub_xds(
61
61
  ms_store, file_system=file_system, load=True, data_groups=data_groups
@@ -76,7 +76,7 @@ def load_processing_set(
76
76
  return ps
77
77
 
78
78
 
79
- class processing_set_iterator:
79
+ class ProcessingSetIterator:
80
80
  def __init__(
81
81
  self,
82
82
  sel_parms: dict,
@@ -0,0 +1,110 @@
1
+ import pandas as pd
2
+ from xradio._utils.list_and_array import to_list
3
+ import xarray as xr
4
+ import numbers
5
+ import os
6
+ from collections.abc import Mapping, Iterable
7
+ from typing import Any, Union
8
+
9
+
10
class MeasurementSetXds(xr.Dataset):
    """
    ``xarray.Dataset`` subclass that adds a Zarr writer (``to_store``) and a ``sel`` that can
    additionally select a data group by name.

    The dataset is expected to carry a ``data_groups`` attribute: a dict that maps each data
    group name to a dict whose values are data variable names (one of them under the
    ``"correlated_data"`` key).
    """

    __slots__ = ()

    def __init__(self, xds):
        """Build the object from the data variables, coordinates and attributes of ``xds``."""
        super().__init__(xds.data_vars, xds.coords, xds.attrs)

    def to_store(self, store, **kwargs):
        """
        Write the MeasurementSetXds to a Zarr store.
        Does not write to cloud storage yet.

        The datasets held in attributes are written as separate Zarr stores under ``store``:
        one ``field_and_source_xds_<data group name>`` per data group, one store per dataset
        attribute whose name contains ``"xds"``, and the remaining dataset as
        ``correlated_xds``.

        Args:
            store (str): The path to the Zarr store.
            **kwargs: Additional keyword arguments to be passed to `xarray.Dataset.to_zarr`. See https://docs.xarray.dev/en/latest/generated/xarray.Dataset.to_zarr.html for more information.

        Returns:
            None
        """

        copy_cor_xds = self.copy()  # No deep copy

        # Remove field_and_source_xds from all correlated_data (VISIBILITY/SPECTRUM) data variables
        # and save them as separate zarr files.
        for data_group_name, data_group in self.attrs["data_groups"].items():
            correlated_name = data_group["correlated_data"]
            # Fetch the sub-dataset before stripping it from the copy.
            field_and_source_xds = self[correlated_name].attrs["field_and_source_xds"]
            del copy_cor_xds[correlated_name].attrs["field_and_source_xds"]

            xr.Dataset.to_zarr(
                field_and_source_xds,
                os.path.join(store, "field_and_source_xds_" + data_group_name),
                **kwargs,
            )

        # Remove xds attributes from copy_cor_xds and save xds attributes as separate zarr files.
        for attrs_name in list(self.attrs):
            if "xds" in attrs_name:
                sub_xds = self.attrs[attrs_name]
                del copy_cor_xds.attrs[attrs_name]
                # kwargs belong to to_zarr; os.path.join does not accept keyword arguments.
                xr.Dataset.to_zarr(
                    sub_xds, os.path.join(store, attrs_name), **kwargs
                )

        # Save copy_cor_xds as zarr file.
        xr.Dataset.to_zarr(
            copy_cor_xds, os.path.join(store, "correlated_xds"), **kwargs
        )

    def sel(
        self,
        indexers: Union[Mapping[Any, Any], None] = None,
        method: Union[str, None] = None,
        tolerance: Union[int, float, Iterable[Union[int, float]], None] = None,
        drop: bool = False,
        **indexers_kwargs: Any,
    ):
        """
        Select data along dimension(s) by label. Overrides `xarray.Dataset.sel <https://xarray.pydata.org/en/stable/generated/xarray.Dataset.sel.html>`__ so that a data group can be selected by name by using the `data_group_name` parameter.
        For more information on data groups see `Data Groups <https://xradio.readthedocs.io/en/latest/measurement_set_overview.html#Data-Groups>`__ section. See `xarray.Dataset.sel <https://xarray.pydata.org/en/stable/generated/xarray.Dataset.sel.html>`__ for parameter descriptions.

        `data_group_name` can be given as a keyword argument or as a key of `indexers`; if it
        is given in both, the value in `indexers` is used. The `indexers` mapping passed by
        the caller is not modified. When a data group is selected, the data variables that
        are referenced only by other data groups are dropped from the result.

        Returns:
            MeasurementSetXds

        Raises:
            KeyError: if `data_group_name` is not a key of the `data_groups` attribute.

        Examples
        --------
        >>> # Select data group 'corrected' and polarization 'XX'.
        >>> selected_ms_xds = ms_xds.sel(data_group_name='corrected', polarization='XX')

        >>> # Select data group 'corrected' and polarization 'XX' using a dict.
        >>> selected_ms_xds = ms_xds.sel({'data_group_name':'corrected', 'polarization':'XX'})
        """
        # Always strip the keyword so it is never forwarded to xarray as a dimension name.
        data_group_name = indexers_kwargs.pop("data_group_name", None)
        if (indexers is not None) and ("data_group_name" in indexers):
            data_group_name = indexers["data_group_name"]
            # Work on a filtered copy rather than deleting from the caller's mapping.
            indexers = {
                key: value
                for key, value in indexers.items()
                if key != "data_group_name"
            }

        if data_group_name is None:
            return MeasurementSetXds(
                super().sel(indexers, method, tolerance, drop, **indexers_kwargs)
            )

        data_groups = self.attrs["data_groups"]
        sel_data_group_set = set(data_groups[data_group_name].values())

        # Every variable referenced by some data group but not by the selected one.
        data_variables_to_drop = set()
        for dg in data_groups.values():
            data_variables_to_drop.update(set(dg.values()) - sel_data_group_set)

        return MeasurementSetXds(
            super()
            .sel(indexers, method, tolerance, drop, **indexers_kwargs)
            .drop_vars(list(data_variables_to_drop))
        )
@@ -1,6 +1,6 @@
1
1
  import os
2
2
 
3
- from xradio.correlated_data import ProcessingSet
3
+ from xradio.measurement_set import ProcessingSet
4
4
  import toolviper.utils.logger as logger
5
5
  from xradio._utils.zarr.common import _open_dataset, _get_file_system_and_items
6
6
  import s3fs
@@ -25,6 +25,8 @@ def open_processing_set(
25
25
  processing_set
26
26
  Lazy representation of processing set (data is represented by Dask.arrays).
27
27
  """
28
+ from xradio.measurement_set import MeasurementSetXds
29
+
28
30
  file_system, ms_store_list = _get_file_system_and_items(ps_store)
29
31
 
30
32
  ps = ProcessingSet()
@@ -32,9 +34,9 @@ def open_processing_set(
32
34
  for ms_name in ms_store_list:
33
35
  # try:
34
36
  ms_store = os.path.join(ps_store, ms_name)
35
- ms_main_store = os.path.join(ms_store, "MAIN")
37
+ correlated_store = os.path.join(ms_store, "correlated_xds")
36
38
 
37
- xds = _open_dataset(ms_main_store, file_system)
39
+ xds = _open_dataset(correlated_store, file_system)
38
40
  data_groups = xds.attrs["data_groups"]
39
41
 
40
42
  if (intents is None) or (
@@ -54,7 +56,7 @@ def open_processing_set(
54
56
  "field_and_source_xds"
55
57
  ] = field_and_source_xds_dict[data_group_name]
56
58
 
57
- ps[ms_name] = xds
59
+ ps[ms_name] = MeasurementSetXds(xds)
58
60
  # except Exception as e:
59
61
  # logger.warning(f"Could not open {ms_name} due to {e}")
60
62
  # continue
@@ -66,15 +68,6 @@ def _open_sub_xds(ms_store, file_system, data_groups, load=False):
66
68
  sub_xds_dict = {}
67
69
  field_and_source_xds_dict = {}
68
70
 
69
- xds_names = {
70
- "ANTENNA": "antenna_xds",
71
- "POINTING": "pointing_xds",
72
- "SYSCAL": "system_calibration_xds",
73
- "GAIN_CURVE": "gain_curve_xds",
74
- "PHASE_CAL": "phase_calibration_xds",
75
- "WEATHER": "weather_xds",
76
- }
77
-
78
71
  if isinstance(file_system, s3fs.core.S3FileSystem):
79
72
  file_names = [
80
73
  bd.split(sep="/")[-1] for bd in file_system.listdir(ms_store, detail=False)
@@ -83,9 +76,9 @@ def _open_sub_xds(ms_store, file_system, data_groups, load=False):
83
76
  file_names = file_system.listdir(ms_store)
84
77
  file_names = [item for item in file_names if not item.startswith(".")]
85
78
 
86
- file_names.remove("MAIN")
79
+ file_names.remove("correlated_xds")
87
80
 
88
- field_dict = {"FIELD_AND_SOURCE_" + key.upper(): key for key in data_groups.keys()}
81
+ field_dict = {"field_and_source_xds_" + key: key for key in data_groups.keys()}
89
82
 
90
83
  # field_and_source_xds_name_start = "FIELD"
91
84
  for n in file_names:
@@ -98,7 +91,7 @@ def _open_sub_xds(ms_store, file_system, data_groups, load=False):
98
91
  if n in field_dict.keys():
99
92
  field_and_source_xds_dict[field_dict[n]] = xds
100
93
  else:
101
- sub_xds_dict[xds_names[n]] = xds
94
+ sub_xds_dict[n] = xds
102
95
 
103
96
  return sub_xds_dict, field_and_source_xds_dict
104
97