xradio 0.0.41__py3-none-any.whl → 0.0.42__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (62)
  1. xradio/_utils/coord_math.py +100 -0
  2. xradio/_utils/list_and_array.py +49 -4
  3. xradio/_utils/schema.py +36 -16
  4. xradio/image/_util/_casacore/xds_from_casacore.py +5 -5
  5. xradio/image/_util/_casacore/xds_to_casacore.py +12 -11
  6. xradio/image/_util/_fits/xds_from_fits.py +18 -17
  7. xradio/image/_util/_zarr/zarr_low_level.py +29 -12
  8. xradio/image/_util/common.py +1 -1
  9. xradio/image/_util/image_factory.py +1 -1
  10. xradio/{correlated_data → measurement_set}/__init__.py +7 -4
  11. xradio/measurement_set/_utils/__init__.py +5 -0
  12. xradio/{correlated_data/_utils/_ms → measurement_set/_utils/_msv2}/_tables/load_main_table.py +1 -1
  13. xradio/{correlated_data/_utils/_ms → measurement_set/_utils/_msv2}/_tables/read.py +1 -1
  14. xradio/{correlated_data/_utils/_ms → measurement_set/_utils/_msv2}/conversion.py +78 -35
  15. xradio/{correlated_data/_utils/_ms → measurement_set/_utils/_msv2}/create_antenna_xds.py +62 -37
  16. xradio/{correlated_data/_utils/_ms → measurement_set/_utils/_msv2}/create_field_and_source_xds.py +109 -22
  17. xradio/{correlated_data/_utils/_ms → measurement_set/_utils/_msv2}/msv4_sub_xdss.py +47 -13
  18. xradio/{correlated_data → measurement_set}/_utils/_utils/xds_helper.py +1 -1
  19. xradio/{correlated_data/_utils/ms.py → measurement_set/_utils/msv2.py} +4 -4
  20. xradio/{correlated_data → measurement_set}/convert_msv2_to_processing_set.py +2 -2
  21. xradio/{correlated_data → measurement_set}/load_processing_set.py +5 -5
  22. xradio/measurement_set/measurement_set_xds.py +83 -0
  23. xradio/{correlated_data → measurement_set}/open_processing_set.py +9 -16
  24. xradio/measurement_set/processing_set.py +777 -0
  25. xradio/{correlated_data → measurement_set}/schema.py +1101 -610
  26. xradio/schema/check.py +42 -22
  27. xradio/schema/dataclass.py +56 -6
  28. xradio/sphinx/__init__.py +12 -0
  29. xradio/sphinx/schema_table.py +351 -0
  30. {xradio-0.0.41.dist-info → xradio-0.0.42.dist-info}/METADATA +9 -6
  31. xradio-0.0.42.dist-info/RECORD +76 -0
  32. {xradio-0.0.41.dist-info → xradio-0.0.42.dist-info}/WHEEL +1 -1
  33. xradio/_utils/common.py +0 -101
  34. xradio/correlated_data/_utils/__init__.py +0 -5
  35. xradio/correlated_data/correlated_xds.py +0 -13
  36. xradio/correlated_data/processing_set.py +0 -301
  37. xradio/correlated_data/test__processing_set.py +0 -74
  38. xradio-0.0.41.dist-info/RECORD +0 -75
  39. /xradio/{correlated_data/_utils/_ms → measurement_set/_utils/_msv2}/_tables/load.py +0 -0
  40. /xradio/{correlated_data/_utils/_ms → measurement_set/_utils/_msv2}/_tables/read_main_table.py +0 -0
  41. /xradio/{correlated_data/_utils/_ms → measurement_set/_utils/_msv2}/_tables/read_subtables.py +0 -0
  42. /xradio/{correlated_data/_utils/_ms → measurement_set/_utils/_msv2}/_tables/table_query.py +0 -0
  43. /xradio/{correlated_data/_utils/_ms → measurement_set/_utils/_msv2}/_tables/write.py +0 -0
  44. /xradio/{correlated_data/_utils/_ms → measurement_set/_utils/_msv2}/_tables/write_exp_api.py +0 -0
  45. /xradio/{correlated_data/_utils/_ms → measurement_set/_utils/_msv2}/chunks.py +0 -0
  46. /xradio/{correlated_data/_utils/_ms → measurement_set/_utils/_msv2}/descr.py +0 -0
  47. /xradio/{correlated_data/_utils/_ms → measurement_set/_utils/_msv2}/msv2_msv3.py +0 -0
  48. /xradio/{correlated_data/_utils/_ms → measurement_set/_utils/_msv2}/msv2_to_msv4_meta.py +0 -0
  49. /xradio/{correlated_data/_utils/_ms → measurement_set/_utils/_msv2}/msv4_info_dicts.py +0 -0
  50. /xradio/{correlated_data/_utils/_ms → measurement_set/_utils/_msv2}/optimised_functions.py +0 -0
  51. /xradio/{correlated_data/_utils/_ms → measurement_set/_utils/_msv2}/partition_queries.py +0 -0
  52. /xradio/{correlated_data/_utils/_ms → measurement_set/_utils/_msv2}/partitions.py +0 -0
  53. /xradio/{correlated_data/_utils/_ms → measurement_set/_utils/_msv2}/subtables.py +0 -0
  54. /xradio/{correlated_data → measurement_set}/_utils/_utils/cds.py +0 -0
  55. /xradio/{correlated_data → measurement_set}/_utils/_utils/partition_attrs.py +0 -0
  56. /xradio/{correlated_data → measurement_set}/_utils/_utils/stokes_types.py +0 -0
  57. /xradio/{correlated_data → measurement_set}/_utils/_zarr/encoding.py +0 -0
  58. /xradio/{correlated_data → measurement_set}/_utils/_zarr/read.py +0 -0
  59. /xradio/{correlated_data → measurement_set}/_utils/_zarr/write.py +0 -0
  60. /xradio/{correlated_data → measurement_set}/_utils/zarr.py +0 -0
  61. {xradio-0.0.41.dist-info → xradio-0.0.42.dist-info}/LICENSE.txt +0 -0
  62. {xradio-0.0.41.dist-info → xradio-0.0.42.dist-info}/top_level.txt +0 -0
@@ -11,18 +11,18 @@ import xarray as xr
11
11
 
12
12
  import toolviper.utils.logger as logger
13
13
  from casacore import tables
14
- from xradio.correlated_data._utils._ms.msv4_sub_xdss import (
14
+ from xradio.measurement_set._utils._msv2.msv4_sub_xdss import (
15
15
  create_pointing_xds,
16
16
  create_system_calibration_xds,
17
17
  create_weather_xds,
18
18
  )
19
19
  from .msv4_info_dicts import create_info_dicts
20
- from xradio.correlated_data._utils._ms.create_antenna_xds import (
20
+ from xradio.measurement_set._utils._msv2.create_antenna_xds import (
21
21
  create_antenna_xds,
22
22
  create_gain_curve_xds,
23
23
  create_phase_calibration_xds,
24
24
  )
25
- from xradio.correlated_data._utils._ms.create_field_and_source_xds import (
25
+ from xradio.measurement_set._utils._msv2.create_field_and_source_xds import (
26
26
  create_field_and_source_xds,
27
27
  )
28
28
  from xradio._utils.schema import column_description_casacore_to_msv4_measure
@@ -422,9 +422,9 @@ def create_coordinates(
422
422
  "time": utime,
423
423
  "baseline_antenna1_id": ("baseline_id", baseline_ant1_id),
424
424
  "baseline_antenna2_id": ("baseline_id", baseline_ant2_id),
425
- "uvw_label": ["u", "v", "w"],
426
425
  "baseline_id": np.arange(len(baseline_ant1_id)),
427
426
  "scan_number": ("time", scan_id),
427
+ "uvw_label": ["u", "v", "w"],
428
428
  }
429
429
 
430
430
  ddi_xds = load_generic_table(in_file, "DATA_DESCRIPTION").sel(row=ddi)
@@ -482,7 +482,6 @@ def create_coordinates(
482
482
  }
483
483
  xds.frequency.attrs["spectral_window_id"] = spectral_window_id
484
484
 
485
- # xds.frequency.attrs["effective_channel_width"] = "EFFECTIVE_CHANNEL_WIDTH"
486
485
  # Add if doppler table is present
487
486
  # xds.frequency.attrs["doppler_velocity"] =
488
487
  # xds.frequency.attrs["doppler_type"] =
@@ -605,8 +604,8 @@ def create_data_variables(
605
604
  logger.debug(
606
605
  "Time to read column " + str(col) + " : " + str(time.time() - start)
607
606
  )
608
- except Exception as e:
609
- logger.debug("Could not load column", col)
607
+ except Exception as exc:
608
+ logger.debug(f"Could not load column {col}, exception: {exc}")
610
609
 
611
610
  if ("WEIGHT_SPECTRUM" == col) and (
612
611
  "WEIGHT" in col_names
@@ -624,13 +623,27 @@ def create_data_variables(
624
623
 
625
624
 
626
625
  def add_missing_data_var_attrs(xds):
627
- """Adds in attributes expected metadata that cannot be found
628
- in the input MSv2. For now specifically for missing
629
- single-dish/SPECTRUM metadata"""
626
+ """
627
+ Adds to the data variables of the xds the expected attribute metadata that cannot be found in the input MSv2.
628
+ For now:
629
+ - missing single-dish/SPECTRUM metadata
630
+ - missing interferometry/VISIBILITY_MODEL metadata
631
+ """
630
632
  data_var_names = ["SPECTRUM", "SPECTRUM_CORRECTED"]
631
633
  for var_name in data_var_names:
632
634
  if var_name in xds.data_vars:
633
- xds.data_vars[var_name].attrs["units"] = ["Jy"]
635
+ xds.data_vars[var_name].attrs["units"] = [""]
636
+
637
+ vis_var_names = ["VISIBILITY_MODEL"]
638
+ for var_name in vis_var_names:
639
+ if var_name in xds.data_vars and "units" not in xds.data_vars[var_name].attrs:
640
+ # Assume MODEL uses the same units
641
+ if "VISIBILITY" in xds.data_vars:
642
+ xds.data_vars[var_name].attrs["units"] = xds.data_vars[
643
+ "VISIBILITY"
644
+ ].attrs["units"]
645
+ else:
646
+ xds.data_vars[var_name].attrs["units"] = [""]
634
647
 
635
648
  return xds
636
649
 
@@ -688,6 +701,28 @@ def create_taql_query(partition_info):
688
701
  return taql_where
689
702
 
690
703
 
704
+ def fix_uvw_frame(
705
+ xds: xr.Dataset, field_and_source_xds: xr.Dataset, is_single_dish: bool
706
+ ) -> xr.Dataset:
707
+ """
708
+ Fix UVW frame
709
+
710
+ From CASA fixvis docs: clean and the im tool ignore the reference frame claimed by the UVW column (it is often
711
+ mislabelled as ITRF when it is really FK5 (J2000)) and instead assume the (u, v, w)s are in the same frame as the phase
712
+ tracking center. calcuvw does not yet force the UVW column and field centers to use the same reference frame!
713
+ Blank = use the phase tracking frame of vis.
714
+ """
715
+ if xds.UVW.attrs["frame"] == "ITRF":
716
+ if is_single_dish:
717
+ center_var = "FIELD_REFERENCE_CENTER"
718
+ else:
719
+ center_var = "FIELD_PHASE_CENTER"
720
+
721
+ xds.UVW.attrs["frame"] = field_and_source_xds[center_var].attrs["frame"]
722
+
723
+ return xds
724
+
725
+
691
726
  def convert_and_write_partition(
692
727
  in_file: str,
693
728
  out_file: str,
@@ -798,9 +833,9 @@ def convert_and_write_partition(
798
833
  start = time.time()
799
834
  xds = xr.Dataset(
800
835
  attrs={
801
- "creation_date": datetime.datetime.now().isoformat(),
836
+ "creation_date": datetime.datetime.utcnow().isoformat(),
802
837
  "xradio_version": importlib.metadata.version("xradio"),
803
- "schema_version": "4.0.-9999",
838
+ "schema_version": "4.0.-9994",
804
839
  "type": "visibility",
805
840
  }
806
841
  )
@@ -847,7 +882,6 @@ def convert_and_write_partition(
847
882
 
848
883
  # Add data_groups
849
884
  xds, is_single_dish = add_data_groups(xds)
850
-
851
885
  xds = add_missing_data_var_attrs(xds)
852
886
 
853
887
  if (
@@ -895,8 +929,8 @@ def convert_and_write_partition(
895
929
  antenna_id,
896
930
  feed_id,
897
931
  telescope_name,
932
+ xds.polarization,
898
933
  )
899
-
900
934
  logger.debug("Time antenna xds " + str(time.time() - start))
901
935
 
902
936
  start = time.time()
@@ -943,6 +977,8 @@ def convert_and_write_partition(
943
977
  logger.debug("Time weather " + str(time.time() - start))
944
978
 
945
979
  # Create pointing_xds
980
+ pointing_xds = xr.Dataset()
981
+ print("with_pointing", with_pointing)
946
982
  if with_pointing:
947
983
  start = time.time()
948
984
  if pointing_interpolate:
@@ -1003,16 +1039,7 @@ def convert_and_write_partition(
1003
1039
  )
1004
1040
  logger.debug("Time field_and_source_xds " + str(time.time() - start))
1005
1041
 
1006
- # Fix UVW frame
1007
- # From CASA fixvis docs: clean and the im tool ignore the reference frame claimed by the UVW column (it is often mislabelled as ITRF when it is really FK5 (J2000)) and instead assume the (u, v, w)s are in the same frame as the phase tracking center. calcuvw does not yet force the UVW column and field centers to use the same reference frame! Blank = use the phase tracking frame of vis.
1008
- if is_single_dish:
1009
- xds.UVW.attrs["frame"] = field_and_source_xds[
1010
- "FIELD_REFERENCE_CENTER"
1011
- ].attrs["frame"]
1012
- else:
1013
- xds.UVW.attrs["frame"] = field_and_source_xds[
1014
- "FIELD_PHASE_CENTER"
1015
- ].attrs["frame"]
1042
+ xds = fix_uvw_frame(xds, field_and_source_xds, is_single_dish)
1016
1043
 
1017
1044
  partition_info_misc_fields = {
1018
1045
  "scan_id": scan_id,
@@ -1040,41 +1067,53 @@ def convert_and_write_partition(
1040
1067
  else:
1041
1068
  mode = "w-"
1042
1069
 
1070
+ if is_single_dish:
1071
+ xds.attrs["type"] = "spectrum"
1072
+ xds = xds.drop_vars(["UVW"])
1073
+ del xds["uvw_label"]
1074
+ else:
1075
+ if any("WVR" in s for s in intents):
1076
+ xds.attrs["type"] = "wvr"
1077
+ else:
1078
+ xds.attrs["type"] = "visibility"
1079
+
1043
1080
  start = time.time()
1044
1081
  if storage_backend == "zarr":
1045
- xds.to_zarr(store=os.path.join(file_name, "MAIN"), mode=mode)
1046
- ant_xds.to_zarr(store=os.path.join(file_name, "ANTENNA"), mode=mode)
1082
+ xds.to_zarr(store=os.path.join(file_name, "correlated_xds"), mode=mode)
1083
+ ant_xds.to_zarr(store=os.path.join(file_name, "antenna_xds"), mode=mode)
1047
1084
  for group_name in xds.attrs["data_groups"]:
1048
1085
  field_and_source_xds.to_zarr(
1049
1086
  store=os.path.join(
1050
- file_name, f"FIELD_AND_SOURCE_{group_name.upper()}"
1087
+ file_name, f"field_and_source_xds_{group_name}"
1051
1088
  ),
1052
1089
  mode=mode,
1053
1090
  )
1054
1091
 
1055
- if with_pointing and len(pointing_xds.data_vars) > 1:
1092
+ if with_pointing and len(pointing_xds.data_vars) > 0:
1056
1093
  pointing_xds.to_zarr(
1057
- store=os.path.join(file_name, "POINTING"), mode=mode
1094
+ store=os.path.join(file_name, "pointing_xds"), mode=mode
1058
1095
  )
1059
1096
 
1060
1097
  if system_calibration_xds:
1061
1098
  system_calibration_xds.to_zarr(
1062
- store=os.path.join(file_name, "SYSCAL"), mode=mode
1099
+ store=os.path.join(file_name, "system_calibration_xds"),
1100
+ mode=mode,
1063
1101
  )
1064
1102
 
1065
1103
  if gain_curve_xds:
1066
1104
  gain_curve_xds.to_zarr(
1067
- store=os.path.join(file_name, "GAIN_CURVE"), mode=mode
1105
+ store=os.path.join(file_name, "gain_curve_xds"), mode=mode
1068
1106
  )
1069
1107
 
1070
1108
  if phase_calibration_xds:
1071
1109
  phase_calibration_xds.to_zarr(
1072
- store=os.path.join(file_name, "PHASE_CAL"), mode=mode
1110
+ store=os.path.join(file_name, "phase_calibration_xds"),
1111
+ mode=mode,
1073
1112
  )
1074
1113
 
1075
1114
  if weather_xds:
1076
1115
  weather_xds.to_zarr(
1077
- store=os.path.join(file_name, "WEATHER"), mode=mode
1116
+ store=os.path.join(file_name, "weather_xds"), mode=mode
1078
1117
  )
1079
1118
 
1080
1119
  elif storage_backend == "netcdf":
@@ -1106,7 +1145,11 @@ def antenna_ids_to_names(
1106
1145
  }
1107
1146
  )
1108
1147
  else:
1109
- xds["baseline_id"] = ant_xds["antenna_name"].sel(antenna_id=xds["baseline_id"])
1148
+ # baseline_antenna1_id will be removed soon below, but it is useful here to know the actual antenna_ids,
1149
+ # as opposed to the baseline_ids which can mismatch when data is missing for some antennas
1150
+ xds["baseline_id"] = ant_xds["antenna_name"].sel(
1151
+ antenna_id=xds["baseline_antenna1_id"]
1152
+ )
1110
1153
  unwanted_coords_from_ant_xds = [
1111
1154
  "antenna_id",
1112
1155
  "antenna_name",
@@ -6,8 +6,8 @@ import numpy as np
6
6
  import xarray as xr
7
7
  import os
8
8
 
9
- from xradio.correlated_data._utils._ms.subtables import subt_rename_ids
10
- from xradio.correlated_data._utils._ms._tables.read import (
9
+ from xradio.measurement_set._utils._msv2.subtables import subt_rename_ids
10
+ from xradio.measurement_set._utils._msv2._tables.read import (
11
11
  load_generic_table,
12
12
  convert_casacore_time,
13
13
  convert_casacore_time_to_mjd,
@@ -15,7 +15,7 @@ from xradio.correlated_data._utils._ms._tables.read import (
15
15
  table_exists,
16
16
  )
17
17
  from xradio._utils.schema import convert_generic_xds_to_xradio_schema
18
- from xradio.correlated_data._utils._ms.msv4_sub_xdss import interpolate_to_time
18
+ from xradio.measurement_set._utils._msv2.msv4_sub_xdss import interpolate_to_time
19
19
 
20
20
  from xradio._utils.list_and_array import (
21
21
  check_if_consistent,
@@ -31,6 +31,7 @@ def create_antenna_xds(
31
31
  antenna_id: list,
32
32
  feed_id: list,
33
33
  telescope_name: str,
34
+ partition_polarization: xr.DataArray,
34
35
  ) -> xr.Dataset:
35
36
  """
36
37
  Create an Xarray Dataset containing antenna information.
@@ -47,6 +48,8 @@ def create_antenna_xds(
47
48
  List of feed IDs.
48
49
  telescope_name : str
49
50
  Name of the telescope.
51
+ partition_polarization: xr.DataArray
52
+ Polarization labels of this partition, needed if that info is not present in FEED
50
53
 
51
54
  Returns
52
55
  ----------
@@ -59,6 +62,18 @@ def create_antenna_xds(
59
62
  ant_xds = extract_feed_info(
60
63
  ant_xds, in_file, antenna_id, feed_id, spectral_window_id
61
64
  )
65
+ # Needed for special SPWs such as ALMA WVR or CHANNEL_AVERAGE data (have no feed info)
66
+ if "polarization_type" not in ant_xds:
67
+ pols_chars = list(partition_polarization.values[0])
68
+ pols_labels = [f"pol_{idx}" for idx in np.arange(0, len(pols_chars))]
69
+ ant_xds = ant_xds.assign_coords(receptor_label=pols_labels)
70
+ pol_type_values = [pols_chars] * len(ant_xds.antenna_name)
71
+ ant_xds = ant_xds.assign_coords(
72
+ polarization_type=(
73
+ ["antenna_name", "receptor_label"],
74
+ pol_type_values,
75
+ )
76
+ )
62
77
 
63
78
  ant_xds.attrs["overall_telescope_name"] = telescope_name
64
79
  return ant_xds
@@ -87,7 +102,6 @@ def extract_antenna_info(
87
102
  """
88
103
  to_new_data_variables = {
89
104
  "POSITION": ["ANTENNA_POSITION", ["antenna_name", "cartesian_pos_label"]],
90
- "OFFSET": ["ANTENNA_FEED_OFFSET", ["antenna_name", "cartesian_pos_label"]],
91
105
  "DISH_DIAMETER": ["ANTENNA_DISH_DIAMETER", ["antenna_name"]],
92
106
  }
93
107
 
@@ -95,7 +109,7 @@ def extract_antenna_info(
95
109
  "NAME": ["antenna_name", ["antenna_name"]],
96
110
  "STATION": ["station", ["antenna_name"]],
97
111
  "MOUNT": ["mount", ["antenna_name"]],
98
- "PHASED_ARRAY_ID": ["phased_array_id", ["antenna_name"]],
112
+ # "PHASED_ARRAY_ID": ["phased_array_id", ["antenna_name"]],
99
113
  "antenna_id": ["antenna_id", ["antenna_name"]],
100
114
  }
101
115
 
@@ -124,9 +138,8 @@ def extract_antenna_info(
124
138
 
125
139
  ant_xds["ANTENNA_DISH_DIAMETER"].attrs.update({"units": ["m"], "type": "quantity"})
126
140
 
127
- ant_xds["ANTENNA_FEED_OFFSET"].attrs["type"] = "earth_location_offset"
128
- ant_xds["ANTENNA_FEED_OFFSET"].attrs["coordinate_system"] = "geocentric"
129
141
  ant_xds["ANTENNA_POSITION"].attrs["coordinate_system"] = "geocentric"
142
+ ant_xds["ANTENNA_POSITION"].attrs["origin_object_name"] = "earth"
130
143
 
131
144
  if telescope_name in ["ALMA", "VLA", "NOEMA", "EVLA"]:
132
145
  # antenna_name = ant_xds["antenna_name"].values + "_" + ant_xds["station"].values
@@ -202,12 +215,17 @@ def extract_feed_info(
202
215
  taql_where=f" where (ANTENNA_ID IN [{','.join(map(str, ant_xds.antenna_id.values))}]) AND (FEED_ID IN [{','.join(map(str, feed_id))}])",
203
216
  ) # Some Lofar and MeerKAT data have the spw column set to -1 so we can't use '(SPECTRAL_WINDOW_ID = {spectral_window_id})'
204
217
 
218
+ if not generic_feed_xds:
219
+ # Some MSv2 have a FEED table that does not cover all antenna_id (and feed_id)
220
+ return ant_xds
221
+
205
222
  feed_spw = np.unique(generic_feed_xds.SPECTRAL_WINDOW_ID)
206
223
  if len(feed_spw) == 1 and feed_spw[0] == -1:
207
224
  generic_feed_xds = generic_feed_xds.isel(SPECTRAL_WINDOW_ID=0, drop=True)
208
225
  else:
209
226
  if spectral_window_id not in feed_spw:
210
- return ant_xds # For some spw the feed table is empty (this is the case with ALMA spw WVR#NOMINAL).
227
+ # For some spw the feed table is empty (this is the case with ALMA spw WVR#NOMINAL).
228
+ return ant_xds
211
229
  else:
212
230
  generic_feed_xds = generic_feed_xds.sel(
213
231
  SPECTRAL_WINDOW_ID=spectral_window_id, drop=True
@@ -228,14 +246,14 @@ def extract_feed_info(
228
246
  ), "The number of receptors must be constant in feed table."
229
247
 
230
248
  to_new_data_variables = {
231
- "BEAM_OFFSET": [
232
- "BEAM_OFFSET",
233
- ["antenna_name", "receptor_label", "sky_dir_label"],
249
+ "RECEPTOR_ANGLE": [
250
+ "ANTENNA_RECEPTOR_ANGLE",
251
+ ["antenna_name", "receptor_label"],
234
252
  ],
235
- "RECEPTOR_ANGLE": ["RECEPTOR_ANGLE", ["antenna_name", "receptor_label"]],
236
- # "pol_response": ["POLARIZATION_RESPONSE", ["antenna_name", "receptor_label", "receptor_name_"]] #repeated dim creates problems.
237
- "FOCUS_LENGTH": ["FOCUS_LENGTH", ["antenna_name"]], # optional
238
- # "position": ["ANTENNA_FEED_OFFSET",["antenna_name", "cartesian_pos_label"]] #Will be added to the existing position in ant_xds
253
+ "FOCUS_LENGTH": [
254
+ "ANTENNA_FOCUS_LENGTH",
255
+ ["antenna_name"],
256
+ ], # optional
239
257
  }
240
258
 
241
259
  to_new_coords = {
@@ -249,29 +267,31 @@ def extract_feed_info(
249
267
  to_new_coords=to_new_coords,
250
268
  )
251
269
 
252
- # print('ant_xds["ANTENNA_FEED_OFFSET"]',ant_xds["ANTENNA_FEED_OFFSET"].data)
253
- # print('generic_feed_xds["POSITION"].data',generic_feed_xds["POSITION"].data)
254
- feed_offset_attrs = ant_xds["ANTENNA_FEED_OFFSET"].attrs
255
- ant_xds["ANTENNA_FEED_OFFSET"] = (
256
- ant_xds["ANTENNA_FEED_OFFSET"] + generic_feed_xds["POSITION"].data
257
- )
258
- # recover attrs after arithmetic operation
259
- ant_xds["ANTENNA_FEED_OFFSET"].attrs.update(feed_offset_attrs)
260
-
261
- coords = {}
262
270
  # coords["receptor_label"] = "pol_" + np.arange(ant_xds.sizes["receptor_label"]).astype(str) #Works on laptop but fails in github test runner.
263
- coords["receptor_label"] = np.array(
264
- list(
265
- map(
266
- lambda x, y: x + "_" + y,
267
- ["pol"] * ant_xds.sizes["receptor_label"],
268
- np.arange(ant_xds.sizes["receptor_label"]).astype(str),
269
- )
271
+ coords = {
272
+ "receptor_label": np.array(
273
+ list(
274
+ map(
275
+ lambda x, y: x + "_" + y,
276
+ ["pol"] * ant_xds.sizes["receptor_label"],
277
+ np.arange(ant_xds.sizes["receptor_label"]).astype(str),
278
+ )
279
+ ),
280
+ dtype=str,
270
281
  )
271
- )
282
+ }
272
283
 
273
- coords["sky_dir_label"] = ["ra", "dec"]
274
284
  ant_xds = ant_xds.assign_coords(coords)
285
+
286
+ # Correct to expected types. Some ALMA-SD (at least) leave receptor_label, polarization_type columns
287
+ # in the MS empty, causing a type mismatch
288
+ if (
289
+ "polarization_type" in ant_xds.coords
290
+ and ant_xds.coords["polarization_type"].dtype != str
291
+ ):
292
+ ant_xds.coords["polarization_type"] = ant_xds.coords[
293
+ "polarization_type"
294
+ ].astype(str)
275
295
  return ant_xds
276
296
 
277
297
 
@@ -370,6 +390,11 @@ def create_gain_curve_xds(
370
390
  }
371
391
  )
372
392
 
393
+ # correct expected types (for example "GAIN_CURVE" can be float32)
394
+ for data_var in gain_curve_xds:
395
+ if gain_curve_xds.data_vars[data_var].dtype != np.float64:
396
+ gain_curve_xds[data_var] = gain_curve_xds[data_var].astype(np.float64)
397
+
373
398
  return gain_curve_xds
374
399
 
375
400
 
@@ -445,7 +470,7 @@ def create_phase_calibration_xds(
445
470
  "TIME": ["time_phase_cal", ["time_phase_cal"]],
446
471
  }
447
472
 
448
- phase_cal_xds = xr.Dataset(attrs={"type": "phase_cal"})
473
+ phase_cal_xds = xr.Dataset(attrs={"type": "phase_calibration"})
449
474
  phase_cal_xds = convert_generic_xds_to_xradio_schema(
450
475
  generic_phase_cal_xds, phase_cal_xds, to_new_data_variables, to_new_coords
451
476
  )
@@ -494,8 +519,8 @@ def create_phase_calibration_xds(
494
519
  time_coord_attrs = {
495
520
  "type": "time",
496
521
  "units": ["s"],
497
- "scale": "UTC",
498
- "format": "UNIX",
522
+ "scale": "utc",
523
+ "format": "unix",
499
524
  }
500
525
 
501
526
  # If we interpolate rename the time_phase_cal axis to time.
@@ -6,20 +6,27 @@ import numpy as np
6
6
  import xarray as xr
7
7
 
8
8
  import toolviper.utils.logger as logger
9
- from xradio.correlated_data._utils._ms.msv4_sub_xdss import interpolate_to_time
10
- from xradio.correlated_data._utils._ms.subtables import subt_rename_ids
11
- from xradio.correlated_data._utils._ms._tables.read import (
9
+ from xradio.measurement_set._utils._msv2.msv4_sub_xdss import interpolate_to_time
10
+ from xradio.measurement_set._utils._msv2.subtables import subt_rename_ids
11
+ from xradio.measurement_set._utils._msv2._tables.read import (
12
12
  convert_casacore_time_to_mjd,
13
13
  make_taql_where_between_min_max,
14
14
  load_generic_table,
15
15
  )
16
- from xradio._utils.common import cast_to_str, convert_to_si_units, add_position_offsets
16
+ from xradio._utils.list_and_array import cast_to_str
17
+ from xradio._utils.coord_math import (
18
+ convert_to_si_units,
19
+ add_position_offsets,
20
+ wrap_to_pi,
21
+ )
22
+
17
23
  from xradio._utils.list_and_array import (
18
24
  check_if_consistent,
19
25
  unique_1d,
20
26
  to_np_array,
21
27
  )
22
28
  from xradio._utils.schema import (
29
+ casacore_to_msv4_measure_type,
23
30
  column_description_casacore_to_msv4_measure,
24
31
  convert_generic_xds_to_xradio_schema,
25
32
  )
@@ -169,9 +176,21 @@ def extract_ephemeris_info(
169
176
  ), "Only geocentric observer ephemeris are supported."
170
177
 
171
178
  if "posrefsys" in ephemeris_meta:
172
- sky_coord_frame = ephemeris_meta["posrefsys"].replace("ICRF/", "")
179
+ # Note the phase center can be given as "J2000" or "J2000.0"
180
+ ref_frame = (
181
+ ephemeris_meta["posrefsys"]
182
+ .replace("ICRF/", "", 1)
183
+ .replace("J2000.0", "J2000", 1)
184
+ )
185
+ if ref_frame in casacore_to_msv4_measure_type["direction"].get("Ref_map", {}):
186
+ ref_frame = casacore_to_msv4_measure_type["direction"]["Ref_map"][ref_frame]
187
+ else:
188
+ logger.debug(
189
+ f"Unrecognized casacore direction reference frame found in posrefsys: {ref_frame}"
190
+ )
191
+ sky_coord_frame = ref_frame.lower()
173
192
  else:
174
- sky_coord_frame = "ICRS" # We will have to just assume this.
193
+ sky_coord_frame = "icrs" # We will have to just assume this.
175
194
 
176
195
  # Find out which keyword is used for units (UNIT/QuantumUnits)
177
196
  if "UNIT" in ephemeris_column_description["RA"]["keywords"]:
@@ -195,7 +214,7 @@ def extract_ephemeris_info(
195
214
  "type": "location",
196
215
  "units": ["deg", "deg", "m"],
197
216
  "data": observer_position,
198
- "ellipsoid": "WGS84",
217
+ "frame": "WGS84",
199
218
  "origin_object_name": "Earth",
200
219
  "coordinate_system": ephemeris_meta["obsloc"].lower(),
201
220
  }
@@ -260,7 +279,7 @@ def extract_ephemeris_info(
260
279
  }
261
280
  )
262
281
 
263
- # Add optional data: SUB_OBSERVER_POSITION and SUB_SOLAR_POSITION
282
+ # Add optional data: SUB_OBSERVER_DIRECTION and SUB_SOLAR_POSITION
264
283
  if "DiskLong" in ephemeris_column_description:
265
284
  key_lon = "DiskLong"
266
285
  key_lat = "DiskLat"
@@ -283,7 +302,7 @@ def extract_ephemeris_info(
283
302
  temp_xds["SUB_OBSERVER_DIRECTION"].attrs.update(
284
303
  {
285
304
  "type": "location",
286
- "ellipsoid": "NA",
305
+ "frame": "Undefined",
287
306
  "origin_object_name": ephemeris_meta["NAME"],
288
307
  "coordinate_system": "planetodetic",
289
308
  "units": [
@@ -312,7 +331,7 @@ def extract_ephemeris_info(
312
331
  temp_xds["SUB_SOLAR_POSITION"].attrs.update(
313
332
  {
314
333
  "type": "location",
315
- "ellipsoid": "NA",
334
+ "frame": "Undefined",
316
335
  "origin_object_name": "Sun",
317
336
  "coordinate_system": "planetodetic",
318
337
  "units": [
@@ -339,8 +358,8 @@ def extract_ephemeris_info(
339
358
  time_coord_attrs = {
340
359
  "type": "time",
341
360
  "units": ["s"],
342
- "scale": "UTC",
343
- "format": "UNIX",
361
+ "scale": "utc",
362
+ "format": "unix",
344
363
  }
345
364
  temp_xds["time_ephemeris"].attrs.update(time_coord_attrs)
346
365
 
@@ -374,21 +393,28 @@ def extract_ephemeris_info(
374
393
  interp_time is not None
375
394
  ), 'ephemeris_interpolate must be True if there is ephemeris data and multiple fields (this will occur if "FIELD_ID" is not in partition_scheme).'
376
395
 
396
+ field_phase_center = wrap_to_pi(
397
+ xds[center_dv].values + xds["SOURCE_LOCATION"][:, 0:2].values
398
+ )
399
+ field_phase_center = np.column_stack(
400
+ (field_phase_center, np.zeros(xds[center_dv].values.shape[0]))
401
+ )
402
+ field_phase_center[:, -1] = (
403
+ field_phase_center[:, -1] + xds["SOURCE_LOCATION"][:, -1].values
404
+ )
405
+
377
406
  xds[center_dv] = xr.DataArray(
378
- add_position_offsets(
379
- np.column_stack(
380
- (xds[center_dv].values, np.zeros(xds[center_dv].values.shape[0]))
381
- ),
382
- xds["SOURCE_LOCATION"].values,
383
- ),
407
+ field_phase_center,
384
408
  dims=[xds["SOURCE_LOCATION"].dims[0], "sky_pos_label"],
385
409
  )
386
410
  else:
411
+ field_phase_center = (
412
+ np.append(xds[center_dv].values, 0) + xds["SOURCE_LOCATION"].values
413
+ )
414
+ field_phase_center[:, 0:2] = wrap_to_pi(field_phase_center[:, 0:2])
415
+
387
416
  xds[center_dv] = xr.DataArray(
388
- add_position_offsets(
389
- np.append(xds[center_dv].values, 0),
390
- xds["SOURCE_LOCATION"].values,
391
- ),
417
+ field_phase_center,
392
418
  dims=[xds["SOURCE_LOCATION"].dims[0], "sky_pos_label"],
393
419
  )
394
420
 
@@ -460,6 +486,65 @@ def make_line_dims_and_coords(
460
486
  return line_dims, line_coords
461
487
 
462
488
 
489
+ def pad_missing_sources(
490
+ source_xds: xr.Dataset, unique_source_ids: np.array
491
+ ) -> xr.Dataset:
492
+ """
493
+ In some MSs there can be source IDs referenced from the field subtable which do not exist in
494
+ the source table: https://github.com/casangi/xradio/issues/266
495
+
496
+ This addresses the issue by padding/filling those IDs with "Unknown"/nan values. Produces a
497
+ source_xds that, in addition to the information loaded for the non-missing source IDs, has
498
+ padding for the IDs that are missing from the input MSv2 source table.
499
+ This function does not need to do anything when unique_source_ids is a single value
500
+ (partitioning by "FIELD_ID" or otherwise single field/source)
501
+
502
+ Parameters:
503
+ ----------
504
+ source_xds: xr.Dataset
505
+ source dataset to fix/pad missing sources
506
+ unique_source_ids: np.array
507
+ IDs of the sources included in this partition
508
+
509
+ Returns:
510
+ -------
511
+ filled_source_xds : xr.Dataset
512
+ source dataset with padding in the originally missing sources
513
+ """
514
+
515
+ # Only fill gaps in multi-source xdss. With a single source_id there is nothing to pad
516
+ if len(unique_source_ids) <= 1:
517
+ return source_xds
518
+
519
+ missing_source_ids = [
520
+ source_id
521
+ for source_id in unique_source_ids
522
+ if source_id not in source_xds.coords["SOURCE_ID"]
523
+ ]
524
+
525
+ # would like to use the new-ish xr.pad, but it creates issues with indices/coords and is
526
+ # also not free of overheads, as it for example changes all numeric types to float64
527
+ missing_source_xds = xr.full_like(source_xds.isel(SOURCE_ID=0), fill_value=np.nan)
528
+ pad_str = "Unknown"
529
+ pad_str_type = "<U9"
530
+ for var in missing_source_xds.data_vars:
531
+ if np.issubdtype(missing_source_xds.data_vars[var].dtype, np.str_):
532
+ # Avoid truncation to length of previously loaded strings
533
+ missing_source_xds[var] = missing_source_xds[var].astype(
534
+ np.dtype(pad_str_type)
535
+ )
536
+ missing_source_xds[var] = pad_str
537
+
538
+ concat_dim = "SOURCE_ID"
539
+ xdss_to_concat = [source_xds]
540
+ for missing_id in missing_source_ids:
541
+ missing_source_xds[concat_dim] = missing_id
542
+ xdss_to_concat.append(missing_source_xds)
543
+ filled_source_xds = xr.concat(xdss_to_concat, concat_dim).sortby(concat_dim)
544
+
545
+ return filled_source_xds
546
+
547
+
463
548
  def extract_source_info(
464
549
  xds: xr.Dataset,
465
550
  path: str,
@@ -542,6 +627,8 @@ def extract_source_info(
542
627
  "column_descriptions"
543
628
  ]
544
629
 
630
+ source_xds = pad_missing_sources(source_xds, unique_source_id)
631
+
545
632
  # Get source name (the time axis is optional and will probably be required if the partition scheme does not include 'FIELD_ID' or 'SOURCE_ID').
546
633
  # Note again that this optional time axis has nothing to do with the original time axis in the source table that we drop.
547
634
  if len(source_id) == 1: