xradio 0.0.60__tar.gz → 1.0.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (74) hide show
  1. {xradio-0.0.60/src/xradio.egg-info → xradio-1.0.0}/PKG-INFO +3 -3
  2. {xradio-0.0.60 → xradio-1.0.0}/README.md +2 -2
  3. {xradio-0.0.60 → xradio-1.0.0}/pyproject.toml +1 -1
  4. {xradio-0.0.60 → xradio-1.0.0}/src/xradio/_utils/list_and_array.py +4 -2
  5. {xradio-0.0.60 → xradio-1.0.0}/src/xradio/image/_util/_casacore/xds_to_casacore.py +11 -4
  6. {xradio-0.0.60 → xradio-1.0.0}/src/xradio/image/image.py +4 -2
  7. {xradio-0.0.60 → xradio-1.0.0}/src/xradio/measurement_set/_utils/_msv2/conversion.py +27 -15
  8. {xradio-0.0.60 → xradio-1.0.0}/src/xradio/measurement_set/_utils/_msv2/create_field_and_source_xds.py +3 -1
  9. xradio-1.0.0/src/xradio/measurement_set/_utils/_msv2/msv4_info_dicts.py +362 -0
  10. xradio-1.0.0/src/xradio/measurement_set/_utils/_msv2/partition_queries.py +337 -0
  11. {xradio-0.0.60 → xradio-1.0.0}/src/xradio/measurement_set/convert_msv2_to_processing_set.py +23 -10
  12. {xradio-0.0.60 → xradio-1.0.0}/src/xradio/measurement_set/measurement_set_xdt.py +10 -3
  13. {xradio-0.0.60 → xradio-1.0.0}/src/xradio/measurement_set/open_processing_set.py +6 -6
  14. {xradio-0.0.60 → xradio-1.0.0}/src/xradio/measurement_set/processing_set_xdt.py +69 -12
  15. {xradio-0.0.60 → xradio-1.0.0}/src/xradio/measurement_set/schema.py +136 -179
  16. {xradio-0.0.60 → xradio-1.0.0}/src/xradio/schema/__init__.py +0 -3
  17. {xradio-0.0.60 → xradio-1.0.0}/src/xradio/schema/bases.py +23 -28
  18. {xradio-0.0.60 → xradio-1.0.0}/src/xradio/schema/check.py +23 -15
  19. xradio-1.0.0/src/xradio/schema/common.py +45 -0
  20. {xradio-0.0.60 → xradio-1.0.0}/src/xradio/schema/export.py +23 -2
  21. {xradio-0.0.60 → xradio-1.0.0}/src/xradio/schema/metamodel.py +12 -8
  22. {xradio-0.0.60 → xradio-1.0.0}/src/xradio/schema/typing.py +7 -13
  23. {xradio-0.0.60 → xradio-1.0.0/src/xradio.egg-info}/PKG-INFO +3 -3
  24. {xradio-0.0.60 → xradio-1.0.0}/src/xradio.egg-info/SOURCES.txt +1 -0
  25. xradio-0.0.60/src/xradio/measurement_set/_utils/_msv2/msv4_info_dicts.py +0 -215
  26. xradio-0.0.60/src/xradio/measurement_set/_utils/_msv2/partition_queries.py +0 -150
  27. {xradio-0.0.60 → xradio-1.0.0}/LICENSE.txt +0 -0
  28. {xradio-0.0.60 → xradio-1.0.0}/MANIFEST.in +0 -0
  29. {xradio-0.0.60 → xradio-1.0.0}/setup.cfg +0 -0
  30. {xradio-0.0.60 → xradio-1.0.0}/src/xradio/__init__.py +0 -0
  31. {xradio-0.0.60 → xradio-1.0.0}/src/xradio/_utils/__init__.py +0 -0
  32. {xradio-0.0.60 → xradio-1.0.0}/src/xradio/_utils/_casacore/casacore_from_casatools.py +0 -0
  33. {xradio-0.0.60 → xradio-1.0.0}/src/xradio/_utils/_casacore/tables.py +0 -0
  34. {xradio-0.0.60 → xradio-1.0.0}/src/xradio/_utils/coord_math.py +0 -0
  35. {xradio-0.0.60 → xradio-1.0.0}/src/xradio/_utils/dict_helpers.py +0 -0
  36. {xradio-0.0.60 → xradio-1.0.0}/src/xradio/_utils/schema.py +0 -0
  37. {xradio-0.0.60 → xradio-1.0.0}/src/xradio/_utils/zarr/__init__.py +0 -0
  38. {xradio-0.0.60 → xradio-1.0.0}/src/xradio/_utils/zarr/common.py +0 -0
  39. {xradio-0.0.60 → xradio-1.0.0}/src/xradio/image/__init__.py +0 -0
  40. {xradio-0.0.60 → xradio-1.0.0}/src/xradio/image/_util/__init__.py +0 -0
  41. {xradio-0.0.60 → xradio-1.0.0}/src/xradio/image/_util/_casacore/__init__.py +0 -0
  42. {xradio-0.0.60 → xradio-1.0.0}/src/xradio/image/_util/_casacore/common.py +0 -0
  43. {xradio-0.0.60 → xradio-1.0.0}/src/xradio/image/_util/_casacore/xds_from_casacore.py +0 -0
  44. {xradio-0.0.60 → xradio-1.0.0}/src/xradio/image/_util/_fits/xds_from_fits.py +0 -0
  45. {xradio-0.0.60 → xradio-1.0.0}/src/xradio/image/_util/_zarr/common.py +0 -0
  46. {xradio-0.0.60 → xradio-1.0.0}/src/xradio/image/_util/_zarr/xds_from_zarr.py +0 -0
  47. {xradio-0.0.60 → xradio-1.0.0}/src/xradio/image/_util/_zarr/xds_to_zarr.py +0 -0
  48. {xradio-0.0.60 → xradio-1.0.0}/src/xradio/image/_util/_zarr/zarr_low_level.py +0 -0
  49. {xradio-0.0.60 → xradio-1.0.0}/src/xradio/image/_util/casacore.py +0 -0
  50. {xradio-0.0.60 → xradio-1.0.0}/src/xradio/image/_util/common.py +0 -0
  51. {xradio-0.0.60 → xradio-1.0.0}/src/xradio/image/_util/image_factory.py +0 -0
  52. {xradio-0.0.60 → xradio-1.0.0}/src/xradio/image/_util/zarr.py +0 -0
  53. {xradio-0.0.60 → xradio-1.0.0}/src/xradio/measurement_set/__init__.py +0 -0
  54. {xradio-0.0.60 → xradio-1.0.0}/src/xradio/measurement_set/_utils/__init__.py +0 -0
  55. {xradio-0.0.60 → xradio-1.0.0}/src/xradio/measurement_set/_utils/_msv2/__init__.py +0 -0
  56. {xradio-0.0.60 → xradio-1.0.0}/src/xradio/measurement_set/_utils/_msv2/_tables/read.py +0 -0
  57. {xradio-0.0.60 → xradio-1.0.0}/src/xradio/measurement_set/_utils/_msv2/_tables/read_main_table.py +0 -0
  58. {xradio-0.0.60 → xradio-1.0.0}/src/xradio/measurement_set/_utils/_msv2/_tables/table_query.py +0 -0
  59. {xradio-0.0.60 → xradio-1.0.0}/src/xradio/measurement_set/_utils/_msv2/create_antenna_xds.py +0 -0
  60. {xradio-0.0.60 → xradio-1.0.0}/src/xradio/measurement_set/_utils/_msv2/msv2_to_msv4_meta.py +0 -0
  61. {xradio-0.0.60 → xradio-1.0.0}/src/xradio/measurement_set/_utils/_msv2/msv4_sub_xdss.py +0 -0
  62. {xradio-0.0.60 → xradio-1.0.0}/src/xradio/measurement_set/_utils/_msv2/optimised_functions.py +0 -0
  63. {xradio-0.0.60 → xradio-1.0.0}/src/xradio/measurement_set/_utils/_msv2/subtables.py +0 -0
  64. {xradio-0.0.60 → xradio-1.0.0}/src/xradio/measurement_set/_utils/_utils/interpolate.py +0 -0
  65. {xradio-0.0.60 → xradio-1.0.0}/src/xradio/measurement_set/_utils/_utils/partition_attrs.py +0 -0
  66. {xradio-0.0.60 → xradio-1.0.0}/src/xradio/measurement_set/_utils/_utils/stokes_types.py +0 -0
  67. {xradio-0.0.60 → xradio-1.0.0}/src/xradio/measurement_set/_utils/_zarr/encoding.py +0 -0
  68. {xradio-0.0.60 → xradio-1.0.0}/src/xradio/measurement_set/load_processing_set.py +0 -0
  69. {xradio-0.0.60 → xradio-1.0.0}/src/xradio/schema/dataclass.py +0 -0
  70. {xradio-0.0.60 → xradio-1.0.0}/src/xradio/sphinx/__init__.py +0 -0
  71. {xradio-0.0.60 → xradio-1.0.0}/src/xradio/sphinx/schema_table.py +0 -0
  72. {xradio-0.0.60 → xradio-1.0.0}/src/xradio.egg-info/dependency_links.txt +0 -0
  73. {xradio-0.0.60 → xradio-1.0.0}/src/xradio.egg-info/requires.txt +0 -0
  74. {xradio-0.0.60 → xradio-1.0.0}/src/xradio.egg-info/top_level.txt +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: xradio
3
- Version: 0.0.60
3
+ Version: 1.0.0
4
4
  Summary: Xarray Radio Astronomy Data IO
5
5
  Author-email: Jan-Willem Steeb <jsteeb@nrao.edu>, Federico Montesino Pouzols <pouzols@eso.edu>, Dave Mehringer <dmehring@nrao.edu>, Peter Wortmann <peter.wortmann@skao.int>
6
6
  License: BSD 3-Clause License
@@ -110,10 +110,10 @@ Xarray Radio Astronomy Data IO is still in development.
110
110
  # Installing
111
111
  XRADIO can be installed in virtual environments via pip. It is recommended to use the conda environment manager from [miniforge](https://github.com/conda-forge/miniforge) to create a clean, self-contained runtime where XRADIO and all its dependencies can be installed, for example:
112
112
  ```sh
113
- conda create --name xradio python=3.12 --no-default-packages
113
+ conda create --name xradio python=3.13 --no-default-packages
114
114
  conda activate xradio
115
115
  ```
116
- > 📝 On macOS it is required to pre-install `python-casacore` using `conda install -c conda-forge python-casacore`.
116
+ > 📝 On macOS, if one wants to use the functions to convert MSv2=>MSv4, it is required to pre-install `python-casacore`. That can be done using `conda install -c conda-forge python-casacore`. See more alternatives below.
117
117
 
118
118
  XRADIO can now be installed using:
119
119
  ```sh
@@ -12,10 +12,10 @@ Xarray Radio Astronomy Data IO is still in development.
12
12
  # Installing
13
13
  XRADIO can be installed in virtual environments via pip. It is recommended to use the conda environment manager from [miniforge](https://github.com/conda-forge/miniforge) to create a clean, self-contained runtime where XRADIO and all its dependencies can be installed, for example:
14
14
  ```sh
15
- conda create --name xradio python=3.12 --no-default-packages
15
+ conda create --name xradio python=3.13 --no-default-packages
16
16
  conda activate xradio
17
17
  ```
18
- > 📝 On macOS it is required to pre-install `python-casacore` using `conda install -c conda-forge python-casacore`.
18
+ > 📝 On macOS, if one wants to use the functions to convert MSv2=>MSv4, it is required to pre-install `python-casacore`. That can be done using `conda install -c conda-forge python-casacore`. See more alternatives below.
19
19
 
20
20
  XRADIO can now be installed using:
21
21
  ```sh
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "xradio"
3
- version = "v0.0.60"
3
+ version = "v1.0.0"
4
4
  description = " Xarray Radio Astronomy Data IO"
5
5
  authors = [
6
6
  {name = "Jan-Willem Steeb", email="jsteeb@nrao.edu"},
@@ -73,7 +73,9 @@ def to_np_array(x):
73
73
  return np.array([x])
74
74
 
75
75
 
76
- def check_if_consistent(array: np.ndarray, array_name: str) -> np.ndarray:
76
+ def check_if_consistent(
77
+ array: np.ndarray, array_name: str, err_msg: str = ""
78
+ ) -> np.ndarray:
77
79
  """_summary_
78
80
 
79
81
  Parameters
@@ -92,7 +94,7 @@ def check_if_consistent(array: np.ndarray, array_name: str) -> np.ndarray:
92
94
  return array.item()
93
95
 
94
96
  array_unique = unique_1d(array)
95
- assert len(array_unique) == 1, array_name + " is not consistent."
97
+ assert len(array_unique) == 1, array_name + " is not consistent." + err_msg
96
98
  return array_unique[0]
97
99
 
98
100
 
@@ -5,6 +5,7 @@ import dask.array as da
5
5
  import numpy as np
6
6
  import xarray as xr
7
7
  from astropy.coordinates import Angle
8
+ from astropy import units as apu
8
9
 
9
10
  try:
10
11
  from casacore import tables
@@ -93,10 +94,9 @@ def _compute_spectral_dict(xds: xr.Dataset) -> dict:
93
94
  spec["system"] = xds.frequency.attrs["reference_frequency"]["attrs"][
94
95
  "observer"
95
96
  ].upper()
96
- u = xds.frequency.attrs["reference_frequency"]["attrs"]["units"]
97
- spec["unit"] = u
97
+ spec["unit"] = xds.frequency.attrs["reference_frequency"]["attrs"]["units"]
98
98
  spec["velType"] = _doppler_types.index(xds.velocity.attrs["doppler_type"])
99
- u = xds.velocity.attrs["units"]
99
+ # u = xds.velocity.attrs["units"]
100
100
  spec["version"] = 2
101
101
  # vel unit is a list[str] in the xds but needs to be a str in the casa image
102
102
  spec["velUnit"] = xds.velocity.attrs["units"]
@@ -106,7 +106,14 @@ def _compute_spectral_dict(xds: xr.Dataset) -> dict:
106
106
  wcs["ctype"] = "FREQ"
107
107
  wcs["pc"] = 1.0
108
108
  wcs["crval"] = float(xds.frequency.attrs["reference_frequency"]["data"])
109
- wcs["cdelt"] = float(xds.frequency.values[1] - xds.frequency.values[0])
109
+ if len(xds.frequency.values) > 1:
110
+ wcs["cdelt"] = float(xds.frequency.values[1] - xds.frequency.values[0])
111
+ else:
112
+ # TODO this is just a temporary fix, likely schema will be updated to include chan widths
113
+ myu = apu.Unit(spec["unit"])
114
+ mydel = 1.8 * apu.GHz
115
+ my_del_converted = mydel.to(spec["unit"])
116
+ wcs["cdelt"] = my_del_converted.value
110
117
  wcs["crpix"] = float((wcs["crval"] - xds.frequency.values[0]) / wcs["cdelt"])
111
118
  spec["wcs"] = wcs
112
119
  return spec
@@ -14,8 +14,6 @@ import xarray as xr
14
14
 
15
15
  # from .._utils.zarr.common import _load_no_dask_zarr
16
16
 
17
- from ._util.casacore import _load_casa_image_block, _xds_to_casa_image
18
-
19
17
  # from ._util.fits import _read_fits_image
20
18
  from ._util.image_factory import (
21
19
  _make_empty_aperture_image,
@@ -201,6 +199,8 @@ def load_image(infile: str, block_des: dict = None, do_sky_coords=True) -> xr.Da
201
199
  # comment next line when done debugging
202
200
  # return _load_casa_image_block(infile, selection, do_sky_coords)
203
201
  try:
202
+ from ._util.casacore import _load_casa_image_block
203
+
204
204
  return _load_casa_image_block(infile, selection, do_sky_coords)
205
205
  except Exception as e:
206
206
  emsgs.append(f"image format appears not to be casacore: {e.args}")
@@ -256,6 +256,8 @@ def write_image(
256
256
  )
257
257
  my_format = out_format.lower()
258
258
  if my_format == "casa":
259
+ from ._util.casacore import _xds_to_casa_image
260
+
259
261
  _xds_to_casa_image(xds, imagename)
260
262
  elif my_format == "zarr":
261
263
  _xds_to_zarr(xds, imagename)
@@ -440,6 +440,7 @@ def create_coordinates(
440
440
  baseline_ant1_id: np.ndarray,
441
441
  baseline_ant2_id: np.ndarray,
442
442
  scan_id: np.ndarray,
443
+ scan_intents: list[str],
443
444
  ) -> tuple[xr.Dataset, int]:
444
445
  """
445
446
  Creates coordinates of a VisibilityXds/SpectrumXds and assigns them to the input
@@ -464,6 +465,9 @@ def create_coordinates(
464
465
  ANTENNA2 ids to be used as coord
465
466
  scan_id :
466
467
  SCAN_ID values from MSv2, for the scan_name coord
468
+ scan_intents :
469
+ list of SCAN_INTENT values from MSv2, for the scan_intents attribute of the
470
+ scan_name coord
467
471
 
468
472
  Returns
469
473
  -------
@@ -509,6 +513,9 @@ def create_coordinates(
509
513
 
510
514
  xds = xds.assign_coords(coords)
511
515
 
516
+ ##### Add scan intents attribute to scan_name coord #####
517
+ xds.scan_name.attrs["scan_intents"] = scan_intents
518
+
512
519
  ###### Create Frequency Coordinate ######
513
520
  freq_column_description = spectral_window_xds.attrs["other"]["msv2"]["ctds_attrs"][
514
521
  "column_descriptions"
@@ -528,7 +535,7 @@ def create_coordinates(
528
535
  spw_name = spw_name + "_" + str(spectral_window_id)
529
536
 
530
537
  xds.frequency.attrs["spectral_window_name"] = spw_name
531
- xds.frequency.attrs["spectral_window_intent"] = "UNSPECIFIED"
538
+ xds.frequency.attrs["spectral_window_intents"] = ["UNSPECIFIED"]
532
539
  msv4_measure = column_description_casacore_to_msv4_measure(
533
540
  freq_column_description["REF_FREQUENCY"],
534
541
  ref_code=spectral_window_xds["MEAS_FREQ_REF"].data,
@@ -769,15 +776,17 @@ def create_taql_query_where(partition_info: dict):
769
776
  taql_where = "WHERE "
770
777
  for col_name in main_par_table_cols:
771
778
  if col_name in partition_info:
772
- taql_where = (
773
- taql_where
774
- + f"({col_name} IN [{','.join(map(str, partition_info[col_name]))}]) AND"
775
- )
776
- if col_name == "ANTENNA1":
779
+
780
+ if partition_info[col_name][0] is not None:
777
781
  taql_where = (
778
782
  taql_where
779
- + f"(ANTENNA2 IN [{','.join(map(str, partition_info[col_name]))}]) AND"
783
+ + f"({col_name} IN [{','.join(map(str, partition_info[col_name]))}]) AND"
780
784
  )
785
+ if col_name == "ANTENNA1":
786
+ taql_where = (
787
+ taql_where
788
+ + f"(ANTENNA2 IN [{','.join(map(str, partition_info[col_name]))}]) AND"
789
+ )
781
790
  taql_where = taql_where[:-3]
782
791
 
783
792
  return taql_where
@@ -933,6 +942,7 @@ def estimate_memory_for_partition(in_file: str, partition: dict) -> float:
933
942
 
934
943
  taql_partition = create_taql_query_where(partition)
935
944
  taql_main = f"select * from $mtable {taql_partition}"
945
+
936
946
  with open_table_ro(in_file) as mtable:
937
947
  with open_query(mtable, taql_main) as tb_tool:
938
948
  # Do not feel tempted to rely on nrows. nrows tends to underestimate memory when baselines are missing.
@@ -1017,7 +1027,7 @@ def convert_and_write_partition(
1017
1027
  _description_
1018
1028
  out_file : str
1019
1029
  _description_
1020
- intents : str
1030
+ scan_intents : str
1021
1031
  _description_
1022
1032
  ddi : int, optional
1023
1033
  _description_, by default 0
@@ -1061,7 +1071,7 @@ def convert_and_write_partition(
1061
1071
  taql_where = create_taql_query_where(partition_info)
1062
1072
  table_manager = TableManager(in_file, taql_where)
1063
1073
  ddi = partition_info["DATA_DESC_ID"][0]
1064
- intents = str(partition_info["OBS_MODE"][0])
1074
+ scan_intents = str(partition_info["OBS_MODE"][0]).split(",")
1065
1075
 
1066
1076
  start = time.time()
1067
1077
  with table_manager.get_table() as tb_tool:
@@ -1085,19 +1095,21 @@ def convert_and_write_partition(
1085
1095
  tb_tool.getcol("OBSERVATION_ID"), "OBSERVATION_ID"
1086
1096
  )
1087
1097
 
1088
- def get_observation_info(in_file, observation_id, intents):
1098
+ def get_observation_info(in_file, observation_id, scan_intents):
1089
1099
  generic_observation_xds = load_generic_table(
1090
1100
  in_file,
1091
1101
  "OBSERVATION",
1092
1102
  taql_where=f" where (ROWID() IN [{str(observation_id)}])",
1093
1103
  )
1094
1104
 
1095
- if intents == "None":
1096
- intents = "obs_" + str(observation_id)
1105
+ if scan_intents == "None":
1106
+ scan_intents = "obs_" + str(observation_id)
1097
1107
 
1098
- return generic_observation_xds["TELESCOPE_NAME"].values[0], intents
1108
+ return generic_observation_xds["TELESCOPE_NAME"].values[0], scan_intents
1099
1109
 
1100
- telescope_name, intents = get_observation_info(in_file, observation_id, intents)
1110
+ telescope_name, scan_intents = get_observation_info(
1111
+ in_file, observation_id, scan_intents
1112
+ )
1101
1113
 
1102
1114
  start = time.time()
1103
1115
  xds = xr.Dataset(
@@ -1139,6 +1151,7 @@ def convert_and_write_partition(
1139
1151
  baseline_ant1_id,
1140
1152
  baseline_ant2_id,
1141
1153
  scan_id,
1154
+ scan_intents,
1142
1155
  )
1143
1156
  logger.debug("Time create coordinates " + str(time.time() - start))
1144
1157
 
@@ -1334,7 +1347,6 @@ def convert_and_write_partition(
1334
1347
 
1335
1348
  partition_info_misc_fields = {
1336
1349
  "scan_name": xds.coords["scan_name"].data,
1337
- "intents": intents,
1338
1350
  "taql_where": taql_where,
1339
1351
  }
1340
1352
  if with_antenna_partitioning:
@@ -819,7 +819,9 @@ def extract_field_info_and_check_ephemeris(
819
819
  # Need to check if ephemeris_id is present and if ephemeris table is present.
820
820
  if "EPHEMERIS_ID" in field_xds:
821
821
  # Note: this assumes partition_scheme includes "FIELD_ID"
822
- ephemeris_id = check_if_consistent(field_xds.EPHEMERIS_ID, "EPHEMERIS_ID")
822
+ ephemeris_id = check_if_consistent(
823
+ field_xds.EPHEMERIS_ID, "EPHEMERIS_ID", taql_where
824
+ )
823
825
 
824
826
  if ephemeris_id > -1:
825
827
  files = os.listdir(os.path.join(in_file, "FIELD"))
@@ -0,0 +1,362 @@
1
+ import re
2
+
3
+ import numpy as np
4
+ import xarray as xr
5
+
6
+ try:
7
+ from casacore import tables
8
+ except ImportError:
9
+ import xradio._utils._casacore.casacore_from_casatools as tables
10
+
11
+ import toolviper.utils.logger as logger
12
+
13
+ from .subtables import subt_rename_ids
14
+ from ._tables.read import load_generic_table, convert_casacore_time
15
+ from xradio._utils.list_and_array import check_if_consistent
16
+
17
+
18
+ def create_info_dicts(
19
+ in_file: str,
20
+ xds: xr.Dataset,
21
+ field_and_source_xds: xr.Dataset,
22
+ partition_info_misc_fields: dict,
23
+ tb_tool: tables.table,
24
+ ) -> dict[str, dict]:
25
+ """
26
+ For an MSv4, produces several info dicts (partition_info, processor_info,
27
+ observation_info). The info dicts are returned in a dictionary that
28
+ contains them indexed by their corresponding keys, which can be used
29
+ directly to update the attrs dict of an MSv4.
30
+
31
+ Parameters:
32
+ -----------
33
+ in_file: str
34
+ path to the input MSv2
35
+ xds: xr.Dataset
36
+ main xds of the MSv4 being converted
37
+ field_and_source_xds: xr.Dataset
38
+ field_and_source_xds subdataset
39
+ partition_info_misc_fields: dict
40
+ dict with several scalar fields for the partition_info dict that are
41
+ collected while processing the main MSv4 table. Expected: scan_id,
42
+ obs_mode, taql_where
43
+ tb_tool: tables.table
44
+ table (query) on the main table with an MSv4 query
45
+
46
+ Returns:
47
+ --------
48
+ info_dicts: dict
49
+ info dicts ready to be used to update the attrs of the MSv4
50
+ """
51
+
52
+ info_dicts = {}
53
+
54
+ observation_id = check_if_consistent(
55
+ tb_tool.getcol("OBSERVATION_ID"), "OBSERVATION_ID"
56
+ )
57
+ info_dicts["observation_info"] = create_observation_info(in_file, observation_id)
58
+ # info_dicts["observation_info"]["intents"] = partition_info_misc_fields[
59
+ # "intents"
60
+ # ].split(",")
61
+
62
+ processor_id = check_if_consistent(tb_tool.getcol("PROCESSOR_ID"), "PROCESSOR_ID")
63
+ info_dicts["processor_info"] = create_processor_info(in_file, processor_id)
64
+
65
+ return info_dicts
66
+
67
+
68
+ def create_observation_info(
69
+ in_file: str, observation_id: int
70
+ ) -> dict[str, list[str] | str]:
71
+ """
72
+ Makes a dict with the observation info extracted from the OBSERVATION subtable.
73
+ When available, it also takes metadata from the ASDM tables (imported 'asis')
74
+ ASDM_EXECBLOCK and ASDM_SBSUMMARY
75
+
76
+ Parameters
77
+ ----------
78
+ in_file: str
79
+ path to an input MSv2
80
+ observation_id: int
81
+ observation ID for one MSv4 dataset
82
+
83
+ Returns:
84
+ --------
85
+ observation_info: dict
86
+ observation description ready for the MSv4 observation_info attr
87
+ """
88
+
89
+ generic_observation_xds = load_generic_table(
90
+ in_file,
91
+ "OBSERVATION",
92
+ rename_ids=subt_rename_ids["OBSERVATION"],
93
+ taql_where=f" where ROWID() = {observation_id}",
94
+ )
95
+
96
+ observation_info = {
97
+ "observer": [str(generic_observation_xds["OBSERVER"].values[0])],
98
+ "release_date": str(
99
+ convert_casacore_time(generic_observation_xds["RELEASE_DATE"].values)[0]
100
+ ),
101
+ }
102
+ # could just assume lower:upper case but keeping explicit dict for now
103
+ mandatory_fields = {"project_UID": "PROJECT", "observing_log": "LOG"}
104
+ for field_msv4, col_msv2 in mandatory_fields.items():
105
+ observation_info[field_msv4] = str(generic_observation_xds[col_msv2].values[0])
106
+
107
+ execblock_optional_fields = {
108
+ "execution_block_UID": "execBlockUID",
109
+ "session_reference_UID": "sessionReference",
110
+ "observing_log": "observingLog",
111
+ }
112
+ execblock_info = try_optional_asdm_asis_table_info(
113
+ in_file, "ASDM_EXECBLOCK", execblock_optional_fields
114
+ )
115
+ observation_info.update(execblock_info)
116
+
117
+ sbsummary_optional_fields = {
118
+ "scheduling_block_UID": "sbSummaryUID",
119
+ }
120
+ sbsummary_info = try_optional_asdm_asis_table_info(
121
+ in_file, "ASDM_SBSUMMARY", sbsummary_optional_fields
122
+ )
123
+ observation_info.update(sbsummary_info)
124
+
125
+ observation_info = replace_entity_ids(observation_info)
126
+
127
+ observation_info = try_find_uids_from_observation_schedule(
128
+ generic_observation_xds, observation_info
129
+ )
130
+
131
+ return observation_info
132
+
133
+
134
+ def try_optional_asdm_asis_table_info(
135
+ in_file: str, asdm_table_name: str, optional_fields: dict[str, str]
136
+ ) -> dict[str, str]:
137
+ """
138
+ Tries to find an optional ASDM_* subtable (ASDM_EXECBLOCK, ASDM_SBSUMMARY, etc.),
139
+ and if available, gets the optional fields requested into a metadata dict. That
140
+ dict can be used to populate the observation_info dict.
141
+
142
+ Parameters
143
+ ----------
144
+ in_file: str
145
+ path to an input MSv2
146
+ asdm_table_name: str
147
+ name of the "asis" ASDM table to look for.
148
+ optional_fields: dict[str, str]
149
+ dictionary of field/column names (as {MSv4_name: MSv2/ASDM_name}
150
+
151
+ Returns:
152
+ --------
153
+ table_info: dict
154
+ observation description (partial, some fields) ready for the MSv4
155
+ observation_info attr
156
+ """
157
+ asdm_asis_xds = None
158
+
159
+ try:
160
+ asdm_asis_xds = load_generic_table(in_file, asdm_table_name)
161
+ except ValueError as exc:
162
+ logger.debug(
163
+ f"Did not find the {asdm_table_name} subtable, not loading optional fields in observation_info. Exception: {exc}"
164
+ )
165
+
166
+ if asdm_asis_xds:
167
+ table_info = extract_optional_fields_asdm_asis_table(
168
+ asdm_asis_xds, optional_fields
169
+ )
170
+ else:
171
+ table_info = {}
172
+
173
+ return table_info
174
+
175
+
176
+ def extract_optional_fields_asdm_asis_table(
177
+ asdm_asis_xds: xr.Dataset, optional_fields: dict[str, str]
178
+ ) -> dict[str, str]:
179
+ """
180
+ Get the (optional) fields of the observation_info that come from "asis" ASDM
181
+ tables like the ASDM_EXECBLOCK and ASDM_SBSUMMARY subtables.
182
+
183
+ Note this does not parse strings like 'session_reference':
184
+ '<EntityRef entityId="uid://A001/X133d/X169f" partId="X00000000" entityTypeName="OUSStatus"'.
185
+ If only the UID is required that needs to be filtered afterwards.
186
+
187
+ Parameters
188
+ ----------
189
+ asdm_asis_xds: xr.Dataset
190
+ raw xds read from subtable ASDM_*
191
+
192
+ Returns:
193
+ --------
194
+ info: dict
195
+ info dict with description from an ASDM_* subtable, ready
196
+ for the MSv4 observation_info dict
197
+ """
198
+
199
+ table_info = {}
200
+ for field_msv4, col_msv2 in optional_fields.items():
201
+ if col_msv2 in asdm_asis_xds.data_vars:
202
+ msv2_value = asdm_asis_xds[col_msv2].values[0]
203
+ if isinstance(msv2_value, np.ndarray):
204
+ table_info[field_msv4] = ",".join([log for log in msv2_value])
205
+ else:
206
+ table_info[field_msv4] = msv2_value
207
+
208
+ return table_info
209
+
210
+
211
+ def try_find_uids_from_observation_schedule(
212
+ generic_observation_xds: xr.Dataset, observation_info: dict
213
+ ) -> dict[str, str]:
214
+ """
215
+ This function tries to parse the execution_block_UID and scheduling_block_UID
216
+ from the SCHEDULE column of the OBSERVATION subtable. If found, and they
217
+ could not already be loaded from the ASDM_* subtables, adds them to the
218
+ output observation_info dict.
219
+
220
+ Sometimes, even if the ASDM_EXECBLOCK and ASDM_SBSUMMARY are not available to
221
+ load various ASDM UIDs, we can still find a couple of them in the
222
+ OBSERVATION/SCHEDULE column (when the MS is imported from an ASDM, by
223
+ importasdm). The SCHEDULE column can have values like:
224
+
225
+ '[SchedulingBlock uid://A001/X3571/X122, ExecBlock uid://A002/X1003af4/X75a3]'
226
+
227
+ Parameters
228
+ ----------
229
+ generic_observation_xds: xr.Dataset
230
+ generic observation dataset from the OBSERVATION subtable
231
+ observation_info: dict
232
+ an observation_info being populated
233
+
234
+ Returns:
235
+ --------
236
+ info: dict
237
+ info dict with possibly additional UIDs found in the OBSERVATION
238
+ subtable
239
+ """
240
+
241
+ out_info = dict(observation_info)
242
+
243
+ if "SCHEDULE" in generic_observation_xds.data_vars:
244
+ schedule = generic_observation_xds["SCHEDULE"].values[0]
245
+ if isinstance(schedule, np.ndarray) and 2 == len(schedule):
246
+ if "scheduling_block_UID" not in observation_info:
247
+ scheduling_uid_match = re.search(
248
+ "SchedulingBlock ([\\w/:]+)", schedule[0]
249
+ )
250
+ if scheduling_uid_match:
251
+ out_info["scheduling_block_UID"] = scheduling_uid_match.group(1)
252
+ if "execution_block_UID" not in observation_info:
253
+ execution_uid_match = re.search("ExecBlock ([\\w/:]+)", schedule[1])
254
+ if execution_uid_match:
255
+ out_info["execution_block_UID"] = execution_uid_match.group(1)
256
+
257
+ return out_info
258
+
259
+
260
+ def replace_entity_ids(observation_info: dict) -> dict[str, list[str] | str]:
261
+ """
262
+ For several fields of the input dictionary, which are known to be of "UID" type,
263
+ replace their lengthy XML string with the UID value contained in it. For example, from
264
+ '<EntityRef entityId="uid://A001/X133d/X169f" partId="X00000000" entityTypeName="OUSStatus">'
265
+ it takes 'uid://A001/X133d/X169f'.
266
+
267
+ The UID values are written in the MSv2 "asis" ASDM_* subtables imported from ASDM tables
268
+ as the full string of the EntityRef XML elements. This function takes only the entityId
269
+ ("uid://A00...") from the EntityRef.
270
+
271
+
272
+ Parameters
273
+ ----------
274
+ observation_info: dict
275
+ info dict where some UID fields (as xml element strings) need to be replaced/simplified
276
+
277
+ Returns:
278
+ --------
279
+ info: dict
280
+ dictionary as the input where the UIDs have been replaced by their entityId (uid://A00...)
281
+
282
+ """
283
+ out_info = dict(observation_info)
284
+
285
+ entity_refs = [
286
+ "execution_block_UID",
287
+ "session_reference_UID",
288
+ "scheduling_block_UID",
289
+ ]
290
+ for ref_name in entity_refs:
291
+ if ref_name in observation_info:
292
+ out_info[ref_name] = search_entity_id(observation_info[ref_name])
293
+
294
+ return out_info
295
+
296
+
297
+ def search_entity_id(entity_ref_xml: str) -> str:
298
+ """
299
+ Given an EntityRef XML string from an ASDM, like the following
300
+ examples:
301
+
302
+ - example sbSummaryID:
303
+ '<EntityRef entityId="uid://A001/X133d/X169a" partId="X00000000" entityTypeName="SchedBlock" documentVersion="1"/>'
304
+
305
+ - example sessionReferenceUID:
306
+ '<EntityRef entityId="uid://A001/X133d/X169f" partId="X00000000" entityTypeName="OUSStatus"'
307
+
308
+ this function takes the "uid://..." value of the entityId.
309
+
310
+ Parameters
311
+ ----------
312
+ entity_ref_xml: str
313
+ An EntityRef from an ASDM table (usually ExecBlock or
314
+ SBSUMMARY) as found in columns like execBlockUID,
315
+ sessionReference or sbSummaryUID.
316
+
317
+ Returns:
318
+ --------
319
+ str
320
+ the entityId string value of the EntityRef received, or
321
+ the same string as received if no entityId could be found.
322
+ """
323
+ uid_match = re.search('entityId="([\\w/:]+)"', entity_ref_xml)
324
+ entity_id = uid_match.group(1) if uid_match else entity_ref_xml
325
+ return entity_id
326
+
327
+
328
+ def create_processor_info(in_file: str, processor_id: int) -> dict[str, str]:
329
+ """
330
+ Makes a dict with the processor info extracted from the PROCESSOR subtable.
331
+
332
+ Parameters
333
+ ----------
334
+ in_file: str
335
+ path to an input MSv2
336
+ processor_id: int
337
+ processor ID for one MSv4 dataset
338
+
339
+ Returns:
340
+ --------
341
+ processor_info: dict
342
+ processor description ready for the MSv4 processor_info attr
343
+ """
344
+
345
+ generic_processor_xds = load_generic_table(
346
+ in_file,
347
+ "PROCESSOR",
348
+ rename_ids=subt_rename_ids["PROCESSOR"],
349
+ taql_where=f" where ROWID() = {processor_id}",
350
+ )
351
+
352
+ # Many telescopes (ASKAP, MeerKAT, SKA-Mid, VLBI, VLBA, ngEHT) seem to
353
+ # produce an empty PROCESSOR subtable
354
+ if len(generic_processor_xds.data_vars) <= 0:
355
+ processor_info = {"type": "", "sub_type": ""}
356
+ else:
357
+ processor_info = {
358
+ "type": generic_processor_xds["TYPE"].values[0],
359
+ "sub_type": generic_processor_xds["SUB_TYPE"].values[0],
360
+ }
361
+
362
+ return processor_info