xradio 0.0.60__py3-none-any.whl → 1.0.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -73,7 +73,9 @@ def to_np_array(x):
     return np.array([x])
 
 
-def check_if_consistent(array: np.ndarray, array_name: str) -> np.ndarray:
+def check_if_consistent(
+    array: np.ndarray, array_name: str, err_msg: str = ""
+) -> np.ndarray:
     """_summary_
 
     Parameters
@@ -92,7 +94,7 @@ def check_if_consistent(array: np.ndarray, array_name: str) -> np.ndarray:
         return array.item()
 
     array_unique = unique_1d(array)
-    assert len(array_unique) == 1, array_name + " is not consistent."
+    assert len(array_unique) == 1, array_name + " is not consistent." + err_msg
     return array_unique[0]
 
 
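
This pair of hunks threads an optional err_msg argument through to the consistency assertion, so callers can attach context to a failure (extract_field_info_and_check_ephemeris, further down in this diff, passes its TaQL WHERE string). A minimal sketch of the resulting behavior, with xradio's unique_1d reduced to a numpy stand-in:

    import numpy as np

    def unique_1d(array):
        # stand-in for xradio's unique_1d helper
        return np.unique(np.asarray(array))

    def check_if_consistent(array, array_name, err_msg=""):
        array_unique = unique_1d(array)
        assert len(array_unique) == 1, array_name + " is not consistent." + err_msg
        return array_unique[0]

    check_if_consistent(np.array([7, 7, 7]), "DATA_DESC_ID")  # returns 7
    # check_if_consistent(np.array([7, 8]), "EPHEMERIS_ID", " WHERE ...")
    # -> AssertionError: EPHEMERIS_ID is not consistent. WHERE ...
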
@@ -5,6 +5,7 @@ import dask.array as da
 import numpy as np
 import xarray as xr
 from astropy.coordinates import Angle
+from astropy import units as apu
 
 try:
     from casacore import tables
@@ -93,10 +94,9 @@ def _compute_spectral_dict(xds: xr.Dataset) -> dict:
     spec["system"] = xds.frequency.attrs["reference_frequency"]["attrs"][
         "observer"
     ].upper()
-    u = xds.frequency.attrs["reference_frequency"]["attrs"]["units"]
-    spec["unit"] = u
+    spec["unit"] = xds.frequency.attrs["reference_frequency"]["attrs"]["units"]
     spec["velType"] = _doppler_types.index(xds.velocity.attrs["doppler_type"])
-    u = xds.velocity.attrs["units"]
+    # u = xds.velocity.attrs["units"]
     spec["version"] = 2
     # vel unit is a list[str] in the xds but needs to be a str in the casa image
     spec["velUnit"] = xds.velocity.attrs["units"]
@@ -106,7 +106,14 @@ def _compute_spectral_dict(xds: xr.Dataset) -> dict:
     wcs["ctype"] = "FREQ"
     wcs["pc"] = 1.0
     wcs["crval"] = float(xds.frequency.attrs["reference_frequency"]["data"])
-    wcs["cdelt"] = float(xds.frequency.values[1] - xds.frequency.values[0])
+    if len(xds.frequency.values) > 1:
+        wcs["cdelt"] = float(xds.frequency.values[1] - xds.frequency.values[0])
+    else:
+        # TODO this is just a temporary fix, likely schema will be updated to include chan widths
+        myu = apu.Unit(spec["unit"])
+        mydel = 1.8 * apu.GHz
+        my_del_converted = mydel.to(spec["unit"])
+        wcs["cdelt"] = my_del_converted.value
     wcs["crpix"] = float((wcs["crval"] - xds.frequency.values[0]) / wcs["cdelt"])
     spec["wcs"] = wcs
     return spec
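
The new else branch handles single-channel images, where no channel spacing can be derived from the frequency axis: it falls back to a hard-coded 1.8 GHz width (flagged as a TODO until the schema carries channel widths) converted into the image's spectral unit with astropy. The conversion in isolation:

    from astropy import units as apu

    unit = "Hz"  # e.g. spec["unit"] taken from the reference_frequency attrs
    mydel = 1.8 * apu.GHz  # placeholder channel width from the hunk above
    print(mydel.to(unit).value)  # 1800000000.0

Note that myu = apu.Unit(spec["unit"]) in the new branch is assigned but never used; the conversion passes the unit string straight to .to().
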
xradio/image/image.py CHANGED
@@ -14,8 +14,6 @@ import xarray as xr
 
 # from .._utils.zarr.common import _load_no_dask_zarr
 
-from ._util.casacore import _load_casa_image_block, _xds_to_casa_image
-
 # from ._util.fits import _read_fits_image
 from ._util.image_factory import (
     _make_empty_aperture_image,
@@ -201,6 +199,8 @@ def load_image(infile: str, block_des: dict = None, do_sky_coords=True) -> xr.Da
     # comment next line when done debugging
     # return _load_casa_image_block(infile, selection, do_sky_coords)
     try:
+        from ._util.casacore import _load_casa_image_block
+
         return _load_casa_image_block(infile, selection, do_sky_coords)
     except Exception as e:
         emsgs.append(f"image format appears not to be casacore: {e.args}")
@@ -256,6 +256,8 @@ def write_image(
     )
     my_format = out_format.lower()
     if my_format == "casa":
+        from ._util.casacore import _xds_to_casa_image
+
         _xds_to_casa_image(xds, imagename)
     elif my_format == "zarr":
         _xds_to_zarr(xds, imagename)
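
Both image.py hunks replace the module-level casacore imports with function-local ones, so xradio.image can be imported on systems without python-casacore; the dependency is only touched when a CASA-format image is actually read or written. A toy illustration of the pattern (describe_backend is hypothetical, not xradio API):

    def describe_backend(fmt: str) -> str:
        # the optional dependency is imported inside the branch that needs it,
        # so e.g. zarr users never trigger (or need) the casacore import
        if fmt == "casa":
            try:
                from casacore import tables  # noqa: F401
            except ImportError:
                return "casa backend unavailable (python-casacore not installed)"
            return "casa backend ready"
        return f"{fmt} backend needs no casacore"

    print(describe_backend("zarr"))
    print(describe_backend("casa"))
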
@@ -440,6 +440,7 @@ def create_coordinates(
     baseline_ant1_id: np.ndarray,
     baseline_ant2_id: np.ndarray,
     scan_id: np.ndarray,
+    scan_intents: list[str],
 ) -> tuple[xr.Dataset, int]:
     """
     Creates coordinates of a VisibilityXds/SpectrumXds and assigns them to the input
@@ -464,6 +465,9 @@ def create_coordinates(
         ANTENNA2 ids to be used as coord
     scan_id :
         SCAN_ID values from MSv2, for the scan_name coord
+    scan_intents :
+        list of SCAN_INTENT values from MSv2, for the scan_intents attribute of the
+        scan_name coord
 
     Returns
     -------
@@ -509,6 +513,9 @@ def create_coordinates(
 
     xds = xds.assign_coords(coords)
 
+    ##### Add scan intents attribute to scan_name coord #####
+    xds.scan_name.attrs["scan_intents"] = scan_intents
+
     ###### Create Frequency Coordinate ######
     freq_column_description = spectral_window_xds.attrs["other"]["msv2"]["ctds_attrs"][
         "column_descriptions"
@@ -528,7 +535,7 @@ def create_coordinates(
         spw_name = spw_name + "_" + str(spectral_window_id)
 
     xds.frequency.attrs["spectral_window_name"] = spw_name
-    xds.frequency.attrs["spectral_window_intent"] = "UNSPECIFIED"
+    xds.frequency.attrs["spectral_window_intents"] = ["UNSPECIFIED"]
     msv4_measure = column_description_casacore_to_msv4_measure(
         freq_column_description["REF_FREQUENCY"],
         ref_code=spectral_window_xds["MEAS_FREQ_REF"].data,
@@ -769,15 +776,17 @@ def create_taql_query_where(partition_info: dict):
     taql_where = "WHERE "
     for col_name in main_par_table_cols:
         if col_name in partition_info:
-            taql_where = (
-                taql_where
-                + f"({col_name} IN [{','.join(map(str, partition_info[col_name]))}]) AND"
-            )
-            if col_name == "ANTENNA1":
+
+            if partition_info[col_name][0] is not None:
                 taql_where = (
                     taql_where
-                    + f"(ANTENNA2 IN [{','.join(map(str, partition_info[col_name]))}]) AND"
+                    + f"({col_name} IN [{','.join(map(str, partition_info[col_name]))}]) AND"
                 )
+                if col_name == "ANTENNA1":
+                    taql_where = (
+                        taql_where
+                        + f"(ANTENNA2 IN [{','.join(map(str, partition_info[col_name]))}]) AND"
+                    )
     taql_where = taql_where[:-3]
 
     return taql_where
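
The create_taql_query_where rewrite wraps the clause-building in a guard, so a partition column whose value list starts with None (e.g. an ANTENNA1 placeholder when antenna partitioning is off) no longer emits an IN clause; the ANTENNA1/ANTENNA2 duplication is preserved inside the guard. A simplified re-creation, with a hypothetical column list standing in for main_par_table_cols:

    def build_taql_where(partition_info, cols=("DATA_DESC_ID", "OBS_MODE", "ANTENNA1")):
        # illustrative reduction of create_taql_query_where, not the xradio function
        taql_where = "WHERE "
        for col_name in cols:
            if col_name in partition_info and partition_info[col_name][0] is not None:
                values = ",".join(map(str, partition_info[col_name]))
                taql_where += f"({col_name} IN [{values}]) AND"
                if col_name == "ANTENNA1":
                    taql_where += f"(ANTENNA2 IN [{values}]) AND"
        return taql_where[:-3]

    print(build_taql_where({"DATA_DESC_ID": [0], "ANTENNA1": [None]}))
    # WHERE (DATA_DESC_ID IN [0])  <- the ANTENNA1 entry is now skipped
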
@@ -933,6 +942,7 @@ def estimate_memory_for_partition(in_file: str, partition: dict) -> float:
 
     taql_partition = create_taql_query_where(partition)
     taql_main = f"select * from $mtable {taql_partition}"
+
     with open_table_ro(in_file) as mtable:
         with open_query(mtable, taql_main) as tb_tool:
             # Do not feel tempted to rely on nrows. nrows tends to underestimate memory when baselines are missing.
@@ -1017,7 +1027,7 @@ def convert_and_write_partition(
         _description_
     out_file : str
         _description_
-    intents : str
+    scan_intents : str
         _description_
     ddi : int, optional
         _description_, by default 0
@@ -1061,7 +1071,7 @@ def convert_and_write_partition(
     taql_where = create_taql_query_where(partition_info)
     table_manager = TableManager(in_file, taql_where)
     ddi = partition_info["DATA_DESC_ID"][0]
-    intents = str(partition_info["OBS_MODE"][0])
+    scan_intents = str(partition_info["OBS_MODE"][0]).split(",")
 
     start = time.time()
     with table_manager.get_table() as tb_tool:
@@ -1085,19 +1095,21 @@ def convert_and_write_partition(
             tb_tool.getcol("OBSERVATION_ID"), "OBSERVATION_ID"
         )
 
-        def get_observation_info(in_file, observation_id, intents):
+        def get_observation_info(in_file, observation_id, scan_intents):
             generic_observation_xds = load_generic_table(
                 in_file,
                 "OBSERVATION",
                 taql_where=f" where (ROWID() IN [{str(observation_id)}])",
             )
 
-            if intents == "None":
-                intents = "obs_" + str(observation_id)
+            if scan_intents == "None":
+                scan_intents = "obs_" + str(observation_id)
 
-            return generic_observation_xds["TELESCOPE_NAME"].values[0], intents
+            return generic_observation_xds["TELESCOPE_NAME"].values[0], scan_intents
 
-        telescope_name, intents = get_observation_info(in_file, observation_id, intents)
+        telescope_name, scan_intents = get_observation_info(
+            in_file, observation_id, scan_intents
+        )
 
         start = time.time()
         xds = xr.Dataset(
@@ -1139,6 +1151,7 @@ def convert_and_write_partition(
             baseline_ant1_id,
             baseline_ant2_id,
             scan_id,
+            scan_intents,
         )
         logger.debug("Time create coordinates " + str(time.time() - start))
 
@@ -1334,7 +1347,6 @@ def convert_and_write_partition(
 
         partition_info_misc_fields = {
            "scan_name": xds.coords["scan_name"].data,
-            "intents": intents,
            "taql_where": taql_where,
        }
        if with_antenna_partitioning:
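
These convert_and_write_partition hunks change how intents travel: the comma-separated OBS_MODE string is split into a scan_intents list as soon as the partition is selected, passed through get_observation_info and create_coordinates (where it lands on the scan_name coordinate, per the hunk above), and dropped from partition_info_misc_fields. For an ALMA-style OBS_MODE value (illustrative, not taken from the diff):

    obs_mode = "OBSERVE_TARGET#ON_SOURCE,CALIBRATE_WVR#ON_SOURCE"
    scan_intents = obs_mode.split(",")
    # ['OBSERVE_TARGET#ON_SOURCE', 'CALIBRATE_WVR#ON_SOURCE']

One wrinkle carried over from the old code: the scan_intents == "None" check inside get_observation_info now compares a list against a string (a missing OBS_MODE becomes ["None"] after the split), so it appears that fallback can no longer trigger.
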
@@ -819,7 +819,9 @@ def extract_field_info_and_check_ephemeris(
     # Need to check if ephemeris_id is present and if ephemeris table is present.
     if "EPHEMERIS_ID" in field_xds:
         # Note: this assumes partition_scheme includes "FIELD_ID"
-        ephemeris_id = check_if_consistent(field_xds.EPHEMERIS_ID, "EPHEMERIS_ID")
+        ephemeris_id = check_if_consistent(
+            field_xds.EPHEMERIS_ID, "EPHEMERIS_ID", taql_where
+        )
 
         if ephemeris_id > -1:
             files = os.listdir(os.path.join(in_file, "FIELD"))
@@ -1,3 +1,5 @@
+import re
+
 import numpy as np
 import xarray as xr
 
@@ -19,7 +21,7 @@ def create_info_dicts(
     field_and_source_xds: xr.Dataset,
     partition_info_misc_fields: dict,
     tb_tool: tables.table,
-) -> dict:
+) -> dict[str, dict]:
     """
     For an MSv4, produces several info dicts (partition_info, processor_info,
     observation_info). The info dicts are returned in a dictionary that
@@ -49,36 +51,13 @@ def create_info_dicts(
 
     info_dicts = {}
 
-    # if "line_name" in field_and_source_xds.coords:
-    #     line_name = to_list(unique_1d(np.ravel(field_and_source_xds.line_name.values)))
-    # else:
-    #     line_name = []
-
-    # info_dicts["partition_info"] = {
-    #     # "spectral_window_id": xds.frequency.attrs["spectral_window_id"],
-    #     "spectral_window_name": xds.frequency.attrs["spectral_window_name"],
-    #     # "field_id": to_list(unique_1d(field_id)),
-    #     "field_name": to_list(np.unique(field_and_source_xds.field_name.values)),
-    #     "polarization_setup": to_list(xds.polarization.values),
-    #     "scan_name": to_list(np.unique(partition_info_misc_fields["scan_name"])),
-    #     "source_name": to_list(np.unique(field_and_source_xds.source_name.values)),
-    #     # "source_id": to_list(unique_1d(source_id)),
-    #     "intents": partition_info_misc_fields["intents"].split(","),
-    #     "taql": partition_info_misc_fields["taql_where"],
-    #     "line_name": line_name,
-    # }
-    # if "antenna_name" in partition_info_misc_fields:
-    #     info_dicts["partition_info"]["antenna_name"] = partition_info_misc_fields[
-    #         "antenna_name"
-    #     ]
-
     observation_id = check_if_consistent(
         tb_tool.getcol("OBSERVATION_ID"), "OBSERVATION_ID"
     )
     info_dicts["observation_info"] = create_observation_info(in_file, observation_id)
-    info_dicts["observation_info"]["intents"] = partition_info_misc_fields[
-        "intents"
-    ].split(",")
+    # info_dicts["observation_info"]["intents"] = partition_info_misc_fields[
+    #     "intents"
+    # ].split(",")
 
     processor_id = check_if_consistent(tb_tool.getcol("PROCESSOR_ID"), "PROCESSOR_ID")
     info_dicts["processor_info"] = create_processor_info(in_file, processor_id)
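
With the commented-out partition_info block removed and the intents line disabled, create_info_dicts now returns only the observation and processor entries, which the tightened dict[str, dict] annotation reflects. Illustrative shape (values elided):

    info_dicts = {
        "observation_info": {...},  # from create_observation_info(in_file, observation_id)
        "processor_info": {...},    # from create_processor_info(in_file, processor_id)
    }
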
@@ -86,9 +65,13 @@
     return info_dicts
 
 
-def create_observation_info(in_file: str, observation_id: int):
+def create_observation_info(
+    in_file: str, observation_id: int
+) -> dict[str, list[str] | str]:
     """
     Makes a dict with the observation info extracted from the PROCESSOR subtable.
+    When available, it also takes metadata from the ASDM tables (imported 'asis')
+    ASDM_EXECBLOCK and ASDM_SBSUMMARY.
 
     Parameters
     ----------
@@ -111,74 +94,238 @@ def create_observation_info(in_file: str, observation_id: int):
     )
 
     observation_info = {
-        "observer": [generic_observation_xds["OBSERVER"].values[0]],
+        "observer": [str(generic_observation_xds["OBSERVER"].values[0])],
         "release_date": str(
             convert_casacore_time(generic_observation_xds["RELEASE_DATE"].values)[0]
         ),
     }
     # could just assume lower:upper case but keeping explicit dict for now
-    mandatory_fields = {"project": "PROJECT"}
-    for field_msv4, row_msv2 in mandatory_fields.items():
-        observation_info[field_msv4] = generic_observation_xds[row_msv2].values[0]
+    mandatory_fields = {"project_UID": "PROJECT", "observing_log": "LOG"}
+    for field_msv4, col_msv2 in mandatory_fields.items():
+        observation_info[field_msv4] = str(generic_observation_xds[col_msv2].values[0])
+
+    execblock_optional_fields = {
+        "execution_block_UID": "execBlockUID",
+        "session_reference_UID": "sessionReference",
+        "observing_log": "observingLog",
+    }
+    execblock_info = try_optional_asdm_asis_table_info(
+        in_file, "ASDM_EXECBLOCK", execblock_optional_fields
+    )
+    observation_info.update(execblock_info)
+
+    sbsummary_optional_fields = {
+        "scheduling_block_UID": "sbSummaryUID",
+    }
+    sbsummary_info = try_optional_asdm_asis_table_info(
+        in_file, "ASDM_SBSUMMARY", sbsummary_optional_fields
+    )
+    observation_info.update(sbsummary_info)
+
+    observation_info = replace_entity_ids(observation_info)
+
+    observation_info = try_find_uids_from_observation_schedule(
+        generic_observation_xds, observation_info
+    )
+
+    return observation_info
+
+
+def try_optional_asdm_asis_table_info(
+    in_file: str, asdm_table_name: str, optional_fields: dict[str, str]
+) -> dict[str, str]:
+    """
+    Tries to find an optional ASDM_* subtable (ASDM_EXECBLOCK, ASDM_SBSUMMARY, etc.),
+    and if available, gets the optional fields requested into a metadata dict. That
+    dict can be used to populate the observation_info dict.
+
+    Parameters
+    ----------
+    in_file: str
+        path to an input MSv2
+    asdm_table_name: str
+        name of the "asis" ASDM table to look for.
+    optional_fields: dict[str, str]
+        dictionary of field/column names (as {MSv4_name: MSv2/ASDM_name})
+
+    Returns:
+    --------
+    table_info: dict
+        observation description (partial, some fields) ready for the MSv4
+        observation_info attr
+    """
+    asdm_asis_xds = None
 
-    exec_block_xds = None
     try:
-        exec_block_xds = load_generic_table(in_file, "ASDM_EXECBLOCK")
+        asdm_asis_xds = load_generic_table(in_file, asdm_table_name)
     except ValueError as exc:
         logger.debug(
-            f"Did not find the ASDM_EXECBLOCK subtable, not loading optional fields in observation_info. Exception: {exc}"
+            f"Did not find the {asdm_table_name} subtable, not loading optional fields in observation_info. Exception: {exc}"
         )
-    if exec_block_xds:
-        exec_block_info = extract_exec_block_info(exec_block_xds)
-        observation_info.update(exec_block_info)
 
-    return observation_info
+    if asdm_asis_xds:
+        table_info = extract_optional_fields_asdm_asis_table(
+            asdm_asis_xds, optional_fields
+        )
+    else:
+        table_info = {}
+
+    return table_info
 
 
-def extract_exec_block_info(exec_block_xds: xr.Dataset) -> dict:
+def extract_optional_fields_asdm_asis_table(
+    asdm_asis_xds: xr.Dataset, optional_fields: dict[str, str]
+) -> dict[str, str]:
     """
-    Get the (optional) fields of the observation_info that come from the
-    ASDM_EXECBLOCK subtable.
+    Get the (optional) fields of the observation_info that come from "asis" ASDM
+    tables like the ASDM_EXECBLOCK and ASDM_SBSUMMARY subtables.
 
     Note this does not parse strings like 'session_reference':
-    '<EntityRef entityId="uid://A001/X133d/X169f" partId="X00000000" entityTypeName="OUSStatus"'
-    We might want to simplify that to 'uid://A001/X133d/X169f', but keeping the
-    full string for now, as it has additional information such as the type.
+    '<EntityRef entityId="uid://A001/X133d/X169f" partId="X00000000" entityTypeName="OUSStatus"'.
+    If only the UID is required that needs to be filtered afterwards.
 
     Parameters
     ----------
-    exec_block_xds: xr.Dataset
-        raw xds read from subtable ASDM_EXECBLOCK
+    asdm_asis_xds: xr.Dataset
+        raw xds read from subtable ASDM_*
 
     Returns:
     --------
-    exec_block_info: dict
-        Execution block description ready for the MSv4 observation_info dict
+    info: dict
+        info dict with description from an ASDM_* subtable, ready
+        for the MSv4 observation_info dict
     """
 
-    optional_fields = {
-        "execution_block_id": "execBlockId",
-        "execution_block_number": "execBlockNum",
-        "execution_block_UID": "execBlockUID",
-        "session_reference": "sessionReference",
-        "observing_script": "observingScript",
-        "observing_script_UID": "observingScriptUID",
-        "observing_log": "observingLog",
-    }
-
-    exec_block_info = {}
-    for field_msv4, row_msv2 in optional_fields.items():
-        if row_msv2 in exec_block_xds.data_vars:
-            msv2_value = exec_block_xds[row_msv2].values[0]
+    table_info = {}
+    for field_msv4, col_msv2 in optional_fields.items():
+        if col_msv2 in asdm_asis_xds.data_vars:
+            msv2_value = asdm_asis_xds[col_msv2].values[0]
             if isinstance(msv2_value, np.ndarray):
-                exec_block_info[field_msv4] = ",".join([log for log in msv2_value])
+                table_info[field_msv4] = ",".join([log for log in msv2_value])
             else:
-                exec_block_info[field_msv4] = msv2_value
+                table_info[field_msv4] = msv2_value
+
+    return table_info
+
+
+def try_find_uids_from_observation_schedule(
+    generic_observation_xds: xr.Dataset, observation_info: dict
+) -> dict[str, str]:
+    """
+    This function tries to parse the execution_block_UID and scheduling_block_UID
+    from the SCHEDULE column of the OBSERVATION subtable. If found, and they
+    could not already be loaded from the ASDM_* subtables, adds them to the
+    output observation_info dict.
+
+    Sometimes, even if the ASDM_EXECBLOCK and ASDM_SBSUMMARY are not available to
+    load various ASDM UIDs, we can still find a couple of them in the
+    OBSERVATION/SCHEDULE column (when the MS is imported from an ASDM, by
+    importasdm). The SCHEDULE column can have values like:
 
-    return exec_block_info
+    '[SchedulingBlock uid://A001/X3571/X122, ExecBlock uid://A002/X1003af4/X75a3]'
+
+    Parameters
+    ----------
+    generic_observation_xds: xr.Dataset
+        generic observation dataset from the OBSERVATION subtable
+    observation_info: dict
+        an observation_info being populated
+
+    Returns:
+    --------
+    info: dict
+        info dict with possibly additional UIDs found in the OBSERVATION
+        subtable
+    """
+
+    out_info = dict(observation_info)
+
+    if "SCHEDULE" in generic_observation_xds.data_vars:
+        schedule = generic_observation_xds["SCHEDULE"].values[0]
+        if isinstance(schedule, np.ndarray) and 2 == len(schedule):
+            if "scheduling_block_UID" not in observation_info:
+                scheduling_uid_match = re.search(
+                    "SchedulingBlock ([\\w/:]+)", schedule[0]
+                )
+                if scheduling_uid_match:
+                    out_info["scheduling_block_UID"] = scheduling_uid_match.group(1)
+            if "execution_block_UID" not in observation_info:
+                execution_uid_match = re.search("ExecBlock ([\\w/:]+)", schedule[1])
+                if execution_uid_match:
+                    out_info["execution_block_UID"] = execution_uid_match.group(1)
+
+    return out_info
+
+
+def replace_entity_ids(observation_info: dict) -> dict[str, list[str] | str]:
+    """
+    For several fields of the input dictionary, which are known to be of "UID" type,
+    replace their lengthy XML string with the UID value contained in it. For example, from
+    '<EntityRef entityId="uid://A001/X133d/X169f" partId="X00000000" entityTypeName="OUSStatus">'
+    it takes 'uid://A001/X133d/X169f'.
+
+    The UID values are written in the MSv2 "asis" ASDM_* subtables imported from ASDM tables
+    as the full string of the EntityRef XML elements. This function takes only the entityId
+    ("uid://A00...") from the EntityRef.
+
+
+    Parameters
+    ----------
+    observation_info: dict
+        info dict where some UID fields (as xml element strings) need to be replaced/simplified
+
+    Returns:
+    --------
+    info: dict
+        dictionary as the input where the UIDs have been replaced by their entityId (uid://A00...)
+
+    """
+    out_info = dict(observation_info)
+
+    entity_refs = [
+        "execution_block_UID",
+        "session_reference_UID",
+        "scheduling_block_UID",
+    ]
+    for ref_name in entity_refs:
+        if ref_name in observation_info:
+            out_info[ref_name] = search_entity_id(observation_info[ref_name])
+
+    return out_info
+
+
+def search_entity_id(entity_ref_xml: str) -> str:
+    """
+    Given an EntityRef XML string from an ASDM, like the following
+    examples:
+
+    - example sbSummaryID:
+    '<EntityRef entityId="uid://A001/X133d/X169a" partId="X00000000" entityTypeName="SchedBlock" documentVersion="1"/>'
+
+    - example sessionReferenceUID:
+    '<EntityRef entityId="uid://A001/X133d/X169f" partId="X00000000" entityTypeName="OUSStatus"'
+
+    this function takes the "uid://..." value of the entityId.
+
+    Parameters
+    ----------
+    entity_ref_xml: str
+        An EntityRef from an ASDM table (usually ExecBlock or
+        SBSUMMARY) as found in columns like execBlockUID,
+        sessionReference or sbSummaryUID.
+
+    Returns:
+    --------
+    str
+        the entityId string value of the EntityRef received, or
+        the same string as received if no entityId could be found.
+    """
+    uid_match = re.search('entityId="([\\w/:]+)"', entity_ref_xml)
+    entity_id = uid_match.group(1) if uid_match else entity_ref_xml
+    return entity_id
 
 
-def create_processor_info(in_file: str, processor_id: int):
+def create_processor_info(in_file: str, processor_id: int) -> dict[str, str]:
     """
     Makes a dict with the processor info extracted from the PROCESSOR subtable.
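
Taken together, the new helpers in this file reduce the verbose ASDM EntityRef XML and OBSERVATION/SCHEDULE strings down to bare UIDs. A quick check of the two regular expressions against the example strings quoted in the docstrings above:

    import re

    entity_ref = (
        '<EntityRef entityId="uid://A001/X133d/X169f" '
        'partId="X00000000" entityTypeName="OUSStatus"'
    )
    print(re.search('entityId="([\\w/:]+)"', entity_ref).group(1))
    # uid://A001/X133d/X169f

    schedule = [
        "SchedulingBlock uid://A001/X3571/X122",
        "ExecBlock uid://A002/X1003af4/X75a3",
    ]
    print(re.search("SchedulingBlock ([\\w/:]+)", schedule[0]).group(1))
    # uid://A001/X3571/X122
    print(re.search("ExecBlock ([\\w/:]+)", schedule[1]).group(1))
    # uid://A002/X1003af4/X75a3
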