xradio 0.0.40__py3-none-any.whl → 0.0.42__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (65)
  1. xradio/_utils/coord_math.py +100 -0
  2. xradio/_utils/list_and_array.py +49 -4
  3. xradio/_utils/schema.py +36 -16
  4. xradio/image/_util/_casacore/xds_from_casacore.py +5 -5
  5. xradio/image/_util/_casacore/xds_to_casacore.py +12 -11
  6. xradio/image/_util/_fits/xds_from_fits.py +18 -17
  7. xradio/image/_util/_zarr/zarr_low_level.py +29 -12
  8. xradio/image/_util/common.py +1 -1
  9. xradio/image/_util/image_factory.py +1 -1
  10. xradio/measurement_set/__init__.py +18 -0
  11. xradio/measurement_set/_utils/__init__.py +5 -0
  12. xradio/{vis/_vis_utils/_ms → measurement_set/_utils/_msv2}/_tables/load_main_table.py +1 -1
  13. xradio/{vis/_vis_utils/_ms → measurement_set/_utils/_msv2}/_tables/read.py +15 -1
  14. xradio/{vis/_vis_utils/_ms → measurement_set/_utils/_msv2}/conversion.py +186 -84
  15. xradio/measurement_set/_utils/_msv2/create_antenna_xds.py +535 -0
  16. xradio/{vis/_vis_utils/_ms → measurement_set/_utils/_msv2}/create_field_and_source_xds.py +146 -58
  17. xradio/measurement_set/_utils/_msv2/msv4_info_dicts.py +203 -0
  18. xradio/measurement_set/_utils/_msv2/msv4_sub_xdss.py +550 -0
  19. xradio/{vis/_vis_utils/_ms → measurement_set/_utils/_msv2}/subtables.py +1 -1
  20. xradio/{vis/_vis_utils → measurement_set/_utils}/_utils/xds_helper.py +1 -1
  21. xradio/{vis/_vis_utils/ms.py → measurement_set/_utils/msv2.py} +4 -4
  22. xradio/{vis/_vis_utils → measurement_set/_utils}/zarr.py +3 -3
  23. xradio/{vis → measurement_set}/convert_msv2_to_processing_set.py +9 -2
  24. xradio/{vis → measurement_set}/load_processing_set.py +16 -20
  25. xradio/measurement_set/measurement_set_xds.py +83 -0
  26. xradio/{vis/read_processing_set.py → measurement_set/open_processing_set.py} +25 -34
  27. xradio/measurement_set/processing_set.py +777 -0
  28. xradio/measurement_set/schema.py +1979 -0
  29. xradio/schema/check.py +42 -22
  30. xradio/schema/dataclass.py +56 -6
  31. xradio/sphinx/__init__.py +12 -0
  32. xradio/sphinx/schema_table.py +351 -0
  33. {xradio-0.0.40.dist-info → xradio-0.0.42.dist-info}/METADATA +17 -15
  34. xradio-0.0.42.dist-info/RECORD +76 -0
  35. {xradio-0.0.40.dist-info → xradio-0.0.42.dist-info}/WHEEL +1 -1
  36. xradio/_utils/common.py +0 -101
  37. xradio/vis/__init__.py +0 -14
  38. xradio/vis/_processing_set.py +0 -302
  39. xradio/vis/_vis_utils/__init__.py +0 -5
  40. xradio/vis/_vis_utils/_ms/create_antenna_xds.py +0 -482
  41. xradio/vis/_vis_utils/_ms/msv4_infos.py +0 -0
  42. xradio/vis/_vis_utils/_ms/msv4_sub_xdss.py +0 -306
  43. xradio/vis/schema.py +0 -1102
  44. xradio-0.0.40.dist-info/RECORD +0 -73
  45. /xradio/{vis/_vis_utils/_ms → measurement_set/_utils/_msv2}/_tables/load.py +0 -0
  46. /xradio/{vis/_vis_utils/_ms → measurement_set/_utils/_msv2}/_tables/read_main_table.py +0 -0
  47. /xradio/{vis/_vis_utils/_ms → measurement_set/_utils/_msv2}/_tables/read_subtables.py +0 -0
  48. /xradio/{vis/_vis_utils/_ms → measurement_set/_utils/_msv2}/_tables/table_query.py +0 -0
  49. /xradio/{vis/_vis_utils/_ms → measurement_set/_utils/_msv2}/_tables/write.py +0 -0
  50. /xradio/{vis/_vis_utils/_ms → measurement_set/_utils/_msv2}/_tables/write_exp_api.py +0 -0
  51. /xradio/{vis/_vis_utils/_ms → measurement_set/_utils/_msv2}/chunks.py +0 -0
  52. /xradio/{vis/_vis_utils/_ms → measurement_set/_utils/_msv2}/descr.py +0 -0
  53. /xradio/{vis/_vis_utils/_ms → measurement_set/_utils/_msv2}/msv2_msv3.py +0 -0
  54. /xradio/{vis/_vis_utils/_ms → measurement_set/_utils/_msv2}/msv2_to_msv4_meta.py +0 -0
  55. /xradio/{vis/_vis_utils/_ms → measurement_set/_utils/_msv2}/optimised_functions.py +0 -0
  56. /xradio/{vis/_vis_utils/_ms → measurement_set/_utils/_msv2}/partition_queries.py +0 -0
  57. /xradio/{vis/_vis_utils/_ms → measurement_set/_utils/_msv2}/partitions.py +0 -0
  58. /xradio/{vis/_vis_utils → measurement_set/_utils}/_utils/cds.py +0 -0
  59. /xradio/{vis/_vis_utils → measurement_set/_utils}/_utils/partition_attrs.py +0 -0
  60. /xradio/{vis/_vis_utils → measurement_set/_utils}/_utils/stokes_types.py +0 -0
  61. /xradio/{vis/_vis_utils → measurement_set/_utils}/_zarr/encoding.py +0 -0
  62. /xradio/{vis/_vis_utils → measurement_set/_utils}/_zarr/read.py +0 -0
  63. /xradio/{vis/_vis_utils → measurement_set/_utils}/_zarr/write.py +0 -0
  64. {xradio-0.0.40.dist-info → xradio-0.0.42.dist-info}/LICENSE.txt +0 -0
  65. {xradio-0.0.40.dist-info → xradio-0.0.42.dist-info}/top_level.txt +0 -0
xradio/{vis/_vis_utils/_ms → measurement_set/_utils/_msv2}/create_field_and_source_xds.py
@@ -6,20 +6,27 @@ import numpy as np
  import xarray as xr

  import toolviper.utils.logger as logger
- from xradio.vis._vis_utils._ms.msv4_sub_xdss import interpolate_to_time
- from xradio.vis._vis_utils._ms.subtables import subt_rename_ids
- from xradio.vis._vis_utils._ms._tables.read import (
+ from xradio.measurement_set._utils._msv2.msv4_sub_xdss import interpolate_to_time
+ from xradio.measurement_set._utils._msv2.subtables import subt_rename_ids
+ from xradio.measurement_set._utils._msv2._tables.read import (
      convert_casacore_time_to_mjd,
      make_taql_where_between_min_max,
      load_generic_table,
  )
- from xradio._utils.common import cast_to_str, convert_to_si_units, add_position_offsets
+ from xradio._utils.list_and_array import cast_to_str
+ from xradio._utils.coord_math import (
+     convert_to_si_units,
+     add_position_offsets,
+     wrap_to_pi,
+ )
+
  from xradio._utils.list_and_array import (
      check_if_consistent,
      unique_1d,
      to_np_array,
  )
  from xradio._utils.schema import (
+     casacore_to_msv4_measure_type,
      column_description_casacore_to_msv4_measure,
      convert_generic_xds_to_xradio_schema,
  )
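
The xradio._utils.common module is removed in 0.0.42 (file 36 above), with its helpers split between xradio._utils.list_and_array and the new xradio._utils.coord_math, and the vis package is renamed to measurement_set. Downstream code importing from the old paths needs only path updates; a hedged sketch, assuming the helper names themselves are unchanged (which the hunk above suggests):

    # 0.0.40 paths (removed):
    # from xradio._utils.common import cast_to_str, convert_to_si_units
    # from xradio.vis._vis_utils._ms.subtables import subt_rename_ids

    # 0.0.42 equivalents:
    from xradio._utils.list_and_array import cast_to_str
    from xradio._utils.coord_math import convert_to_si_units, wrap_to_pi
    from xradio.measurement_set._utils._msv2.subtables import subt_rename_ids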
@@ -160,7 +167,7 @@ def extract_ephemeris_info(

      # Get meta data.
      ephemeris_meta = ephemeris_xds.attrs["other"]["msv2"]["ctds_attrs"]
-     ephemris_column_description = ephemeris_xds.attrs["other"]["msv2"]["ctds_attrs"][
+     ephemeris_column_description = ephemeris_xds.attrs["other"]["msv2"]["ctds_attrs"][
          "column_descriptions"
      ]

@@ -169,33 +176,45 @@ def extract_ephemeris_info(
      ), "Only geocentric observer ephemeris are supported."

      if "posrefsys" in ephemeris_meta:
-         sky_coord_frame = ephemeris_meta["posrefsys"].replace("ICRF/", "")
+         # Note the phase center can be given as "J2000" or "J2000.0"
+         ref_frame = (
+             ephemeris_meta["posrefsys"]
+             .replace("ICRF/", "", 1)
+             .replace("J2000.0", "J2000", 1)
+         )
+         if ref_frame in casacore_to_msv4_measure_type["direction"].get("Ref_map", {}):
+             ref_frame = casacore_to_msv4_measure_type["direction"]["Ref_map"][ref_frame]
+         else:
+             logger.debug(
+                 f"Unrecognized casacore direction reference frame found in posrefsys: {ref_frame}"
+             )
+         sky_coord_frame = ref_frame.lower()
      else:
-         sky_coord_frame = "ICRS"  # We will have to just assume this.
+         sky_coord_frame = "icrs"  # We will have to just assume this.

      # Find out witch keyword is used for units (UNIT/QuantumUnits)
-     if "UNIT" in ephemris_column_description["RA"]["keywords"]:
+     if "UNIT" in ephemeris_column_description["RA"]["keywords"]:
          unit_keyword = "UNIT"
      else:
          unit_keyword = "QuantumUnits"

      temp_xds = xr.Dataset()

-     # Add mandatory data: OBSERVATION_POSITION
-     observation_position = [
+     # Add mandatory data: OBSERVER_POSITION
+     observer_position = [
          ephemeris_meta["GeoLong"],
          ephemeris_meta["GeoLat"],
          ephemeris_meta["GeoDist"],
      ]
-     temp_xds["OBSERVATION_POSITION"] = xr.DataArray(
-         observation_position, dims=["ellipsoid_pos_label"]
+     temp_xds["OBSERVER_POSITION"] = xr.DataArray(
+         observer_position, dims=["ellipsoid_pos_label"]
      )
-     temp_xds["OBSERVATION_POSITION"].attrs.update(
+     temp_xds["OBSERVER_POSITION"].attrs.update(
          {
              "type": "location",
              "units": ["deg", "deg", "m"],
-             "data": observation_position,
-             "ellipsoid": "WGS84",
+             "data": observer_position,
+             "frame": "WGS84",
              "origin_object_name": "Earth",
              "coordinate_system": ephemeris_meta["obsloc"].lower(),
          }
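
The substantive change in this hunk is replacing the bare "ICRF/" strip with a Ref_map lookup plus lower-casing. A condensed sketch of the new normalization path, with a stand-in Ref_map (the real table is casacore_to_msv4_measure_type in xradio._utils.schema, and the real code logs unmapped frames via logger.debug rather than silently falling through):

    # Stand-in mapping for illustration only; J2000 -> fk5 is the usual
    # casacore/astropy correspondence, but the real Ref_map may differ.
    casacore_to_msv4_measure_type = {"direction": {"Ref_map": {"J2000": "fk5"}}}

    def normalize_posrefsys(posrefsys: str) -> str:
        # "ICRF/J2000.0" -> "J2000" -> mapped frame, lower-cased for MSv4
        ref_frame = posrefsys.replace("ICRF/", "", 1).replace("J2000.0", "J2000", 1)
        ref_map = casacore_to_msv4_measure_type["direction"].get("Ref_map", {})
        return ref_map.get(ref_frame, ref_frame).lower()

    assert normalize_posrefsys("ICRF/J2000.0") == "fk5"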
@@ -210,13 +229,13 @@ def extract_ephemeris_info(
                  ephemeris_xds["Rho"].data,
              )
          ),
-         dims=["time_ephemeris_axis", "sky_pos_label"],
+         dims=["time_ephemeris", "sky_pos_label"],
      )
      # Have to use cast_to_str because the ephemeris table units are not consistently in a list or a string.
      sky_coord_units = [
-         cast_to_str(ephemris_column_description["RA"]["keywords"][unit_keyword]),
-         cast_to_str(ephemris_column_description["DEC"]["keywords"][unit_keyword]),
-         cast_to_str(ephemris_column_description["Rho"]["keywords"][unit_keyword]),
+         cast_to_str(ephemeris_column_description["RA"]["keywords"][unit_keyword]),
+         cast_to_str(ephemeris_column_description["DEC"]["keywords"][unit_keyword]),
+         cast_to_str(ephemeris_column_description["Rho"]["keywords"][unit_keyword]),
      ]
      temp_xds["SOURCE_LOCATION"].attrs.update(
          {"type": "sky_coord", "frame": sky_coord_frame, "units": sky_coord_units}
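
Per the comment above, the ephemeris unit keywords arrive inconsistently as either a plain string ("deg") or a list (["deg"]), which is why every lookup is wrapped in cast_to_str. A plausible sketch of such a helper; the real one lives in xradio._utils.list_and_array and may handle more cases:

    def cast_to_str(value):
        # Collapse ["deg"] and "deg" to the same plain string
        if isinstance(value, (list, tuple)) and value:
            return value[0]
        return value

    assert cast_to_str(["deg"]) == cast_to_str("deg") == "deg"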
@@ -227,16 +246,17 @@ def extract_ephemeris_info(
      # Metadata has to be fixed manually. Alternatively, issues like
      # UNIT/QuantumUnits issue could be handled in convert_generic_xds_to_xradio_schema,
      # but for now preferring not to pollute that function.
+     time_ephemeris_dim = ["time_ephemeris"]
      to_new_data_variables = {
          # mandatory: SOURCE_RADIAL_VELOCITY
-         "RadVel": ["SOURCE_RADIAL_VELOCITY", ["time_ephemeris_axis"]],
+         "RadVel": ["SOURCE_RADIAL_VELOCITY", time_ephemeris_dim],
          # optional: data NORTH_POLE_POSITION_ANGLE and NORTH_POLE_ANGULAR_DISTANCE
-         "NP_ang": ["NORTH_POLE_POSITION_ANGLE", ["time_ephemeris_axis"]],
-         "NP_dist": ["NORTH_POLE_ANGULAR_DISTANCE", ["time_ephemeris_axis"]],
+         "NP_ang": ["NORTH_POLE_POSITION_ANGLE", time_ephemeris_dim],
+         "NP_dist": ["NORTH_POLE_ANGULAR_DISTANCE", time_ephemeris_dim],
          # optional: HELIOCENTRIC_RADIAL_VELOCITY
-         "rdot": ["HELIOCENTRIC_RADIAL_VELOCITY", ["time_ephemeris_axis"]],
+         "rdot": ["HELIOCENTRIC_RADIAL_VELOCITY", time_ephemeris_dim],
          # optional: OBSERVER_PHASE_ANGLE
-         "phang": ["OBSERVER_PHASE_ANGLE", ["time_ephemeris_axis"]],
+         "phang": ["OBSERVER_PHASE_ANGLE", time_ephemeris_dim],
      }
      convert_generic_xds_to_xradio_schema(
          ephemeris_xds, temp_xds, to_new_data_variables, {}
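
The to_new_data_variables dict maps a generic casacore column name to an [MSv4 variable name, dims] pair that convert_generic_xds_to_xradio_schema consumes. A minimal sketch of what such a mapping-driven copy might look like (an assumption for illustration; the real helper in xradio._utils.schema also attaches measure attributes):

    import xarray as xr

    def apply_variable_mapping(src: xr.Dataset, dst: xr.Dataset, mapping: dict) -> xr.Dataset:
        for generic_name, (msv4_name, dims) in mapping.items():
            if generic_name in src.data_vars:  # optional columns may be absent
                dst[msv4_name] = xr.DataArray(src[generic_name].data, dims=dims)
        return dst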
@@ -251,7 +271,7 @@ def extract_ephemeris_info(
                  "type": "quantity",
                  "units": [
                      cast_to_str(
-                         ephemris_column_description[generic_var_name]["keywords"][
+                         ephemeris_column_description[generic_var_name]["keywords"][
                              unit_keyword
                          ]
                      )
@@ -259,8 +279,8 @@ def extract_ephemeris_info(
              }
          )

-     # Add optional data: SUB_OBSERVER_POSITION and SUB_SOLAR_POSITION
-     if "DiskLong" in ephemris_column_description:
+     # Add optional data: SUB_OBSERVER_DIRECTION and SUB_SOLAR_POSITION
+     if "DiskLong" in ephemeris_column_description:
          key_lon = "DiskLong"
          key_lat = "DiskLat"
      else:
@@ -268,7 +288,7 @@ def extract_ephemeris_info(
          key_lat = "diskLat"

      if key_lon in ephemeris_xds.data_vars:
-         temp_xds["SUB_OBSERVER_POSITION"] = xr.DataArray(
+         temp_xds["SUB_OBSERVER_DIRECTION"] = xr.DataArray(
              np.column_stack(
                  (
                      ephemeris_xds[key_lon].data,
@@ -276,21 +296,21 @@ def extract_ephemeris_info(
                      np.zeros(ephemeris_xds[key_lon].shape),
                  )
              ),
-             dims=["time_ephemeris_axis", "ellipsoid_pos_label"],
+             dims=["time_ephemeris", "ellipsoid_pos_label"],
          )

-         temp_xds["SUB_OBSERVER_POSITION"].attrs.update(
+         temp_xds["SUB_OBSERVER_DIRECTION"].attrs.update(
              {
                  "type": "location",
-                 "ellipsoid": "NA",
+                 "frame": "Undefined",
                  "origin_object_name": ephemeris_meta["NAME"],
                  "coordinate_system": "planetodetic",
                  "units": [
                      cast_to_str(
-                         ephemris_column_description[key_lon]["keywords"][unit_keyword]
+                         ephemeris_column_description[key_lon]["keywords"][unit_keyword]
                      ),
                      cast_to_str(
-                         ephemris_column_description[key_lat]["keywords"][unit_keyword]
+                         ephemeris_column_description[key_lat]["keywords"][unit_keyword]
                      ),
                      "m",
                  ],
@@ -306,56 +326,56 @@ def extract_ephemeris_info(
                      ephemeris_xds["r"].data,
                  )
              ),
-             dims=["time_ephemeris_axis", "ellipsoid_pos_label"],
+             dims=["time_ephemeris", "ellipsoid_pos_label"],
          )
          temp_xds["SUB_SOLAR_POSITION"].attrs.update(
              {
                  "type": "location",
-                 "ellipsoid": "NA",
+                 "frame": "Undefined",
                  "origin_object_name": "Sun",
                  "coordinate_system": "planetodetic",
                  "units": [
                      cast_to_str(
-                         ephemris_column_description["SI_lon"]["keywords"][unit_keyword]
+                         ephemeris_column_description["SI_lon"]["keywords"][unit_keyword]
                      ),
                      cast_to_str(
-                         ephemris_column_description["SI_lat"]["keywords"][unit_keyword]
+                         ephemeris_column_description["SI_lat"]["keywords"][unit_keyword]
                      ),
                      cast_to_str(
-                         ephemris_column_description["r"]["keywords"][unit_keyword]
+                         ephemeris_column_description["r"]["keywords"][unit_keyword]
                      ),
                  ],
              }
          )

-     # We are using the "time_ephemeris_axis" label because it might not match the optional time axis of the source and field info. If ephemeris_interpolate=True then rename it to time.
+     # We are using the "time_ephemeris" label because it might not match the optional time axis of the source and field info. If ephemeris_interpolate=True then rename it to time.
      coords = {
          "ellipsoid_pos_label": ["lon", "lat", "dist"],
-         "time_ephemeris_axis": ephemeris_xds["time"].data,
+         "time_ephemeris": ephemeris_xds["time"].data,
          "sky_pos_label": ["ra", "dec", "dist"],
      }
      temp_xds = temp_xds.assign_coords(coords)
      time_coord_attrs = {
          "type": "time",
          "units": ["s"],
-         "scale": "UTC",
-         "format": "UNIX",
+         "scale": "utc",
+         "format": "unix",
      }
-     temp_xds["time_ephemeris_axis"].attrs.update(time_coord_attrs)
+     temp_xds["time_ephemeris"].attrs.update(time_coord_attrs)

      # Convert to si units and interpolate if ephemeris_interpolate=True:
      temp_xds = convert_to_si_units(temp_xds)
      temp_xds = interpolate_to_time(
-         temp_xds, interp_time, "field_and_source_xds", time_name="time_ephemeris_axis"
+         temp_xds, interp_time, "field_and_source_xds", time_name="time_ephemeris"
      )

-     # If we interpolate rename the time_ephemeris_axis axis to time.
+     # If we interpolate rename the time_ephemeris axis to time.
      if interp_time is not None:
-         time_coord = {"time": ("time_ephemeris_axis", interp_time.data)}
+         time_coord = {"time": ("time_ephemeris", interp_time.data)}
          temp_xds = temp_xds.assign_coords(time_coord)
          temp_xds.coords["time"].attrs.update(time_coord_attrs)
-         temp_xds = temp_xds.swap_dims({"time_ephemeris_axis": "time"}).drop_vars(
-             "time_ephemeris_axis"
+         temp_xds = temp_xds.swap_dims({"time_ephemeris": "time"}).drop_vars(
+             "time_ephemeris"
          )

      xds = xr.merge([xds, temp_xds])
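
The rename from time_ephemeris_axis to time_ephemeris is cosmetic, but the rename-after-interpolation step at the end is a reusable xarray pattern: attach a "time" coordinate along the "time_ephemeris" dimension, then promote it. A self-contained toy reproduction of that step:

    import numpy as np
    import xarray as xr

    xds = xr.Dataset(
        {"SOURCE_RADIAL_VELOCITY": ("time_ephemeris", np.zeros(3))},
        coords={"time_ephemeris": [0.0, 1.0, 2.0]},
    )
    interp_time = xr.DataArray([0.0, 1.5, 2.0], dims="time_ephemeris")
    xds = xds.assign_coords({"time": ("time_ephemeris", interp_time.data)})
    xds = xds.swap_dims({"time_ephemeris": "time"}).drop_vars("time_ephemeris")
    assert "time" in xds.dims and "time_ephemeris" not in xds.coords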
@@ -373,21 +393,28 @@ def extract_ephemeris_info(
          interp_time is not None
      ), 'ephemeris_interpolate must be True if there is ephemeris data and multiple fields (this will occur if "FIELD_ID" is not in partition_scheme).'

+         field_phase_center = wrap_to_pi(
+             xds[center_dv].values + xds["SOURCE_LOCATION"][:, 0:2].values
+         )
+         field_phase_center = np.column_stack(
+             (field_phase_center, np.zeros(xds[center_dv].values.shape[0]))
+         )
+         field_phase_center[:, -1] = (
+             field_phase_center[:, -1] + xds["SOURCE_LOCATION"][:, -1].values
+         )
+
          xds[center_dv] = xr.DataArray(
-             add_position_offsets(
-                 np.column_stack(
-                     (xds[center_dv].values, np.zeros(xds[center_dv].values.shape[0]))
-                 ),
-                 xds["SOURCE_LOCATION"].values,
-             ),
+             field_phase_center,
              dims=[xds["SOURCE_LOCATION"].dims[0], "sky_pos_label"],
          )
      else:
+         field_phase_center = (
+             np.append(xds[center_dv].values, 0) + xds["SOURCE_LOCATION"].values
+         )
+         field_phase_center[:, 0:2] = wrap_to_pi(field_phase_center[:, 0:2])
+
          xds[center_dv] = xr.DataArray(
-             add_position_offsets(
-                 np.append(xds[center_dv].values, 0),
-                 xds["SOURCE_LOCATION"].values,
-             ),
+             field_phase_center,
              dims=[xds["SOURCE_LOCATION"].dims[0], "sky_pos_label"],
          )

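This hunk swaps add_position_offsets for an explicit offset-then-wrap computation using the new wrap_to_pi from xradio._utils.coord_math. A plausible sketch of such a helper, assuming the usual convention of folding angles into [-pi, pi); the actual implementation may differ in edge-case handling:

    import numpy as np

    def wrap_to_pi(angle):
        # Fold arbitrary angles into [-pi, pi)
        return (np.asarray(angle) + np.pi) % (2 * np.pi) - np.pi

    # Wrapping after adding ra/dec offsets keeps the phase center from
    # jumping across the +/-pi boundary:
    assert np.isclose(wrap_to_pi(np.pi + 0.1), -np.pi + 0.1)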
@@ -459,6 +486,65 @@ def make_line_dims_and_coords(
      return line_dims, line_coords


+ def pad_missing_sources(
+     source_xds: xr.Dataset, unique_source_ids: np.array
+ ) -> xr.Dataset:
+     """
+     In some MSs there can be source IDs referenced from the field subtable which do not exist in
+     the source table: https://github.com/casangi/xradio/issues/266
+
+     This addresses the issue by padding/filling those IDs with "Unknown"/nan values. Produces a
+     source_xds that, in addition to the information loaded for the non-missing source IDs, has
+     padding for the IDs that are missing from the input MSv2 source table.
+     This function does not need to do anything when unique_source_ids is a single value
+     (partitioning by "FIELD_ID" or othwerwise single field/source)
+
+     Parameters:
+     ----------
+     xds: xr.Dataset
+         source dataset to fix/pad missing sources
+     unique_source_ids: np.array
+         IDs of the sources included in this partition
+
+     Returns:
+     -------
+     filled_source_xds : xr.Dataset
+         source dataset with padding in the originally missing sources
+     """
+
+     # Only fill gaps in multi-source xdss. If single source_id, no need to
+     if len(unique_source_ids) <= 1:
+         return source_xds
+
+     missing_source_ids = [
+         source_id
+         for source_id in unique_source_ids
+         if source_id not in source_xds.coords["SOURCE_ID"]
+     ]
+
+     # would like to use the new-ish xr.pad, but it creates issues with indices/coords and is
+     # also not free of overheads, as it for example changes all numeric types to float64
+     missing_source_xds = xr.full_like(source_xds.isel(SOURCE_ID=0), fill_value=np.nan)
+     pad_str = "Unknown"
+     pad_str_type = "<U9"
+     for var in missing_source_xds.data_vars:
+         if np.issubdtype(missing_source_xds.data_vars[var].dtype, np.str_):
+             # Avoid truncation to length of previously loaded strings
+             missing_source_xds[var] = missing_source_xds[var].astype(
+                 np.dtype(pad_str_type)
+             )
+             missing_source_xds[var] = pad_str
+
+     concat_dim = "SOURCE_ID"
+     xdss_to_concat = [source_xds]
+     for missing_id in missing_source_ids:
+         missing_source_xds[concat_dim] = missing_id
+         xdss_to_concat.append(missing_source_xds)
+     filled_source_xds = xr.concat(xdss_to_concat, concat_dim).sortby(concat_dim)
+
+     return filled_source_xds
+
+
  def extract_source_info(
      xds: xr.Dataset,
      path: str,
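
The concat-based padding in pad_missing_sources can be seen end to end on a toy dataset; this hypothetical example has two loaded sources and a missing SOURCE_ID 1, mirroring the full_like/astype/concat sequence above:

    import numpy as np
    import xarray as xr

    source_xds = xr.Dataset(
        {"SOURCE_NAME": ("SOURCE_ID", np.array(["a", "c"]))},
        coords={"SOURCE_ID": [0, 2]},
    )
    pad = xr.full_like(source_xds.isel(SOURCE_ID=0), fill_value=np.nan)
    pad["SOURCE_NAME"] = pad["SOURCE_NAME"].astype("<U9")  # avoid truncation
    pad["SOURCE_NAME"] = "Unknown"
    pad["SOURCE_ID"] = 1
    filled = xr.concat([source_xds, pad], "SOURCE_ID").sortby("SOURCE_ID")
    assert list(filled["SOURCE_ID"].values) == [0, 1, 2]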
@@ -541,6 +627,8 @@ def extract_source_info(
          "column_descriptions"
      ]

+     source_xds = pad_missing_sources(source_xds, unique_source_id)
+
      # Get source name (the time axis is optional and will probably be required if the partition scheme does not include 'FIELD_ID' or 'SOURCE_ID'.).
      # Note again that this optional time axis has nothing to do with the original time axis in the source table that we drop.
      if len(source_id) == 1:
xradio/measurement_set/_utils/_msv2/msv4_info_dicts.py (new file)
@@ -0,0 +1,203 @@
+ import numpy as np
+ import xarray as xr
+
+ from casacore import tables
+ import toolviper.utils.logger as logger
+
+ from .subtables import subt_rename_ids
+ from ._tables.read import load_generic_table, convert_casacore_time
+ from xradio._utils.list_and_array import check_if_consistent, unique_1d, to_list
+
+
+ def create_info_dicts(
+     in_file: str,
+     xds: xr.Dataset,
+     field_and_source_xds: xr.Dataset,
+     partition_info_misc_fields: dict,
+     tb_tool: tables.table,
+ ) -> dict:
+     """
+     For an MSv4, produces several info dicts (partition_info, processor_info,
+     observation_info). The info dicts are returned in a dictionary that
+     contains them indexed by their corresponding keys, which can be used
+     directly to update the attrs dict of an MSv4.
+
+     Parameters:
+     -----------
+     in_file: str
+         path to the input MSv2
+     xds: xr.Dataset
+         main xds of the MSv4 being converted
+     field_and_source_xds: xr.Dataset
+         field_and_source_xds subdataset
+     partition_info_misc_fiels: dict
+         dict with several scalar fields for the partition_info dict that are
+         collected while processing the main MSv4 table. Expected: scan_id,
+         obs_mode, taql_where
+     tb_tool: tables.table
+         table (query) on the main table with an MSv4 query
+
+     Returns:
+     --------
+     info_dicts: dict
+         info dicts ready to be used to update the attrs of the MSv4
+     """
+
+     if "line_name" in field_and_source_xds.coords:
+         line_name = to_list(unique_1d(np.ravel(field_and_source_xds.line_name.values)))
+     else:
+         line_name = []
+
+     info_dicts = {}
+     info_dicts["partition_info"] = {
+         # "spectral_window_id": xds.frequency.attrs["spectral_window_id"],
+         "spectral_window_name": xds.frequency.attrs["spectral_window_name"],
+         # "field_id": to_list(unique_1d(field_id)),
+         "field_name": to_list(np.unique(field_and_source_xds.field_name.values)),
+         "polarization_setup": to_list(xds.polarization.values),
+         "scan_number": to_list(np.unique(partition_info_misc_fields["scan_id"])),
+         "source_name": to_list(np.unique(field_and_source_xds.source_name.values)),
+         # "source_id": to_list(unique_1d(source_id)),
+         "intents": partition_info_misc_fields["intents"].split(","),
+         "taql": partition_info_misc_fields["taql_where"],
+         "line_name": line_name,
+     }
+
+     observation_id = check_if_consistent(
+         tb_tool.getcol("OBSERVATION_ID"), "OBSERVATION_ID"
+     )
+     info_dicts["observation_info"] = create_observation_info(in_file, observation_id)
+
+     processor_id = check_if_consistent(tb_tool.getcol("PROCESSOR_ID"), "PROCESSOR_ID")
+     info_dicts["processor_info"] = create_processor_info(in_file, processor_id)
+
+     return info_dicts
+
+
+ def create_observation_info(in_file: str, observation_id: int):
+     """
+     Makes a dict with the observation info extracted from the PROCESSOR subtable.
+
+     Parameters
+     ----------
+     in_file: str
+         path to an input MSv2
+     processor_id: int
+         processor ID for one MSv4 dataset
+
+     Returns:
+     --------
+     observation_info: dict
+         observation description ready for the MSv4 observation_info attr
+     """
+
+     generic_observation_xds = load_generic_table(
+         in_file,
+         "OBSERVATION",
+         rename_ids=subt_rename_ids["OBSERVATION"],
+         taql_where=f" where ROWID() = {observation_id}",
+     )
+
+     observation_info = {
+         "observer": [generic_observation_xds["OBSERVER"].values[0]],
+         "release_date": str(
+             convert_casacore_time(generic_observation_xds["RELEASE_DATE"].values)[0]
+         ),
+     }
+     # could just assume lower:upper case but keeping explicit dict for now
+     mandatory_fields = {"project": "PROJECT"}
+     for field_msv4, row_msv2 in mandatory_fields.items():
+         observation_info[field_msv4] = generic_observation_xds[row_msv2].values[0]
+
+     exec_block_xds = None
+     try:
+         exec_block_xds = load_generic_table(in_file, "ASDM_EXECBLOCK")
+     except ValueError as exc:
+         logger.debug(
+             "Did not find the ASDM_EXECBLOCK subtable, not loading optional fields in observation_info"
+         )
+     if exec_block_xds:
+         exec_block_info = extract_exec_block_info(exec_block_xds)
+         observation_info.update(exec_block_info)
+
+     return observation_info
+
+
+ def extract_exec_block_info(exec_block_xds: xr.Dataset) -> dict:
+     """
+     Get the (optional) fields of the observation_info that come from the
+     ASDM_EXECBLOCK subtable.
+
+     Note this does not parse strings like 'session_reference':
+     '<EntityRef entityId="uid://A001/X133d/X169f" partId="X00000000" entityTypeName="OUSStatus"'
+     We might want to simplify that to 'uid://A001/X133d/X169f', but keeping the
+     full string for now, as it has additional information such as the type.
+
+     Parameters
+     ----------
+     exec_block_xds: xr.Dataset
+         raw xds read from subtable ASDM_EXECBLOCK
+
+     Returns:
+     --------
+     exec_block_info: dict
+         Execution block description ready for the MSv4 observation_info dict
+     """
+
+     optional_fields = {
+         "execution_block_id": "execBlockId",
+         "execution_block_number": "execBlockNum",
+         "execution_block_UID": "execBlockUID",
+         "session_reference": "sessionReference",
+         "observing_script": "observingScript",
+         "observing_script_UID": "observingScriptUID",
+         "observing_log": "observingLog",
+     }
+
+     exec_block_info = {}
+     for field_msv4, row_msv2 in optional_fields.items():
+         if row_msv2 in exec_block_xds.data_vars:
+             msv2_value = exec_block_xds[row_msv2].values[0]
+             if isinstance(msv2_value, np.ndarray):
+                 exec_block_info[field_msv4] = ",".join([log for log in msv2_value])
+             else:
+                 exec_block_info[field_msv4] = msv2_value
+
+     return exec_block_info
+
+
+ def create_processor_info(in_file: str, processor_id: int):
+     """
+     Makes a dict with the processor info extracted from the PROCESSOR subtable.
+
+     Parameters
+     ----------
+     in_file: str
+         path to an input MSv2
+     processor_id: int
+         processor ID for one MSv4 dataset
+
+     Returns:
+     --------
+     processor_info: dict
+         processor description ready for the MSv4 processor_info attr
+     """
+
+     generic_processor_xds = load_generic_table(
+         in_file,
+         "PROCESSOR",
+         rename_ids=subt_rename_ids["PROCESSOR"],
+         taql_where=f" where ROWID() = {processor_id}",
+     )
+
+     # Many telescopes (ASKAP, MeerKAT, SKA-Mid, VLBI, VLBA, ngEHT) seem to
+     # produce an empty PROCESSOR subtable
+     if len(generic_processor_xds.data_vars) <= 0:
+         processor_info = {"type": "", "sub_type": ""}
+     else:
+         processor_info = {
+             "type": generic_processor_xds["TYPE"].values[0],
+             "sub_type": generic_processor_xds["SUB_TYPE"].values[0],
+         }
+
+     return processor_info
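
Per the docstring of create_info_dicts, the returned dict is keyed so it can be applied directly to an MSv4's attrs. A runnable sketch of that usage with placeholder values (the real dicts are built from an MSv2 on disk during conversion):

    import xarray as xr

    msv4_xds = xr.Dataset()  # stand-in for a converted MSv4 main dataset
    info_dicts = {  # illustrative values; normally from create_info_dicts(...)
        "partition_info": {"spectral_window_name": "spw_0"},
        "observation_info": {"observer": ["unknown"], "project": ""},
        "processor_info": {"type": "", "sub_type": ""},
    }
    msv4_xds.attrs.update(info_dicts)
    assert set(msv4_xds.attrs) == {"partition_info", "observation_info", "processor_info"}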