xradio 0.0.39__py3-none-any.whl → 0.0.41__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (52)
  1. xradio/__init__.py +1 -1
  2. xradio/_utils/schema.py +14 -3
  3. xradio/{vis → correlated_data}/__init__.py +3 -2
  4. xradio/{vis/_vis_utils → correlated_data/_utils}/_ms/_tables/load_main_table.py +2 -2
  5. xradio/{vis/_vis_utils → correlated_data/_utils}/_ms/_tables/read.py +15 -1
  6. xradio/{vis/_vis_utils → correlated_data/_utils}/_ms/_tables/read_main_table.py +1 -1
  7. xradio/{vis/_vis_utils → correlated_data/_utils}/_ms/_tables/read_subtables.py +1 -1
  8. xradio/{vis/_vis_utils → correlated_data/_utils}/_ms/_tables/write.py +1 -1
  9. xradio/{vis/_vis_utils → correlated_data/_utils}/_ms/conversion.py +117 -58
  10. xradio/{vis/_vis_utils → correlated_data/_utils}/_ms/create_antenna_xds.py +196 -168
  11. xradio/{vis/_vis_utils → correlated_data/_utils}/_ms/create_field_and_source_xds.py +234 -200
  12. xradio/{vis/_vis_utils → correlated_data/_utils}/_ms/msv2_to_msv4_meta.py +1 -1
  13. xradio/correlated_data/_utils/_ms/msv4_info_dicts.py +203 -0
  14. xradio/correlated_data/_utils/_ms/msv4_sub_xdss.py +516 -0
  15. xradio/{vis/_vis_utils → correlated_data/_utils}/_ms/partition_queries.py +1 -1
  16. xradio/{vis/_vis_utils → correlated_data/_utils}/_ms/partitions.py +1 -1
  17. xradio/{vis/_vis_utils → correlated_data/_utils}/_ms/subtables.py +2 -2
  18. xradio/{vis/_vis_utils → correlated_data/_utils}/_utils/xds_helper.py +1 -1
  19. xradio/{vis/_vis_utils → correlated_data/_utils}/_zarr/read.py +1 -1
  20. xradio/{vis/_vis_utils → correlated_data/_utils}/_zarr/write.py +1 -1
  21. xradio/{vis/_vis_utils → correlated_data/_utils}/ms.py +1 -1
  22. xradio/{vis/_vis_utils → correlated_data/_utils}/zarr.py +4 -4
  23. xradio/{vis → correlated_data}/convert_msv2_to_processing_set.py +10 -3
  24. xradio/correlated_data/correlated_xds.py +13 -0
  25. xradio/{vis → correlated_data}/load_processing_set.py +13 -17
  26. xradio/{vis/read_processing_set.py → correlated_data/open_processing_set.py} +21 -23
  27. xradio/{vis/_processing_set.py → correlated_data/processing_set.py} +11 -12
  28. xradio/{vis → correlated_data}/schema.py +572 -186
  29. xradio/correlated_data/test__processing_set.py +74 -0
  30. xradio/image/_util/_casacore/xds_from_casacore.py +1 -1
  31. xradio/image/_util/_zarr/xds_from_zarr.py +1 -1
  32. xradio/image/_util/_zarr/zarr_low_level.py +1 -1
  33. {xradio-0.0.39.dist-info → xradio-0.0.41.dist-info}/METADATA +10 -10
  34. xradio-0.0.41.dist-info/RECORD +75 -0
  35. {xradio-0.0.39.dist-info → xradio-0.0.41.dist-info}/WHEEL +1 -1
  36. xradio/vis/_vis_utils/_ms/msv4_infos.py +0 -0
  37. xradio/vis/_vis_utils/_ms/msv4_sub_xdss.py +0 -351
  38. xradio-0.0.39.dist-info/RECORD +0 -73
  39. /xradio/{vis/_vis_utils → correlated_data/_utils}/__init__.py +0 -0
  40. /xradio/{vis/_vis_utils → correlated_data/_utils}/_ms/_tables/load.py +0 -0
  41. /xradio/{vis/_vis_utils → correlated_data/_utils}/_ms/_tables/table_query.py +0 -0
  42. /xradio/{vis/_vis_utils → correlated_data/_utils}/_ms/_tables/write_exp_api.py +0 -0
  43. /xradio/{vis/_vis_utils → correlated_data/_utils}/_ms/chunks.py +0 -0
  44. /xradio/{vis/_vis_utils → correlated_data/_utils}/_ms/descr.py +0 -0
  45. /xradio/{vis/_vis_utils → correlated_data/_utils}/_ms/msv2_msv3.py +0 -0
  46. /xradio/{vis/_vis_utils → correlated_data/_utils}/_ms/optimised_functions.py +0 -0
  47. /xradio/{vis/_vis_utils → correlated_data/_utils}/_utils/cds.py +0 -0
  48. /xradio/{vis/_vis_utils → correlated_data/_utils}/_utils/partition_attrs.py +0 -0
  49. /xradio/{vis/_vis_utils → correlated_data/_utils}/_utils/stokes_types.py +0 -0
  50. /xradio/{vis/_vis_utils → correlated_data/_utils}/_zarr/encoding.py +0 -0
  51. {xradio-0.0.39.dist-info → xradio-0.0.41.dist-info}/LICENSE.txt +0 -0
  52. {xradio-0.0.39.dist-info → xradio-0.0.41.dist-info}/top_level.txt +0 -0
@@ -5,21 +5,24 @@ from typing import Tuple, Union
5
5
  import numpy as np
6
6
  import xarray as xr
7
7
 
8
- from xradio._utils.schema import column_description_casacore_to_msv4_measure
9
- from xradio.vis._vis_utils._ms.msv4_sub_xdss import interpolate_to_time
10
- from xradio.vis._vis_utils._ms.subtables import subt_rename_ids
11
- from xradio.vis._vis_utils._ms._tables.read import (
8
+ import toolviper.utils.logger as logger
9
+ from xradio.correlated_data._utils._ms.msv4_sub_xdss import interpolate_to_time
10
+ from xradio.correlated_data._utils._ms.subtables import subt_rename_ids
11
+ from xradio.correlated_data._utils._ms._tables.read import (
12
12
  convert_casacore_time_to_mjd,
13
13
  make_taql_where_between_min_max,
14
14
  load_generic_table,
15
15
  )
16
- import graphviper.utils.logger as logger
16
+ from xradio._utils.common import cast_to_str, convert_to_si_units, add_position_offsets
17
17
  from xradio._utils.list_and_array import (
18
18
  check_if_consistent,
19
19
  unique_1d,
20
20
  to_np_array,
21
21
  )
22
- from xradio._utils.common import cast_to_str, convert_to_si_units, add_position_offsets
22
+ from xradio._utils.schema import (
23
+ column_description_casacore_to_msv4_measure,
24
+ convert_generic_xds_to_xradio_schema,
25
+ )
23
26
 
24
27
 
25
28
  def create_field_and_source_xds(
@@ -157,7 +160,7 @@ def extract_ephemeris_info(
157
160
 
158
161
  # Get meta data.
159
162
  ephemeris_meta = ephemeris_xds.attrs["other"]["msv2"]["ctds_attrs"]
160
- ephemris_column_description = ephemeris_xds.attrs["other"]["msv2"]["ctds_attrs"][
163
+ ephemeris_column_description = ephemeris_xds.attrs["other"]["msv2"]["ctds_attrs"][
161
164
  "column_descriptions"
162
165
  ]
163
166
 
@@ -171,21 +174,34 @@ def extract_ephemeris_info(
171
174
  sky_coord_frame = "ICRS" # We will have to just assume this.
172
175
 
173
176
  # Find out which keyword is used for units (UNIT/QuantumUnits)
174
- if "UNIT" in ephemris_column_description["RA"]["keywords"]:
177
+ if "UNIT" in ephemeris_column_description["RA"]["keywords"]:
175
178
  unit_keyword = "UNIT"
176
179
  else:
177
180
  unit_keyword = "QuantumUnits"
178
181
 
179
- # We are using the "time_ephemeris_axis" label because it might not match the optional time axis of the source and field info. If ephemeris_interpolate=True then rename it to time.
180
- coords = {
181
- "ellipsoid_pos_label": ["lon", "lat", "dist"],
182
- "time_ephemeris_axis": ephemeris_xds["time"].data,
183
- "sky_pos_label": ["ra", "dec", "dist"],
184
- }
185
-
186
182
  temp_xds = xr.Dataset()
187
183
 
188
- # Add mandatory data: SOURCE_LOCATION (POSITION / sky_pos_label)
184
+ # Add mandatory data: OBSERVER_POSITION
185
+ observer_position = [
186
+ ephemeris_meta["GeoLong"],
187
+ ephemeris_meta["GeoLat"],
188
+ ephemeris_meta["GeoDist"],
189
+ ]
190
+ temp_xds["OBSERVER_POSITION"] = xr.DataArray(
191
+ observer_position, dims=["ellipsoid_pos_label"]
192
+ )
193
+ temp_xds["OBSERVER_POSITION"].attrs.update(
194
+ {
195
+ "type": "location",
196
+ "units": ["deg", "deg", "m"],
197
+ "data": observer_position,
198
+ "ellipsoid": "WGS84",
199
+ "origin_object_name": "Earth",
200
+ "coordinate_system": ephemeris_meta["obsloc"].lower(),
201
+ }
202
+ ) # I think the units are ['deg','deg','m'] and 'WGS84'.
203
+
204
+ # Add (optional) data: SOURCE_LOCATION (POSITION / sky_pos_label)
189
205
  temp_xds["SOURCE_LOCATION"] = xr.DataArray(
190
206
  np.column_stack(
191
207
  (
@@ -194,86 +210,58 @@ def extract_ephemeris_info(
194
210
  ephemeris_xds["Rho"].data,
195
211
  )
196
212
  ),
197
- dims=["time_ephemeris_axis", "sky_pos_label"],
213
+ dims=["time_ephemeris", "sky_pos_label"],
198
214
  )
199
215
  # Have to use cast_to_str because the ephemeris table units are not consistently in a list or a string.
200
216
  sky_coord_units = [
201
- cast_to_str(ephemris_column_description["RA"]["keywords"][unit_keyword]),
202
- cast_to_str(ephemris_column_description["DEC"]["keywords"][unit_keyword]),
203
- cast_to_str(ephemris_column_description["Rho"]["keywords"][unit_keyword]),
217
+ cast_to_str(ephemeris_column_description["RA"]["keywords"][unit_keyword]),
218
+ cast_to_str(ephemeris_column_description["DEC"]["keywords"][unit_keyword]),
219
+ cast_to_str(ephemeris_column_description["Rho"]["keywords"][unit_keyword]),
204
220
  ]
205
221
  temp_xds["SOURCE_LOCATION"].attrs.update(
206
222
  {"type": "sky_coord", "frame": sky_coord_frame, "units": sky_coord_units}
207
223
  )
208
224
 
209
- # Add mandatory data: SOURCE_RADIAL_VELOCITY
210
- temp_xds["SOURCE_RADIAL_VELOCITY"] = xr.DataArray(
211
- ephemeris_xds["RadVel"].data, dims=["time_ephemeris_axis"]
212
- )
213
- temp_xds["SOURCE_RADIAL_VELOCITY"].attrs.update(
214
- {
215
- "type": "quantity",
216
- "units": [
217
- cast_to_str(
218
- ephemris_column_description["RadVel"]["keywords"][unit_keyword]
219
- )
220
- ],
221
- }
222
- )
223
-
224
- # Add mandatory data: OBSERVATION_POSITION
225
- observation_position = [
226
- ephemeris_meta["GeoLong"],
227
- ephemeris_meta["GeoLat"],
228
- ephemeris_meta["GeoDist"],
229
- ]
230
- temp_xds["OBSERVATION_POSITION"] = xr.DataArray(
231
- observation_position, dims=["ellipsoid_pos_label"]
225
+ # Convert a few columns/variables that can be converted with standard
226
+ # convert_generic_xds_to_xradio_schema().
227
+ # Metadata has to be fixed manually. Alternatively, quirks such as the
228
+ # UNIT/QuantumUnits keyword difference could be handled in convert_generic_xds_to_xradio_schema,
229
+ # but for now preferring not to pollute that function.
230
+ time_ephemeris_dim = ["time_ephemeris"]
231
+ to_new_data_variables = {
232
+ # mandatory: SOURCE_RADIAL_VELOCITY
233
+ "RadVel": ["SOURCE_RADIAL_VELOCITY", time_ephemeris_dim],
234
+ # optional: NORTH_POLE_POSITION_ANGLE and NORTH_POLE_ANGULAR_DISTANCE
235
+ "NP_ang": ["NORTH_POLE_POSITION_ANGLE", time_ephemeris_dim],
236
+ "NP_dist": ["NORTH_POLE_ANGULAR_DISTANCE", time_ephemeris_dim],
237
+ # optional: HELIOCENTRIC_RADIAL_VELOCITY
238
+ "rdot": ["HELIOCENTRIC_RADIAL_VELOCITY", time_ephemeris_dim],
239
+ # optional: OBSERVER_PHASE_ANGLE
240
+ "phang": ["OBSERVER_PHASE_ANGLE", time_ephemeris_dim],
241
+ }
242
+ convert_generic_xds_to_xradio_schema(
243
+ ephemeris_xds, temp_xds, to_new_data_variables, {}
232
244
  )
233
- temp_xds["OBSERVATION_POSITION"].attrs.update(
234
- {
235
- "type": "location",
236
- "units": ["deg", "deg", "m"],
237
- "data": observation_position,
238
- "ellipsoid": "WGS84",
239
- "origin_object_name": "Earth",
240
- "coordinate_system": ephemeris_meta["obsloc"].lower(),
241
- }
242
- ) # I think the units are ['deg','deg','m'] and 'WGS84'.
243
245
 
244
- # Add optional data NORTH_POLE_POSITION_ANGLE and NORTH_POLE_ANGULAR_DISTANCE
245
- if "NP_ang" in ephemeris_xds.data_vars:
246
- temp_xds["NORTH_POLE_POSITION_ANGLE"] = xr.DataArray(
247
- ephemeris_xds["NP_ang"].data, dims=["time_ephemeris_axis"]
248
- )
249
- temp_xds["NORTH_POLE_POSITION_ANGLE"].attrs.update(
250
- {
251
- "type": "quantity",
252
- "units": [
253
- cast_to_str(
254
- ephemris_column_description["NP_ang"]["keywords"][unit_keyword]
255
- )
256
- ],
257
- }
258
- )
259
-
260
- if "NP_dist" in ephemeris_xds.data_vars:
261
- temp_xds["NORTH_POLE_ANGULAR_DISTANCE"] = xr.DataArray(
262
- ephemeris_xds["NP_dist"].data, dims=["time_ephemeris_axis"]
263
- )
264
- temp_xds["NORTH_POLE_ANGULAR_DISTANCE"].attrs.update(
265
- {
266
- "type": "quantity",
267
- "units": [
268
- cast_to_str(
269
- ephemris_column_description["NP_dist"]["keywords"][unit_keyword]
270
- )
271
- ],
272
- }
273
- )
246
+ # Adjust metadata:
247
+ for generic_var_name, msv4_variable_def in to_new_data_variables.items():
248
+ msv4_var_name = msv4_variable_def[0]
249
+ if msv4_var_name in temp_xds:
250
+ temp_xds[msv4_var_name].attrs.update(
251
+ {
252
+ "type": "quantity",
253
+ "units": [
254
+ cast_to_str(
255
+ ephemeris_column_description[generic_var_name]["keywords"][
256
+ unit_keyword
257
+ ]
258
+ )
259
+ ],
260
+ }
261
+ )
274
262
 
275
263
  # Add optional data: SUB_OBSERVER_POSITION and SUB_SOLAR_POSITION
276
- if "DiskLong" in ephemris_column_description:
264
+ if "DiskLong" in ephemeris_column_description:
277
265
  key_lon = "DiskLong"
278
266
  key_lat = "DiskLat"
279
267
  else:
@@ -281,7 +269,7 @@ def extract_ephemeris_info(
281
269
  key_lat = "diskLat"
282
270
 
283
271
  if key_lon in ephemeris_xds.data_vars:
284
- temp_xds["SUB_OBSERVER_POSITION"] = xr.DataArray(
272
+ temp_xds["SUB_OBSERVER_DIRECTION"] = xr.DataArray(
285
273
  np.column_stack(
286
274
  (
287
275
  ephemeris_xds[key_lon].data,
@@ -289,10 +277,10 @@ def extract_ephemeris_info(
289
277
  np.zeros(ephemeris_xds[key_lon].shape),
290
278
  )
291
279
  ),
292
- dims=["time_ephemeris_axis", "ellipsoid_pos_label"],
280
+ dims=["time_ephemeris", "ellipsoid_pos_label"],
293
281
  )
294
282
 
295
- temp_xds["SUB_OBSERVER_POSITION"].attrs.update(
283
+ temp_xds["SUB_OBSERVER_DIRECTION"].attrs.update(
296
284
  {
297
285
  "type": "location",
298
286
  "ellipsoid": "NA",
@@ -300,10 +288,10 @@ def extract_ephemeris_info(
300
288
  "coordinate_system": "planetodetic",
301
289
  "units": [
302
290
  cast_to_str(
303
- ephemris_column_description[key_lon]["keywords"][unit_keyword]
291
+ ephemeris_column_description[key_lon]["keywords"][unit_keyword]
304
292
  ),
305
293
  cast_to_str(
306
- ephemris_column_description[key_lat]["keywords"][unit_keyword]
294
+ ephemeris_column_description[key_lat]["keywords"][unit_keyword]
307
295
  ),
308
296
  "m",
309
297
  ],
@@ -319,7 +307,7 @@ def extract_ephemeris_info(
319
307
  ephemeris_xds["r"].data,
320
308
  )
321
309
  ),
322
- dims=["time_ephemeris_axis", "ellipsoid_pos_label"],
310
+ dims=["time_ephemeris", "ellipsoid_pos_label"],
323
311
  )
324
312
  temp_xds["SUB_SOLAR_POSITION"].attrs.update(
325
313
  {
@@ -329,50 +317,24 @@ def extract_ephemeris_info(
329
317
  "coordinate_system": "planetodetic",
330
318
  "units": [
331
319
  cast_to_str(
332
- ephemris_column_description["SI_lon"]["keywords"][unit_keyword]
320
+ ephemeris_column_description["SI_lon"]["keywords"][unit_keyword]
333
321
  ),
334
322
  cast_to_str(
335
- ephemris_column_description["SI_lat"]["keywords"][unit_keyword]
323
+ ephemeris_column_description["SI_lat"]["keywords"][unit_keyword]
336
324
  ),
337
325
  cast_to_str(
338
- ephemris_column_description["r"]["keywords"][unit_keyword]
326
+ ephemeris_column_description["r"]["keywords"][unit_keyword]
339
327
  ),
340
328
  ],
341
329
  }
342
330
  )
343
331
 
344
- # Add optional data: HELIOCENTRIC_RADIAL_VELOCITY
345
- if "rdot" in ephemeris_xds.data_vars:
346
- temp_xds["HELIOCENTRIC_RADIAL_VELOCITY"] = xr.DataArray(
347
- ephemeris_xds["rdot"].data, dims=["time_ephemeris_axis"]
348
- )
349
- temp_xds["HELIOCENTRIC_RADIAL_VELOCITY"].attrs.update(
350
- {
351
- "type": "quantity",
352
- "units": [
353
- cast_to_str(
354
- ephemris_column_description["rdot"]["keywords"][unit_keyword]
355
- )
356
- ],
357
- }
358
- )
359
-
360
- # Add optional data: OBSERVER_PHASE_ANGLE
361
- if "phang" in ephemeris_xds.data_vars:
362
- temp_xds["OBSERVER_PHASE_ANGLE"] = xr.DataArray(
363
- ephemeris_xds["phang"].data, dims=["time_ephemeris_axis"]
364
- )
365
- temp_xds["OBSERVER_PHASE_ANGLE"].attrs.update(
366
- {
367
- "type": "quantity",
368
- "units": [
369
- cast_to_str(
370
- ephemris_column_description["phang"]["keywords"][unit_keyword]
371
- )
372
- ],
373
- }
374
- )
375
-
332
+ # We are using the "time_ephemeris" label because it might not match the optional time axis of the source and field info. If ephemeris_interpolate=True then rename it to time.
333
+ coords = {
334
+ "ellipsoid_pos_label": ["lon", "lat", "dist"],
335
+ "time_ephemeris": ephemeris_xds["time"].data,
336
+ "sky_pos_label": ["ra", "dec", "dist"],
337
+ }
376
338
  temp_xds = temp_xds.assign_coords(coords)
377
339
  time_coord_attrs = {
378
340
  "type": "time",
@@ -380,21 +342,21 @@ def extract_ephemeris_info(
380
342
  "scale": "UTC",
381
343
  "format": "UNIX",
382
344
  }
383
- temp_xds["time_ephemeris_axis"].attrs.update(time_coord_attrs)
345
+ temp_xds["time_ephemeris"].attrs.update(time_coord_attrs)
384
346
 
385
347
  # Convert to si units and interpolate if ephemeris_interpolate=True:
386
348
  temp_xds = convert_to_si_units(temp_xds)
387
349
  temp_xds = interpolate_to_time(
388
- temp_xds, interp_time, "field_and_source_xds", time_name="time_ephemeris_axis"
350
+ temp_xds, interp_time, "field_and_source_xds", time_name="time_ephemeris"
389
351
  )
390
352
 
391
- # If we interpolate rename the time_ephemeris_axis axis to time.
353
+ # If we interpolate rename the time_ephemeris axis to time.
392
354
  if interp_time is not None:
393
- time_coord = {"time": ("time_ephemeris_axis", interp_time.data)}
355
+ time_coord = {"time": ("time_ephemeris", interp_time.data)}
394
356
  temp_xds = temp_xds.assign_coords(time_coord)
395
357
  temp_xds.coords["time"].attrs.update(time_coord_attrs)
396
- temp_xds = temp_xds.swap_dims({"time_ephemeris_axis": "time"}).drop_vars(
397
- "time_ephemeris_axis"
358
+ temp_xds = temp_xds.swap_dims({"time_ephemeris": "time"}).drop_vars(
359
+ "time_ephemeris"
398
360
  )
399
361
 
400
362
  xds = xr.merge([xds, temp_xds])
@@ -435,8 +397,74 @@ def extract_ephemeris_info(
435
397
  return xds
436
398
 
437
399
 
400
+ def make_line_dims_and_coords(
401
+ source_xds: xr.Dataset, source_id: Union[int, np.ndarray], num_lines: int
402
+ ) -> tuple[list, dict]:
403
+ """
404
+ Produces the dimensions and coordinates used in data variables related
405
+ to line information (LINE_REST_FREQUENCY, LINE_SYSTEMIC_VELOCITY).
406
+
407
+ In the dimensions, "time" is optional. To produce the points of the
408
+ coordinates we need to look into the (optional) TRANSITION column or
409
+ alternatively other columns (DIRECTION) to produce coordinates points of
410
+ appropriate shape, given the "num_lines" and "source_id".
411
+
412
+ Parameters:
413
+ ----------
414
+ source_xds: xr.Dataset
415
+ generic source xarray dataset
416
+ source_id: Union[int, np.ndarray]
417
+ source_id of the dataset, when it is an array that indicates the
418
+ presence of the "time" dimension
419
+ num_lines: int
420
+ number of lines in the source dataset
421
+
422
+ Returns:
423
+ -------
424
+ tuple : tuple[list, dict]
425
+ The dimensions and coordinates to use with line data variables. The
426
+ dimensions are produced as a list of dimension names, and the
427
+ coordinates as a dict for xarray coords.
428
+ """
429
+
430
+ # Transition is an optional column and occasionally not populated
431
+ if "TRANSITION" in source_xds.data_vars:
432
+ transition_var_data = source_xds["TRANSITION"].data
433
+ else:
434
+ transition_var_data = np.zeros(source_xds["DIRECTION"].shape, dtype="str")
435
+
436
+ # if TRANSITION is left empty (or otherwise incomplete), and num_lines > 1,
437
+ # the data_vars expect a "num_lines" size in the last dimension
438
+ vars_shape = transition_var_data.shape[:-1] + (np.max(num_lines),)
439
+ if transition_var_data.shape == vars_shape:
440
+ coords_lines_data = transition_var_data
441
+ else:
442
+ coords_lines_data = np.broadcast_to(
443
+ transition_var_data, max(transition_var_data.shape, vars_shape)
444
+ )
445
+
446
+ line_label_data = np.arange(coords_lines_data.shape[-1]).astype(str)
447
+ if len(source_id) == 1:
448
+ line_coords = {
449
+ "line_name": ("line_label", coords_lines_data),
450
+ "line_label": line_label_data,
451
+ }
452
+ line_dims = ["line_label"]
453
+ else:
454
+ line_coords = {
455
+ "line_name": (("time", "line_label"), coords_lines_data),
456
+ "line_label": line_label_data,
457
+ }
458
+ line_dims = ["time", "line_label"]
459
+
460
+ return line_dims, line_coords
461
+
462
+
438
463
  def extract_source_info(
439
- xds: xr.Dataset, path: str, source_id: int, spectral_window_id: int
464
+ xds: xr.Dataset,
465
+ path: str,
466
+ source_id: Union[int, np.ndarray],
467
+ spectral_window_id: int,
440
468
  ) -> tuple[xr.Dataset, int]:
441
469
  """
442
470
  Extracts source information from the given path and adds it to the xarray dataset.
@@ -447,7 +475,7 @@ def extract_source_info(
447
475
  The xarray dataset to which the source information will be added.
448
476
  path : str
449
477
  The path to the input file.
450
- source_id : int
478
+ source_id : Union[int, np.ndarray]
451
479
  The ID of the source.
452
480
  spectral_window_id : int
453
481
  The ID of the spectral window.
@@ -554,11 +582,12 @@ def extract_source_info(
554
582
  direction_var = source_xds[direction_msv2_col]
555
583
 
556
584
  # SOURCE_LOCATION (DIRECTION / sky_dir_label)
557
- xds["SOURCE_LOCATION"] = xr.DataArray(direction_var.data, dims=direction_dims)
558
585
  location_msv4_measure = column_description_casacore_to_msv4_measure(
559
586
  source_column_description[direction_msv2_col]
560
587
  )
561
- xds["SOURCE_LOCATION"].attrs.update(location_msv4_measure)
588
+ xds["SOURCE_LOCATION"] = xr.DataArray(
589
+ direction_var.data, dims=direction_dims, attrs=location_msv4_measure
590
+ )
562
591
 
563
592
  # Do we have line data:
564
593
  if source_xds["NUM_LINES"].data.ndim == 0:
@@ -567,53 +596,21 @@ def extract_source_info(
567
596
  num_lines = source_xds["NUM_LINES"].data
568
597
 
569
598
  if any(num_lines > 0):
599
+ line_dims, line_coords = make_line_dims_and_coords(
600
+ source_xds, source_id, num_lines
601
+ )
602
+ xds = xds.assign_coords(line_coords)
570
603
 
571
- # Transition is an optional column and occasionally not populated
572
- if "TRANSITION" in source_xds.data_vars:
573
- transition_var_data = source_xds["TRANSITION"].data
574
- else:
575
- transition_var_data = np.zeros(source_xds["DIRECTION"].shape, dtype="str")
576
-
577
- # if TRANSITION is left empty (or otherwise incomplete), and num_lines > 1,
578
- # the data_vars expect a "num_lines" size in the last dimension
579
- vars_shape = transition_var_data.shape[:-1] + (np.max(num_lines),)
580
- if transition_var_data.shape == vars_shape:
581
- coords_lines_data = transition_var_data
582
- else:
583
- coords_lines_data = np.broadcast_to(
584
- transition_var_data, max(transition_var_data.shape, vars_shape)
585
- )
586
-
587
- line_label_data = np.arange(coords_lines_data.shape[-1]).astype(str)
588
- if len(source_id) == 1:
589
- coords_lines = {
590
- "line_name": ("line_label", coords_lines_data),
591
- "line_label": line_label_data,
592
- }
593
- xds = xds.assign_coords(coords_lines)
594
- line_dims = ["line_label"]
595
- else:
596
- coords_lines = {
597
- "line_name": (("time", "line_label"), coords_lines_data),
598
- "line_label": line_label_data,
599
- }
600
- xds = xds.assign_coords(coords_lines)
601
- line_dims = ["time", "line_label"]
602
-
603
- optional_data_variables = {
604
- "REST_FREQUENCY": "LINE_REST_FREQUENCY",
605
- "SYSVEL": "LINE_SYSTEMIC_VELOCITY",
604
+ to_new_data_variables = {
605
+ "REST_FREQUENCY": ["LINE_REST_FREQUENCY", line_dims],
606
+ "SYSVEL": ["LINE_SYSTEMIC_VELOCITY", line_dims],
606
607
  }
607
- for generic_name, msv4_name in optional_data_variables.items():
608
- if generic_name in source_xds:
609
- msv4_measure = column_description_casacore_to_msv4_measure(
610
- source_column_description[generic_name]
611
- )
612
-
613
- xds[msv4_name] = xr.DataArray(
614
- source_xds[generic_name].data, dims=line_dims
615
- )
616
- xds[msv4_name].attrs.update(msv4_measure)
608
+ to_new_coords = {
609
+ "TIME": ["time", ["time"]],
610
+ }
611
+ convert_generic_xds_to_xradio_schema(
612
+ source_xds, xds, to_new_data_variables, to_new_coords
613
+ )
617
614
 
618
615
  # Need to add doppler info if present. Add check.
619
616
  try:
@@ -634,8 +631,56 @@ def extract_source_info(
634
631
  return xds, np.sum(num_lines[unique_source_ids_indices])
635
632
 
636
633
 
634
+ def make_field_dims_and_coords(
635
+ field_xds: xr.Dataset, field_id: Union[int, np.ndarray], field_times: list
636
+ ) -> tuple[list, dict]:
637
+ """
638
+ Produces the dimensions and coordinates used in the field data variables
639
+ extracted from the MSv2 FIELD subtable (FIELD_PHASE_CENTER/
640
+ FIELD_REFERENCE_CENTER).
641
+
642
+ Parameters:
643
+ ----------
644
+ field_xds: xr.Dataset
645
+ generic field xarray dataset
646
+ field_id: Union[int, np.ndarray]
647
+ field_id of the dataset
648
+ field_times: list
649
+ Unique times for the dataset (when not partitioning by FIELD_ID)
650
+
651
+ Returns:
652
+ -------
653
+ tuple : tuple[list, dict]
654
+ The dimensions and coordinates to use with field data variables. The
655
+ dimensions are produced as a list of dimension names, and the
656
+ coordinates as a dict for xarray coords.
657
+ """
658
+
659
+ coords = {"sky_dir_label": ["ra", "dec"]}
660
+
661
+ # field_times is the same as the time axis in the main MSv4 dataset and is used if more than one field is present.
662
+ if field_times is not None:
663
+ coords["time"] = field_times
664
+ dims = ["time", "sky_dir_label"]
665
+ coords["field_name"] = (
666
+ "time",
667
+ np.char.add(field_xds["NAME"].data, np.char.add("_", field_id.astype(str))),
668
+ )
669
+ # coords["field_id"] = ("time", field_id)
670
+ else:
671
+ coords["field_name"] = field_xds["NAME"].values.item() + "_" + str(field_id)
672
+ # coords["field_id"] = field_id
673
+ dims = ["sky_dir_label"]
674
+
675
+ return dims, coords
676
+
677
+
637
678
  def extract_field_info_and_check_ephemeris(
638
- field_and_source_xds, in_file, field_id, field_times, is_single_dish
679
+ field_and_source_xds: xr.Dataset,
680
+ in_file: str,
681
+ field_id: Union[int, np.ndarray],
682
+ field_times: list,
683
+ is_single_dish: bool,
639
684
  ):
640
685
  """
641
686
  Create field information and check for ephemeris in the FIELD table folder.
@@ -646,8 +691,12 @@ def extract_field_info_and_check_ephemeris(
646
691
  The xarray dataset to which the field and source information will be added.
647
692
  in_file : str
648
693
  The path to the input file.
649
- field_id : int
694
+ field_id : Union[int, np.ndarray]
650
695
  The ID of the field.
696
+ field_times: list
697
+ Time axis of the main MSv4 dataset; used when more than one field is present, otherwise None
698
+ is_single_dish: bool
699
+ Whether to extract single dish (FIELD_REFERENCE_CENTER) info
651
700
 
652
701
  Returns:
653
702
  -------
@@ -658,7 +707,6 @@ def extract_field_info_and_check_ephemeris(
658
707
  ephemeris_table_name : str
659
708
  The name of the ephemeris table.
660
709
  """
661
- coords = {}
662
710
 
663
711
  unique_field_id = unique_1d(
664
712
  field_id
@@ -718,6 +766,8 @@ def extract_field_info_and_check_ephemeris(
718
766
  f"Could not find ephemeris table for field_id {field_id}. Ephemeris information will not be included in the field_and_source_xds."
719
767
  )
720
768
 
769
+ dims, coords = make_field_dims_and_coords(field_xds, field_id, field_times)
770
+
721
771
  if is_single_dish:
722
772
  field_data_variables = {
723
773
  "REFERENCE_DIR": "FIELD_REFERENCE_CENTER",
@@ -729,27 +779,10 @@ def extract_field_info_and_check_ephemeris(
729
779
  # "REFERENCE_DIR": "FIELD_REFERENCE_CENTER",
730
780
  }
731
781
 
732
- field_measures_type = "sky_coord"
733
-
734
- coords["sky_dir_label"] = ["ra", "dec"]
735
782
  field_column_description = field_xds.attrs["other"]["msv2"]["ctds_attrs"][
736
783
  "column_descriptions"
737
784
  ]
738
785
 
739
- # field_times is the same as the time axis in the main MSv4 dataset and is used if more than one field is present.
740
- if field_times is not None:
741
- coords["time"] = field_times
742
- dims = ["time", "sky_dir_label"]
743
- coords["field_name"] = (
744
- "time",
745
- np.char.add(field_xds["NAME"].data, np.char.add("_", field_id.astype(str))),
746
- )
747
- # coords["field_id"] = ("time", field_id)
748
- else:
749
- coords["field_name"] = field_xds["NAME"].values.item() + "_" + str(field_id)
750
- # coords["field_id"] = field_id
751
- dims = ["sky_dir_label"]
752
-
753
786
  for generic_name, msv4_name in field_data_variables.items():
754
787
 
755
788
  delay_dir_ref_col = "DelayDir_Ref"
@@ -772,6 +805,7 @@ def extract_field_info_and_check_ephemeris(
772
805
  }
773
806
  )
774
807
 
808
+ field_measures_type = "sky_coord"
775
809
  field_and_source_xds[msv4_name].attrs["type"] = field_measures_type
776
810
 
777
811
  field_and_source_xds = field_and_source_xds.assign_coords(coords)
@@ -1,4 +1,4 @@
1
- import graphviper.utils.logger as logger
1
+ import toolviper.utils.logger as logger
2
2
  from xradio._utils.schema import column_description_casacore_to_msv4_measure
3
3
 
4
4
  col_to_data_variable_names = {