xradio 0.0.56__py3-none-any.whl → 0.0.59__py3-none-any.whl

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
Files changed (63)
  1. xradio/__init__.py +2 -2
  2. xradio/_utils/_casacore/casacore_from_casatools.py +12 -2
  3. xradio/_utils/_casacore/tables.py +1 -0
  4. xradio/_utils/coord_math.py +22 -23
  5. xradio/_utils/dict_helpers.py +76 -11
  6. xradio/_utils/schema.py +5 -2
  7. xradio/_utils/zarr/common.py +1 -73
  8. xradio/image/_util/_casacore/xds_from_casacore.py +49 -33
  9. xradio/image/_util/_casacore/xds_to_casacore.py +41 -14
  10. xradio/image/_util/_fits/xds_from_fits.py +146 -35
  11. xradio/image/_util/casacore.py +4 -3
  12. xradio/image/_util/common.py +4 -4
  13. xradio/image/_util/image_factory.py +8 -8
  14. xradio/image/image.py +45 -5
  15. xradio/measurement_set/__init__.py +19 -9
  16. xradio/measurement_set/_utils/__init__.py +1 -3
  17. xradio/measurement_set/_utils/_msv2/__init__.py +0 -0
  18. xradio/measurement_set/_utils/_msv2/_tables/read.py +17 -76
  19. xradio/measurement_set/_utils/_msv2/_tables/read_main_table.py +2 -685
  20. xradio/measurement_set/_utils/_msv2/conversion.py +174 -156
  21. xradio/measurement_set/_utils/_msv2/create_antenna_xds.py +9 -16
  22. xradio/measurement_set/_utils/_msv2/create_field_and_source_xds.py +128 -222
  23. xradio/measurement_set/_utils/_msv2/msv2_to_msv4_meta.py +1 -2
  24. xradio/measurement_set/_utils/_msv2/msv4_info_dicts.py +8 -7
  25. xradio/measurement_set/_utils/_msv2/msv4_sub_xdss.py +31 -74
  26. xradio/measurement_set/_utils/_msv2/partition_queries.py +1 -261
  27. xradio/measurement_set/_utils/_msv2/subtables.py +0 -107
  28. xradio/measurement_set/_utils/_utils/interpolate.py +60 -0
  29. xradio/measurement_set/_utils/_zarr/encoding.py +2 -7
  30. xradio/measurement_set/convert_msv2_to_processing_set.py +0 -2
  31. xradio/measurement_set/load_processing_set.py +2 -2
  32. xradio/measurement_set/measurement_set_xdt.py +20 -16
  33. xradio/measurement_set/open_processing_set.py +1 -3
  34. xradio/measurement_set/processing_set_xdt.py +54 -841
  35. xradio/measurement_set/schema.py +122 -132
  36. xradio/schema/check.py +95 -101
  37. xradio/schema/dataclass.py +159 -22
  38. xradio/schema/export.py +99 -0
  39. xradio/schema/metamodel.py +51 -16
  40. xradio/schema/typing.py +5 -5
  41. xradio/sphinx/schema_table.py +41 -77
  42. {xradio-0.0.56.dist-info → xradio-0.0.59.dist-info}/METADATA +20 -5
  43. xradio-0.0.59.dist-info/RECORD +65 -0
  44. {xradio-0.0.56.dist-info → xradio-0.0.59.dist-info}/WHEEL +1 -1
  45. xradio/image/_util/fits.py +0 -13
  46. xradio/measurement_set/_utils/_msv2/_tables/load.py +0 -66
  47. xradio/measurement_set/_utils/_msv2/_tables/load_main_table.py +0 -490
  48. xradio/measurement_set/_utils/_msv2/_tables/read_subtables.py +0 -398
  49. xradio/measurement_set/_utils/_msv2/_tables/write.py +0 -323
  50. xradio/measurement_set/_utils/_msv2/_tables/write_exp_api.py +0 -388
  51. xradio/measurement_set/_utils/_msv2/chunks.py +0 -115
  52. xradio/measurement_set/_utils/_msv2/descr.py +0 -165
  53. xradio/measurement_set/_utils/_msv2/msv2_msv3.py +0 -7
  54. xradio/measurement_set/_utils/_msv2/partitions.py +0 -392
  55. xradio/measurement_set/_utils/_utils/cds.py +0 -40
  56. xradio/measurement_set/_utils/_utils/xds_helper.py +0 -404
  57. xradio/measurement_set/_utils/_zarr/read.py +0 -263
  58. xradio/measurement_set/_utils/_zarr/write.py +0 -329
  59. xradio/measurement_set/_utils/msv2.py +0 -106
  60. xradio/measurement_set/_utils/zarr.py +0 -133
  61. xradio-0.0.56.dist-info/RECORD +0 -78
  62. {xradio-0.0.56.dist-info → xradio-0.0.59.dist-info}/licenses/LICENSE.txt +0 -0
  63. {xradio-0.0.56.dist-info → xradio-0.0.59.dist-info}/top_level.txt +0 -0
@@ -1,11 +1,13 @@
1
+ from collections import deque
1
2
  import datetime
2
3
  import importlib
3
4
  import numcodecs
4
5
  import os
5
6
  import pathlib
6
7
  import time
7
- from typing import Dict, Union
8
+ from typing import Callable, Dict, Union
8
9
 
10
+ import dask.array as da
9
11
  import numpy as np
10
12
  import xarray as xr
11
13
  import traceback
@@ -52,7 +54,9 @@ from ._tables.read import (
52
54
  )
53
55
  from ._tables.read_main_table import get_baselines, get_baseline_indices, get_utimes_tol
54
56
  from .._utils.stokes_types import stokes_types
55
- from xradio._utils.list_and_array import check_if_consistent, unique_1d, to_list
57
+
58
+ from xradio._utils.list_and_array import check_if_consistent, unique_1d
59
+ from xradio._utils.dict_helpers import make_spectral_coord_reference_dict, make_quantity
56
60
 
57
61
 
58
62
  def parse_chunksize(
@@ -218,7 +222,6 @@ def mem_chunksize_to_dict_main_balanced(
218
222
  dictionary of chunk sizes (as dim->size)
219
223
  """
220
224
 
221
- dim_names = [name for name in xds_dim_sizes.keys()]
222
225
  dim_sizes = [size for size in xds_dim_sizes.values()]
223
226
  # Fix fourth dim (polarization) to all (not free to auto-calculate)
224
227
  free_dims_mask = np.array([True, True, True, False])
@@ -429,8 +432,49 @@ def calc_indx_for_row_split(tb_tool, taql_where):
429
432
 
430
433
 
431
434
  def create_coordinates(
432
- xds, in_file, ddi, utime, interval, baseline_ant1_id, baseline_ant2_id, scan_id
433
- ):
435
+ xds: xr.Dataset,
436
+ in_file: str,
437
+ ddi: int,
438
+ utime: np.ndarray,
439
+ interval: np.ndarray,
440
+ baseline_ant1_id: np.ndarray,
441
+ baseline_ant2_id: np.ndarray,
442
+ scan_id: np.ndarray,
443
+ ) -> tuple[xr.Dataset, int]:
444
+ """
445
+ Creates coordinates of a VisibilityXds/SpectrumXds and assigns them to the input
446
+ correlated dataset.
447
+
448
+ Parameters
449
+ ----------
450
+ xds :
451
+ dataset to add the coords to
452
+ in_file :
453
+ path to input MSv2
454
+ ddi :
455
+ DDI index (row) for this MSv4
456
+ utime :
457
+ unique times, for the time coordinate
458
+ interval :
459
+ interval col values from the MSv2, for the integration_time attribute
460
+ of the time coord
461
+ baseline_ant1_id :
462
+ ANTENNA1 ids to be used as coord
463
+ baseline_ant2_id :
464
+ ANTENNA2 ids to be used as coord
465
+ scan_id :
466
+ SCAN_ID values from MSv2, for the scan_name coord
467
+
468
+ Returns
469
+ -------
470
+ tuple[xr.Dataset, int]
471
+ A tuple of:
472
+ - The input dataset with coordinates added and populated with all MSv4 schema
473
+ attributes.
474
+ - The MSv2 spectral_window_id of this DDI/MSv4, which is no longer added to
475
+ the frequency coord but is required to create other secondary xdss (antenna,
476
+ gain_curve, phase_calibration, system_calibration, field_and_source).
477
+ """
434
478
  coords = {
435
479
  "time": utime,
436
480
  "baseline_antenna1_id": ("baseline_id", baseline_ant1_id),
@@ -484,16 +528,16 @@ def create_coordinates(
484
528
  spw_name = spw_name + "_" + str(spectral_window_id)
485
529
 
486
530
  xds.frequency.attrs["spectral_window_name"] = spw_name
531
+ xds.frequency.attrs["spectral_window_intent"] = "UNSPECIFIED"
487
532
  msv4_measure = column_description_casacore_to_msv4_measure(
488
533
  freq_column_description["REF_FREQUENCY"],
489
534
  ref_code=spectral_window_xds["MEAS_FREQ_REF"].data,
490
535
  )
491
- xds.frequency.attrs["reference_frequency"] = {
492
- "dims": [],
493
- "data": float(spectral_window_xds.REF_FREQUENCY.values),
494
- "attrs": msv4_measure,
495
- }
496
- xds.frequency.attrs["spectral_window_id"] = spectral_window_id
536
+ xds.frequency.attrs["reference_frequency"] = make_spectral_coord_reference_dict(
537
+ float(spectral_window_xds.REF_FREQUENCY.values),
538
+ msv4_measure["units"],
539
+ msv4_measure["observer"],
540
+ )
497
541
 
498
542
  # Add if doppler table is present
499
543
  # xds.frequency.attrs["doppler_velocity"] =
@@ -512,14 +556,9 @@ def create_coordinates(
512
556
  freq_column_description["CHAN_WIDTH"],
513
557
  ref_code=spectral_window_xds["MEAS_FREQ_REF"].data,
514
558
  )
515
- if not msv4_measure:
516
- msv4_measure["type"] = "quantity"
517
- msv4_measure["units"] = ["Hz"]
518
- xds.frequency.attrs["channel_width"] = {
519
- "dims": [],
520
- "data": np.abs(unique_chan_width[0]),
521
- "attrs": msv4_measure,
522
- }
559
+ xds.frequency.attrs["channel_width"] = make_quantity(
560
+ np.abs(unique_chan_width[0]), msv4_measure["units"] if msv4_measure else "Hz"
561
+ )
523
562
 
524
563
  ###### Create Time Coordinate ######
525
564
  main_table_attrs = extract_table_attributes(in_file)
@@ -532,16 +571,11 @@ def create_coordinates(
532
571
  msv4_measure = column_description_casacore_to_msv4_measure(
533
572
  main_column_descriptions["INTERVAL"]
534
573
  )
535
- if not msv4_measure:
536
- msv4_measure["type"] = "quantity"
537
- msv4_measure["units"] = ["s"]
538
- xds.time.attrs["integration_time"] = {
539
- "dims": [],
540
- "data": interval,
541
- "attrs": msv4_measure,
542
- }
543
- xds.time.attrs["effective_integration_time"] = "EFFECTIVE_INTEGRATION_TIME"
544
- return xds
574
+ xds.time.attrs["integration_time"] = make_quantity(
575
+ interval, msv4_measure["units"] if msv4_measure else "s"
576
+ )
577
+
578
+ return xds, spectral_window_id
545
579
 
546
580
 
547
581
  def find_min_max_times(tb_tool: tables.table, taql_where: str) -> tuple:
@@ -584,91 +618,115 @@ def create_data_variables(
584
618
  parallel_mode,
585
619
  main_chunksize,
586
620
  ):
587
-
588
- # Get time chunks
589
- time_chunksize = None
590
- if parallel_mode == "time":
591
- try:
592
- time_chunksize = main_chunksize["time"]
593
- except KeyError:
594
- # If time isn't chunked then `read_col_conversion_dask` is slower than `read_col_conversion_numpy`
595
- logger.warning(
596
- "'time' isn't specified in `main_chunksize`. Defaulting to `parallel_mode = 'none'`."
597
- )
598
- parallel_mode = "none"
599
-
600
- # Set read_col_conversion from value of `parallel_mode` argument
601
- # TODO: To make this compatible with multi-node conversion, `read_col_conversion_dask` and TableManager must be pickled.
602
- # Casacore will make this difficult
603
- global read_col_conversion
604
- if parallel_mode == "time":
605
- read_col_conversion = read_col_conversion_dask
606
- else:
607
- read_col_conversion = read_col_conversion_numpy
621
+ time_chunksize = main_chunksize.get("time", None) if main_chunksize else None
622
+ if parallel_mode == "time" and time_chunksize is None:
623
+ logger.warning(
624
+ "'time' isn't specified in `main_chunksize`. Defaulting to `parallel_mode = 'none'`."
625
+ )
626
+ parallel_mode = "none"
608
627
 
609
628
  # Create Data Variables
610
629
  with table_manager.get_table() as tb_tool:
611
630
  col_names = tb_tool.colnames()
612
631
 
632
+ target_cols = set(col_names) & set(col_to_data_variable_names.keys())
633
+ if target_cols.issuperset({"WEIGHT", "WEIGHT_SPECTRUM"}):
634
+ target_cols.remove("WEIGHT")
635
+
613
636
  main_table_attrs = extract_table_attributes(in_file)
614
637
  main_column_descriptions = main_table_attrs["column_descriptions"]
615
- for col in col_names:
616
- if col in col_to_data_variable_names:
617
- if (col == "WEIGHT") and ("WEIGHT_SPECTRUM" in col_names):
618
- continue
619
- try:
620
- start = time.time()
621
- if col == "WEIGHT":
622
- xds = get_weight(
623
- xds,
624
- col,
625
- table_manager,
626
- time_baseline_shape,
627
- tidxs,
628
- bidxs,
629
- use_table_iter,
630
- main_column_descriptions,
631
- time_chunksize,
632
- )
633
- else:
634
- xds[col_to_data_variable_names[col]] = xr.DataArray(
635
- read_col_conversion(
636
- table_manager,
637
- col,
638
- time_baseline_shape,
639
- tidxs,
640
- bidxs,
641
- use_table_iter,
642
- time_chunksize,
643
- ),
644
- dims=col_dims[col],
645
- )
646
-
647
- xds[col_to_data_variable_names[col]].attrs.update(
648
- create_attribute_metadata(col, main_column_descriptions)
638
+
639
+ # Use a double-ended queue in case WEIGHT_SPECTRUM conversion fails, and
640
+ # we need to add WEIGHT to list of columns to convert during iteration
641
+ target_cols = deque(target_cols)
642
+
643
+ while target_cols:
644
+ col = target_cols.popleft()
645
+ datavar_name = col_to_data_variable_names[col]
646
+ read_col_conversion = get_read_col_conversion_function(col, parallel_mode)
647
+
648
+ try:
649
+ start = time.time()
650
+ col_data = read_col_conversion(
651
+ table_manager,
652
+ col,
653
+ time_baseline_shape,
654
+ tidxs,
655
+ bidxs,
656
+ use_table_iter,
657
+ time_chunksize,
658
+ )
659
+
660
+ if col == "TIME_CENTROID":
661
+ col_data = convert_casacore_time(col_data, False)
662
+
663
+ elif col == "WEIGHT":
664
+ col_data = repeat_weight_array(
665
+ col_data, parallel_mode, xds.sizes, main_chunksize
649
666
  )
650
667
 
668
+ xds[datavar_name] = xr.DataArray(
669
+ col_data,
670
+ dims=col_dims[col],
671
+ attrs=create_attribute_metadata(col, main_column_descriptions),
672
+ )
673
+ logger.debug(f"Time to read column {col} : {time.time() - start}")
674
+
675
+ except Exception as exc:
676
+ logger.debug(f"Could not load column {col}, exception: {exc}")
677
+ logger.debug(traceback.format_exc())
678
+
679
+ if col == "WEIGHT_SPECTRUM" and "WEIGHT" in col_names:
651
680
  logger.debug(
652
- "Time to read column " + str(col) + " : " + str(time.time() - start)
681
+ "Failed to convert WEIGHT_SPECTRUM column: "
682
+ "will attempt to use WEIGHT instead"
653
683
  )
654
- except Exception as exc:
655
- logger.debug(f"Could not load column {col}, exception: {exc}")
656
- logger.debug(traceback.format_exc())
657
-
658
- if ("WEIGHT_SPECTRUM" == col) and (
659
- "WEIGHT" in col_names
660
- ): # Bogus WEIGHT_SPECTRUM column, need to use WEIGHT.
661
- xds = get_weight(
662
- xds,
663
- "WEIGHT",
664
- table_manager,
665
- time_baseline_shape,
666
- tidxs,
667
- bidxs,
668
- use_table_iter,
669
- main_column_descriptions,
670
- time_chunksize,
671
- )
684
+ target_cols.append("WEIGHT")
685
+
686
+
687
+ def get_read_col_conversion_function(col_name: str, parallel_mode: str) -> Callable:
688
+ """
689
+ Returns the appropriate read_col_conversion function: use the dask version
690
+ for large columns and parallel_mode="time", or the numpy version otherwise.
691
+ """
692
+ large_columns = {
693
+ "DATA",
694
+ "CORRECTED_DATA",
695
+ "MODEL_DATA",
696
+ "WEIGHT_SPECTRUM",
697
+ "WEIGHT",
698
+ "FLAG",
699
+ }
700
+ return (
701
+ read_col_conversion_dask
702
+ if parallel_mode == "time" and col_name in large_columns
703
+ else read_col_conversion_numpy
704
+ )
705
+
706
+
707
+ def repeat_weight_array(
708
+ weight_arr,
709
+ parallel_mode: str,
710
+ main_sizes: dict[str, int],
711
+ main_chunksize: dict[str, int],
712
+ ):
713
+ """
714
+ Repeat the weights read from the WEIGHT column along the frequency dimension.
715
+ Returns a dask array if parallel_mode="time", or a numpy array otherwise.
716
+ """
717
+ reshaped_arr = weight_arr[:, :, None, :]
718
+ repeats = (1, 1, main_sizes["frequency"], 1)
719
+
720
+ if parallel_mode == "time":
721
+ result = da.tile(reshaped_arr, repeats)
722
+ # da.tile() adds each repeat as a separate chunk, so rechunking is necessary
723
+ chunksizes = tuple(
724
+ main_chunksize.get(dim, main_sizes[dim])
725
+ for dim in ("time", "baseline_id", "frequency", "polarization")
726
+ )
727
+ return result.rechunk(chunksizes)
728
+
729
+ return np.tile(reshaped_arr, repeats)
672
730
 
673
731
 
674
732
  def add_missing_data_var_attrs(xds):
@@ -681,7 +739,7 @@ def add_missing_data_var_attrs(xds):
681
739
  data_var_names = ["SPECTRUM", "SPECTRUM_CORRECTED"]
682
740
  for var_name in data_var_names:
683
741
  if var_name in xds.data_vars:
684
- xds.data_vars[var_name].attrs["units"] = [""]
742
+ xds.data_vars[var_name].attrs["units"] = ""
685
743
 
686
744
  vis_var_names = ["VISIBILITY_MODEL"]
687
745
  for var_name in vis_var_names:
@@ -692,44 +750,11 @@ def add_missing_data_var_attrs(xds):
692
750
  "VISIBILITY"
693
751
  ].attrs["units"]
694
752
  else:
695
- xds.data_vars[var_name].attrs["units"] = [""]
753
+ xds.data_vars[var_name].attrs["units"] = ""
696
754
 
697
755
  return xds
698
756
 
699
757
 
700
- def get_weight(
701
- xds,
702
- col,
703
- table_manager,
704
- time_baseline_shape,
705
- tidxs,
706
- bidxs,
707
- use_table_iter,
708
- main_column_descriptions,
709
- time_chunksize,
710
- ):
711
- xds[col_to_data_variable_names[col]] = xr.DataArray(
712
- np.tile(
713
- read_col_conversion(
714
- table_manager,
715
- col,
716
- time_baseline_shape,
717
- tidxs,
718
- bidxs,
719
- use_table_iter,
720
- time_chunksize,
721
- )[:, :, None, :],
722
- (1, 1, xds.sizes["frequency"], 1),
723
- ),
724
- dims=col_dims[col],
725
- )
726
-
727
- xds[col_to_data_variable_names[col]].attrs.update(
728
- create_attribute_metadata(col, main_column_descriptions)
729
- )
730
- return xds
731
-
732
-
733
758
  def create_taql_query_where(partition_info: dict):
734
759
  main_par_table_cols = [
735
760
  "DATA_DESC_ID",
@@ -771,9 +796,9 @@ def fix_uvw_frame(
771
796
  """
772
797
  if xds.UVW.attrs["frame"] == "ITRF":
773
798
  if is_single_dish:
774
- center_var = "FIELD_REFERENCE_CENTER"
799
+ center_var = "FIELD_REFERENCE_CENTER_DIRECTION"
775
800
  else:
776
- center_var = "FIELD_PHASE_CENTER"
801
+ center_var = "FIELD_PHASE_CENTER_DIRECTION"
777
802
 
778
803
  xds.UVW.attrs["frame"] = field_and_source_xds[center_var].attrs["frame"]
779
804
 
@@ -800,11 +825,9 @@ def estimate_memory_for_partition(in_file: str, partition: dict) -> float:
800
825
  if "shape" in col_descr and isinstance(col_descr["shape"], np.ndarray):
801
826
  # example: "shape": array([15, 4]) => gives pols x channels
802
827
  cells_in_row = col_descr["shape"].prod()
803
- npols = col_descr["shape"][-1]
804
828
  else:
805
829
  first_row = np.array(tb_tool.col(data_col)[0])
806
830
  cells_in_row = np.prod(first_row.shape)
807
- npols = first_row.shape[-1]
808
831
 
809
832
  if col_descr["valueType"] == "complex":
810
833
  # Assume. Otherwise, read first column and get the itemsize:
@@ -911,7 +934,6 @@ def estimate_memory_for_partition(in_file: str, partition: dict) -> float:
911
934
  taql_partition = create_taql_query_where(partition)
912
935
  taql_main = f"select * from $mtable {taql_partition}"
913
936
  with open_table_ro(in_file) as mtable:
914
- col_names = mtable.colnames()
915
937
  with open_query(mtable, taql_main) as tb_tool:
916
938
  # Do not feel tempted to rely on nrows. nrows tends to underestimate memory when baselines are missing.
917
939
  # For some EVN datasets that can easily underestimate by a 50%
@@ -940,6 +962,7 @@ def estimate_memory_for_partition(in_file: str, partition: dict) -> float:
940
962
  + calculate_term_other_msv2_indices(msv2_nrows)
941
963
  + calculate_term_sub_xds(estimate_main_xds)
942
964
  + calculate_term_to_zarr(estimate_main_xds)
965
+ + calculate_term_attrs(estimate_main_xds)
943
966
  )
944
967
  estimate /= GiBYTES_TO_BYTES
945
968
 
@@ -1104,7 +1127,7 @@ def convert_and_write_partition(
1104
1127
  scan_id[tidxs, bidxs] = tb_tool.getcol("SCAN_NUMBER")
1105
1128
  scan_id = np.max(scan_id, axis=1)
1106
1129
 
1107
- xds = create_coordinates(
1130
+ xds, spectral_window_id = create_coordinates(
1108
1131
  xds,
1109
1132
  in_file,
1110
1133
  ddi,
@@ -1172,7 +1195,7 @@ def convert_and_write_partition(
1172
1195
 
1173
1196
  ant_xds = create_antenna_xds(
1174
1197
  in_file,
1175
- xds.frequency.attrs["spectral_window_id"],
1198
+ spectral_window_id,
1176
1199
  antenna_id,
1177
1200
  feed_id,
1178
1201
  telescope_name,
@@ -1181,9 +1204,7 @@ def convert_and_write_partition(
1181
1204
  logger.debug("Time antenna xds " + str(time.time() - start))
1182
1205
 
1183
1206
  start = time.time()
1184
- gain_curve_xds = create_gain_curve_xds(
1185
- in_file, xds.frequency.attrs["spectral_window_id"], ant_xds
1186
- )
1207
+ gain_curve_xds = create_gain_curve_xds(in_file, spectral_window_id, ant_xds)
1187
1208
  logger.debug("Time gain_curve xds " + str(time.time() - start))
1188
1209
 
1189
1210
  start = time.time()
@@ -1193,7 +1214,7 @@ def convert_and_write_partition(
1193
1214
  phase_cal_interp_time = None
1194
1215
  phase_calibration_xds = create_phase_calibration_xds(
1195
1216
  in_file,
1196
- xds.frequency.attrs["spectral_window_id"],
1217
+ spectral_window_id,
1197
1218
  ant_xds,
1198
1219
  time_min_max,
1199
1220
  phase_cal_interp_time,
@@ -1208,6 +1229,7 @@ def convert_and_write_partition(
1208
1229
  sys_cal_interp_time = None
1209
1230
  system_calibration_xds = create_system_calibration_xds(
1210
1231
  in_file,
1232
+ spectral_window_id,
1211
1233
  xds.frequency,
1212
1234
  ant_xds,
1213
1235
  sys_cal_interp_time,
@@ -1269,10 +1291,6 @@ def convert_and_write_partition(
1269
1291
 
1270
1292
  # Create field_and_source_xds (combines field, source and ephemeris data into one super dataset)
1271
1293
  start = time.time()
1272
- if ephemeris_interpolate:
1273
- ephemeris_interp_time = xds.time.values
1274
- else:
1275
- ephemeris_interp_time = None
1276
1294
 
1277
1295
  # if "FIELD_ID" not in partition_scheme:
1278
1296
  # field_id = np.full(time_baseline_shape, -42, dtype=int)
@@ -1298,7 +1316,7 @@ def convert_and_write_partition(
1298
1316
  create_field_and_source_xds(
1299
1317
  in_file,
1300
1318
  field_id,
1301
- xds.frequency.attrs["spectral_window_id"],
1319
+ spectral_window_id,
1302
1320
  field_times,
1303
1321
  is_single_dish,
1304
1322
  time_min_max,
@@ -1330,7 +1348,7 @@ def convert_and_write_partition(
1330
1348
  add_encoding(xds, compressor=compressor, chunks=main_chunksize)
1331
1349
  logger.debug("Time add compressor and chunk " + str(time.time() - start))
1332
1350
 
1333
- file_name = os.path.join(
1351
+ os.path.join(
1334
1352
  out_file,
1335
1353
  pathlib.Path(in_file).name.replace(".ms", "") + "_" + str(ms_v4_id),
1336
1354
  )
@@ -1377,7 +1395,7 @@ def convert_and_write_partition(
1377
1395
  ms_xdt["/phased_array_xds"] = phased_array_xds
1378
1396
 
1379
1397
  if storage_backend == "zarr":
1380
- ms_xdt.to_zarr(store=os.path.join(out_file, ms_v4_name))
1398
+ ms_xdt.to_zarr(store=os.path.join(out_file, ms_v4_name), mode=mode)
1381
1399
  elif storage_backend == "netcdf":
1382
1400
  # xds.to_netcdf(path=file_name+"/MAIN", mode=mode) #Does not work
1383
1401
  raise
@@ -1,5 +1,3 @@
1
- import toolviper.utils.logger as logger
2
- import time
3
1
  from typing import Tuple, Union
4
2
 
5
3
  import numpy as np
@@ -10,7 +8,6 @@ from xradio.measurement_set._utils._msv2.subtables import subt_rename_ids
10
8
  from xradio.measurement_set._utils._msv2._tables.read import (
11
9
  load_generic_table,
12
10
  convert_casacore_time,
13
- convert_casacore_time_to_mjd,
14
11
  make_taql_where_between_min_max,
15
12
  table_exists,
16
13
  )
@@ -20,17 +17,15 @@ from xradio.measurement_set._utils._msv2.msv4_sub_xdss import (
20
17
  )
21
18
 
22
19
  from xradio._utils.list_and_array import (
23
- check_if_consistent,
24
20
  unique_1d,
25
- to_list,
26
- to_np_array,
27
21
  )
22
+ from xradio._utils.dict_helpers import make_quantity_attrs
28
23
 
29
24
 
30
25
  def create_antenna_xds(
31
26
  in_file: str,
32
27
  spectral_window_id: int,
33
- antenna_id: list,
28
+ antenna_id: np.ndarray,
34
29
  feed_id: list,
35
30
  telescope_name: str,
36
31
  partition_polarization: xr.DataArray,
@@ -44,8 +39,8 @@ def create_antenna_xds(
44
39
  Path to the input MSv2.
45
40
  spectral_window_id : int
46
41
  Spectral window ID.
47
- antenna_id : list
48
- List of antenna IDs.
42
+ antenna_id : np.ndarray
43
+ Antenna IDs.
49
44
  feed_id : list
50
45
  List of feed IDs.
51
46
  telescope_name : str
@@ -82,7 +77,7 @@ def create_antenna_xds(
82
77
 
83
78
 
84
79
  def extract_antenna_info(
85
- ant_xds: xr.Dataset, in_file: str, antenna_id: list, telescope_name: str
80
+ ant_xds: xr.Dataset, in_file: str, antenna_id: np.ndarray, telescope_name: str
86
81
  ) -> xr.Dataset:
87
82
  """Reformats MSv2 Antenna table content to MSv4 schema.
88
83
 
@@ -92,8 +87,8 @@ def extract_antenna_info(
92
87
  The dataset that will be updated with antenna information.
93
88
  in_file : str
94
89
  Path to the input MSv2.
95
- antenna_id : list
96
- A list of antenna IDs to extract information for.
90
+ antenna_id : np.array
91
+ Antenna IDs to extract information for.
97
92
  telescope_name : str
98
93
  The name of the telescope.
99
94
 
@@ -138,7 +133,7 @@ def extract_antenna_info(
138
133
  generic_ant_xds, ant_xds, to_new_data_variables, to_new_coords
139
134
  )
140
135
 
141
- ant_xds["ANTENNA_DISH_DIAMETER"].attrs.update({"units": ["m"], "type": "quantity"})
136
+ ant_xds["ANTENNA_DISH_DIAMETER"].attrs.update(make_quantity_attrs(["m"]))
142
137
 
143
138
  ant_xds["ANTENNA_POSITION"].attrs["coordinate_system"] = "geocentric"
144
139
  ant_xds["ANTENNA_POSITION"].attrs["origin_object_name"] = "earth"
@@ -507,9 +502,7 @@ def create_phase_calibration_xds(
507
502
  phase_cal_xds = phase_cal_xds.assign_coords(ant_borrowed_coords | tone_label_coord)
508
503
 
509
504
  # Adjust expected types
510
- phase_cal_xds["time_phase_cal"] = (
511
- phase_cal_xds.time_phase_cal.astype("float64").astype("float64") / 10**9
512
- )
505
+ phase_cal_xds["time_phase_cal"] = phase_cal_xds.time_phase_cal
513
506
 
514
507
  phase_cal_xds = rename_and_interpolate_to_time(
515
508
  phase_cal_xds, "time_phase_cal", phase_cal_interp_time, "phase_cal_xds"