xradio 0.0.30__tar.gz → 0.0.33__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (83)
  1. {xradio-0.0.30/src/xradio.egg-info → xradio-0.0.33}/PKG-INFO +1 -1
  2. {xradio-0.0.30 → xradio-0.0.33}/pyproject.toml +1 -1
  3. xradio-0.0.33/src/xradio/__init__.py +13 -0
  4. xradio-0.0.33/src/xradio/_utils/common.py +60 -0
  5. xradio-0.0.30/src/xradio/_utils/array.py → xradio-0.0.33/src/xradio/_utils/list_and_array.py +28 -3
  6. {xradio-0.0.30 → xradio-0.0.33}/src/xradio/vis/__init__.py +3 -5
  7. {xradio-0.0.30 → xradio-0.0.33}/src/xradio/vis/_processing_set.py +22 -10
  8. {xradio-0.0.30 → xradio-0.0.33}/src/xradio/vis/_vis_utils/_ms/_tables/load_main_table.py +5 -5
  9. {xradio-0.0.30 → xradio-0.0.33}/src/xradio/vis/_vis_utils/_ms/_tables/read.py +77 -60
  10. {xradio-0.0.30 → xradio-0.0.33}/src/xradio/vis/_vis_utils/_ms/_tables/read_main_table.py +18 -19
  11. {xradio-0.0.30 → xradio-0.0.33}/src/xradio/vis/_vis_utils/_ms/_tables/read_subtables.py +6 -6
  12. {xradio-0.0.30 → xradio-0.0.33}/src/xradio/vis/_vis_utils/_ms/_tables/write.py +2 -4
  13. {xradio-0.0.30 → xradio-0.0.33}/src/xradio/vis/_vis_utils/_ms/_tables/write_exp_api.py +19 -13
  14. {xradio-0.0.30 → xradio-0.0.33}/src/xradio/vis/_vis_utils/_ms/chunks.py +5 -72
  15. {xradio-0.0.30 → xradio-0.0.33}/src/xradio/vis/_vis_utils/_ms/conversion.py +182 -45
  16. xradio-0.0.33/src/xradio/vis/_vis_utils/_ms/create_field_and_source_xds.py +741 -0
  17. {xradio-0.0.30 → xradio-0.0.33}/src/xradio/vis/_vis_utils/_ms/descr.py +9 -9
  18. {xradio-0.0.30 → xradio-0.0.33}/src/xradio/vis/_vis_utils/_ms/msv2_to_msv4_meta.py +2 -2
  19. xradio-0.0.33/src/xradio/vis/_vis_utils/_ms/msv4_sub_xdss.py +564 -0
  20. {xradio-0.0.30 → xradio-0.0.33}/src/xradio/vis/_vis_utils/_ms/partition_queries.py +32 -196
  21. {xradio-0.0.30 → xradio-0.0.33}/src/xradio/vis/_vis_utils/_ms/partitions.py +18 -22
  22. {xradio-0.0.30 → xradio-0.0.33}/src/xradio/vis/_vis_utils/_ms/subtables.py +2 -2
  23. {xradio-0.0.30 → xradio-0.0.33}/src/xradio/vis/_vis_utils/_utils/partition_attrs.py +2 -2
  24. {xradio-0.0.30 → xradio-0.0.33}/src/xradio/vis/_vis_utils/_utils/xds_helper.py +12 -12
  25. {xradio-0.0.30 → xradio-0.0.33}/src/xradio/vis/_vis_utils/ms.py +1 -43
  26. {xradio-0.0.30 → xradio-0.0.33}/src/xradio/vis/_vis_utils/zarr.py +0 -1
  27. {xradio-0.0.30 → xradio-0.0.33}/src/xradio/vis/convert_msv2_to_processing_set.py +19 -11
  28. {xradio-0.0.30 → xradio-0.0.33}/src/xradio/vis/load_processing_set.py +0 -3
  29. {xradio-0.0.30 → xradio-0.0.33}/src/xradio/vis/read_processing_set.py +8 -8
  30. {xradio-0.0.30 → xradio-0.0.33/src/xradio.egg-info}/PKG-INFO +1 -1
  31. {xradio-0.0.30 → xradio-0.0.33}/src/xradio.egg-info/SOURCES.txt +2 -4
  32. xradio-0.0.30/src/xradio/__init__.py +0 -13
  33. xradio-0.0.30/src/xradio/_utils/common.py +0 -3
  34. xradio-0.0.30/src/xradio/vis/_vis_utils/_ms/_tables/create_field_and_source_xds.py +0 -710
  35. xradio-0.0.30/src/xradio/vis/_vis_utils/_ms/msv4_sub_xdss.py +0 -389
  36. xradio-0.0.30/src/xradio/vis/_vis_utils/ms_column_descriptions_dicts.py +0 -1360
  37. xradio-0.0.30/src/xradio/vis/vis_io.py +0 -146
  38. {xradio-0.0.30 → xradio-0.0.33}/LICENSE.txt +0 -0
  39. {xradio-0.0.30 → xradio-0.0.33}/MANIFEST.in +0 -0
  40. {xradio-0.0.30 → xradio-0.0.33}/README.md +0 -0
  41. {xradio-0.0.30 → xradio-0.0.33}/setup.cfg +0 -0
  42. {xradio-0.0.30 → xradio-0.0.33}/src/xradio/_utils/__init__.py +0 -0
  43. {xradio-0.0.30 → xradio-0.0.33}/src/xradio/_utils/_casacore/tables.py +0 -0
  44. {xradio-0.0.30 → xradio-0.0.33}/src/xradio/_utils/zarr/__init__.py +0 -0
  45. {xradio-0.0.30 → xradio-0.0.33}/src/xradio/_utils/zarr/common.py +0 -0
  46. {xradio-0.0.30 → xradio-0.0.33}/src/xradio/image/__init__.py +0 -0
  47. {xradio-0.0.30 → xradio-0.0.33}/src/xradio/image/_util/__init__.py +0 -0
  48. {xradio-0.0.30 → xradio-0.0.33}/src/xradio/image/_util/_casacore/__init__.py +0 -0
  49. {xradio-0.0.30 → xradio-0.0.33}/src/xradio/image/_util/_casacore/common.py +0 -0
  50. {xradio-0.0.30 → xradio-0.0.33}/src/xradio/image/_util/_casacore/xds_from_casacore.py +0 -0
  51. {xradio-0.0.30 → xradio-0.0.33}/src/xradio/image/_util/_casacore/xds_to_casacore.py +0 -0
  52. {xradio-0.0.30 → xradio-0.0.33}/src/xradio/image/_util/_fits/xds_from_fits.py +0 -0
  53. {xradio-0.0.30 → xradio-0.0.33}/src/xradio/image/_util/_zarr/common.py +0 -0
  54. {xradio-0.0.30 → xradio-0.0.33}/src/xradio/image/_util/_zarr/xds_from_zarr.py +0 -0
  55. {xradio-0.0.30 → xradio-0.0.33}/src/xradio/image/_util/_zarr/xds_to_zarr.py +0 -0
  56. {xradio-0.0.30 → xradio-0.0.33}/src/xradio/image/_util/_zarr/zarr_low_level.py +0 -0
  57. {xradio-0.0.30 → xradio-0.0.33}/src/xradio/image/_util/casacore.py +0 -0
  58. {xradio-0.0.30 → xradio-0.0.33}/src/xradio/image/_util/common.py +0 -0
  59. {xradio-0.0.30 → xradio-0.0.33}/src/xradio/image/_util/fits.py +0 -0
  60. {xradio-0.0.30 → xradio-0.0.33}/src/xradio/image/_util/image_factory.py +0 -0
  61. {xradio-0.0.30 → xradio-0.0.33}/src/xradio/image/_util/zarr.py +0 -0
  62. {xradio-0.0.30 → xradio-0.0.33}/src/xradio/image/image.py +0 -0
  63. {xradio-0.0.30 → xradio-0.0.33}/src/xradio/schema/__init__.py +0 -0
  64. {xradio-0.0.30 → xradio-0.0.33}/src/xradio/schema/bases.py +0 -0
  65. {xradio-0.0.30 → xradio-0.0.33}/src/xradio/schema/check.py +0 -0
  66. {xradio-0.0.30 → xradio-0.0.33}/src/xradio/schema/dataclass.py +0 -0
  67. {xradio-0.0.30 → xradio-0.0.33}/src/xradio/schema/metamodel.py +0 -0
  68. {xradio-0.0.30 → xradio-0.0.33}/src/xradio/schema/typing.py +0 -0
  69. {xradio-0.0.30 → xradio-0.0.33}/src/xradio/vis/_vis_utils/__init__.py +0 -0
  70. {xradio-0.0.30 → xradio-0.0.33}/src/xradio/vis/_vis_utils/_ms/_tables/load.py +0 -0
  71. {xradio-0.0.30 → xradio-0.0.33}/src/xradio/vis/_vis_utils/_ms/_tables/table_query.py +0 -0
  72. {xradio-0.0.30 → xradio-0.0.33}/src/xradio/vis/_vis_utils/_ms/msv2_msv3.py +0 -0
  73. {xradio-0.0.30 → xradio-0.0.33}/src/xradio/vis/_vis_utils/_ms/msv4_infos.py +0 -0
  74. {xradio-0.0.30 → xradio-0.0.33}/src/xradio/vis/_vis_utils/_ms/optimised_functions.py +0 -0
  75. {xradio-0.0.30 → xradio-0.0.33}/src/xradio/vis/_vis_utils/_utils/cds.py +0 -0
  76. {xradio-0.0.30 → xradio-0.0.33}/src/xradio/vis/_vis_utils/_utils/stokes_types.py +0 -0
  77. {xradio-0.0.30 → xradio-0.0.33}/src/xradio/vis/_vis_utils/_zarr/encoding.py +0 -0
  78. {xradio-0.0.30 → xradio-0.0.33}/src/xradio/vis/_vis_utils/_zarr/read.py +0 -0
  79. {xradio-0.0.30 → xradio-0.0.33}/src/xradio/vis/_vis_utils/_zarr/write.py +0 -0
  80. {xradio-0.0.30 → xradio-0.0.33}/src/xradio/vis/schema.py +0 -0
  81. {xradio-0.0.30 → xradio-0.0.33}/src/xradio.egg-info/dependency_links.txt +0 -0
  82. {xradio-0.0.30 → xradio-0.0.33}/src/xradio.egg-info/requires.txt +0 -0
  83. {xradio-0.0.30 → xradio-0.0.33}/src/xradio.egg-info/top_level.txt +0 -0
{xradio-0.0.30/src/xradio.egg-info → xradio-0.0.33}/PKG-INFO

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: xradio
-Version: 0.0.30
+Version: 0.0.33
 Summary: Xarray Radio Astronomy Data IO
 Author-email: Jan-Willem Steeb <jsteeb@nrao.edu>
 License: BSD 3-Clause License
{xradio-0.0.30 → xradio-0.0.33}/pyproject.toml

@@ -1,6 +1,6 @@
 [project]
 name = "xradio"
-version = "0.0.30"
+version = "0.0.33"
 description = " Xarray Radio Astronomy Data IO"
 authors = [
     {name = "Jan-Willem Steeb", email="jsteeb@nrao.edu"},
xradio-0.0.33/src/xradio/__init__.py (new file)

@@ -0,0 +1,13 @@
+import os
+from graphviper.utils.logger import setup_logger
+
+# _logger_name = "xradio"
+# if os.getenv("VIPER_LOGGER_NAME") != _logger_name:
+#     os.environ["VIPER_LOGGER_NAME"] = _logger_name
+#     setup_logger(
+#         logger_name="xradio",
+#         log_to_term=True,
+#         log_to_file=False,  # True
+#         log_file="xradio-logfile",
+#         log_level="DEBUG",
+#     )
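Note: the new top-level `__init__.py` imports graphviper's `setup_logger` but leaves the actual setup commented out. A minimal sketch of what enabling it would look like, using exactly the keyword values from the commented block above (the re-run guard is taken from that block as well); illustrative only, not part of the diff:

```python
import os
from graphviper.utils.logger import setup_logger

# Mirror of the commented-out block above, uncommented for illustration.
if os.getenv("VIPER_LOGGER_NAME") != "xradio":
    os.environ["VIPER_LOGGER_NAME"] = "xradio"
    setup_logger(
        logger_name="xradio",
        log_to_term=True,          # echo log records to the terminal
        log_to_file=False,         # flip to True to also write the log file
        log_file="xradio-logfile",
        log_level="DEBUG",
    )
```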
xradio-0.0.33/src/xradio/_utils/common.py (new file)

@@ -0,0 +1,60 @@
+import numpy as np
+
+_deg_to_rad = np.pi / 180
+
+
+def cast_to_str(x):
+    if isinstance(x, list):
+        return x[0]
+    else:
+        return x
+
+
+def convert_to_si_units(xds):
+    for data_var in xds.data_vars:
+        if "units" in xds[data_var].attrs:
+            for u_i, u in enumerate(xds[data_var].attrs["units"]):
+                if u == "km":
+                    xds[data_var][..., u_i] = xds[data_var][..., u_i] * 1e3
+                    xds[data_var].attrs["units"][u_i] = "m"
+                if u == "km/s":
+                    xds[data_var][..., u_i] = xds[data_var][..., u_i] * 1e3
+                    xds[data_var].attrs["units"][u_i] = "m/s"
+                if u == "deg":
+                    xds[data_var][..., u_i] = xds[data_var][..., u_i] * np.pi / 180
+                    xds[data_var].attrs["units"][u_i] = "rad"
+                if u == "Au" or u == "AU":
+                    xds[data_var][..., u_i] = xds[data_var][..., u_i] * 149597870700
+                    xds[data_var].attrs["units"][u_i] = "m"
+                if u == "Au/d" or u == "AU/d":
+                    xds[data_var][..., u_i] = (
+                        xds[data_var][..., u_i] * 149597870700 / 86400
+                    )
+                    xds[data_var].attrs["units"][u_i] = "m/s"
+                if u == "arcsec":
+                    xds[data_var][..., u_i] = xds[data_var][..., u_i] * np.pi / 648000
+                    xds[data_var].attrs["units"][u_i] = "rad"
+    return xds
+
+
+def add_position_offsets(dv_1, dv_2):
+    # Fun with angles: We are adding angles together. We need to make sure that the results are between -pi and pi.
+    new_pos = dv_1 + dv_2
+
+    while np.any(new_pos[:, 0] > np.pi) or np.any(new_pos[:, 0] < -np.pi):
+        new_pos[:, 0] = np.where(
+            new_pos[:, 0] > np.pi, new_pos[:, 0] - 2 * np.pi, new_pos[:, 0]
+        )
+        new_pos[:, 0] = np.where(
+            new_pos[:, 0] < -np.pi, new_pos[:, 0] + 2 * np.pi, new_pos[:, 0]
+        )
+
+    while np.any(new_pos[:, 1] > np.pi / 2) or np.any(new_pos[:, 1] < -np.pi / 2):
+        new_pos[:, 1] = np.where(
+            new_pos[:, 1] > np.pi / 2, new_pos[:, 1] - np.pi, new_pos[:, 1]
+        )
+        new_pos[:, 1] = np.where(
+            new_pos[:, 1] < -np.pi / 2, new_pos[:, 1] + np.pi, new_pos[:, 1]
+        )
+
+    return new_pos
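Note: `convert_to_si_units` treats a data variable's `units` attribute as a list aligned with the variable's last axis and rescales each slice in place (km→m, deg→rad, AU→m, and so on), while `add_position_offsets` re-wraps summed angles so longitude stays in [-π, π] and latitude in [-π/2, π/2]. A toy sketch of the unit conversion; the variable and dimension names here are hypothetical, not from the package:

```python
import numpy as np
import xarray as xr
from xradio._utils.common import convert_to_si_units

# Hypothetical ephemeris-style variable: last axis holds (lon, lat, distance).
pos = xr.DataArray(
    np.array([[45.0, -30.0, 1.0]]),
    dims=("time", "sky_pos_label"),
    attrs={"units": ["deg", "deg", "km"]},
)
xds = xr.Dataset({"SOURCE_POSITION": pos})
xds = convert_to_si_units(xds)
print(xds["SOURCE_POSITION"].values)          # [[ 0.78539816 -0.52359878 1000. ]]
print(xds["SOURCE_POSITION"].attrs["units"])  # ['rad', 'rad', 'm']
```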
xradio-0.0.30/src/xradio/_utils/array.py → xradio-0.0.33/src/xradio/_utils/list_and_array.py (renamed)

@@ -1,6 +1,23 @@
 """Contains optimised functions to be used within other modules."""

 import numpy as np
+import xarray as xr
+
+
+def to_list(x):
+    if isinstance(x, (list, np.ndarray)):
+        if x.ndim == 0:
+            return [x.item()]
+        return list(x)  # needed for json serialization
+    return [x]
+
+
+def to_np_array(x):
+    if isinstance(x, (list, np.ndarray)):
+        if x.ndim == 0:
+            return np.array([x.item()])
+        return np.array(x)  # needed for json serialization
+    return np.array([x])


 def check_if_consistent(array: np.ndarray, array_name: str) -> np.ndarray:
@@ -8,9 +25,9 @@ def check_if_consistent(array: np.ndarray, array_name: str) -> np.ndarray:

     Parameters
     ----------
-    col : _type_
+    array : _type_
         _description_
-    col_name : _type_
+    array_name : _type_
         _description_

     Returns
@@ -45,7 +62,15 @@ def unique_1d(array: np.ndarray) -> np.ndarray:
         a sorted array of unique values.

     """
-    return np.sort(pd.unique(array))
+    if isinstance(array, xr.core.dataarray.DataArray):
+        array = array.values
+
+    if array.ndim == 0:
+        return np.array([array.item()])
+
+    return np.sort(
+        pd.unique(array)
+    )  # Don't remove the sort! It will cause errors that are very difficult to detect. Specifically create_field_info_and_check_ephemeris has a TaQL query that requires this.


 def pairing_function(antenna_pairs: np.ndarray) -> np.ndarray:
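Note: the renamed module gains `to_list`/`to_np_array` normalizers, and `unique_1d` now accepts `xarray.DataArray` and 0-d inputs. As written, `to_list` consults `x.ndim` even when `x` is a plain Python list, which would raise `AttributeError`, so the sketch below sticks to ndarrays and scalars:

```python
import numpy as np
from xradio._utils.list_and_array import to_list, to_np_array, unique_1d

print(to_list(np.array(5)))             # [5]        (0-d array becomes a 1-item list)
print(to_list(np.array([1, 2])))        # [1, 2]
print(to_np_array(3.5))                 # array([3.5])
print(unique_1d(np.array([3, 1, 3])))   # [1 3]  -- sorted, as the comment above warns downstream TaQL queries require
```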
{xradio-0.0.30 → xradio-0.0.33}/src/xradio/vis/__init__.py

@@ -2,14 +2,12 @@ from .read_processing_set import read_processing_set
 from .load_processing_set import load_processing_set
 from .convert_msv2_to_processing_set import convert_msv2_to_processing_set

-from .vis_io import read_vis, load_vis_block, write_vis
-
 from .schema import VisibilityXds

 __all__ = [
-    "read_vis",
-    "load_vis_block",
-    "write_vis",
+    "read_processing_set",
+    "load_processing_set",
+    "convert_msv2_to_processing_set",
     "VisibilityXds",
     "PointingXds",
     "AntennaXds",
{xradio-0.0.30 → xradio-0.0.33}/src/xradio/vis/_processing_set.py

@@ -30,12 +30,14 @@ class processing_set(dict):
     def _summary(self, data_group="base"):
         summary_data = {
             "name": [],
-            "intent": [],
+            "obs_mode": [],
             "shape": [],
             "polarization": [],
-            "spw_id": [],
+            "spw_name": [],
+            # "field_id": [],
             "field_name": [],
-            "field_id": [],
+            # "source_id": [],
+            "source_name": [],
             "field_coords": [],
             "start_frequency": [],
             "end_frequency": [],
@@ -45,9 +47,9 @@

         for key, value in self.items():
             summary_data["name"].append(key)
-            summary_data["intent"].append(value.attrs["partition_info"]["intent"])
-            summary_data["spw_id"].append(
-                value.attrs["partition_info"]["spectral_window_id"]
+            summary_data["obs_mode"].append(value.attrs["partition_info"]["obs_mode"])
+            summary_data["spw_name"].append(
+                value.attrs["partition_info"]["spectral_window_name"]
             )
             summary_data["polarization"].append(value.polarization.values)

@@ -61,15 +63,25 @@ class processing_set(dict):

             summary_data["shape"].append(value[data_name].shape)

-            summary_data["field_id"].append(value.attrs["partition_info"]["field_id"])
+            # summary_data["field_id"].append(value.attrs["partition_info"]["field_id"])
+            # summary_data["source_id"].append(value.attrs["partition_info"]["source_id"])
+
             summary_data["field_name"].append(
-                value[data_name].attrs["field_and_source_xds"].attrs["field_name"]
+                value.attrs["partition_info"]["field_name"]
+            )
+            summary_data["source_name"].append(
+                value.attrs["partition_info"]["source_name"]
             )
             summary_data["start_frequency"].append(value["frequency"].values[0])
             summary_data["end_frequency"].append(value["frequency"].values[-1])

             if value[data_name].attrs["field_and_source_xds"].is_ephemeris:
                 summary_data["field_coords"].append("Ephemeris")
+            elif (
+                "time"
+                in value[data_name].attrs["field_and_source_xds"][center_name].coords
+            ):
+                summary_data["field_coords"].append("Multi-Phase-Center")
             else:
                 ra_dec_rad = (
                     value[data_name].attrs["field_and_source_xds"][center_name].values
@@ -106,8 +118,8 @@ class processing_set(dict):
             assert (
                 frame == ms_xds.frequency.attrs["frame"]
             ), "Frequency reference frame not consistent in processing set."
-            if ms_xds.frequency.attrs["spw_id"] not in spw_ids:
-                spw_ids.append(ms_xds.frequency.attrs["spw_id"])
+            if ms_xds.frequency.attrs["spectral_window_id"] not in spw_ids:
+                spw_ids.append(ms_xds.frequency.attrs["spectral_window_id"])
             freq_axis_list.append(ms_xds.frequency)

         freq_axis = xr.concat(freq_axis_list, dim="frequency").sortby("frequency")
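Note: the summary now reports `obs_mode`, `spw_name`, and `source_name` from `partition_info` (replacing `intent`, `spw_id`, and `field_id`) and labels partitions whose phase center varies with time as "Multi-Phase-Center". A small sketch reading those same attribute keys directly; the store path is hypothetical:

```python
from xradio.vis import read_processing_set

ps = read_processing_set("obs.vis.zarr")  # hypothetical store
for name, ms_xds in ps.items():
    info = ms_xds.attrs["partition_info"]  # keys as used in the hunk above
    print(name, info["obs_mode"], info["spectral_window_name"],
          info["field_name"], info["source_name"])
```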
{xradio-0.0.30 → xradio-0.0.33}/src/xradio/vis/_vis_utils/_ms/_tables/load_main_table.py

@@ -16,7 +16,7 @@ from xradio.vis._vis_utils._ms._tables.read_main_table import (
     get_baselines,
     get_baseline_indices,
 )
-from xradio._utils.array import unique_1d
+from xradio._utils.list_and_array import unique_1d


 def load_expanded_main_table_chunk(
@@ -187,7 +187,7 @@ def load_ddi_cols_chunk(
         cell_shape = cdata.shape
         if len(cell_shape) == 0:
             col_dims = dims[:2]
-            mvars[col.lower()] = xr.DataArray(
+            mvars[col] = xr.DataArray(
                 load_col_chunk(
                     tb_tool, col, (ctlen, cblen), tidxs, bidxs, didxs, None, None
                 ),
@@ -196,7 +196,7 @@

         elif col == "UVW":
             col_dims = dims[:2] + ["uvw_coords"]
-            mvars[col.lower()] = xr.DataArray(
+            mvars[col] = xr.DataArray(
                 load_col_chunk(
                     tb_tool, col, (ctlen, cblen, 3), tidxs, bidxs, didxs, None, None
                 ),
@@ -206,7 +206,7 @@
         elif len(cell_shape) == 1:
             pols, col_dims = get_col_1d_pols(cell_shape, dims, chan_cnt, pol_cnt, chunk)
             cshape = (ctlen, cblen) + (pols[1] - pols[0] + 1,)
-            mvars[col.lower()] = xr.DataArray(
+            mvars[col] = xr.DataArray(
                 load_col_chunk(tb_tool, col, cshape, tidxs, bidxs, didxs, pols, None),
                 dims=col_dims,
             )
@@ -215,7 +215,7 @@
             chans, pols = get_col_2d_chans_pols(cell_shape, chan_cnt, pol_cnt, chunk)
             cshape = (ctlen, cblen) + (chans[1] - chans[0] + 1, pols[1] - pols[0] + 1)
             col_dims = dims
-            mvars[col.lower()] = xr.DataArray(
+            mvars[col] = xr.DataArray(
                 load_col_chunk(tb_tool, col, cshape, tidxs, bidxs, didxs, chans, pols),
                 dims=col_dims,
             )
{xradio-0.0.30 → xradio-0.0.33}/src/xradio/vis/_vis_utils/_ms/_tables/read.py

@@ -279,7 +279,10 @@ def add_units_measures(
     col_descrs = cc_attrs["column_descriptions"]
     # TODO: Should probably loop the other way around, over mvars
     for col in col_descrs:
-        var_name = col.lower()
+        if col == "TIME":
+            var_name = "time"
+        else:
+            var_name = col
         if var_name in mvars and "keywords" in col_descrs[col]:
             if "QuantumUnits" in col_descrs[col]["keywords"]:
                 cc_units = col_descrs[col]["keywords"]["QuantumUnits"]
@@ -364,12 +367,12 @@ def make_freq_attrs(spw_xds: xr.Dataset, spw_id: int) -> Dict[str, Any]:
     ctds_cols = spw_xds.attrs["other"]["msv2"]["ctds_attrs"]["column_descriptions"]
     cfreq = ctds_cols["CHAN_FREQ"]

-    cf_attrs = spw_xds.chan_freq.attrs
+    cf_attrs = spw_xds.data_vars["CHAN_FREQ"].attrs
     if "MEASINFO" in cfreq["keywords"] and "VarRefCol" in cfreq["keywords"]["MEASINFO"]:
         fattrs = cfreq["keywords"]["MEASINFO"]
         var_ref_col = fattrs["VarRefCol"]
         # This should point to the SPW/MEAS_FREQ_REF col
-        meas_freq_ref_idx = spw_xds.data_vars[var_ref_col.lower()].values[spw_id]
+        meas_freq_ref_idx = spw_xds.data_vars[var_ref_col].values[spw_id]

         if "TabRefCodes" not in fattrs or "TabRefTypes" not in fattrs:
             # Datasets like vla/ic2233_1.ms say "VarRefCol" but "TabRefTypes" is missing
@@ -384,8 +387,8 @@ def make_freq_attrs(spw_xds: xr.Dataset, spw_id: int) -> Dict[str, Any]:
         }

         # Also set the 'VarRefCol' for CHAN_FREQ and REF_FREQUENCEY
-        spw_xds.data_vars["chan_freq"].attrs.update(cf_attrs)
-        spw_xds.data_vars["ref_frequency"].attrs.update(cf_attrs)
+        spw_xds.data_vars["CHAN_FREQ"].attrs.update(cf_attrs)
+        spw_xds.data_vars["REF_FREQUENCY"].attrs.update(cf_attrs)

     return cf_attrs

@@ -440,18 +443,18 @@ def redimension_ms_subtable(xds: xr.Dataset, subt_name: str) -> xr.Dataset:
        (one dimension for every columns)
     """
     subt_key_cols = {
-        "DOPPLER": ["doppler_id", "source_id"],
+        "DOPPLER": ["DOPPLER_ID", "SOURCE_ID"],
         "FREQ_OFFSET": [
-            "antenna1",
-            "antenna2",
-            "feed_id",
-            "spectral_window_id",
-            "time",
+            "ANTENNA1",
+            "ANTENNA2",
+            "FEED_ID",
+            "SPECTRAL_WINDOW_ID",
+            "TIME",
         ],
-        "POINTING": ["time", "antenna_id"],
-        "SOURCE": ["source_id", "time", "spectral_window_id"],
-        "SYSCAL": ["antenna_id", "feed_id", "spectral_window_id", "time"],
-        "WEATHER": ["antenna_id", "time"],
+        "POINTING": ["TIME", "ANTENNA_ID"],
+        "SOURCE": ["SOURCE_ID", "TIME", "SPECTRAL_WINDOW_ID"],
+        "SYSCAL": ["ANTENNA_ID", "FEED_ID", "SPECTRAL_WINDOW_ID", "TIME"],
+        "WEATHER": ["ANTENNA_ID", "TIME"],
         # added tables (MSv3 but not preent in MSv2). Build it from "EPHEMi_... tables
         # Not clear what to do about 'time' var/dim: , "time"],
         "EPHEMERIDES": ["ephemeris_row_id", "ephemeris_id"],
@@ -476,10 +479,13 @@ def redimension_ms_subtable(xds: xr.Dataset, subt_name: str) -> xr.Dataset:
         # we need to reset to the original type.
         for var in rxds.data_vars:
             if rxds[var].dtype != xds[var].dtype:
-                rxds[var] = rxds[var].astype(xds[var].dtype)
+                # beware of gaps/empty==nan values when redimensioning
+                with np.errstate(invalid="ignore"):
+                    rxds[var] = rxds[var].astype(xds[var].dtype)
     except Exception as exc:
         logger.warning(
-            f"Cannot expand rows to {key_dims}, possibly duplicate values in those coordinates. Exception: {exc}"
+            f"Cannot expand rows in table {subt_name} to {key_dims}, possibly duplicate values in those coordinates. "
+            f"Exception: {exc}"
         )
         rxds = xds.copy()

@@ -500,9 +506,9 @@ def add_ephemeris_vars(tname: str, xds: xr.Dataset) -> xr.Dataset:
         ephem_id = 0

     xds["ephemeris_id"] = np.uint32(ephem_id) * xr.ones_like(
-        xds["mjd"], dtype=np.uint32
+        xds["MJD"], dtype=np.uint32
     )
-    xds = xds.rename({"mjd": "time"})
+    xds = xds.rename({"MJD": "time"})
     xds["ephemeris_row_id"] = (
         xr.zeros_like(xds["time"], dtype=np.uint32) + xds["row"].values
     )
@@ -529,7 +535,7 @@ def is_nested_ms(attrs: Dict) -> bool:
     )


-def read_generic_table(
+def load_generic_table(
     inpath: str,
     tname: str,
     timecols: Union[List[str], None] = None,
@@ -574,7 +580,7 @@
     infile = str(infile.expanduser())
     if not os.path.isdir(infile):
         raise ValueError(
-            f"invalid input filename to read_generic_table: {infile} table {tname}"
+            f"invalid input filename to load_generic_table: {infile} table {tname}"
        )

     cc_attrs = extract_table_attributes(infile)
@@ -632,7 +638,14 @@
         )
     )

-    if tname in ["DOPPLER", "FREQ_OFFSET", "POINTING", "SOURCE", "SYSCAL", "WEATHER"]:
+    if tname in [
+        "DOPPLER",
+        "FREQ_OFFSET",
+        "POINTING",
+        "SOURCE",
+        "SYSCAL",
+        "WEATHER",
+    ]:
         xds = redimension_ms_subtable(xds, tname)

     if is_ephem_subtable(tname):
@@ -747,13 +760,13 @@ def load_generic_cols(
         dict of coordinates and dict of data vars.
     """

-    col_cells = find_loadable_filled_cols(tb_tool, ignore)
+    col_types = find_loadable_cols(tb_tool, ignore)

     trows = tb_tool.row(ignore, exclude=True)[:]

     # Produce coords and data vars from MS columns
     mcoords, mvars = {}, {}
-    for col in col_cells.keys():
+    for col in col_types.keys():
         try:
             # TODO
             # benchmark np.stack() performance
@@ -779,7 +792,7 @@ def load_generic_cols(
             if len(set([isinstance(row[col], dict) for row in trows])) > 1:
                 continue  # can't deal with this case

-            data = handle_variable_col_issues(inpath, col, col_cells, trows)
+            data = handle_variable_col_issues(inpath, col, col_types[col], trows)

             if len(data) == 0:
                 continue
@@ -788,9 +801,9 @@ def load_generic_cols(
             inpath, tb_tool, col, data, timecols
         )
         if array_type == "coord":
-            mcoords[col.lower()] = array_data
+            mcoords[col] = array_data
         elif array_type == "data_var":
-            mvars[col.lower()] = array_data
+            mvars[col] = array_data

     return mcoords, mvars

@@ -827,7 +840,7 @@ def load_fixed_size_cols(
         dict of coordinates and dict of data vars, ready to construct an xr.Dataset
     """

-    loadable_cols = find_loadable_filled_cols(tb_tool, ignore)
+    loadable_cols = find_loadable_cols(tb_tool, ignore)

     # Produce coords and data vars from MS columns
     mcoords, mvars = {}, {}
@@ -849,20 +862,23 @@ def load_fixed_size_cols(
             inpath, tb_tool, col, data, timecols
         )
         if array_type == "coord":
-            mcoords[col.lower()] = array_data
+            mcoords[col] = array_data
         elif array_type == "data_var":
-            mvars[col.lower()] = array_data
+            mvars[col] = array_data

     return mcoords, mvars


-def find_loadable_filled_cols(
+def find_loadable_cols(
     tb_tool: tables.table, ignore: Union[List[str], None]
-) -> Dict:
+) -> Dict[str, str]:
     """
-    For a table, finds the columns that are:
-    - loadable = not of record type, and not to be ignored
-    - filled = the column cells are populated.
+    For a table, finds the columns that are loadable = not of record type,
+    and not to be ignored
+    In extreme cases of variable size columns, it can happen that all the
+    cells are empty (iscelldefined() == false). This is still considered a
+    loadable column, even though all values of the resulting data var will
+    be empty.

     Parameters
     ----------
@@ -874,17 +890,15 @@
     Returns
     -------
     Dict
-        dict of {column name => first cell} for columns that can/should be loaded
+        dict of {column name: column type} for columns that can/should be loaded
     """

     colnames = tb_tool.colnames()
-    # columns that are not populated are skipped. record columns are not supported
+    table_desc = tb_tool.getdesc()
     loadable_cols = {
-        col: tb_tool.getcell(col, 0)
+        col: table_desc[col]["valueType"]
         for col in colnames
-        if (col not in ignore)
-        and (tb_tool.iscelldefined(col, 0))
-        and tb_tool.coldatatype(col) != "record"
+        if (col not in ignore) and tb_tool.coldatatype(col) != "record"
     }
     return loadable_cols

@@ -923,7 +937,6 @@ def raw_col_data_to_coords_vars(
     # Almost sure that when TIME is present (in a standard MS subt) it
     # is part of the key. But what about non-std subtables, ASDM subts?
     subts_with_time_key = (
-        "FEED",
         "FLAG_CMD",
         "FREQ_OFFSET",
         "HISTORY",
@@ -978,7 +991,7 @@


 def handle_variable_col_issues(
-    inpath: str, col: str, col_cells: dict, trows: tables.tablerow
+    inpath: str, col: str, col_type: str, trows: tables.tablerow
 ) -> np.ndarray:
     """
     load variable-size array columns, padding with nans wherever
@@ -992,8 +1005,8 @@ def handle_variable_col_issues(
         path name of the MS
     col : str
         column being loaded
-    col_cells : dict
-        col: cell} values
+    col_type : str
+        type of the column cell values
     trows : tables.tablerow
         rows from a table as loaded by tables.row()

@@ -1008,7 +1021,7 @@ def handle_variable_col_issues(

     mshape = np.array(max([np.array(row[col]).shape for row in trows]))
     try:
-        pad_nan = get_pad_nan(col_cells[col])
+        pad_nan = get_pad_nan(np.array((), dtype=col_type))

         # TODO
         # benchmark np.stack() performance
@@ -1175,6 +1188,7 @@ def read_col_conversion(
     cshape: Tuple[int],
     tidxs: np.ndarray,
     bidxs: np.ndarray,
+    use_table_iter: bool,
 ) -> np.ndarray:
     """
     Function to perform delayed reads from table columns when converting
@@ -1232,23 +1246,26 @@
     data = np.full(cshape + extra_dimensions, np.nan, dtype=col_dtype)

     # Use built-in casacore table iterator to populate the data column by unique times.
-    start_row = 0
-    for ts in tb_tool.iter("TIME", sort=False):
-        num_rows = ts.nrows()
+    if use_table_iter:
+        start_row = 0
+        for ts in tb_tool.iter("TIME", sort=False):
+            num_rows = ts.nrows()

-        # Create small temporary array to store the partial column
-        tmp_arr = np.full((num_rows,) + extra_dimensions, np.nan, dtype=col_dtype)
+            # Create small temporary array to store the partial column
+            tmp_arr = np.full((num_rows,) + extra_dimensions, np.nan, dtype=col_dtype)

-        # Note we don't use `getcol()` because it's less safe. See:
-        # https://github.com/casacore/python-casacore/issues/130#issuecomment-463202373
-        ts.getcolnp(col, tmp_arr)
+            # Note we don't use `getcol()` because it's less safe. See:
+            # https://github.com/casacore/python-casacore/issues/130#issuecomment-463202373
+            ts.getcolnp(col, tmp_arr)

-        # Get the slice of rows contained in `tmp_arr`.
-        # Used to get the relevant integer indexes from `tidxs` and `bidxs`
-        tmp_slice = slice(start_row, start_row + num_rows)
+            # Get the slice of rows contained in `tmp_arr`.
+            # Used to get the relevant integer indexes from `tidxs` and `bidxs`
+            tmp_slice = slice(start_row, start_row + num_rows)

-        # Copy `tmp_arr` into correct elements of `tmp_arr`
-        data[tidxs[tmp_slice], bidxs[tmp_slice]] = tmp_arr
-        start_row += num_rows
+            # Copy `tmp_arr` into correct elements of `tmp_arr`
+            data[tidxs[tmp_slice], bidxs[tmp_slice]] = tmp_arr
+            start_row += num_rows
+    else:
+        data[tidxs, bidxs] = tb_tool.getcol(col)

     return data
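Note: the new `use_table_iter` flag selects between the existing TIME-block iteration (python-casacore `iter` plus `getcolnp`, which avoids the `getcol` pitfall linked in the comment) and a single whole-column `getcol` scattered into the `(time, baseline)` grid. The scatter is plain fancy indexing; a toy illustration with made-up index arrays:

```python
import numpy as np

rows = np.arange(6)            # six MS rows
tidxs = rows // 3              # time index of each row     -> [0 0 0 1 1 1]
bidxs = rows % 3               # baseline index of each row -> [0 1 2 0 1 2]
col_data = np.arange(6.0)      # stand-in for tb_tool.getcol(col)

data = np.full((2, 3), np.nan)
data[tidxs, bidxs] = col_data  # the use_table_iter=False path in one shot
print(data)                    # [[0. 1. 2.] [3. 4. 5.]]
```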
{xradio-0.0.30 → xradio-0.0.33}/src/xradio/vis/_vis_utils/_ms/_tables/read_main_table.py

@@ -17,23 +17,23 @@ from .read import (
 )

 from .table_query import open_table_ro, open_query
-from xradio._utils.array import (
+from xradio._utils.list_and_array import (
     unique_1d,
     pairing_function,
     inverse_pairing_function,
 )

 rename_msv2_cols = {
-    "antenna1": "antenna1_id",
-    "antenna2": "antenna2_id",
-    "feed1": "feed1_id",
-    "feed2": "feed2_id",
+    "ANTENNA1": "antenna1_id",
+    "ANTENNA2": "antenna2_id",
+    "FEED1": "feed1_id",
+    "FEED2": "feed2_id",
     # optional cols:
-    "weight_spectrum": "weight",
-    "corrected_data": "vis_corrected",
-    "data": "vis",
-    "model_data": "vis_model",
-    "float_data": "autocorr",
+    "WEIGHT_SPECTRUM": "WEIGHT",
+    "CORRECTED_DATA": "VIS_CORRECTED",
+    "DATA": "VIS",
+    "MODEL_DATA": "VIS_MODEL",
+    "FLOAT_DATA": "AUTOCORR",
 }


@@ -83,11 +83,11 @@ def redim_id_data_vars(mvars: Dict[str, xr.DataArray]) -> Dict[str, xr.DataArray
     """
     # Vars to drop baseline dim
     var_names = [
-        "array_id",
-        "observation_id",
-        "processor_id",
-        "scan_number",
-        "state_id",
+        "ARRAY_ID",
+        "OBSERVATION_ID",
+        "PROCESSOR_ID",
+        "SCAN_NUMBER",
+        "STATE_ID",
     ]
     for vname in var_names:
         if "baseline" in mvars[vname].coords:
@@ -566,7 +566,7 @@ def concat_tvars_to_mvars(

     mvars = {}
     for tvr in tvars.keys():
-        data_var = tvr.lower()
+        data_var = tvr
         if tvr == "UVW":
             mvars[data_var] = xr.DataArray(
                 dask.array.concatenate(tvars[tvr], axis=0),
@@ -742,8 +742,7 @@ def read_flat_main_table(
     # now concat all the dask chunks from each time to make the xds
     mvars = {}
     for kk in bvars.keys():
-        # from uppercase MS col names to lowercase xds var names:
-        data_var = kk.lower()
+        data_var = kk
         if len(bvars[kk]) == 0:
             ignore += [kk]
             continue
@@ -766,7 +765,7 @@ def read_flat_main_table(
     )

     mvars["time"] = xr.DataArray(
-        convert_casacore_time(mvars["time"].values), dims=["row"]
+        convert_casacore_time(mvars["TIME"].values), dims=["row"]
     ).chunk({"row": chunks[0]})

     # add xds global attributes
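Note: across these hunks the converter stops lower-casing MS column names, so xds variables keep the MSv2 upper-case names (`DATA`, `UVW`, `SCAN_NUMBER`, ...), with `rename_msv2_cols` mapping the handful of exceptions. A toy sketch of applying the map; the dict comprehension is illustrative, not the package's own code path:

```python
import numpy as np
from xradio.vis._vis_utils._ms._tables.read_main_table import rename_msv2_cols

raw_cols = {
    "ANTENNA1": np.array([0, 0, 1]),
    "DATA": np.zeros((3, 4, 2), dtype=complex),
    "UVW": np.zeros((3, 3)),
}
# Columns in the map are renamed; everything else keeps its upper-case MS name.
renamed = {rename_msv2_cols.get(name, name): col for name, col in raw_cols.items()}
print(sorted(renamed))  # ['UVW', 'VIS', 'antenna1_id']
```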