xradio 0.0.55__py3-none-any.whl → 0.0.58__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (64)
  1. xradio/__init__.py +2 -2
  2. xradio/_utils/_casacore/casacore_from_casatools.py +1001 -0
  3. xradio/_utils/_casacore/tables.py +6 -1
  4. xradio/_utils/coord_math.py +22 -23
  5. xradio/_utils/dict_helpers.py +76 -11
  6. xradio/_utils/schema.py +5 -2
  7. xradio/_utils/zarr/common.py +1 -73
  8. xradio/image/_util/_casacore/common.py +11 -3
  9. xradio/image/_util/_casacore/xds_from_casacore.py +59 -35
  10. xradio/image/_util/_casacore/xds_to_casacore.py +47 -16
  11. xradio/image/_util/_fits/xds_from_fits.py +172 -77
  12. xradio/image/_util/casacore.py +9 -4
  13. xradio/image/_util/common.py +4 -4
  14. xradio/image/_util/image_factory.py +8 -8
  15. xradio/image/image.py +45 -5
  16. xradio/measurement_set/__init__.py +19 -9
  17. xradio/measurement_set/_utils/__init__.py +1 -3
  18. xradio/measurement_set/_utils/_msv2/__init__.py +0 -0
  19. xradio/measurement_set/_utils/_msv2/_tables/read.py +35 -90
  20. xradio/measurement_set/_utils/_msv2/_tables/read_main_table.py +6 -686
  21. xradio/measurement_set/_utils/_msv2/_tables/table_query.py +13 -3
  22. xradio/measurement_set/_utils/_msv2/conversion.py +129 -145
  23. xradio/measurement_set/_utils/_msv2/create_antenna_xds.py +9 -16
  24. xradio/measurement_set/_utils/_msv2/create_field_and_source_xds.py +125 -221
  25. xradio/measurement_set/_utils/_msv2/msv2_to_msv4_meta.py +1 -2
  26. xradio/measurement_set/_utils/_msv2/msv4_info_dicts.py +13 -8
  27. xradio/measurement_set/_utils/_msv2/msv4_sub_xdss.py +27 -72
  28. xradio/measurement_set/_utils/_msv2/partition_queries.py +5 -262
  29. xradio/measurement_set/_utils/_msv2/subtables.py +0 -107
  30. xradio/measurement_set/_utils/_utils/interpolate.py +60 -0
  31. xradio/measurement_set/_utils/_zarr/encoding.py +2 -7
  32. xradio/measurement_set/convert_msv2_to_processing_set.py +0 -2
  33. xradio/measurement_set/load_processing_set.py +2 -2
  34. xradio/measurement_set/measurement_set_xdt.py +14 -14
  35. xradio/measurement_set/open_processing_set.py +1 -3
  36. xradio/measurement_set/processing_set_xdt.py +41 -835
  37. xradio/measurement_set/schema.py +96 -123
  38. xradio/schema/check.py +91 -97
  39. xradio/schema/dataclass.py +159 -22
  40. xradio/schema/export.py +99 -0
  41. xradio/schema/metamodel.py +51 -16
  42. xradio/schema/typing.py +5 -5
  43. {xradio-0.0.55.dist-info → xradio-0.0.58.dist-info}/METADATA +43 -11
  44. xradio-0.0.58.dist-info/RECORD +65 -0
  45. {xradio-0.0.55.dist-info → xradio-0.0.58.dist-info}/WHEEL +1 -1
  46. xradio/image/_util/fits.py +0 -13
  47. xradio/measurement_set/_utils/_msv2/_tables/load.py +0 -63
  48. xradio/measurement_set/_utils/_msv2/_tables/load_main_table.py +0 -487
  49. xradio/measurement_set/_utils/_msv2/_tables/read_subtables.py +0 -395
  50. xradio/measurement_set/_utils/_msv2/_tables/write.py +0 -320
  51. xradio/measurement_set/_utils/_msv2/_tables/write_exp_api.py +0 -385
  52. xradio/measurement_set/_utils/_msv2/chunks.py +0 -115
  53. xradio/measurement_set/_utils/_msv2/descr.py +0 -165
  54. xradio/measurement_set/_utils/_msv2/msv2_msv3.py +0 -7
  55. xradio/measurement_set/_utils/_msv2/partitions.py +0 -392
  56. xradio/measurement_set/_utils/_utils/cds.py +0 -40
  57. xradio/measurement_set/_utils/_utils/xds_helper.py +0 -404
  58. xradio/measurement_set/_utils/_zarr/read.py +0 -263
  59. xradio/measurement_set/_utils/_zarr/write.py +0 -329
  60. xradio/measurement_set/_utils/msv2.py +0 -106
  61. xradio/measurement_set/_utils/zarr.py +0 -133
  62. xradio-0.0.55.dist-info/RECORD +0 -77
  63. {xradio-0.0.55.dist-info → xradio-0.0.58.dist-info}/licenses/LICENSE.txt +0 -0
  64. {xradio-0.0.55.dist-info → xradio-0.0.58.dist-info}/top_level.txt +0 -0
@@ -8,12 +8,19 @@ import xarray as xr
8
8
  from numpy.typing import ArrayLike
9
9
 
10
10
  from xradio._utils.coord_math import convert_to_si_units
11
+ from xradio._utils.dict_helpers import (
12
+ make_time_measure_attrs,
13
+ make_spectral_coord_measure_attrs,
14
+ )
11
15
  from xradio._utils.schema import (
12
16
  column_description_casacore_to_msv4_measure,
13
17
  convert_generic_xds_to_xradio_schema,
14
18
  )
15
- from .subtables import subt_rename_ids
16
- from ._tables.read import (
19
+ from xradio.measurement_set._utils._utils.interpolate import (
20
+ interpolate_to_time,
21
+ )
22
+ from xradio.measurement_set._utils._msv2.subtables import subt_rename_ids
23
+ from xradio.measurement_set._utils._msv2._tables.read import (
17
24
  load_generic_table,
18
25
  make_taql_where_between_min_max,
19
26
  table_exists,
@@ -21,12 +28,7 @@ from ._tables.read import (
21
28
  )
22
29
 
23
30
 
24
- standard_time_coord_attrs = {
25
- "type": "time",
26
- "units": ["s"],
27
- "scale": "utc",
28
- "format": "unix",
29
- }
31
+ standard_time_coord_attrs = make_time_measure_attrs(time_format="unix")
30
32
 
31
33
 
32
34
  def rename_and_interpolate_to_time(
@@ -89,57 +91,6 @@ def rename_and_interpolate_to_time(
89
91
  return renamed_time_xds
90
92
 
91
93
 
92
- def interpolate_to_time(
93
- xds: xr.Dataset,
94
- interp_time: Union[xr.DataArray, None],
95
- message_prefix: str,
96
- time_name: str = "time",
97
- ) -> xr.Dataset:
98
- """
99
- Interpolate the time coordinate of the input xarray dataset to the
100
- a data array. This can be used for example to interpolate a pointing_xds
101
- to the time coord of the (main) MSv4, or similarly the ephemeris
102
- data variables of a field_and_source_xds.
103
-
104
- Uses interpolation method "linear", unless the source number of points is
105
- 1 in which case "nearest" is used, to avoid divide-by-zero issues.
106
-
107
- Parameters:
108
- ----------
109
- xds : xr.Dataset
110
- Xarray dataset to interpolate (presumably a pointing_xds or an xds of
111
- ephemeris variables)
112
- interp_time : Union[xr.DataArray, None]
113
- Time axis to interpolate the dataset to (usually main MSv4 time)
114
- message_prefix: str
115
- A prefix for info/debug/etc. messages
116
-
117
- Returns:
118
- -------
119
- interpolated_xds : xr.Dataset
120
- xarray dataset with time axis interpolated to interp_time.
121
- """
122
- if interp_time is not None:
123
- points_before = xds[time_name].size
124
- if points_before > 1:
125
- method = "linear"
126
- else:
127
- method = "nearest"
128
- xds = xds.interp(
129
- {time_name: interp_time.data}, method=method, assume_sorted=True
130
- )
131
- # scan_name sneaks in as a coordinate of the main time axis, drop it
132
- if "scan_name" in xds.coords:
133
- xds = xds.drop_vars("scan_name")
134
- points_after = xds[time_name].size
135
- logger.debug(
136
- f"{message_prefix}: interpolating the time coordinate "
137
- f"from {points_before} to {points_after} points"
138
- )
139
-
140
- return xds
141
-
142
-
143
94
  def make_taql_where_weather(
144
95
  in_file: str, ant_xds_station_name_ids: xr.DataArray
145
96
  ) -> str:
@@ -233,6 +184,7 @@ def prepare_generic_weather_xds_and_station_name(
233
184
  generic_weather_xds = load_generic_table(
234
185
  in_file,
235
186
  "WEATHER",
187
+ timecols=["TIME"],
236
188
  rename_ids=subt_rename_ids["WEATHER"],
237
189
  taql_where=taql_where,
238
190
  )
@@ -282,7 +234,7 @@ def finalize_station_position(
282
234
  # borrow location frame attributes from antenna position
283
235
  weather_xds["STATION_POSITION"].attrs = ant_position_with_ids.attrs
284
236
  else:
285
- # borrow from ant_posision_with_ids but without carrying over other coords
237
+ # borrow from ant_position_with_ids but without carrying over other coords
286
238
  weather_xds = weather_xds.assign(
287
239
  {
288
240
  "STATION_POSITION": (
@@ -318,6 +270,7 @@ def create_weather_xds(in_file: str, ant_position_with_ids: xr.DataArray):
318
270
  generic_weather_xds = load_generic_table(
319
271
  in_file,
320
272
  "WEATHER",
273
+ timecols=["TIME"],
321
274
  rename_ids=subt_rename_ids["WEATHER"],
322
275
  )
323
276
  except ValueError as _exc:
@@ -342,7 +295,7 @@ def create_weather_xds(in_file: str, ant_position_with_ids: xr.DataArray):
342
295
  dims_station_time = ["station_name", "time_weather"]
343
296
  dims_station_time_position = dims_station_time + ["cartesian_pos_label"]
344
297
  to_new_data_variables = {
345
- "H20": ["H2O", dims_station_time],
298
+ "H2O": ["H2O", dims_station_time],
346
299
  "IONOS_ELECTRON": ["IONOS_ELECTRON", dims_station_time],
347
300
  "PRESSURE": ["PRESSURE", dims_station_time],
348
301
  "REL_HUMIDITY": ["REL_HUMIDITY", dims_station_time],
@@ -381,6 +334,8 @@ def create_weather_xds(in_file: str, ant_position_with_ids: xr.DataArray):
381
334
  for data_var in weather_xds:
382
335
  if weather_xds.data_vars[data_var].dtype != np.float64:
383
336
  weather_xds[data_var] = weather_xds[data_var].astype(np.float64)
337
+ if "time_weather" in weather_xds.coords:
338
+ weather_xds.coords["time_weather"].attrs["type"] = "time_weather"
384
339
 
385
340
  return weather_xds
386
341
 
@@ -437,10 +392,10 @@ def correct_generic_pointing_xds(
437
392
  and generic_pointing_xds.sizes["dir"] == 0
438
393
  ):
439
394
  # When some direction variables are "empty" but some are populated properly
440
- if "dim_2" in generic_pointing_xds[key].sizes:
395
+ if "dim_2" in generic_pointing_xds[data_var_name].sizes:
441
396
  data_var_data = xr.DataArray(
442
- generic_pointing_xds[key].values,
443
- dims=generic_pointing_xds[key].dims,
397
+ generic_pointing_xds[data_var_name].values,
398
+ dims=generic_pointing_xds[data_var_name].dims,
444
399
  )
445
400
  else:
446
401
  shape = tuple(
@@ -449,7 +404,7 @@ def correct_generic_pointing_xds(
449
404
  ) + (2,)
450
405
  data_var_data = xr.DataArray(
451
406
  np.full(shape, np.nan),
452
- dims=generic_pointing_xds[key].dims,
407
+ dims=generic_pointing_xds[data_var_name].dims,
453
408
  )
454
409
  correct_pointing_xds[data_var_name].data = data_var_data
455
410
 
@@ -501,6 +456,7 @@ def create_pointing_xds(
501
456
  generic_pointing_xds = load_generic_table(
502
457
  in_file,
503
458
  "POINTING",
459
+ timecols=["TIME"],
504
460
  rename_ids=subt_rename_ids["POINTING"],
505
461
  taql_where=taql_where,
506
462
  )
@@ -608,7 +564,7 @@ def prepare_generic_sys_cal_xds(generic_sys_cal_xds: xr.Dataset) -> xr.Dataset:
608
564
  )
609
565
  elif (
610
566
  "frequency" in generic_sys_cal_xds.sizes
611
- and not "dim_3" in generic_sys_cal_xds.sizes
567
+ and "dim_3" not in generic_sys_cal_xds.sizes
612
568
  ):
613
569
  # because order is (...,frequency,receptor), when frequency is missing
614
570
  # receptor can get wrongly labeled as frequency
@@ -665,6 +621,7 @@ def create_system_calibration_xds(
665
621
  generic_sys_cal_xds = load_generic_table(
666
622
  in_file,
667
623
  "SYSCAL",
624
+ timecols=["TIME"],
668
625
  rename_ids=subt_rename_ids["SYSCAL"],
669
626
  taql_where=(
670
627
  f" where (SPECTRAL_WINDOW_ID = {spectral_window_id})"
@@ -725,11 +682,9 @@ def create_system_calibration_xds(
725
682
  "frequency_system_cal": generic_sys_cal_xds.coords["frequency"].data
726
683
  }
727
684
  sys_cal_xds = sys_cal_xds.assign_coords(frequency_coord)
728
- frequency_measure = {
729
- "type": main_xds_frequency.attrs["type"],
730
- "units": main_xds_frequency.attrs["units"],
731
- "observer": main_xds_frequency.attrs["observer"],
732
- }
685
+ frequency_measure = make_spectral_coord_measure_attrs(
686
+ main_xds_frequency.attrs["units"], main_xds_frequency.attrs["observer"]
687
+ )
733
688
  sys_cal_xds.coords["frequency_system_cal"].attrs.update(frequency_measure)
734
689
 
735
690
  sys_cal_xds = rename_and_interpolate_to_time(
@@ -828,7 +783,7 @@ def create_phased_array_xds(
828
783
  }
829
784
  data_vars["COORDINATE_AXES"].attrs = {
830
785
  "type": "rotation_matrix",
831
- "units": ["undimensioned", "undimensioned", "undimensioned"],
786
+ "units": "dimensionless",
832
787
  }
833
788
  # Remove the "frame" attribute if it exists, because ELEMENT_OFFSET is
834
789
  # defined in a station-local frame for which no standard name exists
@@ -1,14 +1,13 @@
1
1
  import itertools
2
2
  import toolviper.utils.logger as logger
3
- from pathlib import Path
4
- from typing import Dict, List, Tuple, Union
5
3
 
6
4
  import numpy as np
7
- import xarray as xr
8
5
 
9
- from casacore import tables
6
+ try:
7
+ from casacore import tables
8
+ except ImportError:
9
+ import xradio._utils._casacore.casacore_from_casatools as tables
10
10
 
11
- from ._tables.table_query import open_table_ro, open_query
12
11
  from ._tables.read import table_exists
13
12
 
14
13
 
@@ -28,7 +27,7 @@ def create_partitions(in_file: str, partition_scheme: list) -> list[dict]:
28
27
  partition_scheme: list
29
28
  A MS v4 can only contain a single data description (spectral window and polarization setup), and observation mode. Consequently, the MS v2 is partitioned when converting to MS v4.
30
29
  In addition to data description and polarization setup a finer partitioning is possible by specifying a list of partitioning keys. Any combination of the following keys are possible:
31
- "FIELD_ID", "SCAN_NUMBER", "STATE_ID", "SOURCE_ID", "SUB_SCAN_NUMBER".
30
+ "FIELD_ID", "SCAN_NUMBER", "STATE_ID", "SOURCE_ID", "SUB_SCAN_NUMBER", "ANTENNA1".
32
31
  For mosaics where the phase center is rapidly changing (such as VLA on the fly mosaics) partition_scheme should be set to an empty list []. By default, ["FIELD_ID"].
33
32
  Returns
34
33
  -------
@@ -149,259 +148,3 @@ def create_partitions(in_file: str, partition_scheme: list) -> list[dict]:
149
148
  partitions.append(partition_info)
150
149
 
151
150
  return partitions
152
-
153
-
154
- # Used by code that will be deprecated at some stage. See #192
155
- # Still need to clarify what to do about intent string filtering ('WVR', etc.)
156
-
157
-
158
- def make_partition_ids_by_ddi_intent(
159
- infile: str, spw_names: xr.DataArray
160
- ) -> Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray]:
161
- """
162
- Produces arrays of per-partition ddi, scan, state_id, for when
163
- using the partition scheme 'intents' (ddi, scan, subscans(state_ids))
164
-
165
- Parameters
166
- ----------
167
- infile : str
168
- return: arrays with indices that define every partition
169
- spw_names: xr.DataArray
170
-
171
-
172
- Returns
173
- -------
174
- Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray]
175
- arrays with indices that define every partition
176
- """
177
- # TODO: could explore other TAQL alternatives, like
178
- # select ... from ::STATE where OBS_MODE = ...
179
- #
180
- # This will work only if intents are already alphabetically sorted (grouped),
181
- # won't work for alternating intents:
182
- # taql_intents = "select rowid() as ROWS from $state_tbl GROUPBY OBS_MODE "
183
-
184
- with open_table_ro(str(Path(infile, "STATE"))) as state_tbl:
185
- distinct_obs_mode = find_distinct_obs_mode(infile, state_tbl)
186
-
187
- if distinct_obs_mode is None:
188
- return partition_when_empty_state(infile)
189
-
190
- with open_table_ro(infile) as main_tbl:
191
- (
192
- data_desc_id,
193
- state_id_partitions,
194
- intent_names,
195
- ) = make_ddi_state_intent_lists(
196
- main_tbl, state_tbl, distinct_obs_mode, spw_names
197
- )
198
-
199
- # Take whatever scans given by the STATE_IDs and DDIs
200
- scan_number = [None] * len(state_id_partitions)
201
-
202
- return data_desc_id, scan_number, state_id_partitions, intent_names
203
-
204
-
205
- def find_distinct_obs_mode(
206
- infile: str, state_table: tables.table
207
- ) -> Union[List[str], None]:
208
- """
209
- Produce a list of unique "scan/subscan" intents.
210
-
211
- Parameters
212
- ----------
213
- infile : str
214
- Path to the MS
215
- state_table : tables.table
216
- casacore table object to read from
217
-
218
- Returns
219
- -------
220
- Union[List[str], None]
221
- List of unique "scan/subscan" intents as given in the
222
- OBS_MODE column of the STATE subtable. None if the STATE subtable
223
- is empty or there is a problem reading it
224
- """
225
- taql_distinct_intents = "select DISTINCT OBS_MODE from $state_table"
226
- with open_query(state_table, taql_distinct_intents) as query_intents:
227
- if query_intents.nrows() == 0:
228
- logger.warning(
229
- "STATE subtable has no data. Cannot partition by scan/subscan intent"
230
- )
231
- return None
232
-
233
- distinct_obs_mode = query_intents.getcol("OBS_MODE")
234
- logger.debug(
235
- f" Query for distinct OBS_MODE len: {len(distinct_obs_mode)}, values: {distinct_obs_mode}"
236
- )
237
- return distinct_obs_mode
238
-
239
-
240
- def make_ddi_state_intent_lists(
241
- main_tbl: tables.table,
242
- state_tbl: tables.table,
243
- distinct_obs_mode: np.ndarray,
244
- spw_name_by_ddi: Dict[int, str],
245
- ) -> Tuple[np.ndarray, np.ndarray, np.ndarray]:
246
- """
247
- Produce arrays of (ddi indices, state indices, intent string)
248
- for every distinct intent string, where every item represents one
249
- partition of the main table
250
-
251
- As the obs_mode strings have concatenated intent strings from all
252
- the scan and subscan intents, this function has started
253
- implementing some simple heuristics to remove the intent items
254
- that are not related to the respective DDIs (for example WVR
255
- intent is the only kept when the DDI/SPW has WVR in its name). See
256
- call to filter_intents_per_ddi()
257
-
258
- Parameters
259
- ----------
260
- main_tbl : tables.table
261
- main MS table openend as a casacore.tables.table
262
- state_tbl : tables.table
263
- STATE subtable openend as a casacore.tables.table
264
- distinct_obs_mode : np.ndarray
265
- list of unique/distinct OBS_MODE strings from the STATE table
266
- spw_name_by_ddi: Dict[int, str]
267
-
268
-
269
- Returns
270
- -------
271
- Tuple[np.ndarray, np.ndarray, np.ndarray]
272
- arrays of (ddi indices, state indices, intent string)
273
- """
274
- data_desc_id, state_id_partitions, intent_names = [], [], []
275
- for intent in distinct_obs_mode:
276
- where_intent = f"where OBS_MODE = '{intent}'"
277
- taql_states = f"select ROWID() as ROWS from $state_tbl {where_intent}"
278
- with open_query(state_tbl, taql_states) as query_states:
279
- state_ids_for_intent = query_states.getcol("ROWS")
280
-
281
- state_ids = " OR STATE_ID = ".join(np.char.mod("%d", state_ids_for_intent))
282
- taql_partition = (
283
- f"select DISTINCT DATA_DESC_ID from $main_tbl where STATE_ID = {state_ids}"
284
- )
285
- with open_query(main_tbl, taql_partition) as query_ddi_intent:
286
- # No data for these STATE_IDs
287
- if query_ddi_intent.nrows() == 0:
288
- continue
289
-
290
- # Will implicitly take whatever scans given the STATE_IDs
291
- # and DDIs scan_number. Not needed:
292
- # scan_number = query_ddi_intent.getcol("SCAN_NUMBER")
293
- ddis = query_ddi_intent.getcol("DATA_DESC_ID")
294
-
295
- data_desc_id.extend(ddis)
296
- state_id_partitions.extend([state_ids_for_intent] * len(ddis))
297
-
298
- # Try to select/exclude confusing or mixed intent names such as 'WVR#*'
299
- intents_ddi = filter_intents_per_ddi(ddis, "WVR", intent, spw_name_by_ddi)
300
- intent_names.extend(intents_ddi)
301
-
302
- logger.debug(
303
- f"Produced data_desc_id: {data_desc_id},\n state_id_partitions: {state_id_partitions}"
304
- )
305
- return data_desc_id, state_id_partitions, intent_names
306
-
307
-
308
- def filter_intents_per_ddi(
309
- ddis: List[int], substr: str, intents: str, spw_name_by_ddi: Dict[int, str]
310
- ) -> List[str]:
311
- """
312
- For a given pair of:
313
- - substring (say 'WVR') associated with a type of intent we want to differentiate
314
- - intents string (multiple comma-separated scan/subscan intents)
315
- => do: for every DDI passed in the list of ddis, either keep only the
316
- intents that have that substring (if there are any) or drop them, depending on
317
- whether that substring is present in the SPW name. This is to filter in only
318
- the intents that really apply to every DDI/SPW.
319
-
320
- Parameters
321
- ----------
322
- ddis : List[int]
323
- list of ddis for which the intents have to be filtered
324
- substr : str
325
- substring to filter by
326
- intents : str
327
- string with a comma-separated list of individual
328
- scan/subscan intent strings (like scan/subscan intents as stored
329
- in the MS STATE/OBS_MODE
330
- spw_name_by_ddi : Dict[int, str]
331
- SPW names by DDI ID (row index) key
332
-
333
- Returns
334
- -------
335
- List[str]
336
- list where the intents related to 'substr' have been filtered in our out
337
- """
338
- present = substr in intents
339
- # Nothing to effectively filter, full cs-list of intents apply to all DDIs
340
- if not present:
341
- return [intents] * len(ddis)
342
-
343
- every_intent = intents.split(",")
344
- filtered_intents = []
345
- for ddi in ddis:
346
- spw_name = spw_name_by_ddi.get(ddi, "")
347
-
348
- if not spw_name:
349
- # we cannot say / cannot filter
350
- filtered_intents.append(intents)
351
- continue
352
-
353
- # A not-xor to select/deselect (or keep-only/drop) the intents that apply
354
- # to this DDI
355
- ddi_intents = [
356
- intnt for intnt in every_intent if (substr in intnt) == (substr in spw_name)
357
- ]
358
- ddi_intents = ",".join(ddi_intents)
359
- filtered_intents.append(ddi_intents)
360
-
361
- return filtered_intents
362
-
363
-
364
- def partition_when_empty_state(
365
- infile: str,
366
- ) -> Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray]:
367
- """
368
- Generate fallback partition ids when trying to partition by
369
- 'intent' but the STATE table is empty.
370
-
371
- Some MSs have no STATE rows and in the main table STATE_ID==-1
372
- (that is not a valid MSv2 but it happens).
373
-
374
- Parameters
375
- ----------
376
- infile : str
377
- Path to the MS
378
-
379
- Returns
380
- -------
381
- Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray]
382
- same as make_partition_ids_by_ddi_intent but with
383
- effectively only ddi indices and other indices set to None ("any
384
- IDs found")
385
- """
386
- try:
387
- main_table = None
388
-
389
- main_table = tables.table(
390
- infile, readonly=True, lockoptions={"option": "usernoread"}, ack=False
391
- )
392
- taql_ddis = "select DISTINCT DATA_DESC_ID from $main_table"
393
- with open_query(main_table, taql_ddis) as query_per_intent:
394
- # Will take whatever scans given the STATE_IDs and DDIs
395
- # scan_number = query_per_intent.getcol("SCAN_NUMBER")
396
- distinct_ddis = query_per_intent.getcol("DATA_DESC_ID")
397
-
398
- logger.debug(
399
- f"Producing {len(distinct_ddis)} partitions for ddis: {distinct_ddis}"
400
- )
401
- nparts = len(distinct_ddis)
402
-
403
- finally:
404
- if main_table:
405
- main_table.close()
406
-
407
- return distinct_ddis, [None] * nparts, [None] * nparts, [""] * nparts
@@ -1,16 +1,3 @@
1
- import os
2
-
3
- import toolviper.utils.logger as logger
4
-
5
- from pathlib import Path
6
- from typing import Dict, List
7
-
8
- import xarray as xr
9
-
10
- from ._tables.read import load_generic_table, table_exists
11
- from ._tables.read_subtables import read_ephemerides, read_delayed_pointing_table
12
-
13
-
14
1
  subt_rename_ids = {
15
2
  "ANTENNA": {"row": "antenna_id", "dim_1": "xyz"},
16
3
  "FEED": {"dim_1": "xyz", "dim_2": "receptor", "dim_3": "receptor2"},
@@ -27,97 +14,3 @@ subt_rename_ids = {
27
14
  # Would make sense for non-std "WS_NX_STATION_POSITION"
28
15
  "WEATHER": {"dim_1": "xyz"},
29
16
  }
30
-
31
-
32
- def read_ms_subtables(
33
- infile: str, done_subt: List[str], asdm_subtables: bool = False
34
- ) -> Dict[str, xr.Dataset]:
35
- """
36
- Read MSv2 subtables (main table keywords) as xr.Dataset
37
-
38
- Parameters
39
- ----------
40
- infile : str
41
- input MeasurementSet path
42
- done_subt : List[str]
43
- Subtables that were already read, to skip them
44
- asdm_subtables : bool (Default value = False)
45
- Whether to also read ASDM_* subtables
46
-
47
- Returns
48
- -------
49
- Dict[str, xr.Dataset]
50
- dict of xarray datasets read from subtables (metadata tables)
51
-
52
- """
53
- ignore_msv2_cols_subt = ["FLAG_CMD", "FLAG_ROW", "BEAM_ID"]
54
- skip_tables = ["SORTED_TABLE", "FLAG_CMD"] + done_subt
55
- stbl_list = sorted(
56
- [
57
- tname
58
- for tname in os.listdir(infile)
59
- if (tname not in skip_tables)
60
- and (os.path.isdir(os.path.join(infile, tname)))
61
- and (table_exists(os.path.join(infile, tname)))
62
- ]
63
- )
64
-
65
- subtables = {}
66
- for _ii, subt_name in enumerate(stbl_list):
67
- if not asdm_subtables and subt_name.startswith("ASDM_"):
68
- logger.debug(f"skipping ASDM_ subtable {subt_name}...")
69
- continue
70
- else:
71
- logger.debug(f"reading subtable {subt_name}...")
72
-
73
- if subt_name == "POINTING":
74
- subt_path = Path(infile, subt_name)
75
- rename_ids = {"dim_2": "n_polynomial", "dim_3": "dir"}
76
- xds = read_delayed_pointing_table(str(subt_path), rename_ids=rename_ids)
77
- else:
78
- xds = load_generic_table(
79
- infile,
80
- subt_name,
81
- timecols=["TIME"],
82
- ignore=ignore_msv2_cols_subt,
83
- rename_ids=subt_rename_ids.get(subt_name, None),
84
- )
85
-
86
- if len(xds.sizes) != 0:
87
- subtables[subt_name.lower()] = xds
88
-
89
- if "field" in subtables:
90
- ephem_xds = read_ephemerides(infile)
91
- if ephem_xds:
92
- subtables["ephemerides"] = ephem_xds
93
-
94
- return subtables
95
-
96
-
97
- def add_pointing_to_partition(
98
- xds_part: xr.Dataset, xds_pointing: xr.Dataset
99
- ) -> xr.Dataset:
100
- """
101
- Take pointing variables from a (delayed) pointing dataset and
102
- transfer them to a main table partition dataset (interpolating into
103
- the destination time axis)
104
-
105
- Parameters
106
- ----------
107
- xds_part : xr.Dataset
108
- a partition/sub-xds of the main table
109
- xds_pointing : xr.Dataset
110
- the xds read from the pointing subtable
111
-
112
- Returns
113
- -------
114
- xr.Dataset
115
- partition xds with pointing variables added/interpolated from the
116
- pointing_xds into its time axis
117
-
118
- """
119
- interp_xds = xds_pointing.interp(time=xds_part.time, method="nearest")
120
- for var in interp_xds.data_vars:
121
- xds_part[f"pointing_{var}"] = interp_xds[var]
122
-
123
- return xds_part
@@ -0,0 +1,60 @@
1
+ from typing import Union
2
+
3
+ import xarray as xr
4
+
5
+ import toolviper.utils.logger as logger
6
+
7
+
8
+ def interpolate_to_time(
9
+ xds: xr.Dataset,
10
+ interp_time: Union[xr.DataArray, None],
11
+ message_prefix: str,
12
+ time_name: str = "time",
13
+ ) -> xr.Dataset:
14
+ """
15
+ Interpolate the time coordinate of the input xarray dataset to the
16
+ a data array. This can be used for example to interpolate a pointing_xds
17
+ to the time coord of the (main) MSv4, or similarly the ephemeris
18
+ data variables of a field_and_source_xds.
19
+
20
+ Uses interpolation method "linear", unless the source number of points is
21
+ 1 in which case "nearest" is used, to avoid divide-by-zero issues.
22
+
23
+ Parameters:
24
+ ----------
25
+ xds : xr.Dataset
26
+ Xarray dataset to interpolate (presumably a pointing_xds or an xds of
27
+ ephemeris variables)
28
+ interp_time : Union[xr.DataArray, None]
29
+ Time axis to interpolate the dataset to (usually main MSv4 time)
30
+ message_prefix: str
31
+ A prefix for info/debug/etc. messages
32
+
33
+ Returns:
34
+ -------
35
+ interpolated_xds : xr.Dataset
36
+ xarray dataset with time axis interpolated to interp_time.
37
+ """
38
+ if interp_time is not None:
39
+ points_before = xds[time_name].size
40
+ if points_before > 1:
41
+ method = "linear"
42
+ else:
43
+ method = "nearest"
44
+ xds = xds.interp(
45
+ {time_name: interp_time.data}, method=method, assume_sorted=True
46
+ )
47
+ # scan_name sneaks in as a coordinate of the main time axis, drop it
48
+ if (
49
+ "type" in xds.attrs
50
+ and xds.attrs["type"] not in ["visibility", "spectrum", "wvr"]
51
+ and "scan_name" in xds.coords
52
+ ):
53
+ xds = xds.drop_vars("scan_name")
54
+ points_after = xds[time_name].size
55
+ logger.debug(
56
+ f"{message_prefix}: interpolating the time coordinate "
57
+ f"from {points_before} to {points_after} points"
58
+ )
59
+
60
+ return xds
@@ -4,11 +4,6 @@ def add_encoding(xds, compressor, chunks=None):
4
4
 
5
5
  chunks = {**dict(xds.sizes), **chunks} # Add missing sizes if presents.
6
6
 
7
- encoding = {}
8
7
  for da_name in list(xds.data_vars):
9
- if chunks:
10
- da_chunks = [chunks[dim_name] for dim_name in xds[da_name].sizes]
11
- xds[da_name].encoding = {"compressor": compressor, "chunks": da_chunks}
12
- # print(xds[da_name].encoding)
13
- else:
14
- xds[da_name].encoding = {"compressor": compressor}
8
+ da_chunks = [chunks[dim_name] for dim_name in xds[da_name].sizes]
9
+ xds[da_name].encoding = {"compressor": compressor, "chunks": da_chunks}
@@ -116,8 +116,6 @@ def convert_msv2_to_processing_set(
116
116
  if not str(out_file).endswith("ps.zarr"):
117
117
  out_file += ".ps.zarr"
118
118
 
119
- print("Output file: ", out_file)
120
-
121
119
  if overwrite:
122
120
  ps_dt.to_zarr(store=out_file, mode="w")
123
121
  else: