xradio 0.0.56__py3-none-any.whl → 0.0.58__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (62)
  1. xradio/__init__.py +2 -2
  2. xradio/_utils/_casacore/casacore_from_casatools.py +12 -2
  3. xradio/_utils/_casacore/tables.py +1 -0
  4. xradio/_utils/coord_math.py +22 -23
  5. xradio/_utils/dict_helpers.py +76 -11
  6. xradio/_utils/schema.py +5 -2
  7. xradio/_utils/zarr/common.py +1 -73
  8. xradio/image/_util/_casacore/xds_from_casacore.py +49 -33
  9. xradio/image/_util/_casacore/xds_to_casacore.py +41 -14
  10. xradio/image/_util/_fits/xds_from_fits.py +146 -35
  11. xradio/image/_util/casacore.py +4 -3
  12. xradio/image/_util/common.py +4 -4
  13. xradio/image/_util/image_factory.py +8 -8
  14. xradio/image/image.py +45 -5
  15. xradio/measurement_set/__init__.py +19 -9
  16. xradio/measurement_set/_utils/__init__.py +1 -3
  17. xradio/measurement_set/_utils/_msv2/__init__.py +0 -0
  18. xradio/measurement_set/_utils/_msv2/_tables/read.py +17 -76
  19. xradio/measurement_set/_utils/_msv2/_tables/read_main_table.py +2 -685
  20. xradio/measurement_set/_utils/_msv2/conversion.py +123 -145
  21. xradio/measurement_set/_utils/_msv2/create_antenna_xds.py +9 -16
  22. xradio/measurement_set/_utils/_msv2/create_field_and_source_xds.py +125 -221
  23. xradio/measurement_set/_utils/_msv2/msv2_to_msv4_meta.py +1 -2
  24. xradio/measurement_set/_utils/_msv2/msv4_info_dicts.py +8 -7
  25. xradio/measurement_set/_utils/_msv2/msv4_sub_xdss.py +27 -72
  26. xradio/measurement_set/_utils/_msv2/partition_queries.py +1 -261
  27. xradio/measurement_set/_utils/_msv2/subtables.py +0 -107
  28. xradio/measurement_set/_utils/_utils/interpolate.py +60 -0
  29. xradio/measurement_set/_utils/_zarr/encoding.py +2 -7
  30. xradio/measurement_set/convert_msv2_to_processing_set.py +0 -2
  31. xradio/measurement_set/load_processing_set.py +2 -2
  32. xradio/measurement_set/measurement_set_xdt.py +14 -14
  33. xradio/measurement_set/open_processing_set.py +1 -3
  34. xradio/measurement_set/processing_set_xdt.py +41 -835
  35. xradio/measurement_set/schema.py +95 -122
  36. xradio/schema/check.py +91 -97
  37. xradio/schema/dataclass.py +159 -22
  38. xradio/schema/export.py +99 -0
  39. xradio/schema/metamodel.py +51 -16
  40. xradio/schema/typing.py +5 -5
  41. {xradio-0.0.56.dist-info → xradio-0.0.58.dist-info}/METADATA +2 -1
  42. xradio-0.0.58.dist-info/RECORD +65 -0
  43. {xradio-0.0.56.dist-info → xradio-0.0.58.dist-info}/WHEEL +1 -1
  44. xradio/image/_util/fits.py +0 -13
  45. xradio/measurement_set/_utils/_msv2/_tables/load.py +0 -66
  46. xradio/measurement_set/_utils/_msv2/_tables/load_main_table.py +0 -490
  47. xradio/measurement_set/_utils/_msv2/_tables/read_subtables.py +0 -398
  48. xradio/measurement_set/_utils/_msv2/_tables/write.py +0 -323
  49. xradio/measurement_set/_utils/_msv2/_tables/write_exp_api.py +0 -388
  50. xradio/measurement_set/_utils/_msv2/chunks.py +0 -115
  51. xradio/measurement_set/_utils/_msv2/descr.py +0 -165
  52. xradio/measurement_set/_utils/_msv2/msv2_msv3.py +0 -7
  53. xradio/measurement_set/_utils/_msv2/partitions.py +0 -392
  54. xradio/measurement_set/_utils/_utils/cds.py +0 -40
  55. xradio/measurement_set/_utils/_utils/xds_helper.py +0 -404
  56. xradio/measurement_set/_utils/_zarr/read.py +0 -263
  57. xradio/measurement_set/_utils/_zarr/write.py +0 -329
  58. xradio/measurement_set/_utils/msv2.py +0 -106
  59. xradio/measurement_set/_utils/zarr.py +0 -133
  60. xradio-0.0.56.dist-info/RECORD +0 -78
  61. {xradio-0.0.56.dist-info → xradio-0.0.58.dist-info}/licenses/LICENSE.txt +0 -0
  62. {xradio-0.0.56.dist-info → xradio-0.0.58.dist-info}/top_level.txt +0 -0
xradio/measurement_set/_utils/_msv2/msv4_sub_xdss.py

```diff
@@ -8,12 +8,19 @@ import xarray as xr
 from numpy.typing import ArrayLike
 
 from xradio._utils.coord_math import convert_to_si_units
+from xradio._utils.dict_helpers import (
+    make_time_measure_attrs,
+    make_spectral_coord_measure_attrs,
+)
 from xradio._utils.schema import (
     column_description_casacore_to_msv4_measure,
     convert_generic_xds_to_xradio_schema,
 )
-from .subtables import subt_rename_ids
-from ._tables.read import (
+from xradio.measurement_set._utils._utils.interpolate import (
+    interpolate_to_time,
+)
+from xradio.measurement_set._utils._msv2.subtables import subt_rename_ids
+from xradio.measurement_set._utils._msv2._tables.read import (
     load_generic_table,
     make_taql_where_between_min_max,
     table_exists,
@@ -21,12 +28,7 @@ from ._tables.read import (
 )
 
 
-standard_time_coord_attrs = {
-    "type": "time",
-    "units": ["s"],
-    "scale": "utc",
-    "format": "unix",
-}
+standard_time_coord_attrs = make_time_measure_attrs(time_format="unix")
 
 
 def rename_and_interpolate_to_time(
```
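The second hunk replaces a hand-written time measure dict with a helper from `xradio._utils.dict_helpers`. A minimal sketch of the implied equivalence; the helper's exact return value is not shown in this diff and is inferred from the literal it replaces:

```python
from xradio._utils.dict_helpers import make_time_measure_attrs

standard_time_coord_attrs = make_time_measure_attrs(time_format="unix")
# Per the removed literal, this should equal:
# {"type": "time", "units": ["s"], "scale": "utc", "format": "unix"}
```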
xradio/measurement_set/_utils/_msv2/msv4_sub_xdss.py (continued)

```diff
@@ -89,57 +91,6 @@ def rename_and_interpolate_to_time(
     return renamed_time_xds
 
 
-def interpolate_to_time(
-    xds: xr.Dataset,
-    interp_time: Union[xr.DataArray, None],
-    message_prefix: str,
-    time_name: str = "time",
-) -> xr.Dataset:
-    """
-    Interpolate the time coordinate of the input xarray dataset to the
-    time of a data array. This can be used for example to interpolate a
-    pointing_xds to the time coord of the (main) MSv4, or similarly the
-    ephemeris data variables of a field_and_source_xds.
-
-    Uses interpolation method "linear", unless the source number of points is
-    1 in which case "nearest" is used, to avoid divide-by-zero issues.
-
-    Parameters:
-    ----------
-    xds : xr.Dataset
-        Xarray dataset to interpolate (presumably a pointing_xds or an xds of
-        ephemeris variables)
-    interp_time : Union[xr.DataArray, None]
-        Time axis to interpolate the dataset to (usually main MSv4 time)
-    message_prefix: str
-        A prefix for info/debug/etc. messages
-
-    Returns:
-    -------
-    interpolated_xds : xr.Dataset
-        xarray dataset with time axis interpolated to interp_time.
-    """
-    if interp_time is not None:
-        points_before = xds[time_name].size
-        if points_before > 1:
-            method = "linear"
-        else:
-            method = "nearest"
-        xds = xds.interp(
-            {time_name: interp_time.data}, method=method, assume_sorted=True
-        )
-        # scan_name sneaks in as a coordinate of the main time axis, drop it
-        if "scan_name" in xds.coords:
-            xds = xds.drop_vars("scan_name")
-        points_after = xds[time_name].size
-        logger.debug(
-            f"{message_prefix}: interpolating the time coordinate "
-            f"from {points_before} to {points_after} points"
-        )
-
-    return xds
-
-
 def make_taql_where_weather(
     in_file: str, ant_xds_station_name_ids: xr.DataArray
 ) -> str:
@@ -233,6 +184,7 @@ def prepare_generic_weather_xds_and_station_name(
     generic_weather_xds = load_generic_table(
         in_file,
         "WEATHER",
+        timecols=["TIME"],
         rename_ids=subt_rename_ids["WEATHER"],
         taql_where=taql_where,
     )
@@ -282,7 +234,7 @@ def finalize_station_position(
         # borrow location frame attributes from antenna position
         weather_xds["STATION_POSITION"].attrs = ant_position_with_ids.attrs
     else:
-        # borrow from ant_posision_with_ids but without carrying over other coords
+        # borrow from ant_position_with_ids but without carrying over other coords
         weather_xds = weather_xds.assign(
             {
                 "STATION_POSITION": (
@@ -318,6 +270,7 @@ def create_weather_xds(in_file: str, ant_position_with_ids: xr.DataArray):
         generic_weather_xds = load_generic_table(
             in_file,
             "WEATHER",
+            timecols=["TIME"],
            rename_ids=subt_rename_ids["WEATHER"],
        )
    except ValueError as _exc:
@@ -342,7 +295,7 @@ def create_weather_xds(in_file: str, ant_position_with_ids: xr.DataArray):
     dims_station_time = ["station_name", "time_weather"]
     dims_station_time_position = dims_station_time + ["cartesian_pos_label"]
     to_new_data_variables = {
-        "H20": ["H2O", dims_station_time],
+        "H2O": ["H2O", dims_station_time],
         "IONOS_ELECTRON": ["IONOS_ELECTRON", dims_station_time],
         "PRESSURE": ["PRESSURE", dims_station_time],
         "REL_HUMIDITY": ["REL_HUMIDITY", dims_station_time],
@@ -381,6 +334,8 @@ def create_weather_xds(in_file: str, ant_position_with_ids: xr.DataArray):
     for data_var in weather_xds:
         if weather_xds.data_vars[data_var].dtype != np.float64:
             weather_xds[data_var] = weather_xds[data_var].astype(np.float64)
+    if "time_weather" in weather_xds.coords:
+        weather_xds.coords["time_weather"].attrs["type"] = "time_weather"
 
     return weather_xds
 
@@ -437,10 +392,10 @@ def correct_generic_pointing_xds(
             and generic_pointing_xds.sizes["dir"] == 0
         ):
             # When some direction variables are "empty" but some are populated properly
-            if "dim_2" in generic_pointing_xds[key].sizes:
+            if "dim_2" in generic_pointing_xds[data_var_name].sizes:
                 data_var_data = xr.DataArray(
-                    generic_pointing_xds[key].values,
-                    dims=generic_pointing_xds[key].dims,
+                    generic_pointing_xds[data_var_name].values,
+                    dims=generic_pointing_xds[data_var_name].dims,
                 )
             else:
                 shape = tuple(
@@ -449,7 +404,7 @@ def correct_generic_pointing_xds(
                 ) + (2,)
                 data_var_data = xr.DataArray(
                     np.full(shape, np.nan),
-                    dims=generic_pointing_xds[key].dims,
+                    dims=generic_pointing_xds[data_var_name].dims,
                 )
             correct_pointing_xds[data_var_name].data = data_var_data
 
@@ -501,6 +456,7 @@ def create_pointing_xds(
     generic_pointing_xds = load_generic_table(
         in_file,
         "POINTING",
+        timecols=["TIME"],
         rename_ids=subt_rename_ids["POINTING"],
         taql_where=taql_where,
     )
@@ -608,7 +564,7 @@ def prepare_generic_sys_cal_xds(generic_sys_cal_xds: xr.Dataset) -> xr.Dataset:
         )
     elif (
         "frequency" in generic_sys_cal_xds.sizes
-        and not "dim_3" in generic_sys_cal_xds.sizes
+        and "dim_3" not in generic_sys_cal_xds.sizes
     ):
         # because order is (...,frequency,receptor), when frequency is missing
         # receptor can get wrongly labeled as frequency
@@ -665,6 +621,7 @@ def create_system_calibration_xds(
     generic_sys_cal_xds = load_generic_table(
         in_file,
         "SYSCAL",
+        timecols=["TIME"],
         rename_ids=subt_rename_ids["SYSCAL"],
         taql_where=(
             f" where (SPECTRAL_WINDOW_ID = {spectral_window_id})"
@@ -725,11 +682,9 @@
         "frequency_system_cal": generic_sys_cal_xds.coords["frequency"].data
     }
     sys_cal_xds = sys_cal_xds.assign_coords(frequency_coord)
-    frequency_measure = {
-        "type": main_xds_frequency.attrs["type"],
-        "units": main_xds_frequency.attrs["units"],
-        "observer": main_xds_frequency.attrs["observer"],
-    }
+    frequency_measure = make_spectral_coord_measure_attrs(
+        main_xds_frequency.attrs["units"], main_xds_frequency.attrs["observer"]
+    )
     sys_cal_xds.coords["frequency_system_cal"].attrs.update(frequency_measure)
 
     sys_cal_xds = rename_and_interpolate_to_time(
@@ -828,7 +783,7 @@ def create_phased_array_xds(
     }
     data_vars["COORDINATE_AXES"].attrs = {
         "type": "rotation_matrix",
-        "units": ["dimensionless", "dimensionless", "dimensionless"],
+        "units": "dimensionless",
     }
     # Remove the "frame" attribute if it exists, because ELEMENT_OFFSET is
     # defined in a station-local frame for which no standard name exists
```
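In the SYSCAL hunk above, the hand-built frequency measure dict is likewise replaced by a `dict_helpers` helper. A minimal sketch of the expected equivalence; the `"spectral_coord"` type value is inferred from the helper's name and the example arguments are hypothetical stand-ins for the main dataset's frequency attributes:

```python
from xradio._utils.dict_helpers import make_spectral_coord_measure_attrs

# ["Hz"] and "TOPO" stand in for main_xds_frequency.attrs["units"]/["observer"]
frequency_measure = make_spectral_coord_measure_attrs(["Hz"], "TOPO")
# Expected to match the removed literal, i.e. roughly:
# {"type": "spectral_coord", "units": ["Hz"], "observer": "TOPO"}
```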
xradio/measurement_set/_utils/_msv2/partition_queries.py

```diff
@@ -1,17 +1,13 @@
 import itertools
 import toolviper.utils.logger as logger
-from pathlib import Path
-from typing import Dict, List, Tuple, Union
 
 import numpy as np
-import xarray as xr
 
 try:
     from casacore import tables
 except ImportError:
     import xradio._utils._casacore.casacore_from_casatools as tables
 
-from ._tables.table_query import open_table_ro, open_query
 from ._tables.read import table_exists
 
 
@@ -31,7 +27,7 @@ def create_partitions(in_file: str, partition_scheme: list) -> list[dict]:
     partition_scheme: list
         A MS v4 can only contain a single data description (spectral window and polarization setup), and observation mode. Consequently, the MS v2 is partitioned when converting to MS v4.
         In addition to data description and polarization setup a finer partitioning is possible by specifying a list of partitioning keys. Any combination of the following keys are possible:
-        "FIELD_ID", "SCAN_NUMBER", "STATE_ID", "SOURCE_ID", "SUB_SCAN_NUMBER".
+        "FIELD_ID", "SCAN_NUMBER", "STATE_ID", "SOURCE_ID", "SUB_SCAN_NUMBER", "ANTENNA1".
         For mosaics where the phase center is rapidly changing (such as VLA on the fly mosaics) partition_scheme should be set to an empty list []. By default, ["FIELD_ID"].
 
     Returns
     -------
```
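The docstring hunk documents that `"ANTENNA1"` is now an accepted partition key. A usage sketch based only on the signature and keys shown above; the MS path is a hypothetical placeholder:

```python
from xradio.measurement_set._utils._msv2.partition_queries import create_partitions

# Default behaviour partitions by data description, observation mode and FIELD_ID;
# per this release's docstring, "ANTENNA1" may now be added as a key.
partitions = create_partitions("my_data.ms", partition_scheme=["FIELD_ID", "ANTENNA1"])
# Each entry is a dict of ids describing one MSv4 partition.
```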
xradio/measurement_set/_utils/_msv2/partition_queries.py (continued)

```diff
@@ -152,259 +148,3 @@ def create_partitions(in_file: str, partition_scheme: list) -> list[dict]:
         partitions.append(partition_info)
 
     return partitions
-
-
-# Used by code that will be deprecated at some stage. See #192
-# Still need to clarify what to do about intent string filtering ('WVR', etc.)
-
-
-def make_partition_ids_by_ddi_intent(
-    infile: str, spw_names: xr.DataArray
-) -> Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray]:
-    """
-    Produces arrays of per-partition ddi, scan, state_id, for when
-    using the partition scheme 'intents' (ddi, scan, subscans(state_ids))
-
-    Parameters
-    ----------
-    infile : str
-        return: arrays with indices that define every partition
-    spw_names: xr.DataArray
-
-
-    Returns
-    -------
-    Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray]
-        arrays with indices that define every partition
-    """
-    # TODO: could explore other TAQL alternatives, like
-    # select ... from ::STATE where OBS_MODE = ...
-    #
-    # This will work only if intents are already alphabetically sorted (grouped),
-    # won't work for alternating intents:
-    # taql_intents = "select rowid() as ROWS from $state_tbl GROUPBY OBS_MODE "
-
-    with open_table_ro(str(Path(infile, "STATE"))) as state_tbl:
-        distinct_obs_mode = find_distinct_obs_mode(infile, state_tbl)
-
-        if distinct_obs_mode is None:
-            return partition_when_empty_state(infile)
-
-        with open_table_ro(infile) as main_tbl:
-            (
-                data_desc_id,
-                state_id_partitions,
-                intent_names,
-            ) = make_ddi_state_intent_lists(
-                main_tbl, state_tbl, distinct_obs_mode, spw_names
-            )
-
-    # Take whatever scans given by the STATE_IDs and DDIs
-    scan_number = [None] * len(state_id_partitions)
-
-    return data_desc_id, scan_number, state_id_partitions, intent_names
-
-
-def find_distinct_obs_mode(
-    infile: str, state_table: tables.table
-) -> Union[List[str], None]:
-    """
-    Produce a list of unique "scan/subscan" intents.
-
-    Parameters
-    ----------
-    infile : str
-        Path to the MS
-    state_table : tables.table
-        casacore table object to read from
-
-    Returns
-    -------
-    Union[List[str], None]
-        List of unique "scan/subscan" intents as given in the
-        OBS_MODE column of the STATE subtable. None if the STATE subtable
-        is empty or there is a problem reading it
-    """
-    taql_distinct_intents = "select DISTINCT OBS_MODE from $state_table"
-    with open_query(state_table, taql_distinct_intents) as query_intents:
-        if query_intents.nrows() == 0:
-            logger.warning(
-                "STATE subtable has no data. Cannot partition by scan/subscan intent"
-            )
-            return None
-
-        distinct_obs_mode = query_intents.getcol("OBS_MODE")
-        logger.debug(
-            f" Query for distinct OBS_MODE len: {len(distinct_obs_mode)}, values: {distinct_obs_mode}"
-        )
-    return distinct_obs_mode
-
-
-def make_ddi_state_intent_lists(
-    main_tbl: tables.table,
-    state_tbl: tables.table,
-    distinct_obs_mode: np.ndarray,
-    spw_name_by_ddi: Dict[int, str],
-) -> Tuple[np.ndarray, np.ndarray, np.ndarray]:
-    """
-    Produce arrays of (ddi indices, state indices, intent string)
-    for every distinct intent string, where every item represents one
-    partition of the main table
-
-    As the obs_mode strings have concatenated intent strings from all
-    the scan and subscan intents, this function has started
-    implementing some simple heuristics to remove the intent items
-    that are not related to the respective DDIs (for example WVR
-    intent is the only one kept when the DDI/SPW has WVR in its name). See
-    call to filter_intents_per_ddi()
-
-    Parameters
-    ----------
-    main_tbl : tables.table
-        main MS table opened as a casacore.tables.table
-    state_tbl : tables.table
-        STATE subtable opened as a casacore.tables.table
-    distinct_obs_mode : np.ndarray
-        list of unique/distinct OBS_MODE strings from the STATE table
-    spw_name_by_ddi: Dict[int, str]
-
-
-    Returns
-    -------
-    Tuple[np.ndarray, np.ndarray, np.ndarray]
-        arrays of (ddi indices, state indices, intent string)
-    """
-    data_desc_id, state_id_partitions, intent_names = [], [], []
-    for intent in distinct_obs_mode:
-        where_intent = f"where OBS_MODE = '{intent}'"
-        taql_states = f"select ROWID() as ROWS from $state_tbl {where_intent}"
-        with open_query(state_tbl, taql_states) as query_states:
-            state_ids_for_intent = query_states.getcol("ROWS")
-
-        state_ids = " OR STATE_ID = ".join(np.char.mod("%d", state_ids_for_intent))
-        taql_partition = (
-            f"select DISTINCT DATA_DESC_ID from $main_tbl where STATE_ID = {state_ids}"
-        )
-        with open_query(main_tbl, taql_partition) as query_ddi_intent:
-            # No data for these STATE_IDs
-            if query_ddi_intent.nrows() == 0:
-                continue
-
-            # Will implicitly take whatever scans given the STATE_IDs
-            # and DDIs scan_number. Not needed:
-            # scan_number = query_ddi_intent.getcol("SCAN_NUMBER")
-            ddis = query_ddi_intent.getcol("DATA_DESC_ID")
-
-        data_desc_id.extend(ddis)
-        state_id_partitions.extend([state_ids_for_intent] * len(ddis))
-
-        # Try to select/exclude confusing or mixed intent names such as 'WVR#*'
-        intents_ddi = filter_intents_per_ddi(ddis, "WVR", intent, spw_name_by_ddi)
-        intent_names.extend(intents_ddi)
-
-    logger.debug(
-        f"Produced data_desc_id: {data_desc_id},\n state_id_partitions: {state_id_partitions}"
-    )
-    return data_desc_id, state_id_partitions, intent_names
-
-
-def filter_intents_per_ddi(
-    ddis: List[int], substr: str, intents: str, spw_name_by_ddi: Dict[int, str]
-) -> List[str]:
-    """
-    For a given pair of:
-    - substring (say 'WVR') associated with a type of intent we want to differentiate
-    - intents string (multiple comma-separated scan/subscan intents)
-    => do: for every DDI passed in the list of ddis, either keep only the
-    intents that have that substring (if there are any) or drop them, depending on
-    whether that substring is present in the SPW name. This is to filter in only
-    the intents that really apply to every DDI/SPW.
-
-    Parameters
-    ----------
-    ddis : List[int]
-        list of ddis for which the intents have to be filtered
-    substr : str
-        substring to filter by
-    intents : str
-        string with a comma-separated list of individual
-        scan/subscan intent strings (like scan/subscan intents as stored
-        in the MS STATE/OBS_MODE
-    spw_name_by_ddi : Dict[int, str]
-        SPW names by DDI ID (row index) key
-
-    Returns
-    -------
-    List[str]
-        list where the intents related to 'substr' have been filtered in or out
-    """
-    present = substr in intents
-    # Nothing to effectively filter, full cs-list of intents apply to all DDIs
-    if not present:
-        return [intents] * len(ddis)
-
-    every_intent = intents.split(",")
-    filtered_intents = []
-    for ddi in ddis:
-        spw_name = spw_name_by_ddi.get(ddi, "")
-
-        if not spw_name:
-            # we cannot say / cannot filter
-            filtered_intents.append(intents)
-            continue
-
-        # A not-xor to select/deselect (or keep-only/drop) the intents that apply
-        # to this DDI
-        ddi_intents = [
-            intnt for intnt in every_intent if (substr in intnt) == (substr in spw_name)
-        ]
-        ddi_intents = ",".join(ddi_intents)
-        filtered_intents.append(ddi_intents)
-
-    return filtered_intents
-
-
-def partition_when_empty_state(
-    infile: str,
-) -> Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray]:
-    """
-    Generate fallback partition ids when trying to partition by
-    'intent' but the STATE table is empty.
-
-    Some MSs have no STATE rows and in the main table STATE_ID==-1
-    (that is not a valid MSv2 but it happens).
-
-    Parameters
-    ----------
-    infile : str
-        Path to the MS
-
-    Returns
-    -------
-    Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray]
-        same as make_partition_ids_by_ddi_intent but with
-        effectively only ddi indices and other indices set to None ("any
-        IDs found")
-    """
-    try:
-        main_table = None
-
-        main_table = tables.table(
-            infile, readonly=True, lockoptions={"option": "usernoread"}, ack=False
-        )
-        taql_ddis = "select DISTINCT DATA_DESC_ID from $main_table"
-        with open_query(main_table, taql_ddis) as query_per_intent:
-            # Will take whatever scans given the STATE_IDs and DDIs
-            # scan_number = query_per_intent.getcol("SCAN_NUMBER")
-            distinct_ddis = query_per_intent.getcol("DATA_DESC_ID")
-
-        logger.debug(
-            f"Producing {len(distinct_ddis)} partitions for ddis: {distinct_ddis}"
-        )
-        nparts = len(distinct_ddis)
-
-    finally:
-        if main_table:
-            main_table.close()
-
-    return distinct_ddis, [None] * nparts, [None] * nparts, [""] * nparts
```
xradio/measurement_set/_utils/_msv2/subtables.py

```diff
@@ -1,16 +1,3 @@
-import os
-
-import toolviper.utils.logger as logger
-
-from pathlib import Path
-from typing import Dict, List
-
-import xarray as xr
-
-from ._tables.read import load_generic_table, table_exists
-from ._tables.read_subtables import read_ephemerides, read_delayed_pointing_table
-
-
 subt_rename_ids = {
     "ANTENNA": {"row": "antenna_id", "dim_1": "xyz"},
     "FEED": {"dim_1": "xyz", "dim_2": "receptor", "dim_3": "receptor2"},
@@ -27,97 +14,3 @@ subt_rename_ids = {
     # Would make sense for non-std "WS_NX_STATION_POSITION"
     "WEATHER": {"dim_1": "xyz"},
 }
-
-
-def read_ms_subtables(
-    infile: str, done_subt: List[str], asdm_subtables: bool = False
-) -> Dict[str, xr.Dataset]:
-    """
-    Read MSv2 subtables (main table keywords) as xr.Dataset
-
-    Parameters
-    ----------
-    infile : str
-        input MeasurementSet path
-    done_subt : List[str]
-        Subtables that were already read, to skip them
-    asdm_subtables : bool (Default value = False)
-        Whether to also read ASDM_* subtables
-
-    Returns
-    -------
-    Dict[str, xr.Dataset]
-        dict of xarray datasets read from subtables (metadata tables)
-
-    """
-    ignore_msv2_cols_subt = ["FLAG_CMD", "FLAG_ROW", "BEAM_ID"]
-    skip_tables = ["SORTED_TABLE", "FLAG_CMD"] + done_subt
-    stbl_list = sorted(
-        [
-            tname
-            for tname in os.listdir(infile)
-            if (tname not in skip_tables)
-            and (os.path.isdir(os.path.join(infile, tname)))
-            and (table_exists(os.path.join(infile, tname)))
-        ]
-    )
-
-    subtables = {}
-    for _ii, subt_name in enumerate(stbl_list):
-        if not asdm_subtables and subt_name.startswith("ASDM_"):
-            logger.debug(f"skipping ASDM_ subtable {subt_name}...")
-            continue
-        else:
-            logger.debug(f"reading subtable {subt_name}...")
-
-        if subt_name == "POINTING":
-            subt_path = Path(infile, subt_name)
-            rename_ids = {"dim_2": "n_polynomial", "dim_3": "dir"}
-            xds = read_delayed_pointing_table(str(subt_path), rename_ids=rename_ids)
-        else:
-            xds = load_generic_table(
-                infile,
-                subt_name,
-                timecols=["TIME"],
-                ignore=ignore_msv2_cols_subt,
-                rename_ids=subt_rename_ids.get(subt_name, None),
-            )
-
-        if len(xds.sizes) != 0:
-            subtables[subt_name.lower()] = xds
-
-    if "field" in subtables:
-        ephem_xds = read_ephemerides(infile)
-        if ephem_xds:
-            subtables["ephemerides"] = ephem_xds
-
-    return subtables
-
-
-def add_pointing_to_partition(
-    xds_part: xr.Dataset, xds_pointing: xr.Dataset
-) -> xr.Dataset:
-    """
-    Take pointing variables from a (delayed) pointing dataset and
-    transfer them to a main table partition dataset (interpolating into
-    the destination time axis)
-
-    Parameters
-    ----------
-    xds_part : xr.Dataset
-        a partition/sub-xds of the main table
-    xds_pointing : xr.Dataset
-        the xds read from the pointing subtable
-
-    Returns
-    -------
-    xr.Dataset
-        partition xds with pointing variables added/interpolated from the
-        pointing_xds into its time axis
-
-    """
-    interp_xds = xds_pointing.interp(time=xds_part.time, method="nearest")
-    for var in interp_xds.data_vars:
-        xds_part[f"pointing_{var}"] = interp_xds[var]
-
-    return xds_part
```
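Only the `subt_rename_ids` mapping survives in this module. A sketch of the call pattern that the msv4_sub_xdss.py hunks above now use consistently, with the new `timecols=["TIME"]` argument; the MS path is a hypothetical placeholder:

```python
from xradio.measurement_set._utils._msv2.subtables import subt_rename_ids
from xradio.measurement_set._utils._msv2._tables.read import load_generic_table

generic_weather_xds = load_generic_table(
    "my_data.ms",                           # hypothetical MS path
    "WEATHER",
    timecols=["TIME"],                      # newly passed throughout 0.0.58
    rename_ids=subt_rename_ids["WEATHER"],  # i.e. {"dim_1": "xyz"}
)
```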
xradio/measurement_set/_utils/_utils/interpolate.py (new file)

```diff
@@ -0,0 +1,60 @@
+from typing import Union
+
+import xarray as xr
+
+import toolviper.utils.logger as logger
+
+
+def interpolate_to_time(
+    xds: xr.Dataset,
+    interp_time: Union[xr.DataArray, None],
+    message_prefix: str,
+    time_name: str = "time",
+) -> xr.Dataset:
+    """
+    Interpolate the time coordinate of the input xarray dataset to the
+    time of a data array. This can be used for example to interpolate a
+    pointing_xds to the time coord of the (main) MSv4, or similarly the
+    ephemeris data variables of a field_and_source_xds.
+
+    Uses interpolation method "linear", unless the source number of points is
+    1 in which case "nearest" is used, to avoid divide-by-zero issues.
+
+    Parameters:
+    ----------
+    xds : xr.Dataset
+        Xarray dataset to interpolate (presumably a pointing_xds or an xds of
+        ephemeris variables)
+    interp_time : Union[xr.DataArray, None]
+        Time axis to interpolate the dataset to (usually main MSv4 time)
+    message_prefix: str
+        A prefix for info/debug/etc. messages
+
+    Returns:
+    -------
+    interpolated_xds : xr.Dataset
+        xarray dataset with time axis interpolated to interp_time.
+    """
+    if interp_time is not None:
+        points_before = xds[time_name].size
+        if points_before > 1:
+            method = "linear"
+        else:
+            method = "nearest"
+        xds = xds.interp(
+            {time_name: interp_time.data}, method=method, assume_sorted=True
+        )
+        # scan_name sneaks in as a coordinate of the main time axis, drop it
+        if (
+            "type" in xds.attrs
+            and xds.attrs["type"] not in ["visibility", "spectrum", "wvr"]
+            and "scan_name" in xds.coords
+        ):
+            xds = xds.drop_vars("scan_name")
+        points_after = xds[time_name].size
+        logger.debug(
+            f"{message_prefix}: interpolating the time coordinate "
+            f"from {points_before} to {points_after} points"
+        )
+
+    return xds
```
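Compared to the copy removed from msv4_sub_xdss.py, the relocated function only drops `scan_name` for datasets whose `type` attribute is not one of `visibility`/`spectrum`/`wvr`. A usage sketch based on the signature above, with hypothetical dataset names:

```python
from xradio.measurement_set._utils._utils.interpolate import interpolate_to_time

# pointing_xds and main_xds are hypothetical, already-loaded datasets
pointing_interp = interpolate_to_time(
    pointing_xds,                  # dataset with a "time" coordinate
    interp_time=main_xds["time"],  # target time axis (usually main MSv4 time)
    message_prefix="pointing_xds",
)
```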
xradio/measurement_set/_utils/_zarr/encoding.py

```diff
@@ -4,11 +4,6 @@ def add_encoding(xds, compressor, chunks=None):
 
     chunks = {**dict(xds.sizes), **chunks}  # Add missing sizes if present.
 
-    encoding = {}
     for da_name in list(xds.data_vars):
-        if chunks:
-            da_chunks = [chunks[dim_name] for dim_name in xds[da_name].sizes]
-            xds[da_name].encoding = {"compressor": compressor, "chunks": da_chunks}
-            # print(xds[da_name].encoding)
-        else:
-            xds[da_name].encoding = {"compressor": compressor}
+        da_chunks = [chunks[dim_name] for dim_name in xds[da_name].sizes]
+        xds[da_name].encoding = {"compressor": compressor, "chunks": da_chunks}
```
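A sketch of the simplified behaviour, assuming a numcodecs codec as the compressor (any zarr-compatible codec object should do; the dataset here is hypothetical):

```python
import numcodecs
import numpy as np
import xarray as xr
from xradio.measurement_set._utils._zarr.encoding import add_encoding

xds = xr.Dataset({"VIS": (("time", "frequency"), np.zeros((10, 4)))})
add_encoding(
    xds,
    compressor=numcodecs.Blosc(cname="zstd", clevel=2),
    chunks={"time": 5},  # dims not listed (frequency) fall back to their full size
)
# Each data variable now carries {"compressor": ..., "chunks": [5, 4]} in .encoding
```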
xradio/measurement_set/convert_msv2_to_processing_set.py

```diff
@@ -116,8 +116,6 @@ def convert_msv2_to_processing_set(
     if not str(out_file).endswith("ps.zarr"):
         out_file += ".ps.zarr"
 
-    print("Output file: ", out_file)
-
     if overwrite:
         ps_dt.to_zarr(store=out_file, mode="w")
     else:
```
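With 0.0.58 the converter no longer prints the output path. A behaviour sketch based only on what this hunk shows; any parameter names beyond `out_file` and `overwrite` (notably `in_file`) are assumptions:

```python
from xradio.measurement_set.convert_msv2_to_processing_set import (
    convert_msv2_to_processing_set,
)

convert_msv2_to_processing_set(
    in_file="my_data.ms",  # hypothetical input MS; parameter name assumed
    out_file="my_data",    # ".ps.zarr" is appended automatically
    overwrite=True,        # writes with zarr mode="w" instead of failing
)
```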