xradio 0.0.55__py3-none-any.whl → 0.0.58__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- xradio/__init__.py +2 -2
- xradio/_utils/_casacore/casacore_from_casatools.py +1001 -0
- xradio/_utils/_casacore/tables.py +6 -1
- xradio/_utils/coord_math.py +22 -23
- xradio/_utils/dict_helpers.py +76 -11
- xradio/_utils/schema.py +5 -2
- xradio/_utils/zarr/common.py +1 -73
- xradio/image/_util/_casacore/common.py +11 -3
- xradio/image/_util/_casacore/xds_from_casacore.py +59 -35
- xradio/image/_util/_casacore/xds_to_casacore.py +47 -16
- xradio/image/_util/_fits/xds_from_fits.py +172 -77
- xradio/image/_util/casacore.py +9 -4
- xradio/image/_util/common.py +4 -4
- xradio/image/_util/image_factory.py +8 -8
- xradio/image/image.py +45 -5
- xradio/measurement_set/__init__.py +19 -9
- xradio/measurement_set/_utils/__init__.py +1 -3
- xradio/measurement_set/_utils/_msv2/__init__.py +0 -0
- xradio/measurement_set/_utils/_msv2/_tables/read.py +35 -90
- xradio/measurement_set/_utils/_msv2/_tables/read_main_table.py +6 -686
- xradio/measurement_set/_utils/_msv2/_tables/table_query.py +13 -3
- xradio/measurement_set/_utils/_msv2/conversion.py +129 -145
- xradio/measurement_set/_utils/_msv2/create_antenna_xds.py +9 -16
- xradio/measurement_set/_utils/_msv2/create_field_and_source_xds.py +125 -221
- xradio/measurement_set/_utils/_msv2/msv2_to_msv4_meta.py +1 -2
- xradio/measurement_set/_utils/_msv2/msv4_info_dicts.py +13 -8
- xradio/measurement_set/_utils/_msv2/msv4_sub_xdss.py +27 -72
- xradio/measurement_set/_utils/_msv2/partition_queries.py +5 -262
- xradio/measurement_set/_utils/_msv2/subtables.py +0 -107
- xradio/measurement_set/_utils/_utils/interpolate.py +60 -0
- xradio/measurement_set/_utils/_zarr/encoding.py +2 -7
- xradio/measurement_set/convert_msv2_to_processing_set.py +0 -2
- xradio/measurement_set/load_processing_set.py +2 -2
- xradio/measurement_set/measurement_set_xdt.py +14 -14
- xradio/measurement_set/open_processing_set.py +1 -3
- xradio/measurement_set/processing_set_xdt.py +41 -835
- xradio/measurement_set/schema.py +96 -123
- xradio/schema/check.py +91 -97
- xradio/schema/dataclass.py +159 -22
- xradio/schema/export.py +99 -0
- xradio/schema/metamodel.py +51 -16
- xradio/schema/typing.py +5 -5
- {xradio-0.0.55.dist-info → xradio-0.0.58.dist-info}/METADATA +43 -11
- xradio-0.0.58.dist-info/RECORD +65 -0
- {xradio-0.0.55.dist-info → xradio-0.0.58.dist-info}/WHEEL +1 -1
- xradio/image/_util/fits.py +0 -13
- xradio/measurement_set/_utils/_msv2/_tables/load.py +0 -63
- xradio/measurement_set/_utils/_msv2/_tables/load_main_table.py +0 -487
- xradio/measurement_set/_utils/_msv2/_tables/read_subtables.py +0 -395
- xradio/measurement_set/_utils/_msv2/_tables/write.py +0 -320
- xradio/measurement_set/_utils/_msv2/_tables/write_exp_api.py +0 -385
- xradio/measurement_set/_utils/_msv2/chunks.py +0 -115
- xradio/measurement_set/_utils/_msv2/descr.py +0 -165
- xradio/measurement_set/_utils/_msv2/msv2_msv3.py +0 -7
- xradio/measurement_set/_utils/_msv2/partitions.py +0 -392
- xradio/measurement_set/_utils/_utils/cds.py +0 -40
- xradio/measurement_set/_utils/_utils/xds_helper.py +0 -404
- xradio/measurement_set/_utils/_zarr/read.py +0 -263
- xradio/measurement_set/_utils/_zarr/write.py +0 -329
- xradio/measurement_set/_utils/msv2.py +0 -106
- xradio/measurement_set/_utils/zarr.py +0 -133
- xradio-0.0.55.dist-info/RECORD +0 -77
- {xradio-0.0.55.dist-info → xradio-0.0.58.dist-info}/licenses/LICENSE.txt +0 -0
- {xradio-0.0.55.dist-info → xradio-0.0.58.dist-info}/top_level.txt +0 -0
xradio/measurement_set/_utils/_msv2/msv4_sub_xdss.py

Note: a few long removed lines are truncated in the upstream diff view; they are reproduced here as-is.

```diff
@@ -8,12 +8,19 @@ import xarray as xr
 from numpy.typing import ArrayLike
 
 from xradio._utils.coord_math import convert_to_si_units
+from xradio._utils.dict_helpers import (
+    make_time_measure_attrs,
+    make_spectral_coord_measure_attrs,
+)
 from xradio._utils.schema import (
     column_description_casacore_to_msv4_measure,
     convert_generic_xds_to_xradio_schema,
 )
-from .subtables import subt_rename_ids
-from ._tables.read import (
+from xradio.measurement_set._utils._utils.interpolate import (
+    interpolate_to_time,
+)
+from xradio.measurement_set._utils._msv2.subtables import subt_rename_ids
+from xradio.measurement_set._utils._msv2._tables.read import (
     load_generic_table,
     make_taql_where_between_min_max,
     table_exists,
@@ -21,12 +28,7 @@ from ._tables.read import (
 )
 
 
-standard_time_coord_attrs = {
-    "type": "time",
-    "units": ["s"],
-    "scale": "utc",
-    "format": "unix",
-}
+standard_time_coord_attrs = make_time_measure_attrs(time_format="unix")
 
 
 def rename_and_interpolate_to_time(
```
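The hunk above replaces the hand-written `standard_time_coord_attrs` dict with a helper from `xradio/_utils/dict_helpers.py` (which grows by 76 lines in this release). A minimal sketch of what `make_time_measure_attrs` must return, inferred purely from the literal it replaces, not from the actual implementation:

```python
# Sketch inferred from the removed dict above; the real helper lives in
# xradio/_utils/dict_helpers.py and may differ in signature and defaults.
def make_time_measure_attrs(time_format: str = "unix") -> dict:
    return {
        "type": "time",
        "units": ["s"],
        "scale": "utc",
        "format": time_format,
    }

standard_time_coord_attrs = make_time_measure_attrs(time_format="unix")
assert standard_time_coord_attrs["format"] == "unix"
```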
Continuing in `msv4_sub_xdss.py`, `interpolate_to_time` is removed here (it moves to its own module, see the new `interpolate.py` below), the `WEATHER`/`POINTING`/`SYSCAL` loads gain `timecols=["TIME"]`, and several pointing and syscal fixes land:

```diff
@@ -89,57 +91,6 @@ def rename_and_interpolate_to_time(
     return renamed_time_xds
 
 
-def interpolate_to_time(
-    xds: xr.Dataset,
-    interp_time: Union[xr.DataArray, None],
-    message_prefix: str,
-    time_name: str = "time",
-) -> xr.Dataset:
-    """
-    Interpolate the time coordinate of the input xarray dataset to the
-    a data array. This can be used for example to interpolate a pointing_xds
-    to the time coord of the (main) MSv4, or similarly the ephemeris
-    data variables of a field_and_source_xds.
-
-    Uses interpolation method "linear", unless the source number of points is
-    1 in which case "nearest" is used, to avoid divide-by-zero issues.
-
-    Parameters:
-    ----------
-    xds : xr.Dataset
-        Xarray dataset to interpolate (presumably a pointing_xds or an xds of
-        ephemeris variables)
-    interp_time : Union[xr.DataArray, None]
-        Time axis to interpolate the dataset to (usually main MSv4 time)
-    message_prefix: str
-        A prefix for info/debug/etc. messages
-
-    Returns:
-    -------
-    interpolated_xds : xr.Dataset
-        xarray dataset with time axis interpolated to interp_time.
-    """
-    if interp_time is not None:
-        points_before = xds[time_name].size
-        if points_before > 1:
-            method = "linear"
-        else:
-            method = "nearest"
-        xds = xds.interp(
-            {time_name: interp_time.data}, method=method, assume_sorted=True
-        )
-        # scan_name sneaks in as a coordinate of the main time axis, drop it
-        if "scan_name" in xds.coords:
-            xds = xds.drop_vars("scan_name")
-        points_after = xds[time_name].size
-        logger.debug(
-            f"{message_prefix}: interpolating the time coordinate "
-            f"from {points_before} to {points_after} points"
-        )
-
-    return xds
-
-
 def make_taql_where_weather(
     in_file: str, ant_xds_station_name_ids: xr.DataArray
 ) -> str:
@@ -233,6 +184,7 @@ def prepare_generic_weather_xds_and_station_name(
     generic_weather_xds = load_generic_table(
         in_file,
         "WEATHER",
+        timecols=["TIME"],
         rename_ids=subt_rename_ids["WEATHER"],
         taql_where=taql_where,
     )
@@ -282,7 +234,7 @@ def finalize_station_position(
         # borrow location frame attributes from antenna position
         weather_xds["STATION_POSITION"].attrs = ant_position_with_ids.attrs
     else:
-        # borrow from
+        # borrow from ant_position_with_ids but without carrying over other coords
         weather_xds = weather_xds.assign(
             {
                 "STATION_POSITION": (
@@ -318,6 +270,7 @@ def create_weather_xds(in_file: str, ant_position_with_ids: xr.DataArray):
         generic_weather_xds = load_generic_table(
             in_file,
             "WEATHER",
+            timecols=["TIME"],
            rename_ids=subt_rename_ids["WEATHER"],
         )
     except ValueError as _exc:
@@ -342,7 +295,7 @@ def create_weather_xds(in_file: str, ant_position_with_ids: xr.DataArray):
     dims_station_time = ["station_name", "time_weather"]
     dims_station_time_position = dims_station_time + ["cartesian_pos_label"]
     to_new_data_variables = {
-        "
+        "H2O": ["H2O", dims_station_time],
         "IONOS_ELECTRON": ["IONOS_ELECTRON", dims_station_time],
         "PRESSURE": ["PRESSURE", dims_station_time],
         "REL_HUMIDITY": ["REL_HUMIDITY", dims_station_time],
@@ -381,6 +334,8 @@ def create_weather_xds(in_file: str, ant_position_with_ids: xr.DataArray):
     for data_var in weather_xds:
         if weather_xds.data_vars[data_var].dtype != np.float64:
             weather_xds[data_var] = weather_xds[data_var].astype(np.float64)
+    if "time_weather" in weather_xds.coords:
+        weather_xds.coords["time_weather"].attrs["type"] = "time_weather"
 
     return weather_xds
 
@@ -437,10 +392,10 @@ def correct_generic_pointing_xds(
         and generic_pointing_xds.sizes["dir"] == 0
     ):
         # When some direction variables are "empty" but some are populated properly
-        if "dim_2" in generic_pointing_xds[
+        if "dim_2" in generic_pointing_xds[data_var_name].sizes:
             data_var_data = xr.DataArray(
-                generic_pointing_xds[
-                dims=generic_pointing_xds[
+                generic_pointing_xds[data_var_name].values,
+                dims=generic_pointing_xds[data_var_name].dims,
             )
         else:
             shape = tuple(
@@ -449,7 +404,7 @@ def correct_generic_pointing_xds(
             ) + (2,)
             data_var_data = xr.DataArray(
                 np.full(shape, np.nan),
-                dims=generic_pointing_xds[
+                dims=generic_pointing_xds[data_var_name].dims,
             )
         correct_pointing_xds[data_var_name].data = data_var_data
 
@@ -501,6 +456,7 @@ def create_pointing_xds(
     generic_pointing_xds = load_generic_table(
         in_file,
         "POINTING",
+        timecols=["TIME"],
         rename_ids=subt_rename_ids["POINTING"],
         taql_where=taql_where,
     )
@@ -608,7 +564,7 @@ def prepare_generic_sys_cal_xds(generic_sys_cal_xds: xr.Dataset) -> xr.Dataset:
         )
     elif (
         "frequency" in generic_sys_cal_xds.sizes
-        and
+        and "dim_3" not in generic_sys_cal_xds.sizes
     ):
         # because order is (...,frequency,receptor), when frequency is missing
         # receptor can get wrongly labeled as frequency
@@ -665,6 +621,7 @@ def create_system_calibration_xds(
     generic_sys_cal_xds = load_generic_table(
         in_file,
         "SYSCAL",
+        timecols=["TIME"],
         rename_ids=subt_rename_ids["SYSCAL"],
         taql_where=(
             f" where (SPECTRAL_WINDOW_ID = {spectral_window_id})"
@@ -725,11 +682,9 @@ def create_system_calibration_xds(
         "frequency_system_cal": generic_sys_cal_xds.coords["frequency"].data
     }
     sys_cal_xds = sys_cal_xds.assign_coords(frequency_coord)
-    frequency_measure = {
-        "
-
-        "observer": main_xds_frequency.attrs["observer"],
-    }
+    frequency_measure = make_spectral_coord_measure_attrs(
+        main_xds_frequency.attrs["units"], main_xds_frequency.attrs["observer"]
+    )
     sys_cal_xds.coords["frequency_system_cal"].attrs.update(frequency_measure)
 
     sys_cal_xds = rename_and_interpolate_to_time(
```
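`create_system_calibration_xds` gets the same dict-helper treatment: the inline frequency measure dict becomes a `make_spectral_coord_measure_attrs(units, observer)` call. Only the `observer` key and the argument order are visible in this diff (the removed literal is truncated), so the sketch below is a guess at the returned shape; the `"spectral_coord"` type value in particular is an assumption:

```python
# Hypothetical reconstruction; only "observer" is visible in the removed
# literal above, and the "type" value is assumed, not taken from xradio.
def make_spectral_coord_measure_attrs(units, observer) -> dict:
    return {
        "type": "spectral_coord",  # assumption
        "units": units,
        "observer": observer,
    }
```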
The last hunk in this file pins down the previously truncated `units` value for the phased-array rotation matrix:

```diff
@@ -828,7 +783,7 @@ def create_phased_array_xds(
     }
     data_vars["COORDINATE_AXES"].attrs = {
         "type": "rotation_matrix",
-        "units":
+        "units": "dimensionless",
     }
     # Remove the "frame" attribute if it exists, because ELEMENT_OFFSET is
     # defined in a station-local frame for which no standard name exists
```
xradio/measurement_set/_utils/_msv2/partition_queries.py

```diff
@@ -1,14 +1,13 @@
 import itertools
 import toolviper.utils.logger as logger
-from pathlib import Path
-from typing import Dict, List, Tuple, Union
 
 import numpy as np
-import xarray as xr
 
-
+try:
+    from casacore import tables
+except ImportError:
+    import xradio._utils._casacore.casacore_from_casatools as tables
 
-from ._tables.table_query import open_table_ro, open_query
 from ._tables.read import table_exists
 
 
```
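The guarded import above is the user-visible half of the new `casacore_from_casatools` shim (`xradio/_utils/_casacore/casacore_from_casatools.py`, +1001 lines in the file list): python-casacore is preferred when installed, otherwise the casatools-backed module stands in under the same `tables` name. Downstream code can then stay backend-agnostic; for example (the `.ms` path is a placeholder, and the call pattern mirrors usage elsewhere in this diff):

```python
try:
    from casacore import tables
except ImportError:
    import xradio._utils._casacore.casacore_from_casatools as tables

# Open a MeasurementSet read-only; either backend is expected to accept this.
t = tables.table("my_observation.ms", readonly=True, ack=False)
print(t.nrows())
t.close()
```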
Continuing in `partition_queries.py`, `"ANTENNA1"` joins the documented partition keys, and the deprecated intent-based partitioning helpers are deleted:

```diff
@@ -28,7 +27,7 @@ def create_partitions(in_file: str, partition_scheme: list) -> list[dict]:
     partition_scheme: list
         A MS v4 can only contain a single data description (spectral window and polarization setup), and observation mode. Consequently, the MS v2 is partitioned when converting to MS v4.
         In addition to data description and polarization setup a finer partitioning is possible by specifying a list of partitioning keys. Any combination of the following keys are possible:
-        "FIELD_ID", "SCAN_NUMBER", "STATE_ID", "SOURCE_ID", "SUB_SCAN_NUMBER".
+        "FIELD_ID", "SCAN_NUMBER", "STATE_ID", "SOURCE_ID", "SUB_SCAN_NUMBER", "ANTENNA1".
         For mosaics where the phase center is rapidly changing (such as VLA on the fly mosaics) partition_scheme should be set to an empty list []. By default, ["FIELD_ID"].
     Returns
     -------
@@ -149,259 +148,3 @@ def create_partitions(in_file: str, partition_scheme: list) -> list[dict]:
         partitions.append(partition_info)
 
     return partitions
-
-
-# Used by code that will be deprecated at some stage. See #192
-# Still need to clarify what to do about intent string filtering ('WVR', etc.)
-
-
-def make_partition_ids_by_ddi_intent(
-    infile: str, spw_names: xr.DataArray
-) -> Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray]:
-    """
-    Produces arrays of per-partition ddi, scan, state_id, for when
-    using the partition scheme 'intents' (ddi, scan, subscans(state_ids))
-
-    Parameters
-    ----------
-    infile : str
-        return: arrays with indices that define every partition
-    spw_names: xr.DataArray
-
-
-    Returns
-    -------
-    Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray]
-        arrays with indices that define every partition
-    """
-    # TODO: could explore other TAQL alternatives, like
-    # select ... from ::STATE where OBS_MODE = ...
-    #
-    # This will work only if intents are already alphabetically sorted (grouped),
-    # won't work for alternating intents:
-    # taql_intents = "select rowid() as ROWS from $state_tbl GROUPBY OBS_MODE "
-
-    with open_table_ro(str(Path(infile, "STATE"))) as state_tbl:
-        distinct_obs_mode = find_distinct_obs_mode(infile, state_tbl)
-
-        if distinct_obs_mode is None:
-            return partition_when_empty_state(infile)
-
-        with open_table_ro(infile) as main_tbl:
-            (
-                data_desc_id,
-                state_id_partitions,
-                intent_names,
-            ) = make_ddi_state_intent_lists(
-                main_tbl, state_tbl, distinct_obs_mode, spw_names
-            )
-
-    # Take whatever scans given by the STATE_IDs and DDIs
-    scan_number = [None] * len(state_id_partitions)
-
-    return data_desc_id, scan_number, state_id_partitions, intent_names
-
-
-def find_distinct_obs_mode(
-    infile: str, state_table: tables.table
-) -> Union[List[str], None]:
-    """
-    Produce a list of unique "scan/subscan" intents.
-
-    Parameters
-    ----------
-    infile : str
-        Path to the MS
-    state_table : tables.table
-        casacore table object to read from
-
-    Returns
-    -------
-    Union[List[str], None]
-        List of unique "scan/subscan" intents as given in the
-        OBS_MODE column of the STATE subtable. None if the STATE subtable
-        is empty or there is a problem reading it
-    """
-    taql_distinct_intents = "select DISTINCT OBS_MODE from $state_table"
-    with open_query(state_table, taql_distinct_intents) as query_intents:
-        if query_intents.nrows() == 0:
-            logger.warning(
-                "STATE subtable has no data. Cannot partition by scan/subscan intent"
-            )
-            return None
-
-        distinct_obs_mode = query_intents.getcol("OBS_MODE")
-        logger.debug(
-            f" Query for distinct OBS_MODE len: {len(distinct_obs_mode)}, values: {distinct_obs_mode}"
-        )
-        return distinct_obs_mode
-
-
-def make_ddi_state_intent_lists(
-    main_tbl: tables.table,
-    state_tbl: tables.table,
-    distinct_obs_mode: np.ndarray,
-    spw_name_by_ddi: Dict[int, str],
-) -> Tuple[np.ndarray, np.ndarray, np.ndarray]:
-    """
-    Produce arrays of (ddi indices, state indices, intent string)
-    for every distinct intent string, where every item represents one
-    partition of the main table
-
-    As the obs_mode strings have concatenated intent strings from all
-    the scan and subscan intents, this function has started
-    implementing some simple heuristics to remove the intent items
-    that are not related to the respective DDIs (for example WVR
-    intent is the only kept when the DDI/SPW has WVR in its name). See
-    call to filter_intents_per_ddi()
-
-    Parameters
-    ----------
-    main_tbl : tables.table
-        main MS table openend as a casacore.tables.table
-    state_tbl : tables.table
-        STATE subtable openend as a casacore.tables.table
-    distinct_obs_mode : np.ndarray
-        list of unique/distinct OBS_MODE strings from the STATE table
-    spw_name_by_ddi: Dict[int, str]
-
-
-    Returns
-    -------
-    Tuple[np.ndarray, np.ndarray, np.ndarray]
-        arrays of (ddi indices, state indices, intent string)
-    """
-    data_desc_id, state_id_partitions, intent_names = [], [], []
-    for intent in distinct_obs_mode:
-        where_intent = f"where OBS_MODE = '{intent}'"
-        taql_states = f"select ROWID() as ROWS from $state_tbl {where_intent}"
-        with open_query(state_tbl, taql_states) as query_states:
-            state_ids_for_intent = query_states.getcol("ROWS")
-
-        state_ids = " OR STATE_ID = ".join(np.char.mod("%d", state_ids_for_intent))
-        taql_partition = (
-            f"select DISTINCT DATA_DESC_ID from $main_tbl where STATE_ID = {state_ids}"
-        )
-        with open_query(main_tbl, taql_partition) as query_ddi_intent:
-            # No data for these STATE_IDs
-            if query_ddi_intent.nrows() == 0:
-                continue
-
-            # Will implicitly take whatever scans given the STATE_IDs
-            # and DDIs scan_number. Not needed:
-            # scan_number = query_ddi_intent.getcol("SCAN_NUMBER")
-            ddis = query_ddi_intent.getcol("DATA_DESC_ID")
-
-        data_desc_id.extend(ddis)
-        state_id_partitions.extend([state_ids_for_intent] * len(ddis))
-
-        # Try to select/exclude confusing or mixed intent names such as 'WVR#*'
-        intents_ddi = filter_intents_per_ddi(ddis, "WVR", intent, spw_name_by_ddi)
-        intent_names.extend(intents_ddi)
-
-    logger.debug(
-        f"Produced data_desc_id: {data_desc_id},\n state_id_partitions: {state_id_partitions}"
-    )
-    return data_desc_id, state_id_partitions, intent_names
-
-
-def filter_intents_per_ddi(
-    ddis: List[int], substr: str, intents: str, spw_name_by_ddi: Dict[int, str]
-) -> List[str]:
-    """
-    For a given pair of:
-    - substring (say 'WVR') associated with a type of intent we want to differentiate
-    - intents string (multiple comma-separated scan/subscan intents)
-    => do: for every DDI passed in the list of ddis, either keep only the
-    intents that have that substring (if there are any) or drop them, depending on
-    whether that substring is present in the SPW name. This is to filter in only
-    the intents that really apply to every DDI/SPW.
-
-    Parameters
-    ----------
-    ddis : List[int]
-        list of ddis for which the intents have to be filtered
-    substr : str
-        substring to filter by
-    intents : str
-        string with a comma-separated list of individual
-        scan/subscan intent strings (like scan/subscan intents as stored
-        in the MS STATE/OBS_MODE
-    spw_name_by_ddi : Dict[int, str]
-        SPW names by DDI ID (row index) key
-
-    Returns
-    -------
-    List[str]
-        list where the intents related to 'substr' have been filtered in our out
-    """
-    present = substr in intents
-    # Nothing to effectively filter, full cs-list of intents apply to all DDIs
-    if not present:
-        return [intents] * len(ddis)
-
-    every_intent = intents.split(",")
-    filtered_intents = []
-    for ddi in ddis:
-        spw_name = spw_name_by_ddi.get(ddi, "")
-
-        if not spw_name:
-            # we cannot say / cannot filter
-            filtered_intents.append(intents)
-            continue
-
-        # A not-xor to select/deselect (or keep-only/drop) the intents that apply
-        # to this DDI
-        ddi_intents = [
-            intnt for intnt in every_intent if (substr in intnt) == (substr in spw_name)
-        ]
-        ddi_intents = ",".join(ddi_intents)
-        filtered_intents.append(ddi_intents)
-
-    return filtered_intents
-
-
-def partition_when_empty_state(
-    infile: str,
-) -> Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray]:
-    """
-    Generate fallback partition ids when trying to partition by
-    'intent' but the STATE table is empty.
-
-    Some MSs have no STATE rows and in the main table STATE_ID==-1
-    (that is not a valid MSv2 but it happens).
-
-    Parameters
-    ----------
-    infile : str
-        Path to the MS
-
-    Returns
-    -------
-    Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray]
-        same as make_partition_ids_by_ddi_intent but with
-        effectively only ddi indices and other indices set to None ("any
-        IDs found")
-    """
-    try:
-        main_table = None
-
-        main_table = tables.table(
-            infile, readonly=True, lockoptions={"option": "usernoread"}, ack=False
-        )
-        taql_ddis = "select DISTINCT DATA_DESC_ID from $main_table"
-        with open_query(main_table, taql_ddis) as query_per_intent:
-            # Will take whatever scans given the STATE_IDs and DDIs
-            # scan_number = query_per_intent.getcol("SCAN_NUMBER")
-            distinct_ddis = query_per_intent.getcol("DATA_DESC_ID")
-
-            logger.debug(
-                f"Producing {len(distinct_ddis)} partitions for ddis: {distinct_ddis}"
-            )
-            nparts = len(distinct_ddis)
-
-    finally:
-        if main_table:
-            main_table.close()
-
-    return distinct_ddis, [None] * nparts, [None] * nparts, [""] * nparts
```
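With the deprecated intent-based helpers gone, `create_partitions` is the remaining entry point in this module. A minimal illustrative call, using the signature from the hunk header above (the `.ms` paths are placeholders):

```python
from xradio.measurement_set._utils._msv2.partition_queries import create_partitions

# Default-style partitioning by field; each returned dict describes one
# future MSv4 (single data description and observation mode).
partitions = create_partitions("my_observation.ms", partition_scheme=["FIELD_ID"])

# On-the-fly mosaics with rapidly changing phase centers: no extra keys.
otf_partitions = create_partitions("my_otf_mosaic.ms", partition_scheme=[])
```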
xradio/measurement_set/_utils/_msv2/subtables.py

```diff
@@ -1,16 +1,3 @@
-import os
-
-import toolviper.utils.logger as logger
-
-from pathlib import Path
-from typing import Dict, List
-
-import xarray as xr
-
-from ._tables.read import load_generic_table, table_exists
-from ._tables.read_subtables import read_ephemerides, read_delayed_pointing_table
-
-
 subt_rename_ids = {
     "ANTENNA": {"row": "antenna_id", "dim_1": "xyz"},
     "FEED": {"dim_1": "xyz", "dim_2": "receptor", "dim_3": "receptor2"},
@@ -27,97 +14,3 @@ subt_rename_ids = {
     # Would make sense for non-std "WS_NX_STATION_POSITION"
     "WEATHER": {"dim_1": "xyz"},
 }
-
-
-def read_ms_subtables(
-    infile: str, done_subt: List[str], asdm_subtables: bool = False
-) -> Dict[str, xr.Dataset]:
-    """
-    Read MSv2 subtables (main table keywords) as xr.Dataset
-
-    Parameters
-    ----------
-    infile : str
-        input MeasurementSet path
-    done_subt : List[str]
-        Subtables that were already read, to skip them
-    asdm_subtables : bool (Default value = False)
-        Whether to also read ASDM_* subtables
-
-    Returns
-    -------
-    Dict[str, xr.Dataset]
-        dict of xarray datasets read from subtables (metadata tables)
-
-    """
-    ignore_msv2_cols_subt = ["FLAG_CMD", "FLAG_ROW", "BEAM_ID"]
-    skip_tables = ["SORTED_TABLE", "FLAG_CMD"] + done_subt
-    stbl_list = sorted(
-        [
-            tname
-            for tname in os.listdir(infile)
-            if (tname not in skip_tables)
-            and (os.path.isdir(os.path.join(infile, tname)))
-            and (table_exists(os.path.join(infile, tname)))
-        ]
-    )
-
-    subtables = {}
-    for _ii, subt_name in enumerate(stbl_list):
-        if not asdm_subtables and subt_name.startswith("ASDM_"):
-            logger.debug(f"skipping ASDM_ subtable {subt_name}...")
-            continue
-        else:
-            logger.debug(f"reading subtable {subt_name}...")
-
-        if subt_name == "POINTING":
-            subt_path = Path(infile, subt_name)
-            rename_ids = {"dim_2": "n_polynomial", "dim_3": "dir"}
-            xds = read_delayed_pointing_table(str(subt_path), rename_ids=rename_ids)
-        else:
-            xds = load_generic_table(
-                infile,
-                subt_name,
-                timecols=["TIME"],
-                ignore=ignore_msv2_cols_subt,
-                rename_ids=subt_rename_ids.get(subt_name, None),
-            )
-
-        if len(xds.sizes) != 0:
-            subtables[subt_name.lower()] = xds
-
-    if "field" in subtables:
-        ephem_xds = read_ephemerides(infile)
-        if ephem_xds:
-            subtables["ephemerides"] = ephem_xds
-
-    return subtables
-
-
-def add_pointing_to_partition(
-    xds_part: xr.Dataset, xds_pointing: xr.Dataset
-) -> xr.Dataset:
-    """
-    Take pointing variables from a (delayed) pointing dataset and
-    transfer them to a main table partition dataset (interpolating into
-    the destination time axis)
-
-    Parameters
-    ----------
-    xds_part : xr.Dataset
-        a partition/sub-xds of the main table
-    xds_pointing : xr.Dataset
-        the xds read from the pointing subtable
-
-    Returns
-    -------
-    xr.Dataset
-        partition xds with pointing variables added/interpolated from the
-        pointing_xds into its time axis
-
-    """
-    interp_xds = xds_pointing.interp(time=xds_part.time, method="nearest")
-    for var in interp_xds.data_vars:
-        xds_part[f"pointing_{var}"] = interp_xds[var]
-
-    return xds_part
```
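After this change `subtables.py` keeps only the `subt_rename_ids` mapping, which other modules now import by absolute path (see the `msv4_sub_xdss.py` hunks above) and pass to `load_generic_table` via `rename_ids=`:

```python
from xradio.measurement_set._utils._msv2.subtables import subt_rename_ids

# Entries visible in this diff:
print(subt_rename_ids["ANTENNA"])  # {"row": "antenna_id", "dim_1": "xyz"}
print(subt_rename_ids["WEATHER"])  # {"dim_1": "xyz"}
```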
xradio/measurement_set/_utils/_utils/interpolate.py (new file)

```diff
@@ -0,0 +1,60 @@
+from typing import Union
+
+import xarray as xr
+
+import toolviper.utils.logger as logger
+
+
+def interpolate_to_time(
+    xds: xr.Dataset,
+    interp_time: Union[xr.DataArray, None],
+    message_prefix: str,
+    time_name: str = "time",
+) -> xr.Dataset:
+    """
+    Interpolate the time coordinate of the input xarray dataset to the
+    a data array. This can be used for example to interpolate a pointing_xds
+    to the time coord of the (main) MSv4, or similarly the ephemeris
+    data variables of a field_and_source_xds.
+
+    Uses interpolation method "linear", unless the source number of points is
+    1 in which case "nearest" is used, to avoid divide-by-zero issues.
+
+    Parameters:
+    ----------
+    xds : xr.Dataset
+        Xarray dataset to interpolate (presumably a pointing_xds or an xds of
+        ephemeris variables)
+    interp_time : Union[xr.DataArray, None]
+        Time axis to interpolate the dataset to (usually main MSv4 time)
+    message_prefix: str
+        A prefix for info/debug/etc. messages
+
+    Returns:
+    -------
+    interpolated_xds : xr.Dataset
+        xarray dataset with time axis interpolated to interp_time.
+    """
+    if interp_time is not None:
+        points_before = xds[time_name].size
+        if points_before > 1:
+            method = "linear"
+        else:
+            method = "nearest"
+        xds = xds.interp(
+            {time_name: interp_time.data}, method=method, assume_sorted=True
+        )
+        # scan_name sneaks in as a coordinate of the main time axis, drop it
+        if (
+            "type" in xds.attrs
+            and xds.attrs["type"] not in ["visibility", "spectrum", "wvr"]
+            and "scan_name" in xds.coords
+        ):
+            xds = xds.drop_vars("scan_name")
+        points_after = xds[time_name].size
+        logger.debug(
+            f"{message_prefix}: interpolating the time coordinate "
+            f"from {points_before} to {points_after} points"
+        )
+
+    return xds
```
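`interpolate_to_time` moves into this new module (it was previously defined in `msv4_sub_xdss.py`, see the removal above) and gains an attrs-based guard around the `scan_name` drop. A toy usage sketch; the dataset contents and variable names are illustrative, and linear interpolation via `xds.interp` assumes scipy is available:

```python
import numpy as np
import xarray as xr

from xradio.measurement_set._utils._utils.interpolate import interpolate_to_time

# Stand-in for a pointing-style dataset on a coarse time grid.
coarse = xr.Dataset(
    {"ELEVATION": ("time", np.array([10.0, 20.0, 30.0]))},
    coords={"time": np.array([0.0, 10.0, 20.0])},
)
target_time = xr.DataArray(np.linspace(0.0, 20.0, 9), dims="time")

# Three source points, so the "linear" branch is taken.
interp = interpolate_to_time(coarse, target_time, message_prefix="pointing_xds")
assert interp.sizes["time"] == 9
```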
xradio/measurement_set/_utils/_zarr/encoding.py

```diff
@@ -4,11 +4,6 @@ def add_encoding(xds, compressor, chunks=None):
 
     chunks = {**dict(xds.sizes), **chunks}  # Add missing sizes if presents.
 
-    encoding = {}
     for da_name in list(xds.data_vars):
-
-
-            xds[da_name].encoding = {"compressor": compressor, "chunks": da_chunks}
-            # print(xds[da_name].encoding)
-        else:
-            xds[da_name].encoding = {"compressor": compressor}
+        da_chunks = [chunks[dim_name] for dim_name in xds[da_name].sizes]
+        xds[da_name].encoding = {"compressor": compressor, "chunks": da_chunks}
```
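`add_encoding` now sets the compressor and chunk encoding uniformly for every data variable instead of branching. A small illustrative call; the compressor choice is arbitrary (any numcodecs compressor object should work), and the data are placeholders:

```python
import numpy as np
import xarray as xr
from numcodecs import Blosc

from xradio.measurement_set._utils._zarr.encoding import add_encoding

xds = xr.Dataset({"VISIBILITY": (("time", "frequency"), np.zeros((100, 64)))})

# Dims missing from `chunks` fall back to their full size (the "Add missing
# sizes" line above), so the per-variable chunks come out as [25, 64].
add_encoding(xds, compressor=Blosc(cname="zstd", clevel=2), chunks={"time": 25})
print(xds["VISIBILITY"].encoding["chunks"])  # [25, 64]
```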