xradio 0.0.41__py3-none-any.whl → 0.0.43__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- xradio/_utils/coord_math.py +100 -0
- xradio/_utils/list_and_array.py +49 -4
- xradio/_utils/schema.py +36 -16
- xradio/image/_util/_casacore/xds_from_casacore.py +5 -5
- xradio/image/_util/_casacore/xds_to_casacore.py +12 -11
- xradio/image/_util/_fits/xds_from_fits.py +18 -17
- xradio/image/_util/_zarr/zarr_low_level.py +29 -12
- xradio/image/_util/common.py +1 -1
- xradio/image/_util/image_factory.py +1 -1
- xradio/{correlated_data → measurement_set}/__init__.py +7 -4
- xradio/measurement_set/_utils/__init__.py +5 -0
- xradio/{correlated_data/_utils/_ms → measurement_set/_utils/_msv2}/_tables/load_main_table.py +1 -1
- xradio/{correlated_data/_utils/_ms → measurement_set/_utils/_msv2}/_tables/read.py +1 -1
- xradio/{correlated_data/_utils/_ms → measurement_set/_utils/_msv2}/conversion.py +115 -37
- xradio/{correlated_data/_utils/_ms → measurement_set/_utils/_msv2}/create_antenna_xds.py +62 -37
- xradio/{correlated_data/_utils/_ms → measurement_set/_utils/_msv2}/create_field_and_source_xds.py +117 -25
- xradio/{correlated_data/_utils/_ms → measurement_set/_utils/_msv2}/msv4_sub_xdss.py +47 -13
- xradio/{correlated_data/_utils/_ms → measurement_set/_utils/_msv2}/partition_queries.py +4 -0
- xradio/{correlated_data → measurement_set}/_utils/_utils/xds_helper.py +1 -1
- xradio/{correlated_data/_utils/ms.py → measurement_set/_utils/msv2.py} +4 -4
- xradio/{correlated_data → measurement_set}/convert_msv2_to_processing_set.py +7 -2
- xradio/{correlated_data → measurement_set}/load_processing_set.py +5 -5
- xradio/measurement_set/measurement_set_xds.py +110 -0
- xradio/{correlated_data → measurement_set}/open_processing_set.py +9 -16
- xradio/measurement_set/processing_set.py +777 -0
- xradio/{correlated_data → measurement_set}/schema.py +1110 -586
- xradio/schema/check.py +42 -22
- xradio/schema/dataclass.py +56 -6
- xradio/sphinx/__init__.py +12 -0
- xradio/sphinx/schema_table.py +351 -0
- {xradio-0.0.41.dist-info → xradio-0.0.43.dist-info}/METADATA +9 -6
- xradio-0.0.43.dist-info/RECORD +76 -0
- {xradio-0.0.41.dist-info → xradio-0.0.43.dist-info}/WHEEL +1 -1
- xradio/_utils/common.py +0 -101
- xradio/correlated_data/_utils/__init__.py +0 -5
- xradio/correlated_data/correlated_xds.py +0 -13
- xradio/correlated_data/processing_set.py +0 -301
- xradio/correlated_data/test__processing_set.py +0 -74
- xradio-0.0.41.dist-info/RECORD +0 -75
- /xradio/{correlated_data/_utils/_ms → measurement_set/_utils/_msv2}/_tables/load.py +0 -0
- /xradio/{correlated_data/_utils/_ms → measurement_set/_utils/_msv2}/_tables/read_main_table.py +0 -0
- /xradio/{correlated_data/_utils/_ms → measurement_set/_utils/_msv2}/_tables/read_subtables.py +0 -0
- /xradio/{correlated_data/_utils/_ms → measurement_set/_utils/_msv2}/_tables/table_query.py +0 -0
- /xradio/{correlated_data/_utils/_ms → measurement_set/_utils/_msv2}/_tables/write.py +0 -0
- /xradio/{correlated_data/_utils/_ms → measurement_set/_utils/_msv2}/_tables/write_exp_api.py +0 -0
- /xradio/{correlated_data/_utils/_ms → measurement_set/_utils/_msv2}/chunks.py +0 -0
- /xradio/{correlated_data/_utils/_ms → measurement_set/_utils/_msv2}/descr.py +0 -0
- /xradio/{correlated_data/_utils/_ms → measurement_set/_utils/_msv2}/msv2_msv3.py +0 -0
- /xradio/{correlated_data/_utils/_ms → measurement_set/_utils/_msv2}/msv2_to_msv4_meta.py +0 -0
- /xradio/{correlated_data/_utils/_ms → measurement_set/_utils/_msv2}/msv4_info_dicts.py +0 -0
- /xradio/{correlated_data/_utils/_ms → measurement_set/_utils/_msv2}/optimised_functions.py +0 -0
- /xradio/{correlated_data/_utils/_ms → measurement_set/_utils/_msv2}/partitions.py +0 -0
- /xradio/{correlated_data/_utils/_ms → measurement_set/_utils/_msv2}/subtables.py +0 -0
- /xradio/{correlated_data → measurement_set}/_utils/_utils/cds.py +0 -0
- /xradio/{correlated_data → measurement_set}/_utils/_utils/partition_attrs.py +0 -0
- /xradio/{correlated_data → measurement_set}/_utils/_utils/stokes_types.py +0 -0
- /xradio/{correlated_data → measurement_set}/_utils/_zarr/encoding.py +0 -0
- /xradio/{correlated_data → measurement_set}/_utils/_zarr/read.py +0 -0
- /xradio/{correlated_data → measurement_set}/_utils/_zarr/write.py +0 -0
- /xradio/{correlated_data → measurement_set}/_utils/zarr.py +0 -0
- {xradio-0.0.41.dist-info → xradio-0.0.43.dist-info}/LICENSE.txt +0 -0
- {xradio-0.0.41.dist-info → xradio-0.0.43.dist-info}/top_level.txt +0 -0
xradio/{correlated_data/_utils/_ms → measurement_set/_utils/_msv2}/create_field_and_source_xds.py
RENAMED
|
@@ -6,20 +6,27 @@ import numpy as np
|
|
|
6
6
|
import xarray as xr
|
|
7
7
|
|
|
8
8
|
import toolviper.utils.logger as logger
|
|
9
|
-
from xradio.
|
|
10
|
-
from xradio.
|
|
11
|
-
from xradio.
|
|
9
|
+
from xradio.measurement_set._utils._msv2.msv4_sub_xdss import interpolate_to_time
|
|
10
|
+
from xradio.measurement_set._utils._msv2.subtables import subt_rename_ids
|
|
11
|
+
from xradio.measurement_set._utils._msv2._tables.read import (
|
|
12
12
|
convert_casacore_time_to_mjd,
|
|
13
13
|
make_taql_where_between_min_max,
|
|
14
14
|
load_generic_table,
|
|
15
15
|
)
|
|
16
|
-
from xradio._utils.
|
|
16
|
+
from xradio._utils.list_and_array import cast_to_str
|
|
17
|
+
from xradio._utils.coord_math import (
|
|
18
|
+
convert_to_si_units,
|
|
19
|
+
add_position_offsets,
|
|
20
|
+
wrap_to_pi,
|
|
21
|
+
)
|
|
22
|
+
|
|
17
23
|
from xradio._utils.list_and_array import (
|
|
18
24
|
check_if_consistent,
|
|
19
25
|
unique_1d,
|
|
20
26
|
to_np_array,
|
|
21
27
|
)
|
|
22
28
|
from xradio._utils.schema import (
|
|
29
|
+
casacore_to_msv4_measure_type,
|
|
23
30
|
column_description_casacore_to_msv4_measure,
|
|
24
31
|
convert_generic_xds_to_xradio_schema,
|
|
25
32
|
)
|
|
@@ -169,9 +176,21 @@ def extract_ephemeris_info(
|
|
|
169
176
|
), "Only geocentric observer ephemeris are supported."
|
|
170
177
|
|
|
171
178
|
if "posrefsys" in ephemeris_meta:
|
|
172
|
-
|
|
179
|
+
# Note the phase center can be given as "J2000" or "J2000.0"
|
|
180
|
+
ref_frame = (
|
|
181
|
+
ephemeris_meta["posrefsys"]
|
|
182
|
+
.replace("ICRF/", "", 1)
|
|
183
|
+
.replace("J2000.0", "J2000", 1)
|
|
184
|
+
)
|
|
185
|
+
if ref_frame in casacore_to_msv4_measure_type["direction"].get("Ref_map", {}):
|
|
186
|
+
ref_frame = casacore_to_msv4_measure_type["direction"]["Ref_map"][ref_frame]
|
|
187
|
+
else:
|
|
188
|
+
logger.debug(
|
|
189
|
+
f"Unrecognized casacore direction reference frame found in posrefsys: {ref_frame}"
|
|
190
|
+
)
|
|
191
|
+
sky_coord_frame = ref_frame.lower()
|
|
173
192
|
else:
|
|
174
|
-
sky_coord_frame = "
|
|
193
|
+
sky_coord_frame = "icrs" # We will have to just assume this.
|
|
175
194
|
|
|
176
195
|
# Find out which keyword is used for units (UNIT/QuantumUnits)
|
|
177
196
|
if "UNIT" in ephemeris_column_description["RA"]["keywords"]:
|
|
@@ -195,7 +214,7 @@ def extract_ephemeris_info(
|
|
|
195
214
|
"type": "location",
|
|
196
215
|
"units": ["deg", "deg", "m"],
|
|
197
216
|
"data": observer_position,
|
|
198
|
-
"
|
|
217
|
+
"frame": "WGS84",
|
|
199
218
|
"origin_object_name": "Earth",
|
|
200
219
|
"coordinate_system": ephemeris_meta["obsloc"].lower(),
|
|
201
220
|
}
|
|
@@ -260,7 +279,7 @@ def extract_ephemeris_info(
|
|
|
260
279
|
}
|
|
261
280
|
)
|
|
262
281
|
|
|
263
|
-
# Add optional data:
|
|
282
|
+
# Add optional data: SUB_OBSERVER_DIRECTION and SUB_SOLAR_POSITION
|
|
264
283
|
if "DiskLong" in ephemeris_column_description:
|
|
265
284
|
key_lon = "DiskLong"
|
|
266
285
|
key_lat = "DiskLat"
|
|
@@ -283,7 +302,7 @@ def extract_ephemeris_info(
|
|
|
283
302
|
temp_xds["SUB_OBSERVER_DIRECTION"].attrs.update(
|
|
284
303
|
{
|
|
285
304
|
"type": "location",
|
|
286
|
-
"
|
|
305
|
+
"frame": "Undefined",
|
|
287
306
|
"origin_object_name": ephemeris_meta["NAME"],
|
|
288
307
|
"coordinate_system": "planetodetic",
|
|
289
308
|
"units": [
|
|
@@ -312,7 +331,7 @@ def extract_ephemeris_info(
|
|
|
312
331
|
temp_xds["SUB_SOLAR_POSITION"].attrs.update(
|
|
313
332
|
{
|
|
314
333
|
"type": "location",
|
|
315
|
-
"
|
|
334
|
+
"frame": "Undefined",
|
|
316
335
|
"origin_object_name": "Sun",
|
|
317
336
|
"coordinate_system": "planetodetic",
|
|
318
337
|
"units": [
|
|
@@ -339,8 +358,8 @@ def extract_ephemeris_info(
|
|
|
339
358
|
time_coord_attrs = {
|
|
340
359
|
"type": "time",
|
|
341
360
|
"units": ["s"],
|
|
342
|
-
"scale": "
|
|
343
|
-
"format": "
|
|
361
|
+
"scale": "utc",
|
|
362
|
+
"format": "unix",
|
|
344
363
|
}
|
|
345
364
|
temp_xds["time_ephemeris"].attrs.update(time_coord_attrs)
|
|
346
365
|
|
|
@@ -374,21 +393,28 @@ def extract_ephemeris_info(
|
|
|
374
393
|
interp_time is not None
|
|
375
394
|
), 'ephemeris_interpolate must be True if there is ephemeris data and multiple fields (this will occur if "FIELD_ID" is not in partition_scheme).'
|
|
376
395
|
|
|
396
|
+
field_phase_center = wrap_to_pi(
|
|
397
|
+
xds[center_dv].values + xds["SOURCE_LOCATION"][:, 0:2].values
|
|
398
|
+
)
|
|
399
|
+
field_phase_center = np.column_stack(
|
|
400
|
+
(field_phase_center, np.zeros(xds[center_dv].values.shape[0]))
|
|
401
|
+
)
|
|
402
|
+
field_phase_center[:, -1] = (
|
|
403
|
+
field_phase_center[:, -1] + xds["SOURCE_LOCATION"][:, -1].values
|
|
404
|
+
)
|
|
405
|
+
|
|
377
406
|
xds[center_dv] = xr.DataArray(
|
|
378
|
-
|
|
379
|
-
np.column_stack(
|
|
380
|
-
(xds[center_dv].values, np.zeros(xds[center_dv].values.shape[0]))
|
|
381
|
-
),
|
|
382
|
-
xds["SOURCE_LOCATION"].values,
|
|
383
|
-
),
|
|
407
|
+
field_phase_center,
|
|
384
408
|
dims=[xds["SOURCE_LOCATION"].dims[0], "sky_pos_label"],
|
|
385
409
|
)
|
|
386
410
|
else:
|
|
411
|
+
field_phase_center = (
|
|
412
|
+
np.append(xds[center_dv].values, 0) + xds["SOURCE_LOCATION"].values
|
|
413
|
+
)
|
|
414
|
+
field_phase_center[:, 0:2] = wrap_to_pi(field_phase_center[:, 0:2])
|
|
415
|
+
|
|
387
416
|
xds[center_dv] = xr.DataArray(
|
|
388
|
-
|
|
389
|
-
np.append(xds[center_dv].values, 0),
|
|
390
|
-
xds["SOURCE_LOCATION"].values,
|
|
391
|
-
),
|
|
417
|
+
field_phase_center,
|
|
392
418
|
dims=[xds["SOURCE_LOCATION"].dims[0], "sky_pos_label"],
|
|
393
419
|
)
|
|
394
420
|
|
|
@@ -460,6 +486,65 @@ def make_line_dims_and_coords(
|
|
|
460
486
|
return line_dims, line_coords
|
|
461
487
|
|
|
462
488
|
|
|
489
|
+
def pad_missing_sources(
    source_xds: xr.Dataset, unique_source_ids: np.array
) -> xr.Dataset:
    """
    Pad a source dataset with placeholder entries for source IDs that are missing from it.

    In some MSs there can be source IDs referenced from the field subtable which do not exist in
    the source table: https://github.com/casangi/xradio/issues/266

    This addresses the issue by padding/filling those IDs with "Unknown"/nan values. Produces a
    source_xds that, in addition to the information loaded for the non-missing source IDs, has
    padding for the IDs that are missing from the input MSv2 source table.
    This function does not need to do anything when unique_source_ids is a single value
    (partitioning by "FIELD_ID" or otherwise single field/source).

    Parameters:
    ----------
    source_xds: xr.Dataset
        source dataset to fix/pad missing sources
    unique_source_ids: np.array
        IDs of the sources included in this partition

    Returns:
    -------
    filled_source_xds : xr.Dataset
        source dataset with padding in the originally missing sources
    """

    # Only fill gaps in multi-source xdss. With a single source_id there is nothing to pad.
    if len(unique_source_ids) <= 1:
        return source_xds

    missing_source_ids = [
        source_id
        for source_id in unique_source_ids
        if source_id not in source_xds.coords["SOURCE_ID"]
    ]

    # would like to use the new-ish xr.pad, but it creates issues with indices/coords and is
    # also not free of overheads, as it for example changes all numeric types to float64
    missing_source_xds = xr.full_like(source_xds.isel(SOURCE_ID=0), fill_value=np.nan)
    pad_str = "Unknown"
    pad_str_type = "<U9"
    for var in missing_source_xds.data_vars:
        if np.issubdtype(missing_source_xds.data_vars[var].dtype, np.str_):
            # Avoid truncation to length of previously loaded strings
            missing_source_xds[var] = missing_source_xds[var].astype(
                np.dtype(pad_str_type)
            )
            missing_source_xds[var] = pad_str

    concat_dim = "SOURCE_ID"
    xdss_to_concat = [source_xds]
    for missing_id in missing_source_ids:
        # Dataset item assignment updates the dataset in place, so appending the same template
        # object for every ID would leave all padded entries labelled with the last missing ID.
        # assign_coords returns a new dataset, giving each padded entry its own SOURCE_ID.
        xdss_to_concat.append(
            missing_source_xds.assign_coords({concat_dim: missing_id})
        )
    filled_source_xds = xr.concat(xdss_to_concat, concat_dim).sortby(concat_dim)

    return filled_source_xds
|
|
546
|
+
|
|
547
|
+
|
|
463
548
|
def extract_source_info(
|
|
464
549
|
xds: xr.Dataset,
|
|
465
550
|
path: str,
|
|
@@ -533,15 +618,22 @@ def extract_source_info(
|
|
|
533
618
|
|
|
534
619
|
# This source table time is not the same as the time in the field_and_source_xds that is derived from the main MSv4 time axis.
|
|
535
620
|
# The source_id maps to the time axis in the field_and_source_xds. That is why "if len(source_id) == 1" is used to check if there should be a time axis.
|
|
536
|
-
assert len(source_xds.TIME) <= len(
|
|
537
|
-
|
|
538
|
-
), "Can only process source table with a single time entry for a source_id and spectral_window_id."
|
|
621
|
+
# assert len(source_xds.TIME) <= len(
|
|
622
|
+
# unique_source_id
|
|
623
|
+
# ), "Can only process source table with a single time entry for a source_id and spectral_window_id."
|
|
624
|
+
if len(source_xds.TIME) > len(unique_source_id):
|
|
625
|
+
logger.warning(
|
|
626
|
+
f"Source table has more than one time entry for a source_id and spectral_window_id. This is not currently supported. Only the first time entry will be used."
|
|
627
|
+
)
|
|
628
|
+
source_xds = source_xds.drop_duplicates("SOURCE_ID", keep="first")
|
|
539
629
|
|
|
540
630
|
source_xds = source_xds.isel(TIME=0, SPECTRAL_WINDOW_ID=0, drop=True)
|
|
541
631
|
source_column_description = source_xds.attrs["other"]["msv2"]["ctds_attrs"][
|
|
542
632
|
"column_descriptions"
|
|
543
633
|
]
|
|
544
634
|
|
|
635
|
+
source_xds = pad_missing_sources(source_xds, unique_source_id)
|
|
636
|
+
|
|
545
637
|
# Get source name (the time axis is optional and will probably be required if the partition scheme does not include 'FIELD_ID' or 'SOURCE_ID'.).
|
|
546
638
|
# Note again that this optional time axis has nothing to do with the original time axis in the source table that we drop.
|
|
547
639
|
if len(source_id) == 1:
|
|
@@ -6,6 +6,7 @@ from typing import Tuple, Union
|
|
|
6
6
|
import numpy as np
|
|
7
7
|
import xarray as xr
|
|
8
8
|
|
|
9
|
+
from xradio._utils.coord_math import convert_to_si_units
|
|
9
10
|
from xradio._utils.schema import (
|
|
10
11
|
column_description_casacore_to_msv4_measure,
|
|
11
12
|
convert_generic_xds_to_xradio_schema,
|
|
@@ -144,7 +145,7 @@ def create_weather_xds(in_file: str, ant_xds_station_name_ids: xr.DataArray):
|
|
|
144
145
|
}
|
|
145
146
|
weather_xds = weather_xds.assign_coords(coords)
|
|
146
147
|
|
|
147
|
-
dims_station_time = ["station_name", "
|
|
148
|
+
dims_station_time = ["station_name", "time_weather"]
|
|
148
149
|
to_new_data_variables = {
|
|
149
150
|
"H20": ["H2O", dims_station_time],
|
|
150
151
|
"IONOS_ELECTRON": ["IONOS_ELECTRON", dims_station_time],
|
|
@@ -157,13 +158,23 @@ def create_weather_xds(in_file: str, ant_xds_station_name_ids: xr.DataArray):
|
|
|
157
158
|
}
|
|
158
159
|
|
|
159
160
|
to_new_coords = {
|
|
160
|
-
"TIME": ["
|
|
161
|
+
"TIME": ["time_weather", ["time_weather"]],
|
|
161
162
|
}
|
|
162
163
|
|
|
163
164
|
weather_xds = convert_generic_xds_to_xradio_schema(
|
|
164
165
|
generic_weather_xds, weather_xds, to_new_data_variables, to_new_coords
|
|
165
166
|
)
|
|
166
167
|
|
|
168
|
+
# TODO: option to interpolate to main time
|
|
169
|
+
|
|
170
|
+
# PRESSURE: hPa in MSv2 specs and some MSs => Pa
|
|
171
|
+
weather_xds = convert_to_si_units(weather_xds)
|
|
172
|
+
|
|
173
|
+
# correct expected types (for example "IONOS_ELECTRON", "PRESSURE" can be float32)
|
|
174
|
+
for data_var in weather_xds:
|
|
175
|
+
if weather_xds.data_vars[data_var].dtype != np.float64:
|
|
176
|
+
weather_xds[data_var] = weather_xds[data_var].astype(np.float64)
|
|
177
|
+
|
|
167
178
|
return weather_xds
|
|
168
179
|
|
|
169
180
|
|
|
@@ -199,9 +210,8 @@ def correct_generic_pointing_xds(
|
|
|
199
210
|
|
|
200
211
|
correct_pointing_xds = generic_pointing_xds.copy()
|
|
201
212
|
|
|
202
|
-
for
|
|
203
|
-
if
|
|
204
|
-
data_var_name = to_new_data_variables[key]
|
|
213
|
+
for data_var_name in generic_pointing_xds:
|
|
214
|
+
if data_var_name in to_new_data_variables:
|
|
205
215
|
# Corrects dim sizes of "empty cell" variables, such as empty DIRECTION, TARGET, etc.
|
|
206
216
|
if (
|
|
207
217
|
"dim_2" in generic_pointing_xds.sizes
|
|
@@ -296,6 +306,8 @@ def create_pointing_xds(
|
|
|
296
306
|
size = generic_pointing_xds.sizes["n_polynomial"]
|
|
297
307
|
if size == 1:
|
|
298
308
|
generic_pointing_xds = generic_pointing_xds.sel({"n_polynomial": 0})
|
|
309
|
+
elif size == 0:
|
|
310
|
+
generic_pointing_xds = generic_pointing_xds.drop_dims("n_polynomial")
|
|
299
311
|
|
|
300
312
|
time_ant_dims = ["time", "antenna_name"]
|
|
301
313
|
time_ant_dir_dims = time_ant_dims + ["local_sky_dir_label"]
|
|
@@ -341,8 +353,8 @@ def prepare_generic_sys_cal_xds(generic_sys_cal_xds: xr.Dataset) -> xr.Dataset:
|
|
|
341
353
|
This function performs various preparation steps, such as:
|
|
342
354
|
- filter out dimensions not needed for an individual MSv4 (SPW, FEED),
|
|
343
355
|
- drop variables loaded from columns with all items set to empty array,
|
|
344
|
-
- transpose the dimensions frequency,receptor
|
|
345
|
-
- fix dimension names when needed.
|
|
356
|
+
- transpose the dimensions frequency,receptor
|
|
357
|
+
- fix dimension names (and order) when needed.
|
|
346
358
|
|
|
347
359
|
Parameters
|
|
348
360
|
----------
|
|
@@ -374,15 +386,38 @@ def prepare_generic_sys_cal_xds(generic_sys_cal_xds: xr.Dataset) -> xr.Dataset:
|
|
|
374
386
|
"receptor" in generic_sys_cal_xds.sizes
|
|
375
387
|
and "frequency" in generic_sys_cal_xds.sizes
|
|
376
388
|
):
|
|
389
|
+
# dim_3 can be created for example when the T*_SPECTRUM have varying # channels!
|
|
390
|
+
# more generally, could transpose with ... to avoid errors with additional spurious dimensions
|
|
391
|
+
if "dim_3" in generic_sys_cal_xds.dims:
|
|
392
|
+
generic_sys_cal_xds = generic_sys_cal_xds.drop_dims("dim_3")
|
|
377
393
|
# From MSv2 tables we get (...,frequency, receptor)
|
|
378
394
|
# -> transpose to (...,receptor,frequency) ready for MSv4 sys_cal_xds
|
|
379
395
|
generic_sys_cal_xds = generic_sys_cal_xds.transpose(
|
|
380
396
|
"ANTENNA_ID", "TIME", "receptor", "frequency"
|
|
381
397
|
)
|
|
382
|
-
|
|
398
|
+
elif (
|
|
399
|
+
"frequency" in generic_sys_cal_xds.sizes
|
|
400
|
+
and not "dim_3" in generic_sys_cal_xds.sizes
|
|
401
|
+
):
|
|
383
402
|
# because order is (...,frequency,receptor), when frequency is missing
|
|
384
403
|
# receptor can get wrongly labeled as frequency
|
|
385
404
|
generic_sys_cal_xds = generic_sys_cal_xds.rename_dims({"frequency": "receptor"})
|
|
405
|
+
elif (
|
|
406
|
+
"frequency" not in generic_sys_cal_xds.sizes
|
|
407
|
+
and "receptor" in generic_sys_cal_xds.sizes
|
|
408
|
+
and "dim_3" in generic_sys_cal_xds.sizes
|
|
409
|
+
):
|
|
410
|
+
# different *_SPECTRUM array sizes + some empty arrays can create an additional spurious
|
|
411
|
+
# generic dimension, which should have been "receptor"
|
|
412
|
+
generic_sys_cal_xds = generic_sys_cal_xds.rename_dims({"receptor": "frequency"})
|
|
413
|
+
generic_sys_cal_xds = generic_sys_cal_xds.rename_dims({"dim_3": "receptor"})
|
|
414
|
+
generic_sys_cal_xds = generic_sys_cal_xds.transpose(
|
|
415
|
+
"ANTENNA_ID", "TIME", "receptor", "frequency"
|
|
416
|
+
)
|
|
417
|
+
else:
|
|
418
|
+
raise RuntimeError(
|
|
419
|
+
"Cannot understand the arrangement of dimensions of {generic_sys_cal_xds=}"
|
|
420
|
+
)
|
|
386
421
|
|
|
387
422
|
return generic_sys_cal_xds
|
|
388
423
|
|
|
@@ -462,7 +497,7 @@ def create_system_calibration_xds(
|
|
|
462
497
|
"frequency": ["frequency_cal", ["frequency_cal"]],
|
|
463
498
|
}
|
|
464
499
|
|
|
465
|
-
sys_cal_xds = xr.Dataset(attrs={"type": "
|
|
500
|
+
sys_cal_xds = xr.Dataset(attrs={"type": "system_calibration"})
|
|
466
501
|
coords = {
|
|
467
502
|
"antenna_name": ant_xds_name_ids.sel(
|
|
468
503
|
antenna_id=generic_sys_cal_xds["ANTENNA_ID"]
|
|
@@ -483,8 +518,7 @@ def create_system_calibration_xds(
|
|
|
483
518
|
frequency_measure = {
|
|
484
519
|
"type": main_xds_frequency.attrs["type"],
|
|
485
520
|
"units": main_xds_frequency.attrs["units"],
|
|
486
|
-
"
|
|
487
|
-
"reference_value": main_xds_frequency.attrs["reference_frequency"],
|
|
521
|
+
"observer": main_xds_frequency.attrs["observer"],
|
|
488
522
|
}
|
|
489
523
|
sys_cal_xds.coords["frequency_cal"].attrs.update(frequency_measure)
|
|
490
524
|
|
|
@@ -499,8 +533,8 @@ def create_system_calibration_xds(
|
|
|
499
533
|
time_coord_attrs = {
|
|
500
534
|
"type": "time",
|
|
501
535
|
"units": ["s"],
|
|
502
|
-
"scale": "
|
|
503
|
-
"format": "
|
|
536
|
+
"scale": "utc",
|
|
537
|
+
"format": "unix",
|
|
504
538
|
}
|
|
505
539
|
# If interpolating time, rename time_cal => time
|
|
506
540
|
time_coord = {"time": ("time_cal", sys_cal_interp_time.data)}
|
|
@@ -54,6 +54,7 @@ def create_partitions(in_file: str, partition_scheme: list):
|
|
|
54
54
|
par_df["SCAN_NUMBER"] = main_tb.getcol("SCAN_NUMBER")
|
|
55
55
|
par_df["STATE_ID"] = main_tb.getcol("STATE_ID")
|
|
56
56
|
par_df["OBSERVATION_ID"] = main_tb.getcol("OBSERVATION_ID")
|
|
57
|
+
par_df["ANTENNA1"] = main_tb.getcol("ANTENNA1")
|
|
57
58
|
par_df = par_df.drop_duplicates()
|
|
58
59
|
|
|
59
60
|
field_tb = tables.table(
|
|
@@ -123,6 +124,9 @@ def create_partitions(in_file: str, partition_scheme: list):
|
|
|
123
124
|
"OBS_MODE",
|
|
124
125
|
"SUB_SCAN_NUMBER",
|
|
125
126
|
]
|
|
127
|
+
if "ANTENNA1" in partition_scheme:
|
|
128
|
+
partition_axis_names.append("ANTENNA1")
|
|
129
|
+
|
|
126
130
|
for idx, pair in enumerated_partitions:
|
|
127
131
|
query = ""
|
|
128
132
|
for i, par in enumerate(partition_scheme_updated):
|
|
@@ -2,14 +2,14 @@ import os
|
|
|
2
2
|
import toolviper.utils.logger as logger
|
|
3
3
|
from typing import List, Tuple, Union
|
|
4
4
|
|
|
5
|
-
from ._utils.cds import CASAVisSet
|
|
6
|
-
from .
|
|
5
|
+
from xradio.measurement_set._utils._utils.cds import CASAVisSet
|
|
6
|
+
from xradio.measurement_set._utils._msv2.partitions import (
|
|
7
7
|
finalize_partitions,
|
|
8
8
|
read_ms_ddi_partitions,
|
|
9
9
|
read_ms_scan_subscan_partitions,
|
|
10
10
|
)
|
|
11
|
-
from .
|
|
12
|
-
from ._utils.xds_helper import vis_xds_packager_cds
|
|
11
|
+
from xradio.measurement_set._utils._msv2.subtables import read_ms_subtables
|
|
12
|
+
from xradio.measurement_set._utils._utils.xds_helper import vis_xds_packager_cds
|
|
13
13
|
|
|
14
14
|
|
|
15
15
|
def read_ms(
|
|
@@ -4,8 +4,8 @@ from typing import Dict, Union
|
|
|
4
4
|
|
|
5
5
|
import dask
|
|
6
6
|
|
|
7
|
-
from xradio.
|
|
8
|
-
from xradio.
|
|
7
|
+
from xradio.measurement_set._utils._msv2.partition_queries import create_partitions
|
|
8
|
+
from xradio.measurement_set._utils._msv2.conversion import convert_and_write_partition
|
|
9
9
|
|
|
10
10
|
|
|
11
11
|
def convert_msv2_to_processing_set(
|
|
@@ -82,6 +82,11 @@ def convert_msv2_to_processing_set(
|
|
|
82
82
|
+ str(partition_info["FIELD_ID"])
|
|
83
83
|
+ ", SCAN "
|
|
84
84
|
+ str(partition_info["SCAN_NUMBER"])
|
|
85
|
+
+ (
|
|
86
|
+
", ANTENNA " + str(partition_info["ANTENNA1"])
|
|
87
|
+
if "ANTENNA1" in partition_info
|
|
88
|
+
else ""
|
|
89
|
+
)
|
|
85
90
|
)
|
|
86
91
|
|
|
87
92
|
# prepend '0' to ms_v4_id as needed
|
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
import os
|
|
2
|
-
from xradio.
|
|
2
|
+
from xradio.measurement_set import ProcessingSet
|
|
3
3
|
from typing import Dict, Union
|
|
4
4
|
|
|
5
5
|
|
|
@@ -43,10 +43,10 @@ def load_processing_set(
|
|
|
43
43
|
ps = ProcessingSet()
|
|
44
44
|
for ms_name, ms_xds_isel in sel_parms.items():
|
|
45
45
|
ms_store = os.path.join(ps_store, ms_name)
|
|
46
|
-
|
|
46
|
+
correlated_store = os.path.join(ms_store, "correlated_xds")
|
|
47
47
|
|
|
48
48
|
xds = _open_dataset(
|
|
49
|
-
|
|
49
|
+
correlated_store,
|
|
50
50
|
file_system,
|
|
51
51
|
ms_xds_isel,
|
|
52
52
|
data_variables,
|
|
@@ -55,7 +55,7 @@ def load_processing_set(
|
|
|
55
55
|
data_groups = xds.attrs["data_groups"]
|
|
56
56
|
|
|
57
57
|
if load_sub_datasets:
|
|
58
|
-
from xradio.
|
|
58
|
+
from xradio.measurement_set.open_processing_set import _open_sub_xds
|
|
59
59
|
|
|
60
60
|
sub_xds_dict, field_and_source_xds_dict = _open_sub_xds(
|
|
61
61
|
ms_store, file_system=file_system, load=True, data_groups=data_groups
|
|
@@ -76,7 +76,7 @@ def load_processing_set(
|
|
|
76
76
|
return ps
|
|
77
77
|
|
|
78
78
|
|
|
79
|
-
class
|
|
79
|
+
class ProcessingSetIterator:
|
|
80
80
|
def __init__(
|
|
81
81
|
self,
|
|
82
82
|
sel_parms: dict,
|
|
@@ -0,0 +1,110 @@
|
|
|
1
|
+
import pandas as pd
|
|
2
|
+
from xradio._utils.list_and_array import to_list
|
|
3
|
+
import xarray as xr
|
|
4
|
+
import numbers
|
|
5
|
+
import os
|
|
6
|
+
from collections.abc import Mapping, Iterable
|
|
7
|
+
from typing import Any, Union
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
class MeasurementSetXds(xr.Dataset):
    """
    xarray Dataset subclass that adds Zarr writing with nested datasets split into separate
    stores (``to_store``) and a ``sel`` that can also select a data group by name.
    """

    __slots__ = ()

    def __init__(self, xds):
        """
        Build the MeasurementSetXds from the data variables, coordinates and attributes of ``xds``.
        """
        super().__init__(xds.data_vars, xds.coords, xds.attrs)

    def to_store(self, store, **kwargs):
        """
        Write the MeasurementSetXds to a Zarr store.
        Does not write to cloud storage yet.

        Args:
            store (str): The path to the Zarr store.
            **kwargs: Additional keyword arguments to be passed to `xarray.Dataset.to_zarr`. See https://docs.xarray.dev/en/latest/generated/xarray.Dataset.to_zarr.html for more information.

        Returns:
            None
        """

        copy_cor_xds = self.copy()  # No deep copy

        # Remove field_and_source_xds from all correlated_data (VISIBILITY/SPECTRUM) data variables
        # and save them as separate zarr files.
        for data_group_name, data_group in self.attrs["data_groups"].items():
            correlated_data_name = data_group["correlated_data"]
            del copy_cor_xds[correlated_data_name].attrs["field_and_source_xds"]

            xr.Dataset.to_zarr(
                self[correlated_data_name].attrs["field_and_source_xds"],
                os.path.join(store, "field_and_source_xds_" + data_group_name),
                **kwargs,
            )

        # Remove xds attributes from copy_cor_xds and save xds attributes as separate zarr files.
        for attrs_name in self.attrs:
            if "xds" in attrs_name:
                del copy_cor_xds.attrs[attrs_name]
                # kwargs belong to to_zarr, not to os.path.join (which accepts no keywords).
                xr.Dataset.to_zarr(
                    self.attrs[attrs_name],
                    os.path.join(store, attrs_name),
                    **kwargs,
                )

        # Save copy_cor_xds as zarr file.
        xr.Dataset.to_zarr(
            copy_cor_xds, os.path.join(store, "correlated_xds"), **kwargs
        )

    def sel(
        self,
        indexers: Union[Mapping[Any, Any], None] = None,
        method: Union[str, None] = None,
        tolerance: Union[int, float, Iterable[Union[int, float]], None] = None,
        drop: bool = False,
        **indexers_kwargs: Any,
    ):
        """
        Select data along dimension(s) by label. Overrides `xarray.Dataset.sel <https://xarray.pydata.org/en/stable/generated/xarray.Dataset.sel.html>`__ so that a data group can be selected by name by using the `data_group_name` parameter.
        For more information on data groups see `Data Groups <https://xradio.readthedocs.io/en/latest/measurement_set_overview.html#Data-Groups>`__ section. See `xarray.Dataset.sel <https://xarray.pydata.org/en/stable/generated/xarray.Dataset.sel.html>`__ for parameter descriptions.

        `data_group_name` may be given either as a keyword argument or as a key of `indexers`;
        if given in both, the value in `indexers` is used. The `indexers` mapping passed by the
        caller is not modified.

        Returns:
            MeasurementSetXds

        Examples
        --------
        >>> # Select data group 'corrected' and polarization 'XX'.
        >>> selected_ms_xds = ms_xds.sel(data_group_name='corrected', polarization='XX')

        >>> # Select data group 'corrected' and polarization 'XX' using a dict.
        >>> selected_ms_xds = ms_xds.sel({'data_group_name':'corrected', 'polarization':'XX'})
        """
        # Keyword form first; stays None when no data group was requested.
        data_group_name = indexers_kwargs.pop("data_group_name", None)

        if (indexers is not None) and ("data_group_name" in indexers):
            # Work on a copy so the caller's mapping is left untouched.
            remaining_indexers = dict(indexers)
            data_group_name = remaining_indexers.pop("data_group_name")
            # Pass None rather than an empty dict so that keyword indexers can still be combined.
            indexers = remaining_indexers or None

        selected = super().sel(indexers, method, tolerance, drop, **indexers_kwargs)

        if data_group_name is None:
            return MeasurementSetXds(selected)

        # Drop every data variable that belongs to some data group but not to the selected one.
        sel_data_group_set = set(self.attrs["data_groups"][data_group_name].values())
        data_variables_to_drop = set()
        for dg in self.attrs["data_groups"].values():
            data_variables_to_drop.update(set(dg.values()) - sel_data_group_set)

        return MeasurementSetXds(selected.drop_vars(list(data_variables_to_drop)))
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
import os
|
|
2
2
|
|
|
3
|
-
from xradio.
|
|
3
|
+
from xradio.measurement_set import ProcessingSet
|
|
4
4
|
import toolviper.utils.logger as logger
|
|
5
5
|
from xradio._utils.zarr.common import _open_dataset, _get_file_system_and_items
|
|
6
6
|
import s3fs
|
|
@@ -25,6 +25,8 @@ def open_processing_set(
|
|
|
25
25
|
processing_set
|
|
26
26
|
Lazy representation of processing set (data is represented by Dask.arrays).
|
|
27
27
|
"""
|
|
28
|
+
from xradio.measurement_set import MeasurementSetXds
|
|
29
|
+
|
|
28
30
|
file_system, ms_store_list = _get_file_system_and_items(ps_store)
|
|
29
31
|
|
|
30
32
|
ps = ProcessingSet()
|
|
@@ -32,9 +34,9 @@ def open_processing_set(
|
|
|
32
34
|
for ms_name in ms_store_list:
|
|
33
35
|
# try:
|
|
34
36
|
ms_store = os.path.join(ps_store, ms_name)
|
|
35
|
-
|
|
37
|
+
correlated_store = os.path.join(ms_store, "correlated_xds")
|
|
36
38
|
|
|
37
|
-
xds = _open_dataset(
|
|
39
|
+
xds = _open_dataset(correlated_store, file_system)
|
|
38
40
|
data_groups = xds.attrs["data_groups"]
|
|
39
41
|
|
|
40
42
|
if (intents is None) or (
|
|
@@ -54,7 +56,7 @@ def open_processing_set(
|
|
|
54
56
|
"field_and_source_xds"
|
|
55
57
|
] = field_and_source_xds_dict[data_group_name]
|
|
56
58
|
|
|
57
|
-
ps[ms_name] = xds
|
|
59
|
+
ps[ms_name] = MeasurementSetXds(xds)
|
|
58
60
|
# except Exception as e:
|
|
59
61
|
# logger.warning(f"Could not open {ms_name} due to {e}")
|
|
60
62
|
# continue
|
|
@@ -66,15 +68,6 @@ def _open_sub_xds(ms_store, file_system, data_groups, load=False):
|
|
|
66
68
|
sub_xds_dict = {}
|
|
67
69
|
field_and_source_xds_dict = {}
|
|
68
70
|
|
|
69
|
-
xds_names = {
|
|
70
|
-
"ANTENNA": "antenna_xds",
|
|
71
|
-
"POINTING": "pointing_xds",
|
|
72
|
-
"SYSCAL": "system_calibration_xds",
|
|
73
|
-
"GAIN_CURVE": "gain_curve_xds",
|
|
74
|
-
"PHASE_CAL": "phase_calibration_xds",
|
|
75
|
-
"WEATHER": "weather_xds",
|
|
76
|
-
}
|
|
77
|
-
|
|
78
71
|
if isinstance(file_system, s3fs.core.S3FileSystem):
|
|
79
72
|
file_names = [
|
|
80
73
|
bd.split(sep="/")[-1] for bd in file_system.listdir(ms_store, detail=False)
|
|
@@ -83,9 +76,9 @@ def _open_sub_xds(ms_store, file_system, data_groups, load=False):
|
|
|
83
76
|
file_names = file_system.listdir(ms_store)
|
|
84
77
|
file_names = [item for item in file_names if not item.startswith(".")]
|
|
85
78
|
|
|
86
|
-
file_names.remove("
|
|
79
|
+
file_names.remove("correlated_xds")
|
|
87
80
|
|
|
88
|
-
field_dict = {"
|
|
81
|
+
field_dict = {"field_and_source_xds_" + key: key for key in data_groups.keys()}
|
|
89
82
|
|
|
90
83
|
# field_and_source_xds_name_start = "FIELD"
|
|
91
84
|
for n in file_names:
|
|
@@ -98,7 +91,7 @@ def _open_sub_xds(ms_store, file_system, data_groups, load=False):
|
|
|
98
91
|
if n in field_dict.keys():
|
|
99
92
|
field_and_source_xds_dict[field_dict[n]] = xds
|
|
100
93
|
else:
|
|
101
|
-
sub_xds_dict[
|
|
94
|
+
sub_xds_dict[n] = xds
|
|
102
95
|
|
|
103
96
|
return sub_xds_dict, field_and_source_xds_dict
|
|
104
97
|
|