xradio 0.0.56__py3-none-any.whl → 0.0.59__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- xradio/__init__.py +2 -2
- xradio/_utils/_casacore/casacore_from_casatools.py +12 -2
- xradio/_utils/_casacore/tables.py +1 -0
- xradio/_utils/coord_math.py +22 -23
- xradio/_utils/dict_helpers.py +76 -11
- xradio/_utils/schema.py +5 -2
- xradio/_utils/zarr/common.py +1 -73
- xradio/image/_util/_casacore/xds_from_casacore.py +49 -33
- xradio/image/_util/_casacore/xds_to_casacore.py +41 -14
- xradio/image/_util/_fits/xds_from_fits.py +146 -35
- xradio/image/_util/casacore.py +4 -3
- xradio/image/_util/common.py +4 -4
- xradio/image/_util/image_factory.py +8 -8
- xradio/image/image.py +45 -5
- xradio/measurement_set/__init__.py +19 -9
- xradio/measurement_set/_utils/__init__.py +1 -3
- xradio/measurement_set/_utils/_msv2/__init__.py +0 -0
- xradio/measurement_set/_utils/_msv2/_tables/read.py +17 -76
- xradio/measurement_set/_utils/_msv2/_tables/read_main_table.py +2 -685
- xradio/measurement_set/_utils/_msv2/conversion.py +174 -156
- xradio/measurement_set/_utils/_msv2/create_antenna_xds.py +9 -16
- xradio/measurement_set/_utils/_msv2/create_field_and_source_xds.py +128 -222
- xradio/measurement_set/_utils/_msv2/msv2_to_msv4_meta.py +1 -2
- xradio/measurement_set/_utils/_msv2/msv4_info_dicts.py +8 -7
- xradio/measurement_set/_utils/_msv2/msv4_sub_xdss.py +31 -74
- xradio/measurement_set/_utils/_msv2/partition_queries.py +1 -261
- xradio/measurement_set/_utils/_msv2/subtables.py +0 -107
- xradio/measurement_set/_utils/_utils/interpolate.py +60 -0
- xradio/measurement_set/_utils/_zarr/encoding.py +2 -7
- xradio/measurement_set/convert_msv2_to_processing_set.py +0 -2
- xradio/measurement_set/load_processing_set.py +2 -2
- xradio/measurement_set/measurement_set_xdt.py +20 -16
- xradio/measurement_set/open_processing_set.py +1 -3
- xradio/measurement_set/processing_set_xdt.py +54 -841
- xradio/measurement_set/schema.py +122 -132
- xradio/schema/check.py +95 -101
- xradio/schema/dataclass.py +159 -22
- xradio/schema/export.py +99 -0
- xradio/schema/metamodel.py +51 -16
- xradio/schema/typing.py +5 -5
- xradio/sphinx/schema_table.py +41 -77
- {xradio-0.0.56.dist-info → xradio-0.0.59.dist-info}/METADATA +20 -5
- xradio-0.0.59.dist-info/RECORD +65 -0
- {xradio-0.0.56.dist-info → xradio-0.0.59.dist-info}/WHEEL +1 -1
- xradio/image/_util/fits.py +0 -13
- xradio/measurement_set/_utils/_msv2/_tables/load.py +0 -66
- xradio/measurement_set/_utils/_msv2/_tables/load_main_table.py +0 -490
- xradio/measurement_set/_utils/_msv2/_tables/read_subtables.py +0 -398
- xradio/measurement_set/_utils/_msv2/_tables/write.py +0 -323
- xradio/measurement_set/_utils/_msv2/_tables/write_exp_api.py +0 -388
- xradio/measurement_set/_utils/_msv2/chunks.py +0 -115
- xradio/measurement_set/_utils/_msv2/descr.py +0 -165
- xradio/measurement_set/_utils/_msv2/msv2_msv3.py +0 -7
- xradio/measurement_set/_utils/_msv2/partitions.py +0 -392
- xradio/measurement_set/_utils/_utils/cds.py +0 -40
- xradio/measurement_set/_utils/_utils/xds_helper.py +0 -404
- xradio/measurement_set/_utils/_zarr/read.py +0 -263
- xradio/measurement_set/_utils/_zarr/write.py +0 -329
- xradio/measurement_set/_utils/msv2.py +0 -106
- xradio/measurement_set/_utils/zarr.py +0 -133
- xradio-0.0.56.dist-info/RECORD +0 -78
- {xradio-0.0.56.dist-info → xradio-0.0.59.dist-info}/licenses/LICENSE.txt +0 -0
- {xradio-0.0.56.dist-info → xradio-0.0.59.dist-info}/top_level.txt +0 -0
|
@@ -1,11 +1,13 @@
|
|
|
1
|
+
from collections import deque
|
|
1
2
|
import datetime
|
|
2
3
|
import importlib
|
|
3
4
|
import numcodecs
|
|
4
5
|
import os
|
|
5
6
|
import pathlib
|
|
6
7
|
import time
|
|
7
|
-
from typing import Dict, Union
|
|
8
|
+
from typing import Callable, Dict, Union
|
|
8
9
|
|
|
10
|
+
import dask.array as da
|
|
9
11
|
import numpy as np
|
|
10
12
|
import xarray as xr
|
|
11
13
|
import traceback
|
|
@@ -52,7 +54,9 @@ from ._tables.read import (
|
|
|
52
54
|
)
|
|
53
55
|
from ._tables.read_main_table import get_baselines, get_baseline_indices, get_utimes_tol
|
|
54
56
|
from .._utils.stokes_types import stokes_types
|
|
55
|
-
|
|
57
|
+
|
|
58
|
+
from xradio._utils.list_and_array import check_if_consistent, unique_1d
|
|
59
|
+
from xradio._utils.dict_helpers import make_spectral_coord_reference_dict, make_quantity
|
|
56
60
|
|
|
57
61
|
|
|
58
62
|
def parse_chunksize(
|
|
@@ -218,7 +222,6 @@ def mem_chunksize_to_dict_main_balanced(
|
|
|
218
222
|
dictionary of chunk sizes (as dim->size)
|
|
219
223
|
"""
|
|
220
224
|
|
|
221
|
-
dim_names = [name for name in xds_dim_sizes.keys()]
|
|
222
225
|
dim_sizes = [size for size in xds_dim_sizes.values()]
|
|
223
226
|
# Fix fourth dim (polarization) to all (not free to auto-calculate)
|
|
224
227
|
free_dims_mask = np.array([True, True, True, False])
|
|
@@ -429,8 +432,49 @@ def calc_indx_for_row_split(tb_tool, taql_where):
|
|
|
429
432
|
|
|
430
433
|
|
|
431
434
|
def create_coordinates(
|
|
432
|
-
xds
|
|
433
|
-
|
|
435
|
+
xds: xr.Dataset,
|
|
436
|
+
in_file: str,
|
|
437
|
+
ddi: int,
|
|
438
|
+
utime: np.ndarray,
|
|
439
|
+
interval: np.ndarray,
|
|
440
|
+
baseline_ant1_id: np.ndarray,
|
|
441
|
+
baseline_ant2_id: np.ndarray,
|
|
442
|
+
scan_id: np.ndarray,
|
|
443
|
+
) -> tuple[xr.Dataset, int]:
|
|
444
|
+
"""
|
|
445
|
+
Creates coordinates of a VisibilityXds/SpectrumXds and assigns them to the input
|
|
446
|
+
correlated dataset.
|
|
447
|
+
|
|
448
|
+
Parameters
|
|
449
|
+
----------
|
|
450
|
+
xds :
|
|
451
|
+
dataset to add the coords to
|
|
452
|
+
in_file :
|
|
453
|
+
path to input MSv2
|
|
454
|
+
ddi :
|
|
455
|
+
DDI index (row) for this MSv4
|
|
456
|
+
utime :
|
|
457
|
+
unique times, for the time coordinate
|
|
458
|
+
interval :
|
|
459
|
+
interval col values from the MSv2, for the integration_time attribute
|
|
460
|
+
of the time coord
|
|
461
|
+
baseline_ant1_id :
|
|
462
|
+
ANTENNA1 ids to be used as coord
|
|
463
|
+
baseline_ant2_id :
|
|
464
|
+
ANTENNA2 ids to be used as coord
|
|
465
|
+
scan_id :
|
|
466
|
+
SCAN_ID values from MSv2, for the scan_name coord
|
|
467
|
+
|
|
468
|
+
Returns
|
|
469
|
+
-------
|
|
470
|
+
tuple[xr.Dataset, int]
|
|
471
|
+
A tuple of:
|
|
472
|
+
- The input dataset with coordinates added and populated with all MSv4 schema
|
|
473
|
+
attributes.
|
|
474
|
+
- The MSv2 spectral_window_id of this DDI/MSv4, which is no longer added to
|
|
475
|
+
the frequency coord but is required to create other secondary xdss (antenna,
|
|
476
|
+
gain_curve, phase_calibration, system_calibration, field_and_source).
|
|
477
|
+
"""
|
|
434
478
|
coords = {
|
|
435
479
|
"time": utime,
|
|
436
480
|
"baseline_antenna1_id": ("baseline_id", baseline_ant1_id),
|
|
@@ -484,16 +528,16 @@ def create_coordinates(
|
|
|
484
528
|
spw_name = spw_name + "_" + str(spectral_window_id)
|
|
485
529
|
|
|
486
530
|
xds.frequency.attrs["spectral_window_name"] = spw_name
|
|
531
|
+
xds.frequency.attrs["spectral_window_intent"] = "UNSPECIFIED"
|
|
487
532
|
msv4_measure = column_description_casacore_to_msv4_measure(
|
|
488
533
|
freq_column_description["REF_FREQUENCY"],
|
|
489
534
|
ref_code=spectral_window_xds["MEAS_FREQ_REF"].data,
|
|
490
535
|
)
|
|
491
|
-
xds.frequency.attrs["reference_frequency"] =
|
|
492
|
-
|
|
493
|
-
"
|
|
494
|
-
"
|
|
495
|
-
|
|
496
|
-
xds.frequency.attrs["spectral_window_id"] = spectral_window_id
|
|
536
|
+
xds.frequency.attrs["reference_frequency"] = make_spectral_coord_reference_dict(
|
|
537
|
+
float(spectral_window_xds.REF_FREQUENCY.values),
|
|
538
|
+
msv4_measure["units"],
|
|
539
|
+
msv4_measure["observer"],
|
|
540
|
+
)
|
|
497
541
|
|
|
498
542
|
# Add if doppler table is present
|
|
499
543
|
# xds.frequency.attrs["doppler_velocity"] =
|
|
@@ -512,14 +556,9 @@ def create_coordinates(
|
|
|
512
556
|
freq_column_description["CHAN_WIDTH"],
|
|
513
557
|
ref_code=spectral_window_xds["MEAS_FREQ_REF"].data,
|
|
514
558
|
)
|
|
515
|
-
|
|
516
|
-
msv4_measure["
|
|
517
|
-
|
|
518
|
-
xds.frequency.attrs["channel_width"] = {
|
|
519
|
-
"dims": [],
|
|
520
|
-
"data": np.abs(unique_chan_width[0]),
|
|
521
|
-
"attrs": msv4_measure,
|
|
522
|
-
}
|
|
559
|
+
xds.frequency.attrs["channel_width"] = make_quantity(
|
|
560
|
+
np.abs(unique_chan_width[0]), msv4_measure["units"] if msv4_measure else "Hz"
|
|
561
|
+
)
|
|
523
562
|
|
|
524
563
|
###### Create Time Coordinate ######
|
|
525
564
|
main_table_attrs = extract_table_attributes(in_file)
|
|
@@ -532,16 +571,11 @@ def create_coordinates(
|
|
|
532
571
|
msv4_measure = column_description_casacore_to_msv4_measure(
|
|
533
572
|
main_column_descriptions["INTERVAL"]
|
|
534
573
|
)
|
|
535
|
-
|
|
536
|
-
msv4_measure["
|
|
537
|
-
|
|
538
|
-
|
|
539
|
-
|
|
540
|
-
"data": interval,
|
|
541
|
-
"attrs": msv4_measure,
|
|
542
|
-
}
|
|
543
|
-
xds.time.attrs["effective_integration_time"] = "EFFECTIVE_INTEGRATION_TIME"
|
|
544
|
-
return xds
|
|
574
|
+
xds.time.attrs["integration_time"] = make_quantity(
|
|
575
|
+
interval, msv4_measure["units"] if msv4_measure else "s"
|
|
576
|
+
)
|
|
577
|
+
|
|
578
|
+
return xds, spectral_window_id
|
|
545
579
|
|
|
546
580
|
|
|
547
581
|
def find_min_max_times(tb_tool: tables.table, taql_where: str) -> tuple:
|
|
@@ -584,91 +618,115 @@ def create_data_variables(
|
|
|
584
618
|
parallel_mode,
|
|
585
619
|
main_chunksize,
|
|
586
620
|
):
|
|
587
|
-
|
|
588
|
-
|
|
589
|
-
|
|
590
|
-
|
|
591
|
-
|
|
592
|
-
|
|
593
|
-
except KeyError:
|
|
594
|
-
# If time isn't chunked then `read_col_conversion_dask` is slower than `read_col_conversion_numpy`
|
|
595
|
-
logger.warning(
|
|
596
|
-
"'time' isn't specified in `main_chunksize`. Defaulting to `parallel_mode = 'none'`."
|
|
597
|
-
)
|
|
598
|
-
parallel_mode = "none"
|
|
599
|
-
|
|
600
|
-
# Set read_col_conversion from value of `parallel_mode` argument
|
|
601
|
-
# TODO: To make this compatible with multi-node conversion, `read_col_conversion_dask` and TableManager must be pickled.
|
|
602
|
-
# Casacore will make this difficult
|
|
603
|
-
global read_col_conversion
|
|
604
|
-
if parallel_mode == "time":
|
|
605
|
-
read_col_conversion = read_col_conversion_dask
|
|
606
|
-
else:
|
|
607
|
-
read_col_conversion = read_col_conversion_numpy
|
|
621
|
+
time_chunksize = main_chunksize.get("time", None) if main_chunksize else None
|
|
622
|
+
if parallel_mode == "time" and time_chunksize is None:
|
|
623
|
+
logger.warning(
|
|
624
|
+
"'time' isn't specified in `main_chunksize`. Defaulting to `parallel_mode = 'none'`."
|
|
625
|
+
)
|
|
626
|
+
parallel_mode = "none"
|
|
608
627
|
|
|
609
628
|
# Create Data Variables
|
|
610
629
|
with table_manager.get_table() as tb_tool:
|
|
611
630
|
col_names = tb_tool.colnames()
|
|
612
631
|
|
|
632
|
+
target_cols = set(col_names) & set(col_to_data_variable_names.keys())
|
|
633
|
+
if target_cols.issuperset({"WEIGHT", "WEIGHT_SPECTRUM"}):
|
|
634
|
+
target_cols.remove("WEIGHT")
|
|
635
|
+
|
|
613
636
|
main_table_attrs = extract_table_attributes(in_file)
|
|
614
637
|
main_column_descriptions = main_table_attrs["column_descriptions"]
|
|
615
|
-
|
|
616
|
-
|
|
617
|
-
|
|
618
|
-
|
|
619
|
-
|
|
620
|
-
|
|
621
|
-
|
|
622
|
-
|
|
623
|
-
|
|
624
|
-
|
|
625
|
-
|
|
626
|
-
|
|
627
|
-
|
|
628
|
-
|
|
629
|
-
|
|
630
|
-
|
|
631
|
-
|
|
632
|
-
|
|
633
|
-
|
|
634
|
-
|
|
635
|
-
|
|
636
|
-
|
|
637
|
-
|
|
638
|
-
|
|
639
|
-
|
|
640
|
-
|
|
641
|
-
|
|
642
|
-
|
|
643
|
-
),
|
|
644
|
-
dims=col_dims[col],
|
|
645
|
-
)
|
|
646
|
-
|
|
647
|
-
xds[col_to_data_variable_names[col]].attrs.update(
|
|
648
|
-
create_attribute_metadata(col, main_column_descriptions)
|
|
638
|
+
|
|
639
|
+
# Use a double-ended queue in case WEIGHT_SPECTRUM conversion fails, and
|
|
640
|
+
# we need to add WEIGHT to list of columns to convert during iteration
|
|
641
|
+
target_cols = deque(target_cols)
|
|
642
|
+
|
|
643
|
+
while target_cols:
|
|
644
|
+
col = target_cols.popleft()
|
|
645
|
+
datavar_name = col_to_data_variable_names[col]
|
|
646
|
+
read_col_conversion = get_read_col_conversion_function(col, parallel_mode)
|
|
647
|
+
|
|
648
|
+
try:
|
|
649
|
+
start = time.time()
|
|
650
|
+
col_data = read_col_conversion(
|
|
651
|
+
table_manager,
|
|
652
|
+
col,
|
|
653
|
+
time_baseline_shape,
|
|
654
|
+
tidxs,
|
|
655
|
+
bidxs,
|
|
656
|
+
use_table_iter,
|
|
657
|
+
time_chunksize,
|
|
658
|
+
)
|
|
659
|
+
|
|
660
|
+
if col == "TIME_CENTROID":
|
|
661
|
+
col_data = convert_casacore_time(col_data, False)
|
|
662
|
+
|
|
663
|
+
elif col == "WEIGHT":
|
|
664
|
+
col_data = repeat_weight_array(
|
|
665
|
+
col_data, parallel_mode, xds.sizes, main_chunksize
|
|
649
666
|
)
|
|
650
667
|
|
|
668
|
+
xds[datavar_name] = xr.DataArray(
|
|
669
|
+
col_data,
|
|
670
|
+
dims=col_dims[col],
|
|
671
|
+
attrs=create_attribute_metadata(col, main_column_descriptions),
|
|
672
|
+
)
|
|
673
|
+
logger.debug(f"Time to read column {col} : {time.time() - start}")
|
|
674
|
+
|
|
675
|
+
except Exception as exc:
|
|
676
|
+
logger.debug(f"Could not load column {col}, exception: {exc}")
|
|
677
|
+
logger.debug(traceback.format_exc())
|
|
678
|
+
|
|
679
|
+
if col == "WEIGHT_SPECTRUM" and "WEIGHT" in col_names:
|
|
651
680
|
logger.debug(
|
|
652
|
-
"
|
|
681
|
+
"Failed to convert WEIGHT_SPECTRUM column: "
|
|
682
|
+
"will attempt to use WEIGHT instead"
|
|
653
683
|
)
|
|
654
|
-
|
|
655
|
-
|
|
656
|
-
|
|
657
|
-
|
|
658
|
-
|
|
659
|
-
|
|
660
|
-
|
|
661
|
-
|
|
662
|
-
|
|
663
|
-
|
|
664
|
-
|
|
665
|
-
|
|
666
|
-
|
|
667
|
-
|
|
668
|
-
|
|
669
|
-
|
|
670
|
-
|
|
671
|
-
|
|
684
|
+
target_cols.append("WEIGHT")
|
|
685
|
+
|
|
686
|
+
|
|
687
|
+
def get_read_col_conversion_function(col_name: str, parallel_mode: str) -> Callable:
|
|
688
|
+
"""
|
|
689
|
+
Returns the appropriate read_col_conversion function: use the dask version
|
|
690
|
+
for large columns and parallel_mode="time", or the numpy version otherwise.
|
|
691
|
+
"""
|
|
692
|
+
large_columns = {
|
|
693
|
+
"DATA",
|
|
694
|
+
"CORRECTED_DATA",
|
|
695
|
+
"MODEL_DATA",
|
|
696
|
+
"WEIGHT_SPECTRUM",
|
|
697
|
+
"WEIGHT",
|
|
698
|
+
"FLAG",
|
|
699
|
+
}
|
|
700
|
+
return (
|
|
701
|
+
read_col_conversion_dask
|
|
702
|
+
if parallel_mode == "time" and col_name in large_columns
|
|
703
|
+
else read_col_conversion_numpy
|
|
704
|
+
)
|
|
705
|
+
|
|
706
|
+
|
|
707
|
+
def repeat_weight_array(
|
|
708
|
+
weight_arr,
|
|
709
|
+
parallel_mode: str,
|
|
710
|
+
main_sizes: dict[str, int],
|
|
711
|
+
main_chunksize: dict[str, int],
|
|
712
|
+
):
|
|
713
|
+
"""
|
|
714
|
+
Repeat the weights read from the WEIGHT column along the frequency dimension.
|
|
715
|
+
Returns a dask array if parallel_mode="time", or a numpy array otherwise.
|
|
716
|
+
"""
|
|
717
|
+
reshaped_arr = weight_arr[:, :, None, :]
|
|
718
|
+
repeats = (1, 1, main_sizes["frequency"], 1)
|
|
719
|
+
|
|
720
|
+
if parallel_mode == "time":
|
|
721
|
+
result = da.tile(reshaped_arr, repeats)
|
|
722
|
+
# da.tile() adds each repeat as a separate chunk, so rechunking is necessary
|
|
723
|
+
chunksizes = tuple(
|
|
724
|
+
main_chunksize.get(dim, main_sizes[dim])
|
|
725
|
+
for dim in ("time", "baseline_id", "frequency", "polarization")
|
|
726
|
+
)
|
|
727
|
+
return result.rechunk(chunksizes)
|
|
728
|
+
|
|
729
|
+
return np.tile(reshaped_arr, repeats)
|
|
672
730
|
|
|
673
731
|
|
|
674
732
|
def add_missing_data_var_attrs(xds):
|
|
@@ -681,7 +739,7 @@ def add_missing_data_var_attrs(xds):
|
|
|
681
739
|
data_var_names = ["SPECTRUM", "SPECTRUM_CORRECTED"]
|
|
682
740
|
for var_name in data_var_names:
|
|
683
741
|
if var_name in xds.data_vars:
|
|
684
|
-
xds.data_vars[var_name].attrs["units"] =
|
|
742
|
+
xds.data_vars[var_name].attrs["units"] = ""
|
|
685
743
|
|
|
686
744
|
vis_var_names = ["VISIBILITY_MODEL"]
|
|
687
745
|
for var_name in vis_var_names:
|
|
@@ -692,44 +750,11 @@ def add_missing_data_var_attrs(xds):
|
|
|
692
750
|
"VISIBILITY"
|
|
693
751
|
].attrs["units"]
|
|
694
752
|
else:
|
|
695
|
-
xds.data_vars[var_name].attrs["units"] =
|
|
753
|
+
xds.data_vars[var_name].attrs["units"] = ""
|
|
696
754
|
|
|
697
755
|
return xds
|
|
698
756
|
|
|
699
757
|
|
|
700
|
-
def get_weight(
|
|
701
|
-
xds,
|
|
702
|
-
col,
|
|
703
|
-
table_manager,
|
|
704
|
-
time_baseline_shape,
|
|
705
|
-
tidxs,
|
|
706
|
-
bidxs,
|
|
707
|
-
use_table_iter,
|
|
708
|
-
main_column_descriptions,
|
|
709
|
-
time_chunksize,
|
|
710
|
-
):
|
|
711
|
-
xds[col_to_data_variable_names[col]] = xr.DataArray(
|
|
712
|
-
np.tile(
|
|
713
|
-
read_col_conversion(
|
|
714
|
-
table_manager,
|
|
715
|
-
col,
|
|
716
|
-
time_baseline_shape,
|
|
717
|
-
tidxs,
|
|
718
|
-
bidxs,
|
|
719
|
-
use_table_iter,
|
|
720
|
-
time_chunksize,
|
|
721
|
-
)[:, :, None, :],
|
|
722
|
-
(1, 1, xds.sizes["frequency"], 1),
|
|
723
|
-
),
|
|
724
|
-
dims=col_dims[col],
|
|
725
|
-
)
|
|
726
|
-
|
|
727
|
-
xds[col_to_data_variable_names[col]].attrs.update(
|
|
728
|
-
create_attribute_metadata(col, main_column_descriptions)
|
|
729
|
-
)
|
|
730
|
-
return xds
|
|
731
|
-
|
|
732
|
-
|
|
733
758
|
def create_taql_query_where(partition_info: dict):
|
|
734
759
|
main_par_table_cols = [
|
|
735
760
|
"DATA_DESC_ID",
|
|
@@ -771,9 +796,9 @@ def fix_uvw_frame(
|
|
|
771
796
|
"""
|
|
772
797
|
if xds.UVW.attrs["frame"] == "ITRF":
|
|
773
798
|
if is_single_dish:
|
|
774
|
-
center_var = "
|
|
799
|
+
center_var = "FIELD_REFERENCE_CENTER_DIRECTION"
|
|
775
800
|
else:
|
|
776
|
-
center_var = "
|
|
801
|
+
center_var = "FIELD_PHASE_CENTER_DIRECTION"
|
|
777
802
|
|
|
778
803
|
xds.UVW.attrs["frame"] = field_and_source_xds[center_var].attrs["frame"]
|
|
779
804
|
|
|
@@ -800,11 +825,9 @@ def estimate_memory_for_partition(in_file: str, partition: dict) -> float:
|
|
|
800
825
|
if "shape" in col_descr and isinstance(col_descr["shape"], np.ndarray):
|
|
801
826
|
# example: "shape": array([15, 4]) => gives pols x channels
|
|
802
827
|
cells_in_row = col_descr["shape"].prod()
|
|
803
|
-
npols = col_descr["shape"][-1]
|
|
804
828
|
else:
|
|
805
829
|
first_row = np.array(tb_tool.col(data_col)[0])
|
|
806
830
|
cells_in_row = np.prod(first_row.shape)
|
|
807
|
-
npols = first_row.shape[-1]
|
|
808
831
|
|
|
809
832
|
if col_descr["valueType"] == "complex":
|
|
810
833
|
# Assume. Otherwise, read first column and get the itemsize:
|
|
@@ -911,7 +934,6 @@ def estimate_memory_for_partition(in_file: str, partition: dict) -> float:
|
|
|
911
934
|
taql_partition = create_taql_query_where(partition)
|
|
912
935
|
taql_main = f"select * from $mtable {taql_partition}"
|
|
913
936
|
with open_table_ro(in_file) as mtable:
|
|
914
|
-
col_names = mtable.colnames()
|
|
915
937
|
with open_query(mtable, taql_main) as tb_tool:
|
|
916
938
|
# Do not feel tempted to rely on nrows. nrows tends to underestimate memory when baselines are missing.
|
|
917
939
|
# For some EVN datasets that can easily underestimate by a 50%
|
|
@@ -940,6 +962,7 @@ def estimate_memory_for_partition(in_file: str, partition: dict) -> float:
|
|
|
940
962
|
+ calculate_term_other_msv2_indices(msv2_nrows)
|
|
941
963
|
+ calculate_term_sub_xds(estimate_main_xds)
|
|
942
964
|
+ calculate_term_to_zarr(estimate_main_xds)
|
|
965
|
+
+ calculate_term_attrs(estimate_main_xds)
|
|
943
966
|
)
|
|
944
967
|
estimate /= GiBYTES_TO_BYTES
|
|
945
968
|
|
|
@@ -1104,7 +1127,7 @@ def convert_and_write_partition(
|
|
|
1104
1127
|
scan_id[tidxs, bidxs] = tb_tool.getcol("SCAN_NUMBER")
|
|
1105
1128
|
scan_id = np.max(scan_id, axis=1)
|
|
1106
1129
|
|
|
1107
|
-
xds = create_coordinates(
|
|
1130
|
+
xds, spectral_window_id = create_coordinates(
|
|
1108
1131
|
xds,
|
|
1109
1132
|
in_file,
|
|
1110
1133
|
ddi,
|
|
@@ -1172,7 +1195,7 @@ def convert_and_write_partition(
|
|
|
1172
1195
|
|
|
1173
1196
|
ant_xds = create_antenna_xds(
|
|
1174
1197
|
in_file,
|
|
1175
|
-
|
|
1198
|
+
spectral_window_id,
|
|
1176
1199
|
antenna_id,
|
|
1177
1200
|
feed_id,
|
|
1178
1201
|
telescope_name,
|
|
@@ -1181,9 +1204,7 @@ def convert_and_write_partition(
|
|
|
1181
1204
|
logger.debug("Time antenna xds " + str(time.time() - start))
|
|
1182
1205
|
|
|
1183
1206
|
start = time.time()
|
|
1184
|
-
gain_curve_xds = create_gain_curve_xds(
|
|
1185
|
-
in_file, xds.frequency.attrs["spectral_window_id"], ant_xds
|
|
1186
|
-
)
|
|
1207
|
+
gain_curve_xds = create_gain_curve_xds(in_file, spectral_window_id, ant_xds)
|
|
1187
1208
|
logger.debug("Time gain_curve xds " + str(time.time() - start))
|
|
1188
1209
|
|
|
1189
1210
|
start = time.time()
|
|
@@ -1193,7 +1214,7 @@ def convert_and_write_partition(
|
|
|
1193
1214
|
phase_cal_interp_time = None
|
|
1194
1215
|
phase_calibration_xds = create_phase_calibration_xds(
|
|
1195
1216
|
in_file,
|
|
1196
|
-
|
|
1217
|
+
spectral_window_id,
|
|
1197
1218
|
ant_xds,
|
|
1198
1219
|
time_min_max,
|
|
1199
1220
|
phase_cal_interp_time,
|
|
@@ -1208,6 +1229,7 @@ def convert_and_write_partition(
|
|
|
1208
1229
|
sys_cal_interp_time = None
|
|
1209
1230
|
system_calibration_xds = create_system_calibration_xds(
|
|
1210
1231
|
in_file,
|
|
1232
|
+
spectral_window_id,
|
|
1211
1233
|
xds.frequency,
|
|
1212
1234
|
ant_xds,
|
|
1213
1235
|
sys_cal_interp_time,
|
|
@@ -1269,10 +1291,6 @@ def convert_and_write_partition(
|
|
|
1269
1291
|
|
|
1270
1292
|
# Create field_and_source_xds (combines field, source and ephemeris data into one super dataset)
|
|
1271
1293
|
start = time.time()
|
|
1272
|
-
if ephemeris_interpolate:
|
|
1273
|
-
ephemeris_interp_time = xds.time.values
|
|
1274
|
-
else:
|
|
1275
|
-
ephemeris_interp_time = None
|
|
1276
1294
|
|
|
1277
1295
|
# if "FIELD_ID" not in partition_scheme:
|
|
1278
1296
|
# field_id = np.full(time_baseline_shape, -42, dtype=int)
|
|
@@ -1298,7 +1316,7 @@ def convert_and_write_partition(
|
|
|
1298
1316
|
create_field_and_source_xds(
|
|
1299
1317
|
in_file,
|
|
1300
1318
|
field_id,
|
|
1301
|
-
|
|
1319
|
+
spectral_window_id,
|
|
1302
1320
|
field_times,
|
|
1303
1321
|
is_single_dish,
|
|
1304
1322
|
time_min_max,
|
|
@@ -1330,7 +1348,7 @@ def convert_and_write_partition(
|
|
|
1330
1348
|
add_encoding(xds, compressor=compressor, chunks=main_chunksize)
|
|
1331
1349
|
logger.debug("Time add compressor and chunk " + str(time.time() - start))
|
|
1332
1350
|
|
|
1333
|
-
|
|
1351
|
+
os.path.join(
|
|
1334
1352
|
out_file,
|
|
1335
1353
|
pathlib.Path(in_file).name.replace(".ms", "") + "_" + str(ms_v4_id),
|
|
1336
1354
|
)
|
|
@@ -1377,7 +1395,7 @@ def convert_and_write_partition(
|
|
|
1377
1395
|
ms_xdt["/phased_array_xds"] = phased_array_xds
|
|
1378
1396
|
|
|
1379
1397
|
if storage_backend == "zarr":
|
|
1380
|
-
ms_xdt.to_zarr(store=os.path.join(out_file, ms_v4_name))
|
|
1398
|
+
ms_xdt.to_zarr(store=os.path.join(out_file, ms_v4_name), mode=mode)
|
|
1381
1399
|
elif storage_backend == "netcdf":
|
|
1382
1400
|
# xds.to_netcdf(path=file_name+"/MAIN", mode=mode) #Does not work
|
|
1383
1401
|
raise
|
|
@@ -1,5 +1,3 @@
|
|
|
1
|
-
import toolviper.utils.logger as logger
|
|
2
|
-
import time
|
|
3
1
|
from typing import Tuple, Union
|
|
4
2
|
|
|
5
3
|
import numpy as np
|
|
@@ -10,7 +8,6 @@ from xradio.measurement_set._utils._msv2.subtables import subt_rename_ids
|
|
|
10
8
|
from xradio.measurement_set._utils._msv2._tables.read import (
|
|
11
9
|
load_generic_table,
|
|
12
10
|
convert_casacore_time,
|
|
13
|
-
convert_casacore_time_to_mjd,
|
|
14
11
|
make_taql_where_between_min_max,
|
|
15
12
|
table_exists,
|
|
16
13
|
)
|
|
@@ -20,17 +17,15 @@ from xradio.measurement_set._utils._msv2.msv4_sub_xdss import (
|
|
|
20
17
|
)
|
|
21
18
|
|
|
22
19
|
from xradio._utils.list_and_array import (
|
|
23
|
-
check_if_consistent,
|
|
24
20
|
unique_1d,
|
|
25
|
-
to_list,
|
|
26
|
-
to_np_array,
|
|
27
21
|
)
|
|
22
|
+
from xradio._utils.dict_helpers import make_quantity_attrs
|
|
28
23
|
|
|
29
24
|
|
|
30
25
|
def create_antenna_xds(
|
|
31
26
|
in_file: str,
|
|
32
27
|
spectral_window_id: int,
|
|
33
|
-
antenna_id:
|
|
28
|
+
antenna_id: np.ndarray,
|
|
34
29
|
feed_id: list,
|
|
35
30
|
telescope_name: str,
|
|
36
31
|
partition_polarization: xr.DataArray,
|
|
@@ -44,8 +39,8 @@ def create_antenna_xds(
|
|
|
44
39
|
Path to the input MSv2.
|
|
45
40
|
spectral_window_id : int
|
|
46
41
|
Spectral window ID.
|
|
47
|
-
antenna_id :
|
|
48
|
-
|
|
42
|
+
antenna_id : np.ndarray
|
|
43
|
+
Antenna IDs.
|
|
49
44
|
feed_id : list
|
|
50
45
|
List of feed IDs.
|
|
51
46
|
telescope_name : str
|
|
@@ -82,7 +77,7 @@ def create_antenna_xds(
|
|
|
82
77
|
|
|
83
78
|
|
|
84
79
|
def extract_antenna_info(
|
|
85
|
-
ant_xds: xr.Dataset, in_file: str, antenna_id:
|
|
80
|
+
ant_xds: xr.Dataset, in_file: str, antenna_id: np.ndarray, telescope_name: str
|
|
86
81
|
) -> xr.Dataset:
|
|
87
82
|
"""Reformats MSv2 Antenna table content to MSv4 schema.
|
|
88
83
|
|
|
@@ -92,8 +87,8 @@ def extract_antenna_info(
|
|
|
92
87
|
The dataset that will be updated with antenna information.
|
|
93
88
|
in_file : str
|
|
94
89
|
Path to the input MSv2.
|
|
95
|
-
antenna_id :
|
|
96
|
-
|
|
90
|
+
antenna_id : np.array
|
|
91
|
+
Antenna IDs to extract information for.
|
|
97
92
|
telescope_name : str
|
|
98
93
|
The name of the telescope.
|
|
99
94
|
|
|
@@ -138,7 +133,7 @@ def extract_antenna_info(
|
|
|
138
133
|
generic_ant_xds, ant_xds, to_new_data_variables, to_new_coords
|
|
139
134
|
)
|
|
140
135
|
|
|
141
|
-
ant_xds["ANTENNA_DISH_DIAMETER"].attrs.update(
|
|
136
|
+
ant_xds["ANTENNA_DISH_DIAMETER"].attrs.update(make_quantity_attrs(["m"]))
|
|
142
137
|
|
|
143
138
|
ant_xds["ANTENNA_POSITION"].attrs["coordinate_system"] = "geocentric"
|
|
144
139
|
ant_xds["ANTENNA_POSITION"].attrs["origin_object_name"] = "earth"
|
|
@@ -507,9 +502,7 @@ def create_phase_calibration_xds(
|
|
|
507
502
|
phase_cal_xds = phase_cal_xds.assign_coords(ant_borrowed_coords | tone_label_coord)
|
|
508
503
|
|
|
509
504
|
# Adjust expected types
|
|
510
|
-
phase_cal_xds["time_phase_cal"] =
|
|
511
|
-
phase_cal_xds.time_phase_cal.astype("float64").astype("float64") / 10**9
|
|
512
|
-
)
|
|
505
|
+
phase_cal_xds["time_phase_cal"] = phase_cal_xds.time_phase_cal
|
|
513
506
|
|
|
514
507
|
phase_cal_xds = rename_and_interpolate_to_time(
|
|
515
508
|
phase_cal_xds, "time_phase_cal", phase_cal_interp_time, "phase_cal_xds"
|