xradio-0.0.55-py3-none-any.whl → xradio-0.0.58-py3-none-any.whl
This diff shows the differences between the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
- xradio/__init__.py +2 -2
- xradio/_utils/_casacore/casacore_from_casatools.py +1001 -0
- xradio/_utils/_casacore/tables.py +6 -1
- xradio/_utils/coord_math.py +22 -23
- xradio/_utils/dict_helpers.py +76 -11
- xradio/_utils/schema.py +5 -2
- xradio/_utils/zarr/common.py +1 -73
- xradio/image/_util/_casacore/common.py +11 -3
- xradio/image/_util/_casacore/xds_from_casacore.py +59 -35
- xradio/image/_util/_casacore/xds_to_casacore.py +47 -16
- xradio/image/_util/_fits/xds_from_fits.py +172 -77
- xradio/image/_util/casacore.py +9 -4
- xradio/image/_util/common.py +4 -4
- xradio/image/_util/image_factory.py +8 -8
- xradio/image/image.py +45 -5
- xradio/measurement_set/__init__.py +19 -9
- xradio/measurement_set/_utils/__init__.py +1 -3
- xradio/measurement_set/_utils/_msv2/__init__.py +0 -0
- xradio/measurement_set/_utils/_msv2/_tables/read.py +35 -90
- xradio/measurement_set/_utils/_msv2/_tables/read_main_table.py +6 -686
- xradio/measurement_set/_utils/_msv2/_tables/table_query.py +13 -3
- xradio/measurement_set/_utils/_msv2/conversion.py +129 -145
- xradio/measurement_set/_utils/_msv2/create_antenna_xds.py +9 -16
- xradio/measurement_set/_utils/_msv2/create_field_and_source_xds.py +125 -221
- xradio/measurement_set/_utils/_msv2/msv2_to_msv4_meta.py +1 -2
- xradio/measurement_set/_utils/_msv2/msv4_info_dicts.py +13 -8
- xradio/measurement_set/_utils/_msv2/msv4_sub_xdss.py +27 -72
- xradio/measurement_set/_utils/_msv2/partition_queries.py +5 -262
- xradio/measurement_set/_utils/_msv2/subtables.py +0 -107
- xradio/measurement_set/_utils/_utils/interpolate.py +60 -0
- xradio/measurement_set/_utils/_zarr/encoding.py +2 -7
- xradio/measurement_set/convert_msv2_to_processing_set.py +0 -2
- xradio/measurement_set/load_processing_set.py +2 -2
- xradio/measurement_set/measurement_set_xdt.py +14 -14
- xradio/measurement_set/open_processing_set.py +1 -3
- xradio/measurement_set/processing_set_xdt.py +41 -835
- xradio/measurement_set/schema.py +96 -123
- xradio/schema/check.py +91 -97
- xradio/schema/dataclass.py +159 -22
- xradio/schema/export.py +99 -0
- xradio/schema/metamodel.py +51 -16
- xradio/schema/typing.py +5 -5
- {xradio-0.0.55.dist-info → xradio-0.0.58.dist-info}/METADATA +43 -11
- xradio-0.0.58.dist-info/RECORD +65 -0
- {xradio-0.0.55.dist-info → xradio-0.0.58.dist-info}/WHEEL +1 -1
- xradio/image/_util/fits.py +0 -13
- xradio/measurement_set/_utils/_msv2/_tables/load.py +0 -63
- xradio/measurement_set/_utils/_msv2/_tables/load_main_table.py +0 -487
- xradio/measurement_set/_utils/_msv2/_tables/read_subtables.py +0 -395
- xradio/measurement_set/_utils/_msv2/_tables/write.py +0 -320
- xradio/measurement_set/_utils/_msv2/_tables/write_exp_api.py +0 -385
- xradio/measurement_set/_utils/_msv2/chunks.py +0 -115
- xradio/measurement_set/_utils/_msv2/descr.py +0 -165
- xradio/measurement_set/_utils/_msv2/msv2_msv3.py +0 -7
- xradio/measurement_set/_utils/_msv2/partitions.py +0 -392
- xradio/measurement_set/_utils/_utils/cds.py +0 -40
- xradio/measurement_set/_utils/_utils/xds_helper.py +0 -404
- xradio/measurement_set/_utils/_zarr/read.py +0 -263
- xradio/measurement_set/_utils/_zarr/write.py +0 -329
- xradio/measurement_set/_utils/msv2.py +0 -106
- xradio/measurement_set/_utils/zarr.py +0 -133
- xradio-0.0.55.dist-info/RECORD +0 -77
- {xradio-0.0.55.dist-info → xradio-0.0.58.dist-info}/licenses/LICENSE.txt +0 -0
- {xradio-0.0.55.dist-info → xradio-0.0.58.dist-info}/top_level.txt +0 -0
|
@@ -1,7 +1,10 @@
|
|
|
1
1
|
from typing import Generator
|
|
2
2
|
from contextlib import contextmanager
|
|
3
3
|
|
|
4
|
-
|
|
4
|
+
try:
|
|
5
|
+
from casacore import tables
|
|
6
|
+
except ImportError:
|
|
7
|
+
import xradio._utils._casacore.casacore_from_casatools as tables
|
|
5
8
|
|
|
6
9
|
|
|
7
10
|
@contextmanager
|
|
@@ -17,7 +20,11 @@ def open_table_ro(infile: str) -> Generator[tables.table, None, None]:
|
|
|
17
20
|
|
|
18
21
|
@contextmanager
|
|
19
22
|
def open_query(table: tables.table, query: str) -> Generator[tables.table, None, None]:
|
|
20
|
-
|
|
23
|
+
|
|
24
|
+
if hasattr(tables, "taql"):
|
|
25
|
+
ttq = tables.taql(query)
|
|
26
|
+
else:
|
|
27
|
+
ttq = table.taql(query)
|
|
21
28
|
try:
|
|
22
29
|
yield ttq
|
|
23
30
|
finally:
|
|
@@ -43,4 +50,7 @@ class TableManager:
|
|
|
43
50
|
self.infile, readonly=True, lockoptions={"option": "usernoread"}, ack=False
|
|
44
51
|
) as mtable:
|
|
45
52
|
query = f"select * from $mtable {self.taql_where}"
|
|
46
|
-
|
|
53
|
+
if hasattr(tables, "taql"):
|
|
54
|
+
return tables.taql(query)
|
|
55
|
+
else:
|
|
56
|
+
return mtable.taql(query)
|
|
@@ -1,16 +1,23 @@
|
|
|
1
|
+
from collections import deque
|
|
1
2
|
import datetime
|
|
2
3
|
import importlib
|
|
3
4
|
import numcodecs
|
|
4
5
|
import os
|
|
5
6
|
import pathlib
|
|
6
7
|
import time
|
|
7
|
-
from typing import Dict, Union
|
|
8
|
+
from typing import Callable, Dict, Union
|
|
8
9
|
|
|
10
|
+
import dask.array as da
|
|
9
11
|
import numpy as np
|
|
10
12
|
import xarray as xr
|
|
13
|
+
import traceback
|
|
11
14
|
|
|
12
15
|
import toolviper.utils.logger as logger
|
|
13
|
-
|
|
16
|
+
|
|
17
|
+
try:
|
|
18
|
+
from casacore import tables
|
|
19
|
+
except ImportError:
|
|
20
|
+
import xradio._utils._casacore.casacore_from_casatools as tables
|
|
14
21
|
|
|
15
22
|
from xradio.measurement_set._utils._msv2.msv4_sub_xdss import (
|
|
16
23
|
create_pointing_xds,
|
|
@@ -47,7 +54,9 @@ from ._tables.read import (
|
|
|
47
54
|
)
|
|
48
55
|
from ._tables.read_main_table import get_baselines, get_baseline_indices, get_utimes_tol
|
|
49
56
|
from .._utils.stokes_types import stokes_types
|
|
50
|
-
|
|
57
|
+
|
|
58
|
+
from xradio._utils.list_and_array import check_if_consistent, unique_1d
|
|
59
|
+
from xradio._utils.dict_helpers import make_spectral_coord_reference_dict, make_quantity
|
|
51
60
|
|
|
52
61
|
|
|
53
62
|
def parse_chunksize(
|
|
@@ -213,7 +222,6 @@ def mem_chunksize_to_dict_main_balanced(
|
|
|
213
222
|
dictionary of chunk sizes (as dim->size)
|
|
214
223
|
"""
|
|
215
224
|
|
|
216
|
-
dim_names = [name for name in xds_dim_sizes.keys()]
|
|
217
225
|
dim_sizes = [size for size in xds_dim_sizes.values()]
|
|
218
226
|
# Fix fourth dim (polarization) to all (not free to auto-calculate)
|
|
219
227
|
free_dims_mask = np.array([True, True, True, False])
|
|
@@ -483,11 +491,11 @@ def create_coordinates(
|
|
|
483
491
|
freq_column_description["REF_FREQUENCY"],
|
|
484
492
|
ref_code=spectral_window_xds["MEAS_FREQ_REF"].data,
|
|
485
493
|
)
|
|
486
|
-
xds.frequency.attrs["reference_frequency"] =
|
|
487
|
-
|
|
488
|
-
"
|
|
489
|
-
"
|
|
490
|
-
|
|
494
|
+
xds.frequency.attrs["reference_frequency"] = make_spectral_coord_reference_dict(
|
|
495
|
+
float(spectral_window_xds.REF_FREQUENCY.values),
|
|
496
|
+
msv4_measure["units"],
|
|
497
|
+
msv4_measure["observer"],
|
|
498
|
+
)
|
|
491
499
|
xds.frequency.attrs["spectral_window_id"] = spectral_window_id
|
|
492
500
|
|
|
493
501
|
# Add if doppler table is present
|
|
@@ -507,14 +515,9 @@ def create_coordinates(
|
|
|
507
515
|
freq_column_description["CHAN_WIDTH"],
|
|
508
516
|
ref_code=spectral_window_xds["MEAS_FREQ_REF"].data,
|
|
509
517
|
)
|
|
510
|
-
|
|
511
|
-
msv4_measure["
|
|
512
|
-
|
|
513
|
-
xds.frequency.attrs["channel_width"] = {
|
|
514
|
-
"dims": [],
|
|
515
|
-
"data": np.abs(unique_chan_width[0]),
|
|
516
|
-
"attrs": msv4_measure,
|
|
517
|
-
}
|
|
518
|
+
xds.frequency.attrs["channel_width"] = make_quantity(
|
|
519
|
+
np.abs(unique_chan_width[0]), msv4_measure["units"] if msv4_measure else "Hz"
|
|
520
|
+
)
|
|
518
521
|
|
|
519
522
|
###### Create Time Coordinate ######
|
|
520
523
|
main_table_attrs = extract_table_attributes(in_file)
|
|
@@ -527,15 +530,10 @@ def create_coordinates(
|
|
|
527
530
|
msv4_measure = column_description_casacore_to_msv4_measure(
|
|
528
531
|
main_column_descriptions["INTERVAL"]
|
|
529
532
|
)
|
|
530
|
-
|
|
531
|
-
msv4_measure["
|
|
532
|
-
|
|
533
|
-
|
|
534
|
-
"dims": [],
|
|
535
|
-
"data": interval,
|
|
536
|
-
"attrs": msv4_measure,
|
|
537
|
-
}
|
|
538
|
-
xds.time.attrs["effective_integration_time"] = "EFFECTIVE_INTEGRATION_TIME"
|
|
533
|
+
xds.time.attrs["integration_time"] = make_quantity(
|
|
534
|
+
interval, msv4_measure["units"] if msv4_measure else "s"
|
|
535
|
+
)
|
|
536
|
+
|
|
539
537
|
return xds
|
|
540
538
|
|
|
541
539
|
|
|
@@ -579,90 +577,115 @@ def create_data_variables(
|
|
|
579
577
|
parallel_mode,
|
|
580
578
|
main_chunksize,
|
|
581
579
|
):
|
|
582
|
-
|
|
583
|
-
|
|
584
|
-
|
|
585
|
-
|
|
586
|
-
|
|
587
|
-
|
|
588
|
-
except KeyError:
|
|
589
|
-
# If time isn't chunked then `read_col_conversion_dask` is slower than `read_col_conversion_numpy`
|
|
590
|
-
logger.warning(
|
|
591
|
-
"'time' isn't specified in `main_chunksize`. Defaulting to `parallel_mode = 'none'`."
|
|
592
|
-
)
|
|
593
|
-
parallel_mode = "none"
|
|
594
|
-
|
|
595
|
-
# Set read_col_conversion from value of `parallel_mode` argument
|
|
596
|
-
# TODO: To make this compatible with multi-node conversion, `read_col_conversion_dask` and TableManager must be pickled.
|
|
597
|
-
# Casacore will make this difficult
|
|
598
|
-
global read_col_conversion
|
|
599
|
-
if parallel_mode == "time":
|
|
600
|
-
read_col_conversion = read_col_conversion_dask
|
|
601
|
-
else:
|
|
602
|
-
read_col_conversion = read_col_conversion_numpy
|
|
580
|
+
time_chunksize = main_chunksize.get("time", None) if main_chunksize else None
|
|
581
|
+
if parallel_mode == "time" and time_chunksize is None:
|
|
582
|
+
logger.warning(
|
|
583
|
+
"'time' isn't specified in `main_chunksize`. Defaulting to `parallel_mode = 'none'`."
|
|
584
|
+
)
|
|
585
|
+
parallel_mode = "none"
|
|
603
586
|
|
|
604
587
|
# Create Data Variables
|
|
605
588
|
with table_manager.get_table() as tb_tool:
|
|
606
589
|
col_names = tb_tool.colnames()
|
|
607
590
|
|
|
591
|
+
target_cols = set(col_names) & set(col_to_data_variable_names.keys())
|
|
592
|
+
if target_cols.issuperset({"WEIGHT", "WEIGHT_SPECTRUM"}):
|
|
593
|
+
target_cols.remove("WEIGHT")
|
|
594
|
+
|
|
608
595
|
main_table_attrs = extract_table_attributes(in_file)
|
|
609
596
|
main_column_descriptions = main_table_attrs["column_descriptions"]
|
|
610
|
-
|
|
611
|
-
|
|
612
|
-
|
|
613
|
-
|
|
614
|
-
|
|
615
|
-
|
|
616
|
-
|
|
617
|
-
|
|
618
|
-
|
|
619
|
-
|
|
620
|
-
|
|
621
|
-
|
|
622
|
-
|
|
623
|
-
|
|
624
|
-
|
|
625
|
-
|
|
626
|
-
|
|
627
|
-
|
|
628
|
-
|
|
629
|
-
|
|
630
|
-
|
|
631
|
-
|
|
632
|
-
|
|
633
|
-
|
|
634
|
-
|
|
635
|
-
|
|
636
|
-
|
|
637
|
-
|
|
638
|
-
),
|
|
639
|
-
dims=col_dims[col],
|
|
640
|
-
)
|
|
641
|
-
|
|
642
|
-
xds[col_to_data_variable_names[col]].attrs.update(
|
|
643
|
-
create_attribute_metadata(col, main_column_descriptions)
|
|
597
|
+
|
|
598
|
+
# Use a double-ended queue in case WEIGHT_SPECTRUM conversion fails, and
|
|
599
|
+
# we need to add WEIGHT to list of columns to convert during iteration
|
|
600
|
+
target_cols = deque(target_cols)
|
|
601
|
+
|
|
602
|
+
while target_cols:
|
|
603
|
+
col = target_cols.popleft()
|
|
604
|
+
datavar_name = col_to_data_variable_names[col]
|
|
605
|
+
read_col_conversion = get_read_col_conversion_function(col, parallel_mode)
|
|
606
|
+
|
|
607
|
+
try:
|
|
608
|
+
start = time.time()
|
|
609
|
+
col_data = read_col_conversion(
|
|
610
|
+
table_manager,
|
|
611
|
+
col,
|
|
612
|
+
time_baseline_shape,
|
|
613
|
+
tidxs,
|
|
614
|
+
bidxs,
|
|
615
|
+
use_table_iter,
|
|
616
|
+
time_chunksize,
|
|
617
|
+
)
|
|
618
|
+
|
|
619
|
+
if col == "TIME_CENTROID":
|
|
620
|
+
col_data = convert_casacore_time(col_data, False)
|
|
621
|
+
|
|
622
|
+
elif col == "WEIGHT":
|
|
623
|
+
col_data = repeat_weight_array(
|
|
624
|
+
col_data, parallel_mode, xds.sizes, main_chunksize
|
|
644
625
|
)
|
|
645
626
|
|
|
627
|
+
xds[datavar_name] = xr.DataArray(
|
|
628
|
+
col_data,
|
|
629
|
+
dims=col_dims[col],
|
|
630
|
+
attrs=create_attribute_metadata(col, main_column_descriptions),
|
|
631
|
+
)
|
|
632
|
+
logger.debug(f"Time to read column {col} : {time.time() - start}")
|
|
633
|
+
|
|
634
|
+
except Exception as exc:
|
|
635
|
+
logger.debug(f"Could not load column {col}, exception: {exc}")
|
|
636
|
+
logger.debug(traceback.format_exc())
|
|
637
|
+
|
|
638
|
+
if col == "WEIGHT_SPECTRUM" and "WEIGHT" in col_names:
|
|
646
639
|
logger.debug(
|
|
647
|
-
"
|
|
640
|
+
"Failed to convert WEIGHT_SPECTRUM column: "
|
|
641
|
+
"will attempt to use WEIGHT instead"
|
|
648
642
|
)
|
|
649
|
-
|
|
650
|
-
|
|
651
|
-
|
|
652
|
-
|
|
653
|
-
|
|
654
|
-
|
|
655
|
-
|
|
656
|
-
|
|
657
|
-
|
|
658
|
-
|
|
659
|
-
|
|
660
|
-
|
|
661
|
-
|
|
662
|
-
|
|
663
|
-
|
|
664
|
-
|
|
665
|
-
|
|
643
|
+
target_cols.append("WEIGHT")
|
|
644
|
+
|
|
645
|
+
|
|
646
|
+
def get_read_col_conversion_function(col_name: str, parallel_mode: str) -> Callable:
|
|
647
|
+
"""
|
|
648
|
+
Returns the appropriate read_col_conversion function: use the dask version
|
|
649
|
+
for large columns and parallel_mode="time", or the numpy version otherwise.
|
|
650
|
+
"""
|
|
651
|
+
large_columns = {
|
|
652
|
+
"DATA",
|
|
653
|
+
"CORRECTED_DATA",
|
|
654
|
+
"MODEL_DATA",
|
|
655
|
+
"WEIGHT_SPECTRUM",
|
|
656
|
+
"WEIGHT",
|
|
657
|
+
"FLAG",
|
|
658
|
+
}
|
|
659
|
+
return (
|
|
660
|
+
read_col_conversion_dask
|
|
661
|
+
if parallel_mode == "time" and col_name in large_columns
|
|
662
|
+
else read_col_conversion_numpy
|
|
663
|
+
)
|
|
664
|
+
|
|
665
|
+
|
|
666
|
+
def repeat_weight_array(
|
|
667
|
+
weight_arr,
|
|
668
|
+
parallel_mode: str,
|
|
669
|
+
main_sizes: dict[str, int],
|
|
670
|
+
main_chunksize: dict[str, int],
|
|
671
|
+
):
|
|
672
|
+
"""
|
|
673
|
+
Repeat the weights read from the WEIGHT column along the frequency dimension.
|
|
674
|
+
Returns a dask array if parallel_mode="time", or a numpy array otherwise.
|
|
675
|
+
"""
|
|
676
|
+
reshaped_arr = weight_arr[:, :, None, :]
|
|
677
|
+
repeats = (1, 1, main_sizes["frequency"], 1)
|
|
678
|
+
|
|
679
|
+
if parallel_mode == "time":
|
|
680
|
+
result = da.tile(reshaped_arr, repeats)
|
|
681
|
+
# da.tile() adds each repeat as a separate chunk, so rechunking is necessary
|
|
682
|
+
chunksizes = tuple(
|
|
683
|
+
main_chunksize.get(dim, main_sizes[dim])
|
|
684
|
+
for dim in ("time", "baseline_id", "frequency", "polarization")
|
|
685
|
+
)
|
|
686
|
+
return result.rechunk(chunksizes)
|
|
687
|
+
|
|
688
|
+
return np.tile(reshaped_arr, repeats)
|
|
666
689
|
|
|
667
690
|
|
|
668
691
|
def add_missing_data_var_attrs(xds):
|
|
@@ -675,7 +698,7 @@ def add_missing_data_var_attrs(xds):
|
|
|
675
698
|
data_var_names = ["SPECTRUM", "SPECTRUM_CORRECTED"]
|
|
676
699
|
for var_name in data_var_names:
|
|
677
700
|
if var_name in xds.data_vars:
|
|
678
|
-
xds.data_vars[var_name].attrs["units"] =
|
|
701
|
+
xds.data_vars[var_name].attrs["units"] = ""
|
|
679
702
|
|
|
680
703
|
vis_var_names = ["VISIBILITY_MODEL"]
|
|
681
704
|
for var_name in vis_var_names:
|
|
@@ -686,44 +709,11 @@ def add_missing_data_var_attrs(xds):
|
|
|
686
709
|
"VISIBILITY"
|
|
687
710
|
].attrs["units"]
|
|
688
711
|
else:
|
|
689
|
-
xds.data_vars[var_name].attrs["units"] =
|
|
712
|
+
xds.data_vars[var_name].attrs["units"] = ""
|
|
690
713
|
|
|
691
714
|
return xds
|
|
692
715
|
|
|
693
716
|
|
|
694
|
-
def get_weight(
|
|
695
|
-
xds,
|
|
696
|
-
col,
|
|
697
|
-
table_manager,
|
|
698
|
-
time_baseline_shape,
|
|
699
|
-
tidxs,
|
|
700
|
-
bidxs,
|
|
701
|
-
use_table_iter,
|
|
702
|
-
main_column_descriptions,
|
|
703
|
-
time_chunksize,
|
|
704
|
-
):
|
|
705
|
-
xds[col_to_data_variable_names[col]] = xr.DataArray(
|
|
706
|
-
np.tile(
|
|
707
|
-
read_col_conversion(
|
|
708
|
-
table_manager,
|
|
709
|
-
col,
|
|
710
|
-
time_baseline_shape,
|
|
711
|
-
tidxs,
|
|
712
|
-
bidxs,
|
|
713
|
-
use_table_iter,
|
|
714
|
-
time_chunksize,
|
|
715
|
-
)[:, :, None, :],
|
|
716
|
-
(1, 1, xds.sizes["frequency"], 1),
|
|
717
|
-
),
|
|
718
|
-
dims=col_dims[col],
|
|
719
|
-
)
|
|
720
|
-
|
|
721
|
-
xds[col_to_data_variable_names[col]].attrs.update(
|
|
722
|
-
create_attribute_metadata(col, main_column_descriptions)
|
|
723
|
-
)
|
|
724
|
-
return xds
|
|
725
|
-
|
|
726
|
-
|
|
727
717
|
def create_taql_query_where(partition_info: dict):
|
|
728
718
|
main_par_table_cols = [
|
|
729
719
|
"DATA_DESC_ID",
|
|
@@ -765,9 +755,9 @@ def fix_uvw_frame(
|
|
|
765
755
|
"""
|
|
766
756
|
if xds.UVW.attrs["frame"] == "ITRF":
|
|
767
757
|
if is_single_dish:
|
|
768
|
-
center_var = "
|
|
758
|
+
center_var = "FIELD_REFERENCE_CENTER_DIRECTION"
|
|
769
759
|
else:
|
|
770
|
-
center_var = "
|
|
760
|
+
center_var = "FIELD_PHASE_CENTER_DIRECTION"
|
|
771
761
|
|
|
772
762
|
xds.UVW.attrs["frame"] = field_and_source_xds[center_var].attrs["frame"]
|
|
773
763
|
|
|
@@ -794,11 +784,9 @@ def estimate_memory_for_partition(in_file: str, partition: dict) -> float:
|
|
|
794
784
|
if "shape" in col_descr and isinstance(col_descr["shape"], np.ndarray):
|
|
795
785
|
# example: "shape": array([15, 4]) => gives pols x channels
|
|
796
786
|
cells_in_row = col_descr["shape"].prod()
|
|
797
|
-
npols = col_descr["shape"][-1]
|
|
798
787
|
else:
|
|
799
788
|
first_row = np.array(tb_tool.col(data_col)[0])
|
|
800
789
|
cells_in_row = np.prod(first_row.shape)
|
|
801
|
-
npols = first_row.shape[-1]
|
|
802
790
|
|
|
803
791
|
if col_descr["valueType"] == "complex":
|
|
804
792
|
# Assume. Otherwise, read first column and get the itemsize:
|
|
@@ -905,7 +893,6 @@ def estimate_memory_for_partition(in_file: str, partition: dict) -> float:
|
|
|
905
893
|
taql_partition = create_taql_query_where(partition)
|
|
906
894
|
taql_main = f"select * from $mtable {taql_partition}"
|
|
907
895
|
with open_table_ro(in_file) as mtable:
|
|
908
|
-
col_names = mtable.colnames()
|
|
909
896
|
with open_query(mtable, taql_main) as tb_tool:
|
|
910
897
|
# Do not feel tempted to rely on nrows. nrows tends to underestimate memory when baselines are missing.
|
|
911
898
|
# For some EVN datasets that can easily underestimate by a 50%
|
|
@@ -934,6 +921,7 @@ def estimate_memory_for_partition(in_file: str, partition: dict) -> float:
|
|
|
934
921
|
+ calculate_term_other_msv2_indices(msv2_nrows)
|
|
935
922
|
+ calculate_term_sub_xds(estimate_main_xds)
|
|
936
923
|
+ calculate_term_to_zarr(estimate_main_xds)
|
|
924
|
+
+ calculate_term_attrs(estimate_main_xds)
|
|
937
925
|
)
|
|
938
926
|
estimate /= GiBYTES_TO_BYTES
|
|
939
927
|
|
|
@@ -1263,10 +1251,6 @@ def convert_and_write_partition(
|
|
|
1263
1251
|
|
|
1264
1252
|
# Create field_and_source_xds (combines field, source and ephemeris data into one super dataset)
|
|
1265
1253
|
start = time.time()
|
|
1266
|
-
if ephemeris_interpolate:
|
|
1267
|
-
ephemeris_interp_time = xds.time.values
|
|
1268
|
-
else:
|
|
1269
|
-
ephemeris_interp_time = None
|
|
1270
1254
|
|
|
1271
1255
|
# if "FIELD_ID" not in partition_scheme:
|
|
1272
1256
|
# field_id = np.full(time_baseline_shape, -42, dtype=int)
|
|
@@ -1324,7 +1308,7 @@ def convert_and_write_partition(
|
|
|
1324
1308
|
add_encoding(xds, compressor=compressor, chunks=main_chunksize)
|
|
1325
1309
|
logger.debug("Time add compressor and chunk " + str(time.time() - start))
|
|
1326
1310
|
|
|
1327
|
-
|
|
1311
|
+
os.path.join(
|
|
1328
1312
|
out_file,
|
|
1329
1313
|
pathlib.Path(in_file).name.replace(".ms", "") + "_" + str(ms_v4_id),
|
|
1330
1314
|
)
|
|
@@ -1371,7 +1355,7 @@ def convert_and_write_partition(
|
|
|
1371
1355
|
ms_xdt["/phased_array_xds"] = phased_array_xds
|
|
1372
1356
|
|
|
1373
1357
|
if storage_backend == "zarr":
|
|
1374
|
-
ms_xdt.to_zarr(store=os.path.join(out_file, ms_v4_name))
|
|
1358
|
+
ms_xdt.to_zarr(store=os.path.join(out_file, ms_v4_name), mode=mode)
|
|
1375
1359
|
elif storage_backend == "netcdf":
|
|
1376
1360
|
# xds.to_netcdf(path=file_name+"/MAIN", mode=mode) #Does not work
|
|
1377
1361
|
raise
|
|
@@ -1,5 +1,3 @@
|
|
|
1
|
-
import toolviper.utils.logger as logger
|
|
2
|
-
import time
|
|
3
1
|
from typing import Tuple, Union
|
|
4
2
|
|
|
5
3
|
import numpy as np
|
|
@@ -10,7 +8,6 @@ from xradio.measurement_set._utils._msv2.subtables import subt_rename_ids
|
|
|
10
8
|
from xradio.measurement_set._utils._msv2._tables.read import (
|
|
11
9
|
load_generic_table,
|
|
12
10
|
convert_casacore_time,
|
|
13
|
-
convert_casacore_time_to_mjd,
|
|
14
11
|
make_taql_where_between_min_max,
|
|
15
12
|
table_exists,
|
|
16
13
|
)
|
|
@@ -20,17 +17,15 @@ from xradio.measurement_set._utils._msv2.msv4_sub_xdss import (
|
|
|
20
17
|
)
|
|
21
18
|
|
|
22
19
|
from xradio._utils.list_and_array import (
|
|
23
|
-
check_if_consistent,
|
|
24
20
|
unique_1d,
|
|
25
|
-
to_list,
|
|
26
|
-
to_np_array,
|
|
27
21
|
)
|
|
22
|
+
from xradio._utils.dict_helpers import make_quantity_attrs
|
|
28
23
|
|
|
29
24
|
|
|
30
25
|
def create_antenna_xds(
|
|
31
26
|
in_file: str,
|
|
32
27
|
spectral_window_id: int,
|
|
33
|
-
antenna_id:
|
|
28
|
+
antenna_id: np.ndarray,
|
|
34
29
|
feed_id: list,
|
|
35
30
|
telescope_name: str,
|
|
36
31
|
partition_polarization: xr.DataArray,
|
|
@@ -44,8 +39,8 @@ def create_antenna_xds(
|
|
|
44
39
|
Path to the input MSv2.
|
|
45
40
|
spectral_window_id : int
|
|
46
41
|
Spectral window ID.
|
|
47
|
-
antenna_id :
|
|
48
|
-
|
|
42
|
+
antenna_id : np.ndarray
|
|
43
|
+
Antenna IDs.
|
|
49
44
|
feed_id : list
|
|
50
45
|
List of feed IDs.
|
|
51
46
|
telescope_name : str
|
|
@@ -82,7 +77,7 @@ def create_antenna_xds(
|
|
|
82
77
|
|
|
83
78
|
|
|
84
79
|
def extract_antenna_info(
|
|
85
|
-
ant_xds: xr.Dataset, in_file: str, antenna_id:
|
|
80
|
+
ant_xds: xr.Dataset, in_file: str, antenna_id: np.ndarray, telescope_name: str
|
|
86
81
|
) -> xr.Dataset:
|
|
87
82
|
"""Reformats MSv2 Antenna table content to MSv4 schema.
|
|
88
83
|
|
|
@@ -92,8 +87,8 @@ def extract_antenna_info(
|
|
|
92
87
|
The dataset that will be updated with antenna information.
|
|
93
88
|
in_file : str
|
|
94
89
|
Path to the input MSv2.
|
|
95
|
-
antenna_id :
|
|
96
|
-
|
|
90
|
+
antenna_id : np.array
|
|
91
|
+
Antenna IDs to extract information for.
|
|
97
92
|
telescope_name : str
|
|
98
93
|
The name of the telescope.
|
|
99
94
|
|
|
@@ -138,7 +133,7 @@ def extract_antenna_info(
|
|
|
138
133
|
generic_ant_xds, ant_xds, to_new_data_variables, to_new_coords
|
|
139
134
|
)
|
|
140
135
|
|
|
141
|
-
ant_xds["ANTENNA_DISH_DIAMETER"].attrs.update(
|
|
136
|
+
ant_xds["ANTENNA_DISH_DIAMETER"].attrs.update(make_quantity_attrs(["m"]))
|
|
142
137
|
|
|
143
138
|
ant_xds["ANTENNA_POSITION"].attrs["coordinate_system"] = "geocentric"
|
|
144
139
|
ant_xds["ANTENNA_POSITION"].attrs["origin_object_name"] = "earth"
|
|
@@ -507,9 +502,7 @@ def create_phase_calibration_xds(
|
|
|
507
502
|
phase_cal_xds = phase_cal_xds.assign_coords(ant_borrowed_coords | tone_label_coord)
|
|
508
503
|
|
|
509
504
|
# Adjust expected types
|
|
510
|
-
phase_cal_xds["time_phase_cal"] =
|
|
511
|
-
phase_cal_xds.time_phase_cal.astype("float64").astype("float64") / 10**9
|
|
512
|
-
)
|
|
505
|
+
phase_cal_xds["time_phase_cal"] = phase_cal_xds.time_phase_cal
|
|
513
506
|
|
|
514
507
|
phase_cal_xds = rename_and_interpolate_to_time(
|
|
515
508
|
phase_cal_xds, "time_phase_cal", phase_cal_interp_time, "phase_cal_xds"
|