xradio 0.0.29__py3-none-any.whl → 0.0.31__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- xradio/__init__.py +10 -10
- xradio/_utils/common.py +57 -0
- xradio/_utils/{array.py → list_and_array.py} +23 -0
- xradio/vis/_processing_set.py +66 -23
- xradio/vis/_vis_utils/_ms/_tables/create_field_and_source_xds.py +205 -203
- xradio/vis/_vis_utils/_ms/_tables/load_main_table.py +1 -1
- xradio/vis/_vis_utils/_ms/_tables/read.py +20 -19
- xradio/vis/_vis_utils/_ms/_tables/read_main_table.py +1 -1
- xradio/vis/_vis_utils/_ms/_tables/read_subtables.py +1 -1
- xradio/vis/_vis_utils/_ms/chunks.py +1 -1
- xradio/vis/_vis_utils/_ms/conversion.py +65 -58
- xradio/vis/_vis_utils/_ms/descr.py +1 -1
- xradio/vis/_vis_utils/_ms/msv2_to_msv4_meta.py +4 -2
- xradio/vis/_vis_utils/_ms/msv4_sub_xdss.py +7 -4
- xradio/vis/_vis_utils/_ms/partition_queries.py +18 -16
- xradio/vis/convert_msv2_to_processing_set.py +11 -73
- xradio/vis/read_processing_set.py +7 -5
- {xradio-0.0.29.dist-info → xradio-0.0.31.dist-info}/METADATA +1 -1
- {xradio-0.0.29.dist-info → xradio-0.0.31.dist-info}/RECORD +22 -22
- {xradio-0.0.29.dist-info → xradio-0.0.31.dist-info}/WHEEL +1 -1
- {xradio-0.0.29.dist-info → xradio-0.0.31.dist-info}/LICENSE.txt +0 -0
- {xradio-0.0.29.dist-info → xradio-0.0.31.dist-info}/top_level.txt +0 -0
xradio/vis/_vis_utils/_ms/_tables/read.py:

```diff
@@ -747,13 +747,13 @@ def load_generic_cols(
         dict of coordinates and dict of data vars.
     """

-    …
+    col_types = find_loadable_cols(tb_tool, ignore)

     trows = tb_tool.row(ignore, exclude=True)[:]

     # Produce coords and data vars from MS columns
     mcoords, mvars = {}, {}
-    for col in …
+    for col in col_types.keys():
         try:
             # TODO
             # benchmark np.stack() performance
@@ -779,7 +779,7 @@ def load_generic_cols(
             if len(set([isinstance(row[col], dict) for row in trows])) > 1:
                 continue  # can't deal with this case

-            data = handle_variable_col_issues(inpath, col, trows)
+            data = handle_variable_col_issues(inpath, col, col_types[col], trows)

             if len(data) == 0:
                 continue
@@ -827,7 +827,7 @@ def load_fixed_size_cols(
         dict of coordinates and dict of data vars, ready to construct an xr.Dataset
     """

-    loadable_cols = find_loadable_filled_cols(tb_tool, ignore)
+    loadable_cols = find_loadable_cols(tb_tool, ignore)

     # Produce coords and data vars from MS columns
     mcoords, mvars = {}, {}
@@ -856,13 +856,16 @@ def load_fixed_size_cols(
     return mcoords, mvars


-def find_loadable_filled_cols(
+def find_loadable_cols(
     tb_tool: tables.table, ignore: Union[List[str], None]
-) -> Dict:
+) -> Dict[str, str]:
     """
-    For a table, finds the columns that are …
-    …
-    …
+    For a table, finds the columns that are loadable = not of record type,
+    and not to be ignored
+    In extreme cases of variable size columns, it can happen that all the
+    cells are empty (iscelldefined() == false). This is still considered a
+    loadable column, even though all values of the resulting data var will
+    be empty.

     Parameters
     ----------
@@ -874,17 +877,15 @@ def find_loadable_filled_cols(
     Returns
     -------
     Dict
-        dict of {column name …
+        dict of {column name: column type} for columns that can/should be loaded
     """

     colnames = tb_tool.colnames()
-    …
+    table_desc = tb_tool.getdesc()
     loadable_cols = {
-        col: …
+        col: table_desc[col]["valueType"]
         for col in colnames
-        if (col not in ignore)
-        and (tb_tool.iscelldefined(col, 0))
-        and tb_tool.coldatatype(col) != "record"
+        if (col not in ignore) and tb_tool.coldatatype(col) != "record"
     }
     return loadable_cols

@@ -978,7 +979,7 @@ def raw_col_data_to_coords_vars(


 def handle_variable_col_issues(
-    inpath: str, col: str, trows: tables.tablerow
+    inpath: str, col: str, col_type: str, trows: tables.tablerow
 ) -> np.ndarray:
     """
     load variable-size array columns, padding with nans wherever
@@ -992,8 +993,8 @@ def handle_variable_col_issues(
         path name of the MS
     col : str
         column being loaded
-    …
-    …
+    col_type : str
+        type of the column cell values
     trows : tables.tablerow
         rows from a table as loaded by tables.row()

@@ -1008,7 +1009,7 @@ def handle_variable_col_issues(

     mshape = np.array(max([np.array(row[col]).shape for row in trows]))
     try:
-        pad_nan = get_pad_nan(…)
+        pad_nan = get_pad_nan(np.array((), dtype=col_type))

         # TODO
         # benchmark np.stack() performance
```
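The point of threading `col_type` through these functions is that the pad value used for ragged cells must match the column's casacore value type. Below is a minimal standalone sketch of that padding idea in plain numpy; the function name and the 1-D row handling are illustrative only (xradio's `get_pad_nan` / `handle_variable_col_issues` handle N-dimensional cells):

```python
import numpy as np

def pad_ragged_rows(rows, value_type):
    """Pad ragged per-row arrays of a column to a common length,
    mirroring the NaN-padding idea in handle_variable_col_issues."""
    # An empty array of the casacore valueType (e.g. "double", "complex")
    # fixes the dtype; NaN cast into that dtype gives the pad value for
    # float/complex columns.
    pad_nan = np.array(np.nan).astype(np.array((), dtype=value_type).dtype)
    max_len = max(len(row) for row in rows)
    padded = np.full((len(rows), max_len), pad_nan)
    for i, row in enumerate(rows):
        padded[i, : len(row)] = row
    return padded

print(pad_ragged_rows([np.array([1.0, 2.0]), np.array([3.0])], "double"))
# [[ 1.  2.]
#  [ 3. nan]]
```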
```diff
@@ -13,7 +13,7 @@ from ._tables.read import read_generic_table, make_freq_attrs
 from ._tables.read_subtables import read_delayed_pointing_table
 from .._utils.partition_attrs import add_partition_attrs
 from .._utils.xds_helper import make_coords
-from xradio._utils.array import unique_1d
+from xradio._utils.list_and_array import unique_1d


 def read_spw_ddi_ant_pol(inpath: str) -> Tuple[xr.Dataset]:
```
xradio/vis/_vis_utils/_ms/chunks.py:

```diff
@@ -31,7 +31,7 @@ from ._tables.read import (
 )
 from ._tables.read_main_table import get_baselines, get_baseline_indices, get_utimes_tol
 from .._utils.stokes_types import stokes_types
-from xradio._utils.array import …
+from xradio._utils.list_and_array import check_if_consistent, unique_1d, to_list


 def parse_chunksize(
```
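The `array.py` → `list_and_array.py` rename shows up as an import-only change in several modules. For orientation, here are hypothetical stand-ins for two of the imported helpers, with behaviour inferred from how the names are used in the `partition_info` hunk in conversion.py below, not copied from xradio's source (`check_if_consistent` is omitted):

```python
import numpy as np

# Illustrative stand-ins for the renamed xradio._utils.list_and_array helpers.
def unique_1d(arr):
    """Unique values of a 1-d (or scalar) array, sorted."""
    return np.unique(np.atleast_1d(arr))

def to_list(x):
    """Convert an array (or any iterable) to a plain Python list."""
    return x.tolist() if isinstance(x, np.ndarray) else list(x)

print(to_list(unique_1d(np.array([2, 0, 2, 1]))))  # [0, 1, 2]
```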
xradio/vis/_vis_utils/_ms/conversion.py:

```diff
@@ -637,7 +637,7 @@ def convert_and_write_partition(
         _description_
     out_file : str
         _description_
-    intent : str
+    obs_mode : str
         _description_
     ddi : int, optional
         _description_, by default 0
@@ -670,7 +670,7 @@ def convert_and_write_partition(

     taql_where = create_taql_query(partition_info)
     ddi = partition_info["DATA_DESC_ID"][0]
-    intent = …
+    obs_mode = str(partition_info["OBS_MODE"][0])

     start = time.time()
     with open_table_ro(in_file) as mtable:
@@ -754,9 +754,6 @@ def convert_and_write_partition(

     start = time.time()

-    xds.attrs["intent"] = intent
-    xds.attrs["ddi"] = ddi
-
     # Time and frequency should always be increasing
     if len(xds.frequency) > 1 and xds.frequency[1] - xds.frequency[0] < 0:
         xds = xds.sel(frequency=slice(None, None, -1))
@@ -765,46 +762,12 @@ def convert_and_write_partition(
         xds = xds.sel(time=slice(None, None, -1))

     # Add data_groups and field_info
-    xds.attrs["data_groups"] = {}
-    if "VISIBILITY" in xds:
-        xds.attrs["data_groups"]["base"] = {
-            "visibility": "VISIBILITY",
-            "flag": "FLAG",
-            "weight": "WEIGHT",
-            "uvw": "UVW",
-        }
-
-    if "VISIBILITY_CORRECTED" in xds:
-        xds.attrs["data_groups"]["corrected"] = {
-            "visibility": "VISIBILITY_CORRECTED",
-            "flag": "FLAG",
-            "weight": "WEIGHT",
-            "uvw": "UVW",
-        }
-
-    is_single_dish = False
-    if "SPECTRUM" in xds:
-        xds.attrs["data_groups"]["base"] = {
-            "spectrum": "SPECTRUM",
-            "flag": "FLAG",
-            "weight": "WEIGHT",
-            "uvw": "UVW",
-        }
-        is_single_dish = True
-
-    if "SPECTRUM_CORRECTED" in xds:
-        xds.attrs["data_groups"]["corrected"] = {
-            "spectrum": "SPECTRUM_CORRECTED",
-            "flag": "FLAG",
-            "weight": "WEIGHT",
-            "uvw": "UVW",
-        }
-        is_single_dish = True
+    xds, is_single_dish = add_data_groups(xds)

     # Create field_and_source_xds (combines field, source and ephemeris data into one super dataset)
     start = time.time()
     if ephemeris_interpolate:
-        ephemeris_interp_time = xds.time
+        ephemeris_interp_time = xds.time.values
     else:
         ephemeris_interp_time = None

@@ -812,10 +775,7 @@ def convert_and_write_partition(
     scan_id[tidxs, bidxs] = tb_tool.getcol("SCAN_NUMBER")
     scan_id = np.max(scan_id, axis=1)

-    if (
-        partition_scheme == "ddi_intent_source"
-        or partition_scheme == "ddi_intent_scan"
-    ):
+    if "FIELD_ID" not in partition_scheme:
         field_id = np.full(time_baseline_shape, -42, dtype=int)
         field_id[tidxs, bidxs] = tb_tool.getcol("FIELD_ID")
         field_id = np.max(field_id, axis=1)
@@ -828,7 +788,7 @@ def convert_and_write_partition(
     # assert len(col_unique) == 1, col_name + " is not consistent."
     # return col_unique[0]

-    field_and_source_xds = create_field_and_source_xds(
+    field_and_source_xds, source_id = create_field_and_source_xds(
         in_file,
         field_id,
         xds.frequency.attrs["spectral_window_id"],
@@ -867,23 +827,22 @@ def convert_and_write_partition(
         + str(ms_v4_id),
     )

-    if isinstance(field_id, np.ndarray):
-        field_id = "OTF"
-
     xds.attrs["partition_info"] = {
         "spectral_window_id": xds.frequency.attrs["spectral_window_id"],
         "spectral_window_name": xds.frequency.attrs["spectral_window_name"],
-        "field_id": field_id,
-        "field_name": …
-        …
-        …
-        "…
-        "…
+        "field_id": to_list(unique_1d(field_id)),
+        "field_name": to_list(
+            np.unique(field_and_source_xds.field_name.values)
+        ),
+        "source_id": to_list(unique_1d(source_id)),
+        "source_name": to_list(
+            np.unique(field_and_source_xds.source_name.values)
+        ),
+        "polarization_setup": to_list(xds.polarization.values),
+        "obs_mode": obs_mode,
         "taql": taql_where,
     }

-    # print(xds)
-
     start = time.time()
     if storage_backend == "zarr":
         xds.to_zarr(store=os.path.join(file_name, "MAIN"), mode=mode)
@@ -910,3 +869,51 @@ def convert_and_write_partition(
     logger.debug("Write data " + str(time.time() - start))

     # logger.info("Saved ms_v4 " + file_name + " in " + str(time.time() - start_with) + "s")
+
+
+def add_data_groups(xds):
+    xds.attrs["data_groups"] = {}
+    if "VISIBILITY" in xds:
+        xds.attrs["data_groups"]["base"] = {
+            "visibility": "VISIBILITY",
+            "flag": "FLAG",
+            "weight": "WEIGHT",
+            "uvw": "UVW",
+        }
+
+    if "VISIBILITY_CORRECTED" in xds:
+        xds.attrs["data_groups"]["corrected"] = {
+            "visibility": "VISIBILITY_CORRECTED",
+            "flag": "FLAG",
+            "weight": "WEIGHT",
+            "uvw": "UVW",
+        }
+
+    if "VISIBILITY_MODEL" in xds:
+        xds.attrs["data_groups"]["model"] = {
+            "visibility": "VISIBILITY_MODEL",
+            "flag": "FLAG",
+            "weight": "WEIGHT",
+            "uvw": "UVW",
+        }
+
+    is_single_dish = False
+    if "SPECTRUM" in xds:
+        xds.attrs["data_groups"]["base"] = {
+            "spectrum": "SPECTRUM",
+            "flag": "FLAG",
+            "weight": "WEIGHT",
+            "uvw": "UVW",
+        }
+        is_single_dish = True
+
+    if "SPECTRUM_CORRECTED" in xds:
+        xds.attrs["data_groups"]["corrected"] = {
+            "spectrum": "SPECTRUM_CORRECTED",
+            "flag": "FLAG",
+            "weight": "WEIGHT",
+            "uvw": "UVW",
+        }
+        is_single_dish = True
+
+    return xds, is_single_dish
```
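For reference, the attribute written by the new `add_data_groups` helper is a plain nested dict keyed by data group name. For an interferometric MS carrying DATA, CORRECTED_DATA and MODEL_DATA columns it would look like this (derived from the hunk above, not captured output):

```python
# Sketch of xds.attrs["data_groups"] after add_data_groups, per the hunk above.
data_groups = {
    "base": {"visibility": "VISIBILITY", "flag": "FLAG", "weight": "WEIGHT", "uvw": "UVW"},
    "corrected": {"visibility": "VISIBILITY_CORRECTED", "flag": "FLAG", "weight": "WEIGHT", "uvw": "UVW"},
    "model": {"visibility": "VISIBILITY_MODEL", "flag": "FLAG", "weight": "WEIGHT", "uvw": "UVW"},
}
# Single-dish data populates "spectrum" instead of "visibility" entries,
# in which case add_data_groups also returns is_single_dish=True.
```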
xradio/vis/_vis_utils/_ms/descr.py:

```diff
@@ -7,7 +7,7 @@ import xarray as xr

 from ._tables.read import read_generic_table, read_flat_col_chunk
 from ._tables.table_query import open_query, open_table_ro
-from xradio._utils.array import unique_1d
+from xradio._utils.list_and_array import unique_1d


 def describe_ms(
```
xradio/vis/_vis_utils/_ms/msv2_to_msv4_meta.py:

```diff
@@ -4,6 +4,7 @@ col_to_data_variable_names = {
     "FLOAT_DATA": "SPECTRUM",
     "DATA": "VISIBILITY",
     "CORRECTED_DATA": "VISIBILITY_CORRECTED",
+    "MODEL_DATA": "VISIBILITY_MODEL",
     "WEIGHT_SPECTRUM": "WEIGHT",
     "WEIGHT": "WEIGHT",
     "FLAG": "FLAG",
@@ -14,6 +15,7 @@ col_to_data_variable_names = {
 col_dims = {
     "DATA": ("time", "baseline_id", "frequency", "polarization"),
     "CORRECTED_DATA": ("time", "baseline_id", "frequency", "polarization"),
+    "MODEL_DATA": ("time", "baseline_id", "frequency", "polarization"),
     "WEIGHT_SPECTRUM": ("time", "baseline_id", "frequency", "polarization"),
     "WEIGHT": ("time", "baseline_id", "frequency", "polarization"),
     "FLAG": ("time", "baseline_id", "frequency", "polarization"),
@@ -107,7 +109,7 @@ def column_description_casacore_to_msv4_measure(
         ref_index = np.where(casa_frequency_frames_codes == ref_code)[0][0]
         casa_ref = casa_frequency_frames[ref_index]
     else:
-        logger.…
+        logger.debug(
             f"Could not determine {measinfo['type']} measure "
             "reference frame!"
         )
@@ -116,7 +118,7 @@ def column_description_casacore_to_msv4_measure(
     if casa_ref in msv4_measure_conversion.get("Ref_map", {}):
         casa_ref = msv4_measure_conversion["Ref_map"][casa_ref]
     else:
-        logger.…
+        logger.debug(
             f"Unknown reference frame for {measinfo['type']} "
             f"measure, using verbatim: {casa_ref}"
         )
```
xradio/vis/_vis_utils/_ms/msv4_sub_xdss.py:

```diff
@@ -11,7 +11,10 @@ from ._tables.read import make_taql_where_between_min_max, read_generic_table


 def interpolate_to_time(
-    xds: xr.Dataset, …
+    xds: xr.Dataset,
+    interp_time: Union[xr.DataArray, None],
+    message_prefix: str,
+    time_name: str = "time",
 ) -> xr.Dataset:
     """
     Interpolate the time coordinate of the input xarray dataset to the
@@ -38,13 +41,13 @@ def interpolate_to_time(
         xarray dataset with time axis interpolated to interp_time.
     """
     if interp_time is not None:
-        points_before = xds.time.size
+        points_before = xds[time_name].size
         if points_before > 1:
             method = "linear"
         else:
             method = "nearest"
-        xds = xds.interp(time=interp_time, method=method, assume_sorted=True)
-        points_after = xds.time.size
+        xds = xds.interp({time_name: interp_time}, method=method, assume_sorted=True)
+        points_after = xds[time_name].size
         logger.debug(
             f"{message_prefix}: interpolating the time coordinate "
             f"from {points_before} to {points_after} points"
```
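The refactor makes the coordinate name a parameter, so the same helper can interpolate datasets whose time axis is not literally named "time". A standalone xarray equivalent of the interpolation step above (requires scipy; the dataset contents are made up):

```python
import numpy as np
import xarray as xr

ds = xr.Dataset(
    {"az": ("time", np.array([0.0, 1.0, 2.0]))},
    coords={"time": np.array([0.0, 10.0, 20.0])},
)
interp_time = xr.DataArray(np.array([5.0, 15.0]), dims="time")

time_name = "time"
# Mirrors the hunk above: linear interpolation when more than one sample
# exists, nearest-neighbour otherwise, onto the target time axis.
method = "linear" if ds[time_name].size > 1 else "nearest"
ds = ds.interp({time_name: interp_time}, method=method, assume_sorted=True)
print(ds.az.values)  # [0.5 1.5]
```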
xradio/vis/_vis_utils/_ms/partition_queries.py:

```diff
@@ -20,17 +20,19 @@ def enumerated_product(*args):
     )


-def create_partitions(in_file: str, partition_scheme: Union[str, list], vla_otf=False):
+def create_partitions(in_file: str, partition_scheme: list):
     """Create a list of dictionaries with the partition information.

     Args:
         in_file (str): Input MSv2 file path.
-        partition_scheme (…
-        …
-        …
+        partition_scheme (list) : A MS v4 can only contain a single data description (spectral window and polarization setup), and observation mode. Consequently, the MS v2 is partitioned when converting to MS v4.
+            In addition to data description and polarization setup a finer partitioning is possible by specifying a list of partitioning keys. Any combination of the following keys are possible:
+            "FIELD_ID", "SCAN_NUMBER", "STATE_ID", "SOURCE_ID", "SUB_SCAN_NUMBER". For mosaics where the phase center is rapidly changing (such as VLA on the fly mosaics)
+            partition_scheme should be set to an empty list []. By default, ["FIELD_ID"].
     Returns:
         list: list of dictionaries with the partition information.
     """
+    # vla_otf (bool, optional): The partioning of VLA OTF (on the fly) mosaics needs a special partitioning scheme. Defaults to False.

     # Create partition table
     from casacore import tables
@@ -38,11 +40,9 @@ def create_partitions(in_file: str, partition_scheme: Union[str, list], vla_otf=
     import xarray as xr
     import pandas as pd
     import os
+    import time

-    if partition_scheme == "ddi_intent_field":
-        partition_scheme = ["DATA_DESC_ID", "INTENT", "FIELD_ID"]
-    elif partition_scheme == "ddi_intent_scan":
-        partition_scheme = ["DATA_DESC_ID", "INTENT", "SCAN_NUMBER"]
+    partition_scheme = ["DATA_DESC_ID", "OBS_MODE"] + partition_scheme

     # Open MSv2 tables and add columns to partition table (par_df):
     par_df = pd.DataFrame()
@@ -61,8 +61,8 @@ def create_partitions(in_file: str, partition_scheme: Union[str, list], vla_otf=
         lockoptions={"option": "usernoread"},
         ack=False,
     )
-    if vla_otf:
-        par_df["FIELD_NAME"] = np.array(field_tb.getcol("NAME"))[par_df["FIELD_ID"]]
+    # if vla_otf:
+    #     par_df["FIELD_NAME"] = np.array(field_tb.getcol("NAME"))[par_df["FIELD_ID"]]

     # Get source ids if available from source table.
     if os.path.isdir(os.path.join(os.path.join(in_file, "SOURCE"))):
@@ -74,10 +74,10 @@ def create_partitions(in_file: str, partition_scheme: Union[str, list], vla_otf=
     )
     if source_tb.nrows() != 0:
         par_df["SOURCE_ID"] = field_tb.getcol("SOURCE_ID")[par_df["FIELD_ID"]]
-        if vla_otf:
-            par_df["SOURCE_NAME"] = np.array(source_tb.getcol("NAME"))[
-                par_df["SOURCE_ID"]
-            ]
+        # if vla_otf:
+        #     par_df["SOURCE_NAME"] = np.array(source_tb.getcol("NAME"))[
+        #         par_df["SOURCE_ID"]
+        #     ]

     # Get intents and subscan numbers if available from state table.
     if os.path.isdir(os.path.join(in_file, "STATE")):
@@ -89,7 +89,9 @@ def create_partitions(in_file: str, partition_scheme: Union[str, list], vla_otf=
     )
     if state_tb.nrows() != 0:
         # print('state_tb',state_tb.nrows(),state_tb)
-        par_df["INTENT"] = …
+        par_df["OBS_MODE"] = np.array(state_tb.getcol("OBS_MODE"))[
+            par_df["STATE_ID"]
+        ]
         par_df["SUB_SCAN_NUMBER"] = state_tb.getcol("SUB_SCAN")[par_df["STATE_ID"]]
     else:
         par_df.drop(["STATE_ID"], axis=1)
@@ -114,7 +116,7 @@ def create_partitions(in_file: str, partition_scheme: Union[str, list], vla_otf=
         "SCAN_NUMBER",
         "STATE_ID",
         "SOURCE_ID",
-        "INTENT",
+        "OBS_MODE",
         "SUB_SCAN_NUMBER",
     ]
     for idx, pair in enumerated_partitions:
```
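In effect, every partition is now a unique combination of DATA_DESC_ID, OBS_MODE and whatever extra keys the caller supplies. A rough sketch of that selection with pandas (the real `create_partitions` builds `par_df` from the MS tables; the rows below are invented for illustration):

```python
import pandas as pd

# Per-row MSv2 metadata gathered into a DataFrame; every unique key
# combination becomes one MS v4 partition.
par_df = pd.DataFrame(
    {
        "DATA_DESC_ID": [0, 0, 1, 1],
        "OBS_MODE": ["OBSERVE_TARGET#ON_SOURCE"] * 4,
        "FIELD_ID": [0, 1, 0, 1],
    }
)

partition_scheme = ["FIELD_ID"]  # the new default
keys = ["DATA_DESC_ID", "OBS_MODE"] + partition_scheme  # as in the hunk above
partitions = par_df.drop_duplicates(subset=keys).to_dict("records")
print(len(partitions))  # 4: two DDIs x two fields
```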
xradio/vis/convert_msv2_to_processing_set.py:

```diff
@@ -11,7 +11,7 @@ from xradio.vis._vis_utils._ms.conversion import convert_and_write_partition
 def convert_msv2_to_processing_set(
     in_file: str,
     out_file: str,
-    partition_scheme: …
+    partition_scheme: list = ["FIELD_ID"],
     main_chunksize: Union[Dict, float, None] = None,
     with_pointing: bool = True,
     pointing_chunksize: Union[Dict, float, None] = None,
@@ -30,10 +30,11 @@ def convert_msv2_to_processing_set(
         Input MS name.
     out_file : str
         Output PS name.
-    partition_scheme : …
-        A MS v4 can only contain a single spectral window …
-        …
-        …
+    partition_scheme : list, optional
+        A MS v4 can only contain a single data description (spectral window and polarization setup), and observation mode. Consequently, the MS v2 is partitioned when converting to MS v4.
+        In addition to data description and polarization setup a finer partitioning is possible by specifying a list of partitioning keys. Any combination of the following keys are possible:
+        "FIELD_ID", "SCAN_NUMBER", "STATE_ID", "SOURCE_ID", "SUB_SCAN_NUMBER". For mosaics where the phase center is rapidly changing (such as VLA on the fly mosaics)
+        partition_scheme should be set to an empty list []. By default, ["FIELD_ID"].
     main_chunksize : Union[Dict, float, None], optional
         Defines the chunk size of the main dataset. If given as a dictionary, defines the sizes of several dimensions, and acceptable keys are "time", "baseline_id", "antenna_id", "frequency", "polarization". If given as a float, gives the size of a chunk in GiB. By default, None.
     with_pointing : bool, optional
@@ -56,11 +57,12 @@ def convert_msv2_to_processing_set(

     partitions = create_partitions(in_file, partition_scheme=partition_scheme)
     logger.info("Number of partitions: " + str(len(partitions)))
-    …
     delayed_list = []
-    ms_v4_id = 0
-    for partition_info in partitions:
-        logger.info(
+
+    for ms_v4_id, partition_info in enumerate(partitions):
+        # print(ms_v4_id,len(partition_info['FIELD_ID']))
+
+        logger.info(
             "DDI "
             + str(partition_info["DATA_DESC_ID"])
             + ", STATE "
@@ -103,70 +105,6 @@ def convert_msv2_to_processing_set(
             compressor=compressor,
             overwrite=overwrite,
         )
-        ms_v4_id = ms_v4_id + 1

     if parallel:
         dask.compute(delayed_list)
-
-    # delayed_list = []
-    # ms_v4_id = 0
-    # for idx, pair in partition_enumerated_product:
-    #     ddi, state_id, field_id, scan_id = pair
-    #     # logger.debug(
-    #     #     "DDI " + str(ddi) + ", STATE " + str(state_id) + ", FIELD " + str(field_id) + ", SCAN " + str(scan_id)
-    #     # )
-
-    #     # if scan_id == 67: #67
-    #     # logger.debug(
-    #     #     "DDI " + str(ddi) + ", STATE " + str(state_id) + ", FIELD " + str(field_id) + ", SCAN " + str(scan_id)
-    #     # )
-    #     if partition_scheme == "ddi_intent_field":
-    #         intent = intents[idx[1]]
-    #     else:
-    #         intent = intents[idx[1]] + "_" + str(state_id)
-
-    #     if parallel:
-    #         delayed_list.append(
-    #             dask.delayed(convert_and_write_partition)(
-    #                 in_file,
-    #                 out_file,
-    #                 intent,
-    #                 ms_v4_id,
-    #                 ddi,
-    #                 state_id,
-    #                 field_id,
-    #                 scan_id,
-    #                 partition_scheme,
-    #                 main_chunksize=main_chunksize,
-    #                 with_pointing=with_pointing,
-    #                 pointing_chunksize=pointing_chunksize,
-    #                 pointing_interpolate=pointing_interpolate,
-    #                 ephemeris_interpolate=ephemeris_interpolate,
-    #                 compressor=compressor,
-    #                 overwrite=overwrite,
-    #             )
-    #         )
-    #     else:
-    #         convert_and_write_partition(
-    #             in_file,
-    #             out_file,
-    #             intent,
-    #             ms_v4_id,
-    #             ddi,
-    #             state_id,
-    #             field_id,
-    #             scan_id,
-    #             partition_scheme,
-    #             main_chunksize=main_chunksize,
-    #             with_pointing=with_pointing,
-    #             pointing_chunksize=pointing_chunksize,
-    #             pointing_interpolate=pointing_interpolate,
-    #             ephemeris_interpolate=ephemeris_interpolate,
-    #             compressor=compressor,
-    #             storage_backend=storage_backend,
-    #             overwrite=overwrite,
-    #         )
-    #     ms_v4_id = ms_v4_id + 1
-
-    # if parallel:
-    #     dask.compute(delayed_list)
```
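Taken together, a conversion call against 0.0.31 would look roughly like this (paths are hypothetical; the import path follows the module layout in the file list above):

```python
from xradio.vis.convert_msv2_to_processing_set import convert_msv2_to_processing_set

# Default scheme: one MS v4 per DATA_DESC_ID / OBS_MODE / FIELD_ID combination.
convert_msv2_to_processing_set(
    in_file="my_observation.ms",         # hypothetical input MSv2
    out_file="my_observation.vis.zarr",  # hypothetical output processing set
    partition_scheme=["FIELD_ID"],
)

# For rapidly changing phase centers (e.g. VLA on-the-fly mosaics), pass an
# empty list so fields are not split into separate MS v4 partitions.
convert_msv2_to_processing_set(
    in_file="my_otf_mosaic.ms",
    out_file="my_otf_mosaic.vis.zarr",
    partition_scheme=[],
)
```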
xradio/vis/read_processing_set.py:

```diff
@@ -8,7 +8,7 @@ import s3fs

 def read_processing_set(
     ps_store: str,
-    …
+    obs_modes: list = None,
 ) -> processing_set:
     """Creates a lazy representation of a Processing Set (only meta-data is loaded into memory).

@@ -16,9 +16,9 @@ def read_processing_set(
     ----------
     ps_store : str
         String of the path and name of the processing set. For example '/users/user_1/uid___A002_Xf07bba_Xbe5c_target.lsrk.vis.zarr'.
-    …
-        A list of …
-        By default None, which will read all …
+    obs_modes : list, optional
+        A list of obs_mode to be read for example ['OBSERVE_TARGET#ON_SOURCE']. The obs_mode in a processing set can be seem by calling processing_set.summary().
+        By default None, which will read all obs_mode.

     Returns
     -------
@@ -37,7 +37,9 @@ def read_processing_set(
     xds = _open_dataset(ms_main_store, file_system)
     data_groups = xds.attrs["data_groups"]

-    if (…
+    if (obs_modes is None) or (
+        xds.attrs["partition_info"]["obs_mode"] in obs_modes
+    ):
         sub_xds_dict, field_and_source_xds_dict = _read_sub_xds(
             ms_store, file_system=file_system, data_groups=data_groups
         )
```
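And the corresponding read-side filter, per the new `obs_modes` parameter (path hypothetical):

```python
from xradio.vis.read_processing_set import read_processing_set

# Lazily open the processing set; only partitions whose
# partition_info["obs_mode"] matches are kept. obs_modes=None
# (the default) reads every partition.
ps = read_processing_set(
    ps_store="my_observation.vis.zarr",  # hypothetical path
    obs_modes=["OBSERVE_TARGET#ON_SOURCE"],
)
ps.summary()  # the available obs_mode values appear in the summary
```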