xradio 0.0.31__py3-none-any.whl → 0.0.34__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- xradio/_utils/list_and_array.py +5 -3
- xradio/vis/__init__.py +3 -5
- xradio/vis/_processing_set.py +3 -3
- xradio/vis/_vis_utils/_ms/_tables/load_main_table.py +4 -4
- xradio/vis/_vis_utils/_ms/_tables/read.py +57 -41
- xradio/vis/_vis_utils/_ms/_tables/read_main_table.py +17 -18
- xradio/vis/_vis_utils/_ms/_tables/read_subtables.py +5 -5
- xradio/vis/_vis_utils/_ms/_tables/write.py +2 -4
- xradio/vis/_vis_utils/_ms/_tables/write_exp_api.py +19 -13
- xradio/vis/_vis_utils/_ms/chunks.py +5 -72
- xradio/vis/_vis_utils/_ms/conversion.py +238 -55
- xradio/vis/_vis_utils/_ms/{_tables/create_field_and_source_xds.py → create_field_and_source_xds.py} +114 -85
- xradio/vis/_vis_utils/_ms/descr.py +8 -8
- xradio/vis/_vis_utils/_ms/msv4_sub_xdss.py +249 -77
- xradio/vis/_vis_utils/_ms/partition_queries.py +19 -185
- xradio/vis/_vis_utils/_ms/partitions.py +18 -22
- xradio/vis/_vis_utils/_ms/subtables.py +2 -2
- xradio/vis/_vis_utils/_utils/partition_attrs.py +2 -2
- xradio/vis/_vis_utils/_utils/xds_helper.py +12 -12
- xradio/vis/_vis_utils/ms.py +1 -43
- xradio/vis/_vis_utils/zarr.py +0 -1
- xradio/vis/convert_msv2_to_processing_set.py +8 -1
- xradio/vis/load_processing_set.py +0 -3
- xradio/vis/read_processing_set.py +2 -2
- {xradio-0.0.31.dist-info → xradio-0.0.34.dist-info}/METADATA +1 -1
- {xradio-0.0.31.dist-info → xradio-0.0.34.dist-info}/RECORD +29 -31
- {xradio-0.0.31.dist-info → xradio-0.0.34.dist-info}/WHEEL +1 -1
- xradio/vis/_vis_utils/ms_column_descriptions_dicts.py +0 -1360
- xradio/vis/vis_io.py +0 -146
- {xradio-0.0.31.dist-info → xradio-0.0.34.dist-info}/LICENSE.txt +0 -0
- {xradio-0.0.31.dist-info → xradio-0.0.34.dist-info}/top_level.txt +0 -0
xradio/vis/_vis_utils/_ms/partition_queries.py CHANGED

@@ -1,6 +1,5 @@
 import itertools
 import graphviper.utils.logger as logger
-import numbers
 from pathlib import Path
 from typing import Dict, List, Tuple, Union

@@ -10,8 +9,6 @@ import xarray as xr
 from casacore import tables

 from ._tables.table_query import open_table_ro, open_query
-from ._tables.read import read_generic_table
-from .subtables import subt_rename_ids


 def enumerated_product(*args):
@@ -23,26 +20,29 @@
 def create_partitions(in_file: str, partition_scheme: list):
     """Create a list of dictionaries with the partition information.

-
-
-
+    Parameters
+    ----------
+    in_file: str
+        Input MSv2 file path.
+    partition_scheme: list
+        A MS v4 can only contain a single data description (spectral window and polarization setup), and observation mode. Consequently, the MS v2 is partitioned when converting to MS v4.
         In addition to data description and polarization setup a finer partitioning is possible by specifying a list of partitioning keys. Any combination of the following keys are possible:
-        "FIELD_ID", "SCAN_NUMBER", "STATE_ID", "SOURCE_ID", "SUB_SCAN_NUMBER".
-        partition_scheme should be set to an empty list []. By default, ["FIELD_ID"].
-    Returns
-
+        "FIELD_ID", "SCAN_NUMBER", "STATE_ID", "SOURCE_ID", "SUB_SCAN_NUMBER".
+        For mosaics where the phase center is rapidly changing (such as VLA on the fly mosaics) partition_scheme should be set to an empty list []. By default, ["FIELD_ID"].
+    Returns
+    -------
+    list
+        list of dictionaries with the partition information.
     """
     # vla_otf (bool, optional): The partioning of VLA OTF (on the fly) mosaics needs a special partitioning scheme. Defaults to False.

     # Create partition table
     from casacore import tables
     import numpy as np
-    import xarray as xr
     import pandas as pd
     import os
-    import time

-    partition_scheme = ["DATA_DESC_ID", "OBS_MODE"] + partition_scheme
+    partition_scheme = ["DATA_DESC_ID", "OBS_MODE", "OBSERVATION_ID"] + partition_scheme

     # Open MSv2 tables and add columns to partition table (par_df):
     par_df = pd.DataFrame()
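The new docstring pins down how partition_scheme is used. As a quick illustration, here is a hedged usage sketch; the MS paths are hypothetical, and the module path is taken from the file list above:

# Hedged usage sketch of create_partitions; MS paths are hypothetical.
from xradio.vis._vis_utils._ms.partition_queries import create_partitions

# Default scheme: partition by FIELD_ID on top of the implicit
# DATA_DESC_ID / OBS_MODE / OBSERVATION_ID axes that create_partitions adds.
partitions = create_partitions("my_obs.ms", partition_scheme=["FIELD_ID"])

# VLA on-the-fly mosaics: the phase center changes rapidly, so drop
# FIELD_ID from the scheme as the docstring recommends.
otf_partitions = create_partitions("my_otf_mosaic.ms", partition_scheme=[])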
@@ -53,6 +53,7 @@ def create_partitions(in_file: str, partition_scheme: list):
     par_df["FIELD_ID"] = main_tb.getcol("FIELD_ID")
     par_df["SCAN_NUMBER"] = main_tb.getcol("SCAN_NUMBER")
     par_df["STATE_ID"] = main_tb.getcol("STATE_ID")
+    par_df["OBSERVATION_ID"] = main_tb.getcol("OBSERVATION_ID")
     par_df = par_df.drop_duplicates()

     field_tb = tables.table(
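The hunk above adds OBSERVATION_ID to the per-row partition table before deduplication. A self-contained sketch of that pandas pattern, with toy values:

import pandas as pd

# Toy per-row partition columns, mimicking the getcol() results above.
par_df = pd.DataFrame(
    {
        "DATA_DESC_ID": [0, 0, 1, 1],
        "FIELD_ID": [0, 0, 1, 1],
        "SCAN_NUMBER": [1, 1, 2, 2],
        "STATE_ID": [0, 0, 0, 0],
        "OBSERVATION_ID": [0, 0, 0, 0],
    }
)
# drop_duplicates() keeps one row per distinct combination, i.e. the
# candidate partitions of the MSv2 main table.
print(par_df.drop_duplicates())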
@@ -108,10 +109,13 @@
     # Make all possible combinations of the partition criteria.
     enumerated_partitions = enumerated_product(*list(partition_criteria.values()))

+    # print('par_df',par_df)
+
     # Create a list of dictionaries with the partition information. This will be used to query the MSv2 main table.
     partitions = []
     partition_axis_names = [
         "DATA_DESC_ID",
+        "OBSERVATION_ID",
         "FIELD_ID",
         "SCAN_NUMBER",
         "STATE_ID",
@@ -144,68 +148,8 @@ def create_partitions(in_file: str, partition_scheme: list):
     return partitions


-# Used by code that will be deprecated at some stage.
-
-
-def make_partition_ids_by_ddi_scan(
-    infile: str, do_subscans: bool
-) -> Tuple[np.ndarray, np.ndarray, np.ndarray]:
-    """
-    Produces arrays of per-partition ddi, scan, state_id, for when
-    using partiion schemes 'scan' or 'scan/subscan', that is
-    partitioning by some variant of (ddi, scan, subscan(state_id))
-
-    Parameters
-    ----------
-    infile : str
-        Path to MS
-    do_subscans : bool
-        also partitioning by subscan, not only scan
-
-    Returns
-    -------
-    Tuple[np.ndarray, np.ndarray, np.ndarray]
-        arrays with indices that define every partition
-    """
-    try:
-        cctable = None
-        taql_distinct_states = None
-        cctable = tables.table(
-            infile, readonly=True, lockoptions={"option": "usernoread"}, ack=False
-        )
-        if do_subscans:
-            taql_distinct_states = (
-                "select DISTINCT SCAN_NUMBER, STATE_ID, DATA_DESC_ID from $cctable"
-            )
-        else:
-            taql_distinct_states = (
-                "select DISTINCT SCAN_NUMBER, DATA_DESC_ID from $cctable"
-            )
-        with open_query(cctable, taql_distinct_states) as query_states:
-            logger.debug(
-                f"Got query, nrows: {query_states.nrows()}, query: {query_states}"
-            )
-            scan_number = query_states.getcol("SCAN_NUMBER")
-            logger.debug(
-                f"Got col SCAN_NUMBER (len: {len(scan_number)}): {scan_number}"
-            )
-            if do_subscans:
-                state_id = query_states.getcol("STATE_ID")
-                data_desc_id = np.full(len(scan_number), None)
-            else:
-                state_id = [None] * len(scan_number)
-                logger.debug(f"Got col STATE_ID (len: {len(state_id)}): {state_id}")
-                data_desc_id = query_states.getcol("DATA_DESC_ID")
-
-        logger.debug(f"Got col DATA_DESC_ID (len: {len(data_desc_id)}): {data_desc_id}")
-        logger.debug(
-            f"Len of DISTINCT SCAN_NUMBER,etc.: {len(scan_number)}. Will generate that number of partitions"
-        )
-    finally:
-        if cctable:
-            cctable.close()
-
-    return data_desc_id, scan_number, state_id
+# Used by code that will be deprecated at some stage. See #192
+# Still need to clarify what to do about intent string filtering ('WVR', etc.)


 def make_partition_ids_by_ddi_intent(
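For reference, the core of the deleted helper was a DISTINCT TaQL query over the main table. A minimal standalone equivalent with python-casacore, using taql directly instead of the removed open_query wrapper (MS path hypothetical):

from casacore import tables

# Minimal equivalent of the deleted scan/ddi partition query.
cctable = tables.table("my_obs.ms", readonly=True, ack=False)
query = tables.taql("select DISTINCT SCAN_NUMBER, DATA_DESC_ID from $cctable")
scan_number = query.getcol("SCAN_NUMBER")
data_desc_id = query.getcol("DATA_DESC_ID")
query.close()
cctable.close()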
@@ -458,113 +402,3 @@ def partition_when_empty_state(
     main_table.close()

     return distinct_ddis, [None] * nparts, [None] * nparts, [""] * nparts
-
-
-def create_taql_query_and_file_name(out_file, intent, state_ids, field_id, ddi):
-    file_name = (
-        out_file
-        + "/"
-        + out_file.replace(".vis.zarr", "").split("/")[-1]
-        + "_ddi_"
-        + str(ddi)
-        + "_intent_"
-        + intent
-    )
-
-    taql_where = f"where (DATA_DESC_ID = {ddi})"
-
-    if isinstance(state_ids, numbers.Integral):
-        taql_where += f" AND (STATE_ID = {state_ids})"
-    elif state_ids is not None:
-        state_ids_or = " OR STATE_ID = ".join(np.char.mod("%d", state_ids))
-        taql_where += f" AND (STATE_ID = {state_ids_or})"
-
-    if field_id is not None:
-        taql_where += f" AND (FIELD_ID = {field_id})"
-        file_name = file_name + "_field_id_" + str(field_id)
-
-    return taql_where, file_name
-
-
-def get_unqiue_intents(in_file):
-    """
-    _summary_
-
-    Parameters
-    ----------
-    in_file : str
-        _description_
-
-    Returns
-    -------
-    _type_
-        _description_
-    """
-    state_xds = read_generic_table(
-        in_file,
-        "STATE",
-        rename_ids=subt_rename_ids["STATE"],
-    )
-
-    if len(state_xds.data_vars) > 0:
-        obs_mode_dict = {}
-        for i, obs_mode in enumerate(state_xds.obs_mode.values):
-            if obs_mode in obs_mode_dict:
-                obs_mode_dict[obs_mode].append(i)
-            else:
-                obs_mode_dict[obs_mode] = [i]
-        return list(obs_mode_dict.keys()), list(obs_mode_dict.values())
-    else:  # empty state table
-        return ["None"], [None]
-
-
-def enumerated_product(*args):
-    yield from zip(
-        itertools.product(*(range(len(x)) for x in args)), itertools.product(*args)
-    )
-
-
-def create_partition_enumerated_product(in_file: str, partition_scheme: str):
-    """
-    Creates an enumerated_product of the data_desc_ids, state_ids, field_ids in a MS v2 that define the partions in a processing set.
-
-    Parameters
-    ----------
-    in_file : str
-        _description_
-    partition_scheme : str
-        _description_
-
-    Returns
-    -------
-    _type_
-        _description_
-    """
-    # Unused?
-    # spw_xds = read_generic_table(
-    #     in_file,
-    #     "SPECTRAL_WINDOW",
-    #     rename_ids=subt_rename_ids["SPECTRAL_WINDOW"],
-    # )
-
-    # TODO: probably get this via query to subtable instead of read_generic_table, we just
-    # need the row numbers
-    ddi_xds = read_generic_table(in_file, "DATA_DESCRIPTION")
-    data_desc_ids = np.arange(ddi_xds.sizes["row"])
-    state_xds = read_generic_table(in_file, "STATE")
-
-    if (partition_scheme == "ddi_intent_field") and (len(state_xds.data_vars) > 0):
-        intents, state_ids = get_unqiue_intents(in_file)
-        field_ids = np.arange(read_generic_table(in_file, "FIELD").sizes["row"])
-    else:  # partition_scheme == "ddi_state_field"
-        if len(state_xds.data_vars) > 0:
-            state_ids = [np.arange(state_xds.sizes["row"])]
-            intents = state_xds.obs_mode.values
-        else:  # empty state table
-            state_ids = [None]
-            intents = ["None"]
-        # print(state_xds, intents)
-        # field_ids = [None]
-        field_ids = np.arange(read_generic_table(in_file, "FIELD").sizes["row"])
-
-    return enumerated_product(data_desc_ids, state_ids, field_ids), intents
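The deleted duplicate of enumerated_product above pairs every index tuple with its value tuple; the copy kept at the top of the module behaves like this small demo (the helper body is verbatim from the diff):

import itertools

def enumerated_product(*args):
    # Yields ((i, j, ...), (args[0][i], args[1][j], ...)) pairs.
    yield from zip(
        itertools.product(*(range(len(x)) for x in args)), itertools.product(*args)
    )

ddis, field_ids = [0, 1], [3, 7, 9]
for index, values in enumerated_product(ddis, field_ids):
    print(index, values)  # (0, 0) (0, 3) ... (1, 2) (1, 9)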
xradio/vis/_vis_utils/_ms/partitions.py CHANGED

@@ -7,11 +7,10 @@ import xarray as xr
 from .msv2_msv3 import ignore_msv2_cols
 from .partition_queries import (
     make_partition_ids_by_ddi_intent,
-    make_partition_ids_by_ddi_scan,
 )
 from .subtables import subt_rename_ids, add_pointing_to_partition
 from .descr import describe_ms
-from ._tables.read import
+from ._tables.read import load_generic_table, make_freq_attrs
 from ._tables.read_main_table import read_flat_main_table, read_expanded_main_table
 from .._utils.partition_attrs import add_partition_attrs
 from .._utils.xds_helper import expand_xds, make_coords, optimal_chunking
@@ -23,8 +22,8 @@ VisSetPartitions = Dict[PartitionKey, xr.Dataset]


 def make_spw_names_by_ddi(ddi_xds: xr.Dataset, spw_xds: xr.Dataset) -> Dict[int, str]:
-    spw_ids_by_ddi = ddi_xds.
-    spw_names = spw_xds.
+    spw_ids_by_ddi = ddi_xds.SPECTRAL_WINDOW_ID[ddi_xds.row].values
+    spw_names = spw_xds.NAME[spw_ids_by_ddi].values
     return {ddi: spw_names[ddi] for ddi in np.arange(0, len(spw_names))}


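The rewritten lines spell out the DDI-to-SPW-name lookup. A standalone sketch with toy DATA_DESCRIPTION and SPECTRAL_WINDOW tables (variable names follow the function above):

import numpy as np
import xarray as xr

# Toy DATA_DESCRIPTION (one row per DDI) and SPECTRAL_WINDOW tables.
ddi_xds = xr.Dataset(
    {"SPECTRAL_WINDOW_ID": ("row", np.array([0, 1, 1]))},
    coords={"row": np.arange(3)},
)
spw_xds = xr.Dataset({"NAME": ("row", np.array(["spw_a", "spw_b"]))})

spw_ids_by_ddi = ddi_xds.SPECTRAL_WINDOW_ID[ddi_xds.row].values
spw_names = spw_xds.NAME[spw_ids_by_ddi].values
print({ddi: spw_names[ddi] for ddi in np.arange(0, len(spw_names))})
# {0: 'spw_a', 1: 'spw_b', 2: 'spw_b'}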
@@ -138,12 +137,12 @@ def read_ms_scan_subscan_partitions(
         subtables already read
     """

-    spw_xds =
+    spw_xds = load_generic_table(
         infile,
         "SPECTRAL_WINDOW",
         rename_ids=subt_rename_ids["SPECTRAL_WINDOW"],
     )
-    ddi_xds =
+    ddi_xds = load_generic_table(infile, "DATA_DESCRIPTION")

     if partition_scheme == "intent":
         spw_names_by_ddi = make_spw_names_by_ddi(ddi_xds, spw_xds)
@@ -154,15 +153,12 @@ def read_ms_scan_subscan_partitions(
             distinct_intents,
         ) = make_partition_ids_by_ddi_intent(infile, spw_names_by_ddi)
     else:
-
-        data_desc_id, scan_number, state_id = make_partition_ids_by_ddi_scan(
-            infile, do_subscans
-        )
+        raise ValueError("foo")

-    ant_xds =
+    ant_xds = load_generic_table(
         infile, "ANTENNA", rename_ids=subt_rename_ids["ANTENNA"]
     )
-    pol_xds =
+    pol_xds = load_generic_table(
         infile, "POLARIZATION", rename_ids=subt_rename_ids["POLARIZATION"]
     )

@@ -263,25 +259,25 @@ def read_ms_ddi_partitions(
     """
     # we need the antenna, spectral window, polarization, and data description tables
     # to define the (sub)datasets (their dims and coords) and to process the main table
-    ant_xds =
+    ant_xds = load_generic_table(
         infile, "ANTENNA", rename_ids=subt_rename_ids["ANTENNA"]
     )
-    spw_xds =
+    spw_xds = load_generic_table(
         infile,
         "SPECTRAL_WINDOW",
         rename_ids=subt_rename_ids["SPECTRAL_WINDOW"],
     )
-    pol_xds =
+    pol_xds = load_generic_table(
         infile, "POLARIZATION", rename_ids=subt_rename_ids["POLARIZATION"]
     )
-    ddi_xds =
+    ddi_xds = load_generic_table(infile, "DATA_DESCRIPTION")

     # each DATA_DESC_ID (ddi) is a fixed shape that may differ from others
     # form a list of ddis to process, each will be placed it in its own xarray dataset and partition
     ddis = np.arange(ddi_xds.row.shape[0]) if rowmap is None else list(rowmap.keys())

     # figure out the chunking for each DDI, either one fixed shape or an auto-computed one
-    if type(chunks)
+    if type(chunks) is not tuple:
         mshape = describe_ms(infile, mode="flat", rowmap=rowmap)
         chunks = dict(
             [
@@ -307,7 +303,7 @@ def read_ms_ddi_partitions(
             continue
         logger.debug(
             "reading DDI %i with chunking %s..."
-            % (ddi, str(chunks[ddi] if type(chunks)
+            % (ddi, str(chunks[ddi] if type(chunks) is dict else chunks))
         )

         # experimenting, comparing overheads of expanded vs. flat
@@ -321,7 +317,7 @@ def read_ms_ddi_partitions(
             infile,
             ddi,
             rowidxs=rowidxs,
-            chunks=chunks[ddi] if type(chunks)
+            chunks=chunks[ddi] if type(chunks) is dict else chunks,
             ignore_msv2_cols=ignore_msv2_cols,
         )
         if len(xds.sizes) == 0:
@@ -339,9 +335,9 @@ def read_ms_ddi_partitions(
         # filter by channel selection
         if (chanidxs is not None) and (len(chanidxs) < len(xds.chan)):
             xds = xds.isel(chan=chanidxs)
-            spw_xds["
-            ddi_xds.
-            ] = spw_xds.
+            spw_xds["CHAN_FREQ"][
+                ddi_xds.SPECTRAL_WINDOW_ID.values[ddi], : len(chanidxs)
+            ] = spw_xds.CHAN_FREQ[ddi_xds.SPECTRAL_WINDOW_ID.values[ddi], chanidxs]

         # expand the row dimension out to (time, baseline)
         if not expanded and expand:
xradio/vis/_vis_utils/_ms/subtables.py CHANGED

@@ -7,7 +7,7 @@ from typing import Dict, List

 import xarray as xr

-from ._tables.read import
+from ._tables.read import load_generic_table, table_exists
 from ._tables.read_subtables import read_ephemerides, read_delayed_pointing_table


@@ -75,7 +75,7 @@ def read_ms_subtables(
         rename_ids = {"dim_2": "n_polynomial", "dim_3": "dir"}
         xds = read_delayed_pointing_table(str(subt_path), rename_ids=rename_ids)
     else:
-        xds =
+        xds = load_generic_table(
             infile,
             subt_name,
             timecols=["TIME"],
xradio/vis/_vis_utils/_utils/partition_attrs.py CHANGED

@@ -73,8 +73,8 @@ def init_partition_ids(
     ddi_xds: xr.Dataset,
     part_ids: PartitionIds,
 ) -> PartitionIds:
-    spw_id = ddi_xds.
-    pol_setup_id = ddi_xds.
+    spw_id = ddi_xds.SPECTRAL_WINDOW_ID.values[ddi]
+    pol_setup_id = ddi_xds.POLARIZATION_ID.values[ddi]
     ids: PartitionIds = {
         # The -1 are expected to be be updated from part_ids
         "array_id": -1,
xradio/vis/_vis_utils/_utils/xds_helper.py CHANGED

@@ -35,11 +35,11 @@ def make_coords(
        Dict[str, np.ndarray]
     """
     ant_xds, ddi_xds, spw_xds, pol_xds = subtables
-    freq = spw_xds.
-        ddi_xds.
+    freq = spw_xds.CHAN_FREQ.values[
+        ddi_xds.SPECTRAL_WINDOW_ID.values[ddi], : xds.freq.shape[0]
     ]
-    pol_ids = pol_xds.
-        ddi_xds.
+    pol_ids = pol_xds.CORR_TYPE.values[
+        ddi_xds.POLARIZATION_ID.values[ddi], : xds.pol.shape[0]
     ]
     pol_names = np.vectorize(stokes_types.get)(pol_ids)
     ant_id = ant_xds.antenna_id.values
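The reconstructed lines feed CORR_TYPE codes into np.vectorize(stokes_types.get). A toy illustration of that mapping; the code values shown follow the casacore Stokes enumeration:

import numpy as np

# Subset of the casacore Stokes enumeration used for CORR_TYPE codes.
stokes_types = {9: "XX", 10: "XY", 11: "YX", 12: "YY"}
pol_ids = np.array([9, 12])
pol_names = np.vectorize(stokes_types.get)(pol_ids)
print(pol_names)  # ['XX' 'YY']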
@@ -125,31 +125,31 @@ def make_global_coords(mxds: xr.Dataset) -> Dict[str, xr.DataArray]:
     if "antenna" in metainfo:
         coords["antenna_ids"] = metainfo["antenna"].antenna_id.values
         coords["antennas"] = xr.DataArray(
-            metainfo["antenna"].
+            metainfo["antenna"].NAME.values, dims=["antenna_ids"]
         )
     if "field" in metainfo:
         coords["field_ids"] = metainfo["field"].field_id.values
         coords["fields"] = xr.DataArray(
-            metainfo["field"].
+            metainfo["field"].NAME.values, dims=["field_ids"]
         )
     if "feed" in mxds.attrs:
-        coords["feed_ids"] = metainfo["feed"].
+        coords["feed_ids"] = metainfo["feed"].FEED_ID.values
     if "observation" in metainfo:
         coords["observation_ids"] = metainfo["observation"].observation_id.values
         coords["observations"] = xr.DataArray(
-            metainfo["observation"].
+            metainfo["observation"].PROJECT.values, dims=["observation_ids"]
         )
     if "polarization" in metainfo:
         coords["polarization_ids"] = metainfo["polarization"].pol_setup_id.values
     if "source" in metainfo:
-        coords["source_ids"] = metainfo["source"].
+        coords["source_ids"] = metainfo["source"].SOURCE_ID.values
         coords["sources"] = xr.DataArray(
-            metainfo["source"].
+            metainfo["source"].NAME.values, dims=["source_ids"]
         )
     if "spectral_window" in metainfo:
         coords["spw_ids"] = metainfo["spectral_window"].spw_id.values
     if "state" in metainfo:
-        coords["state_ids"] = metainfo["state"].
+        coords["state_ids"] = metainfo["state"].STATE_ID.values

     return coords

@@ -225,7 +225,7 @@ def flatten_xds(xds: xr.Dataset) -> xr.Dataset:
     # compute for issue https://github.com/hainegroup/oceanspy/issues/332
     # drop=True silently does compute (or at least used to)
     txds = txds.where(
-        ((txds.
+        ((txds.STATE_ID != nan_int) & (txds.FIELD_ID != nan_int)).compute(),
         drop=True,
     )  # .unify_chunks()

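The completed where() call eagerly computes the row mask before dropping, working around the dask issue referenced in the comments. A minimal reproduction of the pattern; the nan_int sentinel value is an assumption here:

import numpy as np
import xarray as xr

nan_int = np.iinfo(np.int32).min  # assumed integer-NaN sentinel
txds = xr.Dataset(
    {
        "STATE_ID": ("row", np.array([0, nan_int, 2])),
        "FIELD_ID": ("row", np.array([5, 6, nan_int])),
    }
).chunk({"row": 2})

# Compute the boolean mask up front, then drop the flagged rows.
mask = ((txds.STATE_ID != nan_int) & (txds.FIELD_ID != nan_int)).compute()
txds = txds.where(mask, drop=True)
print(txds.sizes)  # only the first row survives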
xradio/vis/_vis_utils/ms.py CHANGED
@@ -1,11 +1,8 @@
 import os
 import graphviper.utils.logger as logger
-from typing import
-
-import xarray as xr
+from typing import List, Tuple, Union

 from ._utils.cds import CASAVisSet
-from ._ms.chunks import load_main_chunk
 from ._ms.partitions import (
     finalize_partitions,
     read_ms_ddi_partitions,
@@ -107,42 +104,3 @@ def read_ms(
     # build the visibilities container (metainfo + partitions) to return
     cds = vis_xds_packager_cds(subts, parts, "read_ms")
     return cds
-
-
-def load_vis_chunk(
-    infile: str,
-    block_des: Dict[str, slice],
-    partition_key: Tuple[int, int, str],
-) -> Dict[Tuple[int, int], xr.Dataset]:
-    """
-    Read a chunk of a MeasurementSet (MSv2 format) into an Xarray
-    dataset, loading the data in memory.
-
-    Parameters
-    ----------
-    infile : str
-        Input MS filename
-    block_des : Dict[str, slice]
-        specification of chunk to load
-    partition_key: partition_key: Tuple[int, int, str]
-
-    Returns
-    -------
-    Dict[Tuple[int, int], xr.Dataset]
-        Xarray datasets with chunk of visibility data, one per DDI
-        (spw_id, pol_setup_id pair)
-    """
-    infile = os.path.expanduser(infile)
-
-    logger.info(f"Loading from {infile} as MSv2 a chunk of data into memory")
-
-    if not os.path.isdir(infile):
-        raise ValueError(f"invalid input filename to read_ms {infile}")
-
-    orig_chunk_to_improve = load_main_chunk(infile, block_des)
-    res = vis_xds_packager_cds(
-        subtables={},
-        partitions={partition_key: orig_chunk_to_improve},
-        descr_add="load_vis_block",
-    )
-    return res
xradio/vis/_vis_utils/zarr.py CHANGED
xradio/vis/convert_msv2_to_processing_set.py CHANGED

@@ -17,6 +17,7 @@ def convert_msv2_to_processing_set(
     pointing_chunksize: Union[Dict, float, None] = None,
     pointing_interpolate: bool = False,
     ephemeris_interpolate: bool = False,
+    use_table_iter: bool = False,
     compressor: numcodecs.abc.Codec = numcodecs.Zstd(level=2),
     storage_backend="zarr",
     parallel: bool = False,
@@ -45,6 +46,8 @@
         Whether to interpolate the time axis of the pointing sub-dataset to the time axis of the main dataset
     ephemeris_interpolate : bool, optional
         Whether to interpolate the time axis of the ephemeris data variables (of the field_and_source sub-dataset) to the time axis of the main dataset
+    use_table_iter : bool, optional
+        Whether to use the table iterator to read the main table of the MS v2. This should be set to True when reading datasets with large number of rows and few partitions, by default False.
     compressor : numcodecs.abc.Codec, optional
         The Blosc compressor to use when saving the converted data to disk using Zarr, by default numcodecs.Zstd(level=2).
     storage_backend : {"zarr", "netcdf"}, optional
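A hedged sketch of the new flag in use; the paths are hypothetical and the in_file/out_file keyword names are assumed, since the diff only shows the parameters above:

# use_table_iter helps for an MS with many rows but few partitions.
from xradio.vis.convert_msv2_to_processing_set import convert_msv2_to_processing_set

convert_msv2_to_processing_set(
    in_file="large_single_spw.ms",      # hypothetical input MSv2
    out_file="large_single_spw.vis.zarr",  # hypothetical output store
    use_table_iter=True,
)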
@@ -63,7 +66,9 @@
     # print(ms_v4_id,len(partition_info['FIELD_ID']))

     logger.info(
-        "
+        "OBSERVATION_ID "
+        + str(partition_info["OBSERVATION_ID"])
+        + ", DDI "
         + str(partition_info["DATA_DESC_ID"])
         + ", STATE "
         + str(partition_info["STATE_ID"])
@@ -80,6 +85,7 @@
         out_file,
         ms_v4_id,
         partition_info=partition_info,
+        use_table_iter=use_table_iter,
         partition_scheme=partition_scheme,
         main_chunksize=main_chunksize,
         with_pointing=with_pointing,
@@ -96,6 +102,7 @@
         out_file,
         ms_v4_id,
         partition_info=partition_info,
+        use_table_iter=use_table_iter,
         partition_scheme=partition_scheme,
         main_chunksize=main_chunksize,
         with_pointing=with_pointing,
xradio/vis/read_processing_set.py CHANGED

@@ -1,5 +1,5 @@
 import os
-
+
 from ._processing_set import processing_set
 import graphviper.utils.logger as logger
 from xradio._utils.zarr.common import _open_dataset, _get_ms_stores_and_file_system
@@ -17,7 +17,7 @@ def read_processing_set(
     ps_store : str
         String of the path and name of the processing set. For example '/users/user_1/uid___A002_Xf07bba_Xbe5c_target.lsrk.vis.zarr'.
     obs_modes : list, optional
-        A list of obs_mode to be read for example ['OBSERVE_TARGET#ON_SOURCE']. The obs_mode in a processing set can be
+        A list of obs_mode to be read for example ['OBSERVE_TARGET#ON_SOURCE']. The obs_mode in a processing set can be seen by calling processing_set.summary().
         By default None, which will read all obs_mode.

     Returns