xradio 0.0.28__py3-none-any.whl → 0.0.29__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- xradio/__init__.py +5 -4
- xradio/_utils/array.py +90 -0
- xradio/_utils/zarr/common.py +48 -3
- xradio/image/_util/zarr.py +4 -1
- xradio/schema/__init__.py +24 -6
- xradio/schema/bases.py +440 -2
- xradio/schema/check.py +96 -55
- xradio/schema/dataclass.py +123 -27
- xradio/schema/metamodel.py +21 -4
- xradio/schema/typing.py +33 -18
- xradio/vis/__init__.py +5 -2
- xradio/vis/_processing_set.py +28 -20
- xradio/vis/_vis_utils/_ms/_tables/create_field_and_source_xds.py +710 -0
- xradio/vis/_vis_utils/_ms/_tables/load.py +23 -10
- xradio/vis/_vis_utils/_ms/_tables/load_main_table.py +145 -64
- xradio/vis/_vis_utils/_ms/_tables/read.py +747 -172
- xradio/vis/_vis_utils/_ms/_tables/read_main_table.py +173 -44
- xradio/vis/_vis_utils/_ms/_tables/read_subtables.py +79 -28
- xradio/vis/_vis_utils/_ms/_tables/write.py +102 -45
- xradio/vis/_vis_utils/_ms/_tables/write_exp_api.py +127 -65
- xradio/vis/_vis_utils/_ms/chunks.py +58 -21
- xradio/vis/_vis_utils/_ms/conversion.py +536 -67
- xradio/vis/_vis_utils/_ms/descr.py +52 -20
- xradio/vis/_vis_utils/_ms/msv2_to_msv4_meta.py +70 -35
- xradio/vis/_vis_utils/_ms/msv4_infos.py +0 -59
- xradio/vis/_vis_utils/_ms/msv4_sub_xdss.py +76 -9
- xradio/vis/_vis_utils/_ms/optimised_functions.py +0 -46
- xradio/vis/_vis_utils/_ms/partition_queries.py +308 -119
- xradio/vis/_vis_utils/_ms/partitions.py +82 -25
- xradio/vis/_vis_utils/_ms/subtables.py +32 -14
- xradio/vis/_vis_utils/_utils/partition_attrs.py +30 -11
- xradio/vis/_vis_utils/_utils/xds_helper.py +136 -45
- xradio/vis/_vis_utils/_zarr/read.py +60 -22
- xradio/vis/_vis_utils/_zarr/write.py +83 -9
- xradio/vis/_vis_utils/ms.py +48 -29
- xradio/vis/_vis_utils/zarr.py +44 -20
- xradio/vis/convert_msv2_to_processing_set.py +106 -32
- xradio/vis/load_processing_set.py +38 -61
- xradio/vis/read_processing_set.py +62 -96
- xradio/vis/schema.py +687 -0
- xradio/vis/vis_io.py +75 -43
- {xradio-0.0.28.dist-info → xradio-0.0.29.dist-info}/LICENSE.txt +6 -1
- {xradio-0.0.28.dist-info → xradio-0.0.29.dist-info}/METADATA +10 -5
- xradio-0.0.29.dist-info/RECORD +73 -0
- {xradio-0.0.28.dist-info → xradio-0.0.29.dist-info}/WHEEL +1 -1
- xradio/vis/model.py +0 -497
- xradio-0.0.28.dist-info/RECORD +0 -71
- {xradio-0.0.28.dist-info → xradio-0.0.29.dist-info}/top_level.txt +0 -0
@@ -14,16 +14,156 @@ from ._tables.read import read_generic_table
 from .subtables import subt_rename_ids


+def enumerated_product(*args):
+    yield from zip(
+        itertools.product(*(range(len(x)) for x in args)), itertools.product(*args)
+    )
+
+
+def create_partitions(in_file: str, partition_scheme: Union[str, list], vla_otf=False):
+    """Create a list of dictionaries with the partition information.
+
+    Args:
+        in_file (str): Input MSv2 file path.
+        partition_scheme (Union[str, list]): Partition scheme to be used.
+        vla_otf (bool, optional): The partioning of VLA OTF (on the fly) mosaics needs a special partitioning scheme. Defaults to False.
+
+    Returns:
+        list: list of dictionaries with the partition information.
+    """
+
+    # Create partition table
+    from casacore import tables
+    import numpy as np
+    import xarray as xr
+    import pandas as pd
+    import os
+
+    if partition_scheme == "ddi_intent_field":
+        partition_scheme = ["DATA_DESC_ID", "INTENT", "FIELD_ID"]
+    elif partition_scheme == "ddi_intent_scan":
+        partition_scheme = ["DATA_DESC_ID", "INTENT", "SCAN_NUMBER"]
+
+    # Open MSv2 tables and add columns to partition table (par_df):
+    par_df = pd.DataFrame()
+    main_tb = tables.table(
+        in_file, readonly=True, lockoptions={"option": "usernoread"}, ack=False
+    )
+    par_df["DATA_DESC_ID"] = main_tb.getcol("DATA_DESC_ID")
+    par_df["FIELD_ID"] = main_tb.getcol("FIELD_ID")
+    par_df["SCAN_NUMBER"] = main_tb.getcol("SCAN_NUMBER")
+    par_df["STATE_ID"] = main_tb.getcol("STATE_ID")
+    par_df = par_df.drop_duplicates()
+
+    field_tb = tables.table(
+        os.path.join(in_file, "FIELD"),
+        readonly=True,
+        lockoptions={"option": "usernoread"},
+        ack=False,
+    )
+    if vla_otf:
+        par_df["FIELD_NAME"] = np.array(field_tb.getcol("NAME"))[par_df["FIELD_ID"]]
+
+    # Get source ids if available from source table.
+    if os.path.isdir(os.path.join(os.path.join(in_file, "SOURCE"))):
+        source_tb = tables.table(
+            os.path.join(in_file, "SOURCE"),
+            readonly=True,
+            lockoptions={"option": "usernoread"},
+            ack=False,
+        )
+        if source_tb.nrows() != 0:
+            par_df["SOURCE_ID"] = field_tb.getcol("SOURCE_ID")[par_df["FIELD_ID"]]
+            if vla_otf:
+                par_df["SOURCE_NAME"] = np.array(source_tb.getcol("NAME"))[
+                    par_df["SOURCE_ID"]
+                ]
+
+    # Get intents and subscan numbers if available from state table.
+    if os.path.isdir(os.path.join(in_file, "STATE")):
+        state_tb = tables.table(
+            os.path.join(in_file, "STATE"),
+            readonly=True,
+            lockoptions={"option": "usernoread"},
+            ack=False,
+        )
+        if state_tb.nrows() != 0:
+            # print('state_tb',state_tb.nrows(),state_tb)
+            par_df["INTENT"] = np.array(state_tb.getcol("OBS_MODE"))[par_df["STATE_ID"]]
+            par_df["SUB_SCAN_NUMBER"] = state_tb.getcol("SUB_SCAN")[par_df["STATE_ID"]]
+        else:
+            par_df.drop(["STATE_ID"], axis=1)
+
+    # Check if all partition scheme criteria are present in the partition table.
+    partition_scheme_updated = []
+    partition_criteria = {}
+    for par in partition_scheme:
+        if par in par_df.columns:
+            partition_criteria[par] = par_df[par].unique()
+            partition_scheme_updated.append(par)
+    logger.info(f"Partition scheme that will be used: {partition_scheme_updated}")
+
+    # Make all possible combinations of the partition criteria.
+    enumerated_partitions = enumerated_product(*list(partition_criteria.values()))
+
+    # Create a list of dictionaries with the partition information. This will be used to query the MSv2 main table.
+    partitions = []
+    partition_axis_names = [
+        "DATA_DESC_ID",
+        "FIELD_ID",
+        "SCAN_NUMBER",
+        "STATE_ID",
+        "SOURCE_ID",
+        "INTENT",
+        "SUB_SCAN_NUMBER",
+    ]
+    for idx, pair in enumerated_partitions:
+        query = ""
+        for i, par in enumerate(partition_scheme_updated):
+            if isinstance(pair[i], str):
+                query = query + f'{par} == "{pair[i]}" and '
+            else:
+                query = query + f"{par} == {pair[i]} and "
+        query = query[:-4] # remove last and
+        sub_par_df = par_df.query(query).drop_duplicates()
+
+        if sub_par_df.shape[0] != 0:
+            partition_info = {}
+
+            # FIELD_NAME SOURCE_NAME
+            for col_name in partition_axis_names:
+                if col_name in sub_par_df.columns:
+                    partition_info[col_name] = sub_par_df[col_name].unique()
+                else:
+                    partition_info[col_name] = [None]
+
+            partitions.append(partition_info)
+
+    return partitions
+
+
+# Used by code that will be deprecated at some stage.
+
+
 def make_partition_ids_by_ddi_scan(
     infile: str, do_subscans: bool
 ) -> Tuple[np.ndarray, np.ndarray, np.ndarray]:
-    """
+    """
+    Produces arrays of per-partition ddi, scan, state_id, for when
     using partiion schemes 'scan' or 'scan/subscan', that is
     partitioning by some variant of (ddi, scan, subscan(state_id))

-
-
-    :
+    Parameters
+    ----------
+    infile : str
+        Path to MS
+    do_subscans : bool
+        also partitioning by subscan, not only scan
+
+    Returns
+    -------
+    Tuple[np.ndarray, np.ndarray, np.ndarray]
+        arrays with indices that define every partition
     """
     try:
         cctable = None
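
A minimal sketch of what the new enumerated_product helper yields, with a create_partitions call hinted in the trailing comment. The generator is restated locally so the snippet runs on its own; the MS path "my.ms" and the intent strings are made up for illustration.

    import itertools

    def enumerated_product(*args):
        # Pairs each index tuple with the matching value combination,
        # mirroring the generator added in this hunk.
        yield from zip(
            itertools.product(*(range(len(x)) for x in args)), itertools.product(*args)
        )

    for idx, combo in enumerated_product([0, 1], ["OBSERVE_TARGET", "CALIBRATE_PHASE"]):
        print(idx, combo)
    # (0, 0) (0, 'OBSERVE_TARGET')
    # (0, 1) (0, 'CALIBRATE_PHASE')
    # (1, 0) (1, 'OBSERVE_TARGET')
    # (1, 1) (1, 'CALIBRATE_PHASE')

    # create_partitions("my.ms", partition_scheme="ddi_intent_field") would then return
    # one dict per (DATA_DESC_ID, INTENT, FIELD_ID) combination found in the MS.
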
@@ -66,55 +206,72 @@ def make_partition_ids_by_ddi_scan(
         return data_desc_id, scan_number, state_id


-def
-    infile: str,
+def make_partition_ids_by_ddi_intent(
+    infile: str, spw_names: xr.DataArray
 ) -> Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray]:
-    """
-
+    """
+    Produces arrays of per-partition ddi, scan, state_id, for when
+    using the partition scheme 'intents' (ddi, scan, subscans(state_ids))

-
-
+    Parameters
+    ----------
+    infile : str
+        return: arrays with indices that define every partition
+    spw_names: xr.DataArray

-
-
-
-
+
+    Returns
+    -------
+    Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray]
+        arrays with indices that define every partition
     """
-
-
+    # TODO: could explore other TAQL alternatives, like
+    # select ... from ::STATE where OBS_MODE = ...
+    #
+    # This will work only if intents are already alphabetically sorted (grouped),
+    # won't work for alternating intents:
+    # taql_intents = "select rowid() as ROWS from $state_tbl GROUPBY OBS_MODE "

-
-
-    )
-    taql_ddis = "select DISTINCT DATA_DESC_ID from $main_table"
-    with open_query(main_table, taql_ddis) as query_per_intent:
-        # Will take whatever scans given the STATE_IDs and DDIs
-        # scan_number = query_per_intent.getcol("SCAN_NUMBER")
-        distinct_ddis = query_per_intent.getcol("DATA_DESC_ID")
+    with open_table_ro(str(Path(infile, "STATE"))) as state_tbl:
+        distinct_obs_mode = find_distinct_obs_mode(infile, state_tbl)

-
-
-    )
-    nparts = len(distinct_ddis)
+        if distinct_obs_mode is None:
+            return partition_when_empty_state(infile)

-
-
-
+        with open_table_ro(infile) as main_tbl:
+            (
+                data_desc_id,
+                state_id_partitions,
+                intent_names,
+            ) = make_ddi_state_intent_lists(
+                main_tbl, state_tbl, distinct_obs_mode, spw_names
+            )

-
+    # Take whatever scans given by the STATE_IDs and DDIs
+    scan_number = [None] * len(state_id_partitions)
+
+    return data_desc_id, scan_number, state_id_partitions, intent_names


 def find_distinct_obs_mode(
     infile: str, state_table: tables.table
 ) -> Union[List[str], None]:
-    """
+    """
+    Produce a list of unique "scan/subscan" intents.

-
-
-    :
-
-
+    Parameters
+    ----------
+    infile : str
+        Path to the MS
+    state_table : tables.table
+        casacore table object to read from

+    Returns
+    -------
+    Union[List[str], None]
+        List of unique "scan/subscan" intents as given in the
+        OBS_MODE column of the STATE subtable. None if the STATE subtable
+        is empty or there is a problem reading it
     """
     taql_distinct_intents = "select DISTINCT OBS_MODE from $state_table"
     with open_query(state_table, taql_distinct_intents) as query_intents:
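
As a usage sketch only: the reworked intent-based partitioning above can be driven roughly as follows. The import path is inferred from the partition_queries.py entry in the file list, and the MS path "uid.ms" and the SPW names are placeholders.

    import xarray as xr

    from xradio.vis._vis_utils._ms.partition_queries import (
        make_partition_ids_by_ddi_intent,
    )

    # spw_names would normally come from the already-read SPECTRAL_WINDOW subtable.
    spw_names = xr.DataArray(
        ["ALMA_RB_03#BB_1#SW-01#FULL_RES", "WVR#NOMINAL"], dims="row"
    )
    ddis, scans, state_id_parts, intents = make_partition_ids_by_ddi_intent(
        "uid.ms", spw_names
    )
    # One entry per partition; scans is a list of None because this scheme takes
    # whatever scans the STATE_IDs and DDIs cover (see the function body above).
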
@@ -131,58 +288,14 @@ def find_distinct_obs_mode(
         return distinct_obs_mode


-def filter_intents_per_ddi(
-    ddis: List[int], substr: str, intents: str, spw_name_by_ddi: Dict[int, str]
-) -> List[str]:
-    """For a given pair of:
-    - substring (say 'WVR') associated with a type of intent we want to differentiate
-    - intents string (multiple comma-separated scan/subscan intents)
-    => do: for every DDI passed in the list of ddis, either keep only the
-    intents that have that substring (if there are any) or drop them, depending on
-    whether that substring is present in the SPW name. This is to filter in only
-    the intents that really apply to every DDI/SPW.
-
-    :param ddis: list of ddis for which the intents have to be filtered
-    :param substr: substring to filter by
-    :param intents: string with a comma-separated list of individual
-    scan/subscan intent strings (like scan/subscan intents as stored
-    in the MS STATE/OBS_MODE
-    :param spw_name_by_ddi: SPW names by DDI ID (row index) key
-    :return: list where the intents related to 'substr' have been filtered in our out
-
-    """
-    present = substr in intents
-    # Nothing to effectively filter, full cs-list of intents apply to all DDIs
-    if not present:
-        return [intents] * len(ddis)
-
-    every_intent = intents.split(",")
-    filtered_intents = []
-    for ddi in ddis:
-        spw_name = spw_name_by_ddi[ddi]
-        if not spw_name:
-            # we cannot say / cannot filter
-            filtered_intents.append(intents)
-            continue
-
-        # A not-xor to select/deselect (or keep-only/drop) the intents that apply
-        # to this DDI
-        ddi_intents = [
-            intnt for intnt in every_intent if (substr in intnt) == (substr in spw_name)
-        ]
-        ddi_intents = ",".join(ddi_intents)
-        filtered_intents.append(ddi_intents)
-
-    return filtered_intents
-
-
 def make_ddi_state_intent_lists(
     main_tbl: tables.table,
     state_tbl: tables.table,
     distinct_obs_mode: np.ndarray,
     spw_name_by_ddi: Dict[int, str],
 ) -> Tuple[np.ndarray, np.ndarray, np.ndarray]:
-    """
+    """
+    Produce arrays of (ddi indices, state indices, intent string)
     for every distinct intent string, where every item represents one
     partition of the main table


@@ -193,11 +306,21 @@ def make_ddi_state_intent_lists(
     intent is the only kept when the DDI/SPW has WVR in its name). See
     call to filter_intents_per_ddi()

-
-
-
-
+    Parameters
+    ----------
+    main_tbl : tables.table
+        main MS table openend as a casacore.tables.table
+    state_tbl : tables.table
+        STATE subtable openend as a casacore.tables.table
+    distinct_obs_mode : np.ndarray
+        list of unique/distinct OBS_MODE strings from the STATE table
+    spw_name_by_ddi: Dict[int, str]

+
+    Returns
+    -------
+    Tuple[np.ndarray, np.ndarray, np.ndarray]
+        arrays of (ddi indices, state indices, intent string)
     """
     data_desc_id, state_id_partitions, intent_names = [], [], []
     for intent in distinct_obs_mode:
@@ -233,41 +356,106 @@ def make_ddi_state_intent_lists(
     return data_desc_id, state_id_partitions, intent_names


-def
-
-) ->
-    """
-
+def filter_intents_per_ddi(
+    ddis: List[int], substr: str, intents: str, spw_name_by_ddi: Dict[int, str]
+) -> List[str]:
+    """
+    For a given pair of:
+    - substring (say 'WVR') associated with a type of intent we want to differentiate
+    - intents string (multiple comma-separated scan/subscan intents)
+    => do: for every DDI passed in the list of ddis, either keep only the
+    intents that have that substring (if there are any) or drop them, depending on
+    whether that substring is present in the SPW name. This is to filter in only
+    the intents that really apply to every DDI/SPW.

-
-
+    Parameters
+    ----------
+    ddis : List[int]
+        list of ddis for which the intents have to be filtered
+    substr : str
+        substring to filter by
+    intents : str
+        string with a comma-separated list of individual
+        scan/subscan intent strings (like scan/subscan intents as stored
+        in the MS STATE/OBS_MODE
+    spw_name_by_ddi : Dict[int, str]
+        SPW names by DDI ID (row index) key
+
+    Returns
+    -------
+    List[str]
+        list where the intents related to 'substr' have been filtered in our out
     """
-
-    #
-
-
-    # won't work for alternating intents:
-    # taql_intents = "select rowid() as ROWS from $state_tbl GROUPBY OBS_MODE "
+    present = substr in intents
+    # Nothing to effectively filter, full cs-list of intents apply to all DDIs
+    if not present:
+        return [intents] * len(ddis)

-
-
+    every_intent = intents.split(",")
+    filtered_intents = []
+    for ddi in ddis:
+        spw_name = spw_name_by_ddi.get(ddi, "")

-    if
-
+        if not spw_name:
+            # we cannot say / cannot filter
+            filtered_intents.append(intents)
+            continue

-
-
-
-
-
-
-
-    )
+        # A not-xor to select/deselect (or keep-only/drop) the intents that apply
+        # to this DDI
+        ddi_intents = [
+            intnt for intnt in every_intent if (substr in intnt) == (substr in spw_name)
+        ]
+        ddi_intents = ",".join(ddi_intents)
+        filtered_intents.append(ddi_intents)

-
-    scan_number = [None] * len(state_id_partitions)
+    return filtered_intents

-
+
+def partition_when_empty_state(
+    infile: str,
+) -> Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray]:
+    """
+    Generate fallback partition ids when trying to partition by
+    'intent' but the STATE table is empty.
+
+    Some MSs have no STATE rows and in the main table STATE_ID==-1
+    (that is not a valid MSv2 but it happens).
+
+    Parameters
+    ----------
+    infile : str
+        Path to the MS
+
+    Returns
+    -------
+    Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray]
+        same as make_partition_ids_by_ddi_intent but with
+        effectively only ddi indices and other indices set to None ("any
+        IDs found")
+    """
+    try:
+        main_table = None
+
+        main_table = tables.table(
+            infile, readonly=True, lockoptions={"option": "usernoread"}, ack=False
+        )
+        taql_ddis = "select DISTINCT DATA_DESC_ID from $main_table"
+        with open_query(main_table, taql_ddis) as query_per_intent:
+            # Will take whatever scans given the STATE_IDs and DDIs
+            # scan_number = query_per_intent.getcol("SCAN_NUMBER")
+            distinct_ddis = query_per_intent.getcol("DATA_DESC_ID")
+
+            logger.debug(
+                f"Producing {len(distinct_ddis)} partitions for ddis: {distinct_ddis}"
+            )
+            nparts = len(distinct_ddis)
+
+    finally:
+        if main_table:
+            main_table.close()
+
+    return distinct_ddis, [None] * nparts, [None] * nparts, [""] * nparts


 def create_taql_query_and_file_name(out_file, intent, state_ids, field_id, ddi):
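
The relocated filter_intents_per_ddi keeps, per DDI, only the intents whose "WVR-ness" matches the SPW name (the not-xor noted in the comment above). A stand-alone restatement of that rule, with made-up intent and SPW strings:

    intents = "CALIBRATE_ATMOSPHERE#OFF_SOURCE,CALIBRATE_WVR#OFF_SOURCE"
    spw_name = "WVR#NOMINAL"  # made-up SPW name that contains the substring
    substr = "WVR"

    # Same comprehension as in the function body: keep an intent only when
    # "substr in intent" agrees with "substr in spw_name".
    kept = [i for i in intents.split(",") if (substr in i) == (substr in spw_name)]
    print(",".join(kept))
    # CALIBRATE_WVR#OFF_SOURCE
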
@@ -297,7 +485,8 @@ def create_taql_query_and_file_name(out_file, intent, state_ids, field_id, ddi):


 def get_unqiue_intents(in_file):
-    """
+    """
+    _summary_

     Parameters
     ----------

@@ -334,7 +523,8 @@ def enumerated_product(*args):


 def create_partition_enumerated_product(in_file: str, partition_scheme: str):
-    """
+    """
+    Creates an enumerated_product of the data_desc_ids, state_ids, field_ids in a MS v2 that define the partions in a processing set.

     Parameters
     ----------

@@ -365,7 +555,6 @@ def create_partition_enumerated_product(in_file: str, partition_scheme: str):
         intents, state_ids = get_unqiue_intents(in_file)
         field_ids = np.arange(read_generic_table(in_file, "FIELD").sizes["row"])
     else: # partition_scheme == "ddi_state_field"
-
         if len(state_xds.data_vars) > 0:
             state_ids = [np.arange(state_xds.sizes["row"])]
             intents = state_xds.obs_mode.values
@@ -29,8 +29,19 @@ def make_spw_names_by_ddi(ddi_xds: xr.Dataset, spw_xds: xr.Dataset) -> Dict[int,


 def split_intents(intents: str):
-    """
+    """
+    Make a dict with two scan / subscan levels of intents from an
     intent string from the STATE/OBS_MODE of an MS.
+
+    Parameters
+    ----------
+    intents : str
+        intents "OBS_MODE" string from an MS/STATE row
+
+    Returns
+    -------
+    Dict[str, list]
+        per scan intent list of individual subscan intent strings
     """
     sub_sep = "#"
     if sub_sep not in intents:
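
An illustrative, stand-alone approximation of the grouping that the split_intents docstring describes; the OBS_MODE string is made up and this is not the library's implementation, which the hunk above only partially shows.

    obs_mode = "OBSERVE_TARGET#ON_SOURCE,CALIBRATE_PHASE#ON_SOURCE"
    sub_sep = "#"

    grouped = {}
    for item in obs_mode.split(","):
        # Scan intent before the "#" separator, subscan intent after it.
        scan_intent, _, subscan_intent = item.partition(sub_sep)
        grouped.setdefault(scan_intent, []).append(subscan_intent)

    print(grouped)
    # {'OBSERVE_TARGET': ['ON_SOURCE'], 'CALIBRATE_PHASE': ['ON_SOURCE']}
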
@@ -60,13 +71,25 @@ def make_part_key(
     partition_scheme: str,
     intent: str = "",
     scan_state: Union[Tuple, None] = None,
-):
+) -> PartitionKey:
     """
     Makes the key that a partition (sub)xds will have in the partitions dictionary of a cds.

-
-
-
+    Parameters
+    ----------
+    xds : xr.Dataset
+        partition xds with data and attrs
+    partition_scheme : str
+        one of the schemes supported in the read_ms_*_partitions() functions
+    intent : str (Default value = "")
+        partition intent
+    scan_state : Union[Tuple, None] (Default value = None)
+        scan/state ids, required when partition_scheme != 'ddi'
+
+    Returns
+    -------
+    PartitionKey
+        partition key
     """
     spw_id = xds.attrs["partition_ids"]["spw_id"]
     pol_setup_id = xds.attrs["partition_ids"]["pol_setup_id"]

@@ -91,17 +114,28 @@ def read_ms_scan_subscan_partitions(
     expand: bool = False,
     chunks: Union[Tuple[int], List[int], None] = None,
 ) -> Tuple[VisSetPartitions, Dict[str, xr.Dataset], List[str]]:
-    """
+    """
+    partitions per scan_number/subscans
     (main table column SCAN_NUMBER / STATE_ID)

-
-
-
-
-
-
-
-
+    Parameters
+    ----------
+    infile : str
+        MS path (main table)
+    partition_scheme : str
+        this functions can do 'intent', 'scan', and 'scan/subscan'
+    expand : bool (Default value = False)
+        wether to use (time, baseline) dimensions rather than 1d (row)
+        (only relevant when using the read_flat variant of read functions)
+    chunk : Union[Tuple[int], List[int], None] (Default value = None)
+        Dask chunking as tuple (time, baseline, chan, pol)
+
+    Returns
+    -------
+    Tuple[VisSetPartitions, Dict[str, xr.Dataset], List[str]]
+        a dictionary of partitions, a dict of subtable
+        xr.Datasets to use later for metainformation, and a list of the
+        subtables already read
     """

     spw_xds = read_generic_table(

@@ -140,13 +174,15 @@ def read_ms_scan_subscan_partitions(
         if partition_scheme == "intent":
             intent = distinct_intents[cnt]
             cnt += 1
+        else:
+            intent = ""

         if partition_scheme == "scan":
             scan_state = (scan, None)
         else:
             scan_state = (scan, state)
         # experimenting, comparing overheads of expanded vs. flat
-        expanded =
+        expanded = not expand
         if expanded:
             xds, part_ids, attrs = read_expanded_main_table(
                 infile, ddi, scan_state=scan_state, ignore_msv2_cols=ignore_msv2_cols
@@ -207,11 +243,23 @@ def read_ms_ddi_partitions(
     from the DDIs. First looks into the SPECTRAL_WINDOW, POLARIZATION,
     DATA_DESCRIPTION tables to define the partitions.

-
-
-
-
-
+    Parameters
+    ----------
+    infile : str
+        input MS path
+    expand : bool (Default value = False)
+        redimension (row)->(time,baseline)
+    rowmap : Union[dict, None] (Default value = None)
+        to be removed
+    chunks : Union[Tuple[int], List[int], None] (Default value = None)
+        array data chunk sizes
+
+    Returns
+    -------
+    Tuple[VisSetPartitions, Dict[str, xr.Dataset], List[str]]
+        dictionary of partitions, dict of subtable xr.Datasets to use later
+        for metainformation, and a list of the subtables already read
+
     """
     # we need the antenna, spectral window, polarization, and data description tables
     # to define the (sub)datasets (their dims and coords) and to process the main table
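
A sketch of how these private readers chain together, per the docstrings above; the module path is assumed from the partitions.py entry in the file list, and "my.ms" is a placeholder MS path.

    from xradio.vis._vis_utils._ms.partitions import (
        finalize_partitions,
        read_ms_ddi_partitions,
    )

    # One partition per DDI, plus the subtable datasets and the list of
    # subtables already read.
    parts, subts, read_subts = read_ms_ddi_partitions("my.ms", expand=True)
    parts = finalize_partitions(parts, subts)  # e.g. attaches pointing variables
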
@@ -263,7 +311,7 @@ def read_ms_ddi_partitions(
         )

         # experimenting, comparing overheads of expanded vs. flat
-        expanded =
+        expanded = not expand
         if expanded:
             xds, part_ids, attrs = read_expanded_main_table(
                 infile, ddi, ignore_msv2_cols=ignore_msv2_cols

@@ -319,13 +367,22 @@ def finalize_partitions(
 def finalize_partitions(
     parts: Dict[str, xr.Dataset], subts: Dict[str, xr.Dataset]
 ) -> Dict[str, xr.Dataset]:
-    """
+    """
+    Once the partition datasets and the metainfo/subtable datasets
     have been read, add to the partitions:
     - pointing variables from the pointing subtable

-
-
-    :
+    Parameters
+    ----------
+    parts : Dict[str, xr.Dataset]
+        partitions as xarray datasets, as read from an MS main table
+    subts : Dict[str, xr.Dataset]
+        subtables of an MS read as xarray datasets
+
+    Returns
+    -------
+    Dict[str, xr.Dataset]
+        partitions with additions taken from subtables
     """
     if "pointing" in subts:
         pointing = subts["pointing"]