xradio 0.0.34__py3-none-any.whl → 0.0.36__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- xradio/_utils/list_and_array.py +3 -1
- xradio/_utils/schema.py +190 -0
- xradio/_utils/zarr/common.py +11 -5
- xradio/image/_util/_zarr/xds_from_zarr.py +15 -2
- xradio/image/_util/_zarr/zarr_low_level.py +65 -14
- xradio/schema/bases.py +37 -8
- xradio/schema/check.py +15 -3
- xradio/schema/dataclass.py +2 -2
- xradio/vis/_processing_set.py +136 -10
- xradio/vis/_vis_utils/_ms/_tables/read.py +9 -0
- xradio/vis/_vis_utils/_ms/conversion.py +93 -85
- xradio/vis/_vis_utils/_ms/create_antenna_xds.py +479 -0
- xradio/vis/_vis_utils/_ms/create_field_and_source_xds.py +84 -42
- xradio/vis/_vis_utils/_ms/msv2_to_msv4_meta.py +1 -105
- xradio/vis/_vis_utils/_ms/msv4_sub_xdss.py +4 -224
- xradio/vis/_vis_utils/_utils/xds_helper.py +10 -2
- xradio/vis/convert_msv2_to_processing_set.py +6 -1
- xradio/vis/load_processing_set.py +2 -2
- xradio/vis/read_processing_set.py +5 -2
- xradio/vis/schema.py +348 -112
- {xradio-0.0.34.dist-info → xradio-0.0.36.dist-info}/METADATA +1 -1
- {xradio-0.0.34.dist-info → xradio-0.0.36.dist-info}/RECORD +25 -23
- {xradio-0.0.34.dist-info → xradio-0.0.36.dist-info}/WHEEL +1 -1
- {xradio-0.0.34.dist-info → xradio-0.0.36.dist-info}/LICENSE.txt +0 -0
- {xradio-0.0.34.dist-info → xradio-0.0.36.dist-info}/top_level.txt +0 -0
xradio/vis/_processing_set.py
CHANGED
|
@@ -1,19 +1,57 @@
|
|
|
1
1
|
import pandas as pd
|
|
2
|
+
from xradio._utils.list_and_array import to_list
|
|
3
|
+
import numbers
|
|
2
4
|
|
|
3
5
|
|
|
4
6
|
class processing_set(dict):
|
|
7
|
+
"""
|
|
8
|
+
A dictionary subclass representing a Processing Set (PS) that is a set of Measurement Sets v4 (MS).
|
|
9
|
+
|
|
10
|
+
This class extends the built-in `dict` class and provides additional methods for manipulating and selecting subsets of the Processing Set.
|
|
11
|
+
|
|
12
|
+
Attributes:
|
|
13
|
+
meta (dict): A dictionary containing metadata information about the Processing Set.
|
|
14
|
+
|
|
15
|
+
Methods:
|
|
16
|
+
summary(data_group="base"): Returns a summary of the Processing Set as a Pandas table.
|
|
17
|
+
get_ps_max_dims(): Returns the maximum dimension of all the MSs in the Processing Set.
|
|
18
|
+
get_ps_freq_axis(): Combines the frequency axis of all MSs.
|
|
19
|
+
sel(string_exact_match:bool=True, query:str=None, **kwargs): Selects a subset of the Processing Set based on column names and values or a Pandas query.
|
|
20
|
+
ms_sel(**kwargs): Selects a subset of the Processing Set by applying the `sel` method to each individual MS.
|
|
21
|
+
ms_isel(**kwargs): Selects a subset of the Processing Set by applying the `isel` method to each individual MS.
|
|
22
|
+
"""
|
|
23
|
+
|
|
5
24
|
def __init__(self, *args, **kwargs):
|
|
6
25
|
super().__init__(*args, **kwargs)
|
|
7
26
|
self.meta = {"summary": {}}
|
|
8
27
|
|
|
9
28
|
def summary(self, data_group="base"):
|
|
29
|
+
"""
|
|
30
|
+
Returns a summary of the Processing Set as a Pandas table.
|
|
31
|
+
|
|
32
|
+
Args:
|
|
33
|
+
data_group (str): The data group to summarize. Default is "base".
|
|
34
|
+
|
|
35
|
+
Returns:
|
|
36
|
+
pandas.DataFrame: A DataFrame containing the summary information.
|
|
37
|
+
"""
|
|
10
38
|
if data_group in self.meta["summary"]:
|
|
11
39
|
return self.meta["summary"][data_group]
|
|
12
40
|
else:
|
|
13
|
-
self.meta["summary"][data_group] = self._summary(data_group)
|
|
41
|
+
self.meta["summary"][data_group] = self._summary(data_group).sort_values(
|
|
42
|
+
by=["name"], ascending=True
|
|
43
|
+
)
|
|
14
44
|
return self.meta["summary"][data_group]
|
|
15
45
|
|
|
16
46
|
def get_ps_max_dims(self):
|
|
47
|
+
"""
|
|
48
|
+
Returns the maximum dimension of all the MSs in the Processing Set.
|
|
49
|
+
|
|
50
|
+
For example, if the Processing Set contains two MSs with dimensions (50, 20, 30) and (10, 30, 40), the maximum dimensions will be (50, 30, 40).
|
|
51
|
+
|
|
52
|
+
Returns:
|
|
53
|
+
dict: A dictionary containing the maximum dimensions of the Processing Set.
|
|
54
|
+
"""
|
|
17
55
|
if "max_dims" in self.meta:
|
|
18
56
|
return self.meta["max_dims"]
|
|
19
57
|
else:
|
|
@@ -21,6 +59,12 @@ class processing_set(dict):
|
|
|
21
59
|
return self.meta["max_dims"]
|
|
22
60
|
|
|
23
61
|
def get_ps_freq_axis(self):
|
|
62
|
+
"""
|
|
63
|
+
Combines the frequency axis of all MSs.
|
|
64
|
+
|
|
65
|
+
Returns:
|
|
66
|
+
xarray.DataArray: The frequency axis of the Processing Set.
|
|
67
|
+
"""
|
|
24
68
|
if "freq_axis" in self.meta:
|
|
25
69
|
return self.meta["freq_axis"]
|
|
26
70
|
else:
|
|
@@ -33,11 +77,14 @@ class processing_set(dict):
|
|
|
33
77
|
"obs_mode": [],
|
|
34
78
|
"shape": [],
|
|
35
79
|
"polarization": [],
|
|
80
|
+
"scan_number": [],
|
|
36
81
|
"spw_name": [],
|
|
37
82
|
# "field_id": [],
|
|
38
83
|
"field_name": [],
|
|
39
84
|
# "source_id": [],
|
|
40
85
|
"source_name": [],
|
|
86
|
+
# "num_lines": [],
|
|
87
|
+
"line_name": [],
|
|
41
88
|
"field_coords": [],
|
|
42
89
|
"start_frequency": [],
|
|
43
90
|
"end_frequency": [],
|
|
@@ -52,6 +99,9 @@ class processing_set(dict):
|
|
|
52
99
|
value.attrs["partition_info"]["spectral_window_name"]
|
|
53
100
|
)
|
|
54
101
|
summary_data["polarization"].append(value.polarization.values)
|
|
102
|
+
summary_data["scan_number"].append(
|
|
103
|
+
value.attrs["partition_info"]["scan_number"]
|
|
104
|
+
)
|
|
55
105
|
|
|
56
106
|
if "visibility" in value.attrs["data_groups"][data_group]:
|
|
57
107
|
data_name = value.attrs["data_groups"][data_group]["visibility"]
|
|
@@ -72,8 +122,14 @@ class processing_set(dict):
|
|
|
72
122
|
summary_data["source_name"].append(
|
|
73
123
|
value.attrs["partition_info"]["source_name"]
|
|
74
124
|
)
|
|
75
|
-
|
|
76
|
-
summary_data["
|
|
125
|
+
|
|
126
|
+
summary_data["line_name"].append(value.attrs["partition_info"]["line_name"])
|
|
127
|
+
|
|
128
|
+
# summary_data["num_lines"].append(value.attrs["partition_info"]["num_lines"])
|
|
129
|
+
summary_data["start_frequency"].append(
|
|
130
|
+
to_list(value["frequency"].values)[0]
|
|
131
|
+
)
|
|
132
|
+
summary_data["end_frequency"].append(to_list(value["frequency"].values)[-1])
|
|
77
133
|
|
|
78
134
|
if value[data_name].attrs["field_and_source_xds"].is_ephemeris:
|
|
79
135
|
summary_data["field_coords"].append("Ephemeris")
|
|
@@ -117,7 +173,7 @@ class processing_set(dict):
|
|
|
117
173
|
for ms_xds in self.values():
|
|
118
174
|
assert (
|
|
119
175
|
frame == ms_xds.frequency.attrs["frame"]
|
|
120
|
-
), "Frequency reference frame not consistent in
|
|
176
|
+
), "Frequency reference frame not consistent in Processing Set."
|
|
121
177
|
if ms_xds.frequency.attrs["spectral_window_id"] not in spw_ids:
|
|
122
178
|
spw_ids.append(ms_xds.frequency.attrs["spectral_window_id"])
|
|
123
179
|
freq_axis_list.append(ms_xds.frequency)
|
|
@@ -142,19 +198,71 @@ class processing_set(dict):
|
|
|
142
198
|
def get(self, id):
|
|
143
199
|
return self[list(self.keys())[id]]
|
|
144
200
|
|
|
145
|
-
def sel(self, **kwargs):
|
|
201
|
+
def sel(self, string_exact_match: bool = True, query: str = None, **kwargs):
|
|
202
|
+
"""
|
|
203
|
+
Selects a subset of the Processing Set based on column names and values or a Pandas query.
|
|
204
|
+
|
|
205
|
+
The following columns are supported: name, obs_mode, polarization, scan_number, spw_name, field_name, source_name, line_name, field_coords, start_frequency, end_frequency.
|
|
206
|
+
|
|
207
|
+
This function does not apply any selection to the data inside each MS, so no data is dropped. For example, if an MS has field_name=['field_0','field_10','field_08'] and ps.sel(field_name='field_0') is called, the resulting MS will still have field_name=['field_0','field_10','field_08'].
|
|
208
|
+
|
|
209
|
+
Examples:
|
|
210
|
+
ps.sel(obs_mode='OBSERVE_TARGET#ON_SOURCE', polarization=['RR', 'LL']) # Select all MSs with obs_mode 'OBSERVE_TARGET#ON_SOURCE' and polarization 'RR' or 'LL'.
|
|
211
|
+
ps.sel(query='start_frequency > 100e9 and end_frequency < 200e9') # Select all MSs with start_frequency greater than 100 GHz and end_frequency less than 200 GHz.
|
|
212
|
+
|
|
213
|
+
Args:
|
|
214
|
+
query (str): A Pandas query string. Default is None.
|
|
215
|
+
string_exact_match (bool): If True, the selection will be an exact match for string and string list columns. Default is True.
|
|
216
|
+
**kwargs: Keyword arguments representing column names and values to filter the Processing Set.
|
|
217
|
+
|
|
218
|
+
Returns:
|
|
219
|
+
processing_set: The subset of the Processing Set.
|
|
220
|
+
"""
|
|
146
221
|
import numpy as np
|
|
147
222
|
|
|
223
|
+
# def select_rows(df, col, input_strings):
|
|
224
|
+
# return df[df[col].apply(lambda x: any(i in x for i in input_strings))]
|
|
225
|
+
|
|
226
|
+
# def select_rows(df, col, sel, string_exact_match):
|
|
227
|
+
# def check_selection(row_val):
|
|
228
|
+
# if isinstance(row_val, numbers.Number) or string_exact_match:
|
|
229
|
+
# return any(i == row_val for i in sel) #If values are numbers
|
|
230
|
+
# return any(i in row_val for i in sel) #If values are strings
|
|
231
|
+
# return df[df[col].apply(check_selection)]
|
|
232
|
+
|
|
233
|
+
def select_rows(df, col, sel_vals, string_exact_match):
|
|
234
|
+
def check_selection(row_val):
|
|
235
|
+
row_val = to_list(
|
|
236
|
+
row_val
|
|
237
|
+
) # make sure that it is a list so that we can iterate over it.
|
|
238
|
+
|
|
239
|
+
for rw in row_val:
|
|
240
|
+
for s in sel_vals:
|
|
241
|
+
if string_exact_match:
|
|
242
|
+
if rw == s:
|
|
243
|
+
return True
|
|
244
|
+
else:
|
|
245
|
+
if s in rw:
|
|
246
|
+
return True
|
|
247
|
+
return False
|
|
248
|
+
|
|
249
|
+
return df[df[col].apply(check_selection)]
|
|
250
|
+
|
|
148
251
|
summary_table = self.summary()
|
|
149
252
|
for key, value in kwargs.items():
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
|
|
253
|
+
value = to_list(value) # make sure value is a list.
|
|
254
|
+
|
|
255
|
+
if len(value) == 1 and isinstance(value[0], slice):
|
|
153
256
|
summary_table = summary_table[
|
|
154
|
-
summary_table[key].between(value.start, value.stop)
|
|
257
|
+
summary_table[key].between(value[0].start, value[0].stop)
|
|
155
258
|
]
|
|
156
259
|
else:
|
|
157
|
-
summary_table =
|
|
260
|
+
summary_table = select_rows(
|
|
261
|
+
summary_table, key, value, string_exact_match
|
|
262
|
+
)
|
|
263
|
+
|
|
264
|
+
if query is not None:
|
|
265
|
+
summary_table = summary_table.query(query)
|
|
158
266
|
|
|
159
267
|
sub_ps = processing_set()
|
|
160
268
|
for key, val in self.items():
|
|
@@ -164,12 +272,30 @@ class processing_set(dict):
|
|
|
164
272
|
return sub_ps
|
|
165
273
|
|
|
166
274
|
def ms_sel(self, **kwargs):
|
|
275
|
+
"""
|
|
276
|
+
Selects a subset of the Processing Set by applying the `sel` method to each MS.
|
|
277
|
+
|
|
278
|
+
Args:
|
|
279
|
+
**kwargs: Keyword arguments representing column names and values to filter the Processing Set.
|
|
280
|
+
|
|
281
|
+
Returns:
|
|
282
|
+
processing_set: The subset of the Processing Set.
|
|
283
|
+
"""
|
|
167
284
|
sub_ps = processing_set()
|
|
168
285
|
for key, val in self.items():
|
|
169
286
|
sub_ps[key] = val.sel(kwargs)
|
|
170
287
|
return sub_ps
|
|
171
288
|
|
|
172
289
|
def ms_isel(self, **kwargs):
|
|
290
|
+
"""
|
|
291
|
+
Selects a subset of the Processing Set by applying the `isel` method to each MS.
|
|
292
|
+
|
|
293
|
+
Args:
|
|
294
|
+
**kwargs: Keyword arguments representing dimension names and indices to select from the Processing Set.
|
|
295
|
+
|
|
296
|
+
Returns:
|
|
297
|
+
processing_set: The subset of the Processing Set.
|
|
298
|
+
"""
|
|
173
299
|
sub_ps = processing_set()
|
|
174
300
|
for key, val in self.items():
|
|
175
301
|
sub_ps[key] = val.isel(kwargs)
|
|
@@ -455,6 +455,9 @@ def redimension_ms_subtable(xds: xr.Dataset, subt_name: str) -> xr.Dataset:
|
|
|
455
455
|
"SOURCE": ["SOURCE_ID", "TIME", "SPECTRAL_WINDOW_ID"],
|
|
456
456
|
"SYSCAL": ["ANTENNA_ID", "FEED_ID", "SPECTRAL_WINDOW_ID", "TIME"],
|
|
457
457
|
"WEATHER": ["ANTENNA_ID", "TIME"],
|
|
458
|
+
"PHASE_CAL": ["ANTENNA_ID", "TIME", "SPECTRAL_WINDOW_ID"],
|
|
459
|
+
"GAIN_CURVE": ["ANTENNA_ID", "TIME", "SPECTRAL_WINDOW_ID"],
|
|
460
|
+
"FEED": ["ANTENNA_ID", "SPECTRAL_WINDOW_ID"],
|
|
458
461
|
# added tables (MSv3 but not present in MSv2). Build it from "EPHEMi_..." tables
|
|
459
462
|
# Not clear what to do about 'time' var/dim: , "time"],
|
|
460
463
|
"EPHEMERIDES": ["ephemeris_row_id", "ephemeris_id"],
|
|
@@ -645,6 +648,9 @@ def load_generic_table(
|
|
|
645
648
|
"SOURCE",
|
|
646
649
|
"SYSCAL",
|
|
647
650
|
"WEATHER",
|
|
651
|
+
"PHASE_CAL",
|
|
652
|
+
"GAIN_CURVE",
|
|
653
|
+
"FEED",
|
|
648
654
|
]:
|
|
649
655
|
xds = redimension_ms_subtable(xds, tname)
|
|
650
656
|
|
|
@@ -944,6 +950,9 @@ def raw_col_data_to_coords_vars(
|
|
|
944
950
|
"SOURCE",
|
|
945
951
|
"SYSCAL",
|
|
946
952
|
"WEATHER",
|
|
953
|
+
"PHASE_CAL",
|
|
954
|
+
"GAIN_CURVE",
|
|
955
|
+
"FEED",
|
|
947
956
|
)
|
|
948
957
|
dim_prefix = "dim"
|
|
949
958
|
|
|
@@ -4,17 +4,22 @@ from .._zarr.encoding import add_encoding
|
|
|
4
4
|
from typing import Dict, Union
|
|
5
5
|
import graphviper.utils.logger as logger
|
|
6
6
|
import os
|
|
7
|
+
import pathlib
|
|
7
8
|
|
|
8
9
|
import numpy as np
|
|
9
10
|
import xarray as xr
|
|
10
11
|
|
|
11
12
|
from casacore import tables
|
|
12
|
-
from .msv4_sub_xdss import
|
|
13
|
+
from xradio.vis._vis_utils._ms.msv4_sub_xdss import (
|
|
14
|
+
create_pointing_xds,
|
|
15
|
+
create_weather_xds,
|
|
16
|
+
)
|
|
17
|
+
from xradio.vis._vis_utils._ms.create_antenna_xds import create_antenna_xds
|
|
13
18
|
from xradio.vis._vis_utils._ms.create_field_and_source_xds import (
|
|
14
19
|
create_field_and_source_xds,
|
|
15
20
|
)
|
|
21
|
+
from xradio._utils.schema import column_description_casacore_to_msv4_measure
|
|
16
22
|
from .msv2_to_msv4_meta import (
|
|
17
|
-
column_description_casacore_to_msv4_measure,
|
|
18
23
|
create_attribute_metadata,
|
|
19
24
|
col_to_data_variable_names,
|
|
20
25
|
col_dims,
|
|
@@ -400,7 +405,7 @@ def calc_indx_for_row_split(tb_tool, taql_where):
|
|
|
400
405
|
|
|
401
406
|
|
|
402
407
|
def create_coordinates(
|
|
403
|
-
xds, in_file, ddi, utime, interval, baseline_ant1_id, baseline_ant2_id
|
|
408
|
+
xds, in_file, ddi, utime, interval, baseline_ant1_id, baseline_ant2_id, scan_id
|
|
404
409
|
):
|
|
405
410
|
coords = {
|
|
406
411
|
"time": utime,
|
|
@@ -408,6 +413,7 @@ def create_coordinates(
|
|
|
408
413
|
"baseline_antenna2_id": ("baseline_id", baseline_ant2_id),
|
|
409
414
|
"uvw_label": ["u", "v", "w"],
|
|
410
415
|
"baseline_id": np.arange(len(baseline_ant1_id)),
|
|
416
|
+
"scan_number": ("time", scan_id),
|
|
411
417
|
}
|
|
412
418
|
|
|
413
419
|
ddi_xds = load_generic_table(in_file, "DATA_DESCRIPTION").sel(row=ddi)
|
|
@@ -446,12 +452,12 @@ def create_coordinates(
|
|
|
446
452
|
)
|
|
447
453
|
xds.frequency.attrs.update(msv4_measure)
|
|
448
454
|
|
|
449
|
-
|
|
450
|
-
|
|
451
|
-
):
|
|
455
|
+
spw_name = spectral_window_xds.NAME.values.item()
|
|
456
|
+
if (spw_name is None) or (spw_name == "none") or (spw_name == ""):
|
|
452
457
|
spw_name = "spw_" + str(spectral_window_id)
|
|
453
458
|
else:
|
|
454
|
-
spw_name = spectral_window_xds.NAME.values.item()
|
|
459
|
+
# spw_name = spectral_window_xds.NAME.values.item()
|
|
460
|
+
spw_name = spw_name + "_" + str(spectral_window_id)
|
|
455
461
|
|
|
456
462
|
xds.frequency.attrs["spectral_window_name"] = spw_name
|
|
457
463
|
msv4_measure = column_description_casacore_to_msv4_measure(
|
|
@@ -588,7 +594,7 @@ def create_data_variables(
|
|
|
588
594
|
logger.debug(
|
|
589
595
|
"Time to read column " + str(col) + " : " + str(time.time() - start)
|
|
590
596
|
)
|
|
591
|
-
except:
|
|
597
|
+
except Exception as e:
|
|
592
598
|
logger.debug("Could not load column", col)
|
|
593
599
|
|
|
594
600
|
if ("WEIGHT_SPECTRUM" == col) and (
|
|
@@ -606,6 +612,18 @@ def create_data_variables(
|
|
|
606
612
|
)
|
|
607
613
|
|
|
608
614
|
|
|
615
|
+
def add_missing_data_var_attrs(xds):
|
|
616
|
+
"""Adds in attributes expected metadata that cannot be found
|
|
617
|
+
in the input MSv2. For now specifically for missing
|
|
618
|
+
single-dish/SPECTRUM metadata"""
|
|
619
|
+
data_var_names = ["SPECTRUM", "SPECTRUM_CORRECTED"]
|
|
620
|
+
for var_name in data_var_names:
|
|
621
|
+
if var_name in xds.data_vars:
|
|
622
|
+
xds.data_vars[var_name].attrs["units"] = ["Jy"]
|
|
623
|
+
|
|
624
|
+
return xds
|
|
625
|
+
|
|
626
|
+
|
|
609
627
|
def get_weight(
|
|
610
628
|
xds,
|
|
611
629
|
col,
|
|
@@ -662,7 +680,7 @@ def create_taql_query(partition_info):
|
|
|
662
680
|
def convert_and_write_partition(
|
|
663
681
|
in_file: str,
|
|
664
682
|
out_file: str,
|
|
665
|
-
ms_v4_id: int,
|
|
683
|
+
ms_v4_id: Union[int, str],
|
|
666
684
|
partition_info: Dict,
|
|
667
685
|
use_table_iter: bool,
|
|
668
686
|
partition_scheme: str = "ddi_intent_field",
|
|
@@ -671,6 +689,7 @@ def convert_and_write_partition(
|
|
|
671
689
|
pointing_chunksize: Union[Dict, float, None] = None,
|
|
672
690
|
pointing_interpolate: bool = False,
|
|
673
691
|
ephemeris_interpolate: bool = False,
|
|
692
|
+
phase_cal_interpolate: bool = False,
|
|
674
693
|
compressor: numcodecs.abc.Codec = numcodecs.Zstd(level=2),
|
|
675
694
|
storage_backend="zarr",
|
|
676
695
|
overwrite: bool = False,
|
|
@@ -775,8 +794,19 @@ def convert_and_write_partition(
|
|
|
775
794
|
else:
|
|
776
795
|
interval = interval_unique[0]
|
|
777
796
|
|
|
797
|
+
scan_id = np.full(time_baseline_shape, -42, dtype=int)
|
|
798
|
+
scan_id[tidxs, bidxs] = tb_tool.getcol("SCAN_NUMBER")
|
|
799
|
+
scan_id = np.max(scan_id, axis=1)
|
|
800
|
+
|
|
778
801
|
xds = create_coordinates(
|
|
779
|
-
xds,
|
|
802
|
+
xds,
|
|
803
|
+
in_file,
|
|
804
|
+
ddi,
|
|
805
|
+
utime,
|
|
806
|
+
interval,
|
|
807
|
+
baseline_ant1_id,
|
|
808
|
+
baseline_ant2_id,
|
|
809
|
+
scan_id,
|
|
780
810
|
)
|
|
781
811
|
logger.debug("Time create coordinates " + str(time.time() - start))
|
|
782
812
|
|
|
@@ -792,9 +822,11 @@ def convert_and_write_partition(
|
|
|
792
822
|
use_table_iter,
|
|
793
823
|
)
|
|
794
824
|
|
|
795
|
-
# Add data_groups
|
|
825
|
+
# Add data_groups
|
|
796
826
|
xds, is_single_dish = add_data_groups(xds)
|
|
797
827
|
|
|
828
|
+
xds = add_missing_data_var_attrs(xds)
|
|
829
|
+
|
|
798
830
|
if (
|
|
799
831
|
"WEIGHT" not in xds.data_vars
|
|
800
832
|
): # Some single dish datasets don't have WEIGHT.
|
|
@@ -811,6 +843,9 @@ def convert_and_write_partition(
|
|
|
811
843
|
|
|
812
844
|
logger.debug("Time create data variables " + str(time.time() - start))
|
|
813
845
|
|
|
846
|
+
# To constrain the time range to load (in pointing, ephemerides, phase_cal data_vars)
|
|
847
|
+
time_min_max = find_min_max_times(tb_tool, taql_where)
|
|
848
|
+
|
|
814
849
|
# Create ant_xds
|
|
815
850
|
start = time.time()
|
|
816
851
|
feed_id = unique_1d(
|
|
@@ -826,17 +861,26 @@ def convert_and_write_partition(
|
|
|
826
861
|
[xds["baseline_antenna1_id"].data, xds["baseline_antenna2_id"].data]
|
|
827
862
|
)
|
|
828
863
|
)
|
|
864
|
+
if phase_cal_interpolate:
|
|
865
|
+
phase_cal_interp_time = xds.time.values
|
|
866
|
+
else:
|
|
867
|
+
phase_cal_interp_time = None
|
|
829
868
|
|
|
830
|
-
ant_xds =
|
|
869
|
+
ant_xds = create_antenna_xds(
|
|
831
870
|
in_file,
|
|
832
871
|
xds.frequency.attrs["spectral_window_id"],
|
|
833
872
|
antenna_id,
|
|
834
873
|
feed_id,
|
|
835
874
|
telescope_name,
|
|
875
|
+
time_min_max,
|
|
876
|
+
phase_cal_interp_time,
|
|
836
877
|
)
|
|
837
878
|
|
|
838
879
|
# Change antenna_ids to antenna_names
|
|
839
880
|
xds = antenna_ids_to_names(xds, ant_xds)
|
|
881
|
+
ant_xds = ant_xds.drop_vars(
|
|
882
|
+
"antenna_id"
|
|
883
|
+
) # No longer needed after converting to name.
|
|
840
884
|
|
|
841
885
|
logger.debug("Time ant xds " + str(time.time() - start))
|
|
842
886
|
|
|
@@ -845,9 +889,7 @@ def convert_and_write_partition(
|
|
|
845
889
|
weather_xds = create_weather_xds(in_file)
|
|
846
890
|
logger.debug("Time weather " + str(time.time() - start))
|
|
847
891
|
|
|
848
|
-
#
|
|
849
|
-
time_min_max = find_min_max_times(tb_tool, taql_where)
|
|
850
|
-
|
|
892
|
+
# Create pointing_xds
|
|
851
893
|
if with_pointing:
|
|
852
894
|
start = time.time()
|
|
853
895
|
if pointing_interpolate:
|
|
@@ -869,6 +911,7 @@ def convert_and_write_partition(
|
|
|
869
911
|
)
|
|
870
912
|
|
|
871
913
|
start = time.time()
|
|
914
|
+
xds.attrs["type"] = "visibility"
|
|
872
915
|
|
|
873
916
|
# Time and frequency should always be increasing
|
|
874
917
|
if len(xds.frequency) > 1 and xds.frequency[1] - xds.frequency[0] < 0:
|
|
@@ -884,10 +927,6 @@ def convert_and_write_partition(
|
|
|
884
927
|
else:
|
|
885
928
|
ephemeris_interp_time = None
|
|
886
929
|
|
|
887
|
-
scan_id = np.full(time_baseline_shape, -42, dtype=int)
|
|
888
|
-
scan_id[tidxs, bidxs] = tb_tool.getcol("SCAN_NUMBER")
|
|
889
|
-
scan_id = np.max(scan_id, axis=1)
|
|
890
|
-
|
|
891
930
|
if "FIELD_ID" not in partition_scheme:
|
|
892
931
|
field_id = np.full(time_baseline_shape, -42, dtype=int)
|
|
893
932
|
field_id[tidxs, bidxs] = tb_tool.getcol("FIELD_ID")
|
|
@@ -901,7 +940,7 @@ def convert_and_write_partition(
|
|
|
901
940
|
# assert len(col_unique) == 1, col_name + " is not consistent."
|
|
902
941
|
# return col_unique[0]
|
|
903
942
|
|
|
904
|
-
field_and_source_xds, source_id = create_field_and_source_xds(
|
|
943
|
+
field_and_source_xds, source_id, num_lines = create_field_and_source_xds(
|
|
905
944
|
in_file,
|
|
906
945
|
field_id,
|
|
907
946
|
xds.frequency.attrs["spectral_window_id"],
|
|
@@ -935,11 +974,20 @@ def convert_and_write_partition(
|
|
|
935
974
|
|
|
936
975
|
file_name = os.path.join(
|
|
937
976
|
out_file,
|
|
938
|
-
|
|
977
|
+
pathlib.Path(out_file)
|
|
978
|
+
.name.replace(".vis.zarr", "")
|
|
979
|
+
.replace(".zarr", "")
|
|
939
980
|
+ "_"
|
|
940
981
|
+ str(ms_v4_id),
|
|
941
982
|
)
|
|
942
983
|
|
|
984
|
+
if "line_name" in field_and_source_xds.coords:
|
|
985
|
+
line_name = to_list(
|
|
986
|
+
unique_1d(np.ravel(field_and_source_xds.line_name.values))
|
|
987
|
+
)
|
|
988
|
+
else:
|
|
989
|
+
line_name = []
|
|
990
|
+
|
|
943
991
|
xds.attrs["partition_info"] = {
|
|
944
992
|
# "spectral_window_id": xds.frequency.attrs["spectral_window_id"],
|
|
945
993
|
"spectral_window_name": xds.frequency.attrs["spectral_window_name"],
|
|
@@ -948,11 +996,14 @@ def convert_and_write_partition(
|
|
|
948
996
|
np.unique(field_and_source_xds.field_name.values)
|
|
949
997
|
),
|
|
950
998
|
# "source_id": to_list(unique_1d(source_id)),
|
|
999
|
+
"line_name": line_name,
|
|
1000
|
+
"scan_number": to_list(np.unique(scan_id)),
|
|
951
1001
|
"source_name": to_list(
|
|
952
1002
|
np.unique(field_and_source_xds.source_name.values)
|
|
953
1003
|
),
|
|
954
1004
|
"polarization_setup": to_list(xds.polarization.values),
|
|
955
|
-
"
|
|
1005
|
+
"num_lines": num_lines,
|
|
1006
|
+
"obs_mode": obs_mode.split(","),
|
|
956
1007
|
"taql": taql_where,
|
|
957
1008
|
}
|
|
958
1009
|
|
|
@@ -968,7 +1019,7 @@ def convert_and_write_partition(
|
|
|
968
1019
|
mode=mode,
|
|
969
1020
|
)
|
|
970
1021
|
|
|
971
|
-
if with_pointing:
|
|
1022
|
+
if with_pointing and len(pointing_xds.data_vars) > 1:
|
|
972
1023
|
pointing_xds.to_zarr(
|
|
973
1024
|
store=os.path.join(file_name, "POINTING"), mode=mode
|
|
974
1025
|
)
|
|
@@ -987,69 +1038,26 @@ def convert_and_write_partition(
|
|
|
987
1038
|
|
|
988
1039
|
|
|
989
1040
|
def antenna_ids_to_names(xds, ant_xds):
|
|
1041
|
+
ant_xds = ant_xds.set_xindex(
|
|
1042
|
+
"antenna_id"
|
|
1043
|
+
) # Allows for non-dimension coordinate selection.
|
|
990
1044
|
|
|
991
|
-
if
|
|
992
|
-
|
|
993
|
-
|
|
994
|
-
|
|
995
|
-
|
|
996
|
-
|
|
997
|
-
|
|
998
|
-
|
|
999
|
-
|
|
1000
|
-
|
|
1001
|
-
|
|
1002
|
-
|
|
1003
|
-
|
|
1004
|
-
|
|
1005
|
-
|
|
1006
|
-
|
|
1007
|
-
ant_xds["name"].sel(antenna_id=xds["baseline_antenna2_id"]).values, "_"
|
|
1008
|
-
)
|
|
1009
|
-
baseline_ant2_name = np.core.defchararray.add(
|
|
1010
|
-
baseline_ant2_name,
|
|
1011
|
-
ant_xds["station"].sel(antenna_id=xds["baseline_antenna2_id"]).values,
|
|
1012
|
-
)
|
|
1013
|
-
|
|
1014
|
-
xds["baseline_antenna1_id"] = xr.DataArray(
|
|
1015
|
-
baseline_ant1_name, dims="baseline_id"
|
|
1016
|
-
)
|
|
1017
|
-
xds["baseline_antenna2_id"] = xr.DataArray(
|
|
1018
|
-
baseline_ant2_name, dims="baseline_id"
|
|
1019
|
-
)
|
|
1020
|
-
xds = xds.rename(
|
|
1021
|
-
{
|
|
1022
|
-
"baseline_antenna1_id": "baseline_antenna1_name",
|
|
1023
|
-
"baseline_antenna2_id": "baseline_antenna2_name",
|
|
1024
|
-
}
|
|
1025
|
-
)
|
|
1026
|
-
else: # Single Dish
|
|
1027
|
-
antenna_name = np.core.defchararray.add(
|
|
1028
|
-
ant_xds["name"].sel(antenna_id=xds["antenna_id"]).values, "_"
|
|
1029
|
-
)
|
|
1030
|
-
antenna_name = np.core.defchararray.add(
|
|
1031
|
-
antenna_name,
|
|
1032
|
-
ant_xds["station"].sel(antenna_id=xds["antenna_id"]).values,
|
|
1033
|
-
)
|
|
1034
|
-
xds["antenna_id"] = xr.DataArray(antenna_name, dims="baseline_id")
|
|
1035
|
-
xds = xds.rename({"antenna_id": "antenna_name"})
|
|
1036
|
-
else:
|
|
1037
|
-
if "baseline_antenna1_id" in xds: # Interferometer
|
|
1038
|
-
xds["baseline_antenna1_id"] = ant_xds["name"].sel(
|
|
1039
|
-
antenna_id=xds["baseline_antenna1_id"]
|
|
1040
|
-
)
|
|
1041
|
-
xds["baseline_antenna2_id"] = ant_xds["name"].sel(
|
|
1042
|
-
antenna_id=xds["baseline_antenna2_id"]
|
|
1043
|
-
)
|
|
1044
|
-
xds = xds.rename(
|
|
1045
|
-
{
|
|
1046
|
-
"baseline_antenna1_id": "baseline_antenna1_name",
|
|
1047
|
-
"baseline_antenna2_id": "baseline_antenna2_name",
|
|
1048
|
-
}
|
|
1049
|
-
)
|
|
1050
|
-
else: # Single Dish
|
|
1051
|
-
xds["antenna_id"] = ant_xds["name"].sel(antenna_id=xds["antenna_id"])
|
|
1052
|
-
xds = xds.rename({"antenna_id": "antenna_name"})
|
|
1045
|
+
if "baseline_antenna1_id" in xds: # Interferometer
|
|
1046
|
+
xds["baseline_antenna1_id"] = ant_xds["antenna_name"].sel(
|
|
1047
|
+
antenna_id=xds["baseline_antenna1_id"]
|
|
1048
|
+
)
|
|
1049
|
+
xds["baseline_antenna2_id"] = ant_xds["antenna_name"].sel(
|
|
1050
|
+
antenna_id=xds["baseline_antenna2_id"]
|
|
1051
|
+
)
|
|
1052
|
+
xds = xds.rename(
|
|
1053
|
+
{
|
|
1054
|
+
"baseline_antenna1_id": "baseline_antenna1_name",
|
|
1055
|
+
"baseline_antenna2_id": "baseline_antenna2_name",
|
|
1056
|
+
}
|
|
1057
|
+
)
|
|
1058
|
+
else: # Single Dish
|
|
1059
|
+
xds["antenna_id"] = ant_xds["antenna_name"].sel(antenna_id=xds["antenna_id"])
|
|
1060
|
+
xds = xds.rename({"antenna_id": "antenna_name"})
|
|
1053
1061
|
|
|
1054
1062
|
return xds
|
|
1055
1063
|
|