xradio 0.0.33__py3-none-any.whl → 0.0.36__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- xradio/_utils/list_and_array.py +3 -1
- xradio/_utils/schema.py +190 -0
- xradio/_utils/zarr/common.py +11 -5
- xradio/image/_util/_zarr/xds_from_zarr.py +15 -2
- xradio/image/_util/_zarr/zarr_low_level.py +65 -14
- xradio/schema/bases.py +37 -8
- xradio/schema/check.py +15 -3
- xradio/schema/dataclass.py +2 -2
- xradio/vis/_processing_set.py +136 -10
- xradio/vis/_vis_utils/_ms/_tables/read.py +9 -0
- xradio/vis/_vis_utils/_ms/conversion.py +166 -116
- xradio/vis/_vis_utils/_ms/create_antenna_xds.py +479 -0
- xradio/vis/_vis_utils/_ms/create_field_and_source_xds.py +84 -42
- xradio/vis/_vis_utils/_ms/msv2_to_msv4_meta.py +1 -105
- xradio/vis/_vis_utils/_ms/msv4_sub_xdss.py +4 -224
- xradio/vis/_vis_utils/_utils/xds_helper.py +10 -2
- xradio/vis/convert_msv2_to_processing_set.py +6 -1
- xradio/vis/load_processing_set.py +2 -2
- xradio/vis/read_processing_set.py +5 -2
- xradio/vis/schema.py +348 -112
- {xradio-0.0.33.dist-info → xradio-0.0.36.dist-info}/METADATA +1 -1
- {xradio-0.0.33.dist-info → xradio-0.0.36.dist-info}/RECORD +25 -23
- {xradio-0.0.33.dist-info → xradio-0.0.36.dist-info}/WHEEL +1 -1
- {xradio-0.0.33.dist-info → xradio-0.0.36.dist-info}/LICENSE.txt +0 -0
- {xradio-0.0.33.dist-info → xradio-0.0.36.dist-info}/top_level.txt +0 -0
xradio/vis/_processing_set.py
CHANGED
|
@@ -1,19 +1,57 @@
|
|
|
1
1
|
import pandas as pd
|
|
2
|
+
from xradio._utils.list_and_array import to_list
|
|
3
|
+
import numbers
|
|
2
4
|
|
|
3
5
|
|
|
4
6
|
class processing_set(dict):
|
|
7
|
+
"""
|
|
8
|
+
A dictionary subclass representing a Processing Set (PS) that is a set of Measurement Sets v4 (MS).
|
|
9
|
+
|
|
10
|
+
This class extends the built-in `dict` class and provides additional methods for manipulating and selecting subsets of the Processing Set.
|
|
11
|
+
|
|
12
|
+
Attributes:
|
|
13
|
+
meta (dict): A dictionary containing metadata information about the Processing Set.
|
|
14
|
+
|
|
15
|
+
Methods:
|
|
16
|
+
summary(data_group="base"): Returns a summary of the Processing Set as a Pandas table.
|
|
17
|
+
get_ps_max_dims(): Returns the maximum dimension of all the MSs in the Processing Set.
|
|
18
|
+
get_ps_freq_axis(): Combines the frequency axis of all MSs.
|
|
19
|
+
sel(string_exact_match:bool=True, query:str=None, **kwargs): Selects a subset of the Processing Set based on column names and values or a Pandas query.
|
|
20
|
+
ms_sel(**kwargs): Selects a subset of the Processing Set by applying the `sel` method to each individual MS.
|
|
21
|
+
ms_isel(**kwargs): Selects a subset of the Processing Set by applying the `isel` method to each individual MS.
|
|
22
|
+
"""
|
|
23
|
+
|
|
5
24
|
def __init__(self, *args, **kwargs):
|
|
6
25
|
super().__init__(*args, **kwargs)
|
|
7
26
|
self.meta = {"summary": {}}
|
|
8
27
|
|
|
9
28
|
def summary(self, data_group="base"):
|
|
29
|
+
"""
|
|
30
|
+
Returns a summary of the Processing Set as a Pandas table.
|
|
31
|
+
|
|
32
|
+
Args:
|
|
33
|
+
data_group (str): The data group to summarize. Default is "base".
|
|
34
|
+
|
|
35
|
+
Returns:
|
|
36
|
+
pandas.DataFrame: A DataFrame containing the summary information.
|
|
37
|
+
"""
|
|
10
38
|
if data_group in self.meta["summary"]:
|
|
11
39
|
return self.meta["summary"][data_group]
|
|
12
40
|
else:
|
|
13
|
-
self.meta["summary"][data_group] = self._summary(data_group)
|
|
41
|
+
self.meta["summary"][data_group] = self._summary(data_group).sort_values(
|
|
42
|
+
by=["name"], ascending=True
|
|
43
|
+
)
|
|
14
44
|
return self.meta["summary"][data_group]
|
|
15
45
|
|
|
16
46
|
def get_ps_max_dims(self):
|
|
47
|
+
"""
|
|
48
|
+
Returns the maximum dimension of all the MSs in the Processing Set.
|
|
49
|
+
|
|
50
|
+
For example, if the Processing Set contains two MSs with dimensions (50, 20, 30) and (10, 30, 40), the maximum dimensions will be (50, 30, 40).
|
|
51
|
+
|
|
52
|
+
Returns:
|
|
53
|
+
dict: A dictionary containing the maximum dimensions of the Processing Set.
|
|
54
|
+
"""
|
|
17
55
|
if "max_dims" in self.meta:
|
|
18
56
|
return self.meta["max_dims"]
|
|
19
57
|
else:
|
|
@@ -21,6 +59,12 @@ class processing_set(dict):
|
|
|
21
59
|
return self.meta["max_dims"]
|
|
22
60
|
|
|
23
61
|
def get_ps_freq_axis(self):
|
|
62
|
+
"""
|
|
63
|
+
Combines the frequency axis of all MSs.
|
|
64
|
+
|
|
65
|
+
Returns:
|
|
66
|
+
xarray.DataArray: The frequency axis of the Processing Set.
|
|
67
|
+
"""
|
|
24
68
|
if "freq_axis" in self.meta:
|
|
25
69
|
return self.meta["freq_axis"]
|
|
26
70
|
else:
|
|
@@ -33,11 +77,14 @@ class processing_set(dict):
|
|
|
33
77
|
"obs_mode": [],
|
|
34
78
|
"shape": [],
|
|
35
79
|
"polarization": [],
|
|
80
|
+
"scan_number": [],
|
|
36
81
|
"spw_name": [],
|
|
37
82
|
# "field_id": [],
|
|
38
83
|
"field_name": [],
|
|
39
84
|
# "source_id": [],
|
|
40
85
|
"source_name": [],
|
|
86
|
+
# "num_lines": [],
|
|
87
|
+
"line_name": [],
|
|
41
88
|
"field_coords": [],
|
|
42
89
|
"start_frequency": [],
|
|
43
90
|
"end_frequency": [],
|
|
@@ -52,6 +99,9 @@ class processing_set(dict):
|
|
|
52
99
|
value.attrs["partition_info"]["spectral_window_name"]
|
|
53
100
|
)
|
|
54
101
|
summary_data["polarization"].append(value.polarization.values)
|
|
102
|
+
summary_data["scan_number"].append(
|
|
103
|
+
value.attrs["partition_info"]["scan_number"]
|
|
104
|
+
)
|
|
55
105
|
|
|
56
106
|
if "visibility" in value.attrs["data_groups"][data_group]:
|
|
57
107
|
data_name = value.attrs["data_groups"][data_group]["visibility"]
|
|
@@ -72,8 +122,14 @@ class processing_set(dict):
|
|
|
72
122
|
summary_data["source_name"].append(
|
|
73
123
|
value.attrs["partition_info"]["source_name"]
|
|
74
124
|
)
|
|
75
|
-
|
|
76
|
-
summary_data["
|
|
125
|
+
|
|
126
|
+
summary_data["line_name"].append(value.attrs["partition_info"]["line_name"])
|
|
127
|
+
|
|
128
|
+
# summary_data["num_lines"].append(value.attrs["partition_info"]["num_lines"])
|
|
129
|
+
summary_data["start_frequency"].append(
|
|
130
|
+
to_list(value["frequency"].values)[0]
|
|
131
|
+
)
|
|
132
|
+
summary_data["end_frequency"].append(to_list(value["frequency"].values)[-1])
|
|
77
133
|
|
|
78
134
|
if value[data_name].attrs["field_and_source_xds"].is_ephemeris:
|
|
79
135
|
summary_data["field_coords"].append("Ephemeris")
|
|
@@ -117,7 +173,7 @@ class processing_set(dict):
|
|
|
117
173
|
for ms_xds in self.values():
|
|
118
174
|
assert (
|
|
119
175
|
frame == ms_xds.frequency.attrs["frame"]
|
|
120
|
-
), "Frequency reference frame not consistent in
|
|
176
|
+
), "Frequency reference frame not consistent in Processing Set."
|
|
121
177
|
if ms_xds.frequency.attrs["spectral_window_id"] not in spw_ids:
|
|
122
178
|
spw_ids.append(ms_xds.frequency.attrs["spectral_window_id"])
|
|
123
179
|
freq_axis_list.append(ms_xds.frequency)
|
|
@@ -142,19 +198,71 @@ class processing_set(dict):
|
|
|
142
198
|
def get(self, id):
|
|
143
199
|
return self[list(self.keys())[id]]
|
|
144
200
|
|
|
145
|
-
def sel(self, **kwargs):
|
|
201
|
+
def sel(self, string_exact_match: bool = True, query: str = None, **kwargs):
|
|
202
|
+
"""
|
|
203
|
+
Selects a subset of the Processing Set based on column names and values or a Pandas query.
|
|
204
|
+
|
|
205
|
+
The following columns are supported: name, obs_mode, polarization, spw_name, field_name, source_name, field_coords, start_frequency, end_frequency.
|
|
206
|
+
|
|
207
|
+
This function does not apply any selection to the MS data itself, so no data is dropped. For example, if an MS has field_name=['field_0','field_10','field_08'] and ps.sel(field_name='field_0') is called, the resulting MS will still have field_name=['field_0','field_10','field_08'].
|
|
208
|
+
|
|
209
|
+
Examples:
|
|
210
|
+
ps.sel(obs_mode='OBSERVE_TARGET#ON_SOURCE', polarization=['RR', 'LL']) # Select all MSs with obs_mode 'OBSERVE_TARGET#ON_SOURCE' and polarization 'RR' or 'LL'.
|
|
211
|
+
ps.sel(query='start_frequency > 100e9 and end_frequency < 200e9') # Select all MSs with start_frequency greater than 100 GHz and end_frequency less than 200 GHz.
|
|
212
|
+
|
|
213
|
+
Args:
|
|
214
|
+
query (str): A Pandas query string. Default is None.
|
|
215
|
+
string_exact_match (bool): If True, the selection will be an exact match for string and string list columns. Default is True.
|
|
216
|
+
**kwargs: Keyword arguments representing column names and values to filter the Processing Set.
|
|
217
|
+
|
|
218
|
+
Returns:
|
|
219
|
+
processing_set: The subset of the Processing Set.
|
|
220
|
+
"""
|
|
146
221
|
import numpy as np
|
|
147
222
|
|
|
223
|
+
# def select_rows(df, col, input_strings):
|
|
224
|
+
# return df[df[col].apply(lambda x: any(i in x for i in input_strings))]
|
|
225
|
+
|
|
226
|
+
# def select_rows(df, col, sel, string_exact_match):
|
|
227
|
+
# def check_selection(row_val):
|
|
228
|
+
# if isinstance(row_val, numbers.Number) or string_exact_match:
|
|
229
|
+
# return any(i == row_val for i in sel) #If values are numbers
|
|
230
|
+
# return any(i in row_val for i in sel) #If values are strings
|
|
231
|
+
# return df[df[col].apply(check_selection)]
|
|
232
|
+
|
|
233
|
+
def select_rows(df, col, sel_vals, string_exact_match):
|
|
234
|
+
def check_selection(row_val):
|
|
235
|
+
row_val = to_list(
|
|
236
|
+
row_val
|
|
237
|
+
) # make sure that it is a list so that we can iterate over it.
|
|
238
|
+
|
|
239
|
+
for rw in row_val:
|
|
240
|
+
for s in sel_vals:
|
|
241
|
+
if string_exact_match:
|
|
242
|
+
if rw == s:
|
|
243
|
+
return True
|
|
244
|
+
else:
|
|
245
|
+
if s in rw:
|
|
246
|
+
return True
|
|
247
|
+
return False
|
|
248
|
+
|
|
249
|
+
return df[df[col].apply(check_selection)]
|
|
250
|
+
|
|
148
251
|
summary_table = self.summary()
|
|
149
252
|
for key, value in kwargs.items():
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
|
|
253
|
+
value = to_list(value) # make sure value is a list.
|
|
254
|
+
|
|
255
|
+
if len(value) == 1 and isinstance(value[0], slice):
|
|
153
256
|
summary_table = summary_table[
|
|
154
|
-
summary_table[key].between(value.start, value.stop)
|
|
257
|
+
summary_table[key].between(value[0].start, value[0].stop)
|
|
155
258
|
]
|
|
156
259
|
else:
|
|
157
|
-
summary_table =
|
|
260
|
+
summary_table = select_rows(
|
|
261
|
+
summary_table, key, value, string_exact_match
|
|
262
|
+
)
|
|
263
|
+
|
|
264
|
+
if query is not None:
|
|
265
|
+
summary_table = summary_table.query(query)
|
|
158
266
|
|
|
159
267
|
sub_ps = processing_set()
|
|
160
268
|
for key, val in self.items():
|
|
@@ -164,12 +272,30 @@ class processing_set(dict):
|
|
|
164
272
|
return sub_ps
|
|
165
273
|
|
|
166
274
|
def ms_sel(self, **kwargs):
|
|
275
|
+
"""
|
|
276
|
+
Selects a subset of the Processing Set by applying the `sel` method to each MS.
|
|
277
|
+
|
|
278
|
+
Args:
|
|
279
|
+
**kwargs: Keyword arguments representing column names and values to filter the Processing Set.
|
|
280
|
+
|
|
281
|
+
Returns:
|
|
282
|
+
processing_set: The subset of the Processing Set.
|
|
283
|
+
"""
|
|
167
284
|
sub_ps = processing_set()
|
|
168
285
|
for key, val in self.items():
|
|
169
286
|
sub_ps[key] = val.sel(kwargs)
|
|
170
287
|
return sub_ps
|
|
171
288
|
|
|
172
289
|
def ms_isel(self, **kwargs):
|
|
290
|
+
"""
|
|
291
|
+
Selects a subset of the Processing Set by applying the `isel` method to each MS.
|
|
292
|
+
|
|
293
|
+
Args:
|
|
294
|
+
**kwargs: Keyword arguments representing dimension names and indices to select from the Processing Set.
|
|
295
|
+
|
|
296
|
+
Returns:
|
|
297
|
+
processing_set: The subset of the Processing Set.
|
|
298
|
+
"""
|
|
173
299
|
sub_ps = processing_set()
|
|
174
300
|
for key, val in self.items():
|
|
175
301
|
sub_ps[key] = val.isel(kwargs)
|
|
@@ -455,6 +455,9 @@ def redimension_ms_subtable(xds: xr.Dataset, subt_name: str) -> xr.Dataset:
|
|
|
455
455
|
"SOURCE": ["SOURCE_ID", "TIME", "SPECTRAL_WINDOW_ID"],
|
|
456
456
|
"SYSCAL": ["ANTENNA_ID", "FEED_ID", "SPECTRAL_WINDOW_ID", "TIME"],
|
|
457
457
|
"WEATHER": ["ANTENNA_ID", "TIME"],
|
|
458
|
+
"PHASE_CAL": ["ANTENNA_ID", "TIME", "SPECTRAL_WINDOW_ID"],
|
|
459
|
+
"GAIN_CURVE": ["ANTENNA_ID", "TIME", "SPECTRAL_WINDOW_ID"],
|
|
460
|
+
"FEED": ["ANTENNA_ID", "SPECTRAL_WINDOW_ID"],
|
|
458
461
|
# added tables (MSv3 but not present in MSv2). Build it from "EPHEMi_..." tables
|
|
459
462
|
# Not clear what to do about 'time' var/dim: , "time"],
|
|
460
463
|
"EPHEMERIDES": ["ephemeris_row_id", "ephemeris_id"],
|
|
@@ -645,6 +648,9 @@ def load_generic_table(
|
|
|
645
648
|
"SOURCE",
|
|
646
649
|
"SYSCAL",
|
|
647
650
|
"WEATHER",
|
|
651
|
+
"PHASE_CAL",
|
|
652
|
+
"GAIN_CURVE",
|
|
653
|
+
"FEED",
|
|
648
654
|
]:
|
|
649
655
|
xds = redimension_ms_subtable(xds, tname)
|
|
650
656
|
|
|
@@ -944,6 +950,9 @@ def raw_col_data_to_coords_vars(
|
|
|
944
950
|
"SOURCE",
|
|
945
951
|
"SYSCAL",
|
|
946
952
|
"WEATHER",
|
|
953
|
+
"PHASE_CAL",
|
|
954
|
+
"GAIN_CURVE",
|
|
955
|
+
"FEED",
|
|
947
956
|
)
|
|
948
957
|
dim_prefix = "dim"
|
|
949
958
|
|