xradio 0.0.33__py3-none-any.whl → 0.0.36__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,19 +1,57 @@
1
1
  import pandas as pd
2
+ from xradio._utils.list_and_array import to_list
3
+ import numbers
2
4
 
3
5
 
4
6
  class processing_set(dict):
7
+ """
8
+ A dictionary subclass representing a Processing Set (PS) that is a set of Measurement Sets v4 (MS).
9
+
10
+ This class extends the built-in `dict` class and provides additional methods for manipulating and selecting subsets of the Processing Set.
11
+
12
+ Attributes:
13
+ meta (dict): A dictionary containing metadata information about the Processing Set.
14
+
15
+ Methods:
16
+ summary(data_group="base"): Returns a summary of the Processing Set as a Pandas table.
17
+ get_ps_max_dims(): Returns the maximum dimension of all the MSs in the Processing Set.
18
+ get_ps_freq_axis(): Combines the frequency axis of all MSs.
19
+ sel(string_exact_match:bool=True, query:str=None, **kwargs): Selects a subset of the Processing Set based on column names and values or a Pandas query.
20
+ ms_sel(**kwargs): Selects a subset of the Processing Set by applying the `sel` method to each individual MS.
21
+ ms_isel(**kwargs): Selects a subset of the Processing Set by applying the `isel` method to each individual MS.
22
+ """
23
+
5
24
  def __init__(self, *args, **kwargs):
6
25
  super().__init__(*args, **kwargs)
7
26
  self.meta = {"summary": {}}
8
27
 
9
28
  def summary(self, data_group="base"):
29
+ """
30
+ Returns a summary of the Processing Set as a Pandas table.
31
+
32
+ Args:
33
+ data_group (str): The data group to summarize. Default is "base".
34
+
35
+ Returns:
36
+ pandas.DataFrame: A DataFrame containing the summary information.
37
+ """
10
38
  if data_group in self.meta["summary"]:
11
39
  return self.meta["summary"][data_group]
12
40
  else:
13
- self.meta["summary"][data_group] = self._summary(data_group)
41
+ self.meta["summary"][data_group] = self._summary(data_group).sort_values(
42
+ by=["name"], ascending=True
43
+ )
14
44
  return self.meta["summary"][data_group]
15
45
 
16
46
  def get_ps_max_dims(self):
47
+ """
48
+ Returns the maximum dimension of all the MSs in the Processing Set.
49
+
50
+ For example, if the Processing Set contains two MSs with dimensions (50, 20, 30) and (10, 30, 40), the maximum dimensions will be (50, 30, 40).
51
+
52
+ Returns:
53
+ dict: A dictionary containing the maximum dimensions of the Processing Set.
54
+ """
17
55
  if "max_dims" in self.meta:
18
56
  return self.meta["max_dims"]
19
57
  else:
@@ -21,6 +59,12 @@ class processing_set(dict):
21
59
  return self.meta["max_dims"]
22
60
 
23
61
  def get_ps_freq_axis(self):
62
+ """
63
+ Combines the frequency axis of all MSs.
64
+
65
+ Returns:
66
+ xarray.DataArray: The frequency axis of the Processing Set.
67
+ """
24
68
  if "freq_axis" in self.meta:
25
69
  return self.meta["freq_axis"]
26
70
  else:
@@ -33,11 +77,14 @@ class processing_set(dict):
33
77
  "obs_mode": [],
34
78
  "shape": [],
35
79
  "polarization": [],
80
+ "scan_number": [],
36
81
  "spw_name": [],
37
82
  # "field_id": [],
38
83
  "field_name": [],
39
84
  # "source_id": [],
40
85
  "source_name": [],
86
+ # "num_lines": [],
87
+ "line_name": [],
41
88
  "field_coords": [],
42
89
  "start_frequency": [],
43
90
  "end_frequency": [],
@@ -52,6 +99,9 @@ class processing_set(dict):
52
99
  value.attrs["partition_info"]["spectral_window_name"]
53
100
  )
54
101
  summary_data["polarization"].append(value.polarization.values)
102
+ summary_data["scan_number"].append(
103
+ value.attrs["partition_info"]["scan_number"]
104
+ )
55
105
 
56
106
  if "visibility" in value.attrs["data_groups"][data_group]:
57
107
  data_name = value.attrs["data_groups"][data_group]["visibility"]
@@ -72,8 +122,14 @@ class processing_set(dict):
72
122
  summary_data["source_name"].append(
73
123
  value.attrs["partition_info"]["source_name"]
74
124
  )
75
- summary_data["start_frequency"].append(value["frequency"].values[0])
76
- summary_data["end_frequency"].append(value["frequency"].values[-1])
125
+
126
+ summary_data["line_name"].append(value.attrs["partition_info"]["line_name"])
127
+
128
+ # summary_data["num_lines"].append(value.attrs["partition_info"]["num_lines"])
129
+ summary_data["start_frequency"].append(
130
+ to_list(value["frequency"].values)[0]
131
+ )
132
+ summary_data["end_frequency"].append(to_list(value["frequency"].values)[-1])
77
133
 
78
134
  if value[data_name].attrs["field_and_source_xds"].is_ephemeris:
79
135
  summary_data["field_coords"].append("Ephemeris")
@@ -117,7 +173,7 @@ class processing_set(dict):
117
173
  for ms_xds in self.values():
118
174
  assert (
119
175
  frame == ms_xds.frequency.attrs["frame"]
120
- ), "Frequency reference frame not consistent in processing set."
176
+ ), "Frequency reference frame not consistent in Processing Set."
121
177
  if ms_xds.frequency.attrs["spectral_window_id"] not in spw_ids:
122
178
  spw_ids.append(ms_xds.frequency.attrs["spectral_window_id"])
123
179
  freq_axis_list.append(ms_xds.frequency)
@@ -142,19 +198,71 @@ class processing_set(dict):
142
198
  def get(self, id):
143
199
  return self[list(self.keys())[id]]
144
200
 
145
- def sel(self, **kwargs):
201
+ def sel(self, string_exact_match: bool = True, query: str = None, **kwargs):
202
+ """
203
+ Selects a subset of the Processing Set based on column names and values or a Pandas query.
204
+
205
+ The following columns are supported: name, obs_mode, polarization, spw_name, field_name, source_name, field_coords, start_frequency, end_frequency.
206
+
207
+ This function does not apply any selection to the MS data itself, so no data is dropped. For example, if an MS has field_name=['field_0','field_10','field_08'] and ps.sel(field_name='field_0') is called, the resulting MS will still have field_name=['field_0','field_10','field_08'].
208
+
209
+ Examples:
210
+ ps.sel(obs_mode='OBSERVE_TARGET#ON_SOURCE', polarization=['RR', 'LL']) # Select all MSs with obs_mode 'OBSERVE_TARGET#ON_SOURCE' and polarization 'RR' or 'LL'.
211
+ ps.sel(query='start_frequency > 100e9 and end_frequency < 200e9') # Select all MSs with start_frequency greater than 100 GHz and end_frequency less than 200 GHz.
212
+
213
+ Args:
214
+ query (str): A Pandas query string. Default is None.
215
+ string_exact_match (bool): If True, the selection will be an exact match for string and string list columns. Default is True.
216
+ **kwargs: Keyword arguments representing column names and values to filter the Processing Set.
217
+
218
+ Returns:
219
+ processing_set: The subset of the Processing Set.
220
+ """
146
221
  import numpy as np
147
222
 
223
+ # def select_rows(df, col, input_strings):
224
+ # return df[df[col].apply(lambda x: any(i in x for i in input_strings))]
225
+
226
+ # def select_rows(df, col, sel, string_exact_match):
227
+ # def check_selection(row_val):
228
+ # if isinstance(row_val, numbers.Number) or string_exact_match:
229
+ # return any(i == row_val for i in sel) #If values are numbers
230
+ # return any(i in row_val for i in sel) #If values are strings
231
+ # return df[df[col].apply(check_selection)]
232
+
233
+ def select_rows(df, col, sel_vals, string_exact_match):
234
+ def check_selection(row_val):
235
+ row_val = to_list(
236
+ row_val
237
+ ) # make sure that it is a list so that we can iterate over it.
238
+
239
+ for rw in row_val:
240
+ for s in sel_vals:
241
+ if string_exact_match:
242
+ if rw == s:
243
+ return True
244
+ else:
245
+ if s in rw:
246
+ return True
247
+ return False
248
+
249
+ return df[df[col].apply(check_selection)]
250
+
148
251
  summary_table = self.summary()
149
252
  for key, value in kwargs.items():
150
- if isinstance(value, list) or isinstance(value, np.ndarray):
151
- summary_table = summary_table[summary_table[key].isin(value)]
152
- elif isinstance(value, slice):
253
+ value = to_list(value) # make sure value is a list.
254
+
255
+ if len(value) == 1 and isinstance(value[0], slice):
153
256
  summary_table = summary_table[
154
- summary_table[key].between(value.start, value.stop)
257
+ summary_table[key].between(value[0].start, value[0].stop)
155
258
  ]
156
259
  else:
157
- summary_table = summary_table[summary_table[key] == value]
260
+ summary_table = select_rows(
261
+ summary_table, key, value, string_exact_match
262
+ )
263
+
264
+ if query is not None:
265
+ summary_table = summary_table.query(query)
158
266
 
159
267
  sub_ps = processing_set()
160
268
  for key, val in self.items():
@@ -164,12 +272,30 @@ class processing_set(dict):
164
272
  return sub_ps
165
273
 
166
274
  def ms_sel(self, **kwargs):
275
+ """
276
+ Selects a subset of the Processing Set by applying the `sel` method to each MS.
277
+
278
+ Args:
279
+ **kwargs: Keyword arguments representing column names and values to filter the Processing Set.
280
+
281
+ Returns:
282
+ processing_set: The subset of the Processing Set.
283
+ """
167
284
  sub_ps = processing_set()
168
285
  for key, val in self.items():
169
286
  sub_ps[key] = val.sel(kwargs)
170
287
  return sub_ps
171
288
 
172
289
  def ms_isel(self, **kwargs):
290
+ """
291
+ Selects a subset of the Processing Set by applying the `isel` method to each MS.
292
+
293
+ Args:
294
+ **kwargs: Keyword arguments representing dimension names and indices to select from the Processing Set.
295
+
296
+ Returns:
297
+ processing_set: The subset of the Processing Set.
298
+ """
173
299
  sub_ps = processing_set()
174
300
  for key, val in self.items():
175
301
  sub_ps[key] = val.isel(kwargs)
@@ -455,6 +455,9 @@ def redimension_ms_subtable(xds: xr.Dataset, subt_name: str) -> xr.Dataset:
455
455
  "SOURCE": ["SOURCE_ID", "TIME", "SPECTRAL_WINDOW_ID"],
456
456
  "SYSCAL": ["ANTENNA_ID", "FEED_ID", "SPECTRAL_WINDOW_ID", "TIME"],
457
457
  "WEATHER": ["ANTENNA_ID", "TIME"],
458
+ "PHASE_CAL": ["ANTENNA_ID", "TIME", "SPECTRAL_WINDOW_ID"],
459
+ "GAIN_CURVE": ["ANTENNA_ID", "TIME", "SPECTRAL_WINDOW_ID"],
460
+ "FEED": ["ANTENNA_ID", "SPECTRAL_WINDOW_ID"],
458
461
  # added tables (MSv3 but not present in MSv2). Build it from "EPHEMi_... tables
459
462
  # Not clear what to do about 'time' var/dim: , "time"],
460
463
  "EPHEMERIDES": ["ephemeris_row_id", "ephemeris_id"],
@@ -645,6 +648,9 @@ def load_generic_table(
645
648
  "SOURCE",
646
649
  "SYSCAL",
647
650
  "WEATHER",
651
+ "PHASE_CAL",
652
+ "GAIN_CURVE",
653
+ "FEED",
648
654
  ]:
649
655
  xds = redimension_ms_subtable(xds, tname)
650
656
 
@@ -944,6 +950,9 @@ def raw_col_data_to_coords_vars(
944
950
  "SOURCE",
945
951
  "SYSCAL",
946
952
  "WEATHER",
953
+ "PHASE_CAL",
954
+ "GAIN_CURVE",
955
+ "FEED",
947
956
  )
948
957
  dim_prefix = "dim"
949
958