xradio 0.0.60__py3-none-any.whl → 1.0.1__py3-none-any.whl

This diff shows the changes between two publicly released versions of the package, as they appear in their respective public registries. It is provided for informational purposes only.
@@ -1,5 +1,8 @@
 import itertools
+import time
 import toolviper.utils.logger as logger
+import os
+import pandas as pd
 
 import numpy as np
 
@@ -17,6 +20,9 @@ def enumerated_product(*args):
     )
 
 
+import pickle, gzip
+
+
 def create_partitions(in_file: str, partition_scheme: list) -> list[dict]:
     """Create a list of dictionaries with the partition information.
 
@@ -34,38 +40,53 @@ def create_partitions(in_file: str, partition_scheme: list) -> list[dict]:
     list
         list of dictionaries with the partition information.
     """
-    # vla_otf (bool, optional): The partioning of VLA OTF (on the fly) mosaics needs a special partitioning scheme. Defaults to False.
 
-    # Create partition table
-    import pandas as pd
-    import os
+    ### Test new implementation without
+    # Always start with these (if available); then extend with user scheme.
+    partition_scheme = [
+        "DATA_DESC_ID",
+        "OBS_MODE",
+        "OBSERVATION_ID",
+        "EPHEMERIS_ID",
+    ] + list(partition_scheme)
 
-    partition_scheme = ["DATA_DESC_ID", "OBS_MODE", "OBSERVATION_ID"] + partition_scheme
+    # partition_scheme = ["DATA_DESC_ID", "OBS_MODE"] + list(
+    #     partition_scheme
+    # )
 
-    # Open MSv2 tables and add columns to partition table (par_df):
-    par_df = pd.DataFrame()
+    t0 = time.time()
+    # --------- Load base columns from MAIN table ----------
     main_tb = tables.table(
         in_file, readonly=True, lockoptions={"option": "usernoread"}, ack=False
     )
-    par_df["DATA_DESC_ID"] = main_tb.getcol("DATA_DESC_ID")
-    par_df["FIELD_ID"] = main_tb.getcol("FIELD_ID")
-    par_df["SCAN_NUMBER"] = main_tb.getcol("SCAN_NUMBER")
-    par_df["STATE_ID"] = main_tb.getcol("STATE_ID")
-    par_df["OBSERVATION_ID"] = main_tb.getcol("OBSERVATION_ID")
-    par_df["ANTENNA1"] = main_tb.getcol("ANTENNA1")
-    par_df = par_df.drop_duplicates()
 
+    # Build minimal DF once. Pull only columns we may need.
+    # Add columns here if you expect to aggregate them per-partition.
+    base_cols = {
+        "DATA_DESC_ID": main_tb.getcol("DATA_DESC_ID"),
+        "FIELD_ID": main_tb.getcol("FIELD_ID"),
+        "SCAN_NUMBER": main_tb.getcol("SCAN_NUMBER"),
+        "STATE_ID": main_tb.getcol("STATE_ID"),
+        "OBSERVATION_ID": main_tb.getcol("OBSERVATION_ID"),
+        "ANTENNA1": main_tb.getcol("ANTENNA1"),
+    }
+    par_df = pd.DataFrame(base_cols).drop_duplicates()
+    logger.debug(
+        f"Loaded MAIN columns in {time.time() - t0:.2f}s "
+        f"({len(par_df):,} unique MAIN rows)"
+    )
+
+    # --------- Optional SOURCE/STATE derived columns ----------
+    # SOURCE_ID (via FIELD table)
+    t1 = time.time()
+    source_id_added = False
     field_tb = tables.table(
         os.path.join(in_file, "FIELD"),
         readonly=True,
         lockoptions={"option": "usernoread"},
         ack=False,
     )
-    # if vla_otf:
-    #     par_df["FIELD_NAME"] = np.array(field_tb.getcol("NAME"))[par_df["FIELD_ID"]]
-
-    # Get source ids if available from source table.
-    if table_exists(os.path.join(os.path.join(in_file, "SOURCE"))):
+    if table_exists(os.path.join(in_file, "SOURCE")):
        source_tb = tables.table(
             os.path.join(in_file, "SOURCE"),
             readonly=True,
@@ -73,13 +94,31 @@ def create_partitions(in_file: str, partition_scheme: list) -> list[dict]:
             ack=False,
         )
         if source_tb.nrows() != 0:
-            par_df["SOURCE_ID"] = field_tb.getcol("SOURCE_ID")[par_df["FIELD_ID"]]
-            # if vla_otf:
-            #     par_df["SOURCE_NAME"] = np.array(source_tb.getcol("NAME"))[
-            #         par_df["SOURCE_ID"]
-            #     ]
+            # Map SOURCE_ID via FIELD_ID
+            field_source = np.asarray(field_tb.getcol("SOURCE_ID"))
+            par_df["SOURCE_ID"] = field_source[par_df["FIELD_ID"]]
+            source_id_added = True
+    logger.debug(
+        f"SOURCE processing in {time.time() - t1:.2f}s "
+        f"(added SOURCE_ID={source_id_added})"
+    )
+
+    if "EPHEMERIS_ID" in field_tb.colnames():
+        ephemeris_id_added = False
+        if field_tb.nrows() != 0:
+            # Map EPHEMERIS_ID via FIELD_ID
+            field_ephemeris = np.asarray(field_tb.getcol("EPHEMERIS_ID"))
+            par_df["EPHEMERIS_ID"] = field_ephemeris[par_df["FIELD_ID"]]
+            ephemeris_id_added = True
+        logger.debug(
+            f"EPHEMERIS processing in {time.time() - t1:.2f}s "
+            f"(added EPHEMERIS_ID={ephemeris_id_added})"
+        )
 
-    # Get intents and subscan numbers if available from state table.
+    # OBS_MODE & SUB_SCAN_NUMBER (via STATE table)
+    t2 = time.time()
+    obs_mode_added = False
+    sub_scan_added = False
     if table_exists(os.path.join(in_file, "STATE")):
         state_tb = tables.table(
             os.path.join(in_file, "STATE"),
@@ -88,30 +127,36 @@ def create_partitions(in_file: str, partition_scheme: list) -> list[dict]:
             ack=False,
         )
         if state_tb.nrows() != 0:
-            # print('state_tb',state_tb.nrows(),state_tb)
-            par_df["OBS_MODE"] = np.array(state_tb.getcol("OBS_MODE"))[
-                par_df["STATE_ID"]
-            ]
-            par_df["SUB_SCAN_NUMBER"] = state_tb.getcol("SUB_SCAN")[par_df["STATE_ID"]]
+            state_obs_mode = np.asarray(state_tb.getcol("OBS_MODE"))
+            state_sub_scan = np.asarray(state_tb.getcol("SUB_SCAN"))
+            # Index by STATE_ID into STATE columns
+            par_df["OBS_MODE"] = state_obs_mode[par_df["STATE_ID"]]
+            par_df["SUB_SCAN_NUMBER"] = state_sub_scan[par_df["STATE_ID"]]
+            obs_mode_added = True
+            sub_scan_added = True
         else:
-            par_df.drop(["STATE_ID"], axis=1)
+            # If STATE empty, drop STATE_ID (it cannot partition anything)
+            if "STATE_ID" in par_df.columns:
+                par_df.drop(columns=["STATE_ID"], inplace=True)
 
-    # Check if all partition scheme criteria are present in the partition table.
-    partition_scheme_updated = []
-    partition_criteria = {}
-    for par in partition_scheme:
-        if par in par_df.columns:
-            partition_criteria[par] = par_df[par].unique()
-            partition_scheme_updated.append(par)
-    logger.info(f"Partition scheme that will be used: {partition_scheme_updated}")
+            if "SUB_SCAN_NUMBER" in par_df.columns:
+                par_df.drop(columns=["SUB_SCAN_NUMBER"], inplace=True)
 
-    # Make all possible combinations of the partition criteria.
-    enumerated_partitions = enumerated_product(*list(partition_criteria.values()))
+    logger.debug(
+        f"STATE processing in {time.time() - t2:.2f}s "
+        f"(OBS_MODE={obs_mode_added}, SUB_SCAN_NUMBER={sub_scan_added})"
+    )
 
-    # print('par_df',par_df)
+    # --------- Decide which partition keys are actually available ----------
+    t3 = time.time()
+    partition_scheme_updated = [k for k in partition_scheme if k in par_df.columns]
+    logger.info(f"Updated partition scheme used: {partition_scheme_updated}")
 
-    # Create a list of dictionaries with the partition information. This will be used to query the MSv2 main table.
-    partitions = []
+    # If none of the requested keys exist, there is a single partition of "everything"
+    if not partition_scheme_updated:
+        partition_scheme_updated = []
+
+    # These are the axes we report per partition (present => aggregate unique values)
     partition_axis_names = [
         "DATA_DESC_ID",
         "OBSERVATION_ID",
@@ -121,30 +166,172 @@ def create_partitions(in_file: str, partition_scheme: list) -> list[dict]:
         "SOURCE_ID",
         "OBS_MODE",
         "SUB_SCAN_NUMBER",
+        "EPHEMERIS_ID",
     ]
+    # Only include ANTENNA1 if user asked for it (keeps output size down)
     if "ANTENNA1" in partition_scheme:
         partition_axis_names.append("ANTENNA1")
 
-    for idx, pair in enumerated_partitions:
-        query = ""
-        for i, par in enumerate(partition_scheme_updated):
-            if isinstance(pair[i], str):
-                query = query + f'{par} == "{pair[i]}" and '
+    # --------- Group only by realized partitions (no Cartesian product!) ----------
+    # observed=True speeds up if categorical; here it’s harmless. sort=False keeps source order.
+    if partition_scheme_updated:
+        grp = par_df.groupby(partition_scheme_updated, sort=False, observed=False)
+        groups_iter = grp
+    else:
+        # Single group: everything
+        groups_iter = [(None, par_df)]
+
+    partitions = []
+    # Fast aggregation: use NumPy for uniques to avoid pandas overhead in the tight loop.
+    for _, gdf in groups_iter:
+        part = {}
+        for name in partition_axis_names:
+            if name in gdf.columns:
+                # Return Python lists to match your prior structure (can be np.ndarray if preferred)
+                part[name] = np.unique(gdf[name].to_numpy()).tolist()
             else:
-                query = query + f"{par} == {pair[i]} and "
-        query = query[:-4]  # remove last and
-        sub_par_df = par_df.query(query).drop_duplicates()
+                part[name] = [None]
+        partitions.append(part)
 
-        if sub_par_df.shape[0] != 0:
-            partition_info = {}
+    logger.debug(
+        f"Partition build in {time.time() - t3:.2f}s; total {len(partitions):,} partitions"
+    )
+    logger.debug(f"Total create_partitions time: {time.time() - t0:.2f}s")
 
-            # FIELD_NAME SOURCE_NAME
-            for col_name in partition_axis_names:
-                if col_name in sub_par_df.columns:
-                    partition_info[col_name] = sub_par_df[col_name].unique()
-                else:
-                    partition_info[col_name] = [None]
+    # # with gzip.open("partition_original_small.pkl.gz", "wb") as f:
+    # #     pickle.dump(partitions, f, protocol=pickle.HIGHEST_PROTOCOL)
 
-            partitions.append(partition_info)
+    # #partitions[1]["DATA_DESC_ID"] = [999] # make a change to test comparison
+    # #org_partitions = load_dict_list("partition_original_small.pkl.gz")
+    # org_partitions = load_dict_list("partition_original.pkl.gz")
 
     return partitions
+
+
+from typing import Any, List, Dict
+
+
+def save_dict_list(filename: str, data: List[Dict[str, Any]]) -> None:
+    """
+    Save a list of dictionaries containing NumPy arrays (or other objects)
+    to a compressed pickle file.
+    """
+    with gzip.open(filename, "wb") as f:
+        pickle.dump(data, f, protocol=pickle.HIGHEST_PROTOCOL)
+
+
+def load_dict_list(filename: str) -> List[Dict[str, Any]]:
+    """
+    Load a list of dictionaries containing NumPy arrays (or other objects)
+    from a compressed pickle file.
+    """
+    with gzip.open(filename, "rb") as f:
+        return pickle.load(f)
+
+
+def dict_list_equal(a: List[Dict[str, Any]], b: List[Dict[str, Any]]) -> bool:
+    """
+    Compare two lists of dictionaries to ensure they are exactly the same.
+    NumPy arrays are compared with array_equal, other objects with ==.
+    """
+    if len(a) != len(b):
+        return False
+
+    for d1, d2 in zip(a, b):
+        if d1.keys() != d2.keys():
+            return False
+        for k in d1:
+            v1, v2 = d1[k], d2[k]
+            if isinstance(v1, np.ndarray) and isinstance(v2, np.ndarray):
+                if not np.array_equal(v1, v2):
+                    return False
+            else:
+                if v1 != v2:
+                    return False
+    return True
+
+
+from typing import Iterable, Mapping, Tuple, List, Dict, Any, Set
+import numpy as np
+
+
+def _to_python_scalar(x: Any) -> Any:
+    """Convert NumPy scalars to Python scalars; leave others unchanged."""
+    if isinstance(x, np.generic):
+        return x.item()
+    return x
+
+
+def _to_hashable_value_list(v: Any) -> Tuple[Any, ...]:
+    """
+    Normalize a dict value (often list/np.ndarray) into a sorted, hashable tuple.
+    - Accepts list/tuple/np.ndarray/scalars/None.
+    - Treats None as a value.
+    - Sorts with a stable key that stringifies items to avoid dtype hiccups.
+    """
+    if isinstance(v, np.ndarray):
+        v = v.tolist()
+    if v is None or isinstance(v, (str, bytes)):
+        # Treat a bare scalar as a single-element collection for consistency.
+        v = [v]
+    elif not isinstance(v, (list, tuple)):
+        v = [v]
+
+    py_vals = [_to_python_scalar(x) for x in v]
+    # Sort by (type name, repr) to keep mixed types stable if present
+    return tuple(sorted(py_vals, key=lambda x: (type(x).__name__, repr(x))))
+
+
+def _canon_partition(
+    d: Mapping[str, Any], ignore_keys: Iterable[str] = ()
+) -> Tuple[Tuple[str, Tuple[Any, ...]], ...]:
+    """
+    Canonicalize a partition dict into a hashable, order-insensitive representation.
+    - Drops keys in ignore_keys.
+    - Converts each value collection to a sorted tuple.
+    - Sorts keys.
+    """
+    ign: Set[str] = set(ignore_keys)
+    items = []
+    for k, v in d.items():
+        if k in ign:
+            continue
+        items.append((k, _to_hashable_value_list(v)))
+    items.sort(key=lambda kv: kv[0])
+    return tuple(items)
+
+
+def compare_partitions_subset(
+    new_partitions: List[Dict[str, Any]],
+    original_partitions: List[Dict[str, Any]],
+    ignore_keys: Iterable[str] = (),
+) -> Tuple[bool, List[Dict[str, Any]]]:
+    """
+    Check that every partition in `new_partitions` also appears in `original_partitions`,
+    ignoring ordering (of partitions and of values within each key).
+
+    Parameters
+    ----------
+    new_partitions : list of dict
+        Partitions produced by the optimized/new code.
+    original_partitions : list of dict
+        Partitions produced by the original code (the reference).
+    ignore_keys : iterable of str, optional
+        Keys to ignore when comparing partitions (e.g., timestamps or debug fields).
+
+    Returns
+    -------
+    (ok, missing)
+        ok : bool
+            True if every new partition is found in the original set.
+        missing : list of dict
+            The list of partitions (from `new_partitions`) that were NOT found in `original_partitions`,
+            useful for debugging diffs.
+    """
+    orig_set = {_canon_partition(p, ignore_keys) for p in original_partitions}
+    missing = []
+    for p in new_partitions:
+        cp = _canon_partition(p, ignore_keys)
+        if cp not in orig_set:
+            missing.append(p)
+    return (len(missing) == 0, missing)
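
The helpers added above (save_dict_list, load_dict_list, compare_partitions_subset) support checking the reworked create_partitions against a previously saved reference, as hinted at by the commented-out debug lines. A minimal sketch of that check, assuming the helpers are importable from the partition_queries module referenced in the next file's import, and that a reference pickle such as "partition_original.pkl.gz" was written earlier with save_dict_list; the MS path is hypothetical:

# Sketch only: validate the new partition builder against a saved reference.
# "my_data.ms" is a hypothetical MSv2 path; the pickle name comes from the
# commented-out debug code above.
from xradio.measurement_set._utils._msv2.partition_queries import (
    create_partitions,
    load_dict_list,
    compare_partitions_subset,
)

new_partitions = create_partitions("my_data.ms", partition_scheme=[])
org_partitions = load_dict_list("partition_original.pkl.gz")

ok, missing = compare_partitions_subset(new_partitions, org_partitions)
if not ok:
    print(f"{len(missing)} partition(s) missing from the reference:")
    for part in missing:
        print(part)
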
@@ -1,10 +1,13 @@
1
1
  import toolviper.utils.logger as logger
2
2
  import numcodecs
3
- from typing import Dict, Union
3
+ from typing import Dict, Union, Literal
4
+ import time
4
5
 
5
6
  import dask
6
7
 
7
- from xradio.measurement_set._utils._msv2.partition_queries import create_partitions
8
+ from xradio.measurement_set._utils._msv2.partition_queries import (
9
+ create_partitions,
10
+ )
8
11
  from xradio.measurement_set._utils._msv2.conversion import (
9
12
  convert_and_write_partition,
10
13
  estimate_memory_and_cores_for_partitions,
@@ -13,7 +16,7 @@ from xradio.measurement_set._utils._msv2.conversion import (
 
 def estimate_conversion_memory_and_cores(
     in_file: str,
-    partition_scheme: list = ["FIELD_ID"],
+    partition_scheme: list = [],
 ) -> tuple[float, int, int]:
     """
     Given an MSv2 and a partition_scheme to use when converting it to MSv4,
@@ -52,7 +55,7 @@
 def convert_msv2_to_processing_set(
     in_file: str,
     out_file: str,
-    partition_scheme: list = ["FIELD_ID"],
+    partition_scheme: list = [],
     main_chunksize: Union[Dict, float, None] = None,
     with_pointing: bool = True,
     pointing_chunksize: Union[Dict, float, None] = None,
@@ -63,8 +66,8 @@
     use_table_iter: bool = False,
     compressor: numcodecs.abc.Codec = numcodecs.Zstd(level=2),
     add_reshaping_indices: bool = False,
-    storage_backend: str = "zarr",
-    parallel_mode: str = "none",
+    storage_backend: Literal["zarr", "netcdf"] = "zarr",
+    parallel_mode: Literal["none", "partition", "time"] = "none",
     overwrite: bool = False,
 ):
     """Convert a Measurement Set v2 into a Processing Set of Measurement Set v4.
@@ -80,7 +83,7 @@
         In addition to data description and polarization setup a finer partitioning is possible by specifying a list of partitioning keys. Any combination of the following keys are possible:
         "FIELD_ID", "SCAN_NUMBER", "STATE_ID", "SOURCE_ID", "SUB_SCAN_NUMBER", "ANTENNA1".
         "ANTENNA1" is intended as a single-dish specific partitioning option.
-        For mosaics where the phase center is rapidly changing (such as VLA on the fly mosaics) partition_scheme should be set to an empty list []. By default, ["FIELD_ID"].
+        For mosaics where the phase center is rapidly changing (such as VLA on the fly mosaics) partition_scheme should be set to an empty list []. By default, [].
     main_chunksize : Union[Dict, float, None], optional
         Defines the chunk size of the main dataset. If given as a dictionary, defines the sizes of several dimensions, and acceptable keys are "time", "baseline_id", "antenna_id", "frequency", "polarization". If given as a float, gives the size of a chunk in GiB. By default, None.
     with_pointing : bool, optional
@@ -101,9 +104,9 @@
         The Blosc compressor to use when saving the converted data to disk using Zarr, by default numcodecs.Zstd(level=2).
     add_reshaping_indices : bool, optional
         Whether to add the tidxs, bidxs and row_id variables to each partition of the main dataset. These can be used to reshape the data back to the original ordering in the MS v2. This is mainly intended for testing and debugging, by default False.
-    storage_backend : {"zarr", "netcdf"}, optional
+    storage_backend : Literal["zarr", "netcdf"], optional
         The on-disk format to use. "netcdf" is not yet implemented.
-    parallel_mode : {"none", "partition", "time"}, optional
+    parallel_mode : Literal["none", "partition", "time"], optional
         Choose whether to use Dask to execute conversion in parallel, by default "none" and conversion occurs serially.
         The option "partition", parallelises the conversion over partitions specified by `partition_scheme`. The option "time" can only be used for phased array interferometers where there are no partitions
         in the MS v2; instead the MS v2 is parallelised along the time dimension and can be controlled by `main_chunksize`.
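
A minimal usage sketch of the updated signature, based only on the parameters shown in this diff; the input and output paths are hypothetical, and the import path is an assumption about where the function is exposed:

# Sketch only: paths are hypothetical; the import location is assumed.
from xradio.measurement_set import convert_msv2_to_processing_set

convert_msv2_to_processing_set(
    in_file="my_data.ms",           # hypothetical MSv2 input
    out_file="my_data.vis.zarr",    # hypothetical Processing Set output
    partition_scheme=["FIELD_ID"],  # finer partitioning; the new default [] suits OTF mosaics
    storage_backend="zarr",         # "netcdf" is not yet implemented
    parallel_mode="partition",      # parallelise over partitions with Dask
    overwrite=True,
)
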
@@ -134,6 +137,7 @@
         parallel_mode = "none"
 
     partitions = create_partitions(in_file, partition_scheme=partition_scheme)
+
     logger.info("Number of partitions: " + str(len(partitions)))
     if parallel_mode == "time":
         assert (
@@ -143,7 +147,6 @@
 
     delayed_list = []
 
     for ms_v4_id, partition_info in enumerate(partitions):
-        # print(ms_v4_id,len(partition_info['FIELD_ID']))
 
         logger.info(
@@ -156,6 +159,11 @@
             + str(partition_info["FIELD_ID"])
             + ", SCAN "
             + str(partition_info["SCAN_NUMBER"])
+            + (
+                ", EPHEMERIS " + str(partition_info["EPHEMERIS_ID"])
+                if "EPHEMERIS_ID" in partition_info
+                else ""
+            )
             + (
                 ", ANTENNA " + str(partition_info["ANTENNA1"])
                 if "ANTENNA1" in partition_info
@@ -188,6 +196,7 @@
                 )
             )
         else:
+            start_time = time.time()
             convert_and_write_partition(
                 in_file,
                 out_file,
@@ -207,6 +216,10 @@
                 parallel_mode=parallel_mode,
                 overwrite=overwrite,
             )
+            end_time = time.time()
+            logger.debug(
+                f"Time to convert partition {ms_v4_id}: {end_time - start_time:.2f} seconds"
+            )
 
     if parallel_mode == "partition":
         dask.compute(delayed_list)
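
The "partition" mode above appears to follow the usual Dask delayed pattern: each partition's conversion is collected in delayed_list as a lazy task and executed in a single dask.compute call. A generic sketch of that pattern, with work() standing in for convert_and_write_partition:

import dask

def work(partition_id):
    # Stand-in for convert_and_write_partition; does something trivially cheap.
    return partition_id

# Build the task graph lazily, one task per partition, then run them together.
delayed_list = [dask.delayed(work)(i) for i in range(4)]
results = dask.compute(delayed_list)  # returns a tuple wrapping the list of results
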
@@ -203,19 +203,26 @@ class MeasurementSetXdt:
         else:
             line_name = []
 
-        if "spectral_window_intent" not in self._xdt.frequency.attrs:
+        if "spectral_window_intents" not in self._xdt.frequency.attrs:
             spw_intent = "UNSPECIFIED"
         else:
-            spw_intent = self._xdt.frequency.attrs["spectral_window_intent"]
+            spw_intent = self._xdt.frequency.attrs["spectral_window_intents"]
+
+        if "intents" in self._xdt.observation_info:
+            scan_intents = self._xdt.observation_info["intents"]
+        else:
+            scan_intents = self._xdt.scan_name.attrs.get(
+                "scan_intents", ["UNSPECIFIED"]
+            )
 
         partition_info = {
             "spectral_window_name": self._xdt.frequency.attrs["spectral_window_name"],
-            "spectral_window_intent": spw_intent,
+            "spectral_window_intents": spw_intent,
             "field_name": to_list(np.unique(field_and_source_xds.field_name.values)),
             "polarization_setup": to_list(self._xdt.polarization.values),
             "scan_name": to_list(np.unique(self._xdt.scan_name.values)),
             "source_name": to_list(np.unique(field_and_source_xds.source_name.values)),
-            "intents": self._xdt.observation_info["intents"],
+            "scan_intents": scan_intents,
             "line_name": line_name,
             "data_group_name": data_group_name,
         }
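
Because the key exposed in partition_info changes from "intents" to "scan_intents" here, downstream code that reads this dictionary can stay compatible with both spellings using plain dict access (nothing beyond the dictionary itself is assumed):

# Sketch only: partition_info is the dict built above.
scan_intents = partition_info.get(
    "scan_intents",
    partition_info.get("intents", ["UNSPECIFIED"]),
)
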
@@ -5,7 +5,7 @@ import xarray as xr
 
 def open_processing_set(
     ps_store: str,
-    intents: list = None,
+    scan_intents: list | None = None,
 ) -> xr.DataTree:
     """Creates a lazy representation of a Processing Set (only meta-data is loaded into memory).
 
@@ -13,9 +13,9 @@ def open_processing_set(
     ----------
     ps_store : str
         String of the path and name of the processing set. For example '/users/user_1/uid___A002_Xf07bba_Xbe5c_target.lsrk.vis.zarr'.
-    intents : list, optional
-        A list of intents to be opened for example ['OBSERVE_TARGET#ON_SOURCE']. The intents in a processing_set_xdt can be seen by calling processing_set_xdt.ps.summary().
-        By default None, which will include all intents.
+    scan_intents : str | None, optional
+        A list of scan_intents to be opened for example ['OBSERVE_TARGET#ON_SOURCE']. The scan_intents in a processing_set_xdt can be seen by calling processing_set_xdt.ps.summary().
+        By default None, which will include all scan_intents.
 
     Returns
     -------
@@ -34,10 +34,10 @@
 
     # Future work is to add ASDM backend
 
-    if intents is None:
+    if scan_intents is None:
         return ps_xdt
     else:
-        return ps_xdt.xr_ps.query(intents=intents)
+        return ps_xdt.xr_ps.query(scan_intents=scan_intents)
 
 
 # def open_processing_set(
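
A minimal sketch of opening a Processing Set filtered by the renamed scan_intents argument; the store path and intent string are the examples from the docstring, and the import path is an assumption about where open_processing_set is exposed:

# Sketch only: import path assumed; adjust to your xradio install.
from xradio.measurement_set import open_processing_set

ps_xdt = open_processing_set(
    ps_store="/users/user_1/uid___A002_Xf07bba_Xbe5c_target.lsrk.vis.zarr",
    scan_intents=["OBSERVE_TARGET#ON_SOURCE"],  # None (the default) opens all scan_intents
)
print(ps_xdt)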