xradio 0.0.59__py3-none-any.whl → 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,5 +1,8 @@
  import itertools
+ import time
  import toolviper.utils.logger as logger
+ import os
+ import pandas as pd

  import numpy as np

@@ -17,6 +20,9 @@ def enumerated_product(*args):
  )


+ import pickle, gzip
+
+
  def create_partitions(in_file: str, partition_scheme: list) -> list[dict]:
  """Create a list of dictionaries with the partition information.

@@ -34,38 +40,53 @@ def create_partitions(in_file: str, partition_scheme: list) -> list[dict]:
  list
  list of dictionaries with the partition information.
  """
- # vla_otf (bool, optional): The partioning of VLA OTF (on the fly) mosaics needs a special partitioning scheme. Defaults to False.

- # Create partition table
- import pandas as pd
- import os
+ ### Test new implementation without
+ # Always start with these (if available); then extend with user scheme.
+ partition_scheme = [
+ "DATA_DESC_ID",
+ "OBS_MODE",
+ "OBSERVATION_ID",
+ "EPHEMERIS_ID",
+ ] + list(partition_scheme)

- partition_scheme = ["DATA_DESC_ID", "OBS_MODE", "OBSERVATION_ID"] + partition_scheme
+ # partition_scheme = ["DATA_DESC_ID", "OBS_MODE"] + list(
+ # partition_scheme
+ # )

- # Open MSv2 tables and add columns to partition table (par_df):
- par_df = pd.DataFrame()
+ t0 = time.time()
+ # --------- Load base columns from MAIN table ----------
  main_tb = tables.table(
  in_file, readonly=True, lockoptions={"option": "usernoread"}, ack=False
  )
- par_df["DATA_DESC_ID"] = main_tb.getcol("DATA_DESC_ID")
- par_df["FIELD_ID"] = main_tb.getcol("FIELD_ID")
- par_df["SCAN_NUMBER"] = main_tb.getcol("SCAN_NUMBER")
- par_df["STATE_ID"] = main_tb.getcol("STATE_ID")
- par_df["OBSERVATION_ID"] = main_tb.getcol("OBSERVATION_ID")
- par_df["ANTENNA1"] = main_tb.getcol("ANTENNA1")
- par_df = par_df.drop_duplicates()

+ # Build minimal DF once. Pull only columns we may need.
+ # Add columns here if you expect to aggregate them per-partition.
+ base_cols = {
+ "DATA_DESC_ID": main_tb.getcol("DATA_DESC_ID"),
+ "FIELD_ID": main_tb.getcol("FIELD_ID"),
+ "SCAN_NUMBER": main_tb.getcol("SCAN_NUMBER"),
+ "STATE_ID": main_tb.getcol("STATE_ID"),
+ "OBSERVATION_ID": main_tb.getcol("OBSERVATION_ID"),
+ "ANTENNA1": main_tb.getcol("ANTENNA1"),
+ }
+ par_df = pd.DataFrame(base_cols).drop_duplicates()
+ logger.debug(
+ f"Loaded MAIN columns in {time.time() - t0:.2f}s "
+ f"({len(par_df):,} unique MAIN rows)"
+ )
+
+ # --------- Optional SOURCE/STATE derived columns ----------
+ # SOURCE_ID (via FIELD table)
+ t1 = time.time()
+ source_id_added = False
  field_tb = tables.table(
  os.path.join(in_file, "FIELD"),
  readonly=True,
  lockoptions={"option": "usernoread"},
  ack=False,
  )
- # if vla_otf:
- # par_df["FIELD_NAME"] = np.array(field_tb.getcol("NAME"))[par_df["FIELD_ID"]]
-
- # Get source ids if available from source table.
- if table_exists(os.path.join(os.path.join(in_file, "SOURCE"))):
+ if table_exists(os.path.join(in_file, "SOURCE")):
  source_tb = tables.table(
  os.path.join(in_file, "SOURCE"),
  readonly=True,
@@ -73,13 +94,31 @@ def create_partitions(in_file: str, partition_scheme: list) -> list[dict]:
  ack=False,
  )
  if source_tb.nrows() != 0:
- par_df["SOURCE_ID"] = field_tb.getcol("SOURCE_ID")[par_df["FIELD_ID"]]
- # if vla_otf:
- # par_df["SOURCE_NAME"] = np.array(source_tb.getcol("NAME"))[
- # par_df["SOURCE_ID"]
- # ]
+ # Map SOURCE_ID via FIELD_ID
+ field_source = np.asarray(field_tb.getcol("SOURCE_ID"))
+ par_df["SOURCE_ID"] = field_source[par_df["FIELD_ID"]]
+ source_id_added = True
+ logger.debug(
+ f"SOURCE processing in {time.time() - t1:.2f}s "
+ f"(added SOURCE_ID={source_id_added})"
+ )
+
+ if "EPHEMERIS_ID" in field_tb.colnames():
+ ephemeris_id_added = False
+ if field_tb.nrows() != 0:
+ # Map EPHEMERIS_ID via FIELD_ID
+ field_ephemeris = np.asarray(field_tb.getcol("EPHEMERIS_ID"))
+ par_df["EPHEMERIS_ID"] = field_ephemeris[par_df["FIELD_ID"]]
+ ephemeris_id_added = True
+ logger.debug(
+ f"EPHEMERIS processing in {time.time() - t1:.2f}s "
+ f"(added EPHEMERIS_ID={ephemeris_id_added})"
+ )

- # Get intents and subscan numbers if available from state table.
+ # OBS_MODE & SUB_SCAN_NUMBER (via STATE table)
+ t2 = time.time()
+ obs_mode_added = False
+ sub_scan_added = False
  if table_exists(os.path.join(in_file, "STATE")):
  state_tb = tables.table(
  os.path.join(in_file, "STATE"),
@@ -88,30 +127,36 @@ def create_partitions(in_file: str, partition_scheme: list) -> list[dict]:
  ack=False,
  )
  if state_tb.nrows() != 0:
- # print('state_tb',state_tb.nrows(),state_tb)
- par_df["OBS_MODE"] = np.array(state_tb.getcol("OBS_MODE"))[
- par_df["STATE_ID"]
- ]
- par_df["SUB_SCAN_NUMBER"] = state_tb.getcol("SUB_SCAN")[par_df["STATE_ID"]]
+ state_obs_mode = np.asarray(state_tb.getcol("OBS_MODE"))
+ state_sub_scan = np.asarray(state_tb.getcol("SUB_SCAN"))
+ # Index by STATE_ID into STATE columns
+ par_df["OBS_MODE"] = state_obs_mode[par_df["STATE_ID"]]
+ par_df["SUB_SCAN_NUMBER"] = state_sub_scan[par_df["STATE_ID"]]
+ obs_mode_added = True
+ sub_scan_added = True
  else:
- par_df.drop(["STATE_ID"], axis=1)
+ # If STATE empty, drop STATE_ID (it cannot partition anything)
+ if "STATE_ID" in par_df.columns:
+ par_df.drop(columns=["STATE_ID"], inplace=True)

- # Check if all partition scheme criteria are present in the partition table.
- partition_scheme_updated = []
- partition_criteria = {}
- for par in partition_scheme:
- if par in par_df.columns:
- partition_criteria[par] = par_df[par].unique()
- partition_scheme_updated.append(par)
- logger.info(f"Partition scheme that will be used: {partition_scheme_updated}")
+ if "SUB_SCAN_NUMBER" in par_df.columns:
+ par_df.drop(columns=["SUB_SCAN_NUMBER"], inplace=True)

- # Make all possible combinations of the partition criteria.
- enumerated_partitions = enumerated_product(*list(partition_criteria.values()))
+ logger.debug(
+ f"STATE processing in {time.time() - t2:.2f}s "
+ f"(OBS_MODE={obs_mode_added}, SUB_SCAN_NUMBER={sub_scan_added})"
+ )

- # print('par_df',par_df)
+ # --------- Decide which partition keys are actually available ----------
+ t3 = time.time()
+ partition_scheme_updated = [k for k in partition_scheme if k in par_df.columns]
+ logger.info(f"Updated partition scheme used: {partition_scheme_updated}")

- # Create a list of dictionaries with the partition information. This will be used to query the MSv2 main table.
- partitions = []
+ # If none of the requested keys exist, there is a single partition of "everything"
+ if not partition_scheme_updated:
+ partition_scheme_updated = []
+
+ # These are the axes we report per partition (present => aggregate unique values)
  partition_axis_names = [
  "DATA_DESC_ID",
  "OBSERVATION_ID",
@@ -121,30 +166,172 @@ def create_partitions(in_file: str, partition_scheme: list) -> list[dict]:
  "SOURCE_ID",
  "OBS_MODE",
  "SUB_SCAN_NUMBER",
+ "EPHEMERIS_ID",
  ]
+ # Only include ANTENNA1 if user asked for it (keeps output size down)
  if "ANTENNA1" in partition_scheme:
  partition_axis_names.append("ANTENNA1")

- for idx, pair in enumerated_partitions:
- query = ""
- for i, par in enumerate(partition_scheme_updated):
- if isinstance(pair[i], str):
- query = query + f'{par} == "{pair[i]}" and '
+ # --------- Group only by realized partitions (no Cartesian product!) ----------
+ # observed=True speeds up if categorical; here it’s harmless. sort=False keeps source order.
+ if partition_scheme_updated:
+ grp = par_df.groupby(partition_scheme_updated, sort=False, observed=False)
+ groups_iter = grp
+ else:
+ # Single group: everything
+ groups_iter = [(None, par_df)]
+
+ partitions = []
+ # Fast aggregation: use NumPy for uniques to avoid pandas overhead in the tight loop.
+ for _, gdf in groups_iter:
+ part = {}
+ for name in partition_axis_names:
+ if name in gdf.columns:
+ # Return Python lists to match your prior structure (can be np.ndarray if preferred)
+ part[name] = np.unique(gdf[name].to_numpy()).tolist()
  else:
- query = query + f"{par} == {pair[i]} and "
- query = query[:-4] # remove last and
- sub_par_df = par_df.query(query).drop_duplicates()
+ part[name] = [None]
+ partitions.append(part)

- if sub_par_df.shape[0] != 0:
- partition_info = {}
+ logger.debug(
+ f"Partition build in {time.time() - t3:.2f}s; total {len(partitions):,} partitions"
+ )
+ logger.debug(f"Total create_partitions time: {time.time() - t0:.2f}s")

- # FIELD_NAME SOURCE_NAME
- for col_name in partition_axis_names:
- if col_name in sub_par_df.columns:
- partition_info[col_name] = sub_par_df[col_name].unique()
- else:
- partition_info[col_name] = [None]
+ # # with gzip.open("partition_original_small.pkl.gz", "wb") as f:
+ # # pickle.dump(partitions, f, protocol=pickle.HIGHEST_PROTOCOL)

- partitions.append(partition_info)
+ # #partitions[1]["DATA_DESC_ID"] = [999] # make a change to test comparison
+ # #org_partitions = load_dict_list("partition_original_small.pkl.gz")
+ # org_partitions = load_dict_list("partition_original.pkl.gz")

  return partitions
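
The removed block above enumerated every Cartesian combination of key values with enumerated_product and ran a pandas query per combination; the added block instead groups once by the key combinations that actually occur in the MAIN table and aggregates the unique values per axis. A minimal, self-contained sketch of that pattern with toy data (not part of the diff):

# Sketch of the groupby-over-realized-combinations pattern used by the new create_partitions.
import numpy as np
import pandas as pd

# Toy stand-in for par_df (unique MAIN-table rows); column names follow the diff.
par_df = pd.DataFrame(
    {
        "DATA_DESC_ID": [0, 0, 1, 1],
        "OBS_MODE": ["OBSERVE_TARGET", "OBSERVE_TARGET", "CALIBRATE", "CALIBRATE"],
        "FIELD_ID": [3, 4, 5, 5],
        "SCAN_NUMBER": [10, 11, 12, 12],
    }
)

partition_keys = ["DATA_DESC_ID", "OBS_MODE"]  # only combinations that actually occur
axis_names = ["DATA_DESC_ID", "OBS_MODE", "FIELD_ID", "SCAN_NUMBER"]

partitions = []
for _, gdf in par_df.groupby(partition_keys, sort=False):
    # One dict per realized partition; each axis holds its sorted unique values.
    partitions.append(
        {name: np.unique(gdf[name].to_numpy()).tolist() for name in axis_names}
    )

print(partitions)
# [{'DATA_DESC_ID': [0], 'OBS_MODE': ['OBSERVE_TARGET'], 'FIELD_ID': [3, 4], 'SCAN_NUMBER': [10, 11]},
#  {'DATA_DESC_ID': [1], 'OBS_MODE': ['CALIBRATE'], 'FIELD_ID': [5], 'SCAN_NUMBER': [12]}]

Grouping only over realized combinations avoids building, and then filtering out, combinations that never appear in the data.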
+
+
+ from typing import Any, List, Dict
+
+
+ def save_dict_list(filename: str, data: List[Dict[str, Any]]) -> None:
+ """
+ Save a list of dictionaries containing NumPy arrays (or other objects)
+ to a compressed pickle file.
+ """
+ with gzip.open(filename, "wb") as f:
+ pickle.dump(data, f, protocol=pickle.HIGHEST_PROTOCOL)
+
+
+ def load_dict_list(filename: str) -> List[Dict[str, Any]]:
+ """
+ Load a list of dictionaries containing NumPy arrays (or other objects)
+ from a compressed pickle file.
+ """
+ with gzip.open(filename, "rb") as f:
+ return pickle.load(f)
+
+
+ def dict_list_equal(a: List[Dict[str, Any]], b: List[Dict[str, Any]]) -> bool:
+ """
+ Compare two lists of dictionaries to ensure they are exactly the same.
+ NumPy arrays are compared with array_equal, other objects with ==.
+ """
+ if len(a) != len(b):
+ return False
+
+ for d1, d2 in zip(a, b):
+ if d1.keys() != d2.keys():
+ return False
+ for k in d1:
+ v1, v2 = d1[k], d2[k]
+ if isinstance(v1, np.ndarray) and isinstance(v2, np.ndarray):
+ if not np.array_equal(v1, v2):
+ return False
+ else:
+ if v1 != v2:
+ return False
+ return True
+
+
+ from typing import Iterable, Mapping, Tuple, List, Dict, Any, Set
+ import numpy as np
+
+
+ def _to_python_scalar(x: Any) -> Any:
+ """Convert NumPy scalars to Python scalars; leave others unchanged."""
+ if isinstance(x, np.generic):
+ return x.item()
+ return x
+
+
+ def _to_hashable_value_list(v: Any) -> Tuple[Any, ...]:
+ """
+ Normalize a dict value (often list/np.ndarray) into a sorted, hashable tuple.
+ - Accepts list/tuple/np.ndarray/scalars/None.
+ - Treats None as a value.
+ - Sorts with a stable key that stringifies items to avoid dtype hiccups.
+ """
+ if isinstance(v, np.ndarray):
+ v = v.tolist()
+ if v is None or isinstance(v, (str, bytes)):
+ # Treat a bare scalar as a single-element collection for consistency.
+ v = [v]
+ elif not isinstance(v, (list, tuple)):
+ v = [v]
+
+ py_vals = [_to_python_scalar(x) for x in v]
+ # Sort by (type name, repr) to keep mixed types stable if present
+ return tuple(sorted(py_vals, key=lambda x: (type(x).__name__, repr(x))))
+
+
+ def _canon_partition(
+ d: Mapping[str, Any], ignore_keys: Iterable[str] = ()
+ ) -> Tuple[Tuple[str, Tuple[Any, ...]], ...]:
+ """
+ Canonicalize a partition dict into a hashable, order-insensitive representation.
+ - Drops keys in ignore_keys.
+ - Converts each value collection to a sorted tuple.
+ - Sorts keys.
+ """
+ ign: Set[str] = set(ignore_keys)
+ items = []
+ for k, v in d.items():
+ if k in ign:
+ continue
+ items.append((k, _to_hashable_value_list(v)))
+ items.sort(key=lambda kv: kv[0])
+ return tuple(items)
+
+
+ def compare_partitions_subset(
+ new_partitions: List[Dict[str, Any]],
+ original_partitions: List[Dict[str, Any]],
+ ignore_keys: Iterable[str] = (),
+ ) -> Tuple[bool, List[Dict[str, Any]]]:
+ """
+ Check that every partition in `new_partitions` also appears in `original_partitions`,
+ ignoring ordering (of partitions and of values within each key).
+
+ Parameters
+ ----------
+ new_partitions : list of dict
+ Partitions produced by the optimized/new code.
+ original_partitions : list of dict
+ Partitions produced by the original code (the reference).
+ ignore_keys : iterable of str, optional
+ Keys to ignore when comparing partitions (e.g., timestamps or debug fields).
+
+ Returns
+ -------
+ (ok, missing)
+ ok : bool
+ True if every new partition is found in the original set.
+ missing : list of dict
+ The list of partitions (from `new_partitions`) that were NOT found in `original_partitions`,
+ useful for debugging diffs.
+ """
+ orig_set = {_canon_partition(p, ignore_keys) for p in original_partitions}
+ missing = []
+ for p in new_partitions:
+ cp = _canon_partition(p, ignore_keys)
+ if cp not in orig_set:
+ missing.append(p)
+ return (len(missing) == 0, missing)
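
The helpers added above (save_dict_list, load_dict_list, compare_partitions_subset) look intended for verifying the new partitioning against a saved reference. A hedged usage sketch, assuming they stay importable from partition_queries alongside create_partitions (as the converter module's import below suggests); the MSv2 path is hypothetical and the reference file name is taken from the commented-out test code above:

from xradio.measurement_set._utils._msv2.partition_queries import (
    create_partitions,
    load_dict_list,
    compare_partitions_subset,
)

# Partitions from the new implementation (hypothetical MSv2 path).
partitions = create_partitions("my_observation.ms", partition_scheme=[])

# Reference list previously written with save_dict_list().
reference = load_dict_list("partition_original.pkl.gz")

# Every new partition must also appear in the reference, ignoring ordering
# of partitions and of the values within each key.
ok, missing = compare_partitions_subset(partitions, reference)
if not ok:
    print(f"{len(missing)} partition(s) not found in the reference")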
@@ -1,10 +1,13 @@
  import toolviper.utils.logger as logger
  import numcodecs
- from typing import Dict, Union
+ from typing import Dict, Union, Literal
+ import time

  import dask

- from xradio.measurement_set._utils._msv2.partition_queries import create_partitions
+ from xradio.measurement_set._utils._msv2.partition_queries import (
+ create_partitions,
+ )
  from xradio.measurement_set._utils._msv2.conversion import (
  convert_and_write_partition,
  estimate_memory_and_cores_for_partitions,
@@ -13,7 +16,7 @@ from xradio.measurement_set._utils._msv2.conversion import (

  def estimate_conversion_memory_and_cores(
  in_file: str,
- partition_scheme: list = ["FIELD_ID"],
+ partition_scheme: list = [],
  ) -> tuple[float, int, int]:
  """
  Given an MSv2 and a partition_scheme to use when converting it to MSv4,
@@ -52,7 +55,7 @@ def estimate_conversion_memory_and_cores(
  def convert_msv2_to_processing_set(
  in_file: str,
  out_file: str,
- partition_scheme: list = ["FIELD_ID"],
+ partition_scheme: list = [],
  main_chunksize: Union[Dict, float, None] = None,
  with_pointing: bool = True,
  pointing_chunksize: Union[Dict, float, None] = None,
@@ -62,8 +65,9 @@ def convert_msv2_to_processing_set(
  sys_cal_interpolate: bool = False,
  use_table_iter: bool = False,
  compressor: numcodecs.abc.Codec = numcodecs.Zstd(level=2),
- storage_backend: str = "zarr",
- parallel_mode: str = "none",
+ add_reshaping_indices: bool = False,
+ storage_backend: Literal["zarr", "netcdf"] = "zarr",
+ parallel_mode: Literal["none", "partition", "time"] = "none",
  overwrite: bool = False,
  ):
  """Convert a Measurement Set v2 into a Processing Set of Measurement Set v4.
@@ -79,7 +83,7 @@ def convert_msv2_to_processing_set(
  In addition to data description and polarization setup a finer partitioning is possible by specifying a list of partitioning keys. Any combination of the following keys are possible:
  "FIELD_ID", "SCAN_NUMBER", "STATE_ID", "SOURCE_ID", "SUB_SCAN_NUMBER", "ANTENNA1".
  "ANTENNA1" is intended as a single-dish specific partitioning option.
- For mosaics where the phase center is rapidly changing (such as VLA on the fly mosaics) partition_scheme should be set to an empty list []. By default, ["FIELD_ID"].
+ For mosaics where the phase center is rapidly changing (such as VLA on the fly mosaics) partition_scheme should be set to an empty list []. By default, [].
  main_chunksize : Union[Dict, float, None], optional
  Defines the chunk size of the main dataset. If given as a dictionary, defines the sizes of several dimensions, and acceptable keys are "time", "baseline_id", "antenna_id", "frequency", "polarization". If given as a float, gives the size of a chunk in GiB. By default, None.
  with_pointing : bool, optional
@@ -98,9 +102,11 @@ def convert_msv2_to_processing_set(
  Whether to use the table iterator to read the main table of the MS v2. This should be set to True when reading datasets with large number of rows and few partitions, by default False.
  compressor : numcodecs.abc.Codec, optional
  The Blosc compressor to use when saving the converted data to disk using Zarr, by default numcodecs.Zstd(level=2).
- storage_backend : {"zarr", "netcdf"}, optional
+ add_reshaping_indices : bool, optional
+ Whether to add the tidxs, bidxs and row_id variables to each partition of the main dataset. These can be used to reshape the data back to the original ordering in the MS v2. This is mainly intended for testing and debugging, by default False.
+ storage_backend : Literal["zarr", "netcdf"], optional
  The on-disk format to use. "netcdf" is not yet implemented.
- parallel_mode : {"none", "partition", "time"}, optional
+ parallel_mode : Literal["none", "partition", "time"], optional
  Choose whether to use Dask to execute conversion in parallel, by default "none" and conversion occurs serially.
  The option "partition", parallelises the conversion over partitions specified by `partition_scheme`. The option "time" can only be used for phased array interferometers where there are no partitions
  in the MS v2; instead the MS v2 is parallelised along the time dimension and can be controlled by `main_chunksize`.
@@ -131,6 +137,7 @@ def convert_msv2_to_processing_set(
  parallel_mode = "none"

  partitions = create_partitions(in_file, partition_scheme=partition_scheme)
+
  logger.info("Number of partitions: " + str(len(partitions)))
  if parallel_mode == "time":
  assert (
@@ -140,7 +147,6 @@ def convert_msv2_to_processing_set(
  delayed_list = []

  for ms_v4_id, partition_info in enumerate(partitions):
- # print(ms_v4_id,len(partition_info['FIELD_ID']))

  logger.info(
  "OBSERVATION_ID "
@@ -153,6 +159,11 @@ def convert_msv2_to_processing_set(
  + str(partition_info["FIELD_ID"])
  + ", SCAN "
  + str(partition_info["SCAN_NUMBER"])
+ + (
+ ", EPHEMERIS " + str(partition_info["EPHEMERIS_ID"])
+ if "EPHEMERIS_ID" in partition_info
+ else ""
+ )
  + (
  ", ANTENNA " + str(partition_info["ANTENNA1"])
  if "ANTENNA1" in partition_info
@@ -178,12 +189,14 @@ def convert_msv2_to_processing_set(
  ephemeris_interpolate=ephemeris_interpolate,
  phase_cal_interpolate=phase_cal_interpolate,
  sys_cal_interpolate=sys_cal_interpolate,
+ add_reshaping_indices=add_reshaping_indices,
  compressor=compressor,
  parallel_mode=parallel_mode,
  overwrite=overwrite,
  )
  )
  else:
+ start_time = time.time()
  convert_and_write_partition(
  in_file,
  out_file,
@@ -198,10 +211,15 @@ def convert_msv2_to_processing_set(
  ephemeris_interpolate=ephemeris_interpolate,
  phase_cal_interpolate=phase_cal_interpolate,
  sys_cal_interpolate=sys_cal_interpolate,
+ add_reshaping_indices=add_reshaping_indices,
  compressor=compressor,
  parallel_mode=parallel_mode,
  overwrite=overwrite,
  )
+ end_time = time.time()
+ logger.debug(
+ f"Time to convert partition {ms_v4_id}: {end_time - start_time:.2f} seconds"
+ )

  if parallel_mode == "partition":
  dask.compute(delayed_list)
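
A hedged example call reflecting the updated signature (the new partition_scheme default of [], the add_reshaping_indices flag, and the Literal-typed storage_backend/parallel_mode options), assuming the function is exposed at xradio.measurement_set; the input and output paths are hypothetical:

from xradio.measurement_set import convert_msv2_to_processing_set  # assumed public import path

convert_msv2_to_processing_set(
    in_file="my_observation.ms",        # hypothetical MSv2 path
    out_file="my_observation.ps.zarr",  # hypothetical Processing Set store
    partition_scheme=[],                # new default; DATA_DESC_ID/OBS_MODE/OBSERVATION_ID/EPHEMERIS_ID are always applied
    add_reshaping_indices=False,        # new flag; True keeps tidxs/bidxs/row_id for debugging
    storage_backend="zarr",             # Literal["zarr", "netcdf"]
    parallel_mode="none",               # Literal["none", "partition", "time"]
    overwrite=True,
)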
@@ -203,16 +203,26 @@ class MeasurementSetXdt:
  else:
  line_name = []

+ if "spectral_window_intents" not in self._xdt.frequency.attrs:
+ spw_intent = "UNSPECIFIED"
+ else:
+ spw_intent = self._xdt.frequency.attrs["spectral_window_intents"]
+
+ if "intents" in self._xdt.observation_info:
+ scan_intents = self._xdt.observation_info["intents"]
+ else:
+ scan_intents = self._xdt.scan_name.attrs.get(
+ "scan_intents", ["UNSPECIFIED"]
+ )
+
  partition_info = {
  "spectral_window_name": self._xdt.frequency.attrs["spectral_window_name"],
- "spectral_window_intent": self._xdt.frequency.attrs[
- "spectral_window_intent"
- ],
+ "spectral_window_intents": spw_intent,
  "field_name": to_list(np.unique(field_and_source_xds.field_name.values)),
  "polarization_setup": to_list(self._xdt.polarization.values),
  "scan_name": to_list(np.unique(self._xdt.scan_name.values)),
  "source_name": to_list(np.unique(field_and_source_xds.source_name.values)),
- "intents": self._xdt.observation_info["intents"],
+ "scan_intents": scan_intents,
  "line_name": line_name,
  "data_group_name": data_group_name,
  }
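
The added fallbacks above let summaries of datasets written before the spectral_window_intents/scan_intents renames degrade to "UNSPECIFIED" instead of raising a KeyError. A minimal sketch of the same lookup order, with plain dicts standing in for the xarray attrs (not part of the diff):

# Toy stand-ins for the attrs consulted above.
frequency_attrs = {"spectral_window_name": "spw_0"}  # no spectral_window_intents key
observation_info = {}                                # no intents key
scan_name_attrs = {}                                 # no scan_intents key

spw_intent = frequency_attrs.get("spectral_window_intents", "UNSPECIFIED")
scan_intents = observation_info.get(
    "intents", scan_name_attrs.get("scan_intents", ["UNSPECIFIED"])
)
print(spw_intent, scan_intents)  # UNSPECIFIED ['UNSPECIFIED']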
@@ -5,7 +5,7 @@ import xarray as xr

  def open_processing_set(
  ps_store: str,
- intents: list = None,
+ scan_intents: list | None = None,
  ) -> xr.DataTree:
  """Creates a lazy representation of a Processing Set (only meta-data is loaded into memory).

@@ -13,9 +13,9 @@ def open_processing_set(
  ----------
  ps_store : str
  String of the path and name of the processing set. For example '/users/user_1/uid___A002_Xf07bba_Xbe5c_target.lsrk.vis.zarr'.
- intents : list, optional
- A list of intents to be opened for example ['OBSERVE_TARGET#ON_SOURCE']. The intents in a processing_set_xdt can be seen by calling processing_set_xdt.ps.summary().
- By default None, which will include all intents.
+ scan_intents : list | None, optional
+ A list of scan_intents to be opened for example ['OBSERVE_TARGET#ON_SOURCE']. The scan_intents in a processing_set_xdt can be seen by calling processing_set_xdt.ps.summary().
+ By default None, which will include all scan_intents.

  Returns
  -------
@@ -34,10 +34,10 @@ def open_processing_set(

  # Future work is to add ASDM backend

- if intents is None:
+ if scan_intents is None:
  return ps_xdt
  else:
- return ps_xdt.xr_ps.query(intents=intents)
+ return ps_xdt.xr_ps.query(scan_intents=scan_intents)


  # def open_processing_set(
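
A hedged example of the renamed keyword, assuming open_processing_set is exposed at xradio.measurement_set; the store path is the example used in the docstring above. Passing scan_intents=None (the default) opens all partitions:

from xradio.measurement_set import open_processing_set  # assumed public import path

ps_xdt = open_processing_set(
    "/users/user_1/uid___A002_Xf07bba_Xbe5c_target.lsrk.vis.zarr",
    scan_intents=["OBSERVE_TARGET#ON_SOURCE"],  # was `intents=` before this change
)
print(ps_xdt)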