xradio 0.0.31__py3-none-any.whl → 0.0.34__py3-none-any.whl

This diff compares the contents of two publicly released package versions as they appear in their public registry. It is provided for informational purposes only.
Files changed (31)
  1. xradio/_utils/list_and_array.py +5 -3
  2. xradio/vis/__init__.py +3 -5
  3. xradio/vis/_processing_set.py +3 -3
  4. xradio/vis/_vis_utils/_ms/_tables/load_main_table.py +4 -4
  5. xradio/vis/_vis_utils/_ms/_tables/read.py +57 -41
  6. xradio/vis/_vis_utils/_ms/_tables/read_main_table.py +17 -18
  7. xradio/vis/_vis_utils/_ms/_tables/read_subtables.py +5 -5
  8. xradio/vis/_vis_utils/_ms/_tables/write.py +2 -4
  9. xradio/vis/_vis_utils/_ms/_tables/write_exp_api.py +19 -13
  10. xradio/vis/_vis_utils/_ms/chunks.py +5 -72
  11. xradio/vis/_vis_utils/_ms/conversion.py +238 -55
  12. xradio/vis/_vis_utils/_ms/{_tables/create_field_and_source_xds.py → create_field_and_source_xds.py} +114 -85
  13. xradio/vis/_vis_utils/_ms/descr.py +8 -8
  14. xradio/vis/_vis_utils/_ms/msv4_sub_xdss.py +249 -77
  15. xradio/vis/_vis_utils/_ms/partition_queries.py +19 -185
  16. xradio/vis/_vis_utils/_ms/partitions.py +18 -22
  17. xradio/vis/_vis_utils/_ms/subtables.py +2 -2
  18. xradio/vis/_vis_utils/_utils/partition_attrs.py +2 -2
  19. xradio/vis/_vis_utils/_utils/xds_helper.py +12 -12
  20. xradio/vis/_vis_utils/ms.py +1 -43
  21. xradio/vis/_vis_utils/zarr.py +0 -1
  22. xradio/vis/convert_msv2_to_processing_set.py +8 -1
  23. xradio/vis/load_processing_set.py +0 -3
  24. xradio/vis/read_processing_set.py +2 -2
  25. {xradio-0.0.31.dist-info → xradio-0.0.34.dist-info}/METADATA +1 -1
  26. {xradio-0.0.31.dist-info → xradio-0.0.34.dist-info}/RECORD +29 -31
  27. {xradio-0.0.31.dist-info → xradio-0.0.34.dist-info}/WHEEL +1 -1
  28. xradio/vis/_vis_utils/ms_column_descriptions_dicts.py +0 -1360
  29. xradio/vis/vis_io.py +0 -146
  30. {xradio-0.0.31.dist-info → xradio-0.0.34.dist-info}/LICENSE.txt +0 -0
  31. {xradio-0.0.31.dist-info → xradio-0.0.34.dist-info}/top_level.txt +0 -0
xradio/vis/_vis_utils/_ms/partition_queries.py
@@ -1,6 +1,5 @@
 import itertools
 import graphviper.utils.logger as logger
-import numbers
 from pathlib import Path
 from typing import Dict, List, Tuple, Union
 
@@ -10,8 +9,6 @@ import xarray as xr
 from casacore import tables
 
 from ._tables.table_query import open_table_ro, open_query
-from ._tables.read import read_generic_table
-from .subtables import subt_rename_ids
 
 
 def enumerated_product(*args):
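
For reference, `enumerated_product` pairs every combination of the input values with the index tuple that produced it. A minimal sketch of its behavior (the inputs here are made up):

```python
import itertools

def enumerated_product(*args):
    # yields ((i, j, ...), (args[0][i], args[1][j], ...)) for every combination
    yield from zip(
        itertools.product(*(range(len(x)) for x in args)), itertools.product(*args)
    )

# hypothetical inputs, just to show the shape of the output
print(list(enumerated_product([10, 20], ["a"])))
# [((0, 0), (10, 'a')), ((1, 0), (20, 'a'))]
```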
@@ -23,26 +20,29 @@ def enumerated_product(*args):
 def create_partitions(in_file: str, partition_scheme: list):
     """Create a list of dictionaries with the partition information.
 
-    Args:
-        in_file (str): Input MSv2 file path.
-        partition_scheme (list) : A MS v4 can only contain a single data description (spectral window and polarization setup), and observation mode. Consequently, the MS v2 is partitioned when converting to MS v4.
+    Parameters
+    ----------
+    in_file: str
+        Input MSv2 file path.
+    partition_scheme: list
+        A MS v4 can only contain a single data description (spectral window and polarization setup), and observation mode. Consequently, the MS v2 is partitioned when converting to MS v4.
         In addition to data description and polarization setup a finer partitioning is possible by specifying a list of partitioning keys. Any combination of the following keys are possible:
-        "FIELD_ID", "SCAN_NUMBER", "STATE_ID", "SOURCE_ID", "SUB_SCAN_NUMBER". For mosaics where the phase center is rapidly changing (such as VLA on the fly mosaics)
-        partition_scheme should be set to an empty list []. By default, ["FIELD_ID"].
-    Returns:
-        list: list of dictionaries with the partition information.
+        "FIELD_ID", "SCAN_NUMBER", "STATE_ID", "SOURCE_ID", "SUB_SCAN_NUMBER".
+        For mosaics where the phase center is rapidly changing (such as VLA on the fly mosaics) partition_scheme should be set to an empty list []. By default, ["FIELD_ID"].
+    Returns
+    -------
+    list
+        list of dictionaries with the partition information.
     """
     # vla_otf (bool, optional): The partioning of VLA OTF (on the fly) mosaics needs a special partitioning scheme. Defaults to False.
 
     # Create partition table
     from casacore import tables
     import numpy as np
-    import xarray as xr
     import pandas as pd
     import os
-    import time
 
-    partition_scheme = ["DATA_DESC_ID", "OBS_MODE"] + partition_scheme
+    partition_scheme = ["DATA_DESC_ID", "OBS_MODE", "OBSERVATION_ID"] + partition_scheme
 
     # Open MSv2 tables and add columns to partition table (par_df):
     par_df = pd.DataFrame()
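
Taken with the reworked docstring above, a call to this internal helper might look like the following sketch (the MS path is hypothetical, and `create_partitions` is normally driven by `convert_msv2_to_processing_set` rather than called directly):

```python
from xradio.vis._vis_utils._ms.partition_queries import create_partitions

# hypothetical MSv2 path; ["FIELD_ID"] is the documented default, applied on
# top of the implicit DATA_DESC_ID / OBS_MODE / OBSERVATION_ID keys
partitions = create_partitions("uid___A002_X1.ms", partition_scheme=["FIELD_ID"])
print(len(partitions))  # one dict per future MSv4 partition
```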
@@ -53,6 +53,7 @@ def create_partitions(in_file: str, partition_scheme: list):
     par_df["FIELD_ID"] = main_tb.getcol("FIELD_ID")
     par_df["SCAN_NUMBER"] = main_tb.getcol("SCAN_NUMBER")
     par_df["STATE_ID"] = main_tb.getcol("STATE_ID")
+    par_df["OBSERVATION_ID"] = main_tb.getcol("OBSERVATION_ID")
     par_df = par_df.drop_duplicates()
 
     field_tb = tables.table(
@@ -108,10 +109,13 @@ def create_partitions(in_file: str, partition_scheme: list):
     # Make all possible combinations of the partition criteria.
     enumerated_partitions = enumerated_product(*list(partition_criteria.values()))
 
+    # print('par_df',par_df)
+
     # Create a list of dictionaries with the partition information. This will be used to query the MSv2 main table.
     partitions = []
     partition_axis_names = [
         "DATA_DESC_ID",
+        "OBSERVATION_ID",
         "FIELD_ID",
         "SCAN_NUMBER",
         "STATE_ID",
@@ -144,68 +148,8 @@ def create_partitions(in_file: str, partition_scheme: list):
     return partitions
 
 
-# Used by code that will be deprecated at some stage.
-
-
-def make_partition_ids_by_ddi_scan(
-    infile: str, do_subscans: bool
-) -> Tuple[np.ndarray, np.ndarray, np.ndarray]:
-    """
-    Produces arrays of per-partition ddi, scan, state_id, for when
-    using partiion schemes 'scan' or 'scan/subscan', that is
-    partitioning by some variant of (ddi, scan, subscan(state_id))
-
-    Parameters
-    ----------
-    infile : str
-        Path to MS
-    do_subscans : bool
-        also partitioning by subscan, not only scan
-
-    Returns
-    -------
-    Tuple[np.ndarray, np.ndarray, np.ndarray]
-        arrays with indices that define every partition
-    """
-    try:
-        cctable = None
-        taql_distinct_states = None
-        cctable = tables.table(
-            infile, readonly=True, lockoptions={"option": "usernoread"}, ack=False
-        )
-        if do_subscans:
-            taql_distinct_states = (
-                "select DISTINCT SCAN_NUMBER, STATE_ID, DATA_DESC_ID from $cctable"
-            )
-        else:
-            taql_distinct_states = (
-                "select DISTINCT SCAN_NUMBER, DATA_DESC_ID from $cctable"
-            )
-        with open_query(cctable, taql_distinct_states) as query_states:
-            logger.debug(
-                f"Got query, nrows: {query_states.nrows()}, query: {query_states}"
-            )
-            scan_number = query_states.getcol("SCAN_NUMBER")
-            logger.debug(
-                f"Got col SCAN_NUMBER (len: {len(scan_number)}): {scan_number}"
-            )
-            if do_subscans:
-                state_id = query_states.getcol("STATE_ID")
-                data_desc_id = np.full(len(scan_number), None)
-            else:
-                state_id = [None] * len(scan_number)
-                logger.debug(f"Got col STATE_ID (len: {len(state_id)}): {state_id}")
-                data_desc_id = query_states.getcol("DATA_DESC_ID")
-
-        logger.debug(f"Got col DATA_DESC_ID (len: {len(data_desc_id)}): {data_desc_id}")
-        logger.debug(
-            f"Len of DISTINCT SCAN_NUMBER,etc.: {len(scan_number)}. Will generate that number of partitions"
-        )
-    finally:
-        if cctable:
-            cctable.close()
-
-    return data_desc_id, scan_number, state_id
+# Used by code that will be deprecated at some stage. See #192
+# Still need to clarify what to do about intent string filtering ('WVR', etc.)
 
 
 def make_partition_ids_by_ddi_intent(
@@ -458,113 +402,3 @@ def partition_when_empty_state(
     main_table.close()
 
     return distinct_ddis, [None] * nparts, [None] * nparts, [""] * nparts
-
-
-def create_taql_query_and_file_name(out_file, intent, state_ids, field_id, ddi):
-    file_name = (
-        out_file
-        + "/"
-        + out_file.replace(".vis.zarr", "").split("/")[-1]
-        + "_ddi_"
-        + str(ddi)
-        + "_intent_"
-        + intent
-    )
-
-    taql_where = f"where (DATA_DESC_ID = {ddi})"
-
-    if isinstance(state_ids, numbers.Integral):
-        taql_where += f" AND (STATE_ID = {state_ids})"
-    elif state_ids is not None:
-        state_ids_or = " OR STATE_ID = ".join(np.char.mod("%d", state_ids))
-        taql_where += f" AND (STATE_ID = {state_ids_or})"
-
-    if field_id is not None:
-        taql_where += f" AND (FIELD_ID = {field_id})"
-        file_name = file_name + "_field_id_" + str(field_id)
-
-    return taql_where, file_name
-
-
-def get_unqiue_intents(in_file):
-    """
-    _summary_
-
-    Parameters
-    ----------
-    in_file : str
-        _description_
-
-    Returns
-    -------
-    _type_
-        _description_
-    """
-    state_xds = read_generic_table(
-        in_file,
-        "STATE",
-        rename_ids=subt_rename_ids["STATE"],
-    )
-
-    if len(state_xds.data_vars) > 0:
-        obs_mode_dict = {}
-        for i, obs_mode in enumerate(state_xds.obs_mode.values):
-            if obs_mode in obs_mode_dict:
-                obs_mode_dict[obs_mode].append(i)
-            else:
-                obs_mode_dict[obs_mode] = [i]
-        return list(obs_mode_dict.keys()), list(obs_mode_dict.values())
-    else:  # empty state table
-        return ["None"], [None]
-
-
-def enumerated_product(*args):
-    yield from zip(
-        itertools.product(*(range(len(x)) for x in args)), itertools.product(*args)
-    )
-
-
-def create_partition_enumerated_product(in_file: str, partition_scheme: str):
-    """
-    Creates an enumerated_product of the data_desc_ids, state_ids, field_ids in a MS v2 that define the partions in a processing set.
-
-    Parameters
-    ----------
-    in_file : str
-        _description_
-    partition_scheme : str
-        _description_
-
-    Returns
-    -------
-    _type_
-        _description_
-    """
-    # Unused?
-    # spw_xds = read_generic_table(
-    #     in_file,
-    #     "SPECTRAL_WINDOW",
-    #     rename_ids=subt_rename_ids["SPECTRAL_WINDOW"],
-    # )
-
-    # TODO: probably get this via query to subtable instead of read_generic_table, we just
-    # need the row numbers
-    ddi_xds = read_generic_table(in_file, "DATA_DESCRIPTION")
-    data_desc_ids = np.arange(ddi_xds.sizes["row"])
-    state_xds = read_generic_table(in_file, "STATE")
-
-    if (partition_scheme == "ddi_intent_field") and (len(state_xds.data_vars) > 0):
-        intents, state_ids = get_unqiue_intents(in_file)
-        field_ids = np.arange(read_generic_table(in_file, "FIELD").sizes["row"])
-    else:  # partition_scheme == "ddi_state_field"
-        if len(state_xds.data_vars) > 0:
-            state_ids = [np.arange(state_xds.sizes["row"])]
-            intents = state_xds.obs_mode.values
-        else:  # empty state table
-            state_ids = [None]
-            intents = ["None"]
-        # print(state_xds, intents)
-        # field_ids = [None]
-        field_ids = np.arange(read_generic_table(in_file, "FIELD").sizes["row"])
-
-    return enumerated_product(data_desc_ids, state_ids, field_ids), intents
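
Although `create_taql_query_and_file_name` is deleted here, it shows how the MSv2 main table used to be filtered. For the hypothetical arguments `("out.vis.zarr", "OBSERVE_TARGET", [3, 4], 2, 0)` it returned:

```python
# what the removed helper returned for the arguments above
taql_where = "where (DATA_DESC_ID = 0) AND (STATE_ID = 3 OR STATE_ID = 4) AND (FIELD_ID = 2)"
file_name = "out.vis.zarr/out_ddi_0_intent_OBSERVE_TARGET_field_id_2"
```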
xradio/vis/_vis_utils/_ms/partitions.py
@@ -7,11 +7,10 @@ import xarray as xr
 from .msv2_msv3 import ignore_msv2_cols
 from .partition_queries import (
     make_partition_ids_by_ddi_intent,
-    make_partition_ids_by_ddi_scan,
 )
 from .subtables import subt_rename_ids, add_pointing_to_partition
 from .descr import describe_ms
-from ._tables.read import read_generic_table, make_freq_attrs
+from ._tables.read import load_generic_table, make_freq_attrs
 from ._tables.read_main_table import read_flat_main_table, read_expanded_main_table
 from .._utils.partition_attrs import add_partition_attrs
 from .._utils.xds_helper import expand_xds, make_coords, optimal_chunking
@@ -23,8 +22,8 @@ VisSetPartitions = Dict[PartitionKey, xr.Dataset]
 
 
 def make_spw_names_by_ddi(ddi_xds: xr.Dataset, spw_xds: xr.Dataset) -> Dict[int, str]:
-    spw_ids_by_ddi = ddi_xds.spectral_window_id[ddi_xds.row].values
-    spw_names = spw_xds.name[spw_ids_by_ddi].values
+    spw_ids_by_ddi = ddi_xds.SPECTRAL_WINDOW_ID[ddi_xds.row].values
+    spw_names = spw_xds.NAME[spw_ids_by_ddi].values
     return {ddi: spw_names[ddi] for ddi in np.arange(0, len(spw_names))}
 
 
@@ -138,12 +137,12 @@ def read_ms_scan_subscan_partitions(
         subtables already read
     """
 
-    spw_xds = read_generic_table(
+    spw_xds = load_generic_table(
         infile,
         "SPECTRAL_WINDOW",
         rename_ids=subt_rename_ids["SPECTRAL_WINDOW"],
     )
-    ddi_xds = read_generic_table(infile, "DATA_DESCRIPTION")
+    ddi_xds = load_generic_table(infile, "DATA_DESCRIPTION")
 
     if partition_scheme == "intent":
         spw_names_by_ddi = make_spw_names_by_ddi(ddi_xds, spw_xds)
@@ -154,15 +153,12 @@
             distinct_intents,
         ) = make_partition_ids_by_ddi_intent(infile, spw_names_by_ddi)
     else:
-        do_subscans = partition_scheme == "scan/subscan"
-        data_desc_id, scan_number, state_id = make_partition_ids_by_ddi_scan(
-            infile, do_subscans
-        )
+        raise ValueError("foo")
 
-    ant_xds = read_generic_table(
+    ant_xds = load_generic_table(
         infile, "ANTENNA", rename_ids=subt_rename_ids["ANTENNA"]
     )
-    pol_xds = read_generic_table(
+    pol_xds = load_generic_table(
         infile, "POLARIZATION", rename_ids=subt_rename_ids["POLARIZATION"]
     )
 
@@ -263,25 +259,25 @@ def read_ms_ddi_partitions(
     """
     # we need the antenna, spectral window, polarization, and data description tables
    # to define the (sub)datasets (their dims and coords) and to process the main table
-    ant_xds = read_generic_table(
+    ant_xds = load_generic_table(
         infile, "ANTENNA", rename_ids=subt_rename_ids["ANTENNA"]
     )
-    spw_xds = read_generic_table(
+    spw_xds = load_generic_table(
         infile,
         "SPECTRAL_WINDOW",
         rename_ids=subt_rename_ids["SPECTRAL_WINDOW"],
     )
-    pol_xds = read_generic_table(
+    pol_xds = load_generic_table(
         infile, "POLARIZATION", rename_ids=subt_rename_ids["POLARIZATION"]
     )
-    ddi_xds = read_generic_table(infile, "DATA_DESCRIPTION")
+    ddi_xds = load_generic_table(infile, "DATA_DESCRIPTION")
 
     # each DATA_DESC_ID (ddi) is a fixed shape that may differ from others
     # form a list of ddis to process, each will be placed it in its own xarray dataset and partition
     ddis = np.arange(ddi_xds.row.shape[0]) if rowmap is None else list(rowmap.keys())
 
     # figure out the chunking for each DDI, either one fixed shape or an auto-computed one
-    if type(chunks) != tuple:
+    if type(chunks) is not tuple:
         mshape = describe_ms(infile, mode="flat", rowmap=rowmap)
         chunks = dict(
             [
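
The `type(chunks) != tuple` to `type(chunks) is not tuple` changes in this file replace equality comparisons on type objects with identity checks, as linters such as flake8 (rule E721) recommend. A small illustration:

```python
chunks = {"row": 1000, "chan": 64}  # hypothetical chunk specification

# identity check against the exact type, as the updated code does
print(type(chunks) is dict)        # True
print(type(chunks) is not tuple)   # True

# isinstance would also accept dict subclasses, a subtly looser test
print(isinstance(chunks, dict))    # True
```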
@@ -307,7 +303,7 @@
             continue
         logger.debug(
             "reading DDI %i with chunking %s..."
-            % (ddi, str(chunks[ddi] if type(chunks) == dict else chunks))
+            % (ddi, str(chunks[ddi] if type(chunks) is dict else chunks))
         )
 
         # experimenting, comparing overheads of expanded vs. flat
@@ -321,7 +317,7 @@
                 infile,
                 ddi,
                 rowidxs=rowidxs,
-                chunks=chunks[ddi] if type(chunks) == dict else chunks,
+                chunks=chunks[ddi] if type(chunks) is dict else chunks,
                 ignore_msv2_cols=ignore_msv2_cols,
             )
             if len(xds.sizes) == 0:
@@ -339,9 +335,9 @@
         # filter by channel selection
         if (chanidxs is not None) and (len(chanidxs) < len(xds.chan)):
             xds = xds.isel(chan=chanidxs)
-            spw_xds["chan_freq"][
-                ddi_xds.spectral_window_id.values[ddi], : len(chanidxs)
-            ] = spw_xds.chan_freq[ddi_xds.spectral_window_id.values[ddi], chanidxs]
+            spw_xds["CHAN_FREQ"][
+                ddi_xds.SPECTRAL_WINDOW_ID.values[ddi], : len(chanidxs)
+            ] = spw_xds.CHAN_FREQ[ddi_xds.SPECTRAL_WINDOW_ID.values[ddi], chanidxs]
 
         # expand the row dimension out to (time, baseline)
         if not expanded and expand:
xradio/vis/_vis_utils/_ms/subtables.py
@@ -7,7 +7,7 @@ from typing import Dict, List
 
 import xarray as xr
 
-from ._tables.read import read_generic_table, table_exists
+from ._tables.read import load_generic_table, table_exists
 from ._tables.read_subtables import read_ephemerides, read_delayed_pointing_table
 
 
@@ -75,7 +75,7 @@ def read_ms_subtables(
             rename_ids = {"dim_2": "n_polynomial", "dim_3": "dir"}
             xds = read_delayed_pointing_table(str(subt_path), rename_ids=rename_ids)
         else:
-            xds = read_generic_table(
+            xds = load_generic_table(
                 infile,
                 subt_name,
                 timecols=["TIME"],
xradio/vis/_vis_utils/_utils/partition_attrs.py
@@ -73,8 +73,8 @@ def init_partition_ids(
     ddi_xds: xr.Dataset,
     part_ids: PartitionIds,
 ) -> PartitionIds:
-    spw_id = ddi_xds.spectral_window_id.values[ddi]
-    pol_setup_id = ddi_xds.polarization_id.values[ddi]
+    spw_id = ddi_xds.SPECTRAL_WINDOW_ID.values[ddi]
+    pol_setup_id = ddi_xds.POLARIZATION_ID.values[ddi]
     ids: PartitionIds = {
         # The -1 are expected to be be updated from part_ids
         "array_id": -1,
xradio/vis/_vis_utils/_utils/xds_helper.py
@@ -35,11 +35,11 @@ def make_coords(
     Dict[str, np.ndarray]
     """
     ant_xds, ddi_xds, spw_xds, pol_xds = subtables
-    freq = spw_xds.chan_freq.values[
-        ddi_xds.spectral_window_id.values[ddi], : xds.freq.shape[0]
+    freq = spw_xds.CHAN_FREQ.values[
+        ddi_xds.SPECTRAL_WINDOW_ID.values[ddi], : xds.freq.shape[0]
     ]
-    pol_ids = pol_xds.corr_type.values[
-        ddi_xds.polarization_id.values[ddi], : xds.pol.shape[0]
+    pol_ids = pol_xds.CORR_TYPE.values[
+        ddi_xds.POLARIZATION_ID.values[ddi], : xds.pol.shape[0]
     ]
     pol_names = np.vectorize(stokes_types.get)(pol_ids)
     ant_id = ant_xds.antenna_id.values
@@ -125,31 +125,31 @@ def make_global_coords(mxds: xr.Dataset) -> Dict[str, xr.DataArray]:
     if "antenna" in metainfo:
         coords["antenna_ids"] = metainfo["antenna"].antenna_id.values
         coords["antennas"] = xr.DataArray(
-            metainfo["antenna"].name.values, dims=["antenna_ids"]
+            metainfo["antenna"].NAME.values, dims=["antenna_ids"]
         )
     if "field" in metainfo:
         coords["field_ids"] = metainfo["field"].field_id.values
         coords["fields"] = xr.DataArray(
-            metainfo["field"].name.values, dims=["field_ids"]
+            metainfo["field"].NAME.values, dims=["field_ids"]
         )
     if "feed" in mxds.attrs:
-        coords["feed_ids"] = metainfo["feed"].feed_id.values
+        coords["feed_ids"] = metainfo["feed"].FEED_ID.values
     if "observation" in metainfo:
         coords["observation_ids"] = metainfo["observation"].observation_id.values
         coords["observations"] = xr.DataArray(
-            metainfo["observation"].project.values, dims=["observation_ids"]
+            metainfo["observation"].PROJECT.values, dims=["observation_ids"]
         )
     if "polarization" in metainfo:
         coords["polarization_ids"] = metainfo["polarization"].pol_setup_id.values
     if "source" in metainfo:
-        coords["source_ids"] = metainfo["source"].source_id.values
+        coords["source_ids"] = metainfo["source"].SOURCE_ID.values
         coords["sources"] = xr.DataArray(
-            metainfo["source"].name.values, dims=["source_ids"]
+            metainfo["source"].NAME.values, dims=["source_ids"]
        )
     if "spectral_window" in metainfo:
         coords["spw_ids"] = metainfo["spectral_window"].spw_id.values
     if "state" in metainfo:
-        coords["state_ids"] = metainfo["state"].state_id.values
+        coords["state_ids"] = metainfo["state"].STATE_ID.values
 
     return coords
 
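
Much of the churn in this release comes from renaming the data variables of the generic subtable datasets from lowercase to their original MSv2 uppercase column names (`name` to `NAME`, `spectral_window_id` to `SPECTRAL_WINDOW_ID`, and so on), matching the `read_generic_table` to `load_generic_table` rework. Downstream code that selects these variables has to follow suit; a hypothetical before/after:

```python
# xradio 0.0.31 (hypothetical downstream usage)
spw_names = spw_xds.name.values

# xradio 0.0.34; bracket access also avoids clashes with genuine
# xarray Dataset attributes
spw_names = spw_xds["NAME"].values
```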
@@ -225,7 +225,7 @@ def flatten_xds(xds: xr.Dataset) -> xr.Dataset:
     # compute for issue https://github.com/hainegroup/oceanspy/issues/332
     # drop=True silently does compute (or at least used to)
     txds = txds.where(
-        ((txds.state_id != nan_int) & (txds.field_id != nan_int)).compute(),
+        ((txds.STATE_ID != nan_int) & (txds.FIELD_ID != nan_int)).compute(),
         drop=True,
     )  # .unify_chunks()
 
xradio/vis/_vis_utils/ms.py
@@ -1,11 +1,8 @@
 import os
 import graphviper.utils.logger as logger
-from typing import Dict, List, Tuple, Union
-
-import xarray as xr
+from typing import List, Tuple, Union
 
 from ._utils.cds import CASAVisSet
-from ._ms.chunks import load_main_chunk
 from ._ms.partitions import (
     finalize_partitions,
     read_ms_ddi_partitions,
@@ -107,42 +104,3 @@ def read_ms(
     # build the visibilities container (metainfo + partitions) to return
     cds = vis_xds_packager_cds(subts, parts, "read_ms")
     return cds
-
-
-def load_vis_chunk(
-    infile: str,
-    block_des: Dict[str, slice],
-    partition_key: Tuple[int, int, str],
-) -> Dict[Tuple[int, int], xr.Dataset]:
-    """
-    Read a chunk of a MeasurementSet (MSv2 format) into an Xarray
-    dataset, loading the data in memory.
-
-    Parameters
-    ----------
-    infile : str
-        Input MS filename
-    block_des : Dict[str, slice]
-        specification of chunk to load
-    partition_key: partition_key: Tuple[int, int, str]
-
-    Returns
-    -------
-    Dict[Tuple[int, int], xr.Dataset]
-        Xarray datasets with chunk of visibility data, one per DDI
-        (spw_id, pol_setup_id pair)
-    """
-    infile = os.path.expanduser(infile)
-
-    logger.info(f"Loading from {infile} as MSv2 a chunk of data into memory")
-
-    if not os.path.isdir(infile):
-        raise ValueError(f"invalid input filename to read_ms {infile}")
-
-    orig_chunk_to_improve = load_main_chunk(infile, block_des)
-    res = vis_xds_packager_cds(
-        subtables={},
-        partitions={partition_key: orig_chunk_to_improve},
-        descr_add="load_vis_block",
-    )
-    return res
xradio/vis/_vis_utils/zarr.py
@@ -3,7 +3,6 @@ from pathlib import Path
 from typing import Dict, Union
 
 import zarr
-import xradio
 import graphviper.utils.logger as logger
 
 from ._utils.cds import CASAVisSet
xradio/vis/convert_msv2_to_processing_set.py
@@ -17,6 +17,7 @@ def convert_msv2_to_processing_set(
     pointing_chunksize: Union[Dict, float, None] = None,
     pointing_interpolate: bool = False,
     ephemeris_interpolate: bool = False,
+    use_table_iter: bool = False,
     compressor: numcodecs.abc.Codec = numcodecs.Zstd(level=2),
     storage_backend="zarr",
     parallel: bool = False,
@@ -45,6 +46,8 @@
         Whether to interpolate the time axis of the pointing sub-dataset to the time axis of the main dataset
     ephemeris_interpolate : bool, optional
         Whether to interpolate the time axis of the ephemeris data variables (of the field_and_source sub-dataset) to the time axis of the main dataset
+    use_table_iter : bool, optional
+        Whether to use the table iterator to read the main table of the MS v2. This should be set to True when reading datasets with large number of rows and few partitions, by default False.
     compressor : numcodecs.abc.Codec, optional
         The Blosc compressor to use when saving the converted data to disk using Zarr, by default numcodecs.Zstd(level=2).
     storage_backend : {"zarr", "netcdf"}, optional
@@ -63,7 +66,9 @@
         # print(ms_v4_id,len(partition_info['FIELD_ID']))
 
         logger.info(
-            "DDI "
+            "OBSERVATION_ID "
+            + str(partition_info["OBSERVATION_ID"])
+            + ", DDI "
             + str(partition_info["DATA_DESC_ID"])
             + ", STATE "
             + str(partition_info["STATE_ID"])
@@ -80,6 +85,7 @@
                 out_file,
                 ms_v4_id,
                 partition_info=partition_info,
+                use_table_iter=use_table_iter,
                 partition_scheme=partition_scheme,
                 main_chunksize=main_chunksize,
                 with_pointing=with_pointing,
@@ -96,6 +102,7 @@
                 out_file,
                 ms_v4_id,
                 partition_info=partition_info,
+                use_table_iter=use_table_iter,
                 partition_scheme=partition_scheme,
                 main_chunksize=main_chunksize,
                 with_pointing=with_pointing,
xradio/vis/load_processing_set.py
@@ -1,6 +1,3 @@
-import xarray as xr
-import zarr
-import copy
 import os
 from ._processing_set import processing_set
 from typing import Dict, Union
xradio/vis/read_processing_set.py
@@ -1,5 +1,5 @@
 import os
-import xarray as xr
+
 from ._processing_set import processing_set
 import graphviper.utils.logger as logger
 from xradio._utils.zarr.common import _open_dataset, _get_ms_stores_and_file_system
@@ -17,7 +17,7 @@ def read_processing_set(
     ps_store : str
         String of the path and name of the processing set. For example '/users/user_1/uid___A002_Xf07bba_Xbe5c_target.lsrk.vis.zarr'.
     obs_modes : list, optional
-        A list of obs_mode to be read for example ['OBSERVE_TARGET#ON_SOURCE']. The obs_mode in a processing set can be seem by calling processing_set.summary().
+        A list of obs_mode to be read for example ['OBSERVE_TARGET#ON_SOURCE']. The obs_mode in a processing set can be seen by calling processing_set.summary().
         By default None, which will read all obs_mode.
 
     Returns
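
And reading a converted store back, a minimal sketch based on the docstring above (the store path is the docstring's own example):

```python
from xradio.vis.read_processing_set import read_processing_set

ps = read_processing_set(
    "/users/user_1/uid___A002_Xf07bba_Xbe5c_target.lsrk.vis.zarr",
    obs_modes=["OBSERVE_TARGET#ON_SOURCE"],
)
ps.summary()  # lists the partitions and their obs_mode, per the docstring
```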
{xradio-0.0.31.dist-info → xradio-0.0.34.dist-info}/METADATA
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: xradio
-Version: 0.0.31
+Version: 0.0.34
 Summary: Xarray Radio Astronomy Data IO
 Author-email: Jan-Willem Steeb <jsteeb@nrao.edu>
 License: BSD 3-Clause License