xradio 0.0.55__py3-none-any.whl → 0.0.58__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (64) hide show
  1. xradio/__init__.py +2 -2
  2. xradio/_utils/_casacore/casacore_from_casatools.py +1001 -0
  3. xradio/_utils/_casacore/tables.py +6 -1
  4. xradio/_utils/coord_math.py +22 -23
  5. xradio/_utils/dict_helpers.py +76 -11
  6. xradio/_utils/schema.py +5 -2
  7. xradio/_utils/zarr/common.py +1 -73
  8. xradio/image/_util/_casacore/common.py +11 -3
  9. xradio/image/_util/_casacore/xds_from_casacore.py +59 -35
  10. xradio/image/_util/_casacore/xds_to_casacore.py +47 -16
  11. xradio/image/_util/_fits/xds_from_fits.py +172 -77
  12. xradio/image/_util/casacore.py +9 -4
  13. xradio/image/_util/common.py +4 -4
  14. xradio/image/_util/image_factory.py +8 -8
  15. xradio/image/image.py +45 -5
  16. xradio/measurement_set/__init__.py +19 -9
  17. xradio/measurement_set/_utils/__init__.py +1 -3
  18. xradio/measurement_set/_utils/_msv2/__init__.py +0 -0
  19. xradio/measurement_set/_utils/_msv2/_tables/read.py +35 -90
  20. xradio/measurement_set/_utils/_msv2/_tables/read_main_table.py +6 -686
  21. xradio/measurement_set/_utils/_msv2/_tables/table_query.py +13 -3
  22. xradio/measurement_set/_utils/_msv2/conversion.py +129 -145
  23. xradio/measurement_set/_utils/_msv2/create_antenna_xds.py +9 -16
  24. xradio/measurement_set/_utils/_msv2/create_field_and_source_xds.py +125 -221
  25. xradio/measurement_set/_utils/_msv2/msv2_to_msv4_meta.py +1 -2
  26. xradio/measurement_set/_utils/_msv2/msv4_info_dicts.py +13 -8
  27. xradio/measurement_set/_utils/_msv2/msv4_sub_xdss.py +27 -72
  28. xradio/measurement_set/_utils/_msv2/partition_queries.py +5 -262
  29. xradio/measurement_set/_utils/_msv2/subtables.py +0 -107
  30. xradio/measurement_set/_utils/_utils/interpolate.py +60 -0
  31. xradio/measurement_set/_utils/_zarr/encoding.py +2 -7
  32. xradio/measurement_set/convert_msv2_to_processing_set.py +0 -2
  33. xradio/measurement_set/load_processing_set.py +2 -2
  34. xradio/measurement_set/measurement_set_xdt.py +14 -14
  35. xradio/measurement_set/open_processing_set.py +1 -3
  36. xradio/measurement_set/processing_set_xdt.py +41 -835
  37. xradio/measurement_set/schema.py +96 -123
  38. xradio/schema/check.py +91 -97
  39. xradio/schema/dataclass.py +159 -22
  40. xradio/schema/export.py +99 -0
  41. xradio/schema/metamodel.py +51 -16
  42. xradio/schema/typing.py +5 -5
  43. {xradio-0.0.55.dist-info → xradio-0.0.58.dist-info}/METADATA +43 -11
  44. xradio-0.0.58.dist-info/RECORD +65 -0
  45. {xradio-0.0.55.dist-info → xradio-0.0.58.dist-info}/WHEEL +1 -1
  46. xradio/image/_util/fits.py +0 -13
  47. xradio/measurement_set/_utils/_msv2/_tables/load.py +0 -63
  48. xradio/measurement_set/_utils/_msv2/_tables/load_main_table.py +0 -487
  49. xradio/measurement_set/_utils/_msv2/_tables/read_subtables.py +0 -395
  50. xradio/measurement_set/_utils/_msv2/_tables/write.py +0 -320
  51. xradio/measurement_set/_utils/_msv2/_tables/write_exp_api.py +0 -385
  52. xradio/measurement_set/_utils/_msv2/chunks.py +0 -115
  53. xradio/measurement_set/_utils/_msv2/descr.py +0 -165
  54. xradio/measurement_set/_utils/_msv2/msv2_msv3.py +0 -7
  55. xradio/measurement_set/_utils/_msv2/partitions.py +0 -392
  56. xradio/measurement_set/_utils/_utils/cds.py +0 -40
  57. xradio/measurement_set/_utils/_utils/xds_helper.py +0 -404
  58. xradio/measurement_set/_utils/_zarr/read.py +0 -263
  59. xradio/measurement_set/_utils/_zarr/write.py +0 -329
  60. xradio/measurement_set/_utils/msv2.py +0 -106
  61. xradio/measurement_set/_utils/zarr.py +0 -133
  62. xradio-0.0.55.dist-info/RECORD +0 -77
  63. {xradio-0.0.55.dist-info → xradio-0.0.58.dist-info}/licenses/LICENSE.txt +0 -0
  64. {xradio-0.0.55.dist-info → xradio-0.0.58.dist-info}/top_level.txt +0 -0
xradio/image/image.py CHANGED
@@ -15,13 +15,15 @@ import xarray as xr
15
15
  # from .._utils.zarr.common import _load_no_dask_zarr
16
16
 
17
17
  from ._util.casacore import _load_casa_image_block, _xds_to_casa_image
18
- from ._util.fits import _read_fits_image
18
+
19
+ # from ._util.fits import _read_fits_image
19
20
  from ._util.image_factory import (
20
21
  _make_empty_aperture_image,
21
22
  _make_empty_lmuv_image,
22
23
  _make_empty_sky_image,
23
24
  )
24
25
  from ._util.zarr import _load_image_from_zarr_no_dask, _xds_from_zarr, _xds_to_zarr
26
+ from ._util._fits.xds_from_fits import _fits_image_to_xds
25
27
 
26
28
  warnings.filterwarnings("ignore", category=FutureWarning)
27
29
 
@@ -32,12 +34,37 @@ def read_image(
32
34
  verbose: bool = False,
33
35
  do_sky_coords: bool = True,
34
36
  selection: dict = {},
37
+ compute_mask: bool = True,
35
38
  ) -> xr.Dataset:
36
39
  """
37
40
  Convert CASA, FITS, or zarr image to xradio image xds format
38
41
  ngCASA image spec is located at
39
42
  https://docs.google.com/spreadsheets/d/1WW0Gl6z85cJVPgtdgW4dxucurHFa06OKGjgoK8OREFA/edit#gid=1719181934
40
43
 
44
+ Notes on FITS compatibility and memory mapping:
45
+
46
+ This function relies on Astropy's `memmap=True` to avoid loading full image data into memory.
47
+ However, not all FITS files support memory-mapped reads.
48
+
49
+ ⚠️ The following FITS types are incompatible with memory mapping:
50
+
51
+ 1. Compressed images (`CompImageHDU`)
52
+ - Workaround: decompress the FITS using tools like `funpack`, `cfitsio`,
53
+ or Astropy's `.scale()`/`.copy()` workflows
54
+ 2. Some scaled images (using BSCALE/BZERO headers)
55
+ ✅ Supported:
56
+ - Files with no BSCALE/BZERO headers (or BSCALE=1.0 and BZERO=0.0)
57
+ - Uncompressed, unscaled primary HDUs
58
+ ⚠️ Unsupported: Files with BSCALE ≠ 1.0 or BZERO ≠ 0.0
59
+ - These require data rescaling in memory, which disables lazy access
60
+ - Attempting to slice such arrays forces eager read of the full dataset
61
+ - Workaround: remove scaling with Astropy's
62
+ `HDU.data = HDU.data * BSCALE + BZERO` and save a new file
63
+
64
+ These cases will raise `RuntimeError` to prevent silent eager loads that can exhaust memory.
65
+
66
+ If you encounter such an error, consider preprocessing the file to make it memory-mappable.
67
+
41
68
  Parameters
42
69
  ----------
43
70
  infile : str
@@ -69,11 +96,19 @@ def read_image(
69
96
  the selection, and the end pixel is not. An empty dictionary (the
70
97
  default) indicates that the entire image should be returned. Currently
71
98
  only supported for images stored in zarr format.
72
-
99
+ compute_mask : bool, optional
100
+ If True (default), compute and attach valid data masks when converting from FITS to xds.
101
+ If False, skip mask computation entirely. This may improve performance if the mask
102
+ is not required for subsequent processing. It may, however, result in unpredictable behavior
103
+ for applications that are not designed to handle missing data. It is the user's responsibility,
104
+ not the software's, to ensure that the mask is computed if it is necessary. Currently only
105
+ implemented for FITS images.
73
106
  Returns
74
107
  -------
75
108
  xarray.Dataset
76
109
  """
110
+ # from ._util.casacore import _read_casa_image
111
+ # return _read_casa_image(infile, chunks, verbose, do_sky_coords)
77
112
  emsgs = []
78
113
  do_casa = True
79
114
  try:
@@ -92,9 +127,10 @@ def read_image(
92
127
  except Exception as e:
93
128
  emsgs.append(f"image format appears not to be casacore: {e.args}")
94
129
  # next statement is for debug, comment when done debugging
95
- # return _read_fits_image(infile, chunks, verbose, do_sky_coords)
130
+ # return _fits_image_to_xds(infile, chunks, verbose, do_sky_coords, compute_mask)
96
131
  try:
97
- return _read_fits_image(infile, chunks, verbose, do_sky_coords)
132
+ img_full_path = os.path.expanduser(infile)
133
+ return _fits_image_to_xds(infile, chunks, verbose, do_sky_coords, compute_mask)
98
134
  except Exception as e:
99
135
  emsgs.append(f"image format appears not to be fits {e.args}")
100
136
  # when done debugging comment out next line
@@ -111,7 +147,7 @@ def read_image(
111
147
  raise RuntimeError("\n".join(emsgs))
112
148
 
113
149
 
114
- def load_image(infile: str, block_des: dict = {}, do_sky_coords=True) -> xr.Dataset:
150
+ def load_image(infile: str, block_des: dict = None, do_sky_coords=True) -> xr.Dataset:
115
151
  """
116
152
  Load an image or portion of an image (subimage) into memory with data variables
117
153
  being converted from dask to numpy arrays and coordinate arrays being converted
@@ -144,6 +180,10 @@ def load_image(infile: str, block_des: dict = {}, do_sky_coords=True) -> xr.Data
144
180
  """
145
181
  do_casa = True
146
182
  emsgs = []
183
+
184
+ if block_des is None:
185
+ block_des = {}
186
+
147
187
  selection = copy.deepcopy(block_des) if block_des else block_des
148
188
  if selection:
149
189
  for k, v in selection.items():
@@ -4,13 +4,11 @@ convert, and retrieve information from Processing Set and Measurement Sets nodes
4
4
  Processing Set DataTree
5
5
  """
6
6
 
7
- from .processing_set_xdt import *
7
+ import toolviper.utils.logger as _logger
8
+
9
+ from .processing_set_xdt import ProcessingSetXdt
8
10
  from .open_processing_set import open_processing_set
9
- from .load_processing_set import load_processing_set # , ProcessingSetIterator
10
- from .convert_msv2_to_processing_set import (
11
- convert_msv2_to_processing_set,
12
- estimate_conversion_memory_and_cores,
13
- )
11
+ from .load_processing_set import load_processing_set
14
12
  from .measurement_set_xdt import MeasurementSetXdt
15
13
  from .schema import SpectrumXds, VisibilityXds
16
14
 
@@ -19,9 +17,21 @@ __all__ = [
19
17
  "MeasurementSetXdt",
20
18
  "open_processing_set",
21
19
  "load_processing_set",
22
- "ProcessingSetIterator",
23
- "convert_msv2_to_processing_set",
24
- "estimate_conversion_memory_and_cores",
25
20
  "SpectrumXds",
26
21
  "VisibilityXds",
27
22
  ]
23
+
24
+ try:
25
+ from .convert_msv2_to_processing_set import (
26
+ convert_msv2_to_processing_set,
27
+ estimate_conversion_memory_and_cores,
28
+ )
29
+ except ModuleNotFoundError as exc:
30
+ _logger.warning(
31
+ "Could not import the function to convert from MSv2 to MSv4. "
32
+ f"That functionality will not be available. Details: {exc}"
33
+ )
34
+ else:
35
+ __all__.extend(
36
+ ["convert_msv2_to_processing_set", "estimate_conversion_memory_and_cores"]
37
+ )
@@ -1,5 +1,3 @@
1
- from . import msv2
2
- from . import zarr
3
1
  from . import _utils
4
2
 
5
- __all__ = ["msv2", "zarr", "_utils"]
3
+ __all__ = ["_utils"]
File without changes
@@ -10,7 +10,11 @@ import pandas as pd
10
10
  import xarray as xr
11
11
 
12
12
  import astropy.units
13
- from casacore import tables
13
+
14
+ try:
15
+ from casacore import tables
16
+ except ImportError:
17
+ import xradio._utils._casacore.casacore_from_casatools as tables
14
18
 
15
19
  from .table_query import open_query, open_table_ro, TableManager
16
20
  from xradio._utils.list_and_array import get_pad_value
@@ -42,16 +46,20 @@ def convert_casacore_time(
42
46
  rawtimes: np.ndarray, convert_to_datetime: bool = True
43
47
  ) -> np.ndarray:
44
48
  """
45
- Read time columns to datetime format
46
- pandas datetimes are referenced against a 0 of 1970-01-01
47
- CASA's modified julian day reference time is (of course) 1858-11-17
49
+ Convert data from casacore time columns to a different format, either:
50
+ a) pandas style datetime,
51
+ b) simply seconds from 1970-01-01 00:00:00 UTC (as used in the Unix scale of
52
+ astropy).
53
+
54
+ Pandas datetimes and Unix times are referenced against a 0 of 1970-01-01.
55
+ CASA's (casacore) modified julian day reference time is (of course) 1858-11-17.
48
56
 
49
57
  This requires a correction of 3506716800 seconds which is hardcoded to save time
50
58
 
51
59
  Parameters
52
60
  ----------
53
61
  rawtimes : np.ndarray
54
- times in casacore ref
62
+ time values wrt casacore reference
55
63
  convert_to_datetime : bool (Default value = True)
56
64
  whether to produce pandas style datetime
57
65
 
@@ -308,6 +316,8 @@ def add_units_measures(
308
316
  ): # Little fix for Meerkat data where the units are a string.
309
317
  cc_units = [cc_units]
310
318
 
319
+ if isinstance(cc_units, np.ndarray):
320
+ cc_units = cc_units.tolist()
311
321
  if not isinstance(cc_units, list) or not cc_units:
312
322
  logger.warning(
313
323
  f"Invalid units found for column/variable {col}: {cc_units}"
@@ -345,70 +355,6 @@ def add_units_measures(
345
355
  return mvars
346
356
 
347
357
 
348
- def make_freq_attrs(spw_xds: xr.Dataset, spw_id: int) -> Dict[str, Any]:
349
- """
350
- Grab the units/measure metainfo for the xds.freq dimension of a
351
- parttion from the SPECTRAL_WINDOW subtable CTDS attributes.
352
-
353
- Has to read xds_spw.meas_freq_ref and use it as index in the CTDS
354
- 'VarRefCol' attrs of CHAN_FREQ and REF_FREQUENCY to give a
355
- reference frame to xds_spw.ref_frequency and xds_spw.chan_freq
356
- (then the ref frame from the second will be pulled to
357
- xds.freq.attrs)
358
-
359
- Parameters
360
- ----------
361
- spw_xds : xr.Dataset
362
- (metainfo) SPECTRAL_WINDOW xds
363
- spw_id : int
364
- SPW id of a partition
365
-
366
- Returns
367
- -------
368
- Dict[str, Any]
369
- attributes (units/measure) for the freq dim of a partition
370
- """
371
- fallback_TabRefTypes = [
372
- "REST",
373
- "LSRK",
374
- "LSRD",
375
- "BARY",
376
- "GEO",
377
- "TOPO",
378
- "GALACTO",
379
- "LGROUP",
380
- "CMB",
381
- ]
382
-
383
- ctds_cols = spw_xds.attrs["other"]["msv2"]["ctds_attrs"]["column_descriptions"]
384
- cfreq = ctds_cols["CHAN_FREQ"]
385
-
386
- cf_attrs = spw_xds.data_vars["CHAN_FREQ"].attrs
387
- if "MEASINFO" in cfreq["keywords"] and "VarRefCol" in cfreq["keywords"]["MEASINFO"]:
388
- fattrs = cfreq["keywords"]["MEASINFO"]
389
- var_ref_col = fattrs["VarRefCol"]
390
- # This should point to the SPW/MEAS_FREQ_REF col
391
- meas_freq_ref_idx = spw_xds.data_vars[var_ref_col].values[spw_id]
392
-
393
- if "TabRefCodes" not in fattrs or "TabRefTypes" not in fattrs:
394
- # Datasets like vla/ic2233_1.ms say "VarRefCol" but "TabRefTypes" is missing
395
- ref_frame = fallback_TabRefTypes[meas_freq_ref_idx]
396
- else:
397
- ref_type_code = fattrs["TabRefCodes"][meas_freq_ref_idx]
398
- ref_frame = fattrs["TabRefTypes"][ref_type_code]
399
-
400
- cf_attrs["measure"] = {
401
- "type": fattrs["type"],
402
- "ref_frame": ref_frame,
403
- }
404
-
405
- # Also set the 'VarRefCol' for CHAN_FREQ and REF_FREQUENCEY
406
- spw_xds.data_vars["CHAN_FREQ"].attrs.update(cf_attrs)
407
- spw_xds.data_vars["REF_FREQUENCY"].attrs.update(cf_attrs)
408
-
409
- return cf_attrs
410
-
411
-
412
358
  def redimension_ms_subtable(xds: xr.Dataset, subt_name: str) -> xr.Dataset:
413
359
  """
414
360
  Expand a MeasurementSet subtable xds from single dimension (row)
@@ -545,8 +491,8 @@ def load_generic_table(
545
491
  tname : str
546
492
  (sub)table name, for example 'SOURCE' for myms.ms/SOURCE
547
493
  timecols : Union[List[str], None] (Default value = None)
548
- column names to convert to numpy datetime format.
549
- leaves times as their original casacore format.
494
+ Names of time column(s), to convert from casacore times to 1970-01-01 scale
495
+ An empty list leaves times as their original casacore format.
550
496
  ignore : Union[List[str], None] (Default value = None)
551
497
  list of column names to ignore and not try to read.
552
498
  rename_ids : Dict[str, str] (Default value = None)
@@ -742,7 +688,7 @@ def load_generic_cols(
742
688
  tb_tool : tables.table
743
689
  table to load the columns
744
690
  timecols : Union[List[str], None]
745
- columns names to convert to datetime format
691
+ column names to convert from casacore time format
746
692
  ignore : Union[List[str], None]
747
693
  list of column names to skip and not try to load.
748
694
 
@@ -822,7 +768,7 @@ def load_fixed_size_cols(
822
768
  tb_tool : tables.table
823
769
  table to read the columns
824
770
  timecols : Union[List[str], None]
825
- columns names to convert to datetime format
771
+ column names to convert from casacore time format
826
772
  ignore : Union[List[str], None]
827
773
  list of column names to skip and not try to load.
828
774
 
@@ -917,7 +863,8 @@ def raw_col_data_to_coords_vars(
917
863
  data: np.ndarray :
918
864
  column data
919
865
  timecols: Union[List[str], None]
920
- columns to be treated as TIME-related
866
+ columns to be treated as TIME-related (they are coordinates and need conversion from
867
+ casacore time format).
921
868
 
922
869
  Returns
923
870
  -------
@@ -947,7 +894,7 @@ def raw_col_data_to_coords_vars(
947
894
  data = convert_mjd_time(data).astype("float64") / 1e9
948
895
  else:
949
896
  try:
950
- data = convert_casacore_time(data)
897
+ data = convert_casacore_time(data, False)
951
898
  except pd.errors.OutOfBoundsDatetime as exc:
952
899
  if inpath.endswith("WEATHER"):
953
900
  # intentionally not calling logging.exception
@@ -987,7 +934,7 @@ def raw_col_data_to_coords_vars(
987
934
 
988
935
  def get_pad_value_in_tablerow_column(trows: tables.tablerow, col: str) -> object:
989
936
  """
990
- Gets the pad value for the type of a column (IMPORTANTLY) as froun in the
937
+ Gets the pad value for the type of a column (IMPORTANTLY) as found in the
991
938
  type specified in the row / column value dict returned by tablerow.
992
939
  This can differ from the type of the column as given in the casacore
993
940
  column descriptions. See https://github.com/casangi/xradio/issues/242.
@@ -1189,7 +1136,7 @@ def read_col_chunk(
1189
1136
  np.ndarray
1190
1137
  """
1191
1138
  # TODO: consider calling load_col_chunk() from inside the withs
1192
- # for read_delayed_pointing_table and read_expanded_main_table
1139
+ # for read_expanded_main_table
1193
1140
  with open_table_ro(infile) as mtable:
1194
1141
  with open_query(mtable, ts_taql) as query:
1195
1142
  if (len(cshape) == 2) or (col == "UVW"): # all the scalars and UVW
@@ -1250,22 +1197,20 @@ def read_col_conversion_numpy(
1250
1197
  # Use casacore to get the shape of a row for this column
1251
1198
  #################################################################################
1252
1199
 
1253
- # getcolshapestring() only works on columns where a row element is an
1254
- # array ie. fails for TIME
1255
- # Assumes the RuntimeError is because the column is a scalar
1256
- try:
1200
+ # getcolshapestring() only works for array-valued columns.
1201
+ # For scalar columns (e.g., EXPOSURE, TIME_CENTROID), it raises a RuntimeError.
1202
+ # So we first check if the column is scalar to avoid that.
1203
+ if tb_tool.isscalarcol(col):
1204
+ extra_dimensions = ()
1205
+ else:
1206
+ # Get the shape string for the first row of the column (e.g., "[4, 2]")
1257
1207
  shape_string = tb_tool.getcolshapestring(col)[0]
1258
- # Convert `shape_string` into a tuple that numpy understands
1208
+
1209
+ # Convert the shape string into a tuple of integers (e.g., (4, 2)) that numpy
1210
+ # understands.
1259
1211
  extra_dimensions = tuple(
1260
- [
1261
- int(idx)
1262
- for idx in shape_string.replace("[", "")
1263
- .replace("]", "")
1264
- .split(", ")
1265
- ]
1212
+ int(dim) for dim in shape_string.strip("[]").split(", ")
1266
1213
  )
1267
- except RuntimeError:
1268
- extra_dimensions = ()
1269
1214
 
1270
1215
  #################################################################################
1271
1216