xradio 0.0.47__py3-none-any.whl → 0.0.49__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- xradio/__init__.py +1 -0
- xradio/_utils/dict_helpers.py +69 -2
- xradio/_utils/list_and_array.py +3 -1
- xradio/_utils/schema.py +3 -1
- xradio/image/_util/__init__.py +0 -3
- xradio/image/_util/_casacore/common.py +0 -13
- xradio/image/_util/_casacore/xds_from_casacore.py +102 -97
- xradio/image/_util/_casacore/xds_to_casacore.py +36 -24
- xradio/image/_util/_fits/xds_from_fits.py +81 -36
- xradio/image/_util/_zarr/zarr_low_level.py +3 -3
- xradio/image/_util/casacore.py +7 -5
- xradio/image/_util/common.py +13 -26
- xradio/image/_util/image_factory.py +143 -191
- xradio/image/image.py +10 -59
- xradio/measurement_set/__init__.py +11 -6
- xradio/measurement_set/_utils/_msv2/_tables/read.py +187 -46
- xradio/measurement_set/_utils/_msv2/_tables/table_query.py +22 -0
- xradio/measurement_set/_utils/_msv2/conversion.py +347 -299
- xradio/measurement_set/_utils/_msv2/create_field_and_source_xds.py +233 -150
- xradio/measurement_set/_utils/_msv2/descr.py +1 -1
- xradio/measurement_set/_utils/_msv2/msv4_info_dicts.py +20 -13
- xradio/measurement_set/_utils/_msv2/msv4_sub_xdss.py +21 -22
- xradio/measurement_set/convert_msv2_to_processing_set.py +46 -6
- xradio/measurement_set/load_processing_set.py +100 -52
- xradio/measurement_set/measurement_set_xdt.py +197 -0
- xradio/measurement_set/open_processing_set.py +122 -86
- xradio/measurement_set/processing_set_xdt.py +1552 -0
- xradio/measurement_set/schema.py +375 -197
- xradio/schema/bases.py +5 -1
- xradio/schema/check.py +97 -5
- xradio/sphinx/schema_table.py +12 -0
- {xradio-0.0.47.dist-info → xradio-0.0.49.dist-info}/METADATA +4 -4
- {xradio-0.0.47.dist-info → xradio-0.0.49.dist-info}/RECORD +36 -36
- {xradio-0.0.47.dist-info → xradio-0.0.49.dist-info}/WHEEL +1 -1
- xradio/measurement_set/measurement_set_xds.py +0 -117
- xradio/measurement_set/processing_set.py +0 -777
- {xradio-0.0.47.dist-info → xradio-0.0.49.dist-info/licenses}/LICENSE.txt +0 -0
- {xradio-0.0.47.dist-info → xradio-0.0.49.dist-info}/top_level.txt +0 -0
|
@@ -4,6 +4,7 @@ from pathlib import Path
|
|
|
4
4
|
import re
|
|
5
5
|
from typing import Any, Callable, Dict, List, Tuple, Union
|
|
6
6
|
|
|
7
|
+
import dask.array as da
|
|
7
8
|
import numpy as np
|
|
8
9
|
import pandas as pd
|
|
9
10
|
import xarray as xr
|
|
@@ -11,7 +12,7 @@ import xarray as xr
|
|
|
11
12
|
import astropy.units
|
|
12
13
|
from casacore import tables
|
|
13
14
|
|
|
14
|
-
from .table_query import open_query, open_table_ro
|
|
15
|
+
from .table_query import open_query, open_table_ro, TableManager
|
|
15
16
|
from xradio._utils.list_and_array import get_pad_value
|
|
16
17
|
|
|
17
18
|
CASACORE_TO_PD_TIME_CORRECTION = 3_506_716_800.0
|
|
@@ -1207,13 +1208,14 @@ def read_col_chunk(
|
|
|
1207
1208
|
return fulldata
|
|
1208
1209
|
|
|
1209
1210
|
|
|
1210
|
-
def
|
|
1211
|
-
|
|
1211
|
+
def read_col_conversion_numpy(
|
|
1212
|
+
table_manager: TableManager,
|
|
1212
1213
|
col: str,
|
|
1213
1214
|
cshape: Tuple[int],
|
|
1214
1215
|
tidxs: np.ndarray,
|
|
1215
1216
|
bidxs: np.ndarray,
|
|
1216
1217
|
use_table_iter: bool,
|
|
1218
|
+
time_chunksize: int,
|
|
1217
1219
|
) -> np.ndarray:
|
|
1218
1220
|
"""
|
|
1219
1221
|
Function to perform delayed reads from table columns when converting
|
|
@@ -1221,7 +1223,7 @@ def read_col_conversion(
|
|
|
1221
1223
|
|
|
1222
1224
|
Parameters
|
|
1223
1225
|
----------
|
|
1224
|
-
|
|
1226
|
+
table_manager : TableManager
|
|
1225
1227
|
|
|
1226
1228
|
col : str
|
|
1227
1229
|
|
|
@@ -1231,6 +1233,8 @@ def read_col_conversion(
|
|
|
1231
1233
|
|
|
1232
1234
|
bidxs : np.ndarray
|
|
1233
1235
|
|
|
1236
|
+
use_table_iter : bool
|
|
1237
|
+
|
|
1234
1238
|
Returns
|
|
1235
1239
|
-------
|
|
1236
1240
|
np.ndarray
|
|
@@ -1241,60 +1245,197 @@ def read_col_conversion(
|
|
|
1241
1245
|
# WARNING: Assumes the num_frequencies * num_polarizations < 2**29. If false,
|
|
1242
1246
|
# https://github.com/casacore/python-casacore/issues/130 isn't mitigated.
|
|
1243
1247
|
|
|
1248
|
+
with table_manager.get_table() as tb_tool:
|
|
1249
|
+
|
|
1250
|
+
# Use casacore to get the shape of a row for this column
|
|
1251
|
+
#################################################################################
|
|
1252
|
+
|
|
1253
|
+
# getcolshapestring() only works on columns where a row element is an
|
|
1254
|
+
# array ie. fails for TIME
|
|
1255
|
+
# Assumes the RuntimeError is because the column is a scalar
|
|
1256
|
+
try:
|
|
1257
|
+
shape_string = tb_tool.getcolshapestring(col)[0]
|
|
1258
|
+
# Convert `shape_string` into a tuple that numpy understands
|
|
1259
|
+
extra_dimensions = tuple(
|
|
1260
|
+
[
|
|
1261
|
+
int(idx)
|
|
1262
|
+
for idx in shape_string.replace("[", "")
|
|
1263
|
+
.replace("]", "")
|
|
1264
|
+
.split(", ")
|
|
1265
|
+
]
|
|
1266
|
+
)
|
|
1267
|
+
except RuntimeError:
|
|
1268
|
+
extra_dimensions = ()
|
|
1269
|
+
|
|
1270
|
+
#################################################################################
|
|
1271
|
+
|
|
1272
|
+
# Get dtype of the column. Only read first row from disk
|
|
1273
|
+
col_dtype = np.array(tb_tool.col(col)[0]).dtype
|
|
1274
|
+
# Use a custom/safe fill value (https://github.com/casangi/xradio/issues/219)
|
|
1275
|
+
fill_value = get_pad_value(col_dtype)
|
|
1276
|
+
|
|
1277
|
+
# Construct a numpy array to populate. `data` has shape (n_times, n_baselines, n_frequencies, n_polarizations)
|
|
1278
|
+
data = np.full(cshape + extra_dimensions, fill_value, dtype=col_dtype)
|
|
1279
|
+
|
|
1280
|
+
# Use built-in casacore table iterator to populate the data column by unique times.
|
|
1281
|
+
if use_table_iter:
|
|
1282
|
+
start_row = 0
|
|
1283
|
+
for ts in tb_tool.iter("TIME", sort=False):
|
|
1284
|
+
num_rows = ts.nrows()
|
|
1285
|
+
|
|
1286
|
+
# Create small temporary array to store the partial column
|
|
1287
|
+
tmp_arr = np.full(
|
|
1288
|
+
(num_rows,) + extra_dimensions, fill_value, dtype=col_dtype
|
|
1289
|
+
)
|
|
1290
|
+
|
|
1291
|
+
# Note we don't use `getcol()` because it's less safe. See:
|
|
1292
|
+
# https://github.com/casacore/python-casacore/issues/130#issuecomment-463202373
|
|
1293
|
+
ts.getcolnp(col, tmp_arr)
|
|
1294
|
+
|
|
1295
|
+
# Get the slice of rows contained in `tmp_arr`.
|
|
1296
|
+
# Used to get the relevant integer indexes from `tidxs` and `bidxs`
|
|
1297
|
+
tmp_slice = slice(start_row, start_row + num_rows)
|
|
1298
|
+
|
|
1299
|
+
# Copy `tmp_arr` into the correct elements of `data`
|
|
1300
|
+
data[tidxs[tmp_slice], bidxs[tmp_slice]] = tmp_arr
|
|
1301
|
+
start_row += num_rows
|
|
1302
|
+
else:
|
|
1303
|
+
data[tidxs, bidxs] = tb_tool.getcol(col)
|
|
1304
|
+
|
|
1305
|
+
return data
|
|
1306
|
+
|
|
1307
|
+
|
|
1308
|
+
def read_col_conversion_dask(
    table_manager: TableManager,
    col: str,
    cshape: Tuple[int],
    tidxs: np.ndarray,
    bidxs: np.ndarray,
    use_table_iter: bool,
    time_chunksize: int,
) -> da.Array:
    """
    Function to perform delayed reads from table columns when converting
    (no need for didxs)

    Only the first row of the column is read eagerly, to learn the per-row
    shape and the dtype. The actual column data is loaded lazily, one block
    of unique times at a time, by `load_col_chunk`.

    Parameters
    ----------
    table_manager : TableManager
        Object whose `get_table()` result is used as a context manager to
        access the table. It is also forwarded to `load_col_chunk`.
    col : str
        Name of the column to read.
    cshape : Tuple[int]
        Leading shape of the output. `cshape[0]` is taken as the number of
        unique times and `cshape[1]` as the number of rows per time.
    tidxs : np.ndarray
        Per-row index along the first output axis (forwarded to
        `load_col_chunk`).
    bidxs : np.ndarray
        Per-row index along the second output axis (forwarded to
        `load_col_chunk`).
    use_table_iter : bool
        Not used by this implementation. Presumably kept so the signature
        matches `read_col_conversion_numpy` - TODO confirm.
    time_chunksize : int
        Requested chunk size along the first (time) axis, handed to
        `da.core.normalize_chunks`.

    Returns
    -------
    da.Array
        Lazy array with chunks `(time_chunks, cshape[1]) + <row shape>` and
        the dtype of the first row of `col`.
    """

    # Use casacore to get the shape of a row for this column
    #################################################################################

    with table_manager.get_table() as tb_tool:
        first_row = tb_tool.row(col)[0][col]

    # A cell that is not an ndarray is treated as a scalar (no extra axes)
    if isinstance(first_row, np.ndarray):
        extra_dimensions = first_row.shape
    else:
        extra_dimensions = ()

    # Use dask primitives to lazily read chunks of data from the MeasurementSet
    # Takes inspiration from dask_image https://image.dask.org/en/latest/
    #################################################################################

    # Get dtype of the column. Wrap in numpy array in case of scalar column
    col_dtype = np.array(first_row).dtype

    # Get the number of rows for a single TIME value
    num_utimes = cshape[0]
    rows_per_time = cshape[1]

    # Calculate the chunks of unique times that gives the target chunk sizes
    tmp_chunks = da.core.normalize_chunks(time_chunksize, (num_utimes,))[0]

    # Translate each chunk of unique times into a [start, end) range of table
    # rows. `utimes_done` counts the unique times covered by earlier chunks.
    utimes_done = 0
    start_end_rows = []
    for chunk in tmp_chunks:
        start = utimes_done * rows_per_time
        end = (utimes_done + chunk) * rows_per_time
        start_end_rows.append((start, end))
        utimes_done += chunk

    # Store the start and end rows that should be read for the chunk:
    # one (1, 2) block per chunk, so every block carries exactly one range.
    arr_start_end_rows = da.from_array(start_end_rows, chunks=(1, 2))

    # Specify the output chunk shape produced by `load_col_chunk`
    output_chunkshape = (tmp_chunks, cshape[1]) + extra_dimensions

    # Apply `load_col_chunk` to each chunk. The (start, end) axis is dropped
    # and replaced by the baseline axis plus the per-row axes.
    data = arr_start_end_rows.map_blocks(
        load_col_chunk,
        table_manager=table_manager,
        col_name=col,
        col_dtype=col_dtype,
        tidxs=tidxs,
        bidxs=bidxs,
        rows_per_time=rows_per_time,
        cshape=cshape,
        extra_dimensions=extra_dimensions,
        drop_axis=[1],
        new_axis=list(range(1, len(cshape + extra_dimensions))),
        meta=np.array([], dtype=col_dtype),
        chunks=output_chunkshape,
    )

    return data
|
|
1274
1397
|
|
|
1275
|
-
# Use built-in casacore table iterator to populate the data column by unique times.
|
|
1276
|
-
if use_table_iter:
|
|
1277
|
-
start_row = 0
|
|
1278
|
-
for ts in tb_tool.iter("TIME", sort=False):
|
|
1279
|
-
num_rows = ts.nrows()
|
|
1280
1398
|
|
|
1281
|
-
|
|
1282
|
-
|
|
1283
|
-
|
|
1284
|
-
|
|
1399
|
+
def load_col_chunk(
    x,
    table_manager,
    col_name,
    col_dtype,
    tidxs,
    bidxs,
    rows_per_time,
    cshape,
    extra_dimensions,
):
    """
    Read one contiguous range of table rows for a column and scatter it into
    a (time, baseline, ...) shaped array.

    Parameters
    ----------
    x : array-like
        Block holding the row range: `x[0][0]` is the first row to read and
        `x[0][1]` is one past the last row.
    table_manager : TableManager
        Object whose `get_table()` result is used as a context manager to
        access the table.
    col_name : str
        Name of the column to read.
    col_dtype : np.dtype
        dtype used for the read buffer and for the returned array.
    tidxs : np.ndarray
        Per-row index along the first output axis. It is sliced with the
        same row range as the table read.
    bidxs : np.ndarray
        Per-row index along the second output axis, sliced likewise.
    rows_per_time : int
        Number of table rows per unique time. The row range must be a whole
        multiple of it.
    cshape : Tuple[int]
        Only `cshape[1]` is used, as the length of the second output axis.
    extra_dimensions : Tuple[int]
        Shape of a single row of the column (`()` for scalar columns).

    Returns
    -------
    np.ndarray
        Array of shape `(num_rows // rows_per_time, cshape[1]) +
        extra_dimensions`. Positions not addressed by `tidxs`/`bidxs` keep
        the fill value.
    """
    start_row = x[0][0]
    end_row = x[0][1]
    num_rows = end_row - start_row

    # NOTE(review): np.nan is used as the fill value for every dtype. NaN is
    # not representable in integer columns - confirm whether a dtype-aware
    # pad value is wanted here.

    # An empty row range has nothing to read and no first time index to
    # re-base against (`tidxs_slc[0]` below would raise IndexError), so
    # return the empty result without opening the table.
    if num_rows == 0:
        return np.full((0, cshape[1]) + extra_dimensions, np.nan, dtype=col_dtype)

    assert (num_rows % rows_per_time) == 0
    num_utimes = num_rows // rows_per_time

    # Create memory buffer to populate with data from disk
    row_data = np.full((num_rows,) + extra_dimensions, np.nan, dtype=col_dtype)

    # Load data from the column
    # Release the casacore table as soon as possible
    with table_manager.get_table() as tb_tool:
        tb_tool.getcolnp(col_name, row_data, startrow=start_row, nrow=num_rows)

    # Initialise reshaped numpy array
    reshaped_data = np.full(
        (num_utimes, cshape[1]) + extra_dimensions, np.nan, dtype=col_dtype
    )

    # Create slice object for readability
    slc = slice(start_row, end_row)
    tidxs_slc = tidxs[slc]

    # Indices of reshaped_data along time differ from values in tidxs, so
    # re-base them to this chunk. Assumes first time is earliest time
    tidxs_slc = tidxs_slc - tidxs_slc[0]
    bidxs_slc = bidxs[slc]

    # Populate `reshaped_data` with `row_data`
    reshaped_data[tidxs_slc, bidxs_slc] = row_data

    return reshaped_data
|
|
@@ -22,3 +22,25 @@ def open_query(table: tables.table, query: str) -> Generator[tables.table, None,
|
|
|
22
22
|
yield ttq
|
|
23
23
|
finally:
|
|
24
24
|
ttq.close()
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
class TableManager:
    """
    Remembers a table path and a TaQL selection so that the selected table
    can be (re)opened on demand with `get_table()`.
    """

    def __init__(
        self,
        infile: str,
        taql_where: str = "",
    ):
        """
        Parameters
        ----------
        infile : str
            Path of the table to open.
        taql_where : str
            TaQL selection clause, including the leading "where" keyword
            (may be empty for no selection).
        """
        self.infile = infile
        # Full clause, appended verbatim to the select statement in get_table()
        self.taql_where = taql_where
        # Same selection without its leading "where" keyword
        self.taql_query = self._strip_where_keyword(taql_where)

    @staticmethod
    def _strip_where_keyword(taql_where: str) -> str:
        """
        Return `taql_where` without its leading "where" keyword.

        Only the leading keyword is removed, so a "where" inside a nested
        sub-select stays intact. The keyword is matched case-insensitively.
        A clause that does not start with it is returned unchanged.
        """
        keyword = "where "
        stripped = taql_where.lstrip()
        if stripped[: len(keyword)].lower() == keyword:
            return stripped[len(keyword) :]
        return taql_where

    def get_table(self) -> "tables.table":
        """
        Open `infile` read-only and return the result of the TaQL selection
        on it.

        A new table is opened on every call. The parent table is opened in a
        `with` block, so it is closed when this method returns.
        """
        # Performance note:
        # table.query("(DATA_DESC_ID = 0)") is slightly faster than
        # tables.taql("select * from $table (DATA_DESC_ID = 0)")
        with tables.table(
            self.infile, readonly=True, lockoptions={"option": "usernoread"}, ack=False
        ) as mtable:
            # NOTE(review): `$mtable` in the query presumably resolves to the
            # local variable `mtable` (python-casacore variable substitution)
            # - keep the two names in sync and confirm before renaming.
            query = f"select * from $mtable {self.taql_where}"
            return tables.taql(query)
|