xarray-ms 0.2.5__tar.gz → 0.2.6__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (24)
  1. {xarray_ms-0.2.5 → xarray_ms-0.2.6}/PKG-INFO +1 -1
  2. {xarray_ms-0.2.5 → xarray_ms-0.2.6}/pyproject.toml +2 -2
  3. {xarray_ms-0.2.5 → xarray_ms-0.2.6}/xarray_ms/backend/msv2/factories/antenna.py +4 -3
  4. {xarray_ms-0.2.5 → xarray_ms-0.2.6}/xarray_ms/backend/msv2/factories/correlated.py +15 -8
  5. xarray_ms-0.2.6/xarray_ms/backend/msv2/imputation.py +95 -0
  6. {xarray_ms-0.2.5 → xarray_ms-0.2.6}/xarray_ms/backend/msv2/structure.py +9 -1
  7. {xarray_ms-0.2.5 → xarray_ms-0.2.6}/xarray_ms/errors.py +9 -0
  8. {xarray_ms-0.2.5 → xarray_ms-0.2.6}/LICENSE +0 -0
  9. {xarray_ms-0.2.5 → xarray_ms-0.2.6}/README.rst +0 -0
  10. {xarray_ms-0.2.5 → xarray_ms-0.2.6}/xarray_ms/__init__.py +0 -0
  11. {xarray_ms-0.2.5 → xarray_ms-0.2.6}/xarray_ms/backend/msv2/array.py +0 -0
  12. {xarray_ms-0.2.5 → xarray_ms-0.2.6}/xarray_ms/backend/msv2/encoders.py +0 -0
  13. {xarray_ms-0.2.5 → xarray_ms-0.2.6}/xarray_ms/backend/msv2/entrypoint.py +0 -0
  14. {xarray_ms-0.2.5 → xarray_ms-0.2.6}/xarray_ms/backend/msv2/entrypoint_utils.py +0 -0
  15. {xarray_ms-0.2.5 → xarray_ms-0.2.6}/xarray_ms/backend/msv2/factories/__init__.py +0 -0
  16. {xarray_ms-0.2.5 → xarray_ms-0.2.6}/xarray_ms/backend/msv2/partition.py +0 -0
  17. {xarray_ms-0.2.5 → xarray_ms-0.2.6}/xarray_ms/casa_types.py +0 -0
  18. {xarray_ms-0.2.5 → xarray_ms-0.2.6}/xarray_ms/msv4_types.py +0 -0
  19. {xarray_ms-0.2.5 → xarray_ms-0.2.6}/xarray_ms/multiton.py +0 -0
  20. {xarray_ms-0.2.5 → xarray_ms-0.2.6}/xarray_ms/query.py +0 -0
  21. {xarray_ms-0.2.5 → xarray_ms-0.2.6}/xarray_ms/testing/__init__.py +0 -0
  22. {xarray_ms-0.2.5 → xarray_ms-0.2.6}/xarray_ms/testing/simulator.py +0 -0
  23. {xarray_ms-0.2.5 → xarray_ms-0.2.6}/xarray_ms/testing/utils.py +0 -0
  24. {xarray_ms-0.2.5 → xarray_ms-0.2.6}/xarray_ms/utils.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: xarray-ms
3
- Version: 0.2.5
3
+ Version: 0.2.6
4
4
  Summary: xarray MSv4 views over MSv2 Measurement Sets
5
5
  Author: Simon Perkins
6
6
  Author-email: simon.perkins@gmail.com
@@ -1,6 +1,6 @@
1
1
  [tool.poetry]
2
2
  name = "xarray-ms"
3
- version = "0.2.5"
3
+ version = "0.2.6"
4
4
  description = "xarray MSv4 views over MSv2 Measurement Sets"
5
5
  authors = ["Simon Perkins <simon.perkins@gmail.com>"]
6
6
  readme = "README.rst"
@@ -58,7 +58,7 @@ build-backend = "poetry.core.masonry.api"
58
58
  # github_url = "https://github.com/<user or organization>/<project>/"
59
59
 
60
60
  [tool.tbump.version]
61
- current = "0.2.5"
61
+ current = "0.2.6"
62
62
 
63
63
  # Example of a semver regexp.
64
64
  # Make sure this matches current_version before
@@ -3,6 +3,7 @@ from typing import Dict, Mapping
3
3
  import numpy as np
4
4
  from xarray import Dataset, Variable
5
5
 
6
+ from xarray_ms.backend.msv2.imputation import maybe_impute_observation_table
6
7
  from xarray_ms.backend.msv2.structure import MSv2StructureFactory, PartitionKeyT
7
8
  from xarray_ms.errors import InvalidMeasurementSet
8
9
  from xarray_ms.multiton import Multiton
@@ -26,13 +27,13 @@ class AntennaDatasetFactory:
26
27
  self._subtable_factories = subtable_factories
27
28
 
28
29
  def get_dataset(self) -> Mapping[str, Variable]:
29
- structure = self._structure_factory.instance
30
- partition = structure[self._partition_key]
30
+ partition = self._structure_factory.instance[self._partition_key]
31
31
  ants = self._subtable_factories["ANTENNA"].instance
32
32
  feeds = self._subtable_factories["FEED"].instance
33
33
  obs = self._subtable_factories["OBSERVATION"].instance
34
34
 
35
- telescope_name = obs["TELESCOPE_NAME"][partition.obs_id].as_py()
35
+ obs = maybe_impute_observation_table(obs, [partition.obs_id])
36
+ telescope_name = obs["TELESCOPE_NAME"][0].as_py()
36
37
 
37
38
  import pyarrow.compute as pac
38
39
 
@@ -18,6 +18,10 @@ from xarray_ms.backend.msv2.encoders import (
18
18
  QuantityCoder,
19
19
  TimeCoder,
20
20
  )
21
+ from xarray_ms.backend.msv2.imputation import (
22
+ maybe_impute_field_table,
23
+ maybe_impute_observation_table,
24
+ )
21
25
  from xarray_ms.backend.msv2.structure import MSv2StructureFactory, PartitionKeyT
22
26
  from xarray_ms.casa_types import ColumnDesc, FrequencyMeasures, Polarisations
23
27
  from xarray_ms.errors import IrregularGridWarning
@@ -222,6 +226,7 @@ class CorrelatedDatasetFactory:
222
226
  else:
223
227
  data_vars.append(("WEIGHT", self._variable_from_column("WEIGHT_ROW", dim_sizes)))
224
228
 
229
+ field = maybe_impute_field_table(field, partition.field_ids)
225
230
  field_names = field.take(partition.field_ids)["NAME"].to_numpy()
226
231
 
227
232
  # Add coordinates indexing coordinates
@@ -255,8 +260,14 @@ class CorrelatedDatasetFactory:
255
260
  time_coder = TimeCoder("TIME", self._main_column_descs)
256
261
 
257
262
  if partition.interval.size == 1:
263
+ # Single unique value
258
264
  time_attrs = {"integration_time": partition.interval.item()}
265
+ elif np.allclose(partition.interval[:, None], partition.interval[None, :]):
266
+ # Tolerate some jitter in the unique values
267
+ time_attrs = {"integration_time": np.mean(partition.interval)}
259
268
  else:
269
+ # There are multiple unique interval values,
270
+ # a regular grid isn't possible
260
271
  warnings.warn(
261
272
  f"Missing/Multiple intervals {partition.interval} "
262
273
  f"found in partition {self._partition_key}. "
@@ -313,19 +324,15 @@ class CorrelatedDatasetFactory:
313
324
  return FrozenDict(sorted(data_vars + coordinates))
314
325
 
315
326
  def _observation_info(self) -> Dict[str, Any]:
316
- structure = self._structure_factory.instance
317
- partition = structure[self._partition_key]
327
+ partition = self._structure_factory.instance[self._partition_key]
318
328
  obs = self._subtable_factories["OBSERVATION"].instance
319
- observer = obs["OBSERVER"][partition.obs_id].as_py()
320
- project = obs["PROJECT"][partition.obs_id].as_py()
321
- # TODO: A Measures conversions is needed here
322
- release_date = obs["RELEASE_DATE"][partition.obs_id].as_py() # noqa: F841
329
+ obs = maybe_impute_observation_table(obs, [partition.obs_id])
323
330
 
324
331
  return dict(
325
332
  sorted(
326
333
  {
327
- "observer": observer,
328
- "project": project,
334
+ "observer": obs["OBSERVER"][partition.obs_id].as_py(),
335
+ "project": obs["PROJECT"][partition.obs_id].as_py(),
329
336
  }.items()
330
337
  )
331
338
  )
@@ -0,0 +1,95 @@
1
+ from __future__ import annotations
2
+
3
+ import warnings
4
+ from typing import TYPE_CHECKING
5
+
6
+ import numpy as np
7
+ import numpy.typing as npt
8
+
9
+ from xarray_ms.errors import ImputedMetadataWarning
10
+
11
+ if TYPE_CHECKING:
12
+ import pyarrow as pa
13
+
14
+
15
+ def _maybe_return_table_or_max_id(
16
+ table: pa.Table, table_name: str, ids: npt.NDArray[np.int32], id_column_name: str
17
+ ) -> pa.Table | int:
18
+ """Returns the existing table if a row entry exists,
19
+ else returns the maximum id"""
20
+ max_id = np.max(ids)
21
+
22
+ if max_id < len(table):
23
+ return table
24
+
25
+ warnings.warn(
26
+ f"No row exists in the {table_name} table of length {len(table)} "
27
+ f"for {id_column_name}={max_id}. "
28
+ f"Artificial metadata will be substituted.",
29
+ ImputedMetadataWarning,
30
+ )
31
+
32
+ return max_id
33
+
34
+
35
+ def maybe_impute_field_table(
36
+ field: pa.Table, field_id: npt.NDArray[np.int32]
37
+ ) -> pa.Table:
38
+ """Generates a FIELD subtable if there are no row ids
39
+ associated with the given FIELD_ID values"""
40
+
41
+ import pyarrow as pa
42
+
43
+ result = _maybe_return_table_or_max_id(field, "FIELD", field_id, "FIELD_ID")
44
+ if isinstance(result, pa.Table):
45
+ return result
46
+
47
+ return pa.Table.from_pydict(
48
+ {
49
+ "NAME": np.array([f"UNKNOWN-{i}" for i in range(result + 1)], dtype=object),
50
+ "SOURCE_ID": np.zeros(result + 1, np.int32),
51
+ }
52
+ )
53
+
54
+
55
+ def maybe_impute_state_table(
56
+ state: pa.Table, state_id: npt.NDArray[np.int32]
57
+ ) -> pa.Table:
58
+ """Generates a STATE subtable if there are no row ids
59
+ associated with the given STATE_ID values"""
60
+ import pyarrow as pa
61
+
62
+ result = _maybe_return_table_or_max_id(state, "STATE", state_id, "STATE_ID")
63
+ if isinstance(result, pa.Table):
64
+ return result
65
+
66
+ return pa.Table.from_pydict(
67
+ {
68
+ "OBS_MODE": np.array(["UNSPECIFIED"] * (result + 1), dtype=object),
69
+ "SUB_SCAN": np.zeros(result + 1, np.int32),
70
+ }
71
+ )
72
+
73
+
74
+ def maybe_impute_observation_table(
75
+ observation: pa.Table, observation_id: npt.NDArray[np.int32]
76
+ ) -> pa.Table:
77
+ """Generates an OBSERVATION table if there are no row ids
78
+ associated with the given OBSERVATION_ID values"""
79
+ import pyarrow as pa
80
+
81
+ result = _maybe_return_table_or_max_id(
82
+ observation, "OBSERVATION", observation_id, "OBSERVATION_ID"
83
+ )
84
+ if isinstance(result, pa.Table):
85
+ return result
86
+
87
+ unknown = np.array(["unknown"] * (result + 1), dtype=object)
88
+
89
+ return pa.Table.from_pydict(
90
+ {
91
+ "OBSERVER": unknown,
92
+ "PROJECT": unknown,
93
+ "TELESCOPE_NAME": unknown,
94
+ }
95
+ )
@@ -27,6 +27,10 @@ import pyarrow as pa
27
27
  from arcae.lib.arrow_tables import Table
28
28
  from cacheout import Cache
29
29
 
30
+ from xarray_ms.backend.msv2.imputation import (
31
+ maybe_impute_field_table,
32
+ maybe_impute_state_table,
33
+ )
30
34
  from xarray_ms.backend.msv2.partition import PartitionKeyT, TablePartitioner
31
35
  from xarray_ms.errors import (
32
36
  InvalidMeasurementSet,
@@ -233,7 +237,7 @@ class MSv2StructureFactory:
233
237
  _epoch: str
234
238
  _auto_corrs: bool
235
239
  _STRUCTURE_CACHE: ClassVar[Cache] = Cache(
236
- maxsize=100, ttl=60, on_get=on_get_keep_alive
240
+ maxsize=100, ttl=5 * 60, on_get=on_get_keep_alive
237
241
  )
238
242
 
239
243
  def __init__(
@@ -388,6 +392,7 @@ class MSv2Structure(Mapping):
388
392
  ) -> npt.NDArray[np.int32]:
389
393
  """Constructs a SOURCE_ID array from MAIN.FIELD_ID
390
394
  broadcast against FIELD.SOURCE_ID"""
395
+ field = maybe_impute_field_table(field, field_id)
391
396
  field_source_id = field["SOURCE_ID"].to_numpy()
392
397
  source_id = np.empty_like(field_id)
393
398
  chunk = (len(source_id) + ncpus - 1) // ncpus
@@ -411,6 +416,7 @@ class MSv2Structure(Mapping):
411
416
  ) -> npt.NDArray[np.int32]:
412
417
  """Constructs a SUB_SCAN_NUMBER array from MAIN.STATE_ID
413
418
  broadcast against STATE.SUB_SCAN_NUMBER"""
419
+ state = maybe_impute_state_table(state, state_id)
414
420
  state_ssn = state["SUB_SCAN"].to_numpy()
415
421
  subscan_nr = np.empty_like(state_id)
416
422
  chunk = (len(state_id) + ncpus - 1) // ncpus
@@ -434,6 +440,8 @@ class MSv2Structure(Mapping):
434
440
  ) -> Tuple[npt.NDArray[np.int32], Dict[str, List[int]]]:
435
441
  """Constructs an OBS_MODE_ID array from MAIN.STATE_ID broadcast
436
442
  against unique entries in STATE.OBS_MODE"""
443
+
444
+ state = maybe_impute_state_table(state, state_id)
437
445
  obs_mode = state["OBS_MODE"].to_numpy()
438
446
 
439
447
  # Map unique observation modes to state_ids
@@ -3,6 +3,15 @@ class IrregularGridWarning(UserWarning):
3
3
  with each timestep are not homogenous"""
4
4
 
5
5
 
6
+ class MissingMetadataWarning(UserWarning):
7
+ """Warning raised when metadata is missing"""
8
+
9
+
10
+ class ImputedMetadataWarning(MissingMetadataWarning):
11
+ """Warning raised when metadata is imputed
12
+ if the original metadata is missing"""
13
+
14
+
6
15
  class InvalidMeasurementSet(ValueError):
7
16
  """Raised when the Measurement Set foreign key indexing is invalid"""
8
17
 
File without changes
File without changes
File without changes
File without changes