xarray-ms 0.2.5__tar.gz → 0.2.6__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (24)
  1. {xarray_ms-0.2.5 → xarray_ms-0.2.6}/PKG-INFO +1 -1
  2. {xarray_ms-0.2.5 → xarray_ms-0.2.6}/pyproject.toml +2 -2
  3. {xarray_ms-0.2.5 → xarray_ms-0.2.6}/xarray_ms/backend/msv2/factories/antenna.py +4 -3
  4. {xarray_ms-0.2.5 → xarray_ms-0.2.6}/xarray_ms/backend/msv2/factories/correlated.py +15 -8
  5. xarray_ms-0.2.6/xarray_ms/backend/msv2/imputation.py +95 -0
  6. {xarray_ms-0.2.5 → xarray_ms-0.2.6}/xarray_ms/backend/msv2/structure.py +9 -1
  7. {xarray_ms-0.2.5 → xarray_ms-0.2.6}/xarray_ms/errors.py +9 -0
  8. {xarray_ms-0.2.5 → xarray_ms-0.2.6}/LICENSE +0 -0
  9. {xarray_ms-0.2.5 → xarray_ms-0.2.6}/README.rst +0 -0
  10. {xarray_ms-0.2.5 → xarray_ms-0.2.6}/xarray_ms/__init__.py +0 -0
  11. {xarray_ms-0.2.5 → xarray_ms-0.2.6}/xarray_ms/backend/msv2/array.py +0 -0
  12. {xarray_ms-0.2.5 → xarray_ms-0.2.6}/xarray_ms/backend/msv2/encoders.py +0 -0
  13. {xarray_ms-0.2.5 → xarray_ms-0.2.6}/xarray_ms/backend/msv2/entrypoint.py +0 -0
  14. {xarray_ms-0.2.5 → xarray_ms-0.2.6}/xarray_ms/backend/msv2/entrypoint_utils.py +0 -0
  15. {xarray_ms-0.2.5 → xarray_ms-0.2.6}/xarray_ms/backend/msv2/factories/__init__.py +0 -0
  16. {xarray_ms-0.2.5 → xarray_ms-0.2.6}/xarray_ms/backend/msv2/partition.py +0 -0
  17. {xarray_ms-0.2.5 → xarray_ms-0.2.6}/xarray_ms/casa_types.py +0 -0
  18. {xarray_ms-0.2.5 → xarray_ms-0.2.6}/xarray_ms/msv4_types.py +0 -0
  19. {xarray_ms-0.2.5 → xarray_ms-0.2.6}/xarray_ms/multiton.py +0 -0
  20. {xarray_ms-0.2.5 → xarray_ms-0.2.6}/xarray_ms/query.py +0 -0
  21. {xarray_ms-0.2.5 → xarray_ms-0.2.6}/xarray_ms/testing/__init__.py +0 -0
  22. {xarray_ms-0.2.5 → xarray_ms-0.2.6}/xarray_ms/testing/simulator.py +0 -0
  23. {xarray_ms-0.2.5 → xarray_ms-0.2.6}/xarray_ms/testing/utils.py +0 -0
  24. {xarray_ms-0.2.5 → xarray_ms-0.2.6}/xarray_ms/utils.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: xarray-ms
3
- Version: 0.2.5
3
+ Version: 0.2.6
4
4
  Summary: xarray MSv4 views over MSv2 Measurement Sets
5
5
  Author: Simon Perkins
6
6
  Author-email: simon.perkins@gmail.com
@@ -1,6 +1,6 @@
1
1
  [tool.poetry]
2
2
  name = "xarray-ms"
3
- version = "0.2.5"
3
+ version = "0.2.6"
4
4
  description = "xarray MSv4 views over MSv2 Measurement Sets"
5
5
  authors = ["Simon Perkins <simon.perkins@gmail.com>"]
6
6
  readme = "README.rst"
@@ -58,7 +58,7 @@ build-backend = "poetry.core.masonry.api"
58
58
  # github_url = "https://github.com/<user or organization>/<project>/"
59
59
 
60
60
  [tool.tbump.version]
61
- current = "0.2.5"
61
+ current = "0.2.6"
62
62
 
63
63
  # Example of a semver regexp.
64
64
  # Make sure this matches current_version before
@@ -3,6 +3,7 @@ from typing import Dict, Mapping
3
3
  import numpy as np
4
4
  from xarray import Dataset, Variable
5
5
 
6
+ from xarray_ms.backend.msv2.imputation import maybe_impute_observation_table
6
7
  from xarray_ms.backend.msv2.structure import MSv2StructureFactory, PartitionKeyT
7
8
  from xarray_ms.errors import InvalidMeasurementSet
8
9
  from xarray_ms.multiton import Multiton
@@ -26,13 +27,13 @@ class AntennaDatasetFactory:
26
27
  self._subtable_factories = subtable_factories
27
28
 
28
29
  def get_dataset(self) -> Mapping[str, Variable]:
29
- structure = self._structure_factory.instance
30
- partition = structure[self._partition_key]
30
+ partition = self._structure_factory.instance[self._partition_key]
31
31
  ants = self._subtable_factories["ANTENNA"].instance
32
32
  feeds = self._subtable_factories["FEED"].instance
33
33
  obs = self._subtable_factories["OBSERVATION"].instance
34
34
 
35
- telescope_name = obs["TELESCOPE_NAME"][partition.obs_id].as_py()
35
+ obs = maybe_impute_observation_table(obs, [partition.obs_id])
36
+ telescope_name = obs["TELESCOPE_NAME"][0].as_py()
36
37
 
37
38
  import pyarrow.compute as pac
38
39
 
@@ -18,6 +18,10 @@ from xarray_ms.backend.msv2.encoders import (
18
18
  QuantityCoder,
19
19
  TimeCoder,
20
20
  )
21
+ from xarray_ms.backend.msv2.imputation import (
22
+ maybe_impute_field_table,
23
+ maybe_impute_observation_table,
24
+ )
21
25
  from xarray_ms.backend.msv2.structure import MSv2StructureFactory, PartitionKeyT
22
26
  from xarray_ms.casa_types import ColumnDesc, FrequencyMeasures, Polarisations
23
27
  from xarray_ms.errors import IrregularGridWarning
@@ -222,6 +226,7 @@ class CorrelatedDatasetFactory:
222
226
  else:
223
227
  data_vars.append(("WEIGHT", self._variable_from_column("WEIGHT_ROW", dim_sizes)))
224
228
 
229
+ field = maybe_impute_field_table(field, partition.field_ids)
225
230
  field_names = field.take(partition.field_ids)["NAME"].to_numpy()
226
231
 
227
232
  # Add coordinates indexing coordinates
@@ -255,8 +260,14 @@ class CorrelatedDatasetFactory:
255
260
  time_coder = TimeCoder("TIME", self._main_column_descs)
256
261
 
257
262
  if partition.interval.size == 1:
263
+ # Single unique value
258
264
  time_attrs = {"integration_time": partition.interval.item()}
265
+ elif np.allclose(partition.interval[:, None], partition.interval[None, :]):
266
+ # Tolerate some jitter in the unique values
267
+ time_attrs = {"integration_time": np.mean(partition.interval)}
259
268
  else:
269
+ # There are multiple unique interval values,
270
+ # a regular grid isn't possible
260
271
  warnings.warn(
261
272
  f"Missing/Multiple intervals {partition.interval} "
262
273
  f"found in partition {self._partition_key}. "
@@ -313,19 +324,15 @@ class CorrelatedDatasetFactory:
313
324
  return FrozenDict(sorted(data_vars + coordinates))
314
325
 
315
326
  def _observation_info(self) -> Dict[str, Any]:
316
- structure = self._structure_factory.instance
317
- partition = structure[self._partition_key]
327
+ partition = self._structure_factory.instance[self._partition_key]
318
328
  obs = self._subtable_factories["OBSERVATION"].instance
319
- observer = obs["OBSERVER"][partition.obs_id].as_py()
320
- project = obs["PROJECT"][partition.obs_id].as_py()
321
- # TODO: A Measures conversions is needed here
322
- release_date = obs["RELEASE_DATE"][partition.obs_id].as_py() # noqa: F841
329
+ obs = maybe_impute_observation_table(obs, [partition.obs_id])
323
330
 
324
331
  return dict(
325
332
  sorted(
326
333
  {
327
- "observer": observer,
328
- "project": project,
334
+ "observer": obs["OBSERVER"][partition.obs_id].as_py(),
335
+ "project": obs["PROJECT"][partition.obs_id].as_py(),
329
336
  }.items()
330
337
  )
331
338
  )
@@ -0,0 +1,95 @@
1
+ from __future__ import annotations
2
+
3
+ import warnings
4
+ from typing import TYPE_CHECKING
5
+
6
+ import numpy as np
7
+ import numpy.typing as npt
8
+
9
+ from xarray_ms.errors import ImputedMetadataWarning
10
+
11
+ if TYPE_CHECKING:
12
+ import pyarrow as pa
13
+
14
+
15
+ def _maybe_return_table_or_max_id(
16
+ table: pa.Table, table_name: str, ids: npt.NDArray[np.int32], id_column_name: str
17
+ ) -> pa.Table | int:
18
+ """Returns the existing table if a row entry exists,
19
+ else returns the maximum id"""
20
+ max_id = np.max(ids)
21
+
22
+ if max_id < len(table):
23
+ return table
24
+
25
+ warnings.warn(
26
+ f"No row exists in the {table_name} table of length {len(table)} "
27
+ f"for {id_column_name}={max_id}. "
28
+ f"Artificial metadata will be substituted.",
29
+ ImputedMetadataWarning,
30
+ )
31
+
32
+ return max_id
33
+
34
+
35
+ def maybe_impute_field_table(
36
+ field: pa.Table, field_id: npt.NDArray[np.int32]
37
+ ) -> pa.Table:
38
+ """Generates a FIELD subtable if there are no row ids
39
+ associated with the given FIELD_ID values"""
40
+
41
+ import pyarrow as pa
42
+
43
+ result = _maybe_return_table_or_max_id(field, "FIELD", field_id, "FIELD_ID")
44
+ if isinstance(result, pa.Table):
45
+ return result
46
+
47
+ return pa.Table.from_pydict(
48
+ {
49
+ "NAME": np.array([f"UNKNOWN-{i}" for i in range(result + 1)], dtype=object),
50
+ "SOURCE_ID": np.zeros(result + 1, np.int32),
51
+ }
52
+ )
53
+
54
+
55
+ def maybe_impute_state_table(
56
+ state: pa.Table, state_id: npt.NDArray[np.int32]
57
+ ) -> pa.Table:
58
+ """Generates a STATE subtable if there are no row ids
59
+ associated with the given STATE_ID values"""
60
+ import pyarrow as pa
61
+
62
+ result = _maybe_return_table_or_max_id(state, "STATE", state_id, "STATE_ID")
63
+ if isinstance(result, pa.Table):
64
+ return result
65
+
66
+ return pa.Table.from_pydict(
67
+ {
68
+ "OBS_MODE": np.array(["UNSPECIFIED"] * (result + 1), dtype=object),
69
+ "SUB_SCAN": np.zeros(result + 1, np.int32),
70
+ }
71
+ )
72
+
73
+
74
+ def maybe_impute_observation_table(
75
+ observation: pa.Table, observation_id: npt.NDArray[np.int32]
76
+ ) -> pa.Table:
77
+ """Generates an OBSERVATION table if there are no row ids
78
+ associated with the given OBSERVATION_ID values"""
79
+ import pyarrow as pa
80
+
81
+ result = _maybe_return_table_or_max_id(
82
+ observation, "OBSERVATION", observation_id, "OBSERVATION_ID"
83
+ )
84
+ if isinstance(result, pa.Table):
85
+ return result
86
+
87
+ unknown = np.array(["unknown"] * (result + 1), dtype=object)
88
+
89
+ return pa.Table.from_pydict(
90
+ {
91
+ "OBSERVER": unknown,
92
+ "PROJECT": unknown,
93
+ "TELESCOPE_NAME": unknown,
94
+ }
95
+ )
@@ -27,6 +27,10 @@ import pyarrow as pa
27
27
  from arcae.lib.arrow_tables import Table
28
28
  from cacheout import Cache
29
29
 
30
+ from xarray_ms.backend.msv2.imputation import (
31
+ maybe_impute_field_table,
32
+ maybe_impute_state_table,
33
+ )
30
34
  from xarray_ms.backend.msv2.partition import PartitionKeyT, TablePartitioner
31
35
  from xarray_ms.errors import (
32
36
  InvalidMeasurementSet,
@@ -233,7 +237,7 @@ class MSv2StructureFactory:
233
237
  _epoch: str
234
238
  _auto_corrs: bool
235
239
  _STRUCTURE_CACHE: ClassVar[Cache] = Cache(
236
- maxsize=100, ttl=60, on_get=on_get_keep_alive
240
+ maxsize=100, ttl=5 * 60, on_get=on_get_keep_alive
237
241
  )
238
242
 
239
243
  def __init__(
@@ -388,6 +392,7 @@ class MSv2Structure(Mapping):
388
392
  ) -> npt.NDArray[np.int32]:
389
393
  """Constructs a SOURCE_ID array from MAIN.FIELD_ID
390
394
  broadcast against FIELD.SOURCE_ID"""
395
+ field = maybe_impute_field_table(field, field_id)
391
396
  field_source_id = field["SOURCE_ID"].to_numpy()
392
397
  source_id = np.empty_like(field_id)
393
398
  chunk = (len(source_id) + ncpus - 1) // ncpus
@@ -411,6 +416,7 @@ class MSv2Structure(Mapping):
411
416
  ) -> npt.NDArray[np.int32]:
412
417
  """Constructs a SUB_SCAN_NUMBER array from MAIN.STATE_ID
413
418
  broadcast against STATE.SUB_SCAN_NUMBER"""
419
+ state = maybe_impute_state_table(state, state_id)
414
420
  state_ssn = state["SUB_SCAN"].to_numpy()
415
421
  subscan_nr = np.empty_like(state_id)
416
422
  chunk = (len(state_id) + ncpus - 1) // ncpus
@@ -434,6 +440,8 @@ class MSv2Structure(Mapping):
434
440
  ) -> Tuple[npt.NDArray[np.int32], Dict[str, List[int]]]:
435
441
  """Constructs an OBS_MODE_ID array from MAIN.STATE_ID broadcast
436
442
  against unique entries in STATE.OBS_MODE"""
443
+
444
+ state = maybe_impute_state_table(state, state_id)
437
445
  obs_mode = state["OBS_MODE"].to_numpy()
438
446
 
439
447
  # Map unique observation modes to state_ids
@@ -3,6 +3,15 @@ class IrregularGridWarning(UserWarning):
3
3
  with each timestep are not homogenous"""
4
4
 
5
5
 
6
+ class MissingMetadataWarning(UserWarning):
7
+ """Warning raised when metadata is missing"""
8
+
9
+
10
+ class ImputedMetadataWarning(MissingMetadataWarning):
11
+ """Warning raised when metadata is imputed
12
+ if the original metadata is missing"""
13
+
14
+
6
15
  class InvalidMeasurementSet(ValueError):
7
16
  """Raised when the Measurement Set foreign key indexing is invalid"""
8
17
 
File without changes
File without changes
File without changes
File without changes