xarray-kat 0.0.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- xarray_kat-0.0.1/PKG-INFO +89 -0
- xarray_kat-0.0.1/README.rst +67 -0
- xarray_kat-0.0.1/pyproject.toml +86 -0
- xarray_kat-0.0.1/src/xarray_kat/__init__.py +2 -0
- xarray_kat-0.0.1/src/xarray_kat/array.py +308 -0
- xarray_kat-0.0.1/src/xarray_kat/async_loop.py +88 -0
- xarray_kat-0.0.1/src/xarray_kat/datatree_factory.py +511 -0
- xarray_kat-0.0.1/src/xarray_kat/entrypoint.py +155 -0
- xarray_kat-0.0.1/src/xarray_kat/errors.py +6 -0
- xarray_kat-0.0.1/src/xarray_kat/jwt.py +23 -0
- xarray_kat-0.0.1/src/xarray_kat/katdal_types.py +70 -0
- xarray_kat-0.0.1/src/xarray_kat/meerkat_chunk_manager.py +259 -0
- xarray_kat-0.0.1/src/xarray_kat/multiton.py +108 -0
- xarray_kat-0.0.1/src/xarray_kat/py.typed +0 -0
- xarray_kat-0.0.1/src/xarray_kat/stores/base_store.py +113 -0
- xarray_kat-0.0.1/src/xarray_kat/stores/flag_store.py +63 -0
- xarray_kat-0.0.1/src/xarray_kat/stores/http_store.py +48 -0
- xarray_kat-0.0.1/src/xarray_kat/stores/vfw_store.py +156 -0
- xarray_kat-0.0.1/src/xarray_kat/stores/vis_weight_flag_store_factory.py +228 -0
- xarray_kat-0.0.1/src/xarray_kat/stores/visibility_stores.py +154 -0
- xarray_kat-0.0.1/src/xarray_kat/stores/weight_store.py +102 -0
- xarray_kat-0.0.1/src/xarray_kat/third_party/vendored/katdal/__init__.py +0 -0
- xarray_kat-0.0.1/src/xarray_kat/third_party/vendored/katdal/applycal_minimal.py +744 -0
- xarray_kat-0.0.1/src/xarray_kat/third_party/vendored/katdal/categorical.py +812 -0
- xarray_kat-0.0.1/src/xarray_kat/third_party/vendored/katdal/dataset.py +1461 -0
- xarray_kat-0.0.1/src/xarray_kat/third_party/vendored/katdal/datasources_minimal.py +543 -0
- xarray_kat-0.0.1/src/xarray_kat/third_party/vendored/katdal/flags.py +54 -0
- xarray_kat-0.0.1/src/xarray_kat/third_party/vendored/katdal/sensordata.py +1011 -0
- xarray_kat-0.0.1/src/xarray_kat/third_party/vendored/katdal/spectral_window.py +222 -0
- xarray_kat-0.0.1/src/xarray_kat/third_party/vendored/katdal/van_vleck.py +128 -0
- xarray_kat-0.0.1/src/xarray_kat/third_party/vendored/katdal/vis_flags_weights_minimal.py +190 -0
- xarray_kat-0.0.1/src/xarray_kat/third_party/vendored/katdal/visdatav4_minimal.py +917 -0
- xarray_kat-0.0.1/src/xarray_kat/utils/__init__.py +140 -0
- xarray_kat-0.0.1/src/xarray_kat/utils/chunk_selection.py +114 -0
- xarray_kat-0.0.1/src/xarray_kat/utils/serialisation.py +85 -0
- xarray_kat-0.0.1/src/xarray_kat/xkat_types.py +62 -0
|
@@ -0,0 +1,89 @@
|
|
|
1
|
+
Metadata-Version: 2.3
|
|
2
|
+
Name: xarray-kat
|
|
3
|
+
Version: 0.0.1
|
|
4
|
+
Summary: Add your description here
|
|
5
|
+
Author: Simon Perkins
|
|
6
|
+
Author-email: Simon Perkins <simon.perkins@gmail.com>
|
|
7
|
+
Requires-Dist: katpoint>=0.10.2
|
|
8
|
+
Requires-Dist: katsdptelstate[rdb]>=1.0.0
|
|
9
|
+
Requires-Dist: tensorstore>=0.1.81
|
|
10
|
+
Requires-Dist: xarray>=2025.9.1
|
|
11
|
+
Requires-Dist: numba>=0.62.1 ; extra == 'numba'
|
|
12
|
+
Requires-Dist: cloudpickle>=3.1.1 ; extra == 'testing'
|
|
13
|
+
Requires-Dist: dask>=2025.9.1 ; extra == 'testing'
|
|
14
|
+
Requires-Dist: dill>=0.4.0 ; extra == 'testing'
|
|
15
|
+
Requires-Dist: katdal>=0.23 ; extra == 'testing'
|
|
16
|
+
Requires-Dist: pytest>=8.4.2 ; extra == 'testing'
|
|
17
|
+
Requires-Dist: pytest-httpserver>=1.1.0 ; extra == 'testing'
|
|
18
|
+
Requires-Python: >=3.11
|
|
19
|
+
Provides-Extra: numba
|
|
20
|
+
Provides-Extra: testing
|
|
21
|
+
Description-Content-Type: text/x-rst
|
|
22
|
+
|
|
23
|
+
Direct xarray views over the MeerKAT archive
|
|
24
|
+
============================================
|
|
25
|
+
|
|
26
|
+
This package presents an xarray view over observations in the MeerKAT archive.
|
|
27
|
+
|
|
28
|
+
Required Reading
|
|
29
|
+
----------------
|
|
30
|
+
|
|
31
|
+
You'll need some familiarity with xarray_. In particular:
|
|
32
|
+
|
|
33
|
+
- `Indexing and selecting data <xarray-indexing_>`_
|
|
34
|
+
- `Lazy Loading behaviour <xarray-lazy-indexing_>`_
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
Example Usage
|
|
38
|
+
-------------
|
|
39
|
+
|
|
40
|
+
At a basic level, one can use xarray's selection and lazy loading mechanisms to interact with
|
|
41
|
+
the data:
|
|
42
|
+
|
|
43
|
+
.. code-block:: python
|
|
44
|
+
|
|
45
|
+
import xarray_kat
|
|
46
|
+
import xarray
|
|
47
|
+
|
|
48
|
+
token = "eyFILLMEIN"
|
|
49
|
+
capture_block_id = 123456789
|
|
50
|
+
url = f"https://archive-gw-1.kat.ac.za/{capture_block_id}/{capture_block_id}_sdp_l0.full.rdb?token={token}"
|
|
51
|
+
|
|
52
|
+
# If the dataset is small you may be able to load it all in at once
|
|
53
|
+
dt = xarray.open_datatree(url, chunked_array_type="xarray-kat", chunks={})
|
|
54
|
+
dt.load()
|
|
55
|
+
|
|
56
|
+
# Otherwise one can select a small partition of the data
|
|
57
|
+
# that can fit in memory and interact with that
|
|
58
|
+
ds = dt["123456789_sdp_l0"].ds
|
|
59
|
+
ds = ds.isel(time=slice(10, 20), baseline_id=[1, 20, 30, 31, 32, 50], frequency=slice(256, 768))
|
|
60
|
+
ds.load()
|
|
61
|
+
|
|
62
|
+
If dask is installed, one can request chunking along dimensions:
|
|
63
|
+
|
|
64
|
+
.. code-block:: python
|
|
65
|
+
|
|
66
|
+
import xarray_kat
|
|
67
|
+
import xarray
|
|
68
|
+
|
|
69
|
+
token = "eyFILLMEIN"
|
|
70
|
+
capture_block_id = 123456789
|
|
71
|
+
url = f"https://archive-gw-1.kat.ac.za/{capture_block_id}/{capture_block_id}_sdp_l0.full.rdb?token={token}"
|
|
72
|
+
|
|
73
|
+
# This specifies the natural chunking of the
|
|
74
|
+
# underlying store
|
|
75
|
+
dt = xarray.open_datatree(url, chunks={})
|
|
76
|
+
dt = dt.compute()
|
|
77
|
+
|
|
78
|
+
# More exotic chunking can be selected, but
|
|
79
|
+
# as this pattern does not match the natural
|
|
80
|
+
# chunking, it results in repeated requests for
|
|
81
|
+
# the same data. It may be better to use a
|
|
82
|
+
# dask.rechunk operation on top of the natural
|
|
83
|
+
# chunking, or use cache pools to ameliorate this
|
|
84
|
+
dt = xarray.open_datatree(url, chunks={"time": 20, "baseline_id": 155, "frequency": 256})
|
|
85
|
+
dt = dt.compute()
|
|
86
|
+
|
|
87
|
+
.. _xarray: https://xarray.dev/
|
|
88
|
+
.. _xarray-indexing: https://docs.xarray.dev/en/latest/user-guide/indexing.html
|
|
89
|
+
.. _xarray-lazy-indexing: https://docs.xarray.dev/en/latest/internals/internal-design.html#lazy-indexing-classes
|
|
@@ -0,0 +1,67 @@
|
|
|
1
|
+
Direct xarray views over the MeerKAT archive
|
|
2
|
+
============================================
|
|
3
|
+
|
|
4
|
+
This package presents an xarray view over observations in the MeerKAT archive.
|
|
5
|
+
|
|
6
|
+
Required Reading
|
|
7
|
+
----------------
|
|
8
|
+
|
|
9
|
+
You'll need some familiarity with xarray_. In particular:
|
|
10
|
+
|
|
11
|
+
- `Indexing and selecting data <xarray-indexing_>`_
|
|
12
|
+
- `Lazy Loading behaviour <xarray-lazy-indexing_>`_
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
Example Usage
|
|
16
|
+
-------------
|
|
17
|
+
|
|
18
|
+
At a basic level, one can use xarray's selection and lazy loading mechanisms to interact with
|
|
19
|
+
the data:
|
|
20
|
+
|
|
21
|
+
.. code-block:: python
|
|
22
|
+
|
|
23
|
+
import xarray_kat
|
|
24
|
+
import xarray
|
|
25
|
+
|
|
26
|
+
token = "eyFILLMEIN"
|
|
27
|
+
capture_block_id = 123456789
|
|
28
|
+
url = f"https://archive-gw-1.kat.ac.za/{capture_block_id}/{capture_block_id}_sdp_l0.full.rdb?token={token}"
|
|
29
|
+
|
|
30
|
+
# If the dataset is small you may be able to load it all in at once
|
|
31
|
+
dt = xarray.open_datatree(url, chunked_array_type="xarray-kat", chunks={})
|
|
32
|
+
dt.load()
|
|
33
|
+
|
|
34
|
+
# Otherwise one can select a small partition of the data
|
|
35
|
+
# that can fit in memory and interact with that
|
|
36
|
+
ds = dt["123456789_sdp_l0"].ds
|
|
37
|
+
ds = ds.isel(time=slice(10, 20), baseline_id=[1, 20, 30, 31, 32, 50], frequency=slice(256, 768))
|
|
38
|
+
ds.load()
|
|
39
|
+
|
|
40
|
+
If dask is installed, one can request chunking along dimensions:
|
|
41
|
+
|
|
42
|
+
.. code-block:: python
|
|
43
|
+
|
|
44
|
+
import xarray_kat
|
|
45
|
+
import xarray
|
|
46
|
+
|
|
47
|
+
token = "eyFILLMEIN"
|
|
48
|
+
capture_block_id = 123456789
|
|
49
|
+
url = f"https://archive-gw-1.kat.ac.za/{capture_block_id}/{capture_block_id}_sdp_l0.full.rdb?token={token}"
|
|
50
|
+
|
|
51
|
+
# This specifies the natural chunking of the
|
|
52
|
+
# underlying store
|
|
53
|
+
dt = xarray.open_datatree(url, chunks={})
|
|
54
|
+
dt = dt.compute()
|
|
55
|
+
|
|
56
|
+
# More exotic chunking can be selected, but
|
|
57
|
+
# as this pattern does not match the natural
|
|
58
|
+
# chunking, it results in repeated requests for
|
|
59
|
+
# the same data. It may be better to use a
|
|
60
|
+
# dask.rechunk operation on top of the natural
|
|
61
|
+
# chunking, or use cache pools to ameliorate this
|
|
62
|
+
dt = xarray.open_datatree(url, chunks={"time": 20, "baseline_id": 155, "frequency": 256})
|
|
63
|
+
dt = dt.compute()
|
|
64
|
+
|
|
65
|
+
.. _xarray: https://xarray.dev/
|
|
66
|
+
.. _xarray-indexing: https://docs.xarray.dev/en/latest/user-guide/indexing.html
|
|
67
|
+
.. _xarray-lazy-indexing: https://docs.xarray.dev/en/latest/internals/internal-design.html#lazy-indexing-classes
|
|
@@ -0,0 +1,86 @@
|
|
|
1
|
+
[project]
|
|
2
|
+
name = "xarray-kat"
|
|
3
|
+
version = "0.0.1"
|
|
4
|
+
description = "Add your description here"
|
|
5
|
+
readme = "README.rst"
|
|
6
|
+
authors = [
|
|
7
|
+
{ name = "Simon Perkins", email = "simon.perkins@gmail.com" }
|
|
8
|
+
]
|
|
9
|
+
requires-python = ">=3.11"
|
|
10
|
+
dependencies = [
|
|
11
|
+
"katpoint>=0.10.2",
|
|
12
|
+
"katsdptelstate[rdb]>=1.0.0",
|
|
13
|
+
"tensorstore>=0.1.81",
|
|
14
|
+
"xarray>=2025.9.1",
|
|
15
|
+
]
|
|
16
|
+
|
|
17
|
+
[project.entry-points."xarray.backends"]
|
|
18
|
+
"xarray-kat" = "xarray_kat.entrypoint:KatEntryPoint"
|
|
19
|
+
|
|
20
|
+
[project.entry-points."xarray.chunkmanagers"]
|
|
21
|
+
"xarray-kat" = "xarray_kat.meerkat_chunk_manager:MeerKatChunkManager"
|
|
22
|
+
|
|
23
|
+
[project.optional-dependencies]
|
|
24
|
+
testing = [
|
|
25
|
+
"cloudpickle>=3.1.1",
|
|
26
|
+
"dask>=2025.9.1",
|
|
27
|
+
"dill>=0.4.0",
|
|
28
|
+
"katdal>=0.23",
|
|
29
|
+
"pytest>=8.4.2",
|
|
30
|
+
"pytest-httpserver>=1.1.0",
|
|
31
|
+
]
|
|
32
|
+
numba = [
|
|
33
|
+
"numba>=0.62.1",
|
|
34
|
+
]
|
|
35
|
+
|
|
36
|
+
[build-system]
|
|
37
|
+
requires = ["uv_build>=0.8.17,<0.9.0"]
|
|
38
|
+
build-backend = "uv_build"
|
|
39
|
+
|
|
40
|
+
[dependency-groups]
|
|
41
|
+
dev = [
|
|
42
|
+
"pre-commit>=4.3.0",
|
|
43
|
+
"tbump>=6.11.0",
|
|
44
|
+
]
|
|
45
|
+
|
|
46
|
+
[tool.ruff]
|
|
47
|
+
line-length = 88
|
|
48
|
+
indent-width = 2
|
|
49
|
+
target-version = "py311"
|
|
50
|
+
|
|
51
|
+
[tool.ruff.lint]
|
|
52
|
+
select = ["F", "E", "W", "I001"]
|
|
53
|
+
extend-select = ["I"]
|
|
54
|
+
|
|
55
|
+
exclude = [
|
|
56
|
+
"src/xarray_kat/third_party/*",
|
|
57
|
+
]
|
|
58
|
+
|
|
59
|
+
|
|
60
|
+
[tool.tbump]
|
|
61
|
+
# Uncomment this if your project is hosted on GitHub:
|
|
62
|
+
# github_url = "https://github.com/<user or organization>/<project>/"
|
|
63
|
+
|
|
64
|
+
[tool.tbump.version]
|
|
65
|
+
current = "0.0.1"
|
|
66
|
+
|
|
67
|
+
# Example of a semver regexp.
|
|
68
|
+
# Make sure this matches current_version before
|
|
69
|
+
# using tbump
|
|
70
|
+
regex = '''
|
|
71
|
+
(?P<major>\d+)
|
|
72
|
+
\.
|
|
73
|
+
(?P<minor>\d+)
|
|
74
|
+
\.
|
|
75
|
+
(?P<patch>\d+)
|
|
76
|
+
'''
|
|
77
|
+
|
|
78
|
+
[tool.tbump.git]
|
|
79
|
+
message_template = "Bump to {new_version}"
|
|
80
|
+
tag_template = "{new_version}"
|
|
81
|
+
|
|
82
|
+
# For each file to patch, add a [[tool.tbump.file]] config
|
|
83
|
+
# section containing the path of the file, relative to the
|
|
84
|
+
# tbump.toml location.
|
|
85
|
+
[[tool.tbump.file]]
|
|
86
|
+
src = "pyproject.toml"
|
|
@@ -0,0 +1,308 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from abc import ABC, abstractmethod
|
|
4
|
+
from numbers import Integral
|
|
5
|
+
from typing import TYPE_CHECKING, Tuple
|
|
6
|
+
|
|
7
|
+
import numpy as np
|
|
8
|
+
import numpy.typing as npt
|
|
9
|
+
import tensorstore as ts
|
|
10
|
+
from xarray.backends import BackendArray
|
|
11
|
+
from xarray.core.indexing import (
|
|
12
|
+
ExplicitlyIndexedNDArrayMixin,
|
|
13
|
+
IndexingSupport,
|
|
14
|
+
OuterIndexer,
|
|
15
|
+
VectorizedIndexer,
|
|
16
|
+
expanded_indexer,
|
|
17
|
+
explicit_indexing_adapter,
|
|
18
|
+
)
|
|
19
|
+
|
|
20
|
+
if TYPE_CHECKING:
|
|
21
|
+
from xarray_kat.multiton import Multiton
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
class AbstractMeerkatArchiveArray(ABC, BackendArray):
  """Abstract base for arrays exposing MeerKAT archive data to xarray.

  Concrete subclasses must supply the ``dims`` and ``chunks``
  properties declared here. In addition, xarray's internal API
  expects every ``BackendArray`` to provide ``shape`` and
  ``dtype`` attributes.
  """

  @property
  @abstractmethod
  def dims(self) -> Tuple[str, ...]:
    """Dimension names, one per array axis."""
    raise NotImplementedError

  @property
  @abstractmethod
  def chunks(self) -> Tuple[int, ...]:
    """Per-axis chunk sizes of the underlying store."""
    raise NotImplementedError
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
class DelayedTensorStore(ExplicitlyIndexedNDArrayMixin):
  """Lazily-indexed wrapper around a TensorStore.

  Indexing never reads data; every indexing operation simply wraps
  the (still lazy) TensorStore result in a new ``DelayedTensorStore``.
  """

  __slots__ = ("array",)

  # The wrapped, as-yet unread TensorStore
  array: ts.TensorStore

  def __init__(self, array):
    self.array = array

  def _wrap(self, store):
    # Keep every indexing path lazily wrapped in the same type
    return DelayedTensorStore(store)

  @property
  def dtype(self) -> npt.DTypeLike:
    return self.array.dtype.numpy_dtype

  def get_duck_array(self):
    return self.array

  async def async_get_duck_array(self):
    return self.array

  def _oindex_get(self, indexer: OuterIndexer):
    return self._wrap(self.array.oindex[indexer.tuple])

  def _vindex_get(self, indexer: VectorizedIndexer):
    return self._wrap(self.array.vindex[indexer.tuple])

  def __getitem__(self, key):
    return self._wrap(self.array[key.tuple])
|
|
69
|
+
|
|
70
|
+
|
|
71
|
+
class CorrProductMixin:
  """Mixin containing methods for reasoning about
  ``(time, frequency, corrprod)`` shaped MeerKAT archive data.

  Implements the ``dims``, ``chunks`` and ``shape`` properties
  of ``AbstractMeerkatArchiveArray``.

  The ``meerkat_key`` method produces an index
  that, when applied to a ``(time, frequency, corrprod)`` array,
  produces a ``(time, frequency, baseline_id, polarization)`` array.
  This can then be transposed into canonical MSv4 ordering.
  """

  __slots__ = ("_cp_argsort", "_msv4_shape", "_msv4_dims", "_msv4_chunks")

  # Index sorting corrprod into (baseline_id, polarization) order
  _cp_argsort: npt.NDArray
  _msv4_shape: Tuple[int, int, int, int]
  _msv4_dims: Tuple[str, str, str, str]
  _msv4_chunks: Tuple[int, int, int, int]

  def __init__(
    self,
    meerkat_shape: Tuple[int, int, int],
    meerkat_dims: Tuple[str, str, str],
    meerkat_chunks: Tuple[int, int, int],
    cp_argsort: npt.NDArray,
    npol: int,
  ):
    """Constructs a CorrProductMixin

    Args:
      meerkat_shape: The shape of the meerkat array.
        Should be associated with the ``(time, frequency, corrprod)`` dimensions.
      meerkat_dims: The dimensions of the meerkat array.
        Should be ``(time, frequency, corrprod)``.
      meerkat_chunks: The chunking of the meerkat array.
        Should be associated with the ``(time, frequency, corrprod)`` dimensions.
      cp_argsort: An array sorting the ``corrprod`` dimension into a
        canonical ``(baseline_id, polarization)`` ordering.
      npol: Number of polarizations.

    Raises:
      ValueError: If ``meerkat_dims``/``meerkat_shape`` do not describe
        ``(time, frequency, corrprod)``, if ``cp_argsort`` does not match
        the corrprod extent, or if ``npol`` does not divide the number of
        correlation products exactly.
    """
    if meerkat_dims != ("time", "frequency", "corrprod"):
      raise ValueError(f"{meerkat_dims} should be (time, frequency, corrprod)")

    try:
      ntime, nfreq, ncorrprod = meerkat_shape
    except ValueError as e:
      # Chain the unpacking failure for easier debugging
      raise ValueError(
        f"{meerkat_shape} should be (time, frequency, corrprod)"
      ) from e
    if len(cp_argsort) != ncorrprod:
      raise ValueError(f"{len(cp_argsort)} does not match corrprods {ncorrprod}")
    nbl, rem = divmod(len(cp_argsort), npol)
    # Validate before assigning any state so that a failed construction
    # does not leave partially initialised attributes behind
    if rem != 0:
      raise ValueError(
        f"Number of polarizations {npol} must divide "
        f"the correlation product index {len(cp_argsort)} exactly."
      )
    self._cp_argsort = cp_argsort
    self._msv4_shape = (ntime, nbl, nfreq, npol)
    self._msv4_dims = (meerkat_dims[0], "baseline_id", meerkat_dims[1], "polarization")
    self._msv4_chunks = (meerkat_chunks[0], nbl, meerkat_chunks[1], npol)

  @property
  def dims(self) -> Tuple[str, ...]:
    """MSv4 dimension names: (time, baseline_id, frequency, polarization)."""
    return self._msv4_dims

  @property
  def chunks(self) -> Tuple[int, ...]:
    """MSv4-ordered chunk sizes; baseline and polarization are unchunked."""
    return self._msv4_chunks

  @property
  def shape(self) -> Tuple[int, ...]:
    """MSv4-ordered array shape."""
    return self._msv4_shape

  @property
  def ndim(self) -> int:
    """Number of MSv4 dimensions (always 4)."""
    return len(self._msv4_shape)

  def _normalize_key_axis(
    self,
    key: Tuple[slice | npt.NDArray | Integral, ...],
    axis: int,
  ) -> npt.NDArray:
    """Normalises ``key[axis]`` into a numpy index array.

    Raises:
      NotImplementedError: If ``key[axis]`` is not a slice,
        integer or numpy array.
    """
    if isinstance(key_item := key[axis], slice):
      # Materialise the slice against the axis extent
      return np.arange(self.shape[axis])[key_item]
    elif isinstance(key_item, Integral):
      return np.array([key_item])
    elif isinstance(key_item, np.ndarray):
      return key_item
    else:
      raise NotImplementedError(f"key_item type {type(key_item)}")

  def meerkat_key(self, msv4_key: Tuple) -> Tuple:
    """Translates an MSv4 key into a MeerKAT key.

    MSv4 arrays have ``(time, baseline_id, frequency, polarization)``
    dimensions. This method translates keys referencing the above
    dimensions into keys which operate on MeerKAT archive data with
    ``(time, frequency, corrprod)`` dimensions.
    """
    msv4_key = expanded_indexer(msv4_key, self.ndim)
    assert isinstance(msv4_key, tuple) and len(msv4_key) == 4
    time_selection = msv4_key[0]
    bl_selection = self._normalize_key_axis(msv4_key, 1)
    frequency_selection = msv4_key[2]
    pol_selection = self._normalize_key_axis(msv4_key, 3)

    bl_grid, pol_grid = np.meshgrid(bl_selection, pol_selection, indexing="ij")
    # cp_selection has shape (nbl, npol). When used in an index,
    # it has the effect of splitting the corrprod dimension
    # into baseline and polarization
    npol = self.shape[3]
    cp_selection = self._cp_argsort[bl_grid * npol + pol_grid]
    return (time_selection, frequency_selection, cp_selection)

  @property
  def transpose_axes(self) -> Tuple[int, int, int, int]:
    """Transpose (time, frequency, baseline_id, polarization) to
    (time, baseline_id, frequency, polarization)"""
    return (0, 2, 1, 3)
|
|
191
|
+
|
|
192
|
+
|
|
193
|
+
class DelayedCorrProductArray(CorrProductMixin, AbstractMeerkatArchiveArray):
  """Wraps a ``(time, frequency, corrprod)`` TensorStore.

  Most data in the MeerKAT archive has
  ``(time, frequency, corrprod)`` dimensions.
  This class presents that data in the MSv4
  ``(time, baseline_id, frequency, polarization)`` form,
  with indexing kept lazy via ``DelayedTensorStore``.
  """

  __slots__ = "_store"

  # Multiton handle over the underlying TensorStore
  _store: Multiton[ts.TensorStore]

  def __init__(
    self, store: Multiton[ts.TensorStore], cp_argsort: npt.NDArray, npol: int
  ):
    CorrProductMixin.__init__(
      self,
      store.instance.shape,
      store.instance.domain.labels,
      store.instance.chunk_layout.read_chunk.shape,
      cp_argsort,
      npol,
    )
    self._store = store

  @property
  def dtype(self) -> npt.DTypeLike:
    return self._store.instance.dtype.numpy_dtype

  def __getitem__(self, key) -> DelayedTensorStore:
    # Let xarray decompose arbitrary indexers into outer-indexing calls
    return explicit_indexing_adapter(
      key, self.shape, IndexingSupport.OUTER, self._getitem
    )

  def _getitem(self, key) -> DelayedTensorStore:
    selection = self.meerkat_key(key)
    lazy_result = self._store.instance[selection].transpose(self.transpose_axes)
    return DelayedTensorStore(lazy_result)
|
|
232
|
+
|
|
233
|
+
|
|
234
|
+
class ImmediateCorrProductArray(DelayedCorrProductArray):
  """A ``DelayedCorrProductArray`` whose indexing reads data eagerly.

  Indexing resolves the lazy TensorStore result into an
  in-memory numpy array immediately.
  """

  # NOTE: the previous no-op __init__ override (forwarding identical
  # arguments to super()) was removed; the inherited constructor has
  # the exact signature this class requires.

  def _getitem(self, key) -> npt.NDArray:
    # Resolve the delayed read synchronously
    return super()._getitem(key).get_duck_array().read().result()
|
|
242
|
+
|
|
243
|
+
|
|
244
|
+
class DelayedBackendArray(
  CorrProductMixin, ExplicitlyIndexedNDArrayMixin, AbstractMeerkatArchiveArray
):
  """Wraps a ``(time, frequency, corrprod)`` TensorStore.

  Most data in the MeerKAT archive has dimension
  ``(time, frequency, corrprod)``.
  This class reshapes the underlying data into the
  ``(time, baseline_id, frequency, polarization)`` form.
  """

  __slots__ = ("array",)

  # Multiton handle over the underlying TensorStore
  array: Multiton[ts.TensorStore]

  def __init__(
    # Fixed annotation typo: ts.Tensorstore -> ts.TensorStore
    self, store: Multiton[ts.TensorStore], cp_argsort: npt.NDArray, npol: int
  ):
    """Constructs a DelayedBackendArray.

    Args:
      store: Multiton wrapping a (time, frequency, corrprod) TensorStore.
      cp_argsort: Array sorting corrprod into a canonical
        (baseline_id, polarization) ordering.
      npol: Number of polarizations.
    """
    super().__init__(
      store.instance.shape,
      store.instance.domain.labels,
      store.instance.chunk_layout.read_chunk.shape,
      cp_argsort,
      npol,
    )
    self.array = store

  @property
  def dtype(self) -> npt.DTypeLike:
    return self.array.instance.dtype.numpy_dtype

  def get_duck_array(self):
    return self.array.instance

  async def async_get_duck_array(self):
    return self.array.instance

  def _oindex_get(self, indexer):
    key = self.meerkat_key(indexer.tuple)
    store = self.array.instance.oindex[key].transpose(self.transpose_axes)
    return DelayedTensorStore(store)

  def _vindex_get(self, indexer):
    # Vectorized (point-wise) indexing is not supported by this class
    raise NotImplementedError("vindex")

  def __getitem__(self, key):
    mkey = self.meerkat_key(key.tuple)
    store = self.array.instance[mkey].transpose(self.transpose_axes)
    return DelayedTensorStore(store)
|
|
293
|
+
|
|
294
|
+
|
|
295
|
+
class ImmediateBackendArray(DelayedBackendArray):
  """A ``DelayedBackendArray`` whose indexing reads data eagerly.

  Each indexing path resolves the lazy result into an in-memory
  numpy array immediately.
  """

  # NOTE: the previous no-op __init__ override (forwarding identical
  # arguments to super(), with a ts.Tensorstore annotation typo) was
  # removed; the inherited constructor already has the right signature.

  def _oindex_get(self, indexer):
    return super()._oindex_get(indexer).get_duck_array().read().result()

  def _vindex_get(self, indexer):
    # The parent raises NotImplementedError before the chained read
    # could ever execute; delegating keeps the error message unified.
    return super()._vindex_get(indexer).get_duck_array().read().result()

  def __getitem__(self, key):
    return super().__getitem__(key).get_duck_array().read().result()
|
|
@@ -0,0 +1,88 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import asyncio
|
|
4
|
+
import logging
|
|
5
|
+
import threading
|
|
6
|
+
import weakref
|
|
7
|
+
from typing import Any, Dict
|
|
8
|
+
|
|
9
|
+
log = logging.getLogger(__name__)
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
class Singleton(type):
  """Metaclass ensuring at most one instance exists per class.

  Creation uses double-checked locking: the common (already
  constructed) path avoids taking the lock entirely.
  """

  # Shared registry of singleton instances, keyed by class
  _instances: Dict[type, Any] = {}
  _instance_lock = threading.Lock()

  def __call__(cls, *args, **kwargs):
    if cls not in cls._instances:
      with cls._instance_lock:
        # Re-check: another thread may have constructed the
        # instance while we were waiting for the lock
        if cls not in cls._instances:
          instance = super().__call__(*args, **kwargs)
          cls._instances[cls] = instance

    return cls._instances[cls]
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
def _run_loop_in_thread(
  loop: asyncio.AbstractEventLoop, running: threading.Event
) -> None:
  """Drive ``loop`` until it is stopped, then tear it down completely.

  Intended as a ``threading.Thread`` target. ``running`` is set just
  before the loop starts and cleared once it stops; teardown shuts
  down async generators and the default executor before closing.
  """
  asyncio.set_event_loop(loop)
  running.set()

  try:
    loop.run_forever()
  finally:
    log.debug("Loop stops")
    running.clear()
    log.debug("Shutting down async generators")
    loop.run_until_complete(loop.shutdown_asyncgens())
    log.debug("Shutting down default executors")
    loop.run_until_complete(loop.shutdown_default_executor())

    log.debug("Closing the loop")
    loop.close()
    log.debug("Done")
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
class AsyncLoopSingleton(metaclass=Singleton):
  """Owns a single asyncio event loop running on a daemon thread.

  The loop is created and started on construction; ``close`` stops
  it and joins the thread. Both operations are idempotent.
  """

  _loop: asyncio.AbstractEventLoop | None
  _thread: threading.Thread | None
  _lock: threading.Lock
  _running: threading.Event

  def __init__(self):
    self._loop = None
    self._thread = None
    self._lock = threading.Lock()
    self._running = threading.Event()
    # NOTE(review): finalize stores a strong reference to the bound
    # method (and therefore to self), so this callback only fires at
    # interpreter shutdown rather than on garbage collection — confirm
    # that is the intended lifetime.
    weakref.finalize(self, self.close)
    self.start()

  @property
  def instance(self):
    """The managed event loop, or None when not started."""
    return self._loop

  def start(self) -> None:
    """Create a fresh loop and run it on a daemon thread (idempotent)."""
    with self._lock:
      already_running = self._thread is not None and self._thread.is_alive()
      if already_running:
        return

      self._loop = asyncio.new_event_loop()
      self._thread = threading.Thread(
        target=_run_loop_in_thread,
        args=(self._loop, self._running),
        daemon=True,
        name="AsyncLoopThread",
      )
      self._thread.start()

  def close(self) -> None:
    """Stop the loop, join its thread and drop both (idempotent)."""
    with self._lock:
      if self._thread is None or self._loop is None:
        return

      if self._loop.is_running():
        # The loop runs on another thread; stop it from there
        self._loop.call_soon_threadsafe(self._loop.stop)

      self._thread.join()
      self._thread = None
      self._loop = None
|