xarray-ms 0.1.9__tar.gz → 0.2.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- xarray_ms-0.2.0/LICENSE +28 -0
- xarray_ms-0.2.0/PKG-INFO +149 -0
- xarray_ms-0.2.0/README.rst +126 -0
- xarray_ms-0.2.0/pyproject.toml +96 -0
- xarray_ms-0.2.0/xarray_ms/__init__.py +3 -0
- xarray_ms-0.2.0/xarray_ms/backend/msv2/antenna_dataset_factory.py +36 -0
- xarray_ms-0.2.0/xarray_ms/backend/msv2/array.py +75 -0
- xarray_ms-0.2.0/xarray_ms/backend/msv2/encoders.py +199 -0
- xarray_ms-0.2.0/xarray_ms/backend/msv2/entrypoint.py +354 -0
- xarray_ms-0.2.0/xarray_ms/backend/msv2/main_dataset_factory.py +219 -0
- xarray_ms-0.2.0/xarray_ms/backend/msv2/structure.py +500 -0
- xarray_ms-0.2.0/xarray_ms/backend/msv2/table_factory.py +75 -0
- xarray_ms-0.2.0/xarray_ms/casa_types.py +492 -0
- xarray_ms-0.2.0/xarray_ms/core.py +89 -0
- xarray_ms-0.2.0/xarray_ms/errors.py +19 -0
- xarray_ms-0.2.0/xarray_ms/query.py +31 -0
- xarray_ms-0.2.0/xarray_ms/testing/simulator.py +385 -0
- xarray_ms-0.2.0/xarray_ms/utils.py +86 -0
- xarray-ms-0.1.9/AUTHORS.rst +0 -13
- xarray-ms-0.1.9/CONTRIBUTING.rst +0 -136
- xarray-ms-0.1.9/HISTORY.rst +0 -58
- xarray-ms-0.1.9/LICENSE +0 -339
- xarray-ms-0.1.9/MANIFEST.in +0 -11
- xarray-ms-0.1.9/PKG-INFO +0 -119
- xarray-ms-0.1.9/README.rst +0 -101
- xarray-ms-0.1.9/docs/Makefile +0 -20
- xarray-ms-0.1.9/docs/api.rst +0 -11
- xarray-ms-0.1.9/docs/authors.rst +0 -1
- xarray-ms-0.1.9/docs/conf.py +0 -213
- xarray-ms-0.1.9/docs/contributing.rst +0 -1
- xarray-ms-0.1.9/docs/history.rst +0 -1
- xarray-ms-0.1.9/docs/index.rst +0 -20
- xarray-ms-0.1.9/docs/installation.rst +0 -51
- xarray-ms-0.1.9/docs/make.bat +0 -36
- xarray-ms-0.1.9/docs/readme.rst +0 -1
- xarray-ms-0.1.9/docs/usage.rst +0 -7
- xarray-ms-0.1.9/setup.cfg +0 -30
- xarray-ms-0.1.9/setup.py +0 -57
- xarray-ms-0.1.9/xarray_ms.egg-info/PKG-INFO +0 -119
- xarray-ms-0.1.9/xarray_ms.egg-info/SOURCES.txt +0 -33
- xarray-ms-0.1.9/xarray_ms.egg-info/dependency_links.txt +0 -1
- xarray-ms-0.1.9/xarray_ms.egg-info/requires.txt +0 -12
- xarray-ms-0.1.9/xarray_ms.egg-info/top_level.txt +0 -1
- xarray-ms-0.1.9/xarray_ms.egg-info/zip-safe +0 -1
- xarray-ms-0.1.9/xarrayms/__init__.py +0 -12
- xarray-ms-0.1.9/xarrayms/known_table_schemas.py +0 -94
- xarray-ms-0.1.9/xarrayms/table_executor.py +0 -324
- xarray-ms-0.1.9/xarrayms/tests/conftest.py +0 -64
- xarray-ms-0.1.9/xarrayms/tests/test_ms.py +0 -403
- xarray-ms-0.1.9/xarrayms/tests/test_table_executor.py +0 -177
- xarray-ms-0.1.9/xarrayms/xarray_ms.py +0 -953
- {xarray-ms-0.1.9/xarrayms/tests → xarray_ms-0.2.0/xarray_ms/testing}/__init__.py +0 -0
xarray_ms-0.2.0/LICENSE
ADDED
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
BSD 3-Clause License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2024, Rhodes University Centre for Radio Astronomy Techniques & Technologies (RATT)
|
|
4
|
+
|
|
5
|
+
Redistribution and use in source and binary forms, with or without
|
|
6
|
+
modification, are permitted provided that the following conditions are met:
|
|
7
|
+
|
|
8
|
+
1. Redistributions of source code must retain the above copyright notice, this
|
|
9
|
+
list of conditions and the following disclaimer.
|
|
10
|
+
|
|
11
|
+
2. Redistributions in binary form must reproduce the above copyright notice,
|
|
12
|
+
this list of conditions and the following disclaimer in the documentation
|
|
13
|
+
and/or other materials provided with the distribution.
|
|
14
|
+
|
|
15
|
+
3. Neither the name of the copyright holder nor the names of its
|
|
16
|
+
contributors may be used to endorse or promote products derived from
|
|
17
|
+
this software without specific prior written permission.
|
|
18
|
+
|
|
19
|
+
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
|
20
|
+
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
|
21
|
+
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
|
22
|
+
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
|
|
23
|
+
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
|
24
|
+
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
|
25
|
+
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
|
26
|
+
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
|
27
|
+
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
|
28
|
+
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
xarray_ms-0.2.0/PKG-INFO
ADDED
|
@@ -0,0 +1,149 @@
|
|
|
1
|
+
Metadata-Version: 2.1
|
|
2
|
+
Name: xarray-ms
|
|
3
|
+
Version: 0.2.0
|
|
4
|
+
Summary: xarray MSv4 views over MSv2 Measurement Sets
|
|
5
|
+
Author: Simon Perkins
|
|
6
|
+
Author-email: simon.perkins@gmail.com
|
|
7
|
+
Requires-Python: >=3.10,<4.0
|
|
8
|
+
Classifier: Programming Language :: Python :: 3
|
|
9
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
10
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
11
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
12
|
+
Provides-Extra: testing
|
|
13
|
+
Requires-Dist: arcae (>=0.2.4,<0.3.0)
|
|
14
|
+
Requires-Dist: cacheout (>=0.16.0,<0.17.0)
|
|
15
|
+
Requires-Dist: dask[testing] (>=2024.5.0,<2025.0.0) ; extra == "testing"
|
|
16
|
+
Requires-Dist: distributed[testing] (>=2024.5.0,<2025.0.0) ; extra == "testing"
|
|
17
|
+
Requires-Dist: pytest[testing] (>=8.0.0,<9.0.0) ; extra == "testing"
|
|
18
|
+
Requires-Dist: typing-extensions (>=4.12.2,<5.0.0) ; python_version < "3.11"
|
|
19
|
+
Requires-Dist: xarray (>=2024.3.0,<2025.0.0)
|
|
20
|
+
Requires-Dist: zarr[testing] (>=2.18.3,<3.0.0) ; extra == "testing"
|
|
21
|
+
Description-Content-Type: text/x-rst
|
|
22
|
+
|
|
23
|
+
=========
|
|
24
|
+
xarray-ms
|
|
25
|
+
=========
|
|
26
|
+
|
|
27
|
+
.. image:: https://img.shields.io/pypi/v/xarray-ms.svg
|
|
28
|
+
:target: https://pypi.python.org/pypi/xarray-ms
|
|
29
|
+
|
|
30
|
+
.. image:: https://github.com/ratt-ru/xarray-ms/actions/workflows/ci.yml/badge.svg
|
|
31
|
+
:target: https://github.com/ratt-ru/xarray-ms/actions/workflows/ci.yml
|
|
32
|
+
|
|
33
|
+
.. image:: https://readthedocs.org/projects/xarray-ms/badge/?version=latest
|
|
34
|
+
:target: https://xarray-ms.readthedocs.io/en/latest/?badge=latest
|
|
35
|
+
:alt: Documentation Status
|
|
36
|
+
|
|
37
|
+
====
|
|
38
|
+
|
|
39
|
+
xarray-ms presents a Measurement Set v4 view (MSv4) over
|
|
40
|
+
`CASA Measurement Sets <https://casa.nrao.edu/Memos/229.html>`_ (MSv2).
|
|
41
|
+
It provides access to MSv2 data via the xarray API, allowing MSv4 compliant applications
|
|
42
|
+
to be developed on well-understood MSv2 data.
|
|
43
|
+
|
|
44
|
+
.. code-block:: python
|
|
45
|
+
|
|
46
|
+
>>> import xarray_ms
|
|
47
|
+
>>> import xarray
|
|
48
|
+
>>> ds = xarray.open_dataset("/data/L795830_SB001_uv.MS/",
|
|
49
|
+
chunks={"time": 2000, "baseline": 1000})
|
|
50
|
+
>>> ds
|
|
51
|
+
<xarray.Dataset> Size: 70GB
|
|
52
|
+
Dimensions: (time: 28760, baseline: 2775, frequency: 16,
|
|
53
|
+
polarization: 4, uvw_label: 3)
|
|
54
|
+
Coordinates:
|
|
55
|
+
antenna1_name (baseline) object 22kB dask.array<chunksize=(1000,), meta=np.ndarray>
|
|
56
|
+
antenna2_name (baseline) object 22kB dask.array<chunksize=(1000,), meta=np.ndarray>
|
|
57
|
+
baseline_id (baseline) int64 22kB dask.array<chunksize=(1000,), meta=np.ndarray>
|
|
58
|
+
* frequency (frequency) float64 128B 1.202e+08 ... 1.204e+08
|
|
59
|
+
* polarization (polarization) <U2 32B 'XX' 'XY' 'YX' 'YY'
|
|
60
|
+
* time (time) float64 230kB 1.601e+09 ... 1.601e+09
|
|
61
|
+
Dimensions without coordinates: baseline, uvw_label
|
|
62
|
+
Data variables:
|
|
63
|
+
EFFECTIVE_INTEGRATION_TIME (time, baseline) float64 638MB dask.array<chunksize=(2000, 1000), meta=np.ndarray>
|
|
64
|
+
FLAG (time, baseline, frequency, polarization) uint8 5GB dask.array<chunksize=(2000, 1000, 16, 4), meta=np.ndarray>
|
|
65
|
+
TIME_CENTROID (time, baseline) float64 638MB dask.array<chunksize=(2000, 1000), meta=np.ndarray>
|
|
66
|
+
UVW (time, baseline, uvw_label) float64 2GB dask.array<chunksize=(2000, 1000, 3), meta=np.ndarray>
|
|
67
|
+
VISIBILITY (time, baseline, frequency, polarization) complex64 41GB dask.array<chunksize=(2000, 1000, 16, 4), meta=np.ndarray>
|
|
68
|
+
WEIGHT (time, baseline, frequency, polarization) float32 20GB dask.array<chunksize=(2000, 1000, 16, 4), meta=np.ndarray>
|
|
69
|
+
Attributes:
|
|
70
|
+
antenna_xds: <xarray.Dataset> Size: 4kB\nDimensions: (...
|
|
71
|
+
version: 0.0.1
|
|
72
|
+
creation_date: 2024-09-10T14:29:22.587984+00:00
|
|
73
|
+
data_description_id: 0
|
|
74
|
+
|
|
75
|
+
Measurement Set v4
|
|
76
|
+
------------------
|
|
77
|
+
|
|
78
|
+
NRAO_/SKAO_ are developing a new xarray-based `Measurement Set v4 specification <msv4-spec_>`_.
|
|
79
|
+
While there are many changes, some of the major highlights are:
|
|
80
|
+
|
|
81
|
+
* xarray_ is used to define the specification.
|
|
82
|
+
* MSv4 data consists of Datasets of ndarrays on a regular time-channel grid.
|
|
83
|
+
MSv2 data is tabular and, while in many instances the time-channel grid is regular,
|
|
84
|
+
this was not guaranteed, especially after MSv2 datasets had been transformed by various tasks.
|
|
85
|
+
|
|
86
|
+
|
|
87
|
+
xarray_ Datasets are self-describing and they are therefore easier to reason about and work with.
|
|
88
|
+
Additionally, the regularity of data will make writing MSv4-based software less complex.
|
|
89
|
+
|
|
90
|
+
xradio
|
|
91
|
+
------
|
|
92
|
+
|
|
93
|
+
`casangi/xradio <xradio_>`_ provides a reference implementation that converts
|
|
94
|
+
CASA v2 Measurement Sets to Zarr v4 Measurement Sets using the python-casacore_
|
|
95
|
+
package.
|
|
96
|
+
|
|
97
|
+
Why xarray-ms?
|
|
98
|
+
--------------
|
|
99
|
+
|
|
100
|
+
* By developing against an MSv4 xarray view over MSv2 data,
|
|
101
|
+
developers can develop applications on well-understood data,
|
|
102
|
+
and then seamlessly transition to newer formats.
|
|
103
|
+
Data can also be exported to newer formats (principally zarr_) via xarray's
|
|
104
|
+
native I/O routines.
|
|
105
|
+
However, the xarray view of either format looks the same to the software developer.
|
|
106
|
+
|
|
107
|
+
* xarray-ms builds on xarray's
|
|
108
|
+
`backend API <https://docs.xarray.dev/en/stable/internals/how-to-add-new-backend.html>`_:
|
|
109
|
+
Implementing a formal CASA MSv2 backend has a number of benefits:
|
|
110
|
+
|
|
111
|
+
* xarray's internal I/O routines such as ``open_dataset`` and ``open_datatree``
|
|
112
|
+
can dispatch to the backend to load data.
|
|
113
|
+
* Similarly xarray's `lazy loading mechanism <xarray_lazy_>`_ dispatches
|
|
114
|
+
through the backend.
|
|
115
|
+
* Automatic access to any `chunked array types <xarray_chunked_arrays_>`_
|
|
116
|
+
supported by xarray including, but not limited to dask_.
|
|
117
|
+
* Arbitrary chunking along any xarray dimension.
|
|
118
|
+
|
|
119
|
+
* xarray-ms uses arcae_, a high-performance backend to CASA Tables implementing
|
|
120
|
+
a subset of python-casacore_'s interface.
|
|
121
|
+
* Some limited support for irregular MSv2 data via padding.
|
|
122
|
+
|
|
123
|
+
Work in Progress
|
|
124
|
+
----------------
|
|
125
|
+
|
|
126
|
+
The Measurement Set v4 specification is currently under active development.
|
|
127
|
+
xarray-ms is currently under active development and does not yet
|
|
128
|
+
have feature parity with xradio_.
|
|
129
|
+
|
|
130
|
+
Most measures information and many secondary sub-tables are currently missing.
|
|
131
|
+
However, the most important parts of the ``MAIN`` tables,
|
|
132
|
+
as well as the ``ANTENNA``, ``POLARIZATION`` and ``SPECTRAL_WINDOW``
|
|
133
|
+
sub-tables are implemented and should be sufficient
|
|
134
|
+
for basic algorithm development.
|
|
135
|
+
|
|
136
|
+
.. _SKAO: https://www.skao.int/
|
|
137
|
+
.. _NRAO: https://public.nrao.edu/
|
|
138
|
+
.. _msv4-spec: https://docs.google.com/spreadsheets/d/14a6qMap9M5r_vjpLnaBKxsR9TF4azN5LVdOxLacOX-s/
|
|
139
|
+
.. _xradio: https://github.com/casangi/xradio
|
|
140
|
+
.. _dask-ms: https://github.com/ratt-ru/dask-ms
|
|
141
|
+
.. _arcae: https://github.com/ratt-ru/arcae
|
|
142
|
+
.. _dask: https://www.dask.org/
|
|
143
|
+
.. _python-casacore: https://github.com/casacore/python-casacore/
|
|
144
|
+
.. _xarray: https://xarray.dev/
|
|
145
|
+
.. _xarray_backend: https://docs.xarray.dev/en/stable/internals/how-to-add-new-backend.html
|
|
146
|
+
.. _xarray_lazy: https://docs.xarray.dev/en/latest/internals/internal-design.html#lazy-indexing-classes
|
|
147
|
+
.. _xarray_chunked_arrays: https://docs.xarray.dev/en/latest/internals/chunked-arrays.html
|
|
148
|
+
.. _zarr: https://zarr.dev/
|
|
149
|
+
|
|
@@ -0,0 +1,126 @@
|
|
|
1
|
+
=========
|
|
2
|
+
xarray-ms
|
|
3
|
+
=========
|
|
4
|
+
|
|
5
|
+
.. image:: https://img.shields.io/pypi/v/xarray-ms.svg
|
|
6
|
+
:target: https://pypi.python.org/pypi/xarray-ms
|
|
7
|
+
|
|
8
|
+
.. image:: https://github.com/ratt-ru/xarray-ms/actions/workflows/ci.yml/badge.svg
|
|
9
|
+
:target: https://github.com/ratt-ru/xarray-ms/actions/workflows/ci.yml
|
|
10
|
+
|
|
11
|
+
.. image:: https://readthedocs.org/projects/xarray-ms/badge/?version=latest
|
|
12
|
+
:target: https://xarray-ms.readthedocs.io/en/latest/?badge=latest
|
|
13
|
+
:alt: Documentation Status
|
|
14
|
+
|
|
15
|
+
====
|
|
16
|
+
|
|
17
|
+
xarray-ms presents a Measurement Set v4 view (MSv4) over
|
|
18
|
+
`CASA Measurement Sets <https://casa.nrao.edu/Memos/229.html>`_ (MSv2).
|
|
19
|
+
It provides access to MSv2 data via the xarray API, allowing MSv4 compliant applications
|
|
20
|
+
to be developed on well-understood MSv2 data.
|
|
21
|
+
|
|
22
|
+
.. code-block:: python
|
|
23
|
+
|
|
24
|
+
>>> import xarray_ms
|
|
25
|
+
>>> import xarray
|
|
26
|
+
>>> ds = xarray.open_dataset("/data/L795830_SB001_uv.MS/",
|
|
27
|
+
chunks={"time": 2000, "baseline": 1000})
|
|
28
|
+
>>> ds
|
|
29
|
+
<xarray.Dataset> Size: 70GB
|
|
30
|
+
Dimensions: (time: 28760, baseline: 2775, frequency: 16,
|
|
31
|
+
polarization: 4, uvw_label: 3)
|
|
32
|
+
Coordinates:
|
|
33
|
+
antenna1_name (baseline) object 22kB dask.array<chunksize=(1000,), meta=np.ndarray>
|
|
34
|
+
antenna2_name (baseline) object 22kB dask.array<chunksize=(1000,), meta=np.ndarray>
|
|
35
|
+
baseline_id (baseline) int64 22kB dask.array<chunksize=(1000,), meta=np.ndarray>
|
|
36
|
+
* frequency (frequency) float64 128B 1.202e+08 ... 1.204e+08
|
|
37
|
+
* polarization (polarization) <U2 32B 'XX' 'XY' 'YX' 'YY'
|
|
38
|
+
* time (time) float64 230kB 1.601e+09 ... 1.601e+09
|
|
39
|
+
Dimensions without coordinates: baseline, uvw_label
|
|
40
|
+
Data variables:
|
|
41
|
+
EFFECTIVE_INTEGRATION_TIME (time, baseline) float64 638MB dask.array<chunksize=(2000, 1000), meta=np.ndarray>
|
|
42
|
+
FLAG (time, baseline, frequency, polarization) uint8 5GB dask.array<chunksize=(2000, 1000, 16, 4), meta=np.ndarray>
|
|
43
|
+
TIME_CENTROID (time, baseline) float64 638MB dask.array<chunksize=(2000, 1000), meta=np.ndarray>
|
|
44
|
+
UVW (time, baseline, uvw_label) float64 2GB dask.array<chunksize=(2000, 1000, 3), meta=np.ndarray>
|
|
45
|
+
VISIBILITY (time, baseline, frequency, polarization) complex64 41GB dask.array<chunksize=(2000, 1000, 16, 4), meta=np.ndarray>
|
|
46
|
+
WEIGHT (time, baseline, frequency, polarization) float32 20GB dask.array<chunksize=(2000, 1000, 16, 4), meta=np.ndarray>
|
|
47
|
+
Attributes:
|
|
48
|
+
antenna_xds: <xarray.Dataset> Size: 4kB\nDimensions: (...
|
|
49
|
+
version: 0.0.1
|
|
50
|
+
creation_date: 2024-09-10T14:29:22.587984+00:00
|
|
51
|
+
data_description_id: 0
|
|
52
|
+
|
|
53
|
+
Measurement Set v4
|
|
54
|
+
------------------
|
|
55
|
+
|
|
56
|
+
NRAO_/SKAO_ are developing a new xarray-based `Measurement Set v4 specification <msv4-spec_>`_.
|
|
57
|
+
While there are many changes, some of the major highlights are:
|
|
58
|
+
|
|
59
|
+
* xarray_ is used to define the specification.
|
|
60
|
+
* MSv4 data consists of Datasets of ndarrays on a regular time-channel grid.
|
|
61
|
+
MSv2 data is tabular and, while in many instances the time-channel grid is regular,
|
|
62
|
+
this was not guaranteed, especially after MSv2 datasets had been transformed by various tasks.
|
|
63
|
+
|
|
64
|
+
|
|
65
|
+
xarray_ Datasets are self-describing and they are therefore easier to reason about and work with.
|
|
66
|
+
Additionally, the regularity of data will make writing MSv4-based software less complex.
|
|
67
|
+
|
|
68
|
+
xradio
|
|
69
|
+
------
|
|
70
|
+
|
|
71
|
+
`casangi/xradio <xradio_>`_ provides a reference implementation that converts
|
|
72
|
+
CASA v2 Measurement Sets to Zarr v4 Measurement Sets using the python-casacore_
|
|
73
|
+
package.
|
|
74
|
+
|
|
75
|
+
Why xarray-ms?
|
|
76
|
+
--------------
|
|
77
|
+
|
|
78
|
+
* By developing against an MSv4 xarray view over MSv2 data,
|
|
79
|
+
developers can develop applications on well-understood data,
|
|
80
|
+
and then seamlessly transition to newer formats.
|
|
81
|
+
Data can also be exported to newer formats (principally zarr_) via xarray's
|
|
82
|
+
native I/O routines.
|
|
83
|
+
However, the xarray view of either format looks the same to the software developer.
|
|
84
|
+
|
|
85
|
+
* xarray-ms builds on xarray's
|
|
86
|
+
`backend API <https://docs.xarray.dev/en/stable/internals/how-to-add-new-backend.html>`_:
|
|
87
|
+
Implementing a formal CASA MSv2 backend has a number of benefits:
|
|
88
|
+
|
|
89
|
+
* xarray's internal I/O routines such as ``open_dataset`` and ``open_datatree``
|
|
90
|
+
can dispatch to the backend to load data.
|
|
91
|
+
* Similarly xarray's `lazy loading mechanism <xarray_lazy_>`_ dispatches
|
|
92
|
+
through the backend.
|
|
93
|
+
* Automatic access to any `chunked array types <xarray_chunked_arrays_>`_
|
|
94
|
+
supported by xarray including, but not limited to dask_.
|
|
95
|
+
* Arbitrary chunking along any xarray dimension.
|
|
96
|
+
|
|
97
|
+
* xarray-ms uses arcae_, a high-performance backend to CASA Tables implementing
|
|
98
|
+
a subset of python-casacore_'s interface.
|
|
99
|
+
* Some limited support for irregular MSv2 data via padding.
|
|
100
|
+
|
|
101
|
+
Work in Progress
|
|
102
|
+
----------------
|
|
103
|
+
|
|
104
|
+
The Measurement Set v4 specification is currently under active development.
|
|
105
|
+
xarray-ms is currently under active development and does not yet
|
|
106
|
+
have feature parity with xradio_.
|
|
107
|
+
|
|
108
|
+
Most measures information and many secondary sub-tables are currently missing.
|
|
109
|
+
However, the most important parts of the ``MAIN`` tables,
|
|
110
|
+
as well as the ``ANTENNA``, ``POLARIZATION`` and ``SPECTRAL_WINDOW``
|
|
111
|
+
sub-tables are implemented and should be sufficient
|
|
112
|
+
for basic algorithm development.
|
|
113
|
+
|
|
114
|
+
.. _SKAO: https://www.skao.int/
|
|
115
|
+
.. _NRAO: https://public.nrao.edu/
|
|
116
|
+
.. _msv4-spec: https://docs.google.com/spreadsheets/d/14a6qMap9M5r_vjpLnaBKxsR9TF4azN5LVdOxLacOX-s/
|
|
117
|
+
.. _xradio: https://github.com/casangi/xradio
|
|
118
|
+
.. _dask-ms: https://github.com/ratt-ru/dask-ms
|
|
119
|
+
.. _arcae: https://github.com/ratt-ru/arcae
|
|
120
|
+
.. _dask: https://www.dask.org/
|
|
121
|
+
.. _python-casacore: https://github.com/casacore/python-casacore/
|
|
122
|
+
.. _xarray: https://xarray.dev/
|
|
123
|
+
.. _xarray_backend: https://docs.xarray.dev/en/stable/internals/how-to-add-new-backend.html
|
|
124
|
+
.. _xarray_lazy: https://docs.xarray.dev/en/latest/internals/internal-design.html#lazy-indexing-classes
|
|
125
|
+
.. _xarray_chunked_arrays: https://docs.xarray.dev/en/latest/internals/chunked-arrays.html
|
|
126
|
+
.. _zarr: https://zarr.dev/
|
|
@@ -0,0 +1,96 @@
|
|
|
1
|
+
[tool.poetry]
|
|
2
|
+
name = "xarray-ms"
|
|
3
|
+
version = "0.2.0"
|
|
4
|
+
description = "xarray MSv4 views over MSv2 Measurement Sets"
|
|
5
|
+
authors = ["Simon Perkins <simon.perkins@gmail.com>"]
|
|
6
|
+
readme = "README.rst"
|
|
7
|
+
|
|
8
|
+
[tool.poetry.dependencies]
|
|
9
|
+
python = "^3.10"
|
|
10
|
+
pytest = {version = "^8.0.0", optional = true, extras = ["testing"]}
|
|
11
|
+
xarray = "^2024.3.0"
|
|
12
|
+
dask = {version = "^2024.5.0", optional = true, extras = ["testing"]}
|
|
13
|
+
distributed = {version = "^2024.5.0", optional = true, extras = ["testing"]}
|
|
14
|
+
cacheout = "^0.16.0"
|
|
15
|
+
arcae = "^0.2.4"
|
|
16
|
+
typing-extensions = { version = "^4.12.2", python = "<3.11" }
|
|
17
|
+
zarr = {version = "^2.18.3", optional = true, extras = ["testing"]}
|
|
18
|
+
|
|
19
|
+
[tool.poetry.extras]
|
|
20
|
+
testing = ["dask", "distributed", "pytest", "zarr"]
|
|
21
|
+
|
|
22
|
+
[tool.poetry.plugins."xarray.backends"]
|
|
23
|
+
"xarray-ms:msv2" = "xarray_ms.backend.msv2.entrypoint:MSv2PartitionEntryPoint"
|
|
24
|
+
|
|
25
|
+
[tool.poetry.group.dev]
|
|
26
|
+
optional = true
|
|
27
|
+
|
|
28
|
+
[tool.poetry.group.dev.dependencies]
|
|
29
|
+
pre-commit = "^3.8.0"
|
|
30
|
+
tbump = "^6.11.0"
|
|
31
|
+
|
|
32
|
+
[tool.poetry.group.doc]
|
|
33
|
+
optional = true
|
|
34
|
+
|
|
35
|
+
[tool.poetry.group.doc.dependencies]
|
|
36
|
+
sphinx = "^8.0.2"
|
|
37
|
+
pygments = "^2.18.0"
|
|
38
|
+
sphinx-copybutton = "^0.5.2"
|
|
39
|
+
pydata-sphinx-theme = "^0.15.4"
|
|
40
|
+
|
|
41
|
+
[tool.ruff]
|
|
42
|
+
line-length = 88
|
|
43
|
+
indent-width = 2
|
|
44
|
+
target-version = "py311"
|
|
45
|
+
|
|
46
|
+
[tool.ruff.lint]
|
|
47
|
+
select = ["F", "E", "W", "I001"]
|
|
48
|
+
extend-select = ["I"]
|
|
49
|
+
|
|
50
|
+
[build-system]
|
|
51
|
+
requires = ["poetry-core"]
|
|
52
|
+
build-backend = "poetry.core.masonry.api"
|
|
53
|
+
[tool.tbump]
|
|
54
|
+
# Uncomment this if your project is hosted on GitHub:
|
|
55
|
+
# github_url = "https://github.com/<user or organization>/<project>/"
|
|
56
|
+
|
|
57
|
+
[tool.tbump.version]
|
|
58
|
+
current = "0.2.0"
|
|
59
|
+
|
|
60
|
+
# Example of a semver regexp.
|
|
61
|
+
# Make sure this matches current_version before
|
|
62
|
+
# using tbump
|
|
63
|
+
regex = '''
|
|
64
|
+
(?P<major>\d+)
|
|
65
|
+
\.
|
|
66
|
+
(?P<minor>\d+)
|
|
67
|
+
\.
|
|
68
|
+
(?P<patch>\d+)
|
|
69
|
+
'''
|
|
70
|
+
|
|
71
|
+
[tool.tbump.git]
|
|
72
|
+
message_template = "Bump to {new_version}"
|
|
73
|
+
tag_template = "{new_version}"
|
|
74
|
+
|
|
75
|
+
# For each file to patch, add a [[tool.tbump.file]] config
|
|
76
|
+
# section containing the path of the file, relative to the
|
|
77
|
+
# tbump.toml location.
|
|
78
|
+
[[tool.tbump.file]]
|
|
79
|
+
src = "pyproject.toml"
|
|
80
|
+
|
|
81
|
+
[[tool.tbump.file]]
|
|
82
|
+
src = "doc/source/conf.py"
|
|
83
|
+
|
|
84
|
+
# You can specify a list of commands to
|
|
85
|
+
# run after the files have been patched
|
|
86
|
+
# and before the git commit is made
|
|
87
|
+
|
|
88
|
+
# [[tool.tbump.before_commit]]
|
|
89
|
+
# name = "check changelog"
|
|
90
|
+
# cmd = "grep -q {new_version} Changelog.rst"
|
|
91
|
+
|
|
92
|
+
# Or run some commands after the git tag and the branch
|
|
93
|
+
# have been pushed:
|
|
94
|
+
# [[tool.tbump.after_push]]
|
|
95
|
+
# name = "publish"
|
|
96
|
+
# cmd = "./publish.sh"
|
|
@@ -0,0 +1,36 @@
|
|
|
1
|
+
from typing import Mapping
|
|
2
|
+
|
|
3
|
+
from xarray import Dataset, Variable
|
|
4
|
+
|
|
5
|
+
from xarray_ms.backend.msv2.structure import MSv2StructureFactory
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
class AntennaDatasetFactory:
  """Creates an xarray Dataset of antenna metadata from an MSv2 structure."""

  # Callable invoked on demand to obtain the object exposing ``_ant``
  _structure_factory: MSv2StructureFactory

  def __init__(self, structure_factory: MSv2StructureFactory):
    """Keep a reference to the structure factory.

    The factory is not called here; it is invoked each time
    :meth:`get_dataset` runs.
    """
    self._structure_factory = structure_factory

  def get_dataset(self) -> Mapping[str, Variable]:
    """Build the antenna dataset.

    Reads the ``POSITION``, ``NAME``, ``STATION`` and ``MOUNT`` columns
    from the ``_ant`` attribute of the structure and arranges them along
    the ``antenna_name`` dimension.

    Returns:
      The constructed :class:`xarray.Dataset` holding an
      ``ANTENNA_POSITION`` data variable and ``antenna_name``,
      ``station`` and ``mount`` coordinates. The signature advertises
      this as ``Mapping[str, Variable]``.
    """
    table = self._structure_factory()._ant

    import pyarrow.compute as pac

    # Flatten the per-antenna position lists, then regroup into rows of 3
    flat_positions = pac.list_flatten(table["POSITION"])
    positions = flat_positions.to_numpy().reshape(-1, 3)

    dim = "antenna_name"
    position_dims = (dim, "cartesian_pos_label/ellipsoid_pos_label")

    data_vars = {"ANTENNA_POSITION": Variable(position_dims, positions)}
    coords = {
      "antenna_name": Variable(dim, table["NAME"].to_numpy()),
      "station": Variable(
        dim, table["STATION"].to_numpy(), {"coordinates": "station"}
      ),
      "mount": Variable(dim, table["MOUNT"].to_numpy(), {"coordinates": "mount"}),
    }

    return Dataset(data_vars=data_vars, coords=coords)
|
|
@@ -0,0 +1,75 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from typing import TYPE_CHECKING, Any, Callable, Tuple
|
|
4
|
+
|
|
5
|
+
import numpy as np
|
|
6
|
+
from xarray.backends import BackendArray
|
|
7
|
+
from xarray.core.indexing import IndexingSupport, explicit_indexing_adapter
|
|
8
|
+
|
|
9
|
+
if TYPE_CHECKING:
|
|
10
|
+
import numpy.typing as npt
|
|
11
|
+
|
|
12
|
+
from xarray_ms.backend.msv2.structure import MSv2StructureFactory, PartitionKeyT
|
|
13
|
+
from xarray_ms.backend.msv2.table_factory import TableFactory
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
def slice_length(s, max_len):
  """Return the number of elements selected by a unit-step slice.

  Args:
    s: Slice to measure. It is normalised against ``max_len`` with
      :meth:`slice.indices`, so open-ended, negative and out-of-range
      bounds are accepted.
    max_len: Length of the dimension being sliced.

  Returns:
    The number of selected elements, ``0`` when the slice is empty.

  Raises:
    NotImplementedError: If the normalised step is not 1.
  """
  start, stop, step = s.indices(max_len)
  if step != 1:
    raise NotImplementedError(f"Slicing with steps {s} other than 1 not supported")
  # slice.indices can report stop < start for an empty selection such as
  # slice(5, 2); clamp so callers never receive a negative length.
  return max(0, stop - start)
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
class MSv2Array(BackendArray):
  """Backend array containing functionality for reading an MSv2 column

  Indexing requests are routed through xarray's
  ``explicit_indexing_adapter`` with ``IndexingSupport.OUTER``. The first
  two dimensions of a key are translated into table rows via the
  partition's ``row_map``; any remaining dimensions are forwarded to the
  column read unchanged.
  """

  _table_factory: TableFactory
  _structure_factory: MSv2StructureFactory
  _partition: PartitionKeyT
  _column: str
  # NOTE(review): ``_shape`` and ``_dtype`` are declared here, but
  # ``__init__`` assigns the public ``shape`` and ``dtype`` attributes
  # instead -- confirm which names are intended.
  _shape: Tuple[int, ...]
  _dtype: npt.DTypeLike
  _default: Any | None
  _transform: Callable[[npt.NDArray], npt.NDArray] | None

  def __init__(
    self,
    table_factory: TableFactory,
    structure_factory: MSv2StructureFactory,
    partition: PartitionKeyT,
    column: str,
    shape: Tuple[int, ...],
    dtype: npt.DTypeLike,
    default: Any | None = None,
    transform: Callable[[npt.NDArray], npt.NDArray] | None = None,
  ):
    """Create a lazily-read view of one column.

    Args:
      table_factory: Callable returning the table object whose
        ``getcol`` method performs the read.
      structure_factory: Callable returning an object indexable by
        partition key; the entry's ``row_map`` is used during indexing.
      partition: Key selecting the entry of the structure to use.
      column: Name of the column passed to ``getcol``.
      shape: Shape of the array; at least two dimensions are required.
      dtype: Anything accepted by ``np.dtype``.
      default: Fill value used to initialise the result buffer before
        the column is read into it.
      transform: Optional callable applied to the result before it is
        returned from an indexing operation.
    """
    self._table_factory = table_factory
    self._structure_factory = structure_factory
    self._partition = partition
    self._column = column
    self._default = default
    self._transform = transform
    self.shape = shape
    self.dtype = np.dtype(dtype)

    # The leading two dimensions are used to index row_map in _getitem
    assert len(shape) >= 2, "(time, baseline) required"

  def __getitem__(self, key) -> npt.NDArray:
    """Index the array, delegating key normalisation to xarray."""
    return explicit_indexing_adapter(
      key, self.shape, IndexingSupport.OUTER, self._getitem
    )

  def _getitem(self, key) -> npt.NDArray:
    """Read the data selected by ``key``.

    ``key`` must have one entry per dimension. Every entry is passed to
    ``slice_length``, so entries are presumably slices -- TODO confirm
    that integer or array indexers never reach this method.
    """
    assert len(key) == len(self.shape)
    expected_shape = tuple(slice_length(k, s) for k, s in zip(key, self.shape))
    # Map the (time, baseline) coordinates onto row indices
    rows = self._structure_factory()[self._partition].row_map[key[:2]]
    # Row indices are flattened to one axis; trailing key entries pass through
    xkey = (rows.ravel(),) + key[2:]
    row_shape = (rows.size,) + expected_shape[2:]
    # NOTE(review): when ``_default`` is None the fill value depends on how
    # numpy casts None to ``self.dtype`` -- confirm callers always supply a
    # default where that cast is not valid.
    result = np.full(row_shape, self._default, dtype=self.dtype)
    # The return value of getcol is ignored, so it presumably writes into
    # ``result`` in place -- confirm against TableFactory's table type.
    self._table_factory().getcol(self._column, xkey, result)
    # Restore the leading two dimensions that were flattened into rows
    result = result.reshape(rows.shape + expected_shape[2:])
    return self._transform(result) if self._transform else result

  def set_transform(self, transform: Callable[[npt.NDArray], npt.NDArray]):
    """Replace the callable applied to results of indexing operations."""
    self._transform = transform
|