xarray-ms 0.3.5__tar.gz → 0.3.7__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {xarray_ms-0.3.5 → xarray_ms-0.3.7}/PKG-INFO +4 -3
- xarray_ms-0.3.7/doc/source/api.rst +31 -0
- {xarray_ms-0.3.5 → xarray_ms-0.3.7}/doc/source/changelog.rst +16 -0
- {xarray_ms-0.3.5 → xarray_ms-0.3.7}/doc/source/conf.py +2 -1
- {xarray_ms-0.3.5 → xarray_ms-0.3.7}/doc/source/index.rst +2 -0
- {xarray_ms-0.3.5 → xarray_ms-0.3.7}/doc/source/introduction.rst +2 -13
- xarray_ms-0.3.7/doc/source/partitioning.rst +184 -0
- xarray_ms-0.3.7/doc/source/roadmap.rst +69 -0
- {xarray_ms-0.3.5 → xarray_ms-0.3.7}/doc/source/tutorial.rst +4 -4
- {xarray_ms-0.3.5 → xarray_ms-0.3.7}/pyproject.toml +5 -4
- {xarray_ms-0.3.5 → xarray_ms-0.3.7}/tests/test_zarr_roundtrip.py +8 -5
- {xarray_ms-0.3.5 → xarray_ms-0.3.7}/xarray_ms/backend/msv2/entrypoint.py +3 -0
- {xarray_ms-0.3.5 → xarray_ms-0.3.7}/xarray_ms/backend/msv2/factories/correlated.py +8 -3
- {xarray_ms-0.3.5 → xarray_ms-0.3.7}/xarray_ms/errors.py +3 -3
- {xarray_ms-0.3.5 → xarray_ms-0.3.7}/xarray_ms/testing/simulator.py +2 -0
- xarray_ms-0.3.5/doc/source/api.rst +0 -54
- {xarray_ms-0.3.5 → xarray_ms-0.3.7}/.github/ISSUE_TEMPLATE.md +0 -0
- {xarray_ms-0.3.5 → xarray_ms-0.3.7}/.github/PULL_REQUEST_TEMPLATE.md +0 -0
- {xarray_ms-0.3.5 → xarray_ms-0.3.7}/.github/dependabot.yml +0 -0
- {xarray_ms-0.3.5 → xarray_ms-0.3.7}/.github/workflows/ci.yml +0 -0
- {xarray_ms-0.3.5 → xarray_ms-0.3.7}/.github/workflows/pre-commit.yml +0 -0
- {xarray_ms-0.3.5 → xarray_ms-0.3.7}/.github/workflows/readthedocs.yml +0 -0
- {xarray_ms-0.3.5 → xarray_ms-0.3.7}/.gitignore +0 -0
- {xarray_ms-0.3.5 → xarray_ms-0.3.7}/.pre-commit-config.yaml +0 -0
- {xarray_ms-0.3.5 → xarray_ms-0.3.7}/.readthedocs.yaml +0 -0
- {xarray_ms-0.3.5 → xarray_ms-0.3.7}/LICENSE +0 -0
- {xarray_ms-0.3.5 → xarray_ms-0.3.7}/README.rst +0 -0
- {xarray_ms-0.3.5 → xarray_ms-0.3.7}/doc/Makefile +0 -0
- {xarray_ms-0.3.5 → xarray_ms-0.3.7}/doc/make.bat +0 -0
- {xarray_ms-0.3.5 → xarray_ms-0.3.7}/doc/source/install.rst +0 -0
- {xarray_ms-0.3.5 → xarray_ms-0.3.7}/doc/source/links.rst +0 -0
- {xarray_ms-0.3.5 → xarray_ms-0.3.7}/hello.txt +0 -0
- {xarray_ms-0.3.5 → xarray_ms-0.3.7}/tests/__init__.py +0 -0
- {xarray_ms-0.3.5 → xarray_ms-0.3.7}/tests/conftest.py +0 -0
- {xarray_ms-0.3.5 → xarray_ms-0.3.7}/tests/msv4_test_corpus/__init__.py +0 -0
- {xarray_ms-0.3.5 → xarray_ms-0.3.7}/tests/msv4_test_corpus/conftest.py +0 -0
- {xarray_ms-0.3.5 → xarray_ms-0.3.7}/tests/msv4_test_corpus/test_msv_corpus.py +0 -0
- {xarray_ms-0.3.5 → xarray_ms-0.3.7}/tests/test_antenna.py +0 -0
- {xarray_ms-0.3.5 → xarray_ms-0.3.7}/tests/test_backend.py +0 -0
- {xarray_ms-0.3.5 → xarray_ms-0.3.7}/tests/test_basic.py +0 -0
- {xarray_ms-0.3.5 → xarray_ms-0.3.7}/tests/test_encoding.py +0 -0
- {xarray_ms-0.3.5 → xarray_ms-0.3.7}/tests/test_field_and_source.py +0 -0
- {xarray_ms-0.3.5 → xarray_ms-0.3.7}/tests/test_github.py +0 -0
- {xarray_ms-0.3.5 → xarray_ms-0.3.7}/tests/test_imputation.py +0 -0
- {xarray_ms-0.3.5 → xarray_ms-0.3.7}/tests/test_multiton.py +0 -0
- {xarray_ms-0.3.5 → xarray_ms-0.3.7}/tests/test_read.py +0 -0
- {xarray_ms-0.3.5 → xarray_ms-0.3.7}/tests/test_structure.py +0 -0
- {xarray_ms-0.3.5 → xarray_ms-0.3.7}/tests/test_utils.py +0 -0
- {xarray_ms-0.3.5 → xarray_ms-0.3.7}/xarray_ms/__init__.py +0 -0
- {xarray_ms-0.3.5 → xarray_ms-0.3.7}/xarray_ms/backend/msv2/array.py +0 -0
- {xarray_ms-0.3.5 → xarray_ms-0.3.7}/xarray_ms/backend/msv2/encoders.py +0 -0
- {xarray_ms-0.3.5 → xarray_ms-0.3.7}/xarray_ms/backend/msv2/entrypoint_utils.py +0 -0
- {xarray_ms-0.3.5 → xarray_ms-0.3.7}/xarray_ms/backend/msv2/factories/__init__.py +0 -0
- {xarray_ms-0.3.5 → xarray_ms-0.3.7}/xarray_ms/backend/msv2/factories/antenna.py +0 -0
- {xarray_ms-0.3.5 → xarray_ms-0.3.7}/xarray_ms/backend/msv2/factories/core.py +0 -0
- {xarray_ms-0.3.5 → xarray_ms-0.3.7}/xarray_ms/backend/msv2/factories/field_and_source.py +0 -0
- {xarray_ms-0.3.5 → xarray_ms-0.3.7}/xarray_ms/backend/msv2/imputation.py +0 -0
- {xarray_ms-0.3.5 → xarray_ms-0.3.7}/xarray_ms/backend/msv2/partition.py +0 -0
- {xarray_ms-0.3.5 → xarray_ms-0.3.7}/xarray_ms/backend/msv2/structure.py +0 -0
- {xarray_ms-0.3.5 → xarray_ms-0.3.7}/xarray_ms/backend/msv2/table_utils.py +0 -0
- {xarray_ms-0.3.5 → xarray_ms-0.3.7}/xarray_ms/casa_types.py +0 -0
- {xarray_ms-0.3.5 → xarray_ms-0.3.7}/xarray_ms/msv4_types.py +0 -0
- {xarray_ms-0.3.5 → xarray_ms-0.3.7}/xarray_ms/multiton.py +0 -0
- {xarray_ms-0.3.5 → xarray_ms-0.3.7}/xarray_ms/query.py +0 -0
- {xarray_ms-0.3.5 → xarray_ms-0.3.7}/xarray_ms/testing/__init__.py +0 -0
- {xarray_ms-0.3.5 → xarray_ms-0.3.7}/xarray_ms/testing/utils.py +0 -0
- {xarray_ms-0.3.5 → xarray_ms-0.3.7}/xarray_ms/utils.py +0 -0
|
@@ -1,14 +1,14 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: xarray-ms
|
|
3
|
-
Version: 0.3.
|
|
3
|
+
Version: 0.3.7
|
|
4
4
|
Summary: xarray MSv4 views over MSv2 Measurement Sets
|
|
5
5
|
Author-email: Simon Perkins <simon.perkins@gmail.com>
|
|
6
6
|
License-File: LICENSE
|
|
7
7
|
Requires-Python: >=3.10
|
|
8
|
-
Requires-Dist: arcae
|
|
8
|
+
Requires-Dist: arcae<0.4.0,>=0.3.2
|
|
9
9
|
Requires-Dist: cacheout>=0.16.0
|
|
10
10
|
Requires-Dist: typing-extensions>=4.12.2
|
|
11
|
-
Requires-Dist: xarray
|
|
11
|
+
Requires-Dist: xarray<2025.9.1,>=2025.0
|
|
12
12
|
Provides-Extra: dev
|
|
13
13
|
Requires-Dist: pre-commit>=3.8.0; extra == 'dev'
|
|
14
14
|
Requires-Dist: tbump>=6.11.0; extra == 'dev'
|
|
@@ -18,6 +18,7 @@ Requires-Dist: pydata-sphinx-theme>=0.15.4; extra == 'doc'
|
|
|
18
18
|
Requires-Dist: pygments>=2.18.0; extra == 'doc'
|
|
19
19
|
Requires-Dist: sphinx-copybutton>=0.5.2; extra == 'doc'
|
|
20
20
|
Requires-Dist: sphinx>=8.0.2; extra == 'doc'
|
|
21
|
+
Requires-Dist: sphinxcontrib-spelling; extra == 'doc'
|
|
21
22
|
Provides-Extra: testing
|
|
22
23
|
Requires-Dist: dask>=2024.5.0; extra == 'testing'
|
|
23
24
|
Requires-Dist: distributed>=2024.5.0; extra == 'testing'
|
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
API
|
|
2
|
+
===
|
|
3
|
+
|
|
4
|
+
Opening Measurement Sets
|
|
5
|
+
------------------------
|
|
6
|
+
|
|
7
|
+
The standard :func:`xarray.open_datatree` method should
|
|
8
|
+
be used to open a :class:`~xarray.DataTree` interface
|
|
9
|
+
to the underlying Measurement Set data.
|
|
10
|
+
|
|
11
|
+
.. code-block:: python
|
|
12
|
+
|
|
13
|
+
>>> datatree = xarray.open_datatree("/data/data.ms", partition_schema=["FIELD_ID"])
|
|
14
|
+
|
|
15
|
+
These methods defer to the relevant methods on the
|
|
16
|
+
`Entrypoint Class <entrypoint-class_>`_.
|
|
17
|
+
Consult the method signatures for information on extra
|
|
18
|
+
arguments that can be passed.
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
.. _entrypoint-class:
|
|
22
|
+
|
|
23
|
+
Entrypoint Class
|
|
24
|
+
----------------
|
|
25
|
+
|
|
26
|
+
Entrypoint class for the MSv2 backend.
|
|
27
|
+
|
|
28
|
+
.. autoclass:: xarray_ms.backend.msv2.entrypoint.MSv2EntryPoint
|
|
29
|
+
:members: open_datatree, open_dataset
|
|
30
|
+
|
|
31
|
+
.. _partitioning-schema:
|
|
@@ -3,6 +3,22 @@
|
|
|
3
3
|
Changelog
|
|
4
4
|
=========
|
|
5
5
|
|
|
6
|
+
0.3.7 (03-10-2025)
|
|
7
|
+
------------------
|
|
8
|
+
* Documentation updates (:pr:`134`)
|
|
9
|
+
* Temporarily restrict xarray to \< 2025.9.1 (:pr:`136`) until
|
|
10
|
+
`xarray#10808 <https://github.com/pydata/xarray/issues/10808>`_
|
|
11
|
+
is resolved.
|
|
12
|
+
* Restrict arcae to \< 0.4.0 to prevent
|
|
13
|
+
API-breaking write support changes (:pr:`136`)
|
|
14
|
+
* Provide a physically realistic SPECTRAL_WINDOW::REF_FREQUENCY in simulated data (:pr:`133`)
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
0.3.6 (22-09-2025)
|
|
18
|
+
------------------
|
|
19
|
+
* Document Partitioning Strategies and Irregular Grid Handling (:pr:`132`)
|
|
20
|
+
* Document MSv4 compliance and roadmap (:pr:`131`)
|
|
21
|
+
|
|
6
22
|
0.3.5 (17-09-2025)
|
|
7
23
|
------------------
|
|
8
24
|
* Remove deploy to test-pypi (:pr:`130`)
|
|
@@ -11,12 +11,13 @@
|
|
|
11
11
|
project = "xarray-ms"
|
|
12
12
|
copyright = "2024 - 2025 NRF (SARAO) and Rhodes University (RATT) Centre"
|
|
13
13
|
author = "Simon Perkins"
|
|
14
|
-
release = "0.3.
|
|
14
|
+
release = "0.3.7"
|
|
15
15
|
|
|
16
16
|
# -- General configuration ---------------------------------------------------
|
|
17
17
|
# https://www.sphinx-doc.org/en/master/usage/configuration.html#general-configuration
|
|
18
18
|
|
|
19
19
|
extensions = [
|
|
20
|
+
"sphinxcontrib.spelling",
|
|
20
21
|
"sphinx.ext.autodoc",
|
|
21
22
|
"sphinx.ext.autosummary",
|
|
22
23
|
"sphinx.ext.extlinks",
|
|
@@ -72,16 +72,5 @@ Why xarray-ms?
|
|
|
72
72
|
* xarray-ms uses arcae_, a high-performance backend to CASA Tables implementing
|
|
73
73
|
a subset of python-casacore_'s interface.
|
|
74
74
|
* Some limited support for irregular MSv2 data via padding.
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
----------------
|
|
78
|
-
|
|
79
|
-
The Measurement Set v4 specification is currently under active development.
|
|
80
|
-
xarray-ms is also currently under active development and does not yet
|
|
81
|
-
have feature parity with MSv4 or xradio_.
|
|
82
|
-
Some measures information and many secondary datasets are not currently implemented.
|
|
83
|
-
|
|
84
|
-
However, the most important parts of the MSv2 ``MAIN`` tables,
|
|
85
|
-
as well as the ``ANTENNA``, ``POLARIZATON`` and ``SPECTRAL_WINDOW``
|
|
86
|
-
sub-tables are implemented and should be sufficient
|
|
87
|
-
for basic algorithm development.
|
|
75
|
+
* Refer to the :ref:`MSv4 compliance and roadmap <compliance-and-roadmap>`
|
|
76
|
+
section for information on adherence to the specification.
|
|
@@ -0,0 +1,184 @@
|
|
|
1
|
+
.. _partitioning-guide:
|
|
2
|
+
|
|
3
|
+
Partitioning Guide
|
|
4
|
+
==================
|
|
5
|
+
|
|
6
|
+
`Measurement Set v4.0 <msv4-spec_>`_ specifies a series of datasets with
|
|
7
|
+
``time``, ``baseline_id`` and ``frequency`` coordinates where
|
|
8
|
+
``time`` and ``frequency`` have associated ``integration_time`` and
|
|
9
|
+
``channel_width`` attributes.
|
|
10
|
+
In the best case, this represents monotonic, equidistant values along
|
|
11
|
+
``time`` and ``frequency`` and the standard quadratic relation between
|
|
12
|
+
antennas in the case of ``baseline_id``.
|
|
13
|
+
Observational data recorded directly off an interferometer and stored
|
|
14
|
+
for archival purposes will commonly follow a
|
|
15
|
+
``(time, baseline_id, frequency)`` ordering.
|
|
16
|
+
|
|
17
|
+
The usefulness of this representation and ordering is that it is
|
|
18
|
+
simple and easy for software to reason about.
|
|
19
|
+
This is desirable as it simplifies our software.
|
|
20
|
+
|
|
21
|
+
The challenge in converting from MSv2 to MSv4 is formulating a
|
|
22
|
+
partitioning strategy to handle irregularity in an MSv2 dataset.
|
|
23
|
+
|
|
24
|
+
Measurement Set v2.0 irregularity
|
|
25
|
+
---------------------------------
|
|
26
|
+
|
|
27
|
+
By contrast the `Measurement Set v2.0 <msv2-spec_>`_ is a tabular format that
|
|
28
|
+
does not enforce any notion of regularity (although much software assumes it).
|
|
29
|
+
The ``TIME`` and ``INTERVAL`` columns in the MAIN MSv2 table
|
|
30
|
+
describe the midpoint in time at which a sample was measured
|
|
31
|
+
and the amount of time (integration time) taken to measure the sample,
|
|
32
|
+
while the ``ANTENNA1`` and ``ANTENNA2`` columns define the baseline along
|
|
33
|
+
which the sample was measured.
|
|
34
|
+
``TIME``, ``ANTENNA1`` and ``ANTENNA2`` are *keys* in the tabular MAIN table
|
|
35
|
+
and there is no requirement that the measurements they index are ordered,
|
|
36
|
+
or even form a regular ``(time, baseline_id)`` grid.
|
|
37
|
+
Additionally, the ``DATA_DESC_ID`` column establishes a relation to the
|
|
38
|
+
``SPECTRAL_WINDOW::CHAN_FREQ`` and ``SPECTRAL_WINDOW::CHAN_WIDTH`` columns
|
|
39
|
+
representing the frequency centroid and bandwidth of the sample, respectively.
|
|
40
|
+
|
|
41
|
+
The challenge that MSv2 poses to radio astronomy software in the worst case
|
|
42
|
+
is that it can represent overlapped or disjoint measurements in time and frequency
|
|
43
|
+
for one or more baselines.
|
|
44
|
+
However, most observational data is well-behaved:
|
|
45
|
+
Measurements are commonly ordered by ``TIME, ANTENNA1, ANTENNA2``
|
|
46
|
+
and ``CHAN_FREQ`` commonly increases monotonically with
|
|
47
|
+
equidistant values (i.e. ``CHAN_WIDTH`` values are uniform) but this cannot
|
|
48
|
+
always be assumed.
|
|
49
|
+
Any regularity in an MSv2 MS is achieved through convention rather
|
|
50
|
+
than enforcement.
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
Choosing a partitioning strategy
|
|
54
|
+
--------------------------------
|
|
55
|
+
|
|
56
|
+
By default, MSv2 measurements are partitioned by ``DATA_DESC_ID``,
|
|
57
|
+
``OBSERVATION_ID``, ``PROCESSOR_ID`` and the
|
|
58
|
+
``STATE::OBS_MODE`` (via ``STATE_ID``) columns.
|
|
59
|
+
|
|
60
|
+
.. autodata:: xarray_ms.backend.msv2.structure.DEFAULT_PARTITION_COLUMNS
|
|
61
|
+
|
|
62
|
+
For example, it follows from the previous section that,
|
|
63
|
+
in order to achieve regularity in frequency, *partition*
|
|
64
|
+
MSv2 measurements by the ``DATA_DESC_ID`` column.
|
|
65
|
+
|
|
66
|
+
Partitioning always uses these columns, but additional columns can be
|
|
67
|
+
selected if finer grained partitioning is required:
|
|
68
|
+
|
|
69
|
+
.. autodata:: xarray_ms.backend.msv2.structure.VALID_PARTITION_COLUMNS
|
|
70
|
+
|
|
71
|
+
Note that ``OBS_MODE`` and ``SUB_SCAN_NUMBER`` are columns in the ``STATE``
|
|
72
|
+
subtable, while ``SOURCE_ID`` is a column of the ``FIELD`` subtable.
|
|
73
|
+
Partitioning on these columns is achieved by joining on the ``STATE_ID``
|
|
74
|
+
and ``FIELD_ID`` columns, respectively.
|
|
75
|
+
|
|
76
|
+
|
|
77
|
+
Within these partitions, measurements are sorted by
|
|
78
|
+
``TIME``, ``ANTENNA1`` and ``ANTENNA2``
|
|
79
|
+
to form a grid.
|
|
80
|
+
|
|
81
|
+
.. _time-partitioning:
|
|
82
|
+
|
|
83
|
+
Partitioning in time
|
|
84
|
+
++++++++++++++++++++
|
|
85
|
+
|
|
86
|
+
Compared to frequency, achieving regularity in time requires more thought
|
|
87
|
+
as it depends on identifying partitions of MSv2 where data:
|
|
88
|
+
|
|
89
|
+
1. contains monotonically increasing ``TIME`` (after ordering).
|
|
90
|
+
2. is dumped with a uniform ``INTERVAL``.
|
|
91
|
+
3. ideally contains no gaps: i.e. ``(TIME - INTERVAL / 2)[1:] == (TIME + INTERVAL / 2)[:-1]``.
|
|
92
|
+
|
|
93
|
+
For example, ``OBS_MODE`` specifying ``STATE::OBS_MODE`` via ``STATE_ID``
|
|
94
|
+
is a good default partitioner, as it represents a shift in the
|
|
95
|
+
interferometer's mode of operation: It identifies when
|
|
96
|
+
the interferometer is e.g. slewing/observing a calibrator/observing a target.
|
|
97
|
+
|
|
98
|
+
Other valid partitioning columns are:
|
|
99
|
+
|
|
100
|
+
- ``FIELD_ID``: Observing a field for a period of time.
|
|
101
|
+
- ``SOURCE_ID``: Observing a source within a field for a period of time.
|
|
102
|
+
- ``SCAN_NUMBER``: A coarse, logical number (i.e. scan) associated with the data.
|
|
103
|
+
- ``SUB_SCAN_NUMBER``: A finer, logical number (i.e. sub-scan) associated with the data.
|
|
104
|
+
This specifies ``STATE::SUB_SCAN_NUMBER`` (via ``STATE_ID``).
|
|
105
|
+
- ``STATE_ID``: The state of an interferometer.
|
|
106
|
+
|
|
107
|
+
as these columns frequently identify measurement groupings where
|
|
108
|
+
the interferometer is consistently dumping.
|
|
109
|
+
|
|
110
|
+
.. code-block:: python
|
|
111
|
+
|
|
112
|
+
import xarray_ms
|
|
113
|
+
import xarray
|
|
114
|
+
|
|
115
|
+
# Also partition by SCAN_NUMBER and FIELD_ID
|
|
116
|
+
dt = xarray.open_datatree(ms, partition_schema=["SCAN_NUMBER", "FIELD_ID"])
|
|
117
|
+
|
|
118
|
+
.. _missing-baselines:
|
|
119
|
+
|
|
120
|
+
Missing Baselines
|
|
121
|
+
-----------------
|
|
122
|
+
|
|
123
|
+
Baselines can be missing for distinct ``TIME`` values.
|
|
124
|
+
This can occur when Measurement Sets are passed through the
|
|
125
|
+
CASA ``split`` task with ``keepflags=False`` set, for instance.
|
|
126
|
+
|
|
127
|
+
Having all baselines present can be useful
|
|
128
|
+
for simplifying calibration algorithms and cases where
|
|
129
|
+
auto-correlations are requested, but none are present in the data.
|
|
130
|
+
|
|
131
|
+
``xarray-ms`` will impute these missing data points with default values
|
|
132
|
+
(``nan`` in the case of data, ``1`` in the case of flags).
|
|
133
|
+
|
|
134
|
+
Irregular Grid Warnings
|
|
135
|
+
-----------------------
|
|
136
|
+
|
|
137
|
+
Given the specified partitioning schema, ``xarray-ms`` will partition
|
|
138
|
+
the MSv2 by the supplied columns and attempt to establish a regular
|
|
139
|
+
``(time, baseline_id, frequency)`` grid.
|
|
140
|
+
If this is not possible, three classes of warning can be issued,
|
|
141
|
+
related to each of the three dimensions.
|
|
142
|
+
|
|
143
|
+
:class:`~xarray_ms.errors.IrregularTimeGridWarning`
|
|
144
|
+
+++++++++++++++++++++++++++++++++++++++++++++++++++
|
|
145
|
+
|
|
146
|
+
This warning is raised when it is impossible
|
|
147
|
+
to identify a unique ``INTERVAL`` value for a partition.
|
|
148
|
+
This is required to assign a single ``integration_time``
|
|
149
|
+
attribute to the ``time`` coordinate.
|
|
150
|
+
|
|
151
|
+
The above check is relaxed slightly by excluding the last time
|
|
152
|
+
in the partition (to handle averaged data) and by allowing
|
|
153
|
+
a degree of jitter in the ``INTERVAL`` column.
|
|
154
|
+
|
|
155
|
+
Generally, this happens if the requested partitioning schema
|
|
156
|
+
does not satisfy the criteria described in :ref:`time-partitioning`.
|
|
157
|
+
The solution is to experiment with other partitioning columns.
|
|
158
|
+
|
|
159
|
+
Should the user wish to continue with this case,
|
|
160
|
+
``xarray-ms`` sets ``integration_time=nan``
|
|
161
|
+
and adds ``(time, baseline_id)``-shaped,
|
|
162
|
+
``TIME`` and ``INTEGRATION_TIME`` columns.
|
|
163
|
+
Downstream applications should account for this.
|
|
164
|
+
|
|
165
|
+
:class:`~xarray_ms.errors.IrregularChannelGridWarning`
|
|
166
|
+
++++++++++++++++++++++++++++++++++++++++++++++++++++++
|
|
167
|
+
|
|
168
|
+
This warning is raised when it is impossible to identify a unique
|
|
169
|
+
``CHAN_WIDTH`` value for the partition.
|
|
170
|
+
This is required to assign a single ``channel_width``
|
|
171
|
+
attribute to the ``frequency`` coordinate.
|
|
172
|
+
|
|
173
|
+
Should the user wish to continue with this
|
|
174
|
+
case, ``xarray-ms`` sets ``channel_width=nan``
|
|
175
|
+
and adds ``(frequency,)``-shaped ``CHANNEL_WIDTH`` columns.
|
|
176
|
+
Downstream applications should account for this.
|
|
177
|
+
|
|
178
|
+
:class:`~xarray_ms.errors.IrregularBaselineGridWarning`
|
|
179
|
+
+++++++++++++++++++++++++++++++++++++++++++++++++++++++
|
|
180
|
+
|
|
181
|
+
This warning is raised when baselines were missing for a
|
|
182
|
+
particular timestep.
|
|
183
|
+
This is a relatively benign warning as ``xarray-ms`` will
|
|
184
|
+
impute missing values (See :ref:`missing-baselines`).
|
|
@@ -0,0 +1,69 @@
|
|
|
1
|
+
.. _compliance-and-roadmap:
|
|
2
|
+
|
|
3
|
+
Measurement Set v4 Compliance
|
|
4
|
+
=============================
|
|
5
|
+
|
|
6
|
+
xarray-ms fully implements the loading of correlated data from
|
|
7
|
+
MSv2 datasets into the Measurement Set v4.0 specification.
|
|
8
|
+
This covers the MSv2 ``MAIN`` table, as well as the
|
|
9
|
+
``DATA_DESCRIPTION``, ``SPECTRAL_WINDOW``, ``POLARIZATION``,
|
|
10
|
+
``FEED``, ``OBSERVATION``, ``STATE`` and ``PROCESSOR`` subtables
|
|
11
|
+
whose synthesis is presented in correlated data datasets
|
|
12
|
+
within an xarray DataTree.
|
|
13
|
+
|
|
14
|
+
Care has been taken to convert measures information from MSv2 into
|
|
15
|
+
MSv4 metadata attributes, where appropriate.
|
|
16
|
+
|
|
17
|
+
In particular, it loads the MSv2 dataset present in the
|
|
18
|
+
`Measurement Set v4 test suite <msv4-test-suite_>`_ except for:
|
|
19
|
+
|
|
20
|
+
- ALMA Measurement Sets which sometimes do not correctly link
|
|
21
|
+
the ANTENNA and MAIN table via the FEED table.
|
|
22
|
+
This will need to be addressed heuristically.
|
|
23
|
+
- Single-dish Measurement Sets.
|
|
24
|
+
This is not difficult as it involves loading in
|
|
25
|
+
``MAIN::FLOAT_DATA`` into the ``SPECTRUM`` variable and
|
|
26
|
+
renaming ``FIELD_PHASE_CENTER_DIRECTION`` to
|
|
27
|
+
``FIELD_REFERENCE_CENTER_DIRECTION`` in the
|
|
28
|
+
``field_and_source_xds`` dataset.
|
|
29
|
+
|
|
30
|
+
MSv4 specifies a set of optional datasets, of which the following are implemented:
|
|
31
|
+
|
|
32
|
+
- antenna_xds
|
|
33
|
+
- field_and_source_xds (required components)
|
|
34
|
+
|
|
35
|
+
The following optional datasets are not yet implemented:
|
|
36
|
+
|
|
37
|
+
- field_and_source_ephemeris_xds
|
|
38
|
+
- pointing_xds
|
|
39
|
+
- system_calibration_xds
|
|
40
|
+
- gain_curve_xds
|
|
41
|
+
- phase_calibration_xds
|
|
42
|
+
- weather_xds
|
|
43
|
+
- phased_array_xds
|
|
44
|
+
|
|
45
|
+
Roadmap
|
|
46
|
+
-------
|
|
47
|
+
|
|
48
|
+
The existing coverage of the specification arguably represents a Pareto distribution of the required data for writing Radio Astronomy software in an MSv4 paradigm, but we aim to address the remaining cases as follows in order of priority:
|
|
49
|
+
|
|
50
|
+
- phased_array_xds
|
|
51
|
+
- pointing_xds
|
|
52
|
+
|
|
53
|
+
as this will more fully support LOFAR and SKA-LOW. The following datasets are probably required for VLBI:
|
|
54
|
+
|
|
55
|
+
- system_calibration_xds
|
|
56
|
+
- weather_xds
|
|
57
|
+
|
|
58
|
+
while the following are arguably required for calibration and other software that will need to be developed
|
|
59
|
+
for the MSv4 paradigm:
|
|
60
|
+
|
|
61
|
+
- gain_curve_xds
|
|
62
|
+
- phase_calibration_xds
|
|
63
|
+
- field_and_source_ephemeris_xds
|
|
64
|
+
- single dish systems
|
|
65
|
+
|
|
66
|
+
This is a rough strategy and doesn't need to be set in stone.
|
|
67
|
+
Please reach out or contribute PRs if you have specific requirements.
|
|
68
|
+
|
|
69
|
+
.. _msv4-test-suite: https://github.com/ratt-ru/xarray-ms/blob/main/tests/msv4_test_corpus/test_msv_corpus.py
|
|
@@ -3,7 +3,7 @@ Tutorial
|
|
|
3
3
|
|
|
4
4
|
The `Measurement Set v2.0 <msv2-spec_>`_ is a tabular format that
|
|
5
5
|
includes notions of regularity, or the shape of the data, in the MAIN table.
|
|
6
|
-
This is
|
|
6
|
+
This is achieved through the ``DATA_DESC_ID`` column which defines the
|
|
7
7
|
Spectral Window and Polarisation Configuration associated with each row:
|
|
8
8
|
the shape of the visibility in each row of the ``DATA`` column can
|
|
9
9
|
vary per-row.
|
|
@@ -16,13 +16,13 @@ or group MSv2 rows by the same shape and configuration.
|
|
|
16
16
|
In xarray-ms, this is accomplished by specifying a ``partition_schema``
|
|
17
17
|
when opening a Measurement Set.
|
|
18
18
|
Different columns may be used to define the partition.
|
|
19
|
-
See :ref:`partitioning-
|
|
19
|
+
See :ref:`partitioning-guide` for more information.
|
|
20
20
|
|
|
21
21
|
Opening a Measurement Set
|
|
22
22
|
-------------------------
|
|
23
23
|
|
|
24
24
|
As xarray-ms implements an `xarray backend <xarray_backend_>`_,
|
|
25
|
-
it is possible to use the :func:`xarray.
|
|
25
|
+
it is possible to use the :func:`xarray.open_datatree` function
|
|
26
26
|
to open multiple partitions of a Measurement Set.
|
|
27
27
|
|
|
28
28
|
.. ipython:: python
|
|
@@ -50,7 +50,7 @@ to open multiple partitions of a Measurement Set.
|
|
|
50
50
|
Selecting a subset of the data
|
|
51
51
|
++++++++++++++++++++++++++++++
|
|
52
52
|
|
|
53
|
-
By default, :func:`~xarray.
|
|
53
|
+
By default, :func:`~xarray.open_datatree` will return a datatree
|
|
54
54
|
with a lazy view over the data.
|
|
55
55
|
xarray has extensive functionality for
|
|
56
56
|
`indexing and selecting data <xarray_indexing_and_selecting_>`_.
|
|
@@ -1,14 +1,14 @@
|
|
|
1
1
|
[project]
|
|
2
2
|
name = "xarray-ms"
|
|
3
|
-
version = "0.3.
|
|
3
|
+
version = "0.3.7"
|
|
4
4
|
description = "xarray MSv4 views over MSv2 Measurement Sets"
|
|
5
5
|
authors = [{name = "Simon Perkins", email = "simon.perkins@gmail.com"}]
|
|
6
6
|
readme = "README.rst"
|
|
7
7
|
requires-python = ">=3.10"
|
|
8
8
|
dependencies = [
|
|
9
|
-
"xarray>=2025.0",
|
|
9
|
+
"xarray>=2025.0, < 2025.9.1",
|
|
10
10
|
"cacheout>=0.16.0",
|
|
11
|
-
"arcae>=0.3.2",
|
|
11
|
+
"arcae>=0.3.2, < 0.4.0",
|
|
12
12
|
"typing-extensions>=4.12.2",
|
|
13
13
|
]
|
|
14
14
|
|
|
@@ -26,6 +26,7 @@ dev = [
|
|
|
26
26
|
]
|
|
27
27
|
doc = [
|
|
28
28
|
"sphinx>=8.0.2",
|
|
29
|
+
"sphinxcontrib-spelling",
|
|
29
30
|
"pygments>=2.18.0",
|
|
30
31
|
"sphinx-copybutton>=0.5.2",
|
|
31
32
|
"pydata-sphinx-theme>=0.15.4",
|
|
@@ -53,7 +54,7 @@ extend-select = ["I"]
|
|
|
53
54
|
# github_url = "https://github.com/<user or organization>/<project>/"
|
|
54
55
|
|
|
55
56
|
[tool.tbump.version]
|
|
56
|
-
current = "0.3.
|
|
57
|
+
current = "0.3.7"
|
|
57
58
|
|
|
58
59
|
# Example of a semver regexp.
|
|
59
60
|
# Make sure this matches current_version before
|
|
@@ -1,18 +1,21 @@
|
|
|
1
|
+
import xarray
|
|
1
2
|
import xarray.testing as xt
|
|
2
|
-
from xarray.backends.api import open_dataset, open_datatree
|
|
3
3
|
|
|
4
4
|
|
|
5
5
|
def test_dataset_roundtrip(simmed_ms, tmp_path):
|
|
6
|
-
ds = open_dataset(simmed_ms)
|
|
6
|
+
ds = xarray.open_dataset(simmed_ms)
|
|
7
7
|
zarr_path = tmp_path / "test_dataset.zarr"
|
|
8
8
|
ds.to_zarr(zarr_path, compute=True, consolidated=True)
|
|
9
|
-
ds2 = open_dataset(zarr_path)
|
|
9
|
+
ds2 = xarray.open_dataset(zarr_path)
|
|
10
10
|
xt.assert_identical(ds, ds2)
|
|
11
11
|
|
|
12
12
|
|
|
13
13
|
def test_datatree_roundtrip(simmed_ms, tmp_path):
|
|
14
|
-
dt = open_datatree(simmed_ms)
|
|
14
|
+
dt = xarray.open_datatree(simmed_ms)
|
|
15
15
|
zarr_path = tmp_path / "test_datatree.zarr"
|
|
16
16
|
dt.to_zarr(zarr_path, compute=True, consolidated=True)
|
|
17
|
-
|
|
17
|
+
# TODO Remove forcing of engine once
|
|
18
|
+
# https://github.com/pydata/xarray/issues/10808
|
|
19
|
+
# is resolved
|
|
20
|
+
dt2 = xarray.open_datatree(zarr_path, engine="zarr")
|
|
18
21
|
xt.assert_identical(dt, dt2)
|
|
@@ -225,6 +225,7 @@ class MSv2EntryPoint(BackendEntrypoint):
|
|
|
225
225
|
]
|
|
226
226
|
description = "Opens v2 CASA Measurement Sets in Xarray"
|
|
227
227
|
url = "https://xarray-ms.readthedocs.io/"
|
|
228
|
+
supports_groups = True
|
|
228
229
|
|
|
229
230
|
def guess_can_open(
|
|
230
231
|
self, filename_or_obj: str | os.PathLike[Any] | BufferedIOBase | AbstractDataStore
|
|
@@ -266,6 +267,7 @@ class MSv2EntryPoint(BackendEntrypoint):
|
|
|
266
267
|
drop_variables: Variables to drop from the dataset.
|
|
267
268
|
partition_schema: The columns to use for partitioning the Measurement set.
|
|
268
269
|
Defaults to :code:`{DEFAULT_PARTITION_COLUMNS}`.
|
|
270
|
+
See :ref:`partitioning-guide` for further information.
|
|
269
271
|
partition_key: A key corresponding to an individual partition.
|
|
270
272
|
For example :code:`(('DATA_DESC_ID', 0), ('FIELD_ID', 0))`.
|
|
271
273
|
If :code:`None`, the first partition will be opened.
|
|
@@ -345,6 +347,7 @@ class MSv2EntryPoint(BackendEntrypoint):
|
|
|
345
347
|
drop_variables: Variables to drop from the dataset.
|
|
346
348
|
partition_schema: The columns to use for partitioning the Measurement set.
|
|
347
349
|
Defaults to :code:`{DEFAULT_PARTITION_COLUMNS}`.
|
|
350
|
+
See :ref:`partitioning-guide` for further information.
|
|
348
351
|
auto_corrs: Include/Exclude auto-correlations.
|
|
349
352
|
ninstances: The number of Measurement Set instances to open for parallel I/O.
|
|
350
353
|
epoch: A string uniquely identifying this Dataset.
|
|
@@ -77,6 +77,8 @@ MSV4_to_MSV2_COLUMN_SCHEMAS = {
|
|
|
77
77
|
|
|
78
78
|
FIXED_DIMENSION_SIZES = {"uvw_label": 3}
|
|
79
79
|
|
|
80
|
+
PARTITIONING_LINK = "https://xarray-ms.readthedocs.io/en/latest/partitioning.html"
|
|
81
|
+
|
|
80
82
|
|
|
81
83
|
class CorrelatedFactory(DatasetFactory):
|
|
82
84
|
"""Factory class for generating the main correlated dataset
|
|
@@ -309,7 +311,8 @@ class CorrelatedFactory(DatasetFactory):
|
|
|
309
311
|
f"in the case of data variables "
|
|
310
312
|
f"and flags will be set for these cases. "
|
|
311
313
|
f"This situation is benign, especially if auto-corelations "
|
|
312
|
-
f"have been requested on a dataset without them."
|
|
314
|
+
f"have been requested on a dataset without them. "
|
|
315
|
+
f"See {PARTITIONING_LINK}",
|
|
313
316
|
IrregularBaselineGridWarning,
|
|
314
317
|
)
|
|
315
318
|
|
|
@@ -399,7 +402,8 @@ class CorrelatedFactory(DatasetFactory):
|
|
|
399
402
|
f"time.attrs['integration_time'] will be set to 'nan' and "
|
|
400
403
|
f"(time, baseline_id) shaped TIME and INTEGRATION_TIME columns "
|
|
401
404
|
f"will be added. "
|
|
402
|
-
f"{'They contain nans in missing rows.' if missing_rows else ''}"
|
|
405
|
+
f"{'They contain nans in missing rows. ' if missing_rows else ''}"
|
|
406
|
+
f"See {PARTITIONING_LINK}",
|
|
403
407
|
IrregularTimeGridWarning,
|
|
404
408
|
)
|
|
405
409
|
time_attrs["integration_time"]["data"] = np.nan
|
|
@@ -447,7 +451,8 @@ class CorrelatedFactory(DatasetFactory):
|
|
|
447
451
|
f"found in partition {self._partition_key}. "
|
|
448
452
|
f"MSv4 cannot strictly represent this case and so "
|
|
449
453
|
f"frequency.attrs['channel_width'] will be set to 'nan' and "
|
|
450
|
-
f"a (frequency,) shaped CHANNEL_WIDTH column will be added."
|
|
454
|
+
f"a (frequency,) shaped CHANNEL_WIDTH column will be added. "
|
|
455
|
+
f"See {PARTITIONING_LINK}",
|
|
451
456
|
IrregularChannelGridWarning,
|
|
452
457
|
)
|
|
453
458
|
freq_attrs["channel_width"]["data"] = np.nan
|
|
@@ -2,17 +2,17 @@ class IrregularGridWarning(UserWarning):
|
|
|
2
2
|
"""Base Warning for irregular grids"""
|
|
3
3
|
|
|
4
4
|
|
|
5
|
-
class IrregularTimeGridWarning(
|
|
5
|
+
class IrregularTimeGridWarning(IrregularGridWarning):
|
|
6
6
|
"""Warning raised when the intervals associated
|
|
7
7
|
with each timestep are not homogenous"""
|
|
8
8
|
|
|
9
9
|
|
|
10
|
-
class IrregularBaselineGridWarning(
|
|
10
|
+
class IrregularBaselineGridWarning(IrregularGridWarning):
|
|
11
11
|
"""Warning raised when missing baselines are
|
|
12
12
|
present in the Measurement Set"""
|
|
13
13
|
|
|
14
14
|
|
|
15
|
-
class IrregularChannelGridWarning(
|
|
15
|
+
class IrregularChannelGridWarning(IrregularGridWarning):
|
|
16
16
|
"""Warning raised when an irregular channel grid
|
|
17
17
|
is encountered"""
|
|
18
18
|
|
|
@@ -290,11 +290,13 @@ class MSStructureSimulator:
|
|
|
290
290
|
(nchan,) = chan_freq.shape
|
|
291
291
|
index = (np.array([r]),)
|
|
292
292
|
chan_width = np.full(nchan, (chan_freq[-1] - chan_freq[0]) / nchan)
|
|
293
|
+
ref_freq = chan_freq[0] + chan_freq[-1] / 2
|
|
293
294
|
T.putcol("NUM_CHAN", np.array([nchan]), index=index)
|
|
294
295
|
T.putcol("CHAN_FREQ", chan_freq[None, :], index=index)
|
|
295
296
|
T.putcol("CHAN_WIDTH", chan_width[None, :], index=index)
|
|
296
297
|
T.putcol("RESOLUTION", chan_freq[None, :], index=index)
|
|
297
298
|
T.putcol("EFFECTIVE_BW", chan_width[None, :], index=index)
|
|
299
|
+
T.putcol("REF_FREQUENCY", ref_freq[None], index=index)
|
|
298
300
|
|
|
299
301
|
# Partially populate the POLARIZATION table
|
|
300
302
|
with Table.from_filename(f"{output_ms}::POLARIZATION", **kw) as T:
|
|
@@ -1,54 +0,0 @@
|
|
|
1
|
-
API
|
|
2
|
-
===
|
|
3
|
-
|
|
4
|
-
Opening Measurement Sets
|
|
5
|
-
------------------------
|
|
6
|
-
|
|
7
|
-
The standard :func:`xarray.backends.api.open_dataset` and
|
|
8
|
-
:func:`xarray.backends.api.open_datatree` methods should
|
|
9
|
-
be used to open either a :class:`~xarray.Dataset` or a
|
|
10
|
-
:class:`~xarray.DataTree`.
|
|
11
|
-
|
|
12
|
-
.. code-block:: python
|
|
13
|
-
|
|
14
|
-
>>> dataset = xarray.open_dataset(
|
|
15
|
-
"/data/data.ms",
|
|
16
|
-
partition_schema=["DATA_DESC_ID", "FIELD_ID"])
|
|
17
|
-
>>> datatree = xarray.backends.api.open_datatree(
|
|
18
|
-
"/data/data.ms",
|
|
19
|
-
partition_schema=["DATA_DESC_ID", "FIELD_ID"])
|
|
20
|
-
|
|
21
|
-
These methods defer to the relevant methods on the
|
|
22
|
-
`Entrypoint Class <entrypoint-class_>`_.
|
|
23
|
-
Consult the method signatures for information on extra
|
|
24
|
-
arguments that can be passed.
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
.. _entrypoint-class:
|
|
28
|
-
|
|
29
|
-
Entrypoint Class
|
|
30
|
-
----------------
|
|
31
|
-
|
|
32
|
-
Entrypoint class for the MSv2 backend.
|
|
33
|
-
|
|
34
|
-
.. autoclass:: xarray_ms.backend.msv2.entrypoint.MSv2EntryPoint
|
|
35
|
-
:members: open_dataset, open_datatree
|
|
36
|
-
|
|
37
|
-
.. _partitioning-schema:
|
|
38
|
-
|
|
39
|
-
Partioning Schema
|
|
40
|
-
-----------------
|
|
41
|
-
|
|
42
|
-
The default partitioning schema contains the following columns:
|
|
43
|
-
|
|
44
|
-
.. autodata:: xarray_ms.backend.msv2.structure.DEFAULT_PARTITION_COLUMNS
|
|
45
|
-
|
|
46
|
-
Partitioning always uses these columns, but additional columns can be
|
|
47
|
-
selected if finer grained partitioning is required:
|
|
48
|
-
|
|
49
|
-
.. autodata:: xarray_ms.backend.msv2.structure.VALID_PARTITION_COLUMNS
|
|
50
|
-
|
|
51
|
-
Note that ``OBS_MODE`` and ``SUB_SCAN_NUMBER`` are columns in the ``STATE``
|
|
52
|
-
subtable, while ``SOURCE_ID`` is a column of the ``FIELD`` subtable.
|
|
53
|
-
Partitioning on these columns is achieved by joining on the ``STATE_ID``
|
|
54
|
-
and ``FIELD_ID`` columns, respectively.
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|