xarray-ms 0.3.5__tar.gz → 0.3.7__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (67) hide show
  1. {xarray_ms-0.3.5 → xarray_ms-0.3.7}/PKG-INFO +4 -3
  2. xarray_ms-0.3.7/doc/source/api.rst +31 -0
  3. {xarray_ms-0.3.5 → xarray_ms-0.3.7}/doc/source/changelog.rst +16 -0
  4. {xarray_ms-0.3.5 → xarray_ms-0.3.7}/doc/source/conf.py +2 -1
  5. {xarray_ms-0.3.5 → xarray_ms-0.3.7}/doc/source/index.rst +2 -0
  6. {xarray_ms-0.3.5 → xarray_ms-0.3.7}/doc/source/introduction.rst +2 -13
  7. xarray_ms-0.3.7/doc/source/partitioning.rst +184 -0
  8. xarray_ms-0.3.7/doc/source/roadmap.rst +69 -0
  9. {xarray_ms-0.3.5 → xarray_ms-0.3.7}/doc/source/tutorial.rst +4 -4
  10. {xarray_ms-0.3.5 → xarray_ms-0.3.7}/pyproject.toml +5 -4
  11. {xarray_ms-0.3.5 → xarray_ms-0.3.7}/tests/test_zarr_roundtrip.py +8 -5
  12. {xarray_ms-0.3.5 → xarray_ms-0.3.7}/xarray_ms/backend/msv2/entrypoint.py +3 -0
  13. {xarray_ms-0.3.5 → xarray_ms-0.3.7}/xarray_ms/backend/msv2/factories/correlated.py +8 -3
  14. {xarray_ms-0.3.5 → xarray_ms-0.3.7}/xarray_ms/errors.py +3 -3
  15. {xarray_ms-0.3.5 → xarray_ms-0.3.7}/xarray_ms/testing/simulator.py +2 -0
  16. xarray_ms-0.3.5/doc/source/api.rst +0 -54
  17. {xarray_ms-0.3.5 → xarray_ms-0.3.7}/.github/ISSUE_TEMPLATE.md +0 -0
  18. {xarray_ms-0.3.5 → xarray_ms-0.3.7}/.github/PULL_REQUEST_TEMPLATE.md +0 -0
  19. {xarray_ms-0.3.5 → xarray_ms-0.3.7}/.github/dependabot.yml +0 -0
  20. {xarray_ms-0.3.5 → xarray_ms-0.3.7}/.github/workflows/ci.yml +0 -0
  21. {xarray_ms-0.3.5 → xarray_ms-0.3.7}/.github/workflows/pre-commit.yml +0 -0
  22. {xarray_ms-0.3.5 → xarray_ms-0.3.7}/.github/workflows/readthedocs.yml +0 -0
  23. {xarray_ms-0.3.5 → xarray_ms-0.3.7}/.gitignore +0 -0
  24. {xarray_ms-0.3.5 → xarray_ms-0.3.7}/.pre-commit-config.yaml +0 -0
  25. {xarray_ms-0.3.5 → xarray_ms-0.3.7}/.readthedocs.yaml +0 -0
  26. {xarray_ms-0.3.5 → xarray_ms-0.3.7}/LICENSE +0 -0
  27. {xarray_ms-0.3.5 → xarray_ms-0.3.7}/README.rst +0 -0
  28. {xarray_ms-0.3.5 → xarray_ms-0.3.7}/doc/Makefile +0 -0
  29. {xarray_ms-0.3.5 → xarray_ms-0.3.7}/doc/make.bat +0 -0
  30. {xarray_ms-0.3.5 → xarray_ms-0.3.7}/doc/source/install.rst +0 -0
  31. {xarray_ms-0.3.5 → xarray_ms-0.3.7}/doc/source/links.rst +0 -0
  32. {xarray_ms-0.3.5 → xarray_ms-0.3.7}/hello.txt +0 -0
  33. {xarray_ms-0.3.5 → xarray_ms-0.3.7}/tests/__init__.py +0 -0
  34. {xarray_ms-0.3.5 → xarray_ms-0.3.7}/tests/conftest.py +0 -0
  35. {xarray_ms-0.3.5 → xarray_ms-0.3.7}/tests/msv4_test_corpus/__init__.py +0 -0
  36. {xarray_ms-0.3.5 → xarray_ms-0.3.7}/tests/msv4_test_corpus/conftest.py +0 -0
  37. {xarray_ms-0.3.5 → xarray_ms-0.3.7}/tests/msv4_test_corpus/test_msv_corpus.py +0 -0
  38. {xarray_ms-0.3.5 → xarray_ms-0.3.7}/tests/test_antenna.py +0 -0
  39. {xarray_ms-0.3.5 → xarray_ms-0.3.7}/tests/test_backend.py +0 -0
  40. {xarray_ms-0.3.5 → xarray_ms-0.3.7}/tests/test_basic.py +0 -0
  41. {xarray_ms-0.3.5 → xarray_ms-0.3.7}/tests/test_encoding.py +0 -0
  42. {xarray_ms-0.3.5 → xarray_ms-0.3.7}/tests/test_field_and_source.py +0 -0
  43. {xarray_ms-0.3.5 → xarray_ms-0.3.7}/tests/test_github.py +0 -0
  44. {xarray_ms-0.3.5 → xarray_ms-0.3.7}/tests/test_imputation.py +0 -0
  45. {xarray_ms-0.3.5 → xarray_ms-0.3.7}/tests/test_multiton.py +0 -0
  46. {xarray_ms-0.3.5 → xarray_ms-0.3.7}/tests/test_read.py +0 -0
  47. {xarray_ms-0.3.5 → xarray_ms-0.3.7}/tests/test_structure.py +0 -0
  48. {xarray_ms-0.3.5 → xarray_ms-0.3.7}/tests/test_utils.py +0 -0
  49. {xarray_ms-0.3.5 → xarray_ms-0.3.7}/xarray_ms/__init__.py +0 -0
  50. {xarray_ms-0.3.5 → xarray_ms-0.3.7}/xarray_ms/backend/msv2/array.py +0 -0
  51. {xarray_ms-0.3.5 → xarray_ms-0.3.7}/xarray_ms/backend/msv2/encoders.py +0 -0
  52. {xarray_ms-0.3.5 → xarray_ms-0.3.7}/xarray_ms/backend/msv2/entrypoint_utils.py +0 -0
  53. {xarray_ms-0.3.5 → xarray_ms-0.3.7}/xarray_ms/backend/msv2/factories/__init__.py +0 -0
  54. {xarray_ms-0.3.5 → xarray_ms-0.3.7}/xarray_ms/backend/msv2/factories/antenna.py +0 -0
  55. {xarray_ms-0.3.5 → xarray_ms-0.3.7}/xarray_ms/backend/msv2/factories/core.py +0 -0
  56. {xarray_ms-0.3.5 → xarray_ms-0.3.7}/xarray_ms/backend/msv2/factories/field_and_source.py +0 -0
  57. {xarray_ms-0.3.5 → xarray_ms-0.3.7}/xarray_ms/backend/msv2/imputation.py +0 -0
  58. {xarray_ms-0.3.5 → xarray_ms-0.3.7}/xarray_ms/backend/msv2/partition.py +0 -0
  59. {xarray_ms-0.3.5 → xarray_ms-0.3.7}/xarray_ms/backend/msv2/structure.py +0 -0
  60. {xarray_ms-0.3.5 → xarray_ms-0.3.7}/xarray_ms/backend/msv2/table_utils.py +0 -0
  61. {xarray_ms-0.3.5 → xarray_ms-0.3.7}/xarray_ms/casa_types.py +0 -0
  62. {xarray_ms-0.3.5 → xarray_ms-0.3.7}/xarray_ms/msv4_types.py +0 -0
  63. {xarray_ms-0.3.5 → xarray_ms-0.3.7}/xarray_ms/multiton.py +0 -0
  64. {xarray_ms-0.3.5 → xarray_ms-0.3.7}/xarray_ms/query.py +0 -0
  65. {xarray_ms-0.3.5 → xarray_ms-0.3.7}/xarray_ms/testing/__init__.py +0 -0
  66. {xarray_ms-0.3.5 → xarray_ms-0.3.7}/xarray_ms/testing/utils.py +0 -0
  67. {xarray_ms-0.3.5 → xarray_ms-0.3.7}/xarray_ms/utils.py +0 -0
@@ -1,14 +1,14 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: xarray-ms
3
- Version: 0.3.5
3
+ Version: 0.3.7
4
4
  Summary: xarray MSv4 views over MSv2 Measurement Sets
5
5
  Author-email: Simon Perkins <simon.perkins@gmail.com>
6
6
  License-File: LICENSE
7
7
  Requires-Python: >=3.10
8
- Requires-Dist: arcae>=0.3.2
8
+ Requires-Dist: arcae<0.4.0,>=0.3.2
9
9
  Requires-Dist: cacheout>=0.16.0
10
10
  Requires-Dist: typing-extensions>=4.12.2
11
- Requires-Dist: xarray>=2025.0
11
+ Requires-Dist: xarray<2025.9.1,>=2025.0
12
12
  Provides-Extra: dev
13
13
  Requires-Dist: pre-commit>=3.8.0; extra == 'dev'
14
14
  Requires-Dist: tbump>=6.11.0; extra == 'dev'
@@ -18,6 +18,7 @@ Requires-Dist: pydata-sphinx-theme>=0.15.4; extra == 'doc'
18
18
  Requires-Dist: pygments>=2.18.0; extra == 'doc'
19
19
  Requires-Dist: sphinx-copybutton>=0.5.2; extra == 'doc'
20
20
  Requires-Dist: sphinx>=8.0.2; extra == 'doc'
21
+ Requires-Dist: sphinxcontrib-spelling; extra == 'doc'
21
22
  Provides-Extra: testing
22
23
  Requires-Dist: dask>=2024.5.0; extra == 'testing'
23
24
  Requires-Dist: distributed>=2024.5.0; extra == 'testing'
@@ -0,0 +1,31 @@
1
+ API
2
+ ===
3
+
4
+ Opening Measurement Sets
5
+ ------------------------
6
+
7
+ The standard :func:`xarray.open_datatree` method should
8
+ be used to open a :class:`~xarray.DataTree` interface
9
+ to the underlying Measurement Set data.
10
+
11
+ .. code-block:: python
12
+
13
+ >>> datatree = xarray.open_datatree("/data/data.ms", partition_schema=["FIELD_ID"])
14
+
15
+ These methods defer to the relevant methods on the
16
+ `Entrypoint Class <entrypoint-class_>`_.
17
+ Consult the method signatures for information on extra
18
+ arguments that can be passed.
19
+
20
+
21
+ .. _entrypoint-class:
22
+
23
+ Entrypoint Class
24
+ ----------------
25
+
26
+ Entrypoint class for the MSv2 backend.
27
+
28
+ .. autoclass:: xarray_ms.backend.msv2.entrypoint.MSv2EntryPoint
29
+ :members: open_datatree, open_dataset
30
+
31
+ .. _partitioning-schema:
@@ -3,6 +3,22 @@
3
3
  Changelog
4
4
  =========
5
5
 
6
+ 0.3.7 (03-10-2025)
7
+ ------------------
8
+ * Documentation updates (:pr:`134`)
9
+ * Temporarily restrict xarray to \< 2025.9.1 (:pr:`136`) until
10
+ `xarray#10808 <https://github.com/pydata/xarray/issues/10808>`_
11
+ is resolved.
12
+ * Restrict arcae to \< 0.4.0 to prevent
13
+ API-breaking write support changes (:pr:`136`)
14
+ * Provide a physically realistic SPECTRAL_WINDOW::REF_FREQUENCY in simulated data (:pr:`133`)
15
+
16
+
17
+ 0.3.6 (22-09-2025)
18
+ ------------------
19
+ * Document Partitioning Strategies and Irregular Grid Handling (:pr:`132`)
20
+ * Document MSv4 compliance and roadmap (:pr:`131`)
21
+
6
22
  0.3.5 (17-09-2025)
7
23
  ------------------
8
24
  * Remove deploy to test-pypi (:pr:`130`)
@@ -11,12 +11,13 @@
11
11
  project = "xarray-ms"
12
12
  copyright = "2024 - 2025 NRF (SARAO) and Rhodes University (RATT) Centre"
13
13
  author = "Simon Perkins"
14
- release = "0.3.5"
14
+ release = "0.3.7"
15
15
 
16
16
  # -- General configuration ---------------------------------------------------
17
17
  # https://www.sphinx-doc.org/en/master/usage/configuration.html#general-configuration
18
18
 
19
19
  extensions = [
20
+ "sphinxcontrib.spelling",
20
21
  "sphinx.ext.autodoc",
21
22
  "sphinx.ext.autosummary",
22
23
  "sphinx.ext.extlinks",
@@ -13,5 +13,7 @@ xarray-ms documentation
13
13
  introduction
14
14
  install
15
15
  tutorial
16
+ partitioning
17
+ roadmap
16
18
  api
17
19
  changelog
@@ -72,16 +72,5 @@ Why xarray-ms?
72
72
  * xarray-ms uses arcae_, a high-performance backend to CASA Tables implementing
73
73
  a subset of python-casacore_'s interface.
74
74
  * Some limited support for irregular MSv2 data via padding.
75
-
76
- Work in Progress
77
- ----------------
78
-
79
- The Measurement Set v4 specification is currently under active development.
80
- xarray-ms is also currently under active development and does not yet
81
- have feature parity with MSv4 or xradio_.
82
- Some measures information and many secondary datasets are not currently implemented.
83
-
84
- However, the most important parts of the MSv2 ``MAIN`` tables,
85
- as well as the ``ANTENNA``, ``POLARIZATON`` and ``SPECTRAL_WINDOW``
86
- sub-tables are implemented and should be sufficient
87
- for basic algorithm development.
75
+ * Refer to the :ref:`MSv4 compliance and roadmap <compliance-and-roadmap>`
76
+ section for information on adherence to the specification.
@@ -0,0 +1,184 @@
1
+ .. _partitioning-guide:
2
+
3
+ Partitioning Guide
4
+ ==================
5
+
6
+ `Measurement Set v4.0 <msv4-spec_>`_ specifies a series of datasets with
7
+ ``time``, ``baseline_id`` and ``frequency`` coordinates where
8
+ ``time`` and ``frequency`` have associated ``integration_time`` and
9
+ ``channel_width`` attributes.
10
+ In the best case, this represents monotonic, equidistant values along
11
+ ``time`` and ``frequency`` and the standard quadratic relation between
12
+ antennas in the case of ``baseline_id``.
13
+ Observational data recorded directly off an interferometer and stored
14
+ for archival purposes will commonly follow a
15
+ ``(time, baseline_id, frequency)`` ordering.
16
+
17
+ The usefulness of this representation and ordering is that it is
18
+ simple and easy for software to reason about.
19
+ This is desirable as it simplifies our software.
20
+
21
+ The challenge in converting from MSv2 to MSv4 is formulating a
22
+ partitioning strategy to handle irregularity in an MSv2 dataset.
23
+
24
+ Measurement Set v2.0 irregularity
25
+ ---------------------------------
26
+
27
+ By contrast the `Measurement Set v2.0 <msv2-spec_>`_ is a tabular format that
28
+ does not enforce any notion of regularity (although much software assumes it).
29
+ The ``TIME`` and ``INTERVAL`` columns in the MAIN MSv2 table
30
+ describe the midpoint in time at which a sample was measured
31
+ and the amount of time (integration time) taken to measure the sample,
32
+ while the ``ANTENNA1`` and ``ANTENNA2`` columns define the baseline along
33
+ which the sample was measured.
34
+ ``TIME``, ``ANTENNA1`` and ``ANTENNA2`` are *keys* in the tabular MAIN table
35
+ and there is no requirement that the measurements they index are ordered,
36
+ or even form a regular ``(time, baseline_id)`` grid.
37
+ Additionally, the ``DATA_DESC_ID`` column establishes a relation to the
38
+ ``SPECTRAL_WINDOW::CHAN_FREQ`` and ``SPECTRAL_WINDOW::CHAN_WIDTH`` columns
39
+ representing the frequency centroid and bandwidth of the sample, respectively.
40
+
41
+ The challenge that MSv2 poses to radio astronomy software in the worst case
42
+ is that it can represent overlapped or disjoint measurements in time and frequency
43
+ for one or more baselines.
44
+ However, most observational data is well-behaved:
45
+ Measurements are commonly ordered by ``TIME, ANTENNA1, ANTENNA2``
46
+ and ``CHAN_FREQ`` commonly increases monotonically with
47
+ equidistant values (i.e. ``CHAN_WIDTH`` values are uniform) but this cannot
48
+ always be assumed.
49
+ Any regularity in an MSv2 MS is achieved through convention rather
50
+ than enforcement.
51
+
52
+
53
+ Choosing a partitioning strategy
54
+ --------------------------------
55
+
56
+ By default, MSv2 measurements are partitioned by ``DATA_DESC_ID``,
57
+ ``OBSERVATION_ID``, ``PROCESSOR_ID`` and the
58
+ ``STATE::OBS_MODE`` (via ``STATE_ID``) columns.
59
+
60
+ .. autodata:: xarray_ms.backend.msv2.structure.DEFAULT_PARTITION_COLUMNS
61
+
62
+ For example, it follows from the previous section that,
63
+ in order to achieve regularity in frequency, *partition*
64
+ MSv2 measurements by the ``DATA_DESC_ID`` column.
65
+
66
+ Partitioning always uses these columns, but additional columns can be
67
+ selected if finer grained partitioning is required:
68
+
69
+ .. autodata:: xarray_ms.backend.msv2.structure.VALID_PARTITION_COLUMNS
70
+
71
+ Note that ``OBS_MODE`` and ``SUB_SCAN_NUMBER`` are columns in the ``STATE``
72
+ subtable, while ``SOURCE_ID`` is a column of the ``FIELD`` subtable.
73
+ Partitioning on these columns is achieved by joining on the ``STATE_ID``
74
+ and ``FIELD_ID`` columns, respectively.
75
+
76
+
77
+ Within these partitions, measurements are sorted by
78
+ ``TIME``, ``ANTENNA1`` and ``ANTENNA2``
79
+ to form a grid.
80
+
81
+ .. _time-partitioning:
82
+
83
+ Partitioning in time
84
+ ++++++++++++++++++++
85
+
86
+ Compared to frequency, achieving regularity in time requires more thought
87
+ as it depends on identifying partitions of MSv2 where data:
88
+
89
+ 1. contains monotonically increasing ``TIME`` (after ordering).
90
+ 2. is dumped with a uniform ``INTERVAL``.
91
+ 3. ideally contains no gaps: i.e. ``(TIME - INTERVAL / 2)[1:] == (TIME + INTERVAL / 2)[:-1]``.
92
+
93
+ For example, ``OBS_MODE`` specifying ``STATE::OBS_MODE`` via ``STATE_ID``
94
+ is a good default partitioner, as it represents a shift in the
95
+ interferometer's mode of operation: It identifies when
96
+ the interferometer is e.g. slewing/observing a calibrator/observing a target.
97
+
98
+ Other valid partitioning columns are:
99
+
100
+ - ``FIELD_ID``: Observing a field for a period of time.
101
+ - ``SOURCE_ID``: Observing a source within a field for a period of time.
102
+ - ``SCAN_NUMBER``: A coarse, logical number (i.e. scan) associated with the data.
103
+ - ``SUB_SCAN_NUMBER``: A finer, logical number (i.e. sub-scan) associated with the data.
104
+ This specifies ``STATE::SUB_SCAN_NUMBER`` (via ``STATE_ID``).
105
+ - ``STATE_ID``: The state of an interferometer.
106
+
107
+ as these columns frequently identify measurement groupings where
108
+ the interferometer is consistently dumping.
109
+
110
+ .. code-block:: python
111
+
112
+ import xarray_ms
113
+ import xarray
114
+
115
+ # Also partition by SCAN_NUMBER and FIELD_ID
116
+ dt = xarray.open_datatree(ms, partition_schema=["SCAN_NUMBER", "FIELD_ID"])
117
+
118
+ .. _missing-baselines:
119
+
120
+ Missing Baselines
121
+ -----------------
122
+
123
+ Baselines can be missing for distinct ``TIME`` values.
124
+ This can occur when Measurement Sets are passed through the
125
+ CASA ``split`` task with ``keepflags=False`` set, for instance.
126
+
127
+ Having all baselines present can be useful
128
+ for simplifying calibration algorithms and cases where
129
+ auto-correlations are requested, but none are present in the data.
130
+
131
+ ``xarray-ms`` will impute these missing data points with default values
132
+ (``nan`` in the case of data, ``1`` in the case of flags).
133
+
134
+ Irregular Grid Warnings
135
+ -----------------------
136
+
137
+ Given the specified partitioning schema, ``xarray-ms`` will partition
138
+ the MSv2 by the supplied columns and attempt to establish a regular
139
+ ``(time, baseline_id, frequency)`` grid.
140
+ If this is not possible, three classes of warning can be issued,
141
+ related to each of the three dimensions.
142
+
143
+ :class:`~xarray_ms.errors.IrregularTimeGridWarning`
144
+ +++++++++++++++++++++++++++++++++++++++++++++++++++
145
+
146
+ This warning is raised when it is impossible
147
+ to identify a unique ``INTERVAL`` value for a partition.
148
+ This is required to assign a single ``integration_time``
149
+ attribute to the ``time`` coordinate.
150
+
151
+ The above check is relaxed slightly by excluding the last time
152
+ in the partition (to handle averaged data) and by allowing
153
+ a degree of jitter in the ``INTERVAL`` column.
154
+
155
+ Generally, this happens if the requested partitioning schema
156
+ does not satisfy the criteria described in :ref:`time-partitioning`.
157
+ The solution is to experiment with other partitioning columns.
158
+
159
+ Should the user wish to continue with this case,
160
+ ``xarray-ms`` sets ``integration_time=nan``
161
+ and adds ``(time, baseline_id)``-shaped,
162
+ ``TIME`` and ``INTEGRATION_TIME`` columns.
163
+ Downstream applications should account for this.
164
+
165
+ :class:`~xarray_ms.errors.IrregularChannelGridWarning`
166
+ ++++++++++++++++++++++++++++++++++++++++++++++++++++++
167
+
168
+ This warning is raised when it is impossible to identify a unique
169
+ ``CHAN_WIDTH`` value for the partition.
170
+ This is required to assign a single ``channel_width``
171
+ attribute to the ``frequency`` coordinate.
172
+
173
+ Should the user wish to continue with this
174
+ case ``xarray-ms`` sets ``channel_width=nan``
175
+ and adds a ``(frequency,)``-shaped ``CHANNEL_WIDTH`` column.
176
+ Downstream applications should account for this.
177
+
178
+ :class:`~xarray_ms.errors.IrregularBaselineGridWarning`
179
+ +++++++++++++++++++++++++++++++++++++++++++++++++++++++
180
+
181
+ This warning is raised when baselines were missing for a
182
+ particular timestep.
183
+ This is a relatively benign warning as ``xarray-ms`` will
184
+ impute missing values (See :ref:`missing-baselines`).
@@ -0,0 +1,69 @@
1
+ .. _compliance-and-roadmap:
2
+
3
+ Measurement Set v4 Compliance
4
+ =============================
5
+
6
+ xarray-ms fully implements the loading of correlated data from
7
+ MSv2 datasets into the Measurement Set v4.0 specification.
8
+ This covers the MSv2 ``MAIN`` table, as well as the
9
+ ``DATA_DESCRIPTION``, ``SPECTRAL_WINDOW``, ``POLARIZATION``,
10
+ ``FEED``, ``OBSERVATION``, ``STATE`` and ``PROCESSOR`` subtables
11
+ whose synthesis is presented in correlated data datasets
12
+ within an xarray DataTree.
13
+
14
+ Care has been taken to convert measures information from MSv2 into
15
+ MSv4 metadata attributes, where appropriate.
16
+
17
+ In particular, it loads the MSv2 datasets present in the
18
+ `Measurement Set v4 test suite <msv4-test-suite_>`_ except for:
19
+
20
+ - ALMA Measurement Sets which sometimes do not correctly link
21
+ the ANTENNA and MAIN table via the FEED table.
22
+ This will need to be addressed heuristically.
23
+ - Single-dish Measurement Sets.
24
+ This is not difficult as it involves loading in
25
+ ``MAIN::FLOAT_DATA`` into the ``SPECTRUM`` variable and
26
+ renaming ``FIELD_PHASE_CENTER_DIRECTION`` to
27
+ ``FIELD_REFERENCE_CENTER_DIRECTION`` in the
28
+ ``field_and_source_xds`` dataset.
29
+
30
+ MSv4 specifies a set of optional datasets, of which the following are implemented:
31
+
32
+ - antenna_xds
33
+ - field_and_source_xds (required components)
34
+
35
+ The following optional datasets are not yet implemented:
36
+
37
+ - field_and_source_ephemeris_xds
38
+ - pointing_xds
39
+ - system_calibration_xds
40
+ - gain_curve_xds
41
+ - phase_calibration_xds
42
+ - weather_xds
43
+ - phased_array_xds
44
+
45
+ Roadmap
46
+ -------
47
+
48
+ The existing coverage of the specification arguably represents a Pareto distribution of the required data for writing Radio Astronomy software in an MSv4 paradigm, but we aim to address the remaining cases as follows in order of priority:
49
+
50
+ - phased_array_xds
51
+ - pointing_xds
52
+
53
+ as this will more fully support LOFAR and SKA-LOW. The following datasets are probably required for VLBI:
54
+
55
+ - system_calibration_xds
56
+ - weather_xds
57
+
58
+ while the following are arguably required for calibration and other software that will need to be developed
59
+ for the MSv4 paradigm:
60
+
61
+ - gain_curve_xds
62
+ - phase_calibration_xds
63
+ - field_and_source_ephemeris_xds
64
+ - single dish systems
65
+
66
+ This is a rough strategy and doesn't need to be set in stone.
67
+ Please reach out or contribute PRs if you have specific requirements.
68
+
69
+ .. _msv4-test-suite: https://github.com/ratt-ru/xarray-ms/blob/main/tests/msv4_test_corpus/test_msv_corpus.py
@@ -3,7 +3,7 @@ Tutorial
3
3
 
4
4
  The `Measurement Set v2.0 <msv2-spec_>`_ is a tabular format that
5
5
  includes notions of regularity or, the shape of the data, in the MAIN table.
6
- This is accomplished through the ``DATA_DESC_ID`` column which defines the
6
+ This is achieved through the ``DATA_DESC_ID`` column which defines the
7
7
  Spectral Window and Polarisation Configuration associated with each row:
8
8
  the shape of the visibility in each row of the ``DATA`` column can
9
9
  vary per-row.
@@ -16,13 +16,13 @@ or group MSv2 rows by the same shape and configuration.
16
16
  In xarray-ms, this is accomplished by specifying a ``partition_schema``
17
17
  when opening a Measurement Set.
18
18
  Different columns may be used to define the partition.
19
- See :ref:`partitioning-schema` for more information.
19
+ See :ref:`partitioning-guide` for more information.
20
20
 
21
21
  Opening a Measurement Set
22
22
  -------------------------
23
23
 
24
24
  As xarray-ms implements an `xarray backend <xarray_backend_>`_,
25
- it is possible to use the :func:`xarray.backends.api.open_datatree` function
25
+ it is possible to use the :func:`xarray.open_datatree` function
26
26
  to open multiple partitions of a Measurement Set.
27
27
 
28
28
  .. ipython:: python
@@ -50,7 +50,7 @@ to open multiple partitions of a Measurement Set.
50
50
  Selecting a subset of the data
51
51
  ++++++++++++++++++++++++++++++
52
52
 
53
- By default, :func:`~xarray.backends.api.open_datatree` will return a datatree
53
+ By default, :func:`~xarray.open_datatree` will return a datatree
54
54
  with a lazy view over the data.
55
55
  xarray has extensive functionality for
56
56
  `indexing and selecting data <xarray_indexing_and_selecting_>`_.
@@ -1,14 +1,14 @@
1
1
  [project]
2
2
  name = "xarray-ms"
3
- version = "0.3.5"
3
+ version = "0.3.7"
4
4
  description = "xarray MSv4 views over MSv2 Measurement Sets"
5
5
  authors = [{name = "Simon Perkins", email = "simon.perkins@gmail.com"}]
6
6
  readme = "README.rst"
7
7
  requires-python = ">=3.10"
8
8
  dependencies = [
9
- "xarray>=2025.0",
9
+ "xarray>=2025.0, < 2025.9.1",
10
10
  "cacheout>=0.16.0",
11
- "arcae>=0.3.2",
11
+ "arcae>=0.3.2, < 0.4.0",
12
12
  "typing-extensions>=4.12.2",
13
13
  ]
14
14
 
@@ -26,6 +26,7 @@ dev = [
26
26
  ]
27
27
  doc = [
28
28
  "sphinx>=8.0.2",
29
+ "sphinxcontrib-spelling",
29
30
  "pygments>=2.18.0",
30
31
  "sphinx-copybutton>=0.5.2",
31
32
  "pydata-sphinx-theme>=0.15.4",
@@ -53,7 +54,7 @@ extend-select = ["I"]
53
54
  # github_url = "https://github.com/<user or organization>/<project>/"
54
55
 
55
56
  [tool.tbump.version]
56
- current = "0.3.5"
57
+ current = "0.3.7"
57
58
 
58
59
  # Example of a semver regexp.
59
60
  # Make sure this matches current_version before
@@ -1,18 +1,21 @@
1
+ import xarray
1
2
  import xarray.testing as xt
2
- from xarray.backends.api import open_dataset, open_datatree
3
3
 
4
4
 
5
5
  def test_dataset_roundtrip(simmed_ms, tmp_path):
6
- ds = open_dataset(simmed_ms)
6
+ ds = xarray.open_dataset(simmed_ms)
7
7
  zarr_path = tmp_path / "test_dataset.zarr"
8
8
  ds.to_zarr(zarr_path, compute=True, consolidated=True)
9
- ds2 = open_dataset(zarr_path)
9
+ ds2 = xarray.open_dataset(zarr_path)
10
10
  xt.assert_identical(ds, ds2)
11
11
 
12
12
 
13
13
  def test_datatree_roundtrip(simmed_ms, tmp_path):
14
- dt = open_datatree(simmed_ms)
14
+ dt = xarray.open_datatree(simmed_ms)
15
15
  zarr_path = tmp_path / "test_datatree.zarr"
16
16
  dt.to_zarr(zarr_path, compute=True, consolidated=True)
17
- dt2 = open_datatree(zarr_path)
17
+ # TODO Remove forcing of engine once
18
+ # https://github.com/pydata/xarray/issues/10808
19
+ # is resolved
20
+ dt2 = xarray.open_datatree(zarr_path, engine="zarr")
18
21
  xt.assert_identical(dt, dt2)
@@ -225,6 +225,7 @@ class MSv2EntryPoint(BackendEntrypoint):
225
225
  ]
226
226
  description = "Opens v2 CASA Measurement Sets in Xarray"
227
227
  url = "https://xarray-ms.readthedocs.io/"
228
+ supports_groups = True
228
229
 
229
230
  def guess_can_open(
230
231
  self, filename_or_obj: str | os.PathLike[Any] | BufferedIOBase | AbstractDataStore
@@ -266,6 +267,7 @@ class MSv2EntryPoint(BackendEntrypoint):
266
267
  drop_variables: Variables to drop from the dataset.
267
268
  partition_schema: The columns to use for partitioning the Measurement set.
268
269
  Defaults to :code:`{DEFAULT_PARTITION_COLUMNS}`.
270
+ See :ref:`partitioning-guide` for further information.
269
271
  partition_key: A key corresponding to an individual partition.
270
272
  For example :code:`(('DATA_DESC_ID', 0), ('FIELD_ID', 0))`.
271
273
  If :code:`None`, the first partition will be opened.
@@ -345,6 +347,7 @@ class MSv2EntryPoint(BackendEntrypoint):
345
347
  drop_variables: Variables to drop from the dataset.
346
348
  partition_schema: The columns to use for partitioning the Measurement set.
347
349
  Defaults to :code:`{DEFAULT_PARTITION_COLUMNS}`.
350
+ See :ref:`partitioning-guide` for further information.
348
351
  auto_corrs: Include/Exclude auto-correlations.
349
352
  ninstances: The number of Measurement Set instances to open for parallel I/O.
350
353
  epoch: A string uniquely identifying this Dataset.
@@ -77,6 +77,8 @@ MSV4_to_MSV2_COLUMN_SCHEMAS = {
77
77
 
78
78
  FIXED_DIMENSION_SIZES = {"uvw_label": 3}
79
79
 
80
+ PARTITIONING_LINK = "https://xarray-ms.readthedocs.io/en/latest/partitioning.html"
81
+
80
82
 
81
83
  class CorrelatedFactory(DatasetFactory):
82
84
  """Factory class for generating the main correlated dataset
@@ -309,7 +311,8 @@ class CorrelatedFactory(DatasetFactory):
309
311
  f"in the case of data variables "
310
312
  f"and flags will be set for these cases. "
311
313
  f"This situation is benign, especially if auto-corelations "
312
- f"have been requested on a dataset without them.",
314
+ f"have been requested on a dataset without them. "
315
+ f"See {PARTITIONING_LINK}",
313
316
  IrregularBaselineGridWarning,
314
317
  )
315
318
 
@@ -399,7 +402,8 @@ class CorrelatedFactory(DatasetFactory):
399
402
  f"time.attrs['integration_time'] will be set to 'nan' and "
400
403
  f"(time, baseline_id) shaped TIME and INTEGRATION_TIME columns "
401
404
  f"will be added. "
402
- f"{'They contain nans in missing rows.' if missing_rows else ''}",
405
+ f"{'They contain nans in missing rows. ' if missing_rows else ''}"
406
+ f"See {PARTITIONING_LINK}",
403
407
  IrregularTimeGridWarning,
404
408
  )
405
409
  time_attrs["integration_time"]["data"] = np.nan
@@ -447,7 +451,8 @@ class CorrelatedFactory(DatasetFactory):
447
451
  f"found in partition {self._partition_key}. "
448
452
  f"MSv4 cannot strictly represent this case and so "
449
453
  f"frequency.attrs['channel_width'] will be set to 'nan' and "
450
- f"a (frequency,) shaped CHANNEL_WIDTH column will be added.",
454
+ f"a (frequency,) shaped CHANNEL_WIDTH column will be added. "
455
+ f"See {PARTITIONING_LINK}",
451
456
  IrregularChannelGridWarning,
452
457
  )
453
458
  freq_attrs["channel_width"]["data"] = np.nan
@@ -2,17 +2,17 @@ class IrregularGridWarning(UserWarning):
2
2
  """Base Warning for irregular grids"""
3
3
 
4
4
 
5
- class IrregularTimeGridWarning(UserWarning):
5
+ class IrregularTimeGridWarning(IrregularGridWarning):
6
6
  """Warning raised when the intervals associated
7
7
  with each timestep are not homogenous"""
8
8
 
9
9
 
10
- class IrregularBaselineGridWarning(UserWarning):
10
+ class IrregularBaselineGridWarning(IrregularGridWarning):
11
11
  """Warning raised when missing baselines are
12
12
  present in the Measurement Set"""
13
13
 
14
14
 
15
- class IrregularChannelGridWarning(UserWarning):
15
+ class IrregularChannelGridWarning(IrregularGridWarning):
16
16
  """Warning raised when an irregular channel grid
17
17
  is encountered"""
18
18
 
@@ -290,11 +290,13 @@ class MSStructureSimulator:
290
290
  (nchan,) = chan_freq.shape
291
291
  index = (np.array([r]),)
292
292
  chan_width = np.full(nchan, (chan_freq[-1] - chan_freq[0]) / nchan)
293
+ ref_freq = chan_freq[0] + chan_freq[-1] / 2
293
294
  T.putcol("NUM_CHAN", np.array([nchan]), index=index)
294
295
  T.putcol("CHAN_FREQ", chan_freq[None, :], index=index)
295
296
  T.putcol("CHAN_WIDTH", chan_width[None, :], index=index)
296
297
  T.putcol("RESOLUTION", chan_freq[None, :], index=index)
297
298
  T.putcol("EFFECTIVE_BW", chan_width[None, :], index=index)
299
+ T.putcol("REF_FREQUENCY", ref_freq[None], index=index)
298
300
 
299
301
  # Partially populate the POLARIZATION table
300
302
  with Table.from_filename(f"{output_ms}::POLARIZATION", **kw) as T:
@@ -1,54 +0,0 @@
1
- API
2
- ===
3
-
4
- Opening Measurement Sets
5
- ------------------------
6
-
7
- The standard :func:`xarray.backends.api.open_dataset` and
8
- :func:`xarray.backends.api.open_datatree` methods should
9
- be used to open either a :class:`~xarray.Dataset` or a
10
- :class:`~xarray.DataTree`.
11
-
12
- .. code-block:: python
13
-
14
- >>> dataset = xarray.open_dataset(
15
- "/data/data.ms",
16
- partition_schema=["DATA_DESC_ID", "FIELD_ID"])
17
- >>> datatree = xarray.backends.api.open_datatree(
18
- "/data/data.ms",
19
- partition_schema=["DATA_DESC_ID", "FIELD_ID"])
20
-
21
- These methods defer to the relevant methods on the
22
- `Entrypoint Class <entrypoint-class_>`_.
23
- Consult the method signatures for information on extra
24
- arguments that can be passed.
25
-
26
-
27
- .. _entrypoint-class:
28
-
29
- Entrypoint Class
30
- ----------------
31
-
32
- Entrypoint class for the MSv2 backend.
33
-
34
- .. autoclass:: xarray_ms.backend.msv2.entrypoint.MSv2EntryPoint
35
- :members: open_dataset, open_datatree
36
-
37
- .. _partitioning-schema:
38
-
39
- Partioning Schema
40
- -----------------
41
-
42
- The default partitioning schema contains the following columns:
43
-
44
- .. autodata:: xarray_ms.backend.msv2.structure.DEFAULT_PARTITION_COLUMNS
45
-
46
- Partitioning always uses these columns, but additional columns can be
47
- selected if finer grained partitioning is required:
48
-
49
- .. autodata:: xarray_ms.backend.msv2.structure.VALID_PARTITION_COLUMNS
50
-
51
- Note that ``OBS_MODE`` and ``SUB_SCAN_NUMBER`` are columns in the ``STATE``
52
- subtable, while ``SOURCE_ID`` is a column of the ``FIELD`` subtable.
53
- Partitioning on these columns is achieved by joining on the ``STATE_ID``
54
- and ``FIELD_ID`` columns, respectively.
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes