tsam-xarray 0.5.0__tar.gz → 0.5.2__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {tsam_xarray-0.5.0 → tsam_xarray-0.5.2}/.github/workflows/ci.yaml +1 -1
- tsam_xarray-0.5.2/.release-please-manifest.json +3 -0
- {tsam_xarray-0.5.0 → tsam_xarray-0.5.2}/CHANGELOG.md +14 -0
- tsam_xarray-0.5.2/PKG-INFO +112 -0
- tsam_xarray-0.5.2/README.md +91 -0
- tsam_xarray-0.5.2/docs/assets/multi-dim-input.png +0 -0
- tsam_xarray-0.5.2/docs/assets/multi-dim-metrics.png +0 -0
- tsam_xarray-0.5.2/scripts/generate_readme_images.py +68 -0
- {tsam_xarray-0.5.0 → tsam_xarray-0.5.2}/src/tsam_xarray/_clustering.py +52 -15
- {tsam_xarray-0.5.0 → tsam_xarray-0.5.2}/src/tsam_xarray/_version.py +2 -2
- {tsam_xarray-0.5.0 → tsam_xarray-0.5.2}/test/test_aggregate.py +4 -1
- {tsam_xarray-0.5.0 → tsam_xarray-0.5.2}/test/test_parametrized.py +87 -0
- tsam_xarray-0.5.0/.release-please-manifest.json +0 -3
- tsam_xarray-0.5.0/PKG-INFO +0 -117
- tsam_xarray-0.5.0/README.md +0 -96
- {tsam_xarray-0.5.0 → tsam_xarray-0.5.2}/.github/dependabot.yml +0 -0
- {tsam_xarray-0.5.0 → tsam_xarray-0.5.2}/.github/workflows/dependabot-auto-merge.yaml +0 -0
- {tsam_xarray-0.5.0 → tsam_xarray-0.5.2}/.github/workflows/pr-title.yaml +0 -0
- {tsam_xarray-0.5.0 → tsam_xarray-0.5.2}/.github/workflows/publish.yaml +0 -0
- {tsam_xarray-0.5.0 → tsam_xarray-0.5.2}/.github/workflows/release.yaml +0 -0
- {tsam_xarray-0.5.0 → tsam_xarray-0.5.2}/.gitignore +0 -0
- {tsam_xarray-0.5.0 → tsam_xarray-0.5.2}/.pre-commit-config.yaml +0 -0
- {tsam_xarray-0.5.0 → tsam_xarray-0.5.2}/.readthedocs.yaml +0 -0
- {tsam_xarray-0.5.0 → tsam_xarray-0.5.2}/.release-please-config.json +0 -0
- {tsam_xarray-0.5.0 → tsam_xarray-0.5.2}/CLAUDE.md +0 -0
- {tsam_xarray-0.5.0 → tsam_xarray-0.5.2}/LICENSE +0 -0
- {tsam_xarray-0.5.0 → tsam_xarray-0.5.2}/codecov.yml +0 -0
- {tsam_xarray-0.5.0 → tsam_xarray-0.5.2}/docs/changelog.md +0 -0
- {tsam_xarray-0.5.0 → tsam_xarray-0.5.2}/docs/data-model.md +0 -0
- {tsam_xarray-0.5.0 → tsam_xarray-0.5.2}/docs/examples/clustering-io.ipynb +0 -0
- {tsam_xarray-0.5.0 → tsam_xarray-0.5.2}/docs/examples/getting-started.ipynb +0 -0
- {tsam_xarray-0.5.0 → tsam_xarray-0.5.2}/docs/examples/multi-dim.ipynb +0 -0
- {tsam_xarray-0.5.0 → tsam_xarray-0.5.2}/docs/examples/segmentation.ipynb +0 -0
- {tsam_xarray-0.5.0 → tsam_xarray-0.5.2}/docs/examples/tuning.ipynb +0 -0
- {tsam_xarray-0.5.0 → tsam_xarray-0.5.2}/docs/gen_ref_pages.py +0 -0
- {tsam_xarray-0.5.0 → tsam_xarray-0.5.2}/docs/index.md +0 -0
- {tsam_xarray-0.5.0 → tsam_xarray-0.5.2}/docs/stylesheets/extra.css +0 -0
- {tsam_xarray-0.5.0 → tsam_xarray-0.5.2}/mkdocs.yml +0 -0
- {tsam_xarray-0.5.0 → tsam_xarray-0.5.2}/pyproject.toml +0 -0
- {tsam_xarray-0.5.0 → tsam_xarray-0.5.2}/src/tsam_xarray/__init__.py +0 -0
- {tsam_xarray-0.5.0 → tsam_xarray-0.5.2}/src/tsam_xarray/_core.py +0 -0
- {tsam_xarray-0.5.0 → tsam_xarray-0.5.2}/src/tsam_xarray/_result.py +0 -0
- {tsam_xarray-0.5.0 → tsam_xarray-0.5.2}/src/tsam_xarray/_sample_data.py +0 -0
- {tsam_xarray-0.5.0 → tsam_xarray-0.5.2}/src/tsam_xarray/_tuning.py +0 -0
- {tsam_xarray-0.5.0 → tsam_xarray-0.5.2}/test/conftest.py +0 -0
- {tsam_xarray-0.5.0 → tsam_xarray-0.5.2}/test/test_tuning.py +0 -0
|
@@ -66,7 +66,7 @@ jobs:
|
|
|
66
66
|
- name: Run tests
|
|
67
67
|
run: uv run pytest -n auto --cov=src/tsam_xarray --cov-report=xml --cov-report=term-missing
|
|
68
68
|
|
|
69
|
-
- uses: codecov/codecov-action@
|
|
69
|
+
- uses: codecov/codecov-action@v6
|
|
70
70
|
if: matrix.python-version == '3.12'
|
|
71
71
|
with:
|
|
72
72
|
token: ${{ secrets.CODECOV_TOKEN }}
|
|
@@ -1,5 +1,19 @@
|
|
|
1
1
|
# Changelog
|
|
2
2
|
|
|
3
|
+
## [0.5.2](https://github.com/FBumann/tsam_xarray/compare/v0.5.1...v0.5.2) (2026-04-01)
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
### Features
|
|
7
|
+
|
|
8
|
+
* compact time_coords serialization in ClusteringResult JSON ([#79](https://github.com/FBumann/tsam_xarray/issues/79)) ([bac9fd1](https://github.com/FBumann/tsam_xarray/commit/bac9fd17ee28a48fd6f51ce16fea0df883cccb99))
|
|
9
|
+
|
|
10
|
+
## [0.5.1](https://github.com/FBumann/tsam_xarray/compare/v0.5.0...v0.5.1) (2026-03-31)
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
### Features
|
|
14
|
+
|
|
15
|
+
* add to_dict/from_dict on ClusteringResult ([#75](https://github.com/FBumann/tsam_xarray/issues/75)) ([24723a8](https://github.com/FBumann/tsam_xarray/commit/24723a82b0daa9eacdaa98e2ee300b9e44697bd6))
|
|
16
|
+
|
|
3
17
|
## [0.5.0](https://github.com/FBumann/tsam_xarray/compare/v0.4.0...v0.5.0) (2026-03-31)
|
|
4
18
|
|
|
5
19
|
|
|
@@ -0,0 +1,112 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: tsam_xarray
|
|
3
|
+
Version: 0.5.2
|
|
4
|
+
Summary: Lightweight xarray wrapper for tsam time series aggregation
|
|
5
|
+
License-Expression: MIT
|
|
6
|
+
License-File: LICENSE
|
|
7
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
8
|
+
Classifier: Operating System :: OS Independent
|
|
9
|
+
Classifier: Programming Language :: Python :: 3
|
|
10
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
11
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
12
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
13
|
+
Classifier: Programming Language :: Python :: 3.14
|
|
14
|
+
Requires-Python: >=3.11
|
|
15
|
+
Requires-Dist: bottleneck>=1.4
|
|
16
|
+
Requires-Dist: tsam>=3.3.0
|
|
17
|
+
Requires-Dist: xarray>=2024.1
|
|
18
|
+
Provides-Extra: plot
|
|
19
|
+
Requires-Dist: plotly>=5; extra == 'plot'
|
|
20
|
+
Description-Content-Type: text/markdown
|
|
21
|
+
|
|
22
|
+
# tsam_xarray
|
|
23
|
+
|
|
24
|
+
[](https://pypi.org/project/tsam-xarray/)
|
|
25
|
+
[](https://pypi.org/project/tsam-xarray/)
|
|
26
|
+
[](https://github.com/FBumann/tsam_xarray/actions/workflows/ci.yaml)
|
|
27
|
+
[](https://codecov.io/gh/FBumann/tsam_xarray)
|
|
28
|
+
[](LICENSE)
|
|
29
|
+
[](https://tsam-xarray.readthedocs.io/)
|
|
30
|
+
|
|
31
|
+
**DataArray in, DataArray out** — multi-dimensional time series aggregation with [tsam](https://github.com/FZJ-IEK3-VSA/tsam) and [xarray](https://xarray.dev/).
|
|
32
|
+
|
|
33
|
+
## The problem
|
|
34
|
+
|
|
35
|
+
Energy system data is multi-dimensional — variables, regions, scenarios, years. Some dimensions should be **clustered together** (solar and wind profiles in the same region should see the same typical days), while others need **independent clustering** (each scenario has its own weather patterns).
|
|
36
|
+
|
|
37
|
+

|
|
38
|
+
|
|
39
|
+
tsam works on flat DataFrames. With multi-dimensional data, you end up writing boilerplate: loop over scenarios, convert to DataFrame, aggregate, extract results, convert back, concatenate, hope the dims line up. Accuracy metrics come back as unlabeled `pd.Series`. Saving a clustering means managing raw dicts.
|
|
40
|
+
|
|
41
|
+
## The solution
|
|
42
|
+
|
|
43
|
+
```python
|
|
44
|
+
import tsam_xarray
|
|
45
|
+
|
|
46
|
+
result = tsam_xarray.aggregate(
|
|
47
|
+
da, # (time, variable, region, scenario)
|
|
48
|
+
time_dim="time",
|
|
49
|
+
cluster_dim=["variable", "region"], # clustered together
|
|
50
|
+
n_clusters=4,
|
|
51
|
+
)
|
|
52
|
+
# scenario is sliced independently — each gets its own clustering
|
|
53
|
+
```
|
|
54
|
+
|
|
55
|
+
Everything comes back as labeled xarray objects:
|
|
56
|
+
|
|
57
|
+
```python
|
|
58
|
+
result.cluster_representatives # (scenario, cluster, timestep, variable, region)
|
|
59
|
+
result.reconstructed # same shape as input
|
|
60
|
+
result.cluster_assignments # (scenario, period)
|
|
61
|
+
```
|
|
62
|
+
|
|
63
|
+
Accuracy metrics preserve all dimensions — see exactly where the approximation is good or bad:
|
|
64
|
+
|
|
65
|
+

|
|
66
|
+
|
|
67
|
+
```python
|
|
68
|
+
result.accuracy.rmse # DataArray (scenario, variable, region)
|
|
69
|
+
result.accuracy.weighted_rmse # DataArray (scenario,) — per-slice summary
|
|
70
|
+
```
|
|
71
|
+
|
|
72
|
+
## Save, load, reuse
|
|
73
|
+
|
|
74
|
+
```python
|
|
75
|
+
# Save clustering (not the data — just the mapping)
|
|
76
|
+
result.clustering.to_json("clustering.json")
|
|
77
|
+
|
|
78
|
+
# Load and inspect — no original data needed
|
|
79
|
+
clustering = tsam_xarray.load_clustering("clustering.json")
|
|
80
|
+
clustering.n_clusters # 4
|
|
81
|
+
clustering.cluster_assignments # DataArray (scenario, period)
|
|
82
|
+
clustering.cluster_occurrences # DataArray (scenario, cluster)
|
|
83
|
+
|
|
84
|
+
# Apply to new data or disaggregate optimization results
|
|
85
|
+
new_result = clustering.apply(new_da)
|
|
86
|
+
full_timeseries = clustering.disaggregate(optimized_data)
|
|
87
|
+
```
|
|
88
|
+
|
|
89
|
+
## Tuning
|
|
90
|
+
|
|
91
|
+
Find optimal hyperparameters across all slices:
|
|
92
|
+
|
|
93
|
+
```python
|
|
94
|
+
grid = tsam_xarray.grid_search(
|
|
95
|
+
da,
|
|
96
|
+
time_dim="time",
|
|
97
|
+
cluster_dim=["variable", "region"],
|
|
98
|
+
timesteps=np.geomspace(2, 48, num=12, dtype=int), # sparse search
|
|
99
|
+
)
|
|
100
|
+
grid.summary_matrix["rmse"] # heatmap-ready (n_clusters, n_segments)
|
|
101
|
+
grid.accuracy["weighted_rmse"] # per-slice weighted RMSE for every config
|
|
102
|
+
```
|
|
103
|
+
|
|
104
|
+
## Installation
|
|
105
|
+
|
|
106
|
+
```bash
|
|
107
|
+
pip install tsam-xarray
|
|
108
|
+
```
|
|
109
|
+
|
|
110
|
+
## Documentation
|
|
111
|
+
|
|
112
|
+
Full docs with interactive examples: **[tsam-xarray.readthedocs.io](https://tsam-xarray.readthedocs.io/)**
|
|
@@ -0,0 +1,91 @@
|
|
|
1
|
+
# tsam_xarray
|
|
2
|
+
|
|
3
|
+
[](https://pypi.org/project/tsam-xarray/)
|
|
4
|
+
[](https://pypi.org/project/tsam-xarray/)
|
|
5
|
+
[](https://github.com/FBumann/tsam_xarray/actions/workflows/ci.yaml)
|
|
6
|
+
[](https://codecov.io/gh/FBumann/tsam_xarray)
|
|
7
|
+
[](LICENSE)
|
|
8
|
+
[](https://tsam-xarray.readthedocs.io/)
|
|
9
|
+
|
|
10
|
+
**DataArray in, DataArray out** — multi-dimensional time series aggregation with [tsam](https://github.com/FZJ-IEK3-VSA/tsam) and [xarray](https://xarray.dev/).
|
|
11
|
+
|
|
12
|
+
## The problem
|
|
13
|
+
|
|
14
|
+
Energy system data is multi-dimensional — variables, regions, scenarios, years. Some dimensions should be **clustered together** (solar and wind profiles in the same region should see the same typical days), while others need **independent clustering** (each scenario has its own weather patterns).
|
|
15
|
+
|
|
16
|
+

|
|
17
|
+
|
|
18
|
+
tsam works on flat DataFrames. With multi-dimensional data, you end up writing boilerplate: loop over scenarios, convert to DataFrame, aggregate, extract results, convert back, concatenate, hope the dims line up. Accuracy metrics come back as unlabeled `pd.Series`. Saving a clustering means managing raw dicts.
|
|
19
|
+
|
|
20
|
+
## The solution
|
|
21
|
+
|
|
22
|
+
```python
|
|
23
|
+
import tsam_xarray
|
|
24
|
+
|
|
25
|
+
result = tsam_xarray.aggregate(
|
|
26
|
+
da, # (time, variable, region, scenario)
|
|
27
|
+
time_dim="time",
|
|
28
|
+
cluster_dim=["variable", "region"], # clustered together
|
|
29
|
+
n_clusters=4,
|
|
30
|
+
)
|
|
31
|
+
# scenario is sliced independently — each gets its own clustering
|
|
32
|
+
```
|
|
33
|
+
|
|
34
|
+
Everything comes back as labeled xarray objects:
|
|
35
|
+
|
|
36
|
+
```python
|
|
37
|
+
result.cluster_representatives # (scenario, cluster, timestep, variable, region)
|
|
38
|
+
result.reconstructed # same shape as input
|
|
39
|
+
result.cluster_assignments # (scenario, period)
|
|
40
|
+
```
|
|
41
|
+
|
|
42
|
+
Accuracy metrics preserve all dimensions — see exactly where the approximation is good or bad:
|
|
43
|
+
|
|
44
|
+

|
|
45
|
+
|
|
46
|
+
```python
|
|
47
|
+
result.accuracy.rmse # DataArray (scenario, variable, region)
|
|
48
|
+
result.accuracy.weighted_rmse # DataArray (scenario,) — per-slice summary
|
|
49
|
+
```
|
|
50
|
+
|
|
51
|
+
## Save, load, reuse
|
|
52
|
+
|
|
53
|
+
```python
|
|
54
|
+
# Save clustering (not the data — just the mapping)
|
|
55
|
+
result.clustering.to_json("clustering.json")
|
|
56
|
+
|
|
57
|
+
# Load and inspect — no original data needed
|
|
58
|
+
clustering = tsam_xarray.load_clustering("clustering.json")
|
|
59
|
+
clustering.n_clusters # 4
|
|
60
|
+
clustering.cluster_assignments # DataArray (scenario, period)
|
|
61
|
+
clustering.cluster_occurrences # DataArray (scenario, cluster)
|
|
62
|
+
|
|
63
|
+
# Apply to new data or disaggregate optimization results
|
|
64
|
+
new_result = clustering.apply(new_da)
|
|
65
|
+
full_timeseries = clustering.disaggregate(optimized_data)
|
|
66
|
+
```
|
|
67
|
+
|
|
68
|
+
## Tuning
|
|
69
|
+
|
|
70
|
+
Find optimal hyperparameters across all slices:
|
|
71
|
+
|
|
72
|
+
```python
|
|
73
|
+
grid = tsam_xarray.grid_search(
|
|
74
|
+
da,
|
|
75
|
+
time_dim="time",
|
|
76
|
+
cluster_dim=["variable", "region"],
|
|
77
|
+
timesteps=np.geomspace(2, 48, num=12, dtype=int), # sparse search
|
|
78
|
+
)
|
|
79
|
+
grid.summary_matrix["rmse"] # heatmap-ready (n_clusters, n_segments)
|
|
80
|
+
grid.accuracy["weighted_rmse"] # per-slice weighted RMSE for every config
|
|
81
|
+
```
|
|
82
|
+
|
|
83
|
+
## Installation
|
|
84
|
+
|
|
85
|
+
```bash
|
|
86
|
+
pip install tsam-xarray
|
|
87
|
+
```
|
|
88
|
+
|
|
89
|
+
## Documentation
|
|
90
|
+
|
|
91
|
+
Full docs with interactive examples: **[tsam-xarray.readthedocs.io](https://tsam-xarray.readthedocs.io/)**
|
|
Binary file
|
|
Binary file
|
|
@@ -0,0 +1,68 @@
|
|
|
1
|
+
"""Generate images for README.md."""
|
|
2
|
+
|
|
3
|
+
from pathlib import Path
|
|
4
|
+
|
|
5
|
+
import xarray_plotly # noqa: F401
|
|
6
|
+
|
|
7
|
+
import tsam_xarray
|
|
8
|
+
from tsam_xarray._sample_data import sample_energy_data
|
|
9
|
+
|
|
10
|
+
ASSETS = Path("docs/assets")
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
def generate_input_plot() -> None:
    """Render the sample input data as a faceted line chart and save it as PNG.

    Loads 30 days of sample data, draws one line per ``variable`` with
    ``scenario`` on the facet rows and ``region`` on the facet columns, and
    writes the figure to ``ASSETS / "multi-dim-input.png"``.
    """
    # Layout settings collected up front so the plotting calls stay short.
    layout = {
        "height": 400,
        "width": 850,
        "margin": {"t": 40, "b": 25, "l": 50, "r": 20},
        "template": "plotly_white",
        "font": {"size": 11},
        "title_text": "Input: 3 variables x 3 regions x 2 scenarios x 720 hours",
        "title_x": 0.5,
        "title_font_size": 13,
    }

    sample = sample_energy_data(n_days=30)
    figure = sample.plotly.line(
        x="time",
        color="variable",
        facet_row="scenario",
        facet_col="region",
    )
    figure.update_layout(**layout)
    figure.update_xaxes(tickformat="%b %d")
    figure.update_traces(line_width=0.8)

    # scale=2 is forwarded to write_image unchanged.
    figure.write_image(ASSETS / "multi-dim-input.png", scale=2)
    print(f"Saved {ASSETS / 'multi-dim-input.png'}")
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
def generate_metrics_plot() -> None:
    """Render the per-column RMSE of an aggregation as a heatmap PNG.

    Aggregates 30 days of sample data to 4 clusters with ``variable`` as the
    cluster dimension, plots ``accuracy.rmse`` as an image (``variable`` on x,
    ``region`` on y, one facet column per ``scenario``), and writes it to
    ``ASSETS / "multi-dim-metrics.png"``.
    """
    aggregated = tsam_xarray.aggregate(
        sample_energy_data(n_days=30),
        time_dim="time",
        cluster_dim="variable",
        n_clusters=4,
    )

    heatmap = aggregated.accuracy.rmse.plotly.imshow(
        x="variable",
        y="region",
        facet_col="scenario",
        text_auto=".2f",
        color_continuous_scale="YlOrRd",
    )

    # Layout settings kept in one mapping and applied in a single call.
    layout = {
        "height": 280,
        "width": 650,
        "margin": {"t": 40, "b": 20, "l": 60, "r": 20},
        "template": "plotly_white",
        "font": {"size": 12},
        "title_text": "Per-column RMSE — faceted by scenario (independent clustering)",
        "title_x": 0.5,
        "title_font_size": 13,
    }
    heatmap.update_layout(**layout)

    heatmap.write_image(ASSETS / "multi-dim-metrics.png", scale=2)
    print(f"Saved {ASSETS / 'multi-dim-metrics.png'}")
|
|
63
|
+
|
|
64
|
+
|
|
65
|
+
if __name__ == "__main__":
    # Create the output directory (and any missing parents) before plotting.
    ASSETS.mkdir(parents=True, exist_ok=True)
    # Generate the README images in a fixed order: input first, then metrics.
    for make_plot in (generate_input_plot, generate_metrics_plot):
        make_plot()
|
|
@@ -21,6 +21,25 @@ from tsam_xarray._core import (
|
|
|
21
21
|
)
|
|
22
22
|
|
|
23
23
|
|
|
24
|
+
def _time_coords_to_dict(tc: pd.DatetimeIndex) -> dict[str, Any] | list[str]:
|
|
25
|
+
"""Serialize a DatetimeIndex compactly when possible.
|
|
26
|
+
|
|
27
|
+
Regular indices are stored as ``{start, periods, freq}`` (~3 values).
|
|
28
|
+
Irregular indices fall back to a full ISO string list.
|
|
29
|
+
"""
|
|
30
|
+
freq = pd.infer_freq(tc)
|
|
31
|
+
if freq is not None:
|
|
32
|
+
return {"start": tc[0].isoformat(), "periods": len(tc), "freq": freq}
|
|
33
|
+
return [t.isoformat() for t in tc]
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
def _time_coords_from_dict(raw: dict[str, Any] | list[str]) -> pd.DatetimeIndex:
|
|
37
|
+
"""Deserialize a DatetimeIndex from either compact or list format."""
|
|
38
|
+
if isinstance(raw, dict):
|
|
39
|
+
return pd.date_range(raw["start"], periods=raw["periods"], freq=raw["freq"])
|
|
40
|
+
return pd.DatetimeIndex(raw)
|
|
41
|
+
|
|
42
|
+
|
|
24
43
|
@dataclass(frozen=True, repr=False)
|
|
25
44
|
class ClusteringResult:
|
|
26
45
|
"""Reusable clustering result with xarray dimension metadata.
|
|
@@ -445,13 +464,12 @@ class ClusteringResult:
|
|
|
445
464
|
|
|
446
465
|
return _concat_along_dims(results, slice_dims, slice_coords)
|
|
447
466
|
|
|
448
|
-
def
|
|
449
|
-
"""
|
|
467
|
+
def to_dict(self) -> dict[str, Any]:
|
|
468
|
+
"""Serialize clustering to a dictionary.
|
|
450
469
|
|
|
451
|
-
|
|
452
|
-
|
|
453
|
-
|
|
454
|
-
to ``json.dump()``. Default: ``indent=2``.
|
|
470
|
+
Returns:
|
|
471
|
+
Plain dict suitable for ``json.dump()`` or
|
|
472
|
+
storage in databases, APIs, etc.
|
|
455
473
|
"""
|
|
456
474
|
entries = []
|
|
457
475
|
for key, cr in self.clusterings.items():
|
|
@@ -468,24 +486,30 @@ class ClusteringResult:
|
|
|
468
486
|
"clusterings": entries,
|
|
469
487
|
}
|
|
470
488
|
if self.time_coords is not None:
|
|
471
|
-
data["time_coords"] =
|
|
489
|
+
data["time_coords"] = _time_coords_to_dict(self.time_coords)
|
|
490
|
+
return data
|
|
472
491
|
|
|
492
|
+
def to_json(self, path: str | Path, **json_kwargs: Any) -> None:
    """Save clustering to JSON file.

    Args:
        path: Output file path.
        **json_kwargs: Additional keyword arguments passed
            to ``json.dump()``. Default: ``indent=2``.
    """
    # Apply the documented default; an explicit ``indent`` passed by the
    # caller (including ``indent=None``) is kept as given.
    json_kwargs.setdefault("indent", 2)
    with Path(path).open("w") as f:
        json.dump(self.to_dict(), f, **json_kwargs)
|
|
475
502
|
|
|
476
503
|
@classmethod
|
|
477
|
-
def
|
|
478
|
-
"""Load clustering from
|
|
504
|
+
def from_dict(cls, data: dict[str, Any]) -> ClusteringResult:
|
|
505
|
+
"""Load clustering from a dictionary.
|
|
479
506
|
|
|
480
507
|
Args:
|
|
481
|
-
|
|
508
|
+
data: Dict as returned by :meth:`to_dict`.
|
|
482
509
|
|
|
483
510
|
Returns:
|
|
484
511
|
The loaded ``ClusteringResult``.
|
|
485
512
|
"""
|
|
486
|
-
with Path(path).open() as f:
|
|
487
|
-
data = json.load(f)
|
|
488
|
-
|
|
489
513
|
clusterings: dict[tuple[Hashable, ...], tsam.ClusteringResult] = {}
|
|
490
514
|
for entry in data["clusterings"]:
|
|
491
515
|
key = tuple(entry["key"])
|
|
@@ -493,7 +517,7 @@ class ClusteringResult:
|
|
|
493
517
|
|
|
494
518
|
time_coords: pd.DatetimeIndex | None = None
|
|
495
519
|
if "time_coords" in data:
|
|
496
|
-
time_coords =
|
|
520
|
+
time_coords = _time_coords_from_dict(data["time_coords"])
|
|
497
521
|
|
|
498
522
|
return cls(
|
|
499
523
|
time_dim=data["time_dim"],
|
|
@@ -503,6 +527,19 @@ class ClusteringResult:
|
|
|
503
527
|
time_coords=time_coords,
|
|
504
528
|
)
|
|
505
529
|
|
|
530
|
+
@classmethod
def from_json(cls, path: str | Path) -> ClusteringResult:
    """Read a JSON file and build a clustering from its contents.

    Args:
        path: Input file path.

    Returns:
        The loaded ``ClusteringResult``.
    """
    # Parse the file first, then hand the plain data to ``from_dict``.
    with Path(path).open() as handle:
        payload = json.load(handle)
    return cls.from_dict(payload)
|
|
542
|
+
|
|
506
543
|
|
|
507
544
|
ClusteringInfo = ClusteringResult
|
|
508
545
|
"""Backwards-compatible alias for :class:`ClusteringResult`."""
|
|
@@ -18,7 +18,7 @@ version_tuple: tuple[int | str, ...]
|
|
|
18
18
|
commit_id: str | None
|
|
19
19
|
__commit_id__: str | None
|
|
20
20
|
|
|
21
|
-
__version__ = version = '0.5.
|
|
22
|
-
__version_tuple__ = version_tuple = (0, 5,
|
|
21
|
+
__version__ = version = '0.5.2'
|
|
22
|
+
__version_tuple__ = version_tuple = (0, 5, 2)
|
|
23
23
|
|
|
24
24
|
__commit_id__ = commit_id = None
|
|
@@ -1353,7 +1353,10 @@ class TestClusteringDisaggregate:
|
|
|
1353
1353
|
with open(path) as f:
|
|
1354
1354
|
data = json.load(f)
|
|
1355
1355
|
assert "time_coords" in data
|
|
1356
|
-
|
|
1356
|
+
tc = data["time_coords"]
|
|
1357
|
+
# Regular index → compact dict format
|
|
1358
|
+
assert isinstance(tc, dict)
|
|
1359
|
+
assert tc["periods"] == da_flat.sizes["time"]
|
|
1357
1360
|
|
|
1358
1361
|
def test_time_coords_roundtrip(self, tmp_path):
|
|
1359
1362
|
"""time_coords survive JSON round-trip."""
|
|
@@ -436,6 +436,35 @@ class TestClusteringCentersAndSegments:
|
|
|
436
436
|
assert result.clustering.segment_centers is None
|
|
437
437
|
|
|
438
438
|
|
|
439
|
+
class TestClusteringDictRoundtrip:
    """A clustering survives a ``to_dict`` / ``from_dict`` round trip."""

    def test_dict_roundtrip_assignments(self, agg_case: AggregateCase):
        """Cluster assignments are identical after the round trip."""
        original = _aggregate(agg_case)
        restored = tsam_xarray.ClusteringResult.from_dict(
            original.clustering.to_dict()
        )
        expected = original.clustering.cluster_assignments.values
        actual = restored.cluster_assignments.values
        np.testing.assert_array_equal(expected, actual)

    def test_dict_roundtrip_occurrences(self, agg_case: AggregateCase):
        """Cluster occurrences are identical after the round trip."""
        original = _aggregate(agg_case)
        restored = tsam_xarray.ClusteringResult.from_dict(
            original.clustering.to_dict()
        )
        expected = original.clustering.cluster_occurrences.values
        actual = restored.cluster_occurrences.values
        np.testing.assert_array_equal(expected, actual)

    def test_dict_roundtrip_disaggregate(self, agg_case: AggregateCase):
        """Disaggregating with the restored clustering matches ``reconstructed``."""
        original = _aggregate(agg_case)
        restored = tsam_xarray.ClusteringResult.from_dict(
            original.clustering.to_dict()
        )
        disaggregated = restored.disaggregate(original.cluster_representatives)
        np.testing.assert_allclose(
            disaggregated.values,
            original.reconstructed.values,
            rtol=1e-10,
        )
|
|
466
|
+
|
|
467
|
+
|
|
439
468
|
class TestClusteringIORoundtrip:
|
|
440
469
|
"""save/load/apply preserves results."""
|
|
441
470
|
|
|
@@ -476,3 +505,61 @@ class TestClusteringIORoundtrip:
|
|
|
476
505
|
clustering = tsam_xarray.load_clustering(str(path))
|
|
477
506
|
new_result = clustering.apply(agg_case.da)
|
|
478
507
|
assert new_result.is_transferred
|
|
508
|
+
|
|
509
|
+
|
|
510
|
+
class TestTimeCoordsSerialization:
    """Tests for the helpers that (de)serialize ``time_coords``."""

    def test_regular_index_compact(self):
        """An hourly index serializes to a dict with start, periods and freq."""
        import pandas as pd

        from tsam_xarray._clustering import _time_coords_to_dict

        hourly = pd.date_range("2025-01-01", periods=8760, freq="h")
        payload = _time_coords_to_dict(hourly)
        assert isinstance(payload, dict)
        assert set(payload) == {"start", "periods", "freq"}
        assert payload["periods"] == 8760

    def test_regular_index_roundtrip(self):
        """Serializing and deserializing an hourly index returns an equal index."""
        import pandas as pd

        from tsam_xarray._clustering import (
            _time_coords_from_dict,
            _time_coords_to_dict,
        )

        hourly = pd.date_range("2025-01-01", periods=8760, freq="h")
        payload = _time_coords_to_dict(hourly)
        rebuilt = _time_coords_from_dict(payload)
        pd.testing.assert_index_equal(hourly, rebuilt)

    def test_irregular_index_fallback(self):
        """An unevenly spaced index is serialized as a list and round-trips."""
        import pandas as pd

        from tsam_xarray._clustering import (
            _time_coords_from_dict,
            _time_coords_to_dict,
        )

        uneven = pd.DatetimeIndex(["2025-01-01", "2025-01-03", "2025-01-07"])
        payload = _time_coords_to_dict(uneven)
        assert isinstance(payload, list)
        rebuilt = _time_coords_from_dict(payload)
        pd.testing.assert_index_equal(uneven, rebuilt)

    def test_old_list_format_still_loads(self):
        """A plain list of ISO strings is accepted by the deserializer."""
        from tsam_xarray._clustering import _time_coords_from_dict

        legacy = ["2025-01-01T00:00:00", "2025-01-01T01:00:00"]
        rebuilt = _time_coords_from_dict(legacy)
        assert len(rebuilt) == 2

    def test_dict_roundtrip_uses_compact(self, agg_case: AggregateCase):
        """``to_dict`` emits ``time_coords`` as a dict for the aggregate cases."""
        serialized = _aggregate(agg_case).clustering.to_dict()
        assert isinstance(serialized["time_coords"], dict)
|
tsam_xarray-0.5.0/PKG-INFO
DELETED
|
@@ -1,117 +0,0 @@
|
|
|
1
|
-
Metadata-Version: 2.4
|
|
2
|
-
Name: tsam_xarray
|
|
3
|
-
Version: 0.5.0
|
|
4
|
-
Summary: Lightweight xarray wrapper for tsam time series aggregation
|
|
5
|
-
License-Expression: MIT
|
|
6
|
-
License-File: LICENSE
|
|
7
|
-
Classifier: License :: OSI Approved :: MIT License
|
|
8
|
-
Classifier: Operating System :: OS Independent
|
|
9
|
-
Classifier: Programming Language :: Python :: 3
|
|
10
|
-
Classifier: Programming Language :: Python :: 3.11
|
|
11
|
-
Classifier: Programming Language :: Python :: 3.12
|
|
12
|
-
Classifier: Programming Language :: Python :: 3.13
|
|
13
|
-
Classifier: Programming Language :: Python :: 3.14
|
|
14
|
-
Requires-Python: >=3.11
|
|
15
|
-
Requires-Dist: bottleneck>=1.4
|
|
16
|
-
Requires-Dist: tsam>=3.3.0
|
|
17
|
-
Requires-Dist: xarray>=2024.1
|
|
18
|
-
Provides-Extra: plot
|
|
19
|
-
Requires-Dist: plotly>=5; extra == 'plot'
|
|
20
|
-
Description-Content-Type: text/markdown
|
|
21
|
-
|
|
22
|
-
# tsam_xarray
|
|
23
|
-
|
|
24
|
-
[](https://pypi.org/project/tsam-xarray/)
|
|
25
|
-
[](https://pypi.org/project/tsam-xarray/)
|
|
26
|
-
[](https://github.com/FBumann/tsam_xarray/actions/workflows/ci.yaml)
|
|
27
|
-
[](https://codecov.io/gh/FBumann/tsam_xarray)
|
|
28
|
-
[](LICENSE)
|
|
29
|
-
[](https://tsam-xarray.readthedocs.io/)
|
|
30
|
-
|
|
31
|
-
Lightweight [xarray](https://xarray.dev/) wrapper for [tsam](https://github.com/FZJ-IEK3-VSA/tsam) time series aggregation.
|
|
32
|
-
|
|
33
|
-
**DataArray in, DataArray out** — no manual DataFrame conversions, no MultiIndex wrangling, no loop-and-concat boilerplate.
|
|
34
|
-
|
|
35
|
-
## Installation
|
|
36
|
-
|
|
37
|
-
```bash
|
|
38
|
-
pip install tsam_xarray
|
|
39
|
-
```
|
|
40
|
-
|
|
41
|
-
## Quick start
|
|
42
|
-
|
|
43
|
-
```python
|
|
44
|
-
import numpy as np
|
|
45
|
-
import pandas as pd
|
|
46
|
-
import xarray as xr
|
|
47
|
-
import tsam_xarray
|
|
48
|
-
|
|
49
|
-
# Create sample data: 30 days of hourly solar and wind data
|
|
50
|
-
time = pd.date_range("2020-01-01", periods=30 * 24, freq="h")
|
|
51
|
-
da = xr.DataArray(
|
|
52
|
-
np.random.default_rng(42).random((len(time), 2)),
|
|
53
|
-
dims=["time", "variable"],
|
|
54
|
-
coords={"time": time, "variable": ["solar", "wind"]},
|
|
55
|
-
)
|
|
56
|
-
|
|
57
|
-
# Aggregate to 4 typical days
|
|
58
|
-
result = tsam_xarray.aggregate(
|
|
59
|
-
da, time_dim="time", cluster_dim="variable", n_clusters=4,
|
|
60
|
-
)
|
|
61
|
-
|
|
62
|
-
result.cluster_representatives # (cluster, timestep, variable)
|
|
63
|
-
result.cluster_weights # (cluster,) — days each cluster represents
|
|
64
|
-
result.accuracy.rmse # (variable,) — per-variable RMSE
|
|
65
|
-
result.reconstructed # same shape as input
|
|
66
|
-
```
|
|
67
|
-
|
|
68
|
-
## Multi-dimensional data
|
|
69
|
-
|
|
70
|
-
```python
|
|
71
|
-
# Cluster variable x region together; scenario is sliced independently
|
|
72
|
-
result = tsam_xarray.aggregate(
|
|
73
|
-
da,
|
|
74
|
-
time_dim="time",
|
|
75
|
-
cluster_dim=["variable", "region"],
|
|
76
|
-
n_clusters=8,
|
|
77
|
-
)
|
|
78
|
-
|
|
79
|
-
result.cluster_representatives # (scenario, cluster, timestep, variable, region)
|
|
80
|
-
```
|
|
81
|
-
|
|
82
|
-
## Weights
|
|
83
|
-
|
|
84
|
-
```python
|
|
85
|
-
# Single cluster_dim — simple dict
|
|
86
|
-
result = tsam_xarray.aggregate(
|
|
87
|
-
da, time_dim="time", cluster_dim="variable", n_clusters=8,
|
|
88
|
-
weights={"solar": 2.0, "wind": 1.0},
|
|
89
|
-
)
|
|
90
|
-
|
|
91
|
-
# Multiple cluster_dim — dict-of-dicts
|
|
92
|
-
result = tsam_xarray.aggregate(
|
|
93
|
-
da, time_dim="time", cluster_dim=["variable", "region"], n_clusters=8,
|
|
94
|
-
weights={"variable": {"solar": 2.0}, "region": {"north": 1.5}},
|
|
95
|
-
)
|
|
96
|
-
```
|
|
97
|
-
|
|
98
|
-
## tsam passthrough
|
|
99
|
-
|
|
100
|
-
All [tsam.aggregate()](https://github.com/FZJ-IEK3-VSA/tsam) keyword arguments pass through:
|
|
101
|
-
|
|
102
|
-
```python
|
|
103
|
-
from tsam import ClusterConfig, SegmentConfig
|
|
104
|
-
|
|
105
|
-
result = tsam_xarray.aggregate(
|
|
106
|
-
da,
|
|
107
|
-
time_dim="time",
|
|
108
|
-
cluster_dim="variable",
|
|
109
|
-
n_clusters=8,
|
|
110
|
-
cluster=ClusterConfig(method="kmeans"),
|
|
111
|
-
segments=SegmentConfig(n_segments=6),
|
|
112
|
-
)
|
|
113
|
-
```
|
|
114
|
-
|
|
115
|
-
## Documentation
|
|
116
|
-
|
|
117
|
-
Full docs with interactive examples: [tsam-xarray.readthedocs.io](https://tsam-xarray.readthedocs.io/)
|
tsam_xarray-0.5.0/README.md
DELETED
|
@@ -1,96 +0,0 @@
|
|
|
1
|
-
# tsam_xarray
|
|
2
|
-
|
|
3
|
-
[](https://pypi.org/project/tsam-xarray/)
|
|
4
|
-
[](https://pypi.org/project/tsam-xarray/)
|
|
5
|
-
[](https://github.com/FBumann/tsam_xarray/actions/workflows/ci.yaml)
|
|
6
|
-
[](https://codecov.io/gh/FBumann/tsam_xarray)
|
|
7
|
-
[](LICENSE)
|
|
8
|
-
[](https://tsam-xarray.readthedocs.io/)
|
|
9
|
-
|
|
10
|
-
Lightweight [xarray](https://xarray.dev/) wrapper for [tsam](https://github.com/FZJ-IEK3-VSA/tsam) time series aggregation.
|
|
11
|
-
|
|
12
|
-
**DataArray in, DataArray out** — no manual DataFrame conversions, no MultiIndex wrangling, no loop-and-concat boilerplate.
|
|
13
|
-
|
|
14
|
-
## Installation
|
|
15
|
-
|
|
16
|
-
```bash
|
|
17
|
-
pip install tsam_xarray
|
|
18
|
-
```
|
|
19
|
-
|
|
20
|
-
## Quick start
|
|
21
|
-
|
|
22
|
-
```python
|
|
23
|
-
import numpy as np
|
|
24
|
-
import pandas as pd
|
|
25
|
-
import xarray as xr
|
|
26
|
-
import tsam_xarray
|
|
27
|
-
|
|
28
|
-
# Create sample data: 30 days of hourly solar and wind data
|
|
29
|
-
time = pd.date_range("2020-01-01", periods=30 * 24, freq="h")
|
|
30
|
-
da = xr.DataArray(
|
|
31
|
-
np.random.default_rng(42).random((len(time), 2)),
|
|
32
|
-
dims=["time", "variable"],
|
|
33
|
-
coords={"time": time, "variable": ["solar", "wind"]},
|
|
34
|
-
)
|
|
35
|
-
|
|
36
|
-
# Aggregate to 4 typical days
|
|
37
|
-
result = tsam_xarray.aggregate(
|
|
38
|
-
da, time_dim="time", cluster_dim="variable", n_clusters=4,
|
|
39
|
-
)
|
|
40
|
-
|
|
41
|
-
result.cluster_representatives # (cluster, timestep, variable)
|
|
42
|
-
result.cluster_weights # (cluster,) — days each cluster represents
|
|
43
|
-
result.accuracy.rmse # (variable,) — per-variable RMSE
|
|
44
|
-
result.reconstructed # same shape as input
|
|
45
|
-
```
|
|
46
|
-
|
|
47
|
-
## Multi-dimensional data
|
|
48
|
-
|
|
49
|
-
```python
|
|
50
|
-
# Cluster variable x region together; scenario is sliced independently
|
|
51
|
-
result = tsam_xarray.aggregate(
|
|
52
|
-
da,
|
|
53
|
-
time_dim="time",
|
|
54
|
-
cluster_dim=["variable", "region"],
|
|
55
|
-
n_clusters=8,
|
|
56
|
-
)
|
|
57
|
-
|
|
58
|
-
result.cluster_representatives # (scenario, cluster, timestep, variable, region)
|
|
59
|
-
```
|
|
60
|
-
|
|
61
|
-
## Weights
|
|
62
|
-
|
|
63
|
-
```python
|
|
64
|
-
# Single cluster_dim — simple dict
|
|
65
|
-
result = tsam_xarray.aggregate(
|
|
66
|
-
da, time_dim="time", cluster_dim="variable", n_clusters=8,
|
|
67
|
-
weights={"solar": 2.0, "wind": 1.0},
|
|
68
|
-
)
|
|
69
|
-
|
|
70
|
-
# Multiple cluster_dim — dict-of-dicts
|
|
71
|
-
result = tsam_xarray.aggregate(
|
|
72
|
-
da, time_dim="time", cluster_dim=["variable", "region"], n_clusters=8,
|
|
73
|
-
weights={"variable": {"solar": 2.0}, "region": {"north": 1.5}},
|
|
74
|
-
)
|
|
75
|
-
```
|
|
76
|
-
|
|
77
|
-
## tsam passthrough
|
|
78
|
-
|
|
79
|
-
All [tsam.aggregate()](https://github.com/FZJ-IEK3-VSA/tsam) keyword arguments pass through:
|
|
80
|
-
|
|
81
|
-
```python
|
|
82
|
-
from tsam import ClusterConfig, SegmentConfig
|
|
83
|
-
|
|
84
|
-
result = tsam_xarray.aggregate(
|
|
85
|
-
da,
|
|
86
|
-
time_dim="time",
|
|
87
|
-
cluster_dim="variable",
|
|
88
|
-
n_clusters=8,
|
|
89
|
-
cluster=ClusterConfig(method="kmeans"),
|
|
90
|
-
segments=SegmentConfig(n_segments=6),
|
|
91
|
-
)
|
|
92
|
-
```
|
|
93
|
-
|
|
94
|
-
## Documentation
|
|
95
|
-
|
|
96
|
-
Full docs with interactive examples: [tsam-xarray.readthedocs.io](https://tsam-xarray.readthedocs.io/)
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|