tsam_xarray-0.0.1a0-py3-none-any.whl

tsam_xarray/__init__.py ADDED
@@ -0,0 +1,6 @@
+ """tsam_xarray: Lightweight xarray wrapper for tsam time series aggregation."""
+
+ from tsam_xarray._core import aggregate
+ from tsam_xarray._result import AccuracyMetrics, AggregationResult
+
+ __all__ = ["AccuracyMetrics", "AggregationResult", "aggregate"]
tsam_xarray/_core.py ADDED
@@ -0,0 +1,457 @@
+ """Core aggregation logic for tsam_xarray."""
+
+ from __future__ import annotations
+
+ import itertools
+ from collections.abc import Hashable, Sequence
+ from typing import Any
+
+ import numpy as np
+ import pandas as pd
+ import tsam
+ import xarray as xr
+
+ from tsam_xarray._result import AccuracyMetrics, AggregationResult
+
+ Weights = dict[str, float] | dict[str, dict[str, float]] | None
+
+
+ def aggregate(
+     da: xr.DataArray,
+     *,
+     time_dim: str,
+     cluster_dim: Sequence[str] | str,
+     n_clusters: int,
+     weights: Weights = None,
+     **tsam_kwargs: Any,
+ ) -> AggregationResult:
+     """Aggregate an xarray DataArray using tsam.
+
+     Parameters
+     ----------
+     da : xr.DataArray
+         Input data with a time dimension and optional extra dimensions.
+     time_dim : str
+         Name of the time dimension.
+     cluster_dim : Sequence[str] | str
+         Dimension(s) to cluster together. Multiple dims are stacked
+         internally into a MultiIndex and unstacked in results.
+         All remaining dims are sliced independently.
+     n_clusters : int
+         Number of typical periods.
+     weights : dict[str, float] | dict[str, dict[str, float]] | None
+         Per-coordinate weights for clustering. Missing entries default
+         to 1.0. Two formats:
+
+         - **Simple dict** (single ``cluster_dim``)::
+
+             weights={"solar": 2.0, "wind": 1.0}
+
+         - **Dict-of-dicts** (multiple ``cluster_dim``)::
+
+             weights={"variable": {"solar": 2.0}, "region": {"north": 1.5}}
+
+         Weights are multiplied across dimensions, e.g. ``("solar", "north")``
+         gets weight ``2.0 * 1.5 = 3.0``.
+
+     **tsam_kwargs
+         Additional keyword arguments passed to ``tsam.aggregate()``.
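+
+     Examples
+     --------
+     A minimal sketch for a ``(time, variable)`` DataArray::
+
+         result = aggregate(
+             da, time_dim="time", cluster_dim="variable", n_clusters=4
+         )
+         result.typical_periods  # (cluster, timestep, variable)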
+     """
+     _validate_time_dim(da, time_dim)
+     col_dims = _resolve_cluster_dim(cluster_dim)
+     slice_dims = _infer_slice_dims(da, time_dim, col_dims)
+     _validate(da, time_dim, col_dims, slice_dims)
+     _validate_no_cluster_config_weights(tsam_kwargs)
+     per_dim_weights = _normalize_weights(weights, da, col_dims)
+
+     if not slice_dims:
+         return _aggregate_single(
+             da, n_clusters, time_dim, col_dims, per_dim_weights, tsam_kwargs
+         )
+
+     slice_coords = {d: da.coords[d].values for d in slice_dims}
+     slice_keys = list(itertools.product(*(slice_coords[d] for d in slice_dims)))
+
+     results: list[AggregationResult] = []
+     raw_map: dict[tuple[Hashable, ...], Any] = {}
+
+     for key in slice_keys:
+         sel = dict(zip(slice_dims, key, strict=True))
+         da_slice = da.sel(sel)
+         r = _aggregate_single(
+             da_slice, n_clusters, time_dim, col_dims, per_dim_weights, tsam_kwargs
+         )
+         results.append(r)
+         raw_map[key] = r.raw
+
+     return _concat_results(results, slice_dims, slice_coords, raw_map)
+
+
+ def _resolve_cluster_dim(
+     cluster_dim: Sequence[str] | str,
+ ) -> list[str]:
+     """Resolve cluster_dim to a list of dimension names."""
+     if isinstance(cluster_dim, str):
+         return [cluster_dim]
+     return list(cluster_dim)
+
+
+ def _infer_slice_dims(
+     da: xr.DataArray,
+     time_dim: str,
+     col_dims: list[str],
+ ) -> list[str]:
+     """Infer slice dims: everything not time_dim or column dims."""
+     exclude = {time_dim, *col_dims}
+     return [str(d) for d in da.dims if d not in exclude]
+
+
+ def _validate_time_dim(da: xr.DataArray, time_dim: str) -> None:
+     if time_dim not in da.dims:
+         msg = f"time_dim {time_dim!r} not in DataArray dims {set(da.dims)}"
+         raise ValueError(msg)
+
+
+ def _validate_no_cluster_config_weights(
+     tsam_kwargs: dict[str, Any],
+ ) -> None:
+     """Reject deprecated weights in ClusterConfig."""
+     cluster_config = tsam_kwargs.get("cluster")
+     if cluster_config is not None and cluster_config.weights is not None:
+         msg = (
+             "ClusterConfig.weights is deprecated in tsam and not "
+             "supported by tsam_xarray. Use the top-level 'weights' "
+             "parameter of aggregate() instead."
+         )
+         raise ValueError(msg)
+
+
+ def _validate(
+     da: xr.DataArray,
+     time_dim: str,
+     col_dims: list[str],
+     slice_dims: list[str],
+ ) -> None:
+     dims = set(da.dims)
+     for d in col_dims:
+         if d not in dims:
+             msg = f"cluster_dim entry {d!r} not in DataArray dims {dims}"
+             raise ValueError(msg)
+         if d == time_dim:
+             msg = "cluster_dim and time_dim must not overlap"
+             raise ValueError(msg)
+
+
+ def _to_dataframe(
+     da: xr.DataArray,
+     time_dim: str,
+     col_dims: list[str],
+ ) -> pd.DataFrame:
+     """Convert DataArray to DataFrame for tsam."""
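+     # tsam consumes a 2-D frame (time x columns); multiple cluster dims
+     # are stacked into a single MultiIndex column axis here and unstacked
+     # again when results are converted back to xarray.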
+     if not col_dims:
+         s = da.to_pandas()
+         if isinstance(s, pd.Series):
+             name = da.name or "value"
+             return s.to_frame(name=str(name))
+         return pd.DataFrame(s)
+
+     if len(col_dims) > 1:
+         da = da.stack(_column=col_dims)
+         col_dim = "_column"
+     else:
+         col_dim = col_dims[0]
+
+     da_t = da.transpose(time_dim, col_dim)
+     return pd.DataFrame(da_t.to_pandas())
+
+
+ def _representatives_to_da(
+     df: pd.DataFrame,
+     col_dims: list[str],
+ ) -> xr.DataArray:
+     """Convert cluster_representatives DataFrame to DataArray."""
+     df = df.copy()
+     # With segmentation, index has 3 levels: (cluster, segment_step, segment_duration)
+     # Without: 2 levels: (cluster, timestep)
+     if df.index.nlevels == 3:
+         df.index = df.index.droplevel(2)  # drop segment_duration
+     df.index.names = ["cluster", "timestep"]
+
+     if not col_dims:
+         clusters = df.index.get_level_values(0).unique()
+         timesteps = df.index.get_level_values(1).unique()
+         values = df.values.squeeze(axis=1).reshape(len(clusters), len(timesteps))
+         return xr.DataArray(
+             values,
+             dims=["cluster", "timestep"],
+             coords={"cluster": clusters, "timestep": timesteps},
+         )
+
+     stacked = df.stack(df.columns.names, future_stack=True)
+     da: xr.DataArray = stacked.to_xarray()  # type: ignore[assignment]
+     return da
+
+
+ def _segment_durations_to_da(
+     raw_durations: tuple[tuple[int, ...], ...] | None,
+ ) -> xr.DataArray | None:
+     """Convert tsam segment_durations to DataArray."""
+     if raw_durations is None:
+         return None
+     data = np.array(raw_durations)  # (n_clusters, n_segments)
+     return xr.DataArray(
+         data,
+         dims=["cluster", "timestep"],
+         coords={
+             "cluster": np.arange(data.shape[0]),
+             "timestep": np.arange(data.shape[1]),
+         },
+     )
+
+
+ def _reconstructed_to_da(
+     df: pd.DataFrame,
+     time_dim: str,
+     col_dims: list[str],
+ ) -> xr.DataArray:
+     """Convert reconstructed DataFrame to DataArray."""
+     df = df.copy()
+     df.index.name = time_dim
+
+     if not col_dims:
+         return xr.DataArray(
+             df.values.squeeze(axis=1),
+             dims=[time_dim],
+             coords={time_dim: df.index},
+         )
+
+     stacked = df.stack(df.columns.names, future_stack=True)
+     da: xr.DataArray = stacked.to_xarray()  # type: ignore[assignment]
+     return da
+
+
+ def _metric_to_da(
+     series: pd.Series[float],
+     col_dims: list[str],
+     column_names: list[str] | None = None,
+ ) -> xr.DataArray:
+     """Convert an accuracy metric Series to DataArray."""
+     if not col_dims:
+         return xr.DataArray(float(series.iloc[0]))
+     series = series.copy()
+     if isinstance(series.index, pd.MultiIndex):
+         if column_names is not None:
+             series.index = series.index.set_names(column_names)
+     elif series.index.name is None:
+         series.index.name = col_dims[0]
+     return xr.DataArray(series.to_xarray())
+
+
+ def _normalize_weights(
+     weights: dict[str, float] | dict[str, dict[str, float]] | None,
+     da: xr.DataArray,
+     col_dims: list[str],
+ ) -> dict[str, dict[str, float]] | None:
+     """Normalize weights to dict-of-dicts and validate dims/coords."""
+     if weights is None or not weights:
+         return None
+
+     first_val = next(iter(weights.values()))
+     if isinstance(first_val, dict):
+         # Dict-of-dicts — validate all values are dicts
+         for _key, val in weights.items():
+             if not isinstance(val, dict):
+                 msg = (
+                     "Mixed weights format: all values must be dicts. "
+                     'Use {"dim": {"coord": weight}} for all entries.'
+                 )
+                 raise ValueError(msg)
+         per_dim_weights: dict[str, dict[str, float]] = weights  # type: ignore[assignment]
+     else:
+         # Simple dict — requires single cluster_dim
+         if len(col_dims) != 1:
+             msg = (
+                 "Simple dict weights require a single cluster_dim. "
+                 "For multiple cluster_dim, use dict-of-dicts: "
+                 '{"dim_name": {"coord": weight}}.'
+             )
+             raise ValueError(msg)
+         per_dim_weights = {col_dims[0]: weights}  # type: ignore[dict-item]
+
+     # Validate dim names exist in cluster_dim
+     extra_dims = set(per_dim_weights.keys()) - set(col_dims)
+     if extra_dims:
+         msg = (
+             f"weights has unknown dims {extra_dims}, "
+             f"must be subset of cluster_dim {col_dims}"
+         )
+         raise ValueError(msg)
+
+     # Validate coord values exist in the DataArray
+     for dim_name, coord_weights in per_dim_weights.items():
+         valid_coords = set(str(c) for c in da.coords[dim_name].values)
+         unknown = set(coord_weights.keys()) - valid_coords
+         if unknown:
+             msg = (
+                 f"weights has unknown coords {unknown} for dim {dim_name!r}, "
+                 f"valid coords: {sorted(valid_coords)}"
+             )
+             raise ValueError(msg)
+
+     return per_dim_weights
+
+
+ def _translate_weights(
+     weights: dict[str, dict[str, float]],
+     df: pd.DataFrame,
+     col_dims: list[str],
+ ) -> dict[Hashable, float]:
+     """Translate per-dim weights to flat column weights for tsam."""
+     flat: dict[Hashable, float] = {}
+     for col in df.columns:
+         w = 1.0
+         if isinstance(col, tuple):
+             for dim_name, coord_val in zip(col_dims, col, strict=True):
+                 if dim_name in weights:
+                     w *= weights[dim_name].get(str(coord_val), 1.0)
+         else:
+             dim_name = col_dims[0]
+             if dim_name in weights:
+                 w *= weights[dim_name].get(str(col), 1.0)
+         flat[col] = w
+     return flat
+
+
+ def _aggregate_single(
+     da: xr.DataArray,
+     n_clusters: int,
+     time_dim: str,
+     col_dims: list[str],
+     weights: dict[str, dict[str, float]] | None,
+     tsam_kwargs: dict[str, Any],
+ ) -> AggregationResult:
+     """Run a single tsam aggregation on a DataArray."""
+     df = _to_dataframe(da, time_dim, col_dims)
+
+     tsam_weights: dict[Hashable, float] | None = None
+     if weights is not None:
+         tsam_weights = _translate_weights(weights, df, col_dims)
+
+     tsam_result = tsam.aggregate(
+         df,
+         n_clusters,
+         weights=tsam_weights,  # type: ignore[arg-type]
+         **tsam_kwargs,
+     )
+
+     typical = _representatives_to_da(tsam_result.cluster_representatives, col_dims)
+     reconstructed = _reconstructed_to_da(tsam_result.reconstructed, time_dim, col_dims)
+
+     cw = tsam_result.cluster_weights
+     cluster_ids = np.array(sorted(cw.keys()))
+     cluster_weights_da = xr.DataArray(
+         np.array([cw[k] for k in cluster_ids]),
+         dims=["cluster"],
+         coords={"cluster": cluster_ids},
+     )
+
+     assignments_da = xr.DataArray(tsam_result.cluster_assignments, dims=["period"])
+
+     col_names: list[str] | None = None
+     if isinstance(df.columns, pd.MultiIndex):
+         col_names = [str(n) for n in df.columns.names]
+
+     accuracy = AccuracyMetrics(
+         rmse=_metric_to_da(tsam_result.accuracy.rmse, col_dims, col_names),
+         mae=_metric_to_da(tsam_result.accuracy.mae, col_dims, col_names),
+         rmse_duration=_metric_to_da(
+             tsam_result.accuracy.rmse_duration, col_dims, col_names
+         ),
+     )
+
+     seg_durations = _segment_durations_to_da(tsam_result.segment_durations)
+
+     return AggregationResult(
+         typical_periods=typical,
+         cluster_assignments=assignments_da,
+         cluster_weights=cluster_weights_da,
+         segment_durations=seg_durations,
+         accuracy=accuracy,
+         reconstructed=reconstructed,
+         original=da,
+         raw=tsam_result,
+     )
+
+
+ def _make_dim_index(
+     slice_coords: dict[str, Any],
+     dim: str,
+ ) -> pd.Index:
+     """Create a pd.Index for a slice dimension."""
+     return pd.Index(slice_coords[dim], name=dim)  # type: ignore[no-any-return]
+
+
+ def _concat_along_dims(
+     arrays: list[xr.DataArray],
+     slice_dims: list[str],
+     slice_coords: dict[str, Any],
+ ) -> xr.DataArray:
+     """Concat arrays along one or more slice dims."""
+     if len(slice_dims) == 1:
+         return xr.concat(arrays, dim=_make_dim_index(slice_coords, slice_dims[0]))
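+     # ``arrays`` arrives in ``itertools.product`` order over the slice
+     # coords (built in ``aggregate``), so ``_nest`` below can rebuild the
+     # nested list structure by consuming the iterator in order.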
+     it = iter(arrays)
+
+     def _nest(dims: list[str]) -> list[Any]:
+         if len(dims) == 1:
+             return [next(it) for _ in slice_coords[dims[0]]]
+         return [_nest(dims[1:]) for _ in slice_coords[dims[0]]]
+
+     nested: Any = _nest(slice_dims)
+
+     def _recursive_concat(node: Any, dims: list[str]) -> xr.DataArray:
+         dim = dims[0]
+         idx = _make_dim_index(slice_coords, dim)
+         if len(dims) == 1:
+             return xr.concat(node, dim=idx)  # type: ignore[no-any-return]
+         children = [_recursive_concat(child, dims[1:]) for child in node]
+         return xr.concat(children, dim=idx)
+
+     return _recursive_concat(nested, slice_dims)
+
+
+ def _concat_results(
+     results: list[AggregationResult],
+     slice_dims: list[str],
+     slice_coords: dict[str, Any],
+     raw_map: dict[tuple[Hashable, ...], Any],
+ ) -> AggregationResult:
+     """Concatenate per-slice results along slice dims."""
+
+     def _field(field_name: str) -> xr.DataArray:
+         arrays = [getattr(r, field_name) for r in results]
+         return _concat_along_dims(arrays, slice_dims, slice_coords)
+
+     def _optional_field(field_name: str) -> xr.DataArray | None:
+         arrays = [getattr(r, field_name) for r in results]
+         if arrays[0] is None:
+             return None
+         return _concat_along_dims(arrays, slice_dims, slice_coords)
+
+     def _acc_field(field_name: str) -> xr.DataArray:
+         arrays = [getattr(r.accuracy, field_name) for r in results]
+         return _concat_along_dims(arrays, slice_dims, slice_coords)
+
+     return AggregationResult(
+         typical_periods=_field("typical_periods"),
+         cluster_assignments=_field("cluster_assignments"),
+         cluster_weights=_field("cluster_weights"),
+         segment_durations=_optional_field("segment_durations"),
+         accuracy=AccuracyMetrics(
+             rmse=_acc_field("rmse"),
+             mae=_acc_field("mae"),
+             rmse_duration=_acc_field("rmse_duration"),
+         ),
+         reconstructed=_field("reconstructed"),
+         original=_field("original"),
+         raw=raw_map,
+     )
tsam_xarray/_result.py ADDED
@@ -0,0 +1,226 @@
+ """Result dataclasses for tsam_xarray."""
+
+ from __future__ import annotations
+
+ from dataclasses import dataclass
+ from typing import Any
+
+ import numpy as np
+ import xarray as xr
+
+
+ @dataclass(frozen=True)
+ class AccuracyMetrics:
+     """Accuracy metrics from time series aggregation."""
+
+     rmse: xr.DataArray
+     mae: xr.DataArray
+     rmse_duration: xr.DataArray
+
+
+ @dataclass(frozen=True)
+ class AggregationResult:
+     """Result of tsam_xarray.aggregate()."""
+
+     typical_periods: xr.DataArray
+     cluster_assignments: xr.DataArray
+     cluster_weights: xr.DataArray
+     segment_durations: xr.DataArray | None
+     accuracy: AccuracyMetrics
+     reconstructed: xr.DataArray
+     original: xr.DataArray
+     raw: Any  # tsam.AggregationResult or dict of them
+
+     @property
+     def n_clusters(self) -> int:
+         """Number of typical period clusters."""
+         return int(self.cluster_weights.sizes["cluster"])
+
+     @property
+     def n_timesteps_per_period(self) -> int:
+         """Number of timesteps per typical period."""
+         return int(self.typical_periods.sizes["timestep"])
+
+     @property
+     def n_segments(self) -> int | None:
+         """Number of segments per period, if segmentation was used."""
+         if isinstance(self.raw, dict):
+             first = next(iter(self.raw.values()))
+             result: int | None = first.n_segments
+         else:
+             result = self.raw.n_segments
+         return result
+
+     @property
+     def clustering_duration(self) -> float:
+         """Time spent on clustering in seconds."""
+         if isinstance(self.raw, dict):
+             total: float = sum(r.clustering_duration for r in self.raw.values())
+             return total
+         duration: float = self.raw.clustering_duration
+         return duration
+
+     @property
+     def is_transferred(self) -> bool:
+         """Whether result was created via ClusteringResult.apply()."""
+         if isinstance(self.raw, dict):
+             return all(r.is_transferred for r in self.raw.values())
+         is_transferred: bool = self.raw.is_transferred
+         return is_transferred
+
+     @property
+     def residuals(self) -> xr.DataArray:
+         """Difference between original and reconstructed data."""
+         return self.original - self.reconstructed
+
+     def disaggregate(self, data: xr.DataArray) -> xr.DataArray:
+         """Map data on ``(cluster, timestep)`` back to original time.
+
+         This is the inverse of ``aggregate()``. Use it to expand
+         external data computed on the compact typical-period grid
+         (e.g., optimization results) back to the full time axis.
+
+         Without segmentation, values are repeated for each timestep
+         in the period. With segmentation, values are placed at segment
+         boundaries and remaining timesteps are NaN — use
+         ``.ffill(dim="time")``, ``.interpolate_na(dim="time")``, etc.
+
+         Parameters
+         ----------
+         data : xr.DataArray
+             Data with ``cluster`` and ``timestep`` dims, matching the
+             shape of ``result.typical_periods``. Additional dims
+             (including auto-sliced dims like scenario) are supported.
+
+         Returns
+         -------
+         xr.DataArray
+             Data with ``cluster`` and ``timestep`` replaced by the
+             original ``time`` dimension.
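+
+         Examples
+         --------
+         A minimal sketch, with ``result`` from ``aggregate()`` and a
+         hypothetical ``costs`` array on the ``(cluster, timestep)`` grid::
+
+             full = result.disaggregate(costs)
+             full = full.ffill(dim="time")  # only needed with segmentation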
+         """
+         # Identify slice dims (dims on data that aren't cluster/timestep
+         # and aren't cluster_dim coords)
+         slice_dims = [
+             str(d)
+             for d in data.dims
+             if d not in ("cluster", "timestep") and d in self.cluster_assignments.dims
+         ]
+
+         if not slice_dims:
+             return self._disaggregate_single(data)
+
+         # Loop over slice dims and concat
+         import itertools
+         import pandas as pd
+
+         slice_coords = {d: data.coords[d].values for d in slice_dims}
+         keys = list(itertools.product(*(slice_coords[d] for d in slice_dims)))
+         results = []
+         for key in keys:
+             sel = dict(zip(slice_dims, key, strict=True))
+             data_slice = data.sel(sel)
+             # Use per-slice raw result for assignments/durations
+             result_slice = self._make_slice_view(sel)
+             results.append(result_slice._disaggregate_single(data_slice))
+
+         # Rebuild the nested list structure implied by itertools.product,
+         # then concat innermost dim first (mirrors _core._concat_along_dims,
+         # which also handles three or more slice dims correctly).
+         it = iter(results)
+
+         def _nest(dims: list[str]) -> list:  # type: ignore[type-arg]
+             if len(dims) == 1:
+                 return [next(it) for _ in slice_coords[dims[0]]]
+             return [_nest(dims[1:]) for _ in slice_coords[dims[0]]]
+
+         def _concat(node: Any, dims: list[str]) -> xr.DataArray:
+             idx = pd.Index(slice_coords[dims[0]], name=dims[0])
+             if len(dims) == 1:
+                 return xr.concat(node, dim=idx)
+             return xr.concat([_concat(child, dims[1:]) for child in node], dim=idx)
+
+         return _concat(_nest(slice_dims), slice_dims)
+
+     def _make_slice_view(self, sel: dict[str, object]) -> AggregationResult:
+         """Create a view of this result for a single slice."""
+         return AggregationResult(
+             typical_periods=self.typical_periods.sel(sel),
+             cluster_assignments=self.cluster_assignments.sel(sel),
+             cluster_weights=self.cluster_weights.sel(sel),
+             segment_durations=(
+                 self.segment_durations.sel(sel)
+                 if self.segment_durations is not None
+                 else None
+             ),
+             accuracy=AccuracyMetrics(
+                 rmse=self.accuracy.rmse.sel(sel),
+                 mae=self.accuracy.mae.sel(sel),
+                 rmse_duration=self.accuracy.rmse_duration.sel(sel),
+             ),
+             reconstructed=self.reconstructed.sel(sel),
+             original=self.original.sel(sel),
+             raw=(
+                 self.raw[tuple(sel.values())]
+                 if isinstance(self.raw, dict)
+                 else self.raw
+             ),
+         )
+
+     def _disaggregate_single(self, data: xr.DataArray) -> xr.DataArray:
+         """Disaggregate without slice dims."""
+         time_coords = self.original.coords["time"]
+         assignments = self.cluster_assignments.values
+         n_original_timesteps = len(time_coords)
+         n_periods = len(assignments)
+         n_per_period = n_original_timesteps // n_periods
+
+         other_dims = [str(d) for d in data.dims if d not in ("cluster", "timestep")]
+
+         if self.segment_durations is None:
+             expanded = data.sel(cluster=xr.DataArray(assignments, dims=["period"]))
+             flat = expanded.values.reshape(-1, *expanded.shape[2:])
+             result = xr.DataArray(
+                 flat[:n_original_timesteps],
+                 dims=["time", *other_dims],
+                 coords={"time": time_coords},
+             )
+             for d in other_dims:
+                 if d in data.coords:
+                     result = result.assign_coords({d: data.coords[d]})
+             return result
+
+         other_shape = [data.sizes[d] for d in other_dims]
+         total_timesteps = n_periods * n_per_period
+         out = np.full([total_timesteps, *other_shape], np.nan)
+
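+         # With segmentation, each segment's value lands on the segment's
+         # first timestep; the timesteps in between stay NaN (see the
+         # ``disaggregate`` docstring for fill options).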
+         for p_idx, cluster in enumerate(assignments):
+             offset = 0
+             durations = self.segment_durations.sel(cluster=int(cluster)).values
+             for seg_idx, dur in enumerate(durations):
+                 t_start = p_idx * n_per_period + offset
+                 vals = data.sel(cluster=int(cluster), timestep=seg_idx).values
+                 out[t_start] = vals
+                 offset += int(dur)
+
+         result = xr.DataArray(
+             out[:n_original_timesteps],
+             dims=["time", *other_dims],
+             coords={"time": time_coords},
+         )
+         for d in other_dims:
+             if d in data.coords:
+                 result = result.assign_coords({d: data.coords[d]})
+         return result
tsam_xarray/_sample_data.py ADDED
@@ -0,0 +1,107 @@
+ """Synthetic sample data for documentation and testing."""
+
+ from __future__ import annotations
+
+ import numpy as np
+ import pandas as pd
+ import xarray as xr
+
+
+ def sample_energy_data(
+     n_days: int = 30,
+     seed: int = 42,
+ ) -> xr.DataArray:
+     """Create a synthetic energy DataArray with realistic profiles.
+
+     Returns an hourly DataArray with dimensions:
+
+     - **time** — hourly timestamps
+     - **variable** — ``solar``, ``wind``, ``demand``
+     - **region** — ``north``, ``south``, ``east``
+     - **scenario** — ``low``, ``high``
+
+     Solar follows a daily bell curve, wind has seasonal variation
+     with autocorrelation, and demand combines a daily commute pattern
+     with weather-driven noise. Scenarios scale the base profiles.
+
+     Parameters
+     ----------
+     n_days : int
+         Number of days of hourly data (default: 30).
+     seed : int
+         Random seed for reproducibility (default: 42).
+
+     Returns
+     -------
+     xr.DataArray
+         Shape ``(n_days * 24, 3, 3, 2)`` with coords on every dim.
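+
+     Examples
+     --------
+     A quick check of the advertised shape::
+
+         da = sample_energy_data(n_days=7)
+         da.sizes  # {'time': 168, 'variable': 3, 'region': 3, 'scenario': 2}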
+     """
+     rng = np.random.default_rng(seed)
+     hours = n_days * 24
+     time = pd.date_range("2020-01-01", periods=hours, freq="h")
+     hour_of_day = np.arange(hours) % 24
+     day_of_year = time.dayofyear.values
+
+     variables = ["solar", "wind", "demand"]
+     regions = ["north", "south", "east"]
+     scenarios = ["low", "high"]
+
+     # --- base profiles (hours,) ---
+     # Solar: bell curve peaking at noon, zero at night
+     solar_base = np.maximum(0, np.sin(np.pi * (hour_of_day - 6) / 12)) ** 1.5
+     # Seasonal envelope: weaker in winter
+     solar_season = 0.6 + 0.4 * np.sin(2 * np.pi * (day_of_year - 80) / 365)
+     solar = solar_base * solar_season
+
+     # Wind: autocorrelated noise with seasonal mean
+     wind = np.empty(hours)
+     wind[0] = 0.5
+     for t in range(1, hours):
+         wind[t] = 0.9 * wind[t - 1] + 0.1 * rng.standard_normal()
+     wind = (wind - wind.min()) / (wind.max() - wind.min())
+     wind_season = 0.7 + 0.3 * np.cos(2 * np.pi * (day_of_year - 1) / 365)
+     wind = wind * wind_season
+
+     # Demand: daily pattern + seasonal + noise
+     demand_daily = 0.5 + 0.3 * np.sin(np.pi * (hour_of_day - 5) / 12)
+     demand_season = 1.0 + 0.2 * np.cos(2 * np.pi * (day_of_year - 1) / 365)
+     demand = demand_daily * demand_season + 0.05 * rng.standard_normal(hours)
+     demand = np.clip(demand, 0, None)
+
+     bases = np.stack([solar, wind, demand], axis=-1)  # (hours, 3)
+
+     # --- region modifiers ---
+     region_scales = np.array(
+         [
+             [0.7, 1.3, 1.1],  # north: less solar, more wind, slightly more demand
+             [1.3, 0.7, 0.9],  # south: more solar, less wind, less demand
+             [1.0, 1.0, 1.0],  # east: baseline
+         ]
+     )  # (3 regions, 3 variables)
+
+     # (hours, variables, regions)
+     data_3d = bases[:, :, np.newaxis] * region_scales.T[np.newaxis, :, :]
+
+     # --- scenario scaling ---
+     scenario_scales = np.array([0.8, 1.2])  # low, high
+     # (hours, variables, regions, scenarios)
+     data_4d = (
+         data_3d[:, :, :, np.newaxis]
+         * scenario_scales[np.newaxis, np.newaxis, np.newaxis, :]
+     )
+
+     # Add a small amount of noise per cell
+     data_4d += 0.02 * rng.standard_normal(data_4d.shape)
+     data_4d = np.clip(data_4d, 0, None)
+
+     return xr.DataArray(
+         data_4d,
+         dims=["time", "variable", "region", "scenario"],
+         coords={
+             "time": time,
+             "variable": variables,
+             "region": regions,
+             "scenario": scenarios,
+         },
+         name="energy",
+     )
tsam_xarray/_version.py ADDED
@@ -0,0 +1,34 @@
+ # file generated by setuptools-scm
+ # don't change, don't track in version control
+
+ __all__ = [
+     "__version__",
+     "__version_tuple__",
+     "version",
+     "version_tuple",
+     "__commit_id__",
+     "commit_id",
+ ]
+
+ TYPE_CHECKING = False
+ if TYPE_CHECKING:
+     from typing import Tuple
+     from typing import Union
+
+     VERSION_TUPLE = Tuple[Union[int, str], ...]
+     COMMIT_ID = Union[str, None]
+ else:
+     VERSION_TUPLE = object
+     COMMIT_ID = object
+
+ version: str
+ __version__: str
+ __version_tuple__: VERSION_TUPLE
+ version_tuple: VERSION_TUPLE
+ commit_id: COMMIT_ID
+ __commit_id__: COMMIT_ID
+
+ __version__ = version = '0.0.1a0'
+ __version_tuple__ = version_tuple = (0, 0, 1, 'a0')
+
+ __commit_id__ = commit_id = None
tsam_xarray-0.0.1a0.dist-info/METADATA ADDED
@@ -0,0 +1,79 @@
+ Metadata-Version: 2.4
+ Name: tsam_xarray
+ Version: 0.0.1a0
+ Summary: Lightweight xarray wrapper for tsam time series aggregation
+ License-Expression: MIT
+ Requires-Python: >=3.12
+ Requires-Dist: bottleneck>=1.4
+ Requires-Dist: tsam>=3.2.0
+ Requires-Dist: xarray>=2024.1
+ Description-Content-Type: text/markdown
+
+ # tsam_xarray
+
+ Lightweight [xarray](https://xarray.dev/) wrapper for [tsam](https://github.com/FZJ-IEK3-VSA/tsam) time series aggregation.
+
+ ## Installation
+
+ ```bash
+ pip install tsam_xarray
+ ```
+
+ ## Quick start
+
+ ```python
+ import numpy as np
+ import pandas as pd
+ import xarray as xr
+ import tsam_xarray
+
+ # Create sample data: 30 days of hourly solar and wind data
+ time = pd.date_range("2020-01-01", periods=30 * 24, freq="h")
+ da = xr.DataArray(
+     np.random.default_rng(42).random((len(time), 2)),
+     dims=["time", "variable"],
+     coords={"time": time, "variable": ["solar", "wind"]},
+ )
+
+ # Aggregate to 4 typical days
+ result = tsam_xarray.aggregate(
+     da, time_dim="time", cluster_dim="variable", n_clusters=4,
+ )
+
+ result.typical_periods  # (cluster, timestep, variable)
+ result.cluster_weights  # (cluster,) — days each represents
+ result.accuracy.rmse    # (variable,) — per-variable RMSE
+ result.reconstructed    # same shape as input
+ ```
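+
+ ## Weights
+
+ The `weights` parameter (documented in the `aggregate()` docstring) lets some
+ coordinates count more during clustering; missing entries default to 1.0.
+ For example, reusing `da` from the quick start:
+
+ ```python
+ # Weight solar twice as heavily as wind when forming clusters
+ result = tsam_xarray.aggregate(
+     da,
+     time_dim="time",
+     cluster_dim="variable",
+     n_clusters=4,
+     weights={"solar": 2.0, "wind": 1.0},
+ )
+ ```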
+
+ ## Multi-dimensional data
+
+ ```python
+ # 4D data: (time, variable, region, scenario)
+ da = xr.DataArray(...)
+
+ # Cluster variable × region together; scenario is sliced independently
+ result = tsam_xarray.aggregate(
+     da,
+     time_dim="time",
+     cluster_dim=["variable", "region"],
+     n_clusters=8,
+ )
+
+ result.typical_periods  # (scenario, cluster, timestep, variable, region)
+ ```
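+
+ Results computed on the compact `(cluster, timestep)` grid can be mapped back
+ to the full time axis with `result.disaggregate()`. A small sketch, using
+ `result.typical_periods` as a stand-in for external data on that grid:
+
+ ```python
+ expanded = result.disaggregate(result.typical_periods)
+ # expanded is back on the original "time" axis; with segmentation,
+ # fill the NaN gaps, e.g. expanded.ffill(dim="time")
+ ```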
+
+ All [tsam.aggregate()](https://github.com/FZJ-IEK3-VSA/tsam) keyword arguments pass through:
+
+ ```python
+ from tsam import ClusterConfig, SegmentConfig
+
+ result = tsam_xarray.aggregate(
+     da,
+     time_dim="time",
+     cluster_dim="variable",
+     n_clusters=8,
+     cluster=ClusterConfig(method="kmeans"),
+     segments=SegmentConfig(n_segments=6),
+ )
+ ```
tsam_xarray-0.0.1a0.dist-info/RECORD ADDED
@@ -0,0 +1,8 @@
+ tsam_xarray/__init__.py,sha256=99SJnPmXonkPw_ZZSNSca0x1EVNZCs4465KOG4oQX64,253
+ tsam_xarray/_core.py,sha256=ejIrJlJaVQDk1JgVT5SX_rmbOCqP2wZXxLVTEJVLQQ8,14855
+ tsam_xarray/_result.py,sha256=44O5AomGDnvDROMqmZpVRlDBfrVi_jNdwDThJPBhJZc,8187
+ tsam_xarray/_sample_data.py,sha256=yi2f5hPOUV3uUCX2kL4EfS3GV-obgQGnmidKFO5jSG4,3534
+ tsam_xarray/_version.py,sha256=N6jqqryygxntTpQZELt2H0LAGZ4wKgVPTWGAhPzx98U,712
+ tsam_xarray-0.0.1a0.dist-info/METADATA,sha256=PSXRFH12FrJSQfaSymU7PMzUFGMtOW1GepVKSPK9cDc,1980
+ tsam_xarray-0.0.1a0.dist-info/WHEEL,sha256=QccIxa26bgl1E6uMy58deGWi-0aeIkkangHcxk2kWfw,87
+ tsam_xarray-0.0.1a0.dist-info/RECORD,,
tsam_xarray-0.0.1a0.dist-info/WHEEL ADDED
@@ -0,0 +1,4 @@
+ Wheel-Version: 1.0
+ Generator: hatchling 1.29.0
+ Root-Is-Purelib: true
+ Tag: py3-none-any