tsam 3.0.0__tar.gz → 3.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (57)
  1. {tsam-3.0.0/src/tsam.egg-info → tsam-3.1.0}/PKG-INFO +10 -11
  2. {tsam-3.0.0 → tsam-3.1.0}/README.md +2 -2
  3. {tsam-3.0.0 → tsam-3.1.0}/environment.yml +2 -2
  4. {tsam-3.0.0 → tsam-3.1.0}/pyproject.toml +5 -5
  5. {tsam-3.0.0 → tsam-3.1.0}/src/tsam/api.py +1 -0
  6. {tsam-3.0.0 → tsam-3.1.0}/src/tsam/config.py +42 -3
  7. {tsam-3.0.0 → tsam-3.1.0}/src/tsam/timeseriesaggregation.py +85 -5
  8. {tsam-3.0.0 → tsam-3.1.0/src/tsam.egg-info}/PKG-INFO +10 -11
  9. {tsam-3.0.0 → tsam-3.1.0}/src/tsam.egg-info/requires.txt +2 -3
  10. tsam-3.1.0/test/test_extremePeriods.py +237 -0
  11. tsam-3.0.0/test/test_extremePeriods.py +0 -87
  12. {tsam-3.0.0 → tsam-3.1.0}/LICENSE.txt +0 -0
  13. {tsam-3.0.0 → tsam-3.1.0}/MANIFEST.in +0 -0
  14. {tsam-3.0.0 → tsam-3.1.0}/setup.cfg +0 -0
  15. {tsam-3.0.0 → tsam-3.1.0}/src/tsam/__init__.py +0 -0
  16. {tsam-3.0.0 → tsam-3.1.0}/src/tsam/exceptions.py +0 -0
  17. {tsam-3.0.0 → tsam-3.1.0}/src/tsam/hyperparametertuning.py +0 -0
  18. {tsam-3.0.0 → tsam-3.1.0}/src/tsam/periodAggregation.py +0 -0
  19. {tsam-3.0.0 → tsam-3.1.0}/src/tsam/plot.py +0 -0
  20. {tsam-3.0.0 → tsam-3.1.0}/src/tsam/py.typed +0 -0
  21. {tsam-3.0.0 → tsam-3.1.0}/src/tsam/representations.py +0 -0
  22. {tsam-3.0.0 → tsam-3.1.0}/src/tsam/result.py +0 -0
  23. {tsam-3.0.0 → tsam-3.1.0}/src/tsam/tuning.py +0 -0
  24. {tsam-3.0.0 → tsam-3.1.0}/src/tsam/utils/__init__.py +0 -0
  25. {tsam-3.0.0 → tsam-3.1.0}/src/tsam/utils/durationRepresentation.py +0 -0
  26. {tsam-3.0.0 → tsam-3.1.0}/src/tsam/utils/k_maxoids.py +0 -0
  27. {tsam-3.0.0 → tsam-3.1.0}/src/tsam/utils/k_medoids_contiguity.py +0 -0
  28. {tsam-3.0.0 → tsam-3.1.0}/src/tsam/utils/k_medoids_exact.py +0 -0
  29. {tsam-3.0.0 → tsam-3.1.0}/src/tsam/utils/segmentation.py +0 -0
  30. {tsam-3.0.0 → tsam-3.1.0}/src/tsam.egg-info/SOURCES.txt +0 -0
  31. {tsam-3.0.0 → tsam-3.1.0}/src/tsam.egg-info/dependency_links.txt +0 -0
  32. {tsam-3.0.0 → tsam-3.1.0}/src/tsam.egg-info/top_level.txt +0 -0
  33. {tsam-3.0.0 → tsam-3.1.0}/test/test_accuracyIndicators.py +0 -0
  34. {tsam-3.0.0 → tsam-3.1.0}/test/test_adjacent_periods.py +0 -0
  35. {tsam-3.0.0 → tsam-3.1.0}/test/test_aggregate_hiearchical.py +0 -0
  36. {tsam-3.0.0 → tsam-3.1.0}/test/test_api_equivalence.py +0 -0
  37. {tsam-3.0.0 → tsam-3.1.0}/test/test_assert_raises.py +0 -0
  38. {tsam-3.0.0 → tsam-3.1.0}/test/test_averaging.py +0 -0
  39. {tsam-3.0.0 → tsam-3.1.0}/test/test_cluster_order.py +0 -0
  40. {tsam-3.0.0 → tsam-3.1.0}/test/test_clustering_e2e.py +0 -0
  41. {tsam-3.0.0 → tsam-3.1.0}/test/test_durationCurve.py +0 -0
  42. {tsam-3.0.0 → tsam-3.1.0}/test/test_durationRepresentation.py +0 -0
  43. {tsam-3.0.0 → tsam-3.1.0}/test/test_hierarchical.py +0 -0
  44. {tsam-3.0.0 → tsam-3.1.0}/test/test_hypertuneAggregation.py +0 -0
  45. {tsam-3.0.0 → tsam-3.1.0}/test/test_k_maxoids.py +0 -0
  46. {tsam-3.0.0 → tsam-3.1.0}/test/test_k_medoids.py +0 -0
  47. {tsam-3.0.0 → tsam-3.1.0}/test/test_k_medoids_contiguity.py +0 -0
  48. {tsam-3.0.0 → tsam-3.1.0}/test/test_minmaxRepresentation.py +0 -0
  49. {tsam-3.0.0 → tsam-3.1.0}/test/test_new_api.py +0 -0
  50. {tsam-3.0.0 → tsam-3.1.0}/test/test_preprocess.py +0 -0
  51. {tsam-3.0.0 → tsam-3.1.0}/test/test_properties.py +0 -0
  52. {tsam-3.0.0 → tsam-3.1.0}/test/test_reconstruct_samemean_segmentation.py +0 -0
  53. {tsam-3.0.0 → tsam-3.1.0}/test/test_samemean.py +0 -0
  54. {tsam-3.0.0 → tsam-3.1.0}/test/test_segmentation.py +0 -0
  55. {tsam-3.0.0 → tsam-3.1.0}/test/test_subhourlyResolution.py +0 -0
  56. {tsam-3.0.0 → tsam-3.1.0}/test/test_subhourly_periods.py +0 -0
  57. {tsam-3.0.0 → tsam-3.1.0}/test/test_weightingFactors.py +0 -0
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: tsam
- Version: 3.0.0
+ Version: 3.1.0
  Summary: Time series aggregation module (tsam) to create typical periods
  Author-email: Leander Kotzur <leander.kotzur@googlemail.com>, Maximilian Hoffmann <maximilian.hoffmann@julumni.fz-juelich.de>
  Maintainer-email: Julian Belina <j.belina@fz-juelich.de>
@@ -49,14 +49,8 @@ Requires-Dist: pandas<=3.0.0,>=2.2.0
  Requires-Dist: numpy<=2.4.1,>=1.22.4
  Requires-Dist: pyomo<=6.95,>=6.4.8
  Requires-Dist: networkx<=3.6.1,>=2.5
- Requires-Dist: tqdm<=4.67.1,>=4.21.0
+ Requires-Dist: tqdm<=4.67.2,>=4.21.0
  Requires-Dist: highspy<=1.12.0,>=1.7.2
- Provides-Extra: plot
- Requires-Dist: plotly>=5.0.0; extra == "plot"
- Provides-Extra: notebooks
- Requires-Dist: notebook>=7.5.0; extra == "notebooks"
- Requires-Dist: plotly>=5.0.0; extra == "notebooks"
- Requires-Dist: matplotlib; extra == "notebooks"
  Provides-Extra: develop
  Requires-Dist: pytest; extra == "develop"
  Requires-Dist: pytest-cov; extra == "develop"
@@ -65,6 +59,7 @@ Requires-Dist: codecov; extra == "develop"
  Requires-Dist: sphinx; extra == "develop"
  Requires-Dist: sphinx-autobuild; extra == "develop"
  Requires-Dist: sphinx_book_theme; extra == "develop"
+ Requires-Dist: nbsphinx; extra == "develop"
  Requires-Dist: twine; extra == "develop"
  Requires-Dist: nbval; extra == "develop"
  Requires-Dist: ruff; extra == "develop"
@@ -73,7 +68,11 @@ Requires-Dist: pandas-stubs; extra == "develop"
  Requires-Dist: pre-commit; extra == "develop"
  Requires-Dist: plotly>=5.0.0; extra == "develop"
  Requires-Dist: notebook>=7.5.0; extra == "develop"
- Requires-Dist: matplotlib; extra == "develop"
+ Provides-Extra: plot
+ Requires-Dist: plotly>=5.0.0; extra == "plot"
+ Provides-Extra: notebooks
+ Requires-Dist: notebook>=7.5.0; extra == "notebooks"
+ Requires-Dist: plotly>=5.0.0; extra == "notebooks"
  Dynamic: license-file

  [![Version](https://img.shields.io/pypi/v/tsam.svg)](https://pypi.python.org/pypi/tsam) [![Conda Version](https://img.shields.io/conda/vn/conda-forge/tsam.svg)](https://anaconda.org/conda-forge/tsam) [![Documentation Status](https://readthedocs.org/projects/tsam/badge/?version=latest)](https://tsam.readthedocs.io/en/latest/) [![PyPI - License](https://img.shields.io/pypi/l/tsam)]((https://github.com/FZJ-IEK3-VSA/tsam/blob/master/LICENSE.txt)) [![codecov](https://codecov.io/gh/FZJ-IEK3-VSA/tsam/branch/master/graph/badge.svg)](https://codecov.io/gh/FZJ-IEK3-VSA/tsam)
@@ -217,9 +216,9 @@ cluster_representatives = aggregation.createTypicalPeriods()
  ### Detailed examples
  Detailed examples can be found at:/docs/source/examples_notebooks/

- A [**first example**](/docs/source/examples_notebooks/aggregation_example.ipynb) shows the capabilites of tsam as jupyter notebook.
+ A [**quickstart example**](/docs/source/examples_notebooks/quickstart.ipynb) shows the capabilities of tsam as a Jupyter notebook.

- A [**second example**](/docs/source/examples_notebooks/aggregation_optiinput.ipynb) shows in more detail how to access the relevant aggregation results required for paramtrizing e.g. an optimization.
+ A [**second example**](/docs/source/examples_notebooks/optimization_input.ipynb) shows in more detail how to access the relevant aggregation results required for parameterizing e.g. an optimization.

  The example time series are based on a department [publication](https://www.mdpi.com/1996-1073/10/3/361) and the [test reference years of the DWD](https://www.dwd.de/DE/leistungen/testreferenzjahre/testreferenzjahre.html).

@@ -139,9 +139,9 @@ cluster_representatives = aggregation.createTypicalPeriods()
  ### Detailed examples
  Detailed examples can be found at:/docs/source/examples_notebooks/

- A [**first example**](/docs/source/examples_notebooks/aggregation_example.ipynb) shows the capabilites of tsam as jupyter notebook.
+ A [**quickstart example**](/docs/source/examples_notebooks/quickstart.ipynb) shows the capabilities of tsam as a Jupyter notebook.

- A [**second example**](/docs/source/examples_notebooks/aggregation_optiinput.ipynb) shows in more detail how to access the relevant aggregation results required for paramtrizing e.g. an optimization.
+ A [**second example**](/docs/source/examples_notebooks/optimization_input.ipynb) shows in more detail how to access the relevant aggregation results required for parameterizing e.g. an optimization.

  The example time series are based on a department [publication](https://www.mdpi.com/1996-1073/10/3/361) and the [test reference years of the DWD](https://www.dwd.de/DE/leistungen/testreferenzjahre/testreferenzjahre.html).

@@ -10,9 +10,8 @@ dependencies:
  - numpy >=1.22.4,<=2.4.1
  - pyomo >=6.4.3,<=6.95
  - networkx >=2.5,<=3.6.1
- - tqdm >=4.21.0,<=4.67.1
+ - tqdm >=4.21.0,<=4.67.2
  - highspy >=1.7.2,<=1.12.0
- - matplotlib
  - plotly >=5.0.0
  # Testing
  - pytest
@@ -23,6 +22,7 @@ dependencies:
  # Documentation
  - sphinx
  - sphinx-autobuild
+ - nbsphinx
  - sphinx-book-theme
  # Linting and formatting
  - ruff
@@ -5,7 +5,7 @@ build-backend = "setuptools.build_meta"

  [project]
  name = "tsam"
- version = "3.0.0"
+ version = "3.1.0"
  description = "Time series aggregation module (tsam) to create typical periods"
  authors = [
  { name = "Leander Kotzur", email = "leander.kotzur@googlemail.com" },
@@ -37,15 +37,13 @@ dependencies = [
  "numpy>=1.22.4,<=2.4.1",
  "pyomo>=6.4.8,<=6.95",
  "networkx>=2.5,<=3.6.1",
- "tqdm>=4.21.0,<=4.67.1",
+ "tqdm>=4.21.0,<=4.67.2",
  "highspy>=1.7.2,<=1.12.0",
  ]

  requires-python = ">=3.10,<3.15"

  [project.optional-dependencies]
- plot = ["plotly>=5.0.0"]
- notebooks = ["notebook>=7.5.0", "plotly>=5.0.0", "matplotlib"]
  develop = [
  "pytest",
  "pytest-cov",
@@ -54,6 +52,7 @@ develop = [
  "sphinx",
  "sphinx-autobuild",
  "sphinx_book_theme",
+ "nbsphinx",
  "twine",
  "nbval",
  "ruff",
@@ -62,8 +61,9 @@ develop = [
  "pre-commit",
  "plotly>=5.0.0",
  "notebook>=7.5.0",
- "matplotlib",
  ]
+ plot = ["plotly>=5.0.0"]
+ notebooks = ["notebook>=7.5.0", "plotly>=5.0.0"]

  [tool.setuptools.packages.find]
  where = ["src"]
@@ -536,6 +536,7 @@ def _build_old_params(
  params["addPeakMin"] = extremes.min_value
  params["addMeanMax"] = extremes.max_period
  params["addMeanMin"] = extremes.min_period
+ params["extremePreserveNumClusters"] = extremes._effective_preserve_n_clusters
  else:
  params["extremePeriodMethod"] = "None"

@@ -606,9 +606,10 @@ class ClusteringResult:
  ):
  warnings.warn(
  "The 'replace' extreme method creates a hybrid cluster representation "
- "(some columns from the medoid, some from the extreme period) that cannot "
- "be perfectly reproduced during transfer. The transferred result will use "
- "the medoid representation for all columns instead of the hybrid values. "
+ "(some columns from the cluster representative, some from the extreme period) "
+ "that cannot be perfectly reproduced during transfer. The transferred result "
+ "will use the stored cluster center periods directly, without the extreme "
+ "value injection that was applied during the original aggregation. "
  "For exact transfer, use 'append' or 'new_cluster' extreme methods.",
  UserWarning,
  stacklevel=2,
@@ -785,6 +786,18 @@ class ExtremeConfig:
  min_period : list[str], optional
  Column names where the period with minimum total should be preserved.
  Example: ["wind_generation"] to preserve lowest wind day.
+
+ preserve_n_clusters : bool, optional
+ Whether extreme periods count toward n_clusters.
+ - True: Extremes are included in n_clusters
+ (e.g., n_clusters=10 with 2 extremes = 8 from clustering + 2 extremes)
+ - False: Extremes are added on top of n_clusters (old api behaviour)
+ (e.g., n_clusters=10 + 2 extremes = 12 final clusters)
+ Only affects "append" or "new_cluster" methods ("replace" never changes n_clusters).
+
+ .. deprecated::
+ The default will change from False to True in a future release.
+ Set explicitly to silence the FutureWarning.
  """

  method: ExtremeMethod = "append"
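In practice the two modes differ only in the final cluster count. A minimal usage sketch of the new API, mirroring the tests added further down (the CSV path is a placeholder; column names "GHI" and "T" come from the test data):

    import pandas as pd

    import tsam
    from tsam.config import ExtremeConfig

    raw = pd.read_csv("testdata.csv", index_col=0)  # placeholder path

    # preserve_n_clusters=True: the 2 extremes are carved out of the 10 requested clusters
    result = tsam.aggregate(
        raw,
        n_clusters=10,
        extremes=ExtremeConfig(
            method="append",
            max_value=["GHI"],  # preserve the global solar peak
            min_value=["T"],    # preserve the lowest temperature
            preserve_n_clusters=True,
        ),
    )
    assert result.n_clusters == 10  # would be 12 with preserve_n_clusters=False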
@@ -792,6 +805,18 @@ class ExtremeConfig:
  min_value: list[str] = field(default_factory=list)
  max_period: list[str] = field(default_factory=list)
  min_period: list[str] = field(default_factory=list)
+ preserve_n_clusters: bool | None = None
+
+ def __post_init__(self) -> None:
+ """Emit FutureWarning if preserve_n_clusters is not explicitly set."""
+ if self.preserve_n_clusters is None and self.has_extremes():
+ warnings.warn(
+ "preserve_n_clusters currently defaults to False to match behaviour of the old api, "
+ "but will default to True in a future release. Set preserve_n_clusters explicitly "
+ "to silence this warning.",
+ FutureWarning,
+ stacklevel=3,
+ )

  def has_extremes(self) -> bool:
  """Check if any extreme periods are configured."""
@@ -799,6 +824,17 @@ class ExtremeConfig:
  self.max_value or self.min_value or self.max_period or self.min_period
  )

+ @property
+ def _effective_preserve_n_clusters(self) -> bool:
+ """Get the effective value for preserve_n_clusters.
+
+ Returns False if not explicitly set (current default behavior).
+ In a future release, the default will change to True.
+ """
+ if self.preserve_n_clusters is None:
+ return False  # Current default, will change to True in future
+ return self.preserve_n_clusters
+
  def to_dict(self) -> dict[str, Any]:
  """Convert to dictionary for JSON serialization."""
  result: dict[str, Any] = {}
@@ -812,6 +848,8 @@ class ExtremeConfig:
  result["max_period"] = self.max_period
  if self.min_period:
  result["min_period"] = self.min_period
+ if self.preserve_n_clusters is not None:
+ result["preserve_n_clusters"] = self.preserve_n_clusters
  return result

  @classmethod
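Serialization keeps the tri-state intact: an unset value (`None`) is simply omitted from the dict, so configs written against 3.0.0 deserialize unchanged, while explicit values round-trip via `from_dict` in the following hunk. A minimal sketch, mirroring `test_preserve_n_clusters_serialization` further down ("Load" is a placeholder column name):

    from tsam.config import ExtremeConfig

    config = ExtremeConfig(method="append", max_value=["Load"], preserve_n_clusters=True)
    d = config.to_dict()
    assert d["preserve_n_clusters"] is True   # explicit values are written out

    restored = ExtremeConfig.from_dict(d)
    assert restored.preserve_n_clusters is True  # and survive the round-trip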
@@ -823,6 +861,7 @@ class ExtremeConfig:
  min_value=data.get("min_value", []),
  max_period=data.get("max_period", []),
  min_period=data.get("min_period", []),
+ preserve_n_clusters=data.get("preserve_n_clusters"),
  )

@@ -132,6 +132,7 @@ class TimeSeriesAggregation:
  weightDict=None,
  segmentation=False,
  extremePeriodMethod="None",
+ extremePreserveNumClusters=False,
  representationMethod=None,
  representationDict=None,
  distributionPeriodWise=True,
@@ -318,6 +319,8 @@ class TimeSeriesAggregation:

  self.extremePeriodMethod = extremePeriodMethod

+ self.extremePreserveNumClusters = extremePreserveNumClusters
+
  self.evalSumPeriods = evalSumPeriods

  self.sortValues = sortValues
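On the legacy class the same switch is exposed under the camelCase name shown above. A usage sketch (parameter values borrowed from the existing extreme-period test; the CSV path is a placeholder):

    import pandas as pd

    import tsam.timeseriesaggregation as tsam_legacy

    raw = pd.read_csv("testdata.csv", index_col=0)  # placeholder path

    aggregation = tsam_legacy.TimeSeriesAggregation(
        raw,
        noTypicalPeriods=8,
        hoursPerPeriod=24,
        clusterMethod="hierarchical",
        extremePeriodMethod="append",
        addPeakMax=["GHI"],
        # New in 3.1.0: reserve the extreme period inside the 8 requested clusters.
        # The default False keeps the 3.0.0 behaviour (8 clusters + 1 extreme = 9).
        extremePreserveNumClusters=True,
    )
    typical_periods = aggregation.createTypicalPeriods()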
@@ -683,6 +686,46 @@ class TimeSeriesAggregation:

  return unnormalizedTimeSeries

+ def _countExtremePeriods(self, groupedSeries):
+ """
+ Count unique extreme periods without modifying any state.
+
+ Used by extremePreserveNumClusters to determine how many clusters
+ to reserve for extreme periods before clustering.
+
+ Note: The extreme-finding logic (idxmax/idxmin on peak/mean) must
+ stay in sync with _addExtremePeriods. This is intentionally separate
+ because _addExtremePeriods also filters out periods that are already
+ cluster centers (not known at count time).
+ """
+ extremePeriodIndices = set()
+
+ # Only iterate over columns that are actually in extreme lists
+ extreme_columns = (
+ set(self.addPeakMax)
+ | set(self.addPeakMin)
+ | set(self.addMeanMax)
+ | set(self.addMeanMin)
+ )
+
+ for column in extreme_columns:
+ col_data = groupedSeries[column]
+
+ if column in self.addPeakMax:
+ extremePeriodIndices.add(col_data.max(axis=1).idxmax())
+ if column in self.addPeakMin:
+ extremePeriodIndices.add(col_data.min(axis=1).idxmin())
+
+ # Compute mean only once if needed for either addMeanMax or addMeanMin
+ if column in self.addMeanMax or column in self.addMeanMin:
+ mean_series = col_data.mean(axis=1)
+ if column in self.addMeanMax:
+ extremePeriodIndices.add(mean_series.idxmax())
+ if column in self.addMeanMin:
+ extremePeriodIndices.add(mean_series.idxmin())
+
+ return len(extremePeriodIndices)
+
  def _addExtremePeriods(
  self,
  groupedSeries,
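The set-based counting above matters when one period is extreme in more than one respect. A toy illustration of the dedup logic (shapes assumed: rows are periods, columns are time steps within a period; not tsam's internal layout verbatim):

    import numpy as np
    import pandas as pd

    rng = np.random.default_rng(42)
    profiles = pd.DataFrame(rng.random((5, 24)))  # 5 periods x 24 time steps

    indices = set()
    indices.add(profiles.max(axis=1).idxmax())   # period containing the global peak
    indices.add(profiles.mean(axis=1).idxmax())  # period with the highest mean
    # If the same period holds both extremes, the set collapses them to one entry,
    # so only a single cluster slot is reserved for it.
    print(len(indices))  # 1 or 2, depending on overlap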
@@ -983,7 +1026,7 @@ class TimeSeriesAggregation:
  # Reshape back to 2D: (n_clusters, n_cols * n_timesteps)
  return arr.reshape(n_clusters, -1)

- def _clusterSortedPeriods(self, candidates, n_init=20):
+ def _clusterSortedPeriods(self, candidates, n_init=20, n_clusters=None):
  """
  Runs the clustering algorithms for the sorted profiles within the period
  instead of the original profiles. (Duration curve clustering)
@@ -1001,13 +1044,16 @@ class TimeSeriesAggregation:
  n_periods, -1
  )

+ if n_clusters is None:
+ n_clusters = self.noTypicalPeriods
+
  (
  _altClusterCenters,
  self.clusterCenterIndices,
  clusterOrders_C,
  ) = aggregatePeriods(
  sortedClusterValues,
- n_clusters=self.noTypicalPeriods,
+ n_clusters=n_clusters,
  n_iter=30,
  solver=self.solver,
  clusterMethod=self.clusterMethod,
@@ -1052,6 +1098,41 @@ class TimeSeriesAggregation:
  """
  self._preProcessTimeSeries()

+ # Warn if extremePreserveNumClusters is ignored due to predefined cluster order
+ if (
+ self.predefClusterOrder is not None
+ and self.extremePreserveNumClusters
+ and self.extremePeriodMethod not in ("None", "replace_cluster_center")
+ ):
+ warnings.warn(
+ "extremePreserveNumClusters=True is ignored when predefClusterOrder "
+ "is set. Extreme periods will be appended via _addExtremePeriods "
+ "without reserving clusters upfront. To avoid this warning, set "
+ "extremePreserveNumClusters=False or remove predefClusterOrder.",
+ UserWarning,
+ stacklevel=2,
+ )
+
+ # Count extreme periods upfront if include_in_count is True
+ # Note: replace_cluster_center doesn't add new clusters, so skip
+ n_extremes = 0
+ if (
+ self.extremePreserveNumClusters
+ and self.extremePeriodMethod not in ("None", "replace_cluster_center")
+ and self.predefClusterOrder is None  # Don't count for predefined
+ ):
+ n_extremes = self._countExtremePeriods(self.normalizedPeriodlyProfiles)
+
+ if self.noTypicalPeriods <= n_extremes:
+ raise ValueError(
+ f"n_clusters ({self.noTypicalPeriods}) must be greater than "
+ f"the number of extreme periods ({n_extremes}) when "
+ "preserve_n_clusters=True"
+ )
+
+ # Compute effective number of clusters for the clustering algorithm
+ effective_n_clusters = self.noTypicalPeriods - n_extremes
+
  # check for additional cluster parameters
  if self.evalSumPeriods:
  evaluationValues = (
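To make the reservation arithmetic concrete, a worked example under assumed values (two deduplicated extreme periods against ten requested typical periods):

    noTypicalPeriods = 10
    n_extremes = 2  # e.g. one addPeakMax period and one addMeanMin period, deduplicated
    effective_n_clusters = noTypicalPeriods - n_extremes  # 8 slots left for clustering
    assert effective_n_clusters == 8
    # noTypicalPeriods <= n_extremes would trigger the ValueError shown above.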
@@ -1096,7 +1177,7 @@ class TimeSeriesAggregation:
  self._clusterOrder,
  ) = aggregatePeriods(
  candidates,
- n_clusters=self.noTypicalPeriods,
+ n_clusters=effective_n_clusters,
  n_iter=100,
  solver=self.solver,
  clusterMethod=self.clusterMethod,
@@ -1107,7 +1188,7 @@ class TimeSeriesAggregation:
  )
  else:
  self.clusterCenters, self._clusterOrder = self._clusterSortedPeriods(
- candidates
+ candidates, n_clusters=effective_n_clusters
  )
  self.clusteringDuration = time.time() - cluster_duration

@@ -1117,7 +1198,6 @@ class TimeSeriesAggregation:
  self.clusterPeriods.append(cluster_center[:delClusterParams])

  if not self.extremePeriodMethod == "None":
- # overwrite clusterPeriods and clusterOrder
  (
  self.clusterPeriods,
  self._clusterOrder,
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: tsam
- Version: 3.0.0
+ Version: 3.1.0
  Summary: Time series aggregation module (tsam) to create typical periods
  Author-email: Leander Kotzur <leander.kotzur@googlemail.com>, Maximilian Hoffmann <maximilian.hoffmann@julumni.fz-juelich.de>
  Maintainer-email: Julian Belina <j.belina@fz-juelich.de>
@@ -49,14 +49,8 @@ Requires-Dist: pandas<=3.0.0,>=2.2.0
  Requires-Dist: numpy<=2.4.1,>=1.22.4
  Requires-Dist: pyomo<=6.95,>=6.4.8
  Requires-Dist: networkx<=3.6.1,>=2.5
- Requires-Dist: tqdm<=4.67.1,>=4.21.0
+ Requires-Dist: tqdm<=4.67.2,>=4.21.0
  Requires-Dist: highspy<=1.12.0,>=1.7.2
- Provides-Extra: plot
- Requires-Dist: plotly>=5.0.0; extra == "plot"
- Provides-Extra: notebooks
- Requires-Dist: notebook>=7.5.0; extra == "notebooks"
- Requires-Dist: plotly>=5.0.0; extra == "notebooks"
- Requires-Dist: matplotlib; extra == "notebooks"
  Provides-Extra: develop
  Requires-Dist: pytest; extra == "develop"
  Requires-Dist: pytest-cov; extra == "develop"
@@ -65,6 +59,7 @@ Requires-Dist: codecov; extra == "develop"
  Requires-Dist: sphinx; extra == "develop"
  Requires-Dist: sphinx-autobuild; extra == "develop"
  Requires-Dist: sphinx_book_theme; extra == "develop"
+ Requires-Dist: nbsphinx; extra == "develop"
  Requires-Dist: twine; extra == "develop"
  Requires-Dist: nbval; extra == "develop"
  Requires-Dist: ruff; extra == "develop"
@@ -73,7 +68,11 @@ Requires-Dist: pandas-stubs; extra == "develop"
  Requires-Dist: pre-commit; extra == "develop"
  Requires-Dist: plotly>=5.0.0; extra == "develop"
  Requires-Dist: notebook>=7.5.0; extra == "develop"
- Requires-Dist: matplotlib; extra == "develop"
+ Provides-Extra: plot
+ Requires-Dist: plotly>=5.0.0; extra == "plot"
+ Provides-Extra: notebooks
+ Requires-Dist: notebook>=7.5.0; extra == "notebooks"
+ Requires-Dist: plotly>=5.0.0; extra == "notebooks"
  Dynamic: license-file

  [![Version](https://img.shields.io/pypi/v/tsam.svg)](https://pypi.python.org/pypi/tsam) [![Conda Version](https://img.shields.io/conda/vn/conda-forge/tsam.svg)](https://anaconda.org/conda-forge/tsam) [![Documentation Status](https://readthedocs.org/projects/tsam/badge/?version=latest)](https://tsam.readthedocs.io/en/latest/) [![PyPI - License](https://img.shields.io/pypi/l/tsam)]((https://github.com/FZJ-IEK3-VSA/tsam/blob/master/LICENSE.txt)) [![codecov](https://codecov.io/gh/FZJ-IEK3-VSA/tsam/branch/master/graph/badge.svg)](https://codecov.io/gh/FZJ-IEK3-VSA/tsam)
@@ -217,9 +216,9 @@ cluster_representatives = aggregation.createTypicalPeriods()
  ### Detailed examples
  Detailed examples can be found at:/docs/source/examples_notebooks/

- A [**first example**](/docs/source/examples_notebooks/aggregation_example.ipynb) shows the capabilites of tsam as jupyter notebook.
+ A [**quickstart example**](/docs/source/examples_notebooks/quickstart.ipynb) shows the capabilities of tsam as a Jupyter notebook.

- A [**second example**](/docs/source/examples_notebooks/aggregation_optiinput.ipynb) shows in more detail how to access the relevant aggregation results required for paramtrizing e.g. an optimization.
+ A [**second example**](/docs/source/examples_notebooks/optimization_input.ipynb) shows in more detail how to access the relevant aggregation results required for parameterizing e.g. an optimization.

  The example time series are based on a department [publication](https://www.mdpi.com/1996-1073/10/3/361) and the [test reference years of the DWD](https://www.dwd.de/DE/leistungen/testreferenzjahre/testreferenzjahre.html).

@@ -3,7 +3,7 @@ pandas<=3.0.0,>=2.2.0
  numpy<=2.4.1,>=1.22.4
  pyomo<=6.95,>=6.4.8
  networkx<=3.6.1,>=2.5
- tqdm<=4.67.1,>=4.21.0
+ tqdm<=4.67.2,>=4.21.0
  highspy<=1.12.0,>=1.7.2

  [develop]
@@ -14,6 +14,7 @@ codecov
  sphinx
  sphinx-autobuild
  sphinx_book_theme
+ nbsphinx
  twine
  nbval
  ruff
@@ -22,12 +23,10 @@ pandas-stubs
  pre-commit
  plotly>=5.0.0
  notebook>=7.5.0
- matplotlib

  [notebooks]
  notebook>=7.5.0
  plotly>=5.0.0
- matplotlib

  [plot]
  plotly>=5.0.0
@@ -0,0 +1,237 @@
+ import warnings
+
+ import numpy as np
+ import pandas as pd
+ import pytest
+
+ import tsam
+ import tsam.timeseriesaggregation as tsam_legacy
+ from conftest import TESTDATA_CSV
+ from tsam.config import ExtremeConfig
+
+
+ def test_extremePeriods():
+ hoursPerPeriod = 24
+
+ noTypicalPeriods = 8
+
+ raw = pd.read_csv(TESTDATA_CSV, index_col=0)
+
+ aggregation1 = tsam_legacy.TimeSeriesAggregation(
+ raw,
+ noTypicalPeriods=noTypicalPeriods,
+ hoursPerPeriod=hoursPerPeriod,
+ clusterMethod="hierarchical",
+ rescaleClusterPeriods=False,
+ extremePeriodMethod="new_cluster_center",
+ addPeakMax=["GHI"],
+ )
+
+ aggregation2 = tsam_legacy.TimeSeriesAggregation(
+ raw,
+ noTypicalPeriods=noTypicalPeriods,
+ hoursPerPeriod=hoursPerPeriod,
+ clusterMethod="hierarchical",
+ rescaleClusterPeriods=False,
+ extremePeriodMethod="append",
+ addPeakMax=["GHI"],
+ )
+
+ aggregation3 = tsam_legacy.TimeSeriesAggregation(
+ raw,
+ noTypicalPeriods=noTypicalPeriods,
+ hoursPerPeriod=hoursPerPeriod,
+ clusterMethod="hierarchical",
+ rescaleClusterPeriods=False,
+ extremePeriodMethod="replace_cluster_center",
+ addPeakMax=["GHI"],
+ )
+
+ # make sure that the RMSE for new cluster centers (reassigning points to the exxtreme point if the distance to it is
+ # smaller)is bigger than for appending just one extreme period
+ np.testing.assert_array_less(
+ aggregation1.accuracyIndicators().loc["GHI", "RMSE"],
+ aggregation2.accuracyIndicators().loc["GHI", "RMSE"],
+ )
+
+ # make sure that the RMSE for appending the extreme period is smaller than for replacing the cluster center by the
+ # extreme period (conservative assumption)
+ np.testing.assert_array_less(
+ aggregation2.accuracyIndicators().loc["GHI", "RMSE"],
+ aggregation3.accuracyIndicators().loc["GHI", "RMSE"],
+ )
+
+ # check if addMeanMax and addMeanMin are working
+ aggregation4 = tsam_legacy.TimeSeriesAggregation(
+ raw,
+ noTypicalPeriods=noTypicalPeriods,
+ hoursPerPeriod=hoursPerPeriod,
+ clusterMethod="hierarchical",
+ rescaleClusterPeriods=False,
+ extremePeriodMethod="append",
+ addMeanMax=["GHI"],
+ addMeanMin=["GHI"],
+ )
+
+ origData = aggregation4.predictOriginalData()
+
+ np.testing.assert_array_almost_equal(
+ raw.groupby(np.arange(len(raw)) // 24).mean().max().loc["GHI"],
+ origData.groupby(np.arange(len(origData)) // 24).mean().max().loc["GHI"],
+ decimal=6,
+ )
+
+ np.testing.assert_array_almost_equal(
+ raw.groupby(np.arange(len(raw)) // 24).mean().min().loc["GHI"],
+ origData.groupby(np.arange(len(origData)) // 24).mean().min().loc["GHI"],
+ decimal=6,
+ )
+
+
+ def test_preserve_n_clusters_exact_clusters_append():
+ """Final n_clusters equals requested when preserve_n_clusters=True with append method."""
+ raw = pd.read_csv(TESTDATA_CSV, index_col=0)
+
+ n_clusters = 10
+ result = tsam.aggregate(
+ raw,
+ n_clusters=n_clusters,
+ extremes=ExtremeConfig(
+ method="append",
+ max_value=["GHI"],
+ min_value=["T"],
+ preserve_n_clusters=True,
+ ),
+ )
+
+ # With preserve_n_clusters=True, final cluster count should equal n_clusters
+ assert result.n_clusters == n_clusters
+
+
+ def test_preserve_n_clusters_exact_clusters_new_cluster():
+ """Final n_clusters equals requested when preserve_n_clusters=True with new_cluster method."""
+ raw = pd.read_csv(TESTDATA_CSV, index_col=0)
+
+ n_clusters = 10
+ result = tsam.aggregate(
+ raw,
+ n_clusters=n_clusters,
+ extremes=ExtremeConfig(
+ method="new_cluster",
+ max_value=["GHI"],
+ preserve_n_clusters=True,
+ ),
+ )
+
+ # With preserve_n_clusters=True, final cluster count should equal n_clusters
+ assert result.n_clusters == n_clusters
+
+
+ def test_preserve_n_clusters_false_adds_extra_clusters():
+ """Default behavior: extremes are added on top of n_clusters."""
+ raw = pd.read_csv(TESTDATA_CSV, index_col=0)
+
+ n_clusters = 10
+ result = tsam.aggregate(
+ raw,
+ n_clusters=n_clusters,
+ extremes=ExtremeConfig(
+ method="append",
+ max_value=["GHI"],
+ min_value=["T"],
+ preserve_n_clusters=False,  # Default
+ ),
+ )
+
+ # With preserve_n_clusters=False (default), extremes are added on top
+ # So final count should be > n_clusters (n_clusters + n_extremes)
+ assert result.n_clusters > n_clusters
+
+
+ def test_preserve_n_clusters_validation_error():
+ """Error if n_clusters <= n_extremes when preserve_n_clusters=True."""
+ raw = pd.read_csv(TESTDATA_CSV, index_col=0)
+
+ with pytest.raises(ValueError, match="must be greater than"):
+ tsam.aggregate(
+ raw,
+ n_clusters=2,
+ extremes=ExtremeConfig(
+ max_value=["GHI", "T", "Wind"],  # 3 extremes
+ preserve_n_clusters=True,
+ ),
+ )
+
+
+ def test_preserve_n_clusters_preserves_extremes():
+ """Extreme values are still preserved with preserve_n_clusters=True."""
+ raw = pd.read_csv(TESTDATA_CSV, index_col=0)
+
+ result = tsam.aggregate(
+ raw,
+ n_clusters=10,
+ extremes=ExtremeConfig(
+ method="append",
+ max_value=["GHI"],
+ preserve_n_clusters=True,
+ ),
+ preserve_column_means=False,  # Don't rescale to check raw extreme preservation
+ )
+
+ # The maximum GHI value should be preserved in the typical periods
+ orig_max = raw["GHI"].max()
+ typical_max = result.cluster_representatives["GHI"].max()
+
+ np.testing.assert_almost_equal(orig_max, typical_max, decimal=5)
+
+
+ def test_preserve_n_clusters_serialization():
+ """ExtremeConfig with preserve_n_clusters serializes correctly."""
+ config = ExtremeConfig(
+ method="append",
+ max_value=["Load"],
+ preserve_n_clusters=True,
+ )
+
+ d = config.to_dict()
+ assert d["preserve_n_clusters"] is True
+
+ config2 = ExtremeConfig.from_dict(d)
+ assert config2.preserve_n_clusters is True
+
+
+ def test_preserve_n_clusters_default_none_with_future_warning():
+ """Default value of preserve_n_clusters is None with FutureWarning."""
+ # Creating ExtremeConfig with extremes but without explicit preserve_n_clusters
+ # should emit a FutureWarning
+ with pytest.warns(FutureWarning, match="preserve_n_clusters currently defaults"):
+ config = ExtremeConfig(max_value=["Load"])
+
+ # The raw value should be None
+ assert config.preserve_n_clusters is None
+
+ # But effective value should be False (current default behavior)
+ assert config._effective_preserve_n_clusters is False
+
+ # to_dict should not include it when None
+ d = config.to_dict()
+ assert "preserve_n_clusters" not in d
+
+
+ def test_preserve_n_clusters_explicit_false_no_warning():
+ """Setting preserve_n_clusters=False explicitly should not warn."""
+ # No warning when explicitly set
+ with warnings.catch_warnings():
+ warnings.simplefilter("error", FutureWarning)
+ config = ExtremeConfig(max_value=["Load"], preserve_n_clusters=False)
+
+ assert config.preserve_n_clusters is False
+ assert config._effective_preserve_n_clusters is False
+
+ # to_dict should include it when explicitly False
+ d = config.to_dict()
+ assert d["preserve_n_clusters"] is False
+
+
+ if __name__ == "__main__":
+ test_extremePeriods()
@@ -1,87 +0,0 @@
- import numpy as np
- import pandas as pd
-
- import tsam.timeseriesaggregation as tsam
- from conftest import TESTDATA_CSV
-
-
- def test_extremePeriods():
- hoursPerPeriod = 24
-
- noTypicalPeriods = 8
-
- raw = pd.read_csv(TESTDATA_CSV, index_col=0)
-
- aggregation1 = tsam.TimeSeriesAggregation(
- raw,
- noTypicalPeriods=noTypicalPeriods,
- hoursPerPeriod=hoursPerPeriod,
- clusterMethod="hierarchical",
- rescaleClusterPeriods=False,
- extremePeriodMethod="new_cluster_center",
- addPeakMax=["GHI"],
- )
-
- aggregation2 = tsam.TimeSeriesAggregation(
- raw,
- noTypicalPeriods=noTypicalPeriods,
- hoursPerPeriod=hoursPerPeriod,
- clusterMethod="hierarchical",
- rescaleClusterPeriods=False,
- extremePeriodMethod="append",
- addPeakMax=["GHI"],
- )
-
- aggregation3 = tsam.TimeSeriesAggregation(
- raw,
- noTypicalPeriods=noTypicalPeriods,
- hoursPerPeriod=hoursPerPeriod,
- clusterMethod="hierarchical",
- rescaleClusterPeriods=False,
- extremePeriodMethod="replace_cluster_center",
- addPeakMax=["GHI"],
- )
-
- # make sure that the RMSE for new cluster centers (reassigning points to the exxtreme point if the distance to it is
- # smaller)is bigger than for appending just one extreme period
- np.testing.assert_array_less(
- aggregation1.accuracyIndicators().loc["GHI", "RMSE"],
- aggregation2.accuracyIndicators().loc["GHI", "RMSE"],
- )
-
- # make sure that the RMSE for appending the extreme period is smaller than for replacing the cluster center by the
- # extreme period (conservative assumption)
- np.testing.assert_array_less(
- aggregation2.accuracyIndicators().loc["GHI", "RMSE"],
- aggregation3.accuracyIndicators().loc["GHI", "RMSE"],
- )
-
- # check if addMeanMax and addMeanMin are working
- aggregation4 = tsam.TimeSeriesAggregation(
- raw,
- noTypicalPeriods=noTypicalPeriods,
- hoursPerPeriod=hoursPerPeriod,
- clusterMethod="hierarchical",
- rescaleClusterPeriods=False,
- extremePeriodMethod="append",
- addMeanMax=["GHI"],
- addMeanMin=["GHI"],
- )
-
- origData = aggregation4.predictOriginalData()
-
- np.testing.assert_array_almost_equal(
- raw.groupby(np.arange(len(raw)) // 24).mean().max().loc["GHI"],
- origData.groupby(np.arange(len(origData)) // 24).mean().max().loc["GHI"],
- decimal=6,
- )
-
- np.testing.assert_array_almost_equal(
- raw.groupby(np.arange(len(raw)) // 24).mean().min().loc["GHI"],
- origData.groupby(np.arange(len(origData)) // 24).mean().min().loc["GHI"],
- decimal=6,
- )
-
-
- if __name__ == "__main__":
- test_extremePeriods()