PyPI - tsam - Versions diffs - 2.3.8__tar.gz → 2.3.9__tar.gz - Mend

tsam 2.3.8tar.gz → 2.3.9tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (56) hide show

{tsam-2.3.8/src/tsam.egg-info → tsam-2.3.9}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: tsam
-Version: 2.3.8
+Version: 2.3.9
 Summary: Time series aggregation module (tsam) to create typical periods
 Author-email: Leander Kotzur <leander.kotzur@googlemail.com>, Maximilian Hoffmann <maximilian.hoffmann@julumni.fz-juelich.de>
 Maintainer-email: Julian Belina <j.belina@fz-juelich.de>

{tsam-2.3.8 → tsam-2.3.9}/pyproject.toml RENAMED Viewed

@@ -5,7 +5,7 @@ build-backend = "setuptools.build_meta"
 [project]
 name = "tsam"
-version = "2.3.8"
+version = "2.3.9"
 description = "Time series aggregation module (tsam) to create typical periods"
 authors = [
     { name = "Leander Kotzur", email = "leander.kotzur@googlemail.com" },

{tsam-2.3.8 → tsam-2.3.9}/src/tsam/utils/durationRepresentation.py RENAMED Viewed

@@ -28,69 +28,61 @@ def durationRepresentation(
     :type representMinMax: bool
     """
-    # Convert candidates to numpy array at the beginning if it's a DataFrame
-    if isinstance(candidates, pd.DataFrame):
-        candidates_array = candidates.values
-    else:
-        candidates_array = candidates
-    # Create a pandas DataFrame only when necessary
-    columnTuples = [(i, j) for i in range(int(candidates_array.shape[1] / timeStepsPerPeriod))
-                   for j in range(timeStepsPerPeriod)]
+    # make pd.DataFrame each row represents a candidate, and the columns are defined by two levels: the attributes and
+    # the time steps inside the candidates.
+    columnTuples = []
+    num_attributes = int(candidates.shape[1] / timeStepsPerPeriod)
+    for i in range(num_attributes):
+        for j in range(timeStepsPerPeriod):
+            columnTuples.append((i, j))
     candidates_df = pd.DataFrame(
-        candidates_array, columns=pd.MultiIndex.from_tuples(columnTuples)
+        candidates, columns=pd.MultiIndex.from_tuples(columnTuples)
     )
+    # There are two options for the duration representation. Either, the distribution of each cluster is preserved
+    # (periodWise = True) or the distribution of the total time series is preserved only. In the latter case, the
+    # inner-cluster variance is smaller and the variance across the typical periods' mean values is higher
     if distributionPeriodWise:
         clusterCenters = []
-        unique_clusters = np.unique(clusterOrder)
-        for clusterNum in unique_clusters:
+        for clusterNum in np.unique(clusterOrder):
             indice = np.where(clusterOrder == clusterNum)[0]
             noCandidates = len(indice)
+            # Skip empty clusters
+            if len(indice) == 0:
+                continue
-            # Pre-allocate the full cluster center array
-            cluster_values_count = noCandidates * timeStepsPerPeriod * len(candidates_df.columns.levels[0])
-            clusterCenter = np.zeros(cluster_values_count)
-            current_idx = 0
+            # This list will hold the representative values for each attribute
+            clusterCenter_parts = []
             for a in candidates_df.columns.levels[0]:
-                # Get values using numpy indexing when possible
-                candidateValues = candidates_df.loc[indice, a].values
-                # Reshape to more easily work with numpy
-                candidateValues_reshaped = candidateValues.reshape(-1)
-                # Sort values using numpy
-                sorted_values = np.sort(candidateValues_reshaped)
-                # Calculate representative values directly
-                values_per_timestep = noCandidates
-                representation_values = np.zeros(timeStepsPerPeriod)
-                for t in range(timeStepsPerPeriod):
-                    start_idx = t * values_per_timestep
-                    end_idx = start_idx + values_per_timestep
-                    representation_values[t] = np.mean(sorted_values[start_idx:end_idx])
-                # Handle min/max representation if needed
+                candidateValues_np = candidates_df.loc[indice, a].values
+                # flatten the 2D array (candidates, timesteps) into a 1D array and sort it.
+                sorted_flat_values = np.sort(candidateValues_np.flatten())
+                # reshape the sorted values and calculate the mean for each representative time step.
+                representationValues_np = sorted_flat_values.reshape(timeStepsPerPeriod, noCandidates).mean(axis=1)
+                # respect max and min of the attributes
                 if representMinMax:
-                    representation_values[0] = sorted_values[0]
-                    representation_values[-1] = sorted_values[-1]
-                # Re-order values based on the mean of candidate values
-                mean_values = np.mean(candidateValues, axis=0)
-                order_indices = np.argsort(mean_values)
-                # Reorder representation values
-                representation_values_ordered = representation_values[order_indices]
+                    representationValues_np[0] = sorted_flat_values[0]
+                    representationValues_np[-1] = sorted_flat_values[-1]
+                # get the order of the representation values such that euclidean distance
+                # to the candidates' mean profile is minimized.
+                mean_profile_order = np.argsort(candidateValues_np.mean(axis=0))
-                # Add to cluster center
-                clusterCenter[current_idx:current_idx+len(representation_values)] = representation_values_ordered
-                current_idx += len(representation_values)
+                # Create an empty array to place the results in the correct order
+                final_representation_for_attr = np.empty_like(representationValues_np)
+                final_representation_for_attr[mean_profile_order] = representationValues_np
-            clusterCenters.append(clusterCenter[:current_idx])  # Trim if we didn't use the whole pre-allocation
+                # add to cluster center
+                clusterCenter_parts.append(final_representation_for_attr)
+            clusterCenters.append(np.concatenate(clusterCenter_parts))
     else:
         clusterCentersList = []

{tsam-2.3.8 → tsam-2.3.9/src/tsam.egg-info}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: tsam
-Version: 2.3.8
+Version: 2.3.9
 Summary: Time series aggregation module (tsam) to create typical periods
 Author-email: Leander Kotzur <leander.kotzur@googlemail.com>, Maximilian Hoffmann <maximilian.hoffmann@julumni.fz-juelich.de>
 Maintainer-email: Julian Belina <j.belina@fz-juelich.de>