tsam 2.3.7.tar.gz → 2.3.8.tar.gz

This diff shows the changes between package versions that have been publicly released to one of the supported registries. It is provided for informational purposes only and reflects the packages as they appear in their respective public registries.
Files changed (56)
  1. {tsam-2.3.7/src/tsam.egg-info → tsam-2.3.8}/PKG-INFO +1 -1
  2. {tsam-2.3.7 → tsam-2.3.8}/pyproject.toml +1 -1
  3. {tsam-2.3.7 → tsam-2.3.8}/src/tsam/utils/durationRepresentation.py +63 -52
  4. {tsam-2.3.7 → tsam-2.3.8/src/tsam.egg-info}/PKG-INFO +1 -1
  5. {tsam-2.3.7 → tsam-2.3.8}/LICENSE.txt +0 -0
  6. {tsam-2.3.7 → tsam-2.3.8}/MANIFEST.in +0 -0
  7. {tsam-2.3.7 → tsam-2.3.8}/README.md +0 -0
  8. {tsam-2.3.7 → tsam-2.3.8}/examples/results/paretoOptimalAggregation.csv +0 -0
  9. {tsam-2.3.7 → tsam-2.3.8}/examples/results/preprocessed_wind.csv +0 -0
  10. {tsam-2.3.7 → tsam-2.3.8}/examples/results/testperiods_hierarchical.csv +0 -0
  11. {tsam-2.3.7 → tsam-2.3.8}/examples/results/testperiods_kmeans.csv +0 -0
  12. {tsam-2.3.7 → tsam-2.3.8}/examples/results/testperiods_kmedoids.csv +0 -0
  13. {tsam-2.3.7 → tsam-2.3.8}/examples/results/testperiods_predefClusterOrder.csv +0 -0
  14. {tsam-2.3.7 → tsam-2.3.8}/examples/results/testperiods_predefClusterOrderAndClusterCenters.csv +0 -0
  15. {tsam-2.3.7 → tsam-2.3.8}/examples/results/testperiods_segmentation.csv +0 -0
  16. {tsam-2.3.7 → tsam-2.3.8}/examples/testdata.csv +0 -0
  17. {tsam-2.3.7 → tsam-2.3.8}/requirements.txt +0 -0
  18. {tsam-2.3.7 → tsam-2.3.8}/requirements.yml +0 -0
  19. {tsam-2.3.7 → tsam-2.3.8}/requirements_dev.txt +0 -0
  20. {tsam-2.3.7 → tsam-2.3.8}/setup.cfg +0 -0
  21. {tsam-2.3.7 → tsam-2.3.8}/src/tsam/__init__.py +0 -0
  22. {tsam-2.3.7 → tsam-2.3.8}/src/tsam/hyperparametertuning.py +0 -0
  23. {tsam-2.3.7 → tsam-2.3.8}/src/tsam/periodAggregation.py +0 -0
  24. {tsam-2.3.7 → tsam-2.3.8}/src/tsam/representations.py +0 -0
  25. {tsam-2.3.7 → tsam-2.3.8}/src/tsam/timeseriesaggregation.py +0 -0
  26. {tsam-2.3.7 → tsam-2.3.8}/src/tsam/utils/__init__.py +0 -0
  27. {tsam-2.3.7 → tsam-2.3.8}/src/tsam/utils/k_maxoids.py +0 -0
  28. {tsam-2.3.7 → tsam-2.3.8}/src/tsam/utils/k_medoids_contiguity.py +0 -0
  29. {tsam-2.3.7 → tsam-2.3.8}/src/tsam/utils/k_medoids_exact.py +0 -0
  30. {tsam-2.3.7 → tsam-2.3.8}/src/tsam/utils/segmentation.py +0 -0
  31. {tsam-2.3.7 → tsam-2.3.8}/src/tsam.egg-info/SOURCES.txt +0 -0
  32. {tsam-2.3.7 → tsam-2.3.8}/src/tsam.egg-info/dependency_links.txt +0 -0
  33. {tsam-2.3.7 → tsam-2.3.8}/src/tsam.egg-info/requires.txt +0 -0
  34. {tsam-2.3.7 → tsam-2.3.8}/src/tsam.egg-info/top_level.txt +0 -0
  35. {tsam-2.3.7 → tsam-2.3.8}/test/test_accuracyIndicators.py +0 -0
  36. {tsam-2.3.7 → tsam-2.3.8}/test/test_adjacent_periods.py +0 -0
  37. {tsam-2.3.7 → tsam-2.3.8}/test/test_aggregate_hiearchical.py +0 -0
  38. {tsam-2.3.7 → tsam-2.3.8}/test/test_assert_raises.py +0 -0
  39. {tsam-2.3.7 → tsam-2.3.8}/test/test_averaging.py +0 -0
  40. {tsam-2.3.7 → tsam-2.3.8}/test/test_cluster_order.py +0 -0
  41. {tsam-2.3.7 → tsam-2.3.8}/test/test_durationCurve.py +0 -0
  42. {tsam-2.3.7 → tsam-2.3.8}/test/test_durationRepresentation.py +0 -0
  43. {tsam-2.3.7 → tsam-2.3.8}/test/test_extremePeriods.py +0 -0
  44. {tsam-2.3.7 → tsam-2.3.8}/test/test_hierarchical.py +0 -0
  45. {tsam-2.3.7 → tsam-2.3.8}/test/test_hypertuneAggregation.py +0 -0
  46. {tsam-2.3.7 → tsam-2.3.8}/test/test_k_maxoids.py +0 -0
  47. {tsam-2.3.7 → tsam-2.3.8}/test/test_k_medoids.py +0 -0
  48. {tsam-2.3.7 → tsam-2.3.8}/test/test_k_medoids_contiguity.py +0 -0
  49. {tsam-2.3.7 → tsam-2.3.8}/test/test_minmaxRepresentation.py +0 -0
  50. {tsam-2.3.7 → tsam-2.3.8}/test/test_preprocess.py +0 -0
  51. {tsam-2.3.7 → tsam-2.3.8}/test/test_properties.py +0 -0
  52. {tsam-2.3.7 → tsam-2.3.8}/test/test_samemean.py +0 -0
  53. {tsam-2.3.7 → tsam-2.3.8}/test/test_segmentation.py +0 -0
  54. {tsam-2.3.7 → tsam-2.3.8}/test/test_subhourlyResolution.py +0 -0
  55. {tsam-2.3.7 → tsam-2.3.8}/test/test_subhourly_periods.py +0 -0
  56. {tsam-2.3.7 → tsam-2.3.8}/test/test_weightingFactors.py +0 -0
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: tsam
-Version: 2.3.7
+Version: 2.3.8
 Summary: Time series aggregation module (tsam) to create typical periods
 Author-email: Leander Kotzur <leander.kotzur@googlemail.com>, Maximilian Hoffmann <maximilian.hoffmann@julumni.fz-juelich.de>
 Maintainer-email: Julian Belina <j.belina@fz-juelich.de>
@@ -5,7 +5,7 @@ build-backend = "setuptools.build_meta"
 
 [project]
 name = "tsam"
-version = "2.3.7"
+version = "2.3.8"
 description = "Time series aggregation module (tsam) to create typical periods"
 authors = [
     { name = "Leander Kotzur", email = "leander.kotzur@googlemail.com" },
@@ -28,69 +28,80 @@ def durationRepresentation(
     :type representMinMax: bool
     """
 
-    # make pd.DataFrame each row represents a candidate, and the columns are defined by two levels: the attributes and
-    # the time steps inside the candidates.
-    columnTuples = []
-    for i in range(int(candidates.shape[1] / timeStepsPerPeriod)):
-        for j in range(timeStepsPerPeriod):
-            columnTuples.append((i, j))
-    candidates = pd.DataFrame(
-        candidates, columns=pd.MultiIndex.from_tuples(columnTuples)
+    # Convert candidates to numpy array at the beginning if it's a DataFrame
+    if isinstance(candidates, pd.DataFrame):
+        candidates_array = candidates.values
+    else:
+        candidates_array = candidates
+
+    # Create a pandas DataFrame only when necessary
+    columnTuples = [(i, j) for i in range(int(candidates_array.shape[1] / timeStepsPerPeriod))
+                    for j in range(timeStepsPerPeriod)]
+
+    candidates_df = pd.DataFrame(
+        candidates_array, columns=pd.MultiIndex.from_tuples(columnTuples)
     )
-
-    # There are two options for the duration representation. Either, the distribution of each cluster is preserved
-    # (periodWise = True) or the distribution of the total time series is preserved only. In the latter case, the
-    # inner-cluster variance is smaller and the variance across the typical periods' mean values is higher
+
     if distributionPeriodWise:
         clusterCenters = []
-        for clusterNum in np.unique(clusterOrder):
-            indice = np.where(clusterOrder == clusterNum)
-            noCandidates = len(indice[0])
-            clean_index = []
-
-            clusterCenter = []
-            # get a clean index depending on the size
-            for y in candidates.columns.levels[1]:
-                for x in range(noCandidates):
-                    clean_index.append((x, y))
-            for a in candidates.columns.levels[0]:
-                # get all the values of a certain attribute and cluster
-                candidateValues = candidates.loc[indice[0], a]
-                # sort all values
-                sortedAttr = candidateValues.stack(
-                    future_stack=True,
-                ).sort_values()
-                # reindex and arrange such that every sorted segment gets represented by its mean
-                sortedAttr.index = pd.MultiIndex.from_tuples(clean_index)
-                representationValues = sortedAttr.unstack(level=0).mean(axis=1)
-                # respect max and min of the attributes
+        unique_clusters = np.unique(clusterOrder)
+
+        for clusterNum in unique_clusters:
+            indice = np.where(clusterOrder == clusterNum)[0]
+            noCandidates = len(indice)
+
+            # Pre-allocate the full cluster center array
+            cluster_values_count = noCandidates * timeStepsPerPeriod * len(candidates_df.columns.levels[0])
+            clusterCenter = np.zeros(cluster_values_count)
+            current_idx = 0
+
+            for a in candidates_df.columns.levels[0]:
+                # Get values using numpy indexing when possible
+                candidateValues = candidates_df.loc[indice, a].values
+
+                # Reshape to more easily work with numpy
+                candidateValues_reshaped = candidateValues.reshape(-1)
+
+                # Sort values using numpy
+                sorted_values = np.sort(candidateValues_reshaped)
+
+                # Calculate representative values directly
+                values_per_timestep = noCandidates
+                representation_values = np.zeros(timeStepsPerPeriod)
+
+                for t in range(timeStepsPerPeriod):
+                    start_idx = t * values_per_timestep
+                    end_idx = start_idx + values_per_timestep
+                    representation_values[t] = np.mean(sorted_values[start_idx:end_idx])
+
+                # Handle min/max representation if needed
                 if representMinMax:
-                    representationValues.loc[0] = sortedAttr.values[0]
-                    representationValues.loc[representationValues.index[-1]] = (
-                        sortedAttr.values[-1]
-                    )
-
-                # get the order of the representation values such that euclidean distance to the candidates is minimized
-                order = candidateValues.mean().sort_values().index
-                # arrange
-                representationValues.index = order
-                representationValues.sort_index(inplace=True)
-
-                # add to cluster center
-                clusterCenter = np.append(clusterCenter, representationValues.values)
-
-            clusterCenters.append(clusterCenter)
-
+                    representation_values[0] = sorted_values[0]
+                    representation_values[-1] = sorted_values[-1]
+
+                # Re-order values based on the mean of candidate values
+                mean_values = np.mean(candidateValues, axis=0)
+                order_indices = np.argsort(mean_values)
+
+                # Reorder representation values
+                representation_values_ordered = representation_values[order_indices]
+
+                # Add to cluster center
+                clusterCenter[current_idx:current_idx + len(representation_values)] = representation_values_ordered
+                current_idx += len(representation_values)
+
+            clusterCenters.append(clusterCenter[:current_idx])  # Trim if we didn't use the whole pre-allocation
+
     else:
         clusterCentersList = []
-        for a in candidates.columns.levels[0]:
+        for a in candidates_df.columns.levels[0]:
             meanVals = []
             clusterLengths = []
             for clusterNum in np.unique(clusterOrder):
                 indice = np.where(clusterOrder == clusterNum)
                 noCandidates = len(indice[0])
                 # get all the values of a certain attribute and cluster
-                candidateValues = candidates.loc[indice[0], a]
+                candidateValues = candidates_df.loc[indice[0], a]
                 # calculate centroid of each cluster and append to list
                 meanVals.append(candidateValues.mean())
                 # make a list of weights of each cluster for each time step within the period
@@ -113,7 +124,7 @@ def durationRepresentation(
             order = meansAndWeightsSorted.index
             # sort all values of the original time series
             sortedAttr = (
-                candidates.loc[:, a]
+                candidates_df.loc[:, a]
                 .stack(
                     future_stack=True,
                 )
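
As an aside on the period-wise branch refactored above: the underlying duration-representation idea is that, for each attribute within a cluster, all candidate values are sorted into one duration curve, every consecutive block of noCandidates values is averaged to one representative value per time step, and the representatives are mapped back to time steps by the rank of each time step's mean candidate value. Below is a minimal standalone sketch of that idea; the function and variable names are illustrative and not part of the tsam API.

# Minimal sketch of the duration-representation idea (illustrative only;
# names are not part of the tsam API).
import numpy as np

def duration_representation_sketch(candidate_values):
    # candidate_values: array of shape (noCandidates, timeStepsPerPeriod)
    # holding one attribute of all candidate periods assigned to one cluster.
    no_candidates, time_steps = candidate_values.shape

    # Sort every value of the cluster into one duration curve.
    sorted_values = np.sort(candidate_values.reshape(-1))

    # Average each consecutive block of no_candidates values so that the
    # representative period preserves the cluster's duration curve.
    representation = sorted_values.reshape(time_steps, no_candidates).mean(axis=1)

    # Map the sorted representatives back to time steps by the rank of each
    # time step's mean candidate value (smallest mean gets the smallest value).
    mean_per_step = candidate_values.mean(axis=0)
    result = np.empty(time_steps)
    result[np.argsort(mean_per_step)] = representation
    return result

# Example: three candidate days with four time steps each.
rng = np.random.default_rng(0)
print(duration_representation_sketch(rng.random((3, 4))))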
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: tsam
-Version: 2.3.7
+Version: 2.3.8
 Summary: Time series aggregation module (tsam) to create typical periods
 Author-email: Leander Kotzur <leander.kotzur@googlemail.com>, Maximilian Hoffmann <maximilian.hoffmann@julumni.fz-juelich.de>
 Maintainer-email: Julian Belina <j.belina@fz-juelich.de>