ts_shape-0.0.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (87)
  1. ts_shape/__init__.py +0 -0
  2. ts_shape/context/__init__.py +9 -0
  3. ts_shape/context/value_mapping.py +89 -0
  4. ts_shape/events/__init__.py +14 -0
  5. ts_shape/events/correlation/__init__.py +24 -0
  6. ts_shape/events/correlation/anomaly_correlation.py +248 -0
  7. ts_shape/events/correlation/signal_correlation.py +213 -0
  8. ts_shape/events/energy/__init__.py +27 -0
  9. ts_shape/events/energy/consumption_analysis.py +238 -0
  10. ts_shape/events/energy/efficiency_tracking.py +390 -0
  11. ts_shape/events/engineering/__init__.py +24 -0
  12. ts_shape/events/engineering/setpoint_events.py +1025 -0
  13. ts_shape/events/engineering/startup_events.py +720 -0
  14. ts_shape/events/maintenance/__init__.py +35 -0
  15. ts_shape/events/maintenance/degradation_detection.py +368 -0
  16. ts_shape/events/maintenance/failure_prediction.py +255 -0
  17. ts_shape/events/maintenance/vibration_analysis.py +217 -0
  18. ts_shape/events/production/__init__.py +106 -0
  19. ts_shape/events/production/alarm_management.py +289 -0
  20. ts_shape/events/production/batch_tracking.py +242 -0
  21. ts_shape/events/production/changeover.py +282 -0
  22. ts_shape/events/production/cycle_time_tracking.py +404 -0
  23. ts_shape/events/production/downtime.py +0 -0
  24. ts_shape/events/production/downtime_tracking.py +418 -0
  25. ts_shape/events/production/flow_constraints.py +356 -0
  26. ts_shape/events/production/line_throughput.py +322 -0
  27. ts_shape/events/production/machine_state.py +210 -0
  28. ts_shape/events/production/oee_calculator.py +369 -0
  29. ts_shape/events/production/part_tracking.py +276 -0
  30. ts_shape/events/production/quality_tracking.py +507 -0
  31. ts_shape/events/production/shift_reporting.py +366 -0
  32. ts_shape/events/quality/__init__.py +27 -0
  33. ts_shape/events/quality/outlier_detection.py +254 -0
  34. ts_shape/events/quality/statistical_process_control.py +641 -0
  35. ts_shape/events/quality/tolerance_deviation.py +456 -0
  36. ts_shape/events/supplychain/__init__.py +19 -0
  37. ts_shape/events/supplychain/demand_pattern.py +209 -0
  38. ts_shape/events/supplychain/inventory_monitoring.py +316 -0
  39. ts_shape/events/supplychain/lead_time_analysis.py +192 -0
  40. ts_shape/features/__init__.py +95 -0
  41. ts_shape/features/cycles/__init__.py +19 -0
  42. ts_shape/features/cycles/cycle_processor.py +328 -0
  43. ts_shape/features/cycles/cycles_extractor.py +464 -0
  44. ts_shape/features/stats/__init__.py +76 -0
  45. ts_shape/features/stats/boolean_stats.py +71 -0
  46. ts_shape/features/stats/feature_table.py +118 -0
  47. ts_shape/features/stats/numeric_stats.py +122 -0
  48. ts_shape/features/stats/string_stats.py +124 -0
  49. ts_shape/features/stats/timestamp_stats.py +103 -0
  50. ts_shape/features/time_stats/__init__.py +10 -0
  51. ts_shape/features/time_stats/time_stats_numeric.py +89 -0
  52. ts_shape/loader/__init__.py +51 -0
  53. ts_shape/loader/combine/__init__.py +8 -0
  54. ts_shape/loader/combine/integrator.py +139 -0
  55. ts_shape/loader/context/__init__.py +17 -0
  56. ts_shape/loader/context/context_enricher.py +162 -0
  57. ts_shape/loader/metadata/__init__.py +28 -0
  58. ts_shape/loader/metadata/metadata_api_loader.py +109 -0
  59. ts_shape/loader/metadata/metadata_db_loader.py +107 -0
  60. ts_shape/loader/metadata/metadata_json_loader.py +315 -0
  61. ts_shape/loader/timeseries/__init__.py +37 -0
  62. ts_shape/loader/timeseries/azure_blob_loader.py +830 -0
  63. ts_shape/loader/timeseries/energy_api_loader.py +202 -0
  64. ts_shape/loader/timeseries/parquet_loader.py +169 -0
  65. ts_shape/loader/timeseries/s3proxy_parquet_loader.py +83 -0
  66. ts_shape/loader/timeseries/timescale_loader.py +55 -0
  67. ts_shape/transform/__init__.py +63 -0
  68. ts_shape/transform/calculator/__init__.py +14 -0
  69. ts_shape/transform/calculator/numeric_calc.py +120 -0
  70. ts_shape/transform/filter/__init__.py +41 -0
  71. ts_shape/transform/filter/boolean_filter.py +37 -0
  72. ts_shape/transform/filter/custom_filter.py +32 -0
  73. ts_shape/transform/filter/datetime_filter.py +123 -0
  74. ts_shape/transform/filter/numeric_filter.py +39 -0
  75. ts_shape/transform/filter/string_filter.py +44 -0
  76. ts_shape/transform/functions/__init__.py +8 -0
  77. ts_shape/transform/functions/lambda_func.py +28 -0
  78. ts_shape/transform/time_functions/__init__.py +15 -0
  79. ts_shape/transform/time_functions/timestamp_converter.py +41 -0
  80. ts_shape/transform/time_functions/timezone_shift.py +150 -0
  81. ts_shape/utils/__init__.py +8 -0
  82. ts_shape/utils/base.py +36 -0
  83. ts_shape-0.0.0.dist-info/METADATA +400 -0
  84. ts_shape-0.0.0.dist-info/RECORD +87 -0
  85. ts_shape-0.0.0.dist-info/WHEEL +5 -0
  86. ts_shape-0.0.0.dist-info/licenses/LICENSE.txt +21 -0
  87. ts_shape-0.0.0.dist-info/top_level.txt +1 -0
ts_shape/__init__.py ADDED
File without changes
ts_shape/context/__init__.py ADDED
@@ -0,0 +1,9 @@
+ """Context
+
+ Utilities for enriching DataFrames with contextual information and mappings.
+
+ Classes:
+ - ValueMapper: Map categorical codes to readable values from external files.
+     - map_values: Merge and replace a target column using a CSV/JSON mapping table.
+     - _load_mapping_table: Load a mapping table from CSV or JSON.
+ """
ts_shape/context/value_mapping.py ADDED
@@ -0,0 +1,89 @@
+ import pandas as pd  # type: ignore
+ from typing import Union
+ from ts_shape.utils.base import Base
+
+ class ValueMapper(Base):
+     """
+     A class to map values from specified columns of a DataFrame using a mapping table (CSV or JSON file),
+     inheriting from the Base class.
+     """
+
+     def __init__(
+         self,
+         dataframe: pd.DataFrame,
+         mapping_file: str,
+         map_column: str,
+         mapping_key_column: str,
+         mapping_value_column: str,
+         file_type: str = 'csv',
+         sep: str = ',',
+         encoding: str = 'utf-8',
+         column_name: str = 'systime'
+     ) -> None:
+         """
+         Initializes ValueMapper and the base DataFrame from the Base class.
+
+         Args:
+             dataframe (pd.DataFrame): The DataFrame to be processed and mapped.
+             mapping_file (str): The file path of the mapping table (CSV or JSON).
+             map_column (str): The name of the column in the DataFrame that needs to be mapped.
+             mapping_key_column (str): The column in the mapping table to match with values from the DataFrame.
+             mapping_value_column (str): The column in the mapping table containing the values to map to.
+             file_type (str): The type of the mapping file ('csv' or 'json'). Defaults to 'csv'.
+             sep (str): The separator for CSV files. Defaults to ','.
+             encoding (str): The encoding to use for reading the file. Defaults to 'utf-8'.
+             column_name (str): The name of the column to sort the DataFrame by in the base class. Defaults to 'systime'.
+         """
+         # Initialize the Base class with the sorted DataFrame
+         super().__init__(dataframe, column_name)
+
+         # Additional attributes for ValueMapper
+         self.map_column: str = map_column
+         self.mapping_key_column: str = mapping_key_column
+         self.mapping_value_column: str = mapping_value_column
+         self.sep: str = sep
+         self.encoding: str = encoding
+
+         # Load the mapping table based on file type
+         self.mapping_table: pd.DataFrame = self._load_mapping_table(mapping_file, file_type)
+
+     def _load_mapping_table(self, mapping_file: str, file_type: str) -> pd.DataFrame:
+         """
+         Loads the mapping table from a CSV or JSON file.
+
+         Args:
+             mapping_file (str): The file path of the mapping table.
+             file_type (str): The type of the file ('csv' or 'json').
+
+         Returns:
+             pd.DataFrame: The loaded mapping table as a DataFrame.
+         """
+         if file_type == 'csv':
+             return pd.read_csv(mapping_file, sep=self.sep, encoding=self.encoding)
+         elif file_type == 'json':
+             return pd.read_json(mapping_file, encoding=self.encoding)
+         else:
+             raise ValueError("Unsupported file type. Please use 'csv' or 'json'.")
+
+     def map_values(self) -> pd.DataFrame:
+         """
+         Maps values in the specified DataFrame column based on the mapping table.
+
+         Returns:
+             pd.DataFrame: A new DataFrame with the mapped values.
+         """
+         # Merge the mapping table with the DataFrame based on the map_column and mapping_key_column
+         mapped_df = self.dataframe.merge(
+             self.mapping_table[[self.mapping_key_column, self.mapping_value_column]],
+             left_on=self.map_column,
+             right_on=self.mapping_key_column,
+             how='left'
+         )
+
+         # Replace the original column with the mapped values
+         mapped_df[self.map_column] = mapped_df[self.mapping_value_column]
+
+         # Drop unnecessary columns
+         mapped_df = mapped_df.drop([self.mapping_key_column, self.mapping_value_column], axis=1)
+
+         return mapped_df
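A minimal usage sketch (hedged: the DataFrame, the `states.csv` path, and its `code`/`label` columns are hypothetical, not shipped with the package):

```python
import pandas as pd
from ts_shape.context.value_mapping import ValueMapper

# Hypothetical long-format frame: machine state codes over time.
df = pd.DataFrame({
    "systime": pd.to_datetime(["2024-01-01 00:00", "2024-01-01 00:01"]),
    "state_code": [1, 2],
})

# Hypothetical states.csv:
#   code,label
#   1,running
#   2,stopped
mapper = ValueMapper(
    dataframe=df,
    mapping_file="states.csv",        # assumed path
    map_column="state_code",
    mapping_key_column="code",
    mapping_value_column="label",
    file_type="csv",
)
mapped = mapper.map_values()  # state_code now holds "running"/"stopped"
```

Note that `map_values` merges with `how='left'`, so codes missing from the mapping table come back as NaN rather than raising.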
ts_shape/events/__init__.py ADDED
@@ -0,0 +1,14 @@
+ """Events
+
+ Extract events from shaped timeseries across quality, maintenance,
+ production, energy, correlation, and supply chain domains.
+
+ Subpackages:
+ - quality: Outlier detection, SPC, tolerance deviation
+ - production: Machine state, throughput, changeover, OEE, alarms, batches, shifts
+ - engineering: Setpoint changes, startup detection
+ - maintenance: Degradation detection, failure prediction, vibration analysis
+ - supplychain: Inventory monitoring, lead time analysis, demand patterns
+ - energy: Consumption analysis, efficiency tracking
+ - correlation: Signal correlation, anomaly correlation
+ """
ts_shape/events/correlation/__init__.py ADDED
@@ -0,0 +1,24 @@
+ """Correlation Events
+
+ Cross-signal correlation analysis for detecting related anomalies
+ and patterns across multiple timeseries signals.
+
+ Classes:
+ - SignalCorrelationEvents: Analyze correlations between signals.
+     - rolling_correlation: Time-windowed Pearson correlation between two signals.
+     - correlation_breakdown: Detect periods where normally correlated signals diverge.
+     - lag_correlation: Cross-correlation with time lag analysis.
+
+ - AnomalyCorrelationEvents: Correlate anomaly events across signals.
+     - coincident_anomalies: Find anomalies occurring simultaneously across signals.
+     - cascade_detection: Detect anomaly cascades (signal A anomaly followed by B).
+     - root_cause_ranking: Rank signals by how often their anomalies precede others.
+ """
+
+ from .signal_correlation import SignalCorrelationEvents
+ from .anomaly_correlation import AnomalyCorrelationEvents
+
+ __all__ = [
+     "SignalCorrelationEvents",
+     "AnomalyCorrelationEvents",
+ ]
ts_shape/events/correlation/anomaly_correlation.py ADDED
@@ -0,0 +1,248 @@
+ import pandas as pd  # type: ignore
+ import numpy as np  # type: ignore
+ from typing import List, Dict, Any, Optional
+
+ from ts_shape.utils.base import Base
+
+
+ class AnomalyCorrelationEvents(Base):
+     """Correlation: Anomaly Correlation Analysis
+
+     Correlate anomaly events across multiple signals to find coincident
+     patterns, cascading failures, and root cause candidates.
+
+     Methods:
+     - coincident_anomalies: Find anomalies that co-occur within a time window.
+     - cascade_detection: Detect anomaly cascades (A precedes B within a window).
+     - root_cause_ranking: Rank signals by how often their anomalies precede others.
+     """
+
+     def __init__(
+         self,
+         dataframe: pd.DataFrame,
+         *,
+         event_uuid: str = "corr:anomaly",
+         value_column: str = "value_double",
+         time_column: str = "systime",
+     ) -> None:
+         super().__init__(dataframe, column_name=time_column)
+         self.event_uuid = event_uuid
+         self.value_column = value_column
+         self.time_column = time_column
+
+     def _detect_signal_anomalies(
+         self,
+         signal_uuid: str,
+         *,
+         z_threshold: float = 3.0,
+     ) -> pd.DataFrame:
+         """Internal: detect anomalies in a single signal using Z-score."""
+         s = (
+             self.dataframe[self.dataframe["uuid"] == signal_uuid]
+             .copy()
+             .sort_values(self.time_column)
+         )
+         if s.empty or len(s) < 3:
+             return pd.DataFrame(columns=[self.time_column, "uuid", self.value_column, "z_score"])
+
+         s[self.time_column] = pd.to_datetime(s[self.time_column])
+         values = s[self.value_column]
+         mean = values.mean()
+         std = values.std()
+         if std == 0:
+             return pd.DataFrame(columns=[self.time_column, "uuid", self.value_column, "z_score"])
+
+         s["z_score"] = ((values - mean) / std).abs()
+         anomalies = s[s["z_score"] >= z_threshold][[self.time_column, "uuid", self.value_column, "z_score"]]
+         return anomalies.reset_index(drop=True)
+
+     def coincident_anomalies(
+         self,
+         signal_uuids: List[str],
+         *,
+         z_threshold: float = 3.0,
+         coincidence_window: str = "5min",
+         min_signals: int = 2,
+     ) -> pd.DataFrame:
+         """Find anomalies that co-occur across multiple signals within a time window.
+
+         Args:
+             signal_uuids: List of signal UUIDs to analyze.
+             z_threshold: Z-score threshold for anomaly detection per signal.
+             coincidence_window: Time window for considering anomalies coincident.
+             min_signals: Minimum number of signals with anomalies to flag.
+
+         Returns:
+             DataFrame: window_start, window_end, uuid, is_delta,
+                 anomaly_count, signal_uuids_involved
+         """
+         all_anomalies = []
+         for uid in signal_uuids:
+             anom = self._detect_signal_anomalies(uid, z_threshold=z_threshold)
+             if not anom.empty:
+                 all_anomalies.append(anom)
+
+         if not all_anomalies:
+             return pd.DataFrame(
+                 columns=[
+                     "window_start", "window_end", "uuid", "is_delta",
+                     "anomaly_count", "signal_uuids_involved",
+                 ]
+             )
+
+         combined = pd.concat(all_anomalies, ignore_index=True)
+         combined = combined.sort_values(self.time_column)
+
+         window_td = pd.to_timedelta(coincidence_window)
+         rows: List[Dict[str, Any]] = []
+         processed = set()
+
+         for i, row in combined.iterrows():
+             if i in processed:
+                 continue
+             t = row[self.time_column]
+             window_mask = (
+                 (combined[self.time_column] >= t)
+                 & (combined[self.time_column] <= t + window_td)
+             )
+             window_data = combined[window_mask]
+             unique_signals = window_data["uuid"].unique()
+
+             if len(unique_signals) >= min_signals:
+                 rows.append(
+                     {
+                         "window_start": t,
+                         "window_end": t + window_td,
+                         "uuid": self.event_uuid,
+                         "is_delta": True,
+                         "anomaly_count": len(window_data),
+                         "signal_uuids_involved": ",".join(sorted(unique_signals)),
+                     }
+                 )
+                 processed.update(window_data.index.tolist())
+
+         return pd.DataFrame(rows)
+
+     def cascade_detection(
+         self,
+         leader_uuid: str,
+         follower_uuid: str,
+         *,
+         z_threshold: float = 3.0,
+         max_delay: str = "10min",
+     ) -> pd.DataFrame:
+         """Detect anomaly cascades: leader anomaly followed by follower anomaly.
+
+         Identifies cases where an anomaly in signal A is followed by an
+         anomaly in signal B within the max_delay window.
+
+         Args:
+             leader_uuid: UUID of the leading signal.
+             follower_uuid: UUID of the following signal.
+             z_threshold: Z-score threshold for anomaly detection.
+             max_delay: Maximum time between leader and follower anomaly.
+
+         Returns:
+             DataFrame: leader_time, follower_time, uuid, is_delta,
+                 leader_uuid, follower_uuid, delay_seconds
+         """
+         leader_anom = self._detect_signal_anomalies(leader_uuid, z_threshold=z_threshold)
+         follower_anom = self._detect_signal_anomalies(follower_uuid, z_threshold=z_threshold)
+
+         if leader_anom.empty or follower_anom.empty:
+             return pd.DataFrame(
+                 columns=[
+                     "leader_time", "follower_time", "uuid", "is_delta",
+                     "leader_uuid", "follower_uuid", "delay_seconds",
+                 ]
+             )
+
+         max_delay_td = pd.to_timedelta(max_delay)
+         rows: List[Dict[str, Any]] = []
+         used_followers = set()
+
+         for _, lrow in leader_anom.iterrows():
+             lt = lrow[self.time_column]
+             candidates = follower_anom[
+                 (follower_anom[self.time_column] > lt)
+                 & (follower_anom[self.time_column] <= lt + max_delay_td)
+             ]
+             for fidx, frow in candidates.iterrows():
+                 if fidx not in used_followers:
+                     rows.append(
+                         {
+                             "leader_time": lt,
+                             "follower_time": frow[self.time_column],
+                             "uuid": self.event_uuid,
+                             "is_delta": True,
+                             "leader_uuid": leader_uuid,
+                             "follower_uuid": follower_uuid,
+                             "delay_seconds": (
+                                 frow[self.time_column] - lt
+                             ).total_seconds(),
+                         }
+                     )
+                     used_followers.add(fidx)
+                     break  # one follower per leader
+
+         return pd.DataFrame(rows)
+
+     def root_cause_ranking(
+         self,
+         signal_uuids: List[str],
+         *,
+         z_threshold: float = 3.0,
+         max_delay: str = "10min",
+     ) -> pd.DataFrame:
+         """Rank signals by how often their anomalies precede others.
+
+         For each pair of signals, counts how many times signal A's anomaly
+         precedes signal B's anomaly within max_delay. Signals that frequently
+         lead are potential root causes.
+
+         Args:
+             signal_uuids: List of signal UUIDs.
+             z_threshold: Z-score threshold.
+             max_delay: Maximum delay for cascade detection.
+
+         Returns:
+             DataFrame: signal_uuid, leader_count, follower_count,
+                 leader_ratio, rank
+         """
+         if len(signal_uuids) < 2:
+             return pd.DataFrame(
+                 columns=["signal_uuid", "leader_count", "follower_count", "leader_ratio", "rank"]
+             )
+
+         leader_counts: Dict[str, int] = {uid: 0 for uid in signal_uuids}
+         follower_counts: Dict[str, int] = {uid: 0 for uid in signal_uuids}
+
+         for i, uid_a in enumerate(signal_uuids):
+             for uid_b in signal_uuids[i + 1:]:
+                 cascades_ab = self.cascade_detection(
+                     uid_a, uid_b, z_threshold=z_threshold, max_delay=max_delay
+                 )
+                 cascades_ba = self.cascade_detection(
+                     uid_b, uid_a, z_threshold=z_threshold, max_delay=max_delay
+                 )
+                 leader_counts[uid_a] += len(cascades_ab)
+                 follower_counts[uid_b] += len(cascades_ab)
+                 leader_counts[uid_b] += len(cascades_ba)
+                 follower_counts[uid_a] += len(cascades_ba)
+
+         rows = []
+         for uid in signal_uuids:
+             total = leader_counts[uid] + follower_counts[uid]
+             rows.append(
+                 {
+                     "signal_uuid": uid,
+                     "leader_count": leader_counts[uid],
+                     "follower_count": follower_counts[uid],
+                     "leader_ratio": leader_counts[uid] / total if total > 0 else 0.0,
+                 }
+             )
+
+         result = pd.DataFrame(rows)
+         result = result.sort_values("leader_ratio", ascending=False).reset_index(drop=True)
+         result["rank"] = range(1, len(result) + 1)
+         return result
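A sketch of driving the anomaly correlators on synthetic data, assuming the long-format schema the defaults imply (`uuid`, `systime`, `value_double` columns); the signal names and data are illustrative only:

```python
import numpy as np
import pandas as pd
from ts_shape.events.correlation import AnomalyCorrelationEvents

# Synthetic cascade: a spike on "pump" followed two minutes later by one on "motor".
times = pd.date_range("2024-01-01", periods=120, freq="1min")
rng = np.random.default_rng(0)
pump = rng.normal(10, 1, 120)
motor = rng.normal(5, 1, 120)
pump[50] = 25    # leader anomaly
motor[52] = 20   # follower anomaly

df = pd.concat([
    pd.DataFrame({"systime": times, "uuid": "pump", "value_double": pump}),
    pd.DataFrame({"systime": times, "uuid": "motor", "value_double": motor}),
], ignore_index=True)

events = AnomalyCorrelationEvents(df)
coincident = events.coincident_anomalies(["pump", "motor"], coincidence_window="5min")
cascades = events.cascade_detection("pump", "motor", max_delay="10min")
ranking = events.root_cause_ranking(["pump", "motor"])  # "pump" should rank first
```

One caveat worth noting: `_detect_signal_anomalies` computes a single global Z-score per signal, so slow drifts inflate the standard deviation and can mask short spikes; all three public methods inherit that behavior.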
ts_shape/events/correlation/signal_correlation.py ADDED
@@ -0,0 +1,213 @@
+ import pandas as pd  # type: ignore
+ import numpy as np  # type: ignore
+ from typing import List, Dict, Any, Optional
+
+ from ts_shape.utils.base import Base
+
+
+ class SignalCorrelationEvents(Base):
+     """Correlation: Signal Correlation Analysis
+
+     Analyze time-windowed correlations between pairs of numeric signals.
+     Useful for detecting when normally correlated process variables diverge.
+
+     Methods:
+     - rolling_correlation: Pearson correlation over rolling windows.
+     - correlation_breakdown: Detect periods where correlation drops below threshold.
+     - lag_correlation: Cross-correlation with time lag to find delayed relationships.
+     """
+
+     def __init__(
+         self,
+         dataframe: pd.DataFrame,
+         *,
+         event_uuid: str = "corr:signal",
+         value_column: str = "value_double",
+         time_column: str = "systime",
+     ) -> None:
+         super().__init__(dataframe, column_name=time_column)
+         self.event_uuid = event_uuid
+         self.value_column = value_column
+         self.time_column = time_column
+
+     def _align_signals(
+         self, uuid_a: str, uuid_b: str, resample: str = "1min"
+     ) -> pd.DataFrame:
+         """Align two signals on a common time index via resampling."""
+         a = (
+             self.dataframe[self.dataframe["uuid"] == uuid_a]
+             .copy()
+             .sort_values(self.time_column)
+         )
+         b = (
+             self.dataframe[self.dataframe["uuid"] == uuid_b]
+             .copy()
+             .sort_values(self.time_column)
+         )
+
+         if a.empty or b.empty:
+             return pd.DataFrame(columns=["signal_a", "signal_b"])
+
+         a[self.time_column] = pd.to_datetime(a[self.time_column])
+         b[self.time_column] = pd.to_datetime(b[self.time_column])
+
+         a = a.set_index(self.time_column)[self.value_column].resample(resample).mean()
+         b = b.set_index(self.time_column)[self.value_column].resample(resample).mean()
+
+         aligned = pd.DataFrame({"signal_a": a, "signal_b": b}).dropna()
+         return aligned
+
+     def rolling_correlation(
+         self,
+         uuid_a: str,
+         uuid_b: str,
+         *,
+         resample: str = "1min",
+         window: int = 60,
+     ) -> pd.DataFrame:
+         """Compute rolling Pearson correlation between two signals.
+
+         Args:
+             uuid_a: UUID of first signal.
+             uuid_b: UUID of second signal.
+             resample: Resample interval for alignment.
+             window: Rolling window size (in resampled periods).
+
+         Returns:
+             DataFrame: systime, uuid, source_uuid_a, source_uuid_b,
+                 is_delta, correlation
+         """
+         aligned = self._align_signals(uuid_a, uuid_b, resample=resample)
+         if aligned.empty or len(aligned) < window:
+             return pd.DataFrame(
+                 columns=[
+                     "systime", "uuid", "source_uuid_a", "source_uuid_b",
+                     "is_delta", "correlation",
+                 ]
+             )
+
+         corr = aligned["signal_a"].rolling(window=window, min_periods=max(2, window // 2)).corr(
+             aligned["signal_b"]
+         )
+         out = pd.DataFrame(
+             {
+                 "systime": aligned.index,
+                 "uuid": self.event_uuid,
+                 "source_uuid_a": uuid_a,
+                 "source_uuid_b": uuid_b,
+                 "is_delta": True,
+                 "correlation": corr.values,
+             }
+         ).dropna(subset=["correlation"])
+
+         return out.reset_index(drop=True)
+
+     def correlation_breakdown(
+         self,
+         uuid_a: str,
+         uuid_b: str,
+         *,
+         resample: str = "1min",
+         window: int = 60,
+         threshold: float = 0.5,
+     ) -> pd.DataFrame:
+         """Detect periods where correlation drops below a threshold.
+
+         Returns intervals where previously correlated signals diverge,
+         which may indicate process issues.
+
+         Args:
+             uuid_a: UUID of first signal.
+             uuid_b: UUID of second signal.
+             resample: Resample interval for alignment.
+             window: Rolling window size.
+             threshold: Correlation threshold below which to flag.
+
+         Returns:
+             DataFrame: start, end, uuid, source_uuid_a, source_uuid_b,
+                 is_delta, min_correlation, duration_seconds
+         """
+         corr_df = self.rolling_correlation(
+             uuid_a, uuid_b, resample=resample, window=window
+         )
+         if corr_df.empty:
+             return pd.DataFrame(
+                 columns=[
+                     "start", "end", "uuid", "source_uuid_a", "source_uuid_b",
+                     "is_delta", "min_correlation", "duration_seconds",
+                 ]
+             )
+
+         corr_df["below"] = corr_df["correlation"] < threshold
+         corr_df["group"] = (corr_df["below"] != corr_df["below"].shift()).cumsum()
+
+         breakdowns = corr_df[corr_df["below"]].groupby("group")
+         rows: List[Dict[str, Any]] = []
+         for _, grp in breakdowns:
+             rows.append(
+                 {
+                     "start": grp["systime"].iloc[0],
+                     "end": grp["systime"].iloc[-1],
+                     "uuid": self.event_uuid,
+                     "source_uuid_a": uuid_a,
+                     "source_uuid_b": uuid_b,
+                     "is_delta": True,
+                     "min_correlation": grp["correlation"].min(),
+                     "duration_seconds": (
+                         grp["systime"].iloc[-1] - grp["systime"].iloc[0]
+                     ).total_seconds(),
+                 }
+             )
+
+         return pd.DataFrame(rows)
+
+     def lag_correlation(
+         self,
+         uuid_a: str,
+         uuid_b: str,
+         *,
+         resample: str = "1min",
+         max_lag: int = 30,
+     ) -> pd.DataFrame:
+         """Cross-correlation with time lag analysis.
+
+         Finds the time lag at which two signals are most correlated.
+
+         Args:
+             uuid_a: UUID of first signal (reference).
+             uuid_b: UUID of second signal (lagged).
+             resample: Resample interval for alignment.
+             max_lag: Maximum lag periods to test (in both directions).
+
+         Returns:
+             DataFrame: lag_periods, correlation, is_best_lag
+         """
+         aligned = self._align_signals(uuid_a, uuid_b, resample=resample)
+         if aligned.empty or len(aligned) < max_lag * 2:
+             return pd.DataFrame(columns=["lag_periods", "correlation", "is_best_lag"])
+
+         a = aligned["signal_a"].values
+         b = aligned["signal_b"].values
+         n = len(a)
+
+         rows: List[Dict[str, Any]] = []
+         for lag in range(-max_lag, max_lag + 1):
+             if lag < 0:
+                 corr = np.corrcoef(a[:lag], b[-lag:])[0, 1]
+             elif lag > 0:
+                 corr = np.corrcoef(a[lag:], b[:n - lag])[0, 1]
+             else:
+                 corr = np.corrcoef(a, b)[0, 1]
+
+             if not np.isnan(corr):
+                 rows.append({"lag_periods": lag, "correlation": corr})
+
+         result = pd.DataFrame(rows)
+         if result.empty:
+             return pd.DataFrame(columns=["lag_periods", "correlation", "is_best_lag"])
+
+         best_idx = result["correlation"].abs().idxmax()
+         result["is_best_lag"] = False
+         result.loc[best_idx, "is_best_lag"] = True
+
+         return result
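And a companion sketch for the pairwise correlation methods, under the same assumed `uuid`/`systime`/`value_double` schema; the signal names and 5-period offset are synthetic:

```python
import numpy as np
import pandas as pd
from ts_shape.events.correlation import SignalCorrelationEvents

# Synthetic pair: "flow" is "pressure" shifted by 5 minutes.
times = pd.date_range("2024-01-01", periods=480, freq="1min")
rng = np.random.default_rng(1)
base = np.sin(np.linspace(0, 12 * np.pi, 480)) + rng.normal(0, 0.1, 480)

df = pd.concat([
    pd.DataFrame({"systime": times, "uuid": "pressure", "value_double": base}),
    pd.DataFrame({"systime": times, "uuid": "flow", "value_double": np.roll(base, 5)}),
], ignore_index=True)

events = SignalCorrelationEvents(df)
rolling = events.rolling_correlation("pressure", "flow", window=60)
breaks = events.correlation_breakdown("pressure", "flow", threshold=0.5)
lags = events.lag_correlation("pressure", "flow", max_lag=15)
best = lags[lags["is_best_lag"]]  # peak |correlation| should land at the induced 5-period offset
```

Note that `lag_periods` is expressed in resampled periods (here 1-minute bins), so converting a best lag back to wall-clock time depends on the `resample` argument.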
ts_shape/events/energy/__init__.py ADDED
@@ -0,0 +1,27 @@
+ """Energy Events
+
+ Detectors for energy-related patterns: consumption analysis, efficiency
+ tracking, and peak demand detection on manufacturing/industrial IoT time
+ series data.
+
+ Classes:
+ - EnergyConsumptionEvents: Analyze energy consumption patterns.
+     - consumption_by_window: Aggregate energy consumption per time window.
+     - peak_demand_detection: Detect peak demand periods exceeding thresholds.
+     - consumption_baseline_deviation: Compare actual vs baseline consumption.
+     - energy_per_unit: Calculate energy consumption per production unit.
+
+ - EnergyEfficiencyEvents: Track energy efficiency metrics.
+     - efficiency_trend: Rolling efficiency metric over time.
+     - idle_energy_waste: Detect energy consumption during idle periods.
+     - specific_energy_consumption: Energy per unit output over time.
+     - efficiency_comparison: Compare efficiency across shifts or periods.
+ """
+
+ from .consumption_analysis import EnergyConsumptionEvents
+ from .efficiency_tracking import EnergyEfficiencyEvents
+
+ __all__ = [
+     "EnergyConsumptionEvents",
+     "EnergyEfficiencyEvents",
+ ]