ts-shape 0.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ts_shape/__init__.py +0 -0
- ts_shape/context/__init__.py +9 -0
- ts_shape/context/value_mapping.py +89 -0
- ts_shape/events/__init__.py +14 -0
- ts_shape/events/correlation/__init__.py +24 -0
- ts_shape/events/correlation/anomaly_correlation.py +248 -0
- ts_shape/events/correlation/signal_correlation.py +213 -0
- ts_shape/events/energy/__init__.py +27 -0
- ts_shape/events/energy/consumption_analysis.py +238 -0
- ts_shape/events/energy/efficiency_tracking.py +390 -0
- ts_shape/events/engineering/__init__.py +24 -0
- ts_shape/events/engineering/setpoint_events.py +1025 -0
- ts_shape/events/engineering/startup_events.py +720 -0
- ts_shape/events/maintenance/__init__.py +35 -0
- ts_shape/events/maintenance/degradation_detection.py +368 -0
- ts_shape/events/maintenance/failure_prediction.py +255 -0
- ts_shape/events/maintenance/vibration_analysis.py +217 -0
- ts_shape/events/production/__init__.py +106 -0
- ts_shape/events/production/alarm_management.py +289 -0
- ts_shape/events/production/batch_tracking.py +242 -0
- ts_shape/events/production/changeover.py +282 -0
- ts_shape/events/production/cycle_time_tracking.py +404 -0
- ts_shape/events/production/downtime.py +0 -0
- ts_shape/events/production/downtime_tracking.py +418 -0
- ts_shape/events/production/flow_constraints.py +356 -0
- ts_shape/events/production/line_throughput.py +322 -0
- ts_shape/events/production/machine_state.py +210 -0
- ts_shape/events/production/oee_calculator.py +369 -0
- ts_shape/events/production/part_tracking.py +276 -0
- ts_shape/events/production/quality_tracking.py +507 -0
- ts_shape/events/production/shift_reporting.py +366 -0
- ts_shape/events/quality/__init__.py +27 -0
- ts_shape/events/quality/outlier_detection.py +254 -0
- ts_shape/events/quality/statistical_process_control.py +641 -0
- ts_shape/events/quality/tolerance_deviation.py +456 -0
- ts_shape/events/supplychain/__init__.py +19 -0
- ts_shape/events/supplychain/demand_pattern.py +209 -0
- ts_shape/events/supplychain/inventory_monitoring.py +316 -0
- ts_shape/events/supplychain/lead_time_analysis.py +192 -0
- ts_shape/features/__init__.py +95 -0
- ts_shape/features/cycles/__init__.py +19 -0
- ts_shape/features/cycles/cycle_processor.py +328 -0
- ts_shape/features/cycles/cycles_extractor.py +464 -0
- ts_shape/features/stats/__init__.py +76 -0
- ts_shape/features/stats/boolean_stats.py +71 -0
- ts_shape/features/stats/feature_table.py +118 -0
- ts_shape/features/stats/numeric_stats.py +122 -0
- ts_shape/features/stats/string_stats.py +124 -0
- ts_shape/features/stats/timestamp_stats.py +103 -0
- ts_shape/features/time_stats/__init__.py +10 -0
- ts_shape/features/time_stats/time_stats_numeric.py +89 -0
- ts_shape/loader/__init__.py +51 -0
- ts_shape/loader/combine/__init__.py +8 -0
- ts_shape/loader/combine/integrator.py +139 -0
- ts_shape/loader/context/__init__.py +17 -0
- ts_shape/loader/context/context_enricher.py +162 -0
- ts_shape/loader/metadata/__init__.py +28 -0
- ts_shape/loader/metadata/metadata_api_loader.py +109 -0
- ts_shape/loader/metadata/metadata_db_loader.py +107 -0
- ts_shape/loader/metadata/metadata_json_loader.py +315 -0
- ts_shape/loader/timeseries/__init__.py +37 -0
- ts_shape/loader/timeseries/azure_blob_loader.py +830 -0
- ts_shape/loader/timeseries/energy_api_loader.py +202 -0
- ts_shape/loader/timeseries/parquet_loader.py +169 -0
- ts_shape/loader/timeseries/s3proxy_parquet_loader.py +83 -0
- ts_shape/loader/timeseries/timescale_loader.py +55 -0
- ts_shape/transform/__init__.py +63 -0
- ts_shape/transform/calculator/__init__.py +14 -0
- ts_shape/transform/calculator/numeric_calc.py +120 -0
- ts_shape/transform/filter/__init__.py +41 -0
- ts_shape/transform/filter/boolean_filter.py +37 -0
- ts_shape/transform/filter/custom_filter.py +32 -0
- ts_shape/transform/filter/datetime_filter.py +123 -0
- ts_shape/transform/filter/numeric_filter.py +39 -0
- ts_shape/transform/filter/string_filter.py +44 -0
- ts_shape/transform/functions/__init__.py +8 -0
- ts_shape/transform/functions/lambda_func.py +28 -0
- ts_shape/transform/time_functions/__init__.py +15 -0
- ts_shape/transform/time_functions/timestamp_converter.py +41 -0
- ts_shape/transform/time_functions/timezone_shift.py +150 -0
- ts_shape/utils/__init__.py +8 -0
- ts_shape/utils/base.py +36 -0
- ts_shape-0.0.0.dist-info/METADATA +400 -0
- ts_shape-0.0.0.dist-info/RECORD +87 -0
- ts_shape-0.0.0.dist-info/WHEEL +5 -0
- ts_shape-0.0.0.dist-info/licenses/LICENSE.txt +21 -0
- ts_shape-0.0.0.dist-info/top_level.txt +1 -0
ts_shape/__init__.py
ADDED
File without changes

ts_shape/context/__init__.py
ADDED
@@ -0,0 +1,9 @@
+"""Context
+
+Utilities for enriching DataFrames with contextual information and mappings.
+
+Classes:
+    - ValueMapper: Map categorical codes to readable values from external files.
+        - map_values: Merge and replace a target column using a CSV/JSON mapping table.
+        - _load_mapping_table: Load a mapping table from CSV or JSON.
+"""
ts_shape/context/value_mapping.py
ADDED
@@ -0,0 +1,89 @@
+import pandas as pd  # type: ignore
+from typing import Union
+from ts_shape.utils.base import Base
+
+class ValueMapper(Base):
+    """
+    A class to map values from specified columns of a DataFrame using a mapping table (CSV or JSON file),
+    inheriting from the Base class.
+    """
+
+    def __init__(
+        self,
+        dataframe: pd.DataFrame,
+        mapping_file: str,
+        map_column: str,
+        mapping_key_column: str,
+        mapping_value_column: str,
+        file_type: str = 'csv',
+        sep: str = ',',
+        encoding: str = 'utf-8',
+        column_name: str = 'systime'
+    ) -> None:
+        """
+        Initializes ValueMapper and the base DataFrame from the Base class.
+
+        Args:
+            dataframe (pd.DataFrame): The DataFrame to be processed and mapped.
+            mapping_file (str): The file path of the mapping table (CSV or JSON).
+            map_column (str): The name of the column in the DataFrame that needs to be mapped.
+            mapping_key_column (str): The column in the mapping table to match with values from the DataFrame.
+            mapping_value_column (str): The column in the mapping table containing the values to map to.
+            file_type (str): The type of the mapping file ('csv' or 'json'). Defaults to 'csv'.
+            sep (str): The separator for CSV files. Defaults to ','.
+            encoding (str): The encoding to use for reading the file. Defaults to 'utf-8'.
+            column_name (str): The name of the column to sort the DataFrame by in the base class. Defaults to 'systime'.
+        """
+        # Initialize the Base class with the sorted DataFrame
+        super().__init__(dataframe, column_name)
+
+        # Additional attributes for ValueMapper
+        self.map_column: str = map_column
+        self.mapping_key_column: str = mapping_key_column
+        self.mapping_value_column: str = mapping_value_column
+        self.sep: str = sep
+        self.encoding: str = encoding
+
+        # Load the mapping table based on file type
+        self.mapping_table: pd.DataFrame = self._load_mapping_table(mapping_file, file_type)
+
+    def _load_mapping_table(self, mapping_file: str, file_type: str) -> pd.DataFrame:
+        """
+        Loads the mapping table from a CSV or JSON file.
+
+        Args:
+            mapping_file (str): The file path of the mapping table.
+            file_type (str): The type of the file ('csv' or 'json').
+
+        Returns:
+            pd.DataFrame: The loaded mapping table as a DataFrame.
+        """
+        if file_type == 'csv':
+            return pd.read_csv(mapping_file, sep=self.sep, encoding=self.encoding)
+        elif file_type == 'json':
+            return pd.read_json(mapping_file, encoding=self.encoding)
+        else:
+            raise ValueError("Unsupported file type. Please use 'csv' or 'json'.")
+
+    def map_values(self) -> pd.DataFrame:
+        """
+        Maps values in the specified DataFrame column based on the mapping table.
+
+        Returns:
+            pd.DataFrame: A new DataFrame with the mapped values.
+        """
+        # Merge the mapping table with the DataFrame based on the map_column and mapping_key_column
+        mapped_df = self.dataframe.merge(
+            self.mapping_table[[self.mapping_key_column, self.mapping_value_column]],
+            left_on=self.map_column,
+            right_on=self.mapping_key_column,
+            how='left'
+        )
+
+        # Replace the original column with the mapped values
+        mapped_df[self.map_column] = mapped_df[self.mapping_value_column]
+
+        # Drop unnecessary columns
+        mapped_df = mapped_df.drop([self.mapping_key_column, self.mapping_value_column], axis=1)
+
+        return mapped_df
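For orientation, a minimal usage sketch of the ValueMapper added above. The DataFrame contents, the states.csv file, and its code/label columns are hypothetical; only the ValueMapper signature comes from the diff.

import pandas as pd
from ts_shape.context.value_mapping import ValueMapper

# Hypothetical timeseries with coded machine states.
df = pd.DataFrame({
    "systime": pd.date_range("2024-01-01", periods=3, freq="1min"),
    "state_code": [0, 1, 2],
})

# 'states.csv' is an assumed mapping table with columns 'code' and 'label'.
mapper = ValueMapper(
    dataframe=df,
    mapping_file="states.csv",
    map_column="state_code",
    mapping_key_column="code",
    mapping_value_column="label",
    file_type="csv",
)
mapped = mapper.map_values()  # 'state_code' now holds the labels from 'states.csv'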
ts_shape/events/__init__.py
ADDED
@@ -0,0 +1,14 @@
+"""Events
+
+Extract events from shaped timeseries across quality, maintenance,
+production, energy, correlation, and supply chain domains.
+
+Subpackages:
+    - quality: Outlier detection, SPC, tolerance deviation
+    - production: Machine state, throughput, changeover, OEE, alarms, batches, shifts
+    - engineering: Setpoint changes, startup detection
+    - maintenance: Degradation detection, failure prediction, vibration analysis
+    - supplychain: Inventory monitoring, lead time analysis, demand patterns
+    - energy: Consumption analysis, efficiency tracking
+    - correlation: Signal correlation, anomaly correlation
+"""
ts_shape/events/correlation/__init__.py
ADDED
@@ -0,0 +1,24 @@
+"""Correlation Events
+
+Cross-signal correlation analysis for detecting related anomalies
+and patterns across multiple timeseries signals.
+
+Classes:
+    - SignalCorrelationEvents: Analyze correlations between signals.
+        - rolling_correlation: Time-windowed Pearson correlation between two signals.
+        - correlation_breakdown: Detect periods where normally correlated signals diverge.
+        - lag_correlation: Cross-correlation with time lag analysis.
+
+    - AnomalyCorrelationEvents: Correlate anomaly events across signals.
+        - coincident_anomalies: Find anomalies occurring simultaneously across signals.
+        - cascade_detection: Detect anomaly cascades (signal A anomaly followed by B).
+        - root_cause_ranking: Rank signals by how often their anomalies precede others.
+"""
+
+from .signal_correlation import SignalCorrelationEvents
+from .anomaly_correlation import AnomalyCorrelationEvents
+
+__all__ = [
+    "SignalCorrelationEvents",
+    "AnomalyCorrelationEvents",
+]
ts_shape/events/correlation/anomaly_correlation.py
ADDED
@@ -0,0 +1,248 @@
+import pandas as pd  # type: ignore
+import numpy as np  # type: ignore
+from typing import List, Dict, Any, Optional
+
+from ts_shape.utils.base import Base
+
+
+class AnomalyCorrelationEvents(Base):
+    """Correlation: Anomaly Correlation Analysis
+
+    Correlate anomaly events across multiple signals to find coincident
+    patterns, cascading failures, and root cause candidates.
+
+    Methods:
+        - coincident_anomalies: Find anomalies that co-occur within a time window.
+        - cascade_detection: Detect anomaly cascades (A precedes B within a window).
+        - root_cause_ranking: Rank signals by how often their anomalies precede others.
+    """
+
+    def __init__(
+        self,
+        dataframe: pd.DataFrame,
+        *,
+        event_uuid: str = "corr:anomaly",
+        value_column: str = "value_double",
+        time_column: str = "systime",
+    ) -> None:
+        super().__init__(dataframe, column_name=time_column)
+        self.event_uuid = event_uuid
+        self.value_column = value_column
+        self.time_column = time_column
+
+    def _detect_signal_anomalies(
+        self,
+        signal_uuid: str,
+        *,
+        z_threshold: float = 3.0,
+    ) -> pd.DataFrame:
+        """Internal: detect anomalies in a single signal using Z-score."""
+        s = (
+            self.dataframe[self.dataframe["uuid"] == signal_uuid]
+            .copy()
+            .sort_values(self.time_column)
+        )
+        if s.empty or len(s) < 3:
+            return pd.DataFrame(columns=[self.time_column, "uuid", self.value_column, "z_score"])
+
+        s[self.time_column] = pd.to_datetime(s[self.time_column])
+        values = s[self.value_column]
+        mean = values.mean()
+        std = values.std()
+        if std == 0:
+            return pd.DataFrame(columns=[self.time_column, "uuid", self.value_column, "z_score"])
+
+        s["z_score"] = ((values - mean) / std).abs()
+        anomalies = s[s["z_score"] >= z_threshold][[self.time_column, "uuid", self.value_column, "z_score"]]
+        return anomalies.reset_index(drop=True)
+
+    def coincident_anomalies(
+        self,
+        signal_uuids: List[str],
+        *,
+        z_threshold: float = 3.0,
+        coincidence_window: str = "5min",
+        min_signals: int = 2,
+    ) -> pd.DataFrame:
+        """Find anomalies that co-occur across multiple signals within a time window.
+
+        Args:
+            signal_uuids: List of signal UUIDs to analyze.
+            z_threshold: Z-score threshold for anomaly detection per signal.
+            coincidence_window: Time window for considering anomalies coincident.
+            min_signals: Minimum number of signals with anomalies to flag.
+
+        Returns:
+            DataFrame: window_start, window_end, uuid, is_delta,
+                anomaly_count, signal_uuids_involved
+        """
+        all_anomalies = []
+        for uid in signal_uuids:
+            anom = self._detect_signal_anomalies(uid, z_threshold=z_threshold)
+            if not anom.empty:
+                all_anomalies.append(anom)
+
+        if not all_anomalies:
+            return pd.DataFrame(
+                columns=[
+                    "window_start", "window_end", "uuid", "is_delta",
+                    "anomaly_count", "signal_uuids_involved",
+                ]
+            )
+
+        combined = pd.concat(all_anomalies, ignore_index=True)
+        combined = combined.sort_values(self.time_column)
+
+        window_td = pd.to_timedelta(coincidence_window)
+        rows: List[Dict[str, Any]] = []
+        processed = set()
+
+        for i, row in combined.iterrows():
+            if i in processed:
+                continue
+            t = row[self.time_column]
+            window_mask = (
+                (combined[self.time_column] >= t)
+                & (combined[self.time_column] <= t + window_td)
+            )
+            window_data = combined[window_mask]
+            unique_signals = window_data["uuid"].unique()
+
+            if len(unique_signals) >= min_signals:
+                rows.append(
+                    {
+                        "window_start": t,
+                        "window_end": t + window_td,
+                        "uuid": self.event_uuid,
+                        "is_delta": True,
+                        "anomaly_count": len(window_data),
+                        "signal_uuids_involved": ",".join(sorted(unique_signals)),
+                    }
+                )
+                processed.update(window_data.index.tolist())
+
+        return pd.DataFrame(rows)
+
+    def cascade_detection(
+        self,
+        leader_uuid: str,
+        follower_uuid: str,
+        *,
+        z_threshold: float = 3.0,
+        max_delay: str = "10min",
+    ) -> pd.DataFrame:
+        """Detect anomaly cascades: leader anomaly followed by follower anomaly.
+
+        Identifies cases where an anomaly in signal A is followed by an
+        anomaly in signal B within the max_delay window.
+
+        Args:
+            leader_uuid: UUID of the leading signal.
+            follower_uuid: UUID of the following signal.
+            z_threshold: Z-score threshold for anomaly detection.
+            max_delay: Maximum time between leader and follower anomaly.
+
+        Returns:
+            DataFrame: leader_time, follower_time, uuid, is_delta,
+                leader_uuid, follower_uuid, delay_seconds
+        """
+        leader_anom = self._detect_signal_anomalies(leader_uuid, z_threshold=z_threshold)
+        follower_anom = self._detect_signal_anomalies(follower_uuid, z_threshold=z_threshold)
+
+        if leader_anom.empty or follower_anom.empty:
+            return pd.DataFrame(
+                columns=[
+                    "leader_time", "follower_time", "uuid", "is_delta",
+                    "leader_uuid", "follower_uuid", "delay_seconds",
+                ]
+            )
+
+        max_delay_td = pd.to_timedelta(max_delay)
+        rows: List[Dict[str, Any]] = []
+        used_followers = set()
+
+        for _, lrow in leader_anom.iterrows():
+            lt = lrow[self.time_column]
+            candidates = follower_anom[
+                (follower_anom[self.time_column] > lt)
+                & (follower_anom[self.time_column] <= lt + max_delay_td)
+            ]
+            for fidx, frow in candidates.iterrows():
+                if fidx not in used_followers:
+                    rows.append(
+                        {
+                            "leader_time": lt,
+                            "follower_time": frow[self.time_column],
+                            "uuid": self.event_uuid,
+                            "is_delta": True,
+                            "leader_uuid": leader_uuid,
+                            "follower_uuid": follower_uuid,
+                            "delay_seconds": (
+                                frow[self.time_column] - lt
+                            ).total_seconds(),
+                        }
+                    )
+                    used_followers.add(fidx)
+                    break  # one follower per leader
+
+        return pd.DataFrame(rows)
+
+    def root_cause_ranking(
+        self,
+        signal_uuids: List[str],
+        *,
+        z_threshold: float = 3.0,
+        max_delay: str = "10min",
+    ) -> pd.DataFrame:
+        """Rank signals by how often their anomalies precede others.
+
+        For each pair of signals, counts how many times signal A's anomaly
+        precedes signal B's anomaly within max_delay. Signals that frequently
+        lead are potential root causes.
+
+        Args:
+            signal_uuids: List of signal UUIDs.
+            z_threshold: Z-score threshold.
+            max_delay: Maximum delay for cascade detection.
+
+        Returns:
+            DataFrame: signal_uuid, leader_count, follower_count,
+                leader_ratio, rank
+        """
+        if len(signal_uuids) < 2:
+            return pd.DataFrame(
+                columns=["signal_uuid", "leader_count", "follower_count", "leader_ratio", "rank"]
+            )
+
+        leader_counts: Dict[str, int] = {uid: 0 for uid in signal_uuids}
+        follower_counts: Dict[str, int] = {uid: 0 for uid in signal_uuids}
+
+        for i, uid_a in enumerate(signal_uuids):
+            for uid_b in signal_uuids[i + 1 :]:
+                cascades_ab = self.cascade_detection(
+                    uid_a, uid_b, z_threshold=z_threshold, max_delay=max_delay
+                )
+                cascades_ba = self.cascade_detection(
+                    uid_b, uid_a, z_threshold=z_threshold, max_delay=max_delay
+                )
+                leader_counts[uid_a] += len(cascades_ab)
+                follower_counts[uid_b] += len(cascades_ab)
+                leader_counts[uid_b] += len(cascades_ba)
+                follower_counts[uid_a] += len(cascades_ba)
+
+        rows = []
+        for uid in signal_uuids:
+            total = leader_counts[uid] + follower_counts[uid]
+            rows.append(
+                {
+                    "signal_uuid": uid,
+                    "leader_count": leader_counts[uid],
+                    "follower_count": follower_counts[uid],
+                    "leader_ratio": leader_counts[uid] / total if total > 0 else 0.0,
+                }
+            )
+
+        result = pd.DataFrame(rows)
+        result = result.sort_values("leader_ratio", ascending=False).reset_index(drop=True)
+        result["rank"] = range(1, len(result) + 1)
+        return result
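A minimal sketch of how AnomalyCorrelationEvents could be driven, using synthetic long-format data in the (uuid, systime, value_double) shape the class expects. The signal names and injected values are hypothetical.

import numpy as np
import pandas as pd
from ts_shape.events.correlation.anomaly_correlation import AnomalyCorrelationEvents

# Synthetic data: a spike in 'sensor:a' is followed two minutes later
# by a spike in 'sensor:b'.
times = pd.date_range("2024-01-01", periods=60, freq="1min")
rng = np.random.default_rng(0)
frames = []
for uid, offset in [("sensor:a", 20), ("sensor:b", 22)]:
    vals = rng.normal(0, 1, size=60)
    vals[offset] = 12.0  # injected anomaly, far above the 3-sigma threshold
    frames.append(pd.DataFrame({"systime": times, "uuid": uid, "value_double": vals}))
df = pd.concat(frames, ignore_index=True)

events = AnomalyCorrelationEvents(df)
coincident = events.coincident_anomalies(["sensor:a", "sensor:b"], coincidence_window="5min")
cascades = events.cascade_detection("sensor:a", "sensor:b", max_delay="10min")
ranking = events.root_cause_ranking(["sensor:a", "sensor:b"])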
ts_shape/events/correlation/signal_correlation.py
ADDED
@@ -0,0 +1,213 @@
+import pandas as pd  # type: ignore
+import numpy as np  # type: ignore
+from typing import List, Dict, Any, Optional
+
+from ts_shape.utils.base import Base
+
+
+class SignalCorrelationEvents(Base):
+    """Correlation: Signal Correlation Analysis
+
+    Analyze time-windowed correlations between pairs of numeric signals.
+    Useful for detecting when normally correlated process variables diverge.
+
+    Methods:
+        - rolling_correlation: Pearson correlation over rolling windows.
+        - correlation_breakdown: Detect periods where correlation drops below threshold.
+        - lag_correlation: Cross-correlation with time lag to find delayed relationships.
+    """
+
+    def __init__(
+        self,
+        dataframe: pd.DataFrame,
+        *,
+        event_uuid: str = "corr:signal",
+        value_column: str = "value_double",
+        time_column: str = "systime",
+    ) -> None:
+        super().__init__(dataframe, column_name=time_column)
+        self.event_uuid = event_uuid
+        self.value_column = value_column
+        self.time_column = time_column
+
+    def _align_signals(
+        self, uuid_a: str, uuid_b: str, resample: str = "1min"
+    ) -> pd.DataFrame:
+        """Align two signals on a common time index via resampling."""
+        a = (
+            self.dataframe[self.dataframe["uuid"] == uuid_a]
+            .copy()
+            .sort_values(self.time_column)
+        )
+        b = (
+            self.dataframe[self.dataframe["uuid"] == uuid_b]
+            .copy()
+            .sort_values(self.time_column)
+        )
+
+        if a.empty or b.empty:
+            return pd.DataFrame(columns=["signal_a", "signal_b"])
+
+        a[self.time_column] = pd.to_datetime(a[self.time_column])
+        b[self.time_column] = pd.to_datetime(b[self.time_column])
+
+        a = a.set_index(self.time_column)[self.value_column].resample(resample).mean()
+        b = b.set_index(self.time_column)[self.value_column].resample(resample).mean()
+
+        aligned = pd.DataFrame({"signal_a": a, "signal_b": b}).dropna()
+        return aligned
+
+    def rolling_correlation(
+        self,
+        uuid_a: str,
+        uuid_b: str,
+        *,
+        resample: str = "1min",
+        window: int = 60,
+    ) -> pd.DataFrame:
+        """Compute rolling Pearson correlation between two signals.
+
+        Args:
+            uuid_a: UUID of first signal.
+            uuid_b: UUID of second signal.
+            resample: Resample interval for alignment.
+            window: Rolling window size (in resampled periods).
+
+        Returns:
+            DataFrame: systime, uuid, source_uuid_a, source_uuid_b,
+                is_delta, correlation
+        """
+        aligned = self._align_signals(uuid_a, uuid_b, resample=resample)
+        if aligned.empty or len(aligned) < window:
+            return pd.DataFrame(
+                columns=[
+                    "systime", "uuid", "source_uuid_a", "source_uuid_b",
+                    "is_delta", "correlation",
+                ]
+            )
+
+        corr = aligned["signal_a"].rolling(window=window, min_periods=max(2, window // 2)).corr(
+            aligned["signal_b"]
+        )
+        out = pd.DataFrame(
+            {
+                "systime": aligned.index,
+                "uuid": self.event_uuid,
+                "source_uuid_a": uuid_a,
+                "source_uuid_b": uuid_b,
+                "is_delta": True,
+                "correlation": corr.values,
+            }
+        ).dropna(subset=["correlation"])
+
+        return out.reset_index(drop=True)
+
+    def correlation_breakdown(
+        self,
+        uuid_a: str,
+        uuid_b: str,
+        *,
+        resample: str = "1min",
+        window: int = 60,
+        threshold: float = 0.5,
+    ) -> pd.DataFrame:
+        """Detect periods where correlation drops below a threshold.
+
+        Returns intervals where previously correlated signals diverge,
+        which may indicate process issues.
+
+        Args:
+            uuid_a: UUID of first signal.
+            uuid_b: UUID of second signal.
+            resample: Resample interval for alignment.
+            window: Rolling window size.
+            threshold: Correlation threshold below which to flag.
+
+        Returns:
+            DataFrame: start, end, uuid, source_uuid_a, source_uuid_b,
+                is_delta, min_correlation, duration_seconds
+        """
+        corr_df = self.rolling_correlation(
+            uuid_a, uuid_b, resample=resample, window=window
+        )
+        if corr_df.empty:
+            return pd.DataFrame(
+                columns=[
+                    "start", "end", "uuid", "source_uuid_a", "source_uuid_b",
+                    "is_delta", "min_correlation", "duration_seconds",
+                ]
+            )
+
+        corr_df["below"] = corr_df["correlation"] < threshold
+        corr_df["group"] = (corr_df["below"] != corr_df["below"].shift()).cumsum()
+
+        breakdowns = corr_df[corr_df["below"]].groupby("group")
+        rows: List[Dict[str, Any]] = []
+        for _, grp in breakdowns:
+            rows.append(
+                {
+                    "start": grp["systime"].iloc[0],
+                    "end": grp["systime"].iloc[-1],
+                    "uuid": self.event_uuid,
+                    "source_uuid_a": uuid_a,
+                    "source_uuid_b": uuid_b,
+                    "is_delta": True,
+                    "min_correlation": grp["correlation"].min(),
+                    "duration_seconds": (
+                        grp["systime"].iloc[-1] - grp["systime"].iloc[0]
+                    ).total_seconds(),
+                }
+            )
+
+        return pd.DataFrame(rows)
+
+    def lag_correlation(
+        self,
+        uuid_a: str,
+        uuid_b: str,
+        *,
+        resample: str = "1min",
+        max_lag: int = 30,
+    ) -> pd.DataFrame:
+        """Cross-correlation with time lag analysis.
+
+        Finds the time lag at which two signals are most correlated.
+
+        Args:
+            uuid_a: UUID of first signal (reference).
+            uuid_b: UUID of second signal (lagged).
+            resample: Resample interval for alignment.
+            max_lag: Maximum lag periods to test (in both directions).
+
+        Returns:
+            DataFrame: lag_periods, correlation, is_best_lag
+        """
+        aligned = self._align_signals(uuid_a, uuid_b, resample=resample)
+        if aligned.empty or len(aligned) < max_lag * 2:
+            return pd.DataFrame(columns=["lag_periods", "correlation", "is_best_lag"])
+
+        a = aligned["signal_a"].values
+        b = aligned["signal_b"].values
+        n = len(a)
+
+        rows: List[Dict[str, Any]] = []
+        for lag in range(-max_lag, max_lag + 1):
+            if lag < 0:
+                corr = np.corrcoef(a[:lag], b[-lag:])[0, 1]
+            elif lag > 0:
+                corr = np.corrcoef(a[lag:], b[:n - lag])[0, 1]
+            else:
+                corr = np.corrcoef(a, b)[0, 1]
+
+            if not np.isnan(corr):
+                rows.append({"lag_periods": lag, "correlation": corr})
+
+        result = pd.DataFrame(rows)
+        if result.empty:
+            return pd.DataFrame(columns=["lag_periods", "correlation", "is_best_lag"])
+
+        best_idx = result["correlation"].abs().idxmax()
+        result["is_best_lag"] = False
+        result.loc[best_idx, "is_best_lag"] = True
+
+        return result
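A minimal sketch of SignalCorrelationEvents on synthetic data, again assuming the (uuid, systime, value_double) long format. The signal names 'pressure' and 'flow' and the 3-minute offset are hypothetical.

import numpy as np
import pandas as pd
from ts_shape.events.correlation.signal_correlation import SignalCorrelationEvents

# Synthetic data: 'flow' is a delayed copy of 'pressure' (3 periods at 1min).
times = pd.date_range("2024-01-01", periods=240, freq="1min")
rng = np.random.default_rng(1)
pressure = np.sin(np.linspace(0, 12, 240)) + rng.normal(0, 0.1, 240)
flow = np.roll(pressure, 3)  # wrap-around at the edges is negligible here
df = pd.concat(
    [
        pd.DataFrame({"systime": times, "uuid": "pressure", "value_double": pressure}),
        pd.DataFrame({"systime": times, "uuid": "flow", "value_double": flow}),
    ],
    ignore_index=True,
)

events = SignalCorrelationEvents(df)
rolling = events.rolling_correlation("pressure", "flow", window=60)
breakdowns = events.correlation_breakdown("pressure", "flow", threshold=0.5)
lags = events.lag_correlation("pressure", "flow", max_lag=10)
best = lags[lags["is_best_lag"]]  # peak |correlation| at a 3-period offset
                                  # (the sign follows the method's lag convention)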
ts_shape/events/energy/__init__.py
ADDED
@@ -0,0 +1,27 @@
+"""Energy Events
+
+Detectors for energy-related patterns: consumption analysis, efficiency
+tracking, and peak demand detection on manufacturing/industrial IoT time
+series data.
+
+Classes:
+    - EnergyConsumptionEvents: Analyze energy consumption patterns.
+        - consumption_by_window: Aggregate energy consumption per time window.
+        - peak_demand_detection: Detect peak demand periods exceeding thresholds.
+        - consumption_baseline_deviation: Compare actual vs baseline consumption.
+        - energy_per_unit: Calculate energy consumption per production unit.
+
+    - EnergyEfficiencyEvents: Track energy efficiency metrics.
+        - efficiency_trend: Rolling efficiency metric over time.
+        - idle_energy_waste: Detect energy consumption during idle periods.
+        - specific_energy_consumption: Energy per unit output over time.
+        - efficiency_comparison: Compare efficiency across shifts or periods.
+"""
+
+from .consumption_analysis import EnergyConsumptionEvents
+from .efficiency_tracking import EnergyEfficiencyEvents
+
+__all__ = [
+    "EnergyConsumptionEvents",
+    "EnergyEfficiencyEvents",
+]