ts-shape 0.0.0.19__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (56) hide show
  1. ts-shape-0.0.0.19/LICENSE.txt +21 -0
  2. ts-shape-0.0.0.19/MANIFEST.in +0 -0
  3. ts-shape-0.0.0.19/PKG-INFO +75 -0
  4. ts-shape-0.0.0.19/README.md +60 -0
  5. ts-shape-0.0.0.19/setup.cfg +4 -0
  6. ts-shape-0.0.0.19/setup.py +26 -0
  7. ts-shape-0.0.0.19/src/ts_shape/context/__init__.py +1 -0
  8. ts-shape-0.0.0.19/src/ts_shape/context/value_mapping.py +89 -0
  9. ts-shape-0.0.0.19/src/ts_shape/events/__init__.py +0 -0
  10. ts-shape-0.0.0.19/src/ts_shape/events/maintenance/__init__.py +0 -0
  11. ts-shape-0.0.0.19/src/ts_shape/events/production/__init__.py +0 -0
  12. ts-shape-0.0.0.19/src/ts_shape/events/quality/__init__.py +0 -0
  13. ts-shape-0.0.0.19/src/ts_shape/events/quality/outlier_detection.py +120 -0
  14. ts-shape-0.0.0.19/src/ts_shape/events/quality/statistical_process_control.py +191 -0
  15. ts-shape-0.0.0.19/src/ts_shape/events/quality/tolerance_deviation.py +87 -0
  16. ts-shape-0.0.0.19/src/ts_shape/features/__init__.py +0 -0
  17. ts-shape-0.0.0.19/src/ts_shape/features/cycles/__init__.py +0 -0
  18. ts-shape-0.0.0.19/src/ts_shape/features/cycles/cycle_processor.py +121 -0
  19. ts-shape-0.0.0.19/src/ts_shape/features/cycles/cycles_extractor.py +109 -0
  20. ts-shape-0.0.0.19/src/ts_shape/features/stats/__init__.py +0 -0
  21. ts-shape-0.0.0.19/src/ts_shape/features/stats/boolean_stats.py +71 -0
  22. ts-shape-0.0.0.19/src/ts_shape/features/stats/feature_table.py +118 -0
  23. ts-shape-0.0.0.19/src/ts_shape/features/stats/numeric_stats.py +122 -0
  24. ts-shape-0.0.0.19/src/ts_shape/features/stats/string_stats.py +124 -0
  25. ts-shape-0.0.0.19/src/ts_shape/features/stats/timestamp_stats.py +103 -0
  26. ts-shape-0.0.0.19/src/ts_shape/features/time_stats/__init__.py +0 -0
  27. ts-shape-0.0.0.19/src/ts_shape/features/time_stats/time_stats_numeric.py +89 -0
  28. ts-shape-0.0.0.19/src/ts_shape/loader/__init__.py +0 -0
  29. ts-shape-0.0.0.19/src/ts_shape/loader/combine/__init__.py +0 -0
  30. ts-shape-0.0.0.19/src/ts_shape/loader/combine/integrator.py +99 -0
  31. ts-shape-0.0.0.19/src/ts_shape/loader/context/__init__.py +0 -0
  32. ts-shape-0.0.0.19/src/ts_shape/loader/metadata/__init__.py +0 -0
  33. ts-shape-0.0.0.19/src/ts_shape/loader/metadata/metadata_api_loader.py +109 -0
  34. ts-shape-0.0.0.19/src/ts_shape/loader/metadata/metadata_db_loader.py +107 -0
  35. ts-shape-0.0.0.19/src/ts_shape/loader/timeseries/__init__.py +0 -0
  36. ts-shape-0.0.0.19/src/ts_shape/loader/timeseries/parquet_loader.py +169 -0
  37. ts-shape-0.0.0.19/src/ts_shape/loader/timeseries/s3proxy_parquet_loader.py +83 -0
  38. ts-shape-0.0.0.19/src/ts_shape/loader/timeseries/timescale_loader.py +55 -0
  39. ts-shape-0.0.0.19/src/ts_shape/transform/__init__.py +0 -0
  40. ts-shape-0.0.0.19/src/ts_shape/transform/calculator/__init__.py +0 -0
  41. ts-shape-0.0.0.19/src/ts_shape/transform/calculator/numeric_calc.py +120 -0
  42. ts-shape-0.0.0.19/src/ts_shape/transform/filter/__init__.py +0 -0
  43. ts-shape-0.0.0.19/src/ts_shape/transform/filter/boolean_filter.py +37 -0
  44. ts-shape-0.0.0.19/src/ts_shape/transform/filter/custom_filter.py +32 -0
  45. ts-shape-0.0.0.19/src/ts_shape/transform/filter/datetime_filter.py +123 -0
  46. ts-shape-0.0.0.19/src/ts_shape/transform/filter/numeric_filter.py +39 -0
  47. ts-shape-0.0.0.19/src/ts_shape/transform/filter/string_filter.py +44 -0
  48. ts-shape-0.0.0.19/src/ts_shape/transform/functions/__init__.py +0 -0
  49. ts-shape-0.0.0.19/src/ts_shape/transform/functions/lambda_func.py +28 -0
  50. ts-shape-0.0.0.19/src/ts_shape/transform/time_functions/__init__.py +0 -0
  51. ts-shape-0.0.0.19/src/ts_shape/transform/time_functions/timestamp_converter.py +41 -0
  52. ts-shape-0.0.0.19/src/ts_shape/transform/time_functions/timezone_shift.py +150 -0
  53. ts-shape-0.0.0.19/src/ts_shape.egg-info/PKG-INFO +75 -0
  54. ts-shape-0.0.0.19/src/ts_shape.egg-info/SOURCES.txt +54 -0
  55. ts-shape-0.0.0.19/src/ts_shape.egg-info/dependency_links.txt +1 -0
  56. ts-shape-0.0.0.19/src/ts_shape.egg-info/top_level.txt +1 -0
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) [2024] Jakob Gabriel
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
File without changes
@@ -0,0 +1,75 @@
1
+ Metadata-Version: 2.1
2
+ Name: ts-shape
3
+ Version: 0.0.0.19
4
+ Summary: ts-shape filters, transforms and engineers your timeseries dataframe
5
+ Home-page: https://jakobgabriel.github.io/ts-shape/
6
+ Author: Jakob Gabriel
7
+ Author-email: jakob.gabriel5@googlemail.com
8
+ Project-URL: Bug Tracker, https://github.com/jakobgabriel/ts-shape
9
+ Classifier: Programming Language :: Python :: 3
10
+ Classifier: License :: OSI Approved :: MIT License
11
+ Classifier: Operating System :: OS Independent
12
+ Requires-Python: >=3.10
13
+ Description-Content-Type: text/markdown
14
+ License-File: LICENSE.txt
15
+
16
+ # ts-shape | Timeseries Shaper
17
+
18
+ [![pypi version](https://img.shields.io/pypi/v/timeseries-shaper.svg)](https://pypi.org/project/timeseries-shaper/)
19
+ [![downloads](https://static.pepy.tech/badge/timeseries-shaper/week)](https://pepy.tech/projects/timeseries-shaper)
20
+ ![documentation workflow](https://github.com/jakobgabriel/ts-shape/actions/workflows/generate_docs.yml/badge.svg)
21
+ [![documentation](https://img.shields.io/badge/docs-mkdocs-708FCC.svg?style=flat)](https://jakobgabriel.github.io/ts-shape/)
22
+
23
+ ----
24
+
25
+ This repository contains the *ts-shape* python package. The abbreviation stands for
26
+
27
+ *"Time Series shaping with rule based methods"*.
28
+
29
+ ts-shape is a Python library for efficiently transforming, contextualizing and extracting events from time series data. It provides a set of tools to handle various transformations, making data preparation tasks easier and more intuitive.
30
+
31
+ Besides that, multiple engineering-specific methods are utilized to make it fast and easy to work with time series data.
32
+
33
+ ## Features | Concept
34
+
35
+
36
+ | **Category** | **Feature** | **Status** |
37
+ |---------------|--------------------------------------------------------|------------|
38
+ | **Transform** | Filters: Datatype-specific filters | ✔️ |
39
+ | | Functions: Lambda functions for transformations | ✔️ |
40
+ | | Time Functions: Time-specific transformations | ✔️ |
41
+ | | Calculator: Calculation-based transformations | ✔️ |
42
+ | **Features** | Stats: Datatype-specific statistics | ✔️ |
43
+ | | Time Stats: Timestamp-specific statistics | ✔️ |
44
+ | **Context** | Contextualize Timeseries datasets with foreign sources | ❌ |
45
+ | **Events** | Quality Events | ❌ |
46
+ | | Maintenance Events | ❌ |
47
+ | | Production Events | ❌ |
48
+ | | Engineering Events | ❌ |
49
+
50
+
51
+ ## Installation
52
+
53
+ Install ts-shape using pip:
54
+
55
+ ```bash
56
+ pip install timeseries-shaper
57
+ ```
58
+
59
+ ## Documentation
60
+
61
+ For full documentation, visit GitHub Pages or check out the docstrings in the code.
62
+
63
+ ## Contributing
64
+
65
+ Contributions are welcome! For major changes, please open an issue first to discuss what you would like to change.
66
+
67
+ Please ensure to update tests as appropriate.
68
+
69
+ ## License
70
+
71
+ Distributed under the MIT License. See LICENSE for more information.
72
+
73
+ ## Acknowledgements
74
+
75
+ !TODO
@@ -0,0 +1,60 @@
1
+ # ts-shape | Timeseries Shaper
2
+
3
+ [![pypi version](https://img.shields.io/pypi/v/timeseries-shaper.svg)](https://pypi.org/project/timeseries-shaper/)
4
+ [![downloads](https://static.pepy.tech/badge/timeseries-shaper/week)](https://pepy.tech/projects/timeseries-shaper)
5
+ ![documentation workflow](https://github.com/jakobgabriel/ts-shape/actions/workflows/generate_docs.yml/badge.svg)
6
+ [![documentation](https://img.shields.io/badge/docs-mkdocs-708FCC.svg?style=flat)](https://jakobgabriel.github.io/ts-shape/)
7
+
8
+ ----
9
+
10
+ This repository contains the *ts-shape* python package. The abbreviation stands for
11
+
12
+ *"Time Series shaping with rule based methods"*.
13
+
14
+ ts-shape is a Python library for efficiently transforming, contextualizing and extracting events from time series data. It provides a set of tools to handle various transformations, making data preparation tasks easier and more intuitive.
15
+
16
+ Besides that, multiple engineering-specific methods are utilized to make it fast and easy to work with time series data.
17
+
18
+ ## Features | Concept
19
+
20
+
21
+ | **Category** | **Feature** | **Status** |
22
+ |---------------|--------------------------------------------------------|------------|
23
+ | **Transform** | Filters: Datatype-specific filters | ✔️ |
24
+ | | Functions: Lambda functions for transformations | ✔️ |
25
+ | | Time Functions: Time-specific transformations | ✔️ |
26
+ | | Calculator: Calculation-based transformations | ✔️ |
27
+ | **Features** | Stats: Datatype-specific statistics | ✔️ |
28
+ | | Time Stats: Timestamp-specific statistics | ✔️ |
29
+ | **Context** | Contextualize Timeseries datasets with foreign sources | ❌ |
30
+ | **Events** | Quality Events | ❌ |
31
+ | | Maintenance Events | ❌ |
32
+ | | Production Events | ❌ |
33
+ | | Engineering Events | ❌ |
34
+
35
+
36
+ ## Installation
37
+
38
+ Install ts-shape using pip:
39
+
40
+ ```bash
41
+ pip install timeseries-shaper
42
+ ```
43
+
44
+ ## Documentation
45
+
46
+ For full documentation, visit GitHub Pages or check out the docstrings in the code.
47
+
48
+ ## Contributing
49
+
50
+ Contributions are welcome! For major changes, please open an issue first to discuss what you would like to change.
51
+
52
+ Please ensure to update tests as appropriate.
53
+
54
+ ## License
55
+
56
+ Distributed under the MIT License. See LICENSE for more information.
57
+
58
+ ## Acknowledgements
59
+
60
+ !TODO
@@ -0,0 +1,4 @@
1
+ [egg_info]
2
+ tag_build =
3
+ tag_date = 0
4
+
@@ -0,0 +1,26 @@
1
+ import setuptools
2
+
3
+ with open("README.md", "r", encoding = "utf-8") as fh:
4
+ long_description = fh.read()
5
+
6
+ setuptools.setup(
7
+ name = "ts-shape",
8
+ version = "0.0.0.19",
9
+ author = "Jakob Gabriel",
10
+ author_email = "jakob.gabriel5@googlemail.com",
11
+ description = "ts-shape filters, transforms and engineers your timeseries dataframe",
12
+ long_description = long_description,
13
+ long_description_content_type = "text/markdown",
14
+ url = "https://jakobgabriel.github.io/ts-shape/",
15
+ project_urls = {
16
+ "Bug Tracker": "https://github.com/jakobgabriel/ts-shape",
17
+ },
18
+ classifiers = [
19
+ "Programming Language :: Python :: 3",
20
+ "License :: OSI Approved :: MIT License",
21
+ "Operating System :: OS Independent",
22
+ ],
23
+ package_dir = {"": "src"},
24
+ packages = setuptools.find_packages(where="src"),
25
+ python_requires = ">=3.10"
26
+ )
@@ -0,0 +1 @@
1
+ '''test'''
@@ -0,0 +1,89 @@
import pandas as pd  # type: ignore
from typing import Union
from ts_shape.utils.base import Base


class ValueMapper(Base):
    """
    Map values in a DataFrame column using a mapping table read from a CSV or
    JSON file, inheriting from the Base class.

    The mapping is applied as a left join: values found in the mapping table
    are replaced, values without a match become NaN.
    """

    def __init__(
        self,
        dataframe: pd.DataFrame,
        mapping_file: str,
        map_column: str,
        mapping_key_column: str,
        mapping_value_column: str,
        file_type: str = 'csv',
        sep: str = ',',
        encoding: str = 'utf-8',
        column_name: str = 'systime'
    ) -> None:
        """
        Initializes ValueMapper and the base DataFrame from the Base class.

        Args:
            dataframe (pd.DataFrame): The DataFrame to be processed and mapped.
            mapping_file (str): The file path of the mapping table (CSV or JSON).
            map_column (str): The name of the column in the DataFrame that needs to be mapped.
            mapping_key_column (str): The column in the mapping table to match with values from the DataFrame.
            mapping_value_column (str): The column in the mapping table containing the values to map to.
            file_type (str): The type of the mapping file ('csv' or 'json', case-insensitive). Defaults to 'csv'.
            sep (str): The separator for CSV files. Defaults to ','.
            encoding (str): The encoding to use for reading the file. Defaults to 'utf-8'.
            column_name (str): The name of the column to sort the DataFrame by in the base class. Defaults to 'systime'.
        """
        # Initialize the Base class with the sorted DataFrame.
        super().__init__(dataframe, column_name)

        self.map_column: str = map_column
        self.mapping_key_column: str = mapping_key_column
        self.mapping_value_column: str = mapping_value_column
        self.sep: str = sep
        self.encoding: str = encoding

        # Load the mapping table once, up front, so map_values() is cheap.
        self.mapping_table: pd.DataFrame = self._load_mapping_table(mapping_file, file_type)

    def _load_mapping_table(self, mapping_file: str, file_type: str) -> pd.DataFrame:
        """
        Loads the mapping table from a CSV or JSON file.

        Args:
            mapping_file (str): The file path of the mapping table.
            file_type (str): The type of the file ('csv' or 'json', case-insensitive).

        Returns:
            pd.DataFrame: The loaded mapping table as a DataFrame.

        Raises:
            ValueError: If file_type is neither 'csv' nor 'json'.
        """
        # Normalize so 'CSV' / 'Json' are accepted as well (backward compatible).
        normalized_type = file_type.lower()
        if normalized_type == 'csv':
            return pd.read_csv(mapping_file, sep=self.sep, encoding=self.encoding)
        if normalized_type == 'json':
            return pd.read_json(mapping_file, encoding=self.encoding)
        raise ValueError("Unsupported file type. Please use 'csv' or 'json'.")

    def map_values(self) -> pd.DataFrame:
        """
        Maps values in the specified DataFrame column based on the mapping table.

        Returns:
            pd.DataFrame: A new DataFrame with the mapped values; unmatched
            entries in the mapped column are NaN (left-join semantics).
        """
        # Left-join the key/value columns of the mapping table onto the frame.
        mapped_df = self.dataframe.merge(
            self.mapping_table[[self.mapping_key_column, self.mapping_value_column]],
            left_on=self.map_column,
            right_on=self.mapping_key_column,
            how='left'
        )

        # Replace the original column with the mapped values.
        mapped_df[self.map_column] = mapped_df[self.mapping_value_column]

        # Drop only the helper columns that are distinct from the target
        # column; dropping unconditionally would delete the freshly mapped
        # column whenever the mapping key/value column shares its name.
        helper_columns = [
            col for col in {self.mapping_key_column, self.mapping_value_column}
            if col != self.map_column and col in mapped_df.columns
        ]
        return mapped_df.drop(columns=helper_columns)
File without changes
@@ -0,0 +1,120 @@
import pandas as pd  # type: ignore
import numpy as np
from scipy.stats import zscore
from typing import Callable, Union
from ts_shape.utils.base import Base


class OutlierDetectionEvents(Base):
    """
    Processes time series data to detect outliers based on specified
    statistical methods (Z-score or IQR) and groups temporally adjacent
    detections into discrete events.
    """

    def __init__(self, dataframe: pd.DataFrame, value_column: str, event_uuid: str = 'outlier_event',
                 time_threshold: str = '5min') -> None:
        """
        Initializes the OutlierDetectionEvents with specific attributes for outlier detection.

        Args:
            dataframe (pd.DataFrame): The input time series DataFrame.
            value_column (str): The name of the column containing the values for outlier detection.
            event_uuid (str): A UUID or identifier for detected outlier events.
            time_threshold (str): The time threshold (pandas offset string) used to group close events together.
        """
        super().__init__(dataframe)
        self.value_column = value_column
        self.event_uuid = event_uuid
        self.time_threshold = time_threshold

    def _group_outliers(self, outliers_df: pd.DataFrame) -> pd.DataFrame:
        """
        Groups detected outliers that are close in time and prepares the final events DataFrame.

        Args:
            outliers_df (pd.DataFrame): Rows flagged as outliers; must contain
                'systime' and a boolean 'outlier' column.

        Returns:
            pd.DataFrame: A DataFrame of grouped outlier events (the first and
            last row of each multi-row group), tagged with the event UUID.
        """
        # Callers pass a boolean-filtered slice (df[df['outlier']]); copy it
        # before adding columns to avoid pandas' SettingWithCopy behaviour.
        outliers_df = outliers_df.copy()

        # A new group starts whenever the gap to the previous outlier exceeds
        # the configured time threshold.
        outliers_df['group_id'] = (
            outliers_df['systime'].diff().abs() > pd.to_timedelta(self.time_threshold)
        ).cumsum()

        events_data = []
        for group_id in outliers_df['group_id'].unique():
            group_data = outliers_df[outliers_df['group_id'] == group_id]
            # Single-row groups are skipped: an event is represented by its
            # first and last observation, which needs at least two rows.
            if group_data.shape[0] > 1:
                first_row = group_data.nsmallest(1, 'systime')
                last_row = group_data.nlargest(1, 'systime')
                events_data.append(pd.concat([first_row, last_row]))

        if events_data:
            events_df = pd.concat(events_data)
            events_df['uuid'] = self.event_uuid
        else:
            # No qualifying groups: return an empty frame with the same schema
            # so downstream column access still works.
            events_df = pd.DataFrame(columns=outliers_df.columns)

        # Event rows are delta markers; the measured value is intentionally blanked.
        events_df[self.value_column] = np.nan
        events_df['is_delta'] = True

        return events_df.drop(['outlier', 'group_id'], axis=1)

    def detect_outliers_zscore(self, threshold: float = 3.0) -> pd.DataFrame:
        """
        Detects outliers using the Z-score method.

        Args:
            threshold (float): The absolute Z-score above which a value is an outlier.

        Returns:
            pd.DataFrame: A DataFrame of detected outliers grouped into events.
        """
        df = self.dataframe.copy()

        # Normalize 'systime' to datetime and sort newest-first (the ordering
        # the grouping step was written against).
        df['systime'] = pd.to_datetime(df['systime'])
        df = df.sort_values(by='systime', ascending=False)

        # Flag values whose standardized distance from the mean exceeds threshold.
        df['outlier'] = np.abs(zscore(df[self.value_column])) > threshold

        return self._group_outliers(df[df['outlier']])

    def detect_outliers_iqr(self, threshold: tuple = (1.5, 1.5)) -> pd.DataFrame:
        """
        Detects outliers using the IQR method.

        Args:
            threshold (tuple): The multipliers for the IQR range for detecting outliers (lower, upper).

        Returns:
            pd.DataFrame: A DataFrame of detected outliers grouped into events.
        """
        df = self.dataframe.copy()

        df['systime'] = pd.to_datetime(df['systime'])
        df = df.sort_values(by='systime', ascending=False)

        # Flag values outside [Q1 - k_lo*IQR, Q3 + k_hi*IQR].
        q1 = df[self.value_column].quantile(0.25)
        q3 = df[self.value_column].quantile(0.75)
        iqr = q3 - q1
        lower_bound = q1 - threshold[0] * iqr
        upper_bound = q3 + threshold[1] * iqr
        df['outlier'] = (df[self.value_column] < lower_bound) | (df[self.value_column] > upper_bound)

        return self._group_outliers(df[df['outlier']])

    # Example usage:
    # outlier_detector = OutlierDetectionEvents(dataframe=df, value_column='value')
    # detected_outliers_zscore = outlier_detector.detect_outliers_zscore(threshold=3.0)
    # detected_outliers_iqr = outlier_detector.detect_outliers_iqr(threshold=(1.5, 1.5))
@@ -0,0 +1,191 @@
import pandas as pd  # type: ignore
import numpy as np
from typing import Callable, List, Optional
from ts_shape.utils.base import Base


class StatisticalProcessControlRuleBased(Base):
    """
    Inherits from Base and applies SPC rules (Western Electric Rules) to a DataFrame for event detection.
    Processes data based on control limit UUIDs, actual value UUIDs, and generates events with an event UUID.
    """

    def __init__(self, dataframe: pd.DataFrame, value_column: str, tolerance_uuid: str, actual_uuid: str, event_uuid: str) -> None:
        """
        Initializes the SPC monitor with UUIDs for tolerance, actual, and event values.
        Inherits the sorted dataframe from the Base class.

        Args:
            dataframe (pd.DataFrame): The input DataFrame containing the data to be processed.
            value_column (str): The column containing the values to monitor.
            tolerance_uuid (str): UUID identifier for rows that set tolerance values.
            actual_uuid (str): UUID identifier for rows containing actual values.
            event_uuid (str): UUID to assign to generated events.
        """
        super().__init__(dataframe)  # Initialize the Base class
        self.value_column: str = value_column
        self.tolerance_uuid: str = tolerance_uuid
        self.actual_uuid: str = actual_uuid
        self.event_uuid: str = event_uuid

    def calculate_control_limits(self) -> pd.DataFrame:
        """
        Calculate the control limits (mean ± 1σ, 2σ, 3σ) from the tolerance rows.

        Returns:
            pd.DataFrame: A single-row DataFrame with the mean and the
            1σ/2σ/3σ upper and lower limits.
        """
        tolerance_rows = self.dataframe[self.dataframe['uuid'] == self.tolerance_uuid]
        mean = tolerance_rows[self.value_column].mean()
        sigma = tolerance_rows[self.value_column].std()

        control_limits = {
            'mean': mean,
            '1sigma_upper': mean + sigma,
            '1sigma_lower': mean - sigma,
            '2sigma_upper': mean + 2 * sigma,
            '2sigma_lower': mean - 2 * sigma,
            '3sigma_upper': mean + 3 * sigma,
            '3sigma_lower': mean - 3 * sigma,
        }

        return pd.DataFrame([control_limits])

    def rule_1(self, df: pd.DataFrame, limits: pd.DataFrame) -> pd.DataFrame:
        """
        Rule 1: One point beyond the 3σ control limits.

        Returns:
            pd.DataFrame: Filtered DataFrame with rule violations.
        """
        df['rule_1'] = (df[self.value_column] > limits['3sigma_upper'].values[0]) | (df[self.value_column] < limits['3sigma_lower'].values[0])
        return df[df['rule_1']]

    def rule_2(self, df: pd.DataFrame) -> pd.DataFrame:
        """
        Rule 2: Nine consecutive points on one side of the mean.

        Note: the mean here is computed from the actual values in `df`, not
        from the tolerance-derived control limits.

        Returns:
            pd.DataFrame: Filtered DataFrame with rule violations.
        """
        mean = df[self.value_column].mean()
        df['above_mean'] = df[self.value_column] > mean
        df['below_mean'] = df[self.value_column] < mean
        # A window sum of 9 means all nine points were on the same side.
        df['rule_2'] = (df['above_mean'].rolling(window=9).sum() == 9) | (df['below_mean'].rolling(window=9).sum() == 9)
        return df[df['rule_2']]

    def rule_3(self, df: pd.DataFrame) -> pd.DataFrame:
        """
        Rule 3: Six consecutive points steadily increasing or decreasing.

        Returns:
            pd.DataFrame: Filtered DataFrame with rule violations.
        """
        df['increasing'] = df[self.value_column].diff().gt(0)
        df['decreasing'] = df[self.value_column].diff().lt(0)
        df['rule_3'] = (df['increasing'].rolling(window=6).sum() == 6) | (df['decreasing'].rolling(window=6).sum() == 6)
        return df[df['rule_3']]

    def rule_4(self, df: pd.DataFrame) -> pd.DataFrame:
        """
        Rule 4: Fourteen consecutive points alternating up and down.

        Returns:
            pd.DataFrame: Filtered DataFrame with rule violations.
        """
        # Sign of each step: +1 up, -1 down, 0 flat.
        df['alternating'] = df[self.value_column].diff().apply(np.sign)
        # With raw=True the window arrives as a numpy ndarray, so neighbours
        # are compared via slicing. (The previous implementation called
        # x.shift() on the raw array, which raises AttributeError.)
        df['rule_4'] = (
            df['alternating']
            .rolling(window=14)
            .apply(lambda window: float((window[1:] != window[:-1]).all()), raw=True)
            .fillna(0)
            .astype(bool)  # ensure a boolean mask for the filter below
        )
        return df[df['rule_4']]

    def rule_5(self, df: pd.DataFrame, limits: pd.DataFrame) -> pd.DataFrame:
        """
        Rule 5: Two out of three consecutive points near the control limit (beyond 2σ but within 3σ).

        Returns:
            pd.DataFrame: Filtered DataFrame with rule violations.
        """
        df['rule_5'] = df[self.value_column].apply(
            lambda x: 1 if ((x > limits['2sigma_upper'].values[0] and x < limits['3sigma_upper'].values[0]) or
                            (x < limits['2sigma_lower'].values[0] and x > limits['3sigma_lower'].values[0])) else 0
        )
        df['rule_5'] = df['rule_5'].rolling(window=3).sum() >= 2
        return df[df['rule_5']]

    def rule_6(self, df: pd.DataFrame, limits: pd.DataFrame) -> pd.DataFrame:
        """
        Rule 6: Four out of five consecutive points near the control limit (beyond 1σ but within 2σ).

        Returns:
            pd.DataFrame: Filtered DataFrame with rule violations.
        """
        df['rule_6'] = df[self.value_column].apply(
            lambda x: 1 if ((x > limits['1sigma_upper'].values[0] and x < limits['2sigma_upper'].values[0]) or
                            (x < limits['1sigma_lower'].values[0] and x > limits['2sigma_lower'].values[0])) else 0
        )
        df['rule_6'] = df['rule_6'].rolling(window=5).sum() >= 4
        return df[df['rule_6']]

    def rule_7(self, df: pd.DataFrame, limits: pd.DataFrame) -> pd.DataFrame:
        """
        Rule 7: Fifteen consecutive points within 1σ of the centerline.

        Returns:
            pd.DataFrame: Filtered DataFrame with rule violations.
        """
        df['rule_7'] = df[self.value_column].apply(
            lambda x: 1 if (x < limits['1sigma_upper'].values[0] and x > limits['1sigma_lower'].values[0]) else 0
        )
        df['rule_7'] = df['rule_7'].rolling(window=15).sum() == 15
        return df[df['rule_7']]

    def rule_8(self, df: pd.DataFrame, limits: pd.DataFrame) -> pd.DataFrame:
        """
        Rule 8: Eight consecutive points on both sides of the mean within 1σ.

        NOTE(review): as implemented this only checks that eight consecutive
        points lie within 1σ (same predicate as rule 7 with a shorter window);
        the "both sides of the mean" condition is not enforced — confirm
        intended semantics before tightening.

        Returns:
            pd.DataFrame: Filtered DataFrame with rule violations.
        """
        df['rule_8'] = df[self.value_column].apply(
            lambda x: 1 if (x < limits['1sigma_upper'].values[0] and x > limits['1sigma_lower'].values[0]) else 0
        )
        df['rule_8'] = df['rule_8'].rolling(window=8).sum() == 8
        return df[df['rule_8']]

    def process(self, selected_rules: Optional[List[str]] = None) -> pd.DataFrame:
        """
        Applies the selected SPC rules and generates a DataFrame of events where any rules are violated.

        Args:
            selected_rules (Optional[List[str]]): List of rule names (e.g., ['rule_1', 'rule_3']) to apply.
                Unknown names are silently ignored; None applies every rule.

        Returns:
            pd.DataFrame: Deduplicated (systime, value, uuid) rows for each
            detected violation, tagged with the event UUID.
        """
        # Copy the actual-value slice: the rule methods add columns, and
        # writing into a filtered view of self.dataframe would trigger
        # pandas' SettingWithCopy behaviour.
        df = self.dataframe[self.dataframe['uuid'] == self.actual_uuid].copy()
        df['systime'] = pd.to_datetime(df['systime'])
        df = df.sort_values(by='systime')

        limits = self.calculate_control_limits()

        # Dispatch table of rule functions; limits-dependent rules close over `limits`.
        rules: dict = {
            'rule_1': lambda frame: self.rule_1(frame, limits),
            'rule_2': lambda frame: self.rule_2(frame),
            'rule_3': lambda frame: self.rule_3(frame),
            'rule_4': lambda frame: self.rule_4(frame),
            'rule_5': lambda frame: self.rule_5(frame, limits),
            'rule_6': lambda frame: self.rule_6(frame, limits),
            'rule_7': lambda frame: self.rule_7(frame, limits),
            'rule_8': lambda frame: self.rule_8(frame, limits),
        }

        # If no specific rules are provided, use all rules.
        if selected_rules is None:
            selected_rules = list(rules.keys())

        # Apply selected rules and concatenate results.
        events = pd.concat([rules[rule](df) for rule in selected_rules if rule in rules]).drop_duplicates()

        # Tag the detected events with the configured event UUID.
        events['uuid'] = self.event_uuid

        return events[['systime', self.value_column, 'uuid']].drop_duplicates()