spotforecast2-safe 0.0.1__tar.gz → 0.0.3__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- spotforecast2_safe-0.0.3/PKG-INFO +107 -0
- spotforecast2_safe-0.0.3/README.md +73 -0
- {spotforecast2_safe-0.0.1 → spotforecast2_safe-0.0.3}/pyproject.toml +7 -9
- {spotforecast2_safe-0.0.1 → spotforecast2_safe-0.0.3}/src/spotforecast2_safe/preprocessing/__init__.py +1 -10
- spotforecast2_safe-0.0.3/src/spotforecast2_safe/preprocessing/outlier.py +188 -0
- spotforecast2_safe-0.0.1/PKG-INFO +0 -86
- spotforecast2_safe-0.0.1/README.md +0 -48
- spotforecast2_safe-0.0.1/src/spotforecast2_safe/forecaster/metrics.py +0 -527
- spotforecast2_safe-0.0.1/src/spotforecast2_safe/model_selection/__init__.py +0 -5
- spotforecast2_safe-0.0.1/src/spotforecast2_safe/model_selection/bayesian_search.py +0 -453
- spotforecast2_safe-0.0.1/src/spotforecast2_safe/model_selection/grid_search.py +0 -314
- spotforecast2_safe-0.0.1/src/spotforecast2_safe/model_selection/random_search.py +0 -151
- spotforecast2_safe-0.0.1/src/spotforecast2_safe/model_selection/split_base.py +0 -357
- spotforecast2_safe-0.0.1/src/spotforecast2_safe/model_selection/split_one_step.py +0 -248
- spotforecast2_safe-0.0.1/src/spotforecast2_safe/model_selection/split_ts_cv.py +0 -687
- spotforecast2_safe-0.0.1/src/spotforecast2_safe/model_selection/utils_common.py +0 -718
- spotforecast2_safe-0.0.1/src/spotforecast2_safe/model_selection/utils_metrics.py +0 -103
- spotforecast2_safe-0.0.1/src/spotforecast2_safe/model_selection/validation.py +0 -685
- spotforecast2_safe-0.0.1/src/spotforecast2_safe/preprocessing/outlier.py +0 -435
- spotforecast2_safe-0.0.1/src/spotforecast2_safe/preprocessing/time_series_visualization.py +0 -815
- spotforecast2_safe-0.0.1/src/spotforecast2_safe/stats/__init__.py +0 -7
- spotforecast2_safe-0.0.1/src/spotforecast2_safe/stats/autocorrelation.py +0 -173
- {spotforecast2_safe-0.0.1 → spotforecast2_safe-0.0.3}/src/spotforecast2_safe/__init__.py +0 -0
- {spotforecast2_safe-0.0.1 → spotforecast2_safe-0.0.3}/src/spotforecast2_safe/data/__init__.py +0 -0
- {spotforecast2_safe-0.0.1 → spotforecast2_safe-0.0.3}/src/spotforecast2_safe/data/data.py +0 -0
- {spotforecast2_safe-0.0.1 → spotforecast2_safe-0.0.3}/src/spotforecast2_safe/data/fetch_data.py +0 -0
- {spotforecast2_safe-0.0.1 → spotforecast2_safe-0.0.3}/src/spotforecast2_safe/exceptions.py +0 -0
- {spotforecast2_safe-0.0.1 → spotforecast2_safe-0.0.3}/src/spotforecast2_safe/forecaster/__init__.py +0 -0
- {spotforecast2_safe-0.0.1 → spotforecast2_safe-0.0.3}/src/spotforecast2_safe/forecaster/base.py +0 -0
- {spotforecast2_safe-0.0.1 → spotforecast2_safe-0.0.3}/src/spotforecast2_safe/forecaster/recursive/__init__.py +0 -0
- {spotforecast2_safe-0.0.1 → spotforecast2_safe-0.0.3}/src/spotforecast2_safe/forecaster/recursive/_forecaster_equivalent_date.py +0 -0
- {spotforecast2_safe-0.0.1 → spotforecast2_safe-0.0.3}/src/spotforecast2_safe/forecaster/recursive/_forecaster_recursive.py +0 -0
- {spotforecast2_safe-0.0.1 → spotforecast2_safe-0.0.3}/src/spotforecast2_safe/forecaster/recursive/_warnings.py +0 -0
- {spotforecast2_safe-0.0.1 → spotforecast2_safe-0.0.3}/src/spotforecast2_safe/forecaster/utils.py +0 -0
- {spotforecast2_safe-0.0.1 → spotforecast2_safe-0.0.3}/src/spotforecast2_safe/preprocessing/_binner.py +0 -0
- {spotforecast2_safe-0.0.1 → spotforecast2_safe-0.0.3}/src/spotforecast2_safe/preprocessing/_common.py +0 -0
- {spotforecast2_safe-0.0.1 → spotforecast2_safe-0.0.3}/src/spotforecast2_safe/preprocessing/_differentiator.py +0 -0
- {spotforecast2_safe-0.0.1 → spotforecast2_safe-0.0.3}/src/spotforecast2_safe/preprocessing/_rolling.py +0 -0
- {spotforecast2_safe-0.0.1 → spotforecast2_safe-0.0.3}/src/spotforecast2_safe/preprocessing/curate_data.py +0 -0
- {spotforecast2_safe-0.0.1 → spotforecast2_safe-0.0.3}/src/spotforecast2_safe/preprocessing/imputation.py +0 -0
- {spotforecast2_safe-0.0.1 → spotforecast2_safe-0.0.3}/src/spotforecast2_safe/preprocessing/split.py +0 -0
- {spotforecast2_safe-0.0.1 → spotforecast2_safe-0.0.3}/src/spotforecast2_safe/processing/__init__.py +0 -0
- {spotforecast2_safe-0.0.1 → spotforecast2_safe-0.0.3}/src/spotforecast2_safe/processing/agg_predict.py +0 -0
- {spotforecast2_safe-0.0.1 → spotforecast2_safe-0.0.3}/src/spotforecast2_safe/processing/n2n_predict.py +0 -0
- {spotforecast2_safe-0.0.1 → spotforecast2_safe-0.0.3}/src/spotforecast2_safe/processing/n2n_predict_with_covariates.py +0 -0
- {spotforecast2_safe-0.0.1 → spotforecast2_safe-0.0.3}/src/spotforecast2_safe/py.typed +0 -0
- {spotforecast2_safe-0.0.1 → spotforecast2_safe-0.0.3}/src/spotforecast2_safe/utils/__init__.py +0 -0
- {spotforecast2_safe-0.0.1 → spotforecast2_safe-0.0.3}/src/spotforecast2_safe/utils/convert_to_utc.py +0 -0
- {spotforecast2_safe-0.0.1 → spotforecast2_safe-0.0.3}/src/spotforecast2_safe/utils/data_transform.py +0 -0
- {spotforecast2_safe-0.0.1 → spotforecast2_safe-0.0.3}/src/spotforecast2_safe/utils/forecaster_config.py +0 -0
- {spotforecast2_safe-0.0.1 → spotforecast2_safe-0.0.3}/src/spotforecast2_safe/utils/generate_holiday.py +0 -0
- {spotforecast2_safe-0.0.1 → spotforecast2_safe-0.0.3}/src/spotforecast2_safe/utils/validation.py +0 -0
- {spotforecast2_safe-0.0.1 → spotforecast2_safe-0.0.3}/src/spotforecast2_safe/weather/__init__.py +0 -0
- {spotforecast2_safe-0.0.1 → spotforecast2_safe-0.0.3}/src/spotforecast2_safe/weather/weather_client.py +0 -0
|
@@ -0,0 +1,107 @@
|
|
|
1
|
+
Metadata-Version: 2.3
|
|
2
|
+
Name: spotforecast2-safe
|
|
3
|
+
Version: 0.0.3
|
|
4
|
+
Summary: spotforecast2-safe (Core): Safety-critical time series forecasting for production
|
|
5
|
+
Author: bartzbeielstein
|
|
6
|
+
Author-email: bartzbeielstein <32470350+bartzbeielstein@users.noreply.github.com>
|
|
7
|
+
Requires-Dist: astral>=3.2
|
|
8
|
+
Requires-Dist: feature-engine>=1.9.3
|
|
9
|
+
Requires-Dist: flake8>=7.3.0
|
|
10
|
+
Requires-Dist: holidays>=0.90
|
|
11
|
+
Requires-Dist: lightgbm>=4.6.0
|
|
12
|
+
Requires-Dist: numba>=0.63.1
|
|
13
|
+
Requires-Dist: pandas>=3.0.0
|
|
14
|
+
Requires-Dist: pyarrow>=23.0.0
|
|
15
|
+
Requires-Dist: scikit-learn>=1.8.0
|
|
16
|
+
Requires-Dist: tqdm>=4.67.2
|
|
17
|
+
Requires-Dist: pytest>=9.0.2 ; extra == 'dev'
|
|
18
|
+
Requires-Dist: pytest-cov>=6.0.0 ; extra == 'dev'
|
|
19
|
+
Requires-Dist: black>=24.1.0 ; extra == 'dev'
|
|
20
|
+
Requires-Dist: isort>=5.13.0 ; extra == 'dev'
|
|
21
|
+
Requires-Dist: ruff>=0.3.0 ; extra == 'dev'
|
|
22
|
+
Requires-Dist: mkdocs>=1.6.1 ; extra == 'dev'
|
|
23
|
+
Requires-Dist: mkdocs-material>=9.7.1 ; extra == 'dev'
|
|
24
|
+
Requires-Dist: mkdocstrings>=1.0.2 ; extra == 'dev'
|
|
25
|
+
Requires-Dist: mkdocstrings-python>=2.0.1 ; extra == 'dev'
|
|
26
|
+
Requires-Dist: safety>=3.0.0 ; extra == 'dev'
|
|
27
|
+
Requires-Dist: bandit>=1.8.0 ; extra == 'dev'
|
|
28
|
+
Requires-Python: >=3.13
|
|
29
|
+
Project-URL: Documentation, https://sequential-parameter-optimization.github.io/spotforecast2-safe/
|
|
30
|
+
Project-URL: Repository, https://github.com/sequential-parameter-optimization/spotforecast2-safe
|
|
31
|
+
Project-URL: Issues, https://github.com/sequential-parameter-optimization/spotforecast2-safe/issues
|
|
32
|
+
Provides-Extra: dev
|
|
33
|
+
Description-Content-Type: text/markdown
|
|
34
|
+
|
|
35
|
+
<div align="left">
|
|
36
|
+
<img src="logo/spotlogo.png" alt="spotforecast2-safe Logo" width="300">
|
|
37
|
+
</div>
|
|
38
|
+
|
|
39
|
+
# spotforecast2-safe (Core)
|
|
40
|
+
|
|
41
|
+
[](https://www.python.org/downloads/)
|
|
42
|
+
[](MODEL_CARD.md)
|
|
43
|
+
[](pyproject.toml)
|
|
44
|
+
[](MODEL_CARD.md)
|
|
45
|
+
[](LICENSE)
|
|
46
|
+
|
|
47
|
+
**Testing & Quality**
|
|
48
|
+
|
|
49
|
+
[](https://github.com/sequential-parameter-optimization/spotforecast2-safe/actions/workflows/ci.yml)
|
|
50
|
+
[](https://sequential-parameter-optimization.github.io/spotforecast2-safe/)
|
|
51
|
+
[](MODEL_CARD.md)
|
|
52
|
+
|
|
53
|
+
**Status**
|
|
54
|
+
|
|
55
|
+
[](https://github.com/sequential-parameter-optimization/spotforecast2-safe)
|
|
56
|
+
[](https://github.com/psf/black)
|
|
57
|
+
|
|
58
|
+
|
|
59
|
+
|
|
60
|
+
## Safety-Critical Design Goals
|
|
61
|
+
|
|
62
|
+
`spotforecast2-safe` is a specialized Python library designed to **facilitate** time series forecasting in safety-critical production environments and embedded systems.
|
|
63
|
+
|
|
64
|
+
Unlike standard machine and deep learning libraries, it follows a strict **"Safety-First"** architecture by design. **However, users must independently verify that these features meet their specific regulatory requirements:**
|
|
65
|
+
|
|
66
|
+
- **Zero Dead Code**: We aim to minimize the attack surface by excluding visualization and training logic.
|
|
67
|
+
- **Deterministic Logic**: The algorithms are designed to be purely mathematical and deterministic.
|
|
68
|
+
- **Fail-Safe Operation**: The system is designed to favor explicit errors over silent failures when encountering invalid data.
|
|
69
|
+
- **EU AI Act Support**: The architecture supports transparency and data governance, helping users build compliant high-risk AI components.
|
|
70
|
+
|
|
71
|
+
For a detailed technical overview of our safety mechanisms, see our **[MODEL_CARD.md](MODEL_CARD.md)**.
|
|
72
|
+
|
|
73
|
+
## ⚠️ Disclaimer & Liability
|
|
74
|
+
|
|
75
|
+
**IMPORTANT**: This software is provided "as is" and any express or implied warranties, including, but not limited to, the implied warranties of merchantability and fitness for a particular purpose are disclaimed.
|
|
76
|
+
|
|
77
|
+
In no event shall the authors, copyright holders, or contributors be liable for any direct, indirect, incidental, special, exemplary, or consequential damages (including, but not limited to, procurement of substitute goods or services; loss of use, data, or profits; or business interruption) however caused and on any theory of liability, whether in contract, strict liability, or tort (including negligence or otherwise) arising in any way out of the use of this software, even if advised of the possibility of such damage.
|
|
78
|
+
|
|
79
|
+
**The use of this software in safety-critical systems is at the sole risk of the user.**
|
|
80
|
+
|
|
81
|
+
## Attributions
|
|
82
|
+
|
|
83
|
+
Parts of the code are ported from `skforecast` to reduce external dependencies.
|
|
84
|
+
Many thanks to the [skforecast team](https://skforecast.org/0.20.0/more/about-skforecast.html) for their great work!
|
|
85
|
+
|
|
86
|
+
## Documentation
|
|
87
|
+
|
|
88
|
+
Documentation (API) is available at: [https://sequential-parameter-optimization.github.io/spotforecast2-safe/](https://sequential-parameter-optimization.github.io/spotforecast2-safe/)
|
|
89
|
+
|
|
90
|
+
## License
|
|
91
|
+
|
|
92
|
+
`spotforecast2-safe` software: [BSD-3-Clause License](LICENSE)
|
|
93
|
+
|
|
94
|
+
|
|
95
|
+
# References
|
|
96
|
+
|
|
97
|
+
## spotforecast2
|
|
98
|
+
|
|
99
|
+
The "full" version of `spotforecast2-safe`, which is named `spotforecast2`, is available at: [https://sequential-parameter-optimization.github.io/spotforecast2/](https://sequential-parameter-optimization.github.io/spotforecast2/)
|
|
100
|
+
|
|
101
|
+
## skforecast
|
|
102
|
+
|
|
103
|
+
* Amat Rodrigo, J., & Escobar Ortiz, J. (2026). skforecast (Version 0.20.0) [Computer software]. https://doi.org/10.5281/zenodo.8382788
|
|
104
|
+
|
|
105
|
+
## spotoptim
|
|
106
|
+
|
|
107
|
+
* [spotoptim documentation](https://sequential-parameter-optimization.github.io/spotoptim/)
|
|
@@ -0,0 +1,73 @@
|
|
|
1
|
+
<div align="left">
|
|
2
|
+
<img src="logo/spotlogo.png" alt="spotforecast2-safe Logo" width="300">
|
|
3
|
+
</div>
|
|
4
|
+
|
|
5
|
+
# spotforecast2-safe (Core)
|
|
6
|
+
|
|
7
|
+
[](https://www.python.org/downloads/)
|
|
8
|
+
[](MODEL_CARD.md)
|
|
9
|
+
[](pyproject.toml)
|
|
10
|
+
[](MODEL_CARD.md)
|
|
11
|
+
[](LICENSE)
|
|
12
|
+
|
|
13
|
+
**Testing & Quality**
|
|
14
|
+
|
|
15
|
+
[](https://github.com/sequential-parameter-optimization/spotforecast2-safe/actions/workflows/ci.yml)
|
|
16
|
+
[](https://sequential-parameter-optimization.github.io/spotforecast2-safe/)
|
|
17
|
+
[](MODEL_CARD.md)
|
|
18
|
+
|
|
19
|
+
**Status**
|
|
20
|
+
|
|
21
|
+
[](https://github.com/sequential-parameter-optimization/spotforecast2-safe)
|
|
22
|
+
[](https://github.com/psf/black)
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
## Safety-Critical Design Goals
|
|
27
|
+
|
|
28
|
+
`spotforecast2-safe` is a specialized Python library designed to **facilitate** time series forecasting in safety-critical production environments and embedded systems.
|
|
29
|
+
|
|
30
|
+
Unlike standard machine and deep learning libraries, it follows a strict **"Safety-First"** architecture by design. **However, users must independently verify that these features meet their specific regulatory requirements:**
|
|
31
|
+
|
|
32
|
+
- **Zero Dead Code**: We aim to minimize the attack surface by excluding visualization and training logic.
|
|
33
|
+
- **Deterministic Logic**: The algorithms are designed to be purely mathematical and deterministic.
|
|
34
|
+
- **Fail-Safe Operation**: The system is designed to favor explicit errors over silent failures when encountering invalid data.
|
|
35
|
+
- **EU AI Act Support**: The architecture supports transparency and data governance, helping users build compliant high-risk AI components.
|
|
36
|
+
|
|
37
|
+
For a detailed technical overview of our safety mechanisms, see our **[MODEL_CARD.md](MODEL_CARD.md)**.
|
|
38
|
+
|
|
39
|
+
## ⚠️ Disclaimer & Liability
|
|
40
|
+
|
|
41
|
+
**IMPORTANT**: This software is provided "as is" and any express or implied warranties, including, but not limited to, the implied warranties of merchantability and fitness for a particular purpose are disclaimed.
|
|
42
|
+
|
|
43
|
+
In no event shall the authors, copyright holders, or contributors be liable for any direct, indirect, incidental, special, exemplary, or consequential damages (including, but not limited to, procurement of substitute goods or services; loss of use, data, or profits; or business interruption) however caused and on any theory of liability, whether in contract, strict liability, or tort (including negligence or otherwise) arising in any way out of the use of this software, even if advised of the possibility of such damage.
|
|
44
|
+
|
|
45
|
+
**The use of this software in safety-critical systems is at the sole risk of the user.**
|
|
46
|
+
|
|
47
|
+
## Attributions
|
|
48
|
+
|
|
49
|
+
Parts of the code are ported from `skforecast` to reduce external dependencies.
|
|
50
|
+
Many thanks to the [skforecast team](https://skforecast.org/0.20.0/more/about-skforecast.html) for their great work!
|
|
51
|
+
|
|
52
|
+
## Documentation
|
|
53
|
+
|
|
54
|
+
Documentation (API) is available at: [https://sequential-parameter-optimization.github.io/spotforecast2-safe/](https://sequential-parameter-optimization.github.io/spotforecast2-safe/)
|
|
55
|
+
|
|
56
|
+
## License
|
|
57
|
+
|
|
58
|
+
`spotforecast2-safe` software: [BSD-3-Clause License](LICENSE)
|
|
59
|
+
|
|
60
|
+
|
|
61
|
+
# References
|
|
62
|
+
|
|
63
|
+
## spotforecast2
|
|
64
|
+
|
|
65
|
+
The "full" version of `spotforecast2-safe`, which is named `spotforecast2`, is available at: [https://sequential-parameter-optimization.github.io/spotforecast2/](https://sequential-parameter-optimization.github.io/spotforecast2/)
|
|
66
|
+
|
|
67
|
+
## skforecast
|
|
68
|
+
|
|
69
|
+
* Amat Rodrigo, J., & Escobar Ortiz, J. (2026). skforecast (Version 0.20.0) [Computer software]. https://doi.org/10.5281/zenodo.8382788
|
|
70
|
+
|
|
71
|
+
## spotoptim
|
|
72
|
+
|
|
73
|
+
* [spotoptim documentation](https://sequential-parameter-optimization.github.io/spotoptim/)
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
[project]
|
|
2
2
|
name = "spotforecast2-safe"
|
|
3
|
-
version = "0.0.1"
|
|
4
|
-
description = "Safe forecasting with spot"
|
|
3
|
+
version = "0.0.3"
|
|
4
|
+
description = "spotforecast2-safe (Core): Safety-critical time series forecasting for production"
|
|
5
5
|
readme = "README.md"
|
|
6
6
|
authors = [
|
|
7
7
|
{ name = "bartzbeielstein", email = "32470350+bartzbeielstein@users.noreply.github.com" }
|
|
@@ -12,21 +12,19 @@ dependencies = [
|
|
|
12
12
|
"feature-engine>=1.9.3",
|
|
13
13
|
"flake8>=7.3.0",
|
|
14
14
|
"holidays>=0.90",
|
|
15
|
-
"ipykernel>=7.1.0",
|
|
16
|
-
"jupyter>=1.1.1",
|
|
17
15
|
"lightgbm>=4.6.0",
|
|
18
|
-
"matplotlib>=3.10.8",
|
|
19
16
|
"numba>=0.63.1",
|
|
20
|
-
"optuna>=4.7.0",
|
|
21
17
|
"pandas>=3.0.0",
|
|
22
|
-
"plotly>=6.5.2",
|
|
23
18
|
"pyarrow>=23.0.0",
|
|
24
19
|
"scikit-learn>=1.8.0",
|
|
25
|
-
"shap>=0.49.1",
|
|
26
|
-
"spotoptim>=0.0.160",
|
|
27
20
|
"tqdm>=4.67.2",
|
|
28
21
|
]
|
|
29
22
|
|
|
23
|
+
[project.urls]
|
|
24
|
+
Documentation = "https://sequential-parameter-optimization.github.io/spotforecast2-safe/"
|
|
25
|
+
Repository = "https://github.com/sequential-parameter-optimization/spotforecast2-safe"
|
|
26
|
+
Issues = "https://github.com/sequential-parameter-optimization/spotforecast2-safe/issues"
|
|
27
|
+
|
|
30
28
|
[build-system]
|
|
31
29
|
requires = ["uv_build>=0.9.18,<0.10.0"]
|
|
32
30
|
build-backend = "uv_build"
|
|
@@ -9,13 +9,8 @@ from .outlier import (
|
|
|
9
9
|
mark_outliers,
|
|
10
10
|
manual_outlier_removal,
|
|
11
11
|
get_outliers,
|
|
12
|
-
visualize_outliers_hist,
|
|
13
|
-
visualize_outliers_plotly_scatter,
|
|
14
|
-
)
|
|
15
|
-
from .time_series_visualization import (
|
|
16
|
-
visualize_ts_plotly,
|
|
17
|
-
visualize_ts_comparison,
|
|
18
12
|
)
|
|
13
|
+
|
|
19
14
|
from .imputation import custom_weights, get_missing_weights, WeightFunction
|
|
20
15
|
from .split import split_abs_train_val_test, split_rel_train_val_test
|
|
21
16
|
from ._differentiator import TimeSeriesDifferentiator
|
|
@@ -31,10 +26,6 @@ __all__ = [
|
|
|
31
26
|
"mark_outliers",
|
|
32
27
|
"manual_outlier_removal",
|
|
33
28
|
"get_outliers",
|
|
34
|
-
"visualize_outliers_hist",
|
|
35
|
-
"visualize_outliers_plotly_scatter",
|
|
36
|
-
"visualize_ts_plotly",
|
|
37
|
-
"visualize_ts_comparison",
|
|
38
29
|
"custom_weights",
|
|
39
30
|
"get_missing_weights",
|
|
40
31
|
"WeightFunction",
|
|
@@ -0,0 +1,188 @@
|
|
|
1
|
+
from typing import Optional, Dict
|
|
2
|
+
|
|
3
|
+
from sklearn.ensemble import IsolationForest
|
|
4
|
+
import numpy as np
|
|
5
|
+
import pandas as pd
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
def mark_outliers(
    data: pd.DataFrame,
    contamination: float = 0.1,
    random_state: int = 1234,
    verbose: bool = False,
) -> tuple[pd.DataFrame, np.ndarray]:
    """Mark outliers as NaN, column by column, using Isolation Forest.

    A separate ``IsolationForest`` is fitted on every column. Values that the
    model labels as outliers (-1) are overwritten with NaN. The input
    DataFrame is modified in place; the same object is returned.

    Args:
        data (pd.DataFrame):
            The input dataset. It is modified in place.
        contamination (float):
            The (estimated) proportion of outliers in the dataset.
        random_state (int):
            Random seed for reproducibility. Default is 1234.
        verbose (bool):
            Whether to print the percentage of marked values per column.

    Returns:
        tuple[pd.DataFrame, np.ndarray]: The dataset with outliers marked as
        NaN, and the label array (-1 for outliers, 1 for inliers) of the
        last processed column only.

    Raises:
        ValueError: If data is empty (no rows or no columns).

    Examples:
        >>> import pandas as pd
        >>> from spotforecast2_safe.preprocessing.outlier import mark_outliers
        >>> data = pd.DataFrame({"A": [1.0, 1.1, 0.9, 1.0, 50.0]})
        >>> cleaned_data, outlier_labels = mark_outliers(
        ...     data, contamination=0.2, random_state=42, verbose=True
        ... )
    """
    # Fail explicitly instead of hitting an unbound ``outliers`` below when
    # there is no column to iterate over.
    if data.empty:
        raise ValueError("Input data is empty")

    for col in data.columns:
        iso = IsolationForest(contamination=contamination, random_state=random_state)
        # Fit and predict (-1 for outliers, 1 for inliers)
        outliers = iso.fit_predict(data[[col]])
        is_outlier = outliers == -1

        # Mark outliers as NaN
        data.loc[is_outlier, col] = np.nan

        if verbose:
            pct_outliers = is_outlier.mean() * 100
            print(
                f"Column '{col}': Marked {pct_outliers:.4f}% of data points as outliers."
            )
    # NOTE: ``outliers`` holds the labels of the last column only; kept for
    # backward compatibility with existing callers.
    return data, outliers
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
def manual_outlier_removal(
|
|
52
|
+
data: pd.DataFrame,
|
|
53
|
+
column: str,
|
|
54
|
+
lower_threshold: float | None = None,
|
|
55
|
+
upper_threshold: float | None = None,
|
|
56
|
+
verbose: bool = False,
|
|
57
|
+
) -> tuple[pd.DataFrame, int]:
|
|
58
|
+
"""Manual outlier removal function.
|
|
59
|
+
Args:
|
|
60
|
+
data (pd.DataFrame):
|
|
61
|
+
The input dataset.
|
|
62
|
+
column (str):
|
|
63
|
+
The column name in which to perform manual outlier removal.
|
|
64
|
+
lower_threshold (float | None):
|
|
65
|
+
The lower threshold below which values are considered outliers.
|
|
66
|
+
If None, no lower threshold is applied.
|
|
67
|
+
upper_threshold (float | None):
|
|
68
|
+
The upper threshold above which values are considered outliers.
|
|
69
|
+
If None, no upper threshold is applied.
|
|
70
|
+
verbose (bool):
|
|
71
|
+
Whether to print additional information.
|
|
72
|
+
|
|
73
|
+
Returns:
|
|
74
|
+
tuple[pd.DataFrame, int]: A tuple containing the modified dataset with outliers marked as NaN and the number of outliers marked.
|
|
75
|
+
|
|
76
|
+
Examples:
|
|
77
|
+
>>> from spotforecast2.data.fetch_data import fetch_data
|
|
78
|
+
>>> from spotforecast2_safe.preprocessing.outlier import manual_outlier_removal
|
|
79
|
+
>>> data = fetch_data()
|
|
80
|
+
>>> data, n_manual_outliers = manual_outlier_removal(
|
|
81
|
+
... data,
|
|
82
|
+
... column='ABC',
|
|
83
|
+
... lower_threshold=50,
|
|
84
|
+
... upper_threshold=700,
|
|
85
|
+
... verbose=True
|
|
86
|
+
"""
|
|
87
|
+
if lower_threshold is None and upper_threshold is None:
|
|
88
|
+
if verbose:
|
|
89
|
+
print(f"No thresholds provided for {column}; no outliers marked.")
|
|
90
|
+
return data, 0
|
|
91
|
+
|
|
92
|
+
if lower_threshold is not None and upper_threshold is not None:
|
|
93
|
+
mask = (data[column] > upper_threshold) | (data[column] < lower_threshold)
|
|
94
|
+
elif lower_threshold is not None:
|
|
95
|
+
mask = data[column] < lower_threshold
|
|
96
|
+
else:
|
|
97
|
+
mask = data[column] > upper_threshold
|
|
98
|
+
|
|
99
|
+
n_manual_outliers = mask.sum()
|
|
100
|
+
|
|
101
|
+
data.loc[mask, column] = np.nan
|
|
102
|
+
|
|
103
|
+
if verbose:
|
|
104
|
+
if lower_threshold is not None and upper_threshold is not None:
|
|
105
|
+
print(
|
|
106
|
+
f"Manually marked {n_manual_outliers} values > {upper_threshold} or < {lower_threshold} as outliers in {column}."
|
|
107
|
+
)
|
|
108
|
+
elif lower_threshold is not None:
|
|
109
|
+
print(
|
|
110
|
+
f"Manually marked {n_manual_outliers} values < {lower_threshold} as outliers in {column}."
|
|
111
|
+
)
|
|
112
|
+
else:
|
|
113
|
+
print(
|
|
114
|
+
f"Manually marked {n_manual_outliers} values > {upper_threshold} as outliers in {column}."
|
|
115
|
+
)
|
|
116
|
+
return data, n_manual_outliers
|
|
117
|
+
|
|
118
|
+
|
|
119
|
+
def get_outliers(
    data: pd.DataFrame,
    data_original: Optional[pd.DataFrame] = None,
    contamination: float = 0.01,
    random_state: int = 1234,
) -> Dict[str, pd.Series]:
    """Detect outliers in each column using Isolation Forest.

    A separate scikit-learn ``IsolationForest`` is fitted on every column of
    ``data``. The values at the positions labelled as outliers are collected
    per column. When ``data_original`` is given, the reported values are read
    from it instead of from ``data``.

    Args:
        data: The input DataFrame to check for outliers.
        data_original: Optional DataFrame from which the outlier values are
            read. The outlier positions are applied to it as a boolean mask,
            so it is expected to have the same rows and column names as
            ``data``. Default: None.
        contamination: The estimated proportion of outliers in the dataset.
            Default: 0.01.
        random_state: Random seed for reproducibility. Default: 1234.

    Returns:
        A dictionary mapping column names to Series of outlier values.
        For columns without outliers, an empty Series is returned.

    Raises:
        ValueError: If data is empty or contains no columns.

    Examples:
        >>> import pandas as pd
        >>> import numpy as np
        >>> from spotforecast2_safe.preprocessing.outlier import get_outliers
        >>>
        >>> # Create sample data with outliers
        >>> np.random.seed(42)
        >>> data = pd.DataFrame({
        ...     'A': np.concatenate([np.random.normal(0, 1, 100), [10, 11, 12]]),
        ...     'B': np.concatenate([np.random.normal(5, 2, 100), [100, 110, 120]])
        ... })
        >>>
        >>> # Detect outliers
        >>> outliers = get_outliers(data, contamination=0.03)
        >>> for col, outlier_vals in outliers.items():
        ...     print(f"{col}: {len(outlier_vals)} outliers detected")
    """
    # Check for missing columns first: ``DataFrame.empty`` is also True for a
    # frame without columns, so the opposite order makes this branch
    # unreachable.
    if len(data.columns) == 0:
        raise ValueError("Input data contains no columns")
    if data.empty:
        raise ValueError("Input data is empty")

    # Outlier values are reported from the original data when it is supplied.
    value_source = data if data_original is None else data_original

    outliers_dict: Dict[str, pd.Series] = {}
    for col in data.columns:
        iso = IsolationForest(contamination=contamination, random_state=random_state)
        # Fit and predict (-1 for outliers, 1 for inliers)
        predictions = iso.fit_predict(data[[col]])
        outliers_dict[col] = value_source.loc[predictions == -1, col]

    return outliers_dict
|
|
@@ -1,86 +0,0 @@
|
|
|
1
|
-
Metadata-Version: 2.3
|
|
2
|
-
Name: spotforecast2-safe
|
|
3
|
-
Version: 0.0.1
|
|
4
|
-
Summary: Safe forecasting with spot
|
|
5
|
-
Author: bartzbeielstein
|
|
6
|
-
Author-email: bartzbeielstein <32470350+bartzbeielstein@users.noreply.github.com>
|
|
7
|
-
Requires-Dist: astral>=3.2
|
|
8
|
-
Requires-Dist: feature-engine>=1.9.3
|
|
9
|
-
Requires-Dist: flake8>=7.3.0
|
|
10
|
-
Requires-Dist: holidays>=0.90
|
|
11
|
-
Requires-Dist: ipykernel>=7.1.0
|
|
12
|
-
Requires-Dist: jupyter>=1.1.1
|
|
13
|
-
Requires-Dist: lightgbm>=4.6.0
|
|
14
|
-
Requires-Dist: matplotlib>=3.10.8
|
|
15
|
-
Requires-Dist: numba>=0.63.1
|
|
16
|
-
Requires-Dist: optuna>=4.7.0
|
|
17
|
-
Requires-Dist: pandas>=3.0.0
|
|
18
|
-
Requires-Dist: plotly>=6.5.2
|
|
19
|
-
Requires-Dist: pyarrow>=23.0.0
|
|
20
|
-
Requires-Dist: scikit-learn>=1.8.0
|
|
21
|
-
Requires-Dist: shap>=0.49.1
|
|
22
|
-
Requires-Dist: spotoptim>=0.0.160
|
|
23
|
-
Requires-Dist: tqdm>=4.67.2
|
|
24
|
-
Requires-Dist: pytest>=9.0.2 ; extra == 'dev'
|
|
25
|
-
Requires-Dist: pytest-cov>=6.0.0 ; extra == 'dev'
|
|
26
|
-
Requires-Dist: black>=24.1.0 ; extra == 'dev'
|
|
27
|
-
Requires-Dist: isort>=5.13.0 ; extra == 'dev'
|
|
28
|
-
Requires-Dist: ruff>=0.3.0 ; extra == 'dev'
|
|
29
|
-
Requires-Dist: mkdocs>=1.6.1 ; extra == 'dev'
|
|
30
|
-
Requires-Dist: mkdocs-material>=9.7.1 ; extra == 'dev'
|
|
31
|
-
Requires-Dist: mkdocstrings>=1.0.2 ; extra == 'dev'
|
|
32
|
-
Requires-Dist: mkdocstrings-python>=2.0.1 ; extra == 'dev'
|
|
33
|
-
Requires-Dist: safety>=3.0.0 ; extra == 'dev'
|
|
34
|
-
Requires-Dist: bandit>=1.8.0 ; extra == 'dev'
|
|
35
|
-
Requires-Python: >=3.13
|
|
36
|
-
Provides-Extra: dev
|
|
37
|
-
Description-Content-Type: text/markdown
|
|
38
|
-
|
|
39
|
-
<div align="left">
|
|
40
|
-
<img src="logo/spotlogo.png" alt="spotforecast2-safe Logo" width="300">
|
|
41
|
-
</div>
|
|
42
|
-
|
|
43
|
-
# spotforecast2-safe
|
|
44
|
-
|
|
45
|
-
[](https://www.python.org/downloads/)
|
|
46
|
-
[](https://pypi.org/project/spotforecast2-safe/)
|
|
47
|
-
[](https://pypi.org/project/spotforecast2-safe/)
|
|
48
|
-
[](https://github.com/sequential-parameter-optimization/spotforecast2-safe/blob/main/LICENSE)
|
|
49
|
-
|
|
50
|
-
**Testing & Quality**
|
|
51
|
-
|
|
52
|
-
[](https://github.com/sequential-parameter-optimization/spotforecast2-safe/actions/workflows/ci.yml)
|
|
53
|
-
[](https://sequential-parameter-optimization.github.io/spotforecast2-safe/)
|
|
54
|
-
[](https://github.com/sequential-parameter-optimization/spotforecast2-safe/releases)
|
|
55
|
-
|
|
56
|
-
**Status**
|
|
57
|
-
|
|
58
|
-
[](https://github.com/sequential-parameter-optimization/spotforecast2-safe)
|
|
59
|
-
[](https://github.com/psf/black)
|
|
60
|
-
|
|
61
|
-
## About spotforecast2-safe
|
|
62
|
-
|
|
63
|
-
`spotforecast2-safe` is a Python library for time series forecasting in safety-critical environments. It is designed with EU AI Act principles in mind: Minimal dependency footprint for enhanced cybersecurity, deterministic data transformation for reproducibility, and strict error handling for robustness in safety-critical pipelines.
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
Parts of the code are ported from `skforecast` to reduce external dependencies.
|
|
67
|
-
Many thanks to the [skforecast team](https://skforecast.org/0.20.0/more/about-skforecast.html) for their great work!
|
|
68
|
-
|
|
69
|
-
## Documentation
|
|
70
|
-
|
|
71
|
-
Documentation (API) is available at: [https://sequential-parameter-optimization.github.io/spotforecast2-safe/](https://sequential-parameter-optimization.github.io/spotforecast2-safe/)
|
|
72
|
-
|
|
73
|
-
## License
|
|
74
|
-
|
|
75
|
-
`spotforecast2-safe` software: [BSD-3-Clause License](https://github.com/sequential-parameter-optimization/spotforecast2-safe?tab=BSD-3-Clause-1-ov-file)
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
# References
|
|
79
|
-
|
|
80
|
-
## skforecast:
|
|
81
|
-
|
|
82
|
-
* Amat Rodrigo, J., & Escobar Ortiz, J. (2026). skforecast (Version 0.20.0) [Computer software]. https://doi.org/10.5281/zenodo.8382788
|
|
83
|
-
|
|
84
|
-
## spotoptim:
|
|
85
|
-
|
|
86
|
-
* [spotoptim documentation](https://sequential-parameter-optimization.github.io/spotoptim/)
|
|
@@ -1,48 +0,0 @@
|
|
|
1
|
-
<div align="left">
|
|
2
|
-
<img src="logo/spotlogo.png" alt="spotforecast2-safe Logo" width="300">
|
|
3
|
-
</div>
|
|
4
|
-
|
|
5
|
-
# spotforecast2-safe
|
|
6
|
-
|
|
7
|
-
[](https://www.python.org/downloads/)
|
|
8
|
-
[](https://pypi.org/project/spotforecast2-safe/)
|
|
9
|
-
[](https://pypi.org/project/spotforecast2-safe/)
|
|
10
|
-
[](https://github.com/sequential-parameter-optimization/spotforecast2-safe/blob/main/LICENSE)
|
|
11
|
-
|
|
12
|
-
**Testing & Quality**
|
|
13
|
-
|
|
14
|
-
[](https://github.com/sequential-parameter-optimization/spotforecast2-safe/actions/workflows/ci.yml)
|
|
15
|
-
[](https://sequential-parameter-optimization.github.io/spotforecast2-safe/)
|
|
16
|
-
[](https://github.com/sequential-parameter-optimization/spotforecast2-safe/releases)
|
|
17
|
-
|
|
18
|
-
**Status**
|
|
19
|
-
|
|
20
|
-
[](https://github.com/sequential-parameter-optimization/spotforecast2-safe)
|
|
21
|
-
[](https://github.com/psf/black)
|
|
22
|
-
|
|
23
|
-
## About spotforecast2-safe
|
|
24
|
-
|
|
25
|
-
`spotforecast2-safe` is a Python library for time series forecasting in safety-critical environments. It is designed with EU AI Act principles in mind: Minimal dependency footprint for enhanced cybersecurity, deterministic data transformation for reproducibility, and strict error handling for robustness in safety-critical pipelines.
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
Parts of the code are ported from `skforecast` to reduce external dependencies.
|
|
29
|
-
Many thanks to the [skforecast team](https://skforecast.org/0.20.0/more/about-skforecast.html) for their great work!
|
|
30
|
-
|
|
31
|
-
## Documentation
|
|
32
|
-
|
|
33
|
-
Documentation (API) is available at: [https://sequential-parameter-optimization.github.io/spotforecast2-safe/](https://sequential-parameter-optimization.github.io/spotforecast2-safe/)
|
|
34
|
-
|
|
35
|
-
## License
|
|
36
|
-
|
|
37
|
-
`spotforecast2-safe` software: [BSD-3-Clause License](https://github.com/sequential-parameter-optimization/spotforecast2-safe?tab=BSD-3-Clause-1-ov-file)
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
# References
|
|
41
|
-
|
|
42
|
-
## skforecast:
|
|
43
|
-
|
|
44
|
-
* Amat Rodrigo, J., & Escobar Ortiz, J. (2026). skforecast (Version 0.20.0) [Computer software]. https://doi.org/10.5281/zenodo.8382788
|
|
45
|
-
|
|
46
|
-
## spotoptim:
|
|
47
|
-
|
|
48
|
-
* [spotoptim documentation](https://sequential-parameter-optimization.github.io/spotoptim/)
|