ssad 0.1.2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (51) hide show
  1. ssad-0.1.2/LICENSE +7 -0
  2. ssad-0.1.2/PKG-INFO +74 -0
  3. ssad-0.1.2/README.md +208 -0
  4. ssad-0.1.2/README_PYPI.md +40 -0
  5. ssad-0.1.2/pyproject.toml +111 -0
  6. ssad-0.1.2/setup.cfg +4 -0
  7. ssad-0.1.2/src/ssad/__init__.py +39 -0
  8. ssad-0.1.2/src/ssad/confidence_estimators/__init__.py +15 -0
  9. ssad-0.1.2/src/ssad/confidence_estimators/api.py +16 -0
  10. ssad-0.1.2/src/ssad/confidence_estimators/binary_confidence.py +30 -0
  11. ssad-0.1.2/src/ssad/confidence_estimators/confidence_estimator.py +94 -0
  12. ssad-0.1.2/src/ssad/confidence_estimators/confidence_intervals_configuration.py +67 -0
  13. ssad-0.1.2/src/ssad/confidence_estimators/hybrid_confidence.py +49 -0
  14. ssad-0.1.2/src/ssad/confidence_estimators/supports_confidence_estimation.py +26 -0
  15. ssad-0.1.2/src/ssad/datamodules/__init__.py +10 -0
  16. ssad-0.1.2/src/ssad/datamodules/api.py +27 -0
  17. ssad-0.1.2/src/ssad/datamodules/dataframe_with_labels.py +95 -0
  18. ssad-0.1.2/src/ssad/datamodules/dataset_interfaces.py +47 -0
  19. ssad-0.1.2/src/ssad/datamodules/dataset_with_confidence.py +101 -0
  20. ssad-0.1.2/src/ssad/datamodules/self_supervision_datamodule.py +99 -0
  21. ssad-0.1.2/src/ssad/datamodules/transforms/dataframe_to_tensor.py +34 -0
  22. ssad-0.1.2/src/ssad/datasets/__init__.py +14 -0
  23. ssad-0.1.2/src/ssad/datasets/api.py +29 -0
  24. ssad-0.1.2/src/ssad/datasets/general_tabular_datamodule.py +322 -0
  25. ssad-0.1.2/src/ssad/datasets/pipeline.py +98 -0
  26. ssad-0.1.2/src/ssad/datasets/utils.py +427 -0
  27. ssad-0.1.2/src/ssad/distribution_analyzers/__init__.py +15 -0
  28. ssad-0.1.2/src/ssad/distribution_analyzers/api.py +20 -0
  29. ssad-0.1.2/src/ssad/distribution_analyzers/evt_thresholding.py +162 -0
  30. ssad-0.1.2/src/ssad/distribution_analyzers/supports_distribution_analysis.py +63 -0
  31. ssad-0.1.2/src/ssad/distribution_analyzers/triangular_thresholding.py +295 -0
  32. ssad-0.1.2/src/ssad/loggers/__init__.py +13 -0
  33. ssad-0.1.2/src/ssad/loggers/api.py +14 -0
  34. ssad-0.1.2/src/ssad/loggers/logging_config.py +57 -0
  35. ssad-0.1.2/src/ssad/loggers/mlflow_logger.py +154 -0
  36. ssad-0.1.2/src/ssad/models/__init__.py +10 -0
  37. ssad-0.1.2/src/ssad/models/api.py +14 -0
  38. ssad-0.1.2/src/ssad/models/autoencoder.py +86 -0
  39. ssad-0.1.2/src/ssad/models/variational_autoencoder.py +388 -0
  40. ssad-0.1.2/src/ssad/modules/__init__.py +15 -0
  41. ssad-0.1.2/src/ssad/modules/api.py +22 -0
  42. ssad-0.1.2/src/ssad/modules/cosine_reconstruction_module.py +105 -0
  43. ssad-0.1.2/src/ssad/modules/free_energy_module.py +204 -0
  44. ssad-0.1.2/src/ssad/modules/self_supervision_module.py +528 -0
  45. ssad-0.1.2/src/ssad/modules/supports_self_supervision.py +47 -0
  46. ssad-0.1.2/src/ssad/py.typed +0 -0
  47. ssad-0.1.2/src/ssad.egg-info/PKG-INFO +74 -0
  48. ssad-0.1.2/src/ssad.egg-info/SOURCES.txt +49 -0
  49. ssad-0.1.2/src/ssad.egg-info/dependency_links.txt +1 -0
  50. ssad-0.1.2/src/ssad.egg-info/requires.txt +17 -0
  51. ssad-0.1.2/src/ssad.egg-info/top_level.txt +1 -0
ssad-0.1.2/LICENSE ADDED
@@ -0,0 +1,7 @@
1
+ Copyright (c) 2026 Orange SA
2
+
3
+ Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the “Software”), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
4
+
5
+ The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
6
+
7
+ THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
ssad-0.1.2/PKG-INFO ADDED
@@ -0,0 +1,74 @@
1
+ Metadata-Version: 2.4
2
+ Name: ssad
3
+ Version: 0.1.2
4
+ Summary: Framework for self-supervised training of reconstruction-based autoencoder models for anomaly detection.
5
+ Author-email: Samuel Berlemont <samuel.berlemont@orange.com>
6
+ Maintainer: Mohammed Achraf El Khamlichi
7
+ Maintainer-email: Julien Cumin <julien1.cumin@orange.com>
8
+ License: MIT
9
+ Project-URL: Homepage, https://github.com/Orange-OpenSource/SSAD
10
+ Project-URL: Issues, https://github.com/Orange-OpenSource/SSAD/issues
11
+ Project-URL: Repository, https://github.com/Orange-OpenSource/SSAD
12
+ Classifier: Programming Language :: Python :: 3
13
+ Classifier: Operating System :: OS Independent
14
+ Requires-Python: <3.15,>=3.10
15
+ Description-Content-Type: text/markdown
16
+ License-File: LICENSE
17
+ Requires-Dist: category_encoders<3.0,>=2.8
18
+ Requires-Dist: numpy<3.0,>=1.26
19
+ Requires-Dist: pandas<3.0,>=2.2
20
+ Requires-Dist: scipy<2.0,>=1.15
21
+ Requires-Dist: scikit-learn<2.0,>=1.9
22
+ Requires-Dist: torch<3.0,>=2.10
23
+ Requires-Dist: lightning<3.0,>=2.5
24
+ Requires-Dist: mlflow<4.0,>=3.12
25
+ Requires-Dist: psutil<8.0,>=7.0
26
+ Requires-Dist: nvidia-ml-py<14.0,>=13.610
27
+ Provides-Extra: dev
28
+ Requires-Dist: pandas-stubs==2.2.2.240603; extra == "dev"
29
+ Requires-Dist: torchvision<0.29,>=0.25; extra == "dev"
30
+ Requires-Dist: types-pytz>=2026.2; extra == "dev"
31
+ Requires-Dist: mypy<2.0,>=1.11; extra == "dev"
32
+ Requires-Dist: pylint<4.0,>=3.2; extra == "dev"
33
+ Dynamic: license-file
34
+
35
+ # SSAD — Self-Supervised Anomaly Detection Library
36
+
37
+ A Python library for autoencoder-based **anomaly detection** with self-supervised training and dynamic per-sample **confidence** updates.
38
+
39
+ ## Key Features
40
+
41
+ - Compute per-sample anomaly scores
42
+ - Estimate confidence from score distributions
43
+ - Recalibrate confidence intervals during training
44
+ - Apply confidence-aware losses (normal / abnormal / uncertain)
45
+ - Track experiments and artifacts with **MLflow**
46
+
47
+ ## Installation
48
+
49
+ ```bash
50
+ pip install ssad
51
+ ```
52
+
53
+ For development setup:
54
+
55
+ ```bash
56
+ pip install -e .[dev]
57
+ ```
58
+
59
+ ## Quick Links
60
+
61
+ - **Repository**: https://github.com/Orange-OpenSource/SSAD
62
+ - **Examples**: https://github.com/Orange-OpenSource/SSAD/tree/main/examples
63
+ - **Issues**: https://github.com/Orange-OpenSource/SSAD/issues
64
+
65
+
66
+ ## References
67
+
68
+ 1. N. Najari, S. Berlemont, G. Lefebvre, S. Duffner, C. Garcia,
69
+ *Robust Variational Autoencoders and Normalizing Flows for Unsupervised Network Anomaly Detection*,
70
+ AINA 2022, doi: 10.1007/978-3-030-99587-4_24
71
+
72
+ 2. N. Najari, S. Berlemont, G. Lefebvre, S. Duffner, C. Garcia,
73
+ *RADON: Robust Autoencoder for Unsupervised Anomaly Detection*,
74
+ SIN 2021, doi: 10.1109/SIN54109.2021.9699174
ssad-0.1.2/README.md ADDED
@@ -0,0 +1,208 @@
1
+ # SSAD — Self-Supervised Anomaly Detection Library
2
+
3
+ A Python library for autoencoder-based **anomaly detection** with self-supervised training and dynamic per-sample **confidence** updates.
4
+
5
+ ## Purpose
6
+
7
+ This library is designed to:
8
+
9
+ - train a model that produces an **anomaly score**;
10
+ - estimate per-sample **confidence** from that score;
11
+ - analyze score distributions to periodically recalibrate confidence intervals corresponding to normal, abnormal and unknown samples;
12
+ - apply different losses depending on confidence regions, which can take confidence and intervals into account to reweight samples;
13
+ - track experiments, metrics, and artifacts with **MLflow** and an SQL backend store.
14
+
15
+ NB: examples are provided in the [examples folder](examples). They implement the RADON and GRAnD anomaly detection models.
16
+
17
+ [1] N. Najari, S. Berlemont, G. Lefebvre, S. Duffner, and C. Garcia, "Robust Variational Autoencoders and Normalizing Flows for Unsupervised Network Anomaly Detection", in Advanced Information Networking and Applications, L. Barolli, F. Hussain, and T. Enokido, Eds., Lecture Notes in Networks and Systems, vol. 450. Cham: Springer International Publishing, 2022, pp. 281–292. doi: 10.1007/978-3-030-99587-4_24.
18
+
19
+ [2] N. Najari, S. Berlemont, G. Lefebvre, S. Duffner, and C. Garcia, "RADON: Robust Autoencoder for Unsupervised Anomaly Detection", in 2021 14th International Conference on Security of Information and Networks (SIN), Dec. 2021, pp. 1–8. doi: 10.1109/SIN54109.2021.9699174.
20
+
21
+ ---
22
+
23
+ ## Installation
24
+
25
+ ```
26
+ pip install -e .
27
+ ```
28
+
29
+ or, to launch examples,
30
+
31
+ ```
32
+ pip install -e .[dev]
33
+ ```
34
+
35
+ ---
36
+
37
+ ## Visualize results
38
+ Define the tracking URI in the MLflow configuration of the experiment:
39
+
40
+ ```
41
+ tracking_uri=f"sqlite:///<path-to>/mlflow.db",
42
+ ```
43
+
44
+ then start the server to visualize the results and artifacts:
45
+
46
+ ```
47
+ mlflow ui --backend-store-uri sqlite:///<path-to>/mlflow.db
48
+ ```
49
+
50
+ ## High-Level Architecture
51
+
52
+ The codebase is split into focused modules:
53
+
54
+ - `confidence_estimators`: confidence estimation logic from model scores.
55
+ - `distribution_analyzers`: score distribution analysis and interval extraction.
56
+ - `datamodules`: Lightning data wrapping with confidence-aware datasets.
57
+ - `datasets`: dataset types and confidence I/O helpers.
58
+ - `modules`: self-supervision training module and callback.
59
+ - `models`: PyTorch model definitions (e.g., autoencoder).
60
+ - `loggers`: MLflow utility functions for artifacts and metrics logging.
61
+
62
+ Consolidated class diagram:
63
+ ![Complete class diagram](docs/diagrams/complete.svg)
64
+
65
+ ---
66
+
67
+ ## Class Diagrams
68
+
69
+ All PlantUML source files are in:
70
+
71
+ - `docs/diagrams/*.plantuml`
72
+
73
+ ### 1) Confidence Estimators
74
+ - UML source: `docs/diagrams/confidence_estimators.plantuml`
75
+ - Figure:
76
+ ![confidence_estimators](docs/diagrams/confidence_estimators.svg)
77
+
78
+ Main elements:
79
+
80
+ - `SupportsConfidenceEstimation` (Protocol)
81
+ - `BaseConfidenceEstimator` (abstract)
82
+ - `ConfidenceIntervalsConfiguration`
83
+ - `Interval` (extends `pandas.Interval`)
84
+
85
+ ---
86
+
87
+ ### 2) Data Modules
88
+ - UML source: `docs/diagrams/datamodules.plantuml`
89
+ - Figure:
90
+ ![datamodules](docs/diagrams/datamodules.svg)
91
+
92
+ Main elements:
93
+
94
+ - `SelfSupervisionDataModule` wrapping a Lightning datamodule
95
+ - Integration with `DatasetWithConfidence`
96
+
97
+ ---
98
+
99
+ ### 3) Datasets
100
+ - UML source: `docs/diagrams/datasets.plantuml`
101
+ - Figure:
102
+ ![datasets](docs/diagrams/datasets.svg)
103
+
104
+ Main elements:
105
+
106
+ - `DataFrameWithLabels`
107
+ - `DatasetWithLabels` / `DatasetWithInputDim` (Protocols)
108
+ - `DatasetWithConfidence`
109
+ - Utility functions:
110
+ - `init_confidence_from_csv`
111
+ - `save_confidence_to_csv`
112
+
113
+ ---
114
+
115
+ ### 4) Distribution Analyzers
116
+ - UML source: `docs/diagrams/distribution_analyzers.plantuml`
117
+ - Figure:
118
+ ![distribution_analyzers](docs/diagrams/distribution_analyzers.svg)
119
+
120
+ Main elements:
121
+
122
+ - `SupportsDistributionAnalysis` (Protocol)
123
+ - Concrete analyzer implementations (e.g., thresholding strategies)
124
+
125
+ ---
126
+
127
+ ### 5) Models
128
+ - UML source: `docs/diagrams/models.plantuml`
129
+ - Figure:
130
+ ![models](docs/diagrams/models.svg)
131
+
132
+ Main elements:
133
+
134
+ - `torch.nn.Module`
135
+ - `Autoencoder`
136
+
137
+ ---
138
+
139
+ ### 6) Self-Supervision Modules
140
+ - UML source: `docs/diagrams/modules.plantuml`
141
+ - Figure:
142
+ ![modules](docs/diagrams/modules.svg)
143
+
144
+ Main elements:
145
+
146
+ - `SupportsSelfSupervision` (Protocol)
147
+ - `SelfSupervisionModule` (abstract Lightning module)
148
+ - `SelfSupervisionCallback`
149
+ - Dependency injection of:
150
+ - `SupportsConfidenceEstimation`
151
+ - `SupportsDistributionAnalysis`
152
+
153
+ ---
154
+
155
+ ## Training Loop (Conceptual)
156
+
157
+ 1. The model computes per-sample scores (`score` / `_prediction_score`).
158
+ 2. Distribution analysis derives confidence intervals.
159
+ 3. Confidence estimator maps scores to confidence values.
160
+ 4. Training dataset is refreshed with updated confidence.
161
+ 5. Loss computation uses confidence-aware behavior (normal/abnormal/uncertain).
162
+ 6. Confidence and intervals are recalibrated every `every_n_epochs` epochs.
163
+ 7. Metrics and artifacts are logged.
164
+
165
+ ---
166
+
167
+ ## Global workflow
168
+
169
+ ![workflow](docs/diagrams/ssad_workflow_light.svg)
170
+
171
+ ---
172
+
173
+ ## Logging
174
+
175
+ `ssad/loggers/mlflow_logger.py` provides helper functions to log:
176
+
177
+ - confidence CSV snapshots (`confidence_epoch_*.csv`)
178
+ - confidence interval JSON files (`confidence_intervals_epoch_*.json`)
179
+ - distribution analysis figures (`confidence_analysis_epoch_*.svg`)
180
+ - system metrics (CPU / RAM / GPU)
181
+ - test metrics by threshold (`test_metrics_threshold=*.json`)
182
+
183
+ ---
184
+
185
+ ## Main Dependencies
186
+
187
+ - Python 3.10+
188
+ - PyTorch
189
+ - Lightning
190
+ - NumPy / pandas / scikit-learn / matplotlib
191
+ - MLflow
192
+ - psutil
193
+
194
+ ---
195
+
196
+ ## Typical Usage (High-Level)
197
+
198
+ 1. Prepare a dataset compatible with:
199
+ - `DatasetWithLabels`
200
+ - `DatasetWithInputDim`
201
+ 2. Build your base `LightningDataModule`.
202
+ 3. Wrap it with `SelfSupervisionDataModule`.
203
+ 4. Instantiate:
204
+ - a model (`nn.Module`)
205
+ - a confidence estimator (`SupportsConfidenceEstimation`)
206
+ - a distribution analyzer (`SupportsDistributionAnalysis`)
207
+ - a concrete `SelfSupervisionModule`
208
+ 5. Train/evaluate with Lightning `Trainer`.
@@ -0,0 +1,40 @@
1
+ # SSAD — Self-Supervised Anomaly Detection Library
2
+
3
+ A Python library for autoencoder-based **anomaly detection** with self-supervised training and dynamic per-sample **confidence** updates.
4
+
5
+ ## Key Features
6
+
7
+ - Compute per-sample anomaly scores
8
+ - Estimate confidence from score distributions
9
+ - Recalibrate confidence intervals during training
10
+ - Apply confidence-aware losses (normal / abnormal / uncertain)
11
+ - Track experiments and artifacts with **MLflow**
12
+
13
+ ## Installation
14
+
15
+ ```bash
16
+ pip install ssad
17
+ ```
18
+
19
+ For development setup:
20
+
21
+ ```bash
22
+ pip install -e .[dev]
23
+ ```
24
+
25
+ ## Quick Links
26
+
27
+ - **Repository**: https://github.com/Orange-OpenSource/SSAD
28
+ - **Examples**: https://github.com/Orange-OpenSource/SSAD/tree/main/examples
29
+ - **Issues**: https://github.com/Orange-OpenSource/SSAD/issues
30
+
31
+
32
+ ## References
33
+
34
+ 1. N. Najari, S. Berlemont, G. Lefebvre, S. Duffner, C. Garcia,
35
+ *Robust Variational Autoencoders and Normalizing Flows for Unsupervised Network Anomaly Detection*,
36
+ AINA 2022, doi: 10.1007/978-3-030-99587-4_24
37
+
38
+ 2. N. Najari, S. Berlemont, G. Lefebvre, S. Duffner, C. Garcia,
39
+ *RADON: Robust Autoencoder for Unsupervised Anomaly Detection*,
40
+ SIN 2021, doi: 10.1109/SIN54109.2021.9699174
@@ -0,0 +1,111 @@
1
+ [project]
2
+ name = "ssad"
3
+ version = "0.1.2"
4
+ description = "Framework for self-supervised training of reconstruction-based autoencoder models for anomaly detection."
5
+ readme = "README_PYPI.md"
6
+ requires-python = ">=3.10,<3.15"
7
+ license = { text = "MIT" }
8
+ authors = [
9
+ { name = "Samuel Berlemont", email = "samuel.berlemont@orange.com" }
10
+ ]
11
+ maintainers = [
12
+ { name = "Julien Cumin", email = "julien1.cumin@orange.com" },
13
+ { name = "Mohammed Achraf El Khamlichi" },
14
+ ]
15
+ classifiers = [
16
+ "Programming Language :: Python :: 3",
17
+ "Operating System :: OS Independent",
18
+ ]
19
+ dependencies = [
20
+ "category_encoders>=2.8,<3.0",
21
+ "numpy>=1.26,<3.0",
22
+ "pandas>=2.2,<3.0",
23
+ "scipy>=1.15,<2.0",
24
+ "scikit-learn>=1.9,<2.0",
25
+ "torch>=2.10,<3.0",
26
+ "lightning>=2.5,<3.0",
27
+ "mlflow>=3.12,<4.0",
28
+ "psutil>=7.0,<8.0",
29
+ "nvidia-ml-py>=13.610,<14.0"
30
+ ]
31
+
32
+ [project.urls]
33
+ Homepage = "https://github.com/Orange-OpenSource/SSAD"
34
+ Issues = "https://github.com/Orange-OpenSource/SSAD/issues"
35
+ Repository = "https://github.com/Orange-OpenSource/SSAD"
36
+
37
+ [project.optional-dependencies]
38
+ dev = [
39
+ "pandas-stubs==2.2.2.240603",
40
+ "torchvision>=0.25,<0.29",
41
+ "types-pytz>=2026.2",
42
+ "mypy>=1.11,<2.0",
43
+ "pylint>=3.2,<4.0",
44
+ ]
45
+
46
+ [build-system]
47
+ requires = ["setuptools >= 77.0.3", "wheel"]
48
+ build-backend = "setuptools.build_meta"
49
+
50
+ [tool.setuptools.packages.find]
51
+ where = ["src"]
52
+ include = ["ssad*"]
53
+
54
+ [tool.setuptools.package-data]
55
+ ssad = ["py.typed"]
56
+
57
+ [tool.mypy]
58
+ python_version = "3.10"
59
+ files = ["ssad"]
60
+ pretty = true
61
+ show_error_codes = true
62
+ warn_unused_configs = true
63
+
64
+ # Progressive strictness - active baseline (Tier 1)
65
+ ignore_missing_imports = true
66
+ check_untyped_defs = true
67
+ no_implicit_optional = true
68
+
69
+ # warn_redundant_casts = true
70
+ # warn_unused_ignores = true
71
+ # strict_equality = true
72
+ # disallow_incomplete_defs = true
73
+
74
+ # disallow_untyped_defs = true
75
+ # warn_return_any = true
76
+ # disallow_any_generics = true
77
+
78
+ [[tool.mypy.overrides]]
79
+ module = [
80
+ "category_encoders.*",
81
+ "lightning.*",
82
+ "mlflow.*",
83
+ "torchvision.*",
84
+ ]
85
+ ignore_missing_imports = true
86
+
87
+ [tool.pylint.main]
88
+ py-version = "3.10"
89
+ jobs = 0
90
+ recursive = true
91
+ ignore = ["build", "dist", ".venv"]
92
+
93
+ [tool.pylint.format]
94
+ max-line-length = 100
95
+
96
+ [tool.pylint."messages control"]
97
+ disable = [
98
+ "C0114", # missing-module-docstring
99
+ "C0115", # missing-class-docstring
100
+ "C0116", # missing-function-docstring
101
+ "R0903", # too-few-public-methods
102
+ ]
103
+
104
+ [tool.pylint.design]
105
+ max-args = 10
106
+ max-locals = 25
107
+ max-branches = 20
108
+ max-statements = 80
109
+
110
+ [tool.pylint.typecheck]
111
+ ignored-modules = ["torch", "lightning", "mlflow"]
ssad-0.1.2/setup.cfg ADDED
@@ -0,0 +1,4 @@
1
+ [egg_info]
2
+ tag_build =
3
+ tag_date = 0
4
+
@@ -0,0 +1,39 @@
1
+ # Software Name : Self-Supervised Anomaly Detection
2
+ # SPDX-FileCopyrightText: Copyright (c) Orange SA
3
+ # SPDX-License-Identifier: MIT
4
+ #
5
+ # This software is distributed under the MIT License,
6
+ # see the "LICENSE" file for more details or https://spdx.org/licenses/MIT.html
7
+ #
8
+ # Authors: see CONTRIBUTORS
9
+ # Software description: A Python library for autoencoder-based anomaly detection
10
+ # based on self-supervised training with dynamic sample confidence updates.
11
+ """
12
+ Initializes the main package API by exposing public interfaces from submodules.
13
+
14
+ This module re-exports the core components of the package to simplify access and
15
+ maintain a clean and consistent public API.
16
+
17
+ Available namespaces:
18
+ - confidence_estimators
19
+ - datamodules
20
+ - datasets
21
+ - distribution_analyzers
22
+ - models
23
+ - modules
24
+ - loggers
25
+
26
+ Usage:
27
+ from ssad import BinaryConfidence, ConfidenceIntervalsConfiguration
28
+
29
+ Note:
30
+ This file uses wildcard imports (`*`) to expose only the public symbols defined
31
+ in each submodule's `__all__` list.
32
+ """
33
+ from .confidence_estimators.api import *
34
+ from .datamodules.api import *
35
+ from .datasets.api import *
36
+ from .distribution_analyzers.api import *
37
+ from .models.api import *
38
+ from .modules.api import *
39
+ from .loggers.api import *
@@ -0,0 +1,15 @@
1
+ # Software Name : Self-Supervised Anomaly Detection
2
+ # SPDX-FileCopyrightText: Copyright (c) Orange SA
3
+ # SPDX-License-Identifier: MIT
4
+ #
5
+ # This software is distributed under the MIT License,
6
+ # see the "LICENSE" file for more details or https://spdx.org/licenses/MIT.html
7
+ #
8
+ # Authors: see CONTRIBUTORS
9
+ # Software description: A Python library for autoencoder-based anomaly detection
10
+ # based on self-supervised training with dynamic sample confidence updates.
11
+ r"""
12
+ # What is a confidence estimator?
13
+ A confidence estimator is a module that, given a value such as a reconstruction error
14
+ or a gradient value, returns a confidence score between -1 and 1.
15
+ """
@@ -0,0 +1,16 @@
1
+ # Software Name : Self-Supervised Anomaly Detection
2
+ # SPDX-FileCopyrightText: Copyright (c) Orange SA
3
+ # SPDX-License-Identifier: MIT
4
+ #
5
+ # This software is distributed under the MIT License,
6
+ # see the "LICENSE" file for more details or https://spdx.org/licenses/MIT.html
7
+ #
8
+ # Authors: see CONTRIBUTORS
9
+ # Software description: A Python library for autoencoder-based anomaly detection
10
+ # based on self-supervised training with dynamic sample confidence updates.
11
+ """Public API for self-supervision confidence estimators."""
12
+ from .binary_confidence import BinaryConfidence
13
+ from .supports_confidence_estimation import SupportsConfidenceEstimation
14
+ from .confidence_intervals_configuration import ConfidenceIntervalsConfiguration
15
+
16
+ __all__ = ["SupportsConfidenceEstimation", "BinaryConfidence", "ConfidenceIntervalsConfiguration"]
@@ -0,0 +1,30 @@
1
+ # Software Name : Self-Supervised Anomaly Detection
2
+ # SPDX-FileCopyrightText: Copyright (c) Orange SA
3
+ # SPDX-License-Identifier: MIT
4
+ #
5
+ # This software is distributed under the MIT License,
6
+ # see the "LICENSE" file for more details or https://spdx.org/licenses/MIT.html
7
+ #
8
+ # Authors: see CONTRIBUTORS
9
+ # Software description: A Python library for autoencoder-based anomaly detection
10
+ # based on self-supervised training with dynamic sample confidence updates.
11
+ """
12
+ Implements a binary confidence estimator.
13
+ A sample is either normal or abnormal, otherwise it is omitted (zero confidence).
14
+ """
15
+
16
+ import torch
17
+ from .confidence_estimator import BaseConfidenceEstimator
18
+
19
+
20
+ class BinaryConfidence(BaseConfidenceEstimator):
21
+ """Binary confidence estimator"""
22
+
23
+ def _confidence_normal(self, score):
24
+ return torch.ones_like(score)
25
+
26
+ def _confidence_abnormal(self, score):
27
+ return torch.full_like(score, -1)
28
+
29
+ def _confidence_unknown(self, score):
30
+ return torch.zeros_like(score)
@@ -0,0 +1,94 @@
1
+ # Software Name : Self-Supervised Anomaly Detection
2
+ # SPDX-FileCopyrightText: Copyright (c) Orange SA
3
+ # SPDX-License-Identifier: MIT
4
+ #
5
+ # This software is distributed under the MIT License,
6
+ # see the "LICENSE" file for more details or https://spdx.org/licenses/MIT.html
7
+ #
8
+ # Authors: see CONTRIBUTORS
9
+ # Software description: A Python library for autoencoder-based anomaly detection
10
+ # based on self-supervised training with dynamic sample confidence updates.
11
+ """
12
+ Provides the base class for confidence estimators.
13
+ """
14
+
15
+ from abc import ABC, abstractmethod
16
+ from typing import Optional
17
+ import torch
18
+
19
+ from .confidence_intervals_configuration import (
20
+ ConfidenceIntervalsConfiguration,
21
+ )
22
+ from .supports_confidence_estimation import SupportsConfidenceEstimation
23
+
24
+
25
+ class BaseConfidenceEstimator(ABC, SupportsConfidenceEstimation):
26
+ """Base class for confidence estimators.
27
+ A confidence estimator relies on four intervals with associated scoring functions
28
+ to provide a confidence score given a criterion score for a sample.
29
+
30
+ The criterion score can be for instance a reconstruction score, or the norm of
31
+ the gradient of the reconstruction error.
32
+
33
+ The intervals define the domain for model scores for the four different confidence behaviors:
34
+ - normal: samples with model scores in this interval are considered as normal
35
+ - abnormal: samples with model scores in this interval are considered as abnormal
36
+ - unknown_positive: samples with model scores in this interval are
37
+ considered as unknown, but leaning towards a normal sample.
38
+ - unknown_negative: samples with model scores in this interval are
39
+ considered as unknown, but leaning towards an abnormal sample.
40
+
41
+ Each interval is associated with a "criterion score to confidence score" conversion function.
42
+ Subclasses implement them as _confidence_normal, _confidence_abnormal and _confidence_unknown.
43
+ """
44
+
45
+ def __init__(self):
46
+ super().__init__()
47
+ self.configuration: Optional[ConfidenceIntervalsConfiguration] = None
48
+ self.distribution: Optional[torch.Tensor] = None
49
+
50
+ @abstractmethod
51
+ def _confidence_normal(self, score):
52
+ raise NotImplementedError()
53
+
54
+ @abstractmethod
55
+ def _confidence_abnormal(self, score) -> torch.Tensor:
56
+ raise NotImplementedError()
57
+
58
+ @abstractmethod
59
+ def _confidence_unknown(self, score) -> torch.Tensor:
60
+ raise NotImplementedError()
61
+
62
+ @torch.no_grad()
63
+ def estimate_confidence(self, scores_batch: torch.Tensor) -> torch.Tensor:
64
+ """Estimates the confidence for a batch by translating each criterion score
65
+ into a confidence score.
66
+
67
+ Args:
68
+ scores_batch (torch.Tensor): criterion scores whose confidence is to be estimated.
69
+
70
+ Returns:
71
+ torch.Tensor: confidence score
72
+ """
73
+ # TODO: check this order of computation
74
+ confidence = self._confidence_unknown(scores_batch)
75
+
76
+ if self.configuration is None:
77
+ raise ValueError("Confidence estimator configuration is None")
78
+
79
+ # TODO: rework signatures of confidence normal/abnormal/unknown
80
+ normal_confidences = self._confidence_normal(scores_batch)
81
+ abnormal_confidences = self._confidence_abnormal(scores_batch)
82
+
83
+ confidence = torch.where(
84
+ self.configuration.normal.contains_tensor_mask(scores_batch),
85
+ normal_confidences,
86
+ confidence,
87
+ )
88
+ confidence = torch.where(
89
+ self.configuration.abnormal.contains_tensor_mask(scores_batch),
90
+ abnormal_confidences,
91
+ confidence,
92
+ )
93
+
94
+ return confidence