ssad 0.1.2__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ssad-0.1.2/LICENSE +7 -0
- ssad-0.1.2/PKG-INFO +74 -0
- ssad-0.1.2/README.md +208 -0
- ssad-0.1.2/README_PYPI.md +40 -0
- ssad-0.1.2/pyproject.toml +111 -0
- ssad-0.1.2/setup.cfg +4 -0
- ssad-0.1.2/src/ssad/__init__.py +39 -0
- ssad-0.1.2/src/ssad/confidence_estimators/__init__.py +15 -0
- ssad-0.1.2/src/ssad/confidence_estimators/api.py +16 -0
- ssad-0.1.2/src/ssad/confidence_estimators/binary_confidence.py +30 -0
- ssad-0.1.2/src/ssad/confidence_estimators/confidence_estimator.py +94 -0
- ssad-0.1.2/src/ssad/confidence_estimators/confidence_intervals_configuration.py +67 -0
- ssad-0.1.2/src/ssad/confidence_estimators/hybrid_confidence.py +49 -0
- ssad-0.1.2/src/ssad/confidence_estimators/supports_confidence_estimation.py +26 -0
- ssad-0.1.2/src/ssad/datamodules/__init__.py +10 -0
- ssad-0.1.2/src/ssad/datamodules/api.py +27 -0
- ssad-0.1.2/src/ssad/datamodules/dataframe_with_labels.py +95 -0
- ssad-0.1.2/src/ssad/datamodules/dataset_interfaces.py +47 -0
- ssad-0.1.2/src/ssad/datamodules/dataset_with_confidence.py +101 -0
- ssad-0.1.2/src/ssad/datamodules/self_supervision_datamodule.py +99 -0
- ssad-0.1.2/src/ssad/datamodules/transforms/dataframe_to_tensor.py +34 -0
- ssad-0.1.2/src/ssad/datasets/__init__.py +14 -0
- ssad-0.1.2/src/ssad/datasets/api.py +29 -0
- ssad-0.1.2/src/ssad/datasets/general_tabular_datamodule.py +322 -0
- ssad-0.1.2/src/ssad/datasets/pipeline.py +98 -0
- ssad-0.1.2/src/ssad/datasets/utils.py +427 -0
- ssad-0.1.2/src/ssad/distribution_analyzers/__init__.py +15 -0
- ssad-0.1.2/src/ssad/distribution_analyzers/api.py +20 -0
- ssad-0.1.2/src/ssad/distribution_analyzers/evt_thresholding.py +162 -0
- ssad-0.1.2/src/ssad/distribution_analyzers/supports_distribution_analysis.py +63 -0
- ssad-0.1.2/src/ssad/distribution_analyzers/triangular_thresholding.py +295 -0
- ssad-0.1.2/src/ssad/loggers/__init__.py +13 -0
- ssad-0.1.2/src/ssad/loggers/api.py +14 -0
- ssad-0.1.2/src/ssad/loggers/logging_config.py +57 -0
- ssad-0.1.2/src/ssad/loggers/mlflow_logger.py +154 -0
- ssad-0.1.2/src/ssad/models/__init__.py +10 -0
- ssad-0.1.2/src/ssad/models/api.py +14 -0
- ssad-0.1.2/src/ssad/models/autoencoder.py +86 -0
- ssad-0.1.2/src/ssad/models/variational_autoencoder.py +388 -0
- ssad-0.1.2/src/ssad/modules/__init__.py +15 -0
- ssad-0.1.2/src/ssad/modules/api.py +22 -0
- ssad-0.1.2/src/ssad/modules/cosine_reconstruction_module.py +105 -0
- ssad-0.1.2/src/ssad/modules/free_energy_module.py +204 -0
- ssad-0.1.2/src/ssad/modules/self_supervision_module.py +528 -0
- ssad-0.1.2/src/ssad/modules/supports_self_supervision.py +47 -0
- ssad-0.1.2/src/ssad/py.typed +0 -0
- ssad-0.1.2/src/ssad.egg-info/PKG-INFO +74 -0
- ssad-0.1.2/src/ssad.egg-info/SOURCES.txt +49 -0
- ssad-0.1.2/src/ssad.egg-info/dependency_links.txt +1 -0
- ssad-0.1.2/src/ssad.egg-info/requires.txt +17 -0
- ssad-0.1.2/src/ssad.egg-info/top_level.txt +1 -0
ssad-0.1.2/LICENSE
ADDED
|
@@ -0,0 +1,7 @@
|
|
|
1
|
+
Copyright (c) 2026 Orange SA
|
|
2
|
+
|
|
3
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the “Software”), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
|
|
4
|
+
|
|
5
|
+
The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
|
|
6
|
+
|
|
7
|
+
THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
ssad-0.1.2/PKG-INFO
ADDED
|
@@ -0,0 +1,74 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: ssad
|
|
3
|
+
Version: 0.1.2
|
|
4
|
+
Summary: Framework for self-supervised training of reconstruction-based autoencoder models for anomaly detection.
|
|
5
|
+
Author-email: Samuel Berlemont <samuel.berlemont@orange.com>
|
|
6
|
+
Maintainer: Mohammed Achraf El Khamlichi
|
|
7
|
+
Maintainer-email: Julien Cumin <julien1.cumin@orange.com>
|
|
8
|
+
License: MIT
|
|
9
|
+
Project-URL: Homepage, https://github.com/Orange-OpenSource/SSAD
|
|
10
|
+
Project-URL: Issues, https://github.com/Orange-OpenSource/SSAD/issues
|
|
11
|
+
Project-URL: Repository, https://github.com/Orange-OpenSource/SSAD
|
|
12
|
+
Classifier: Programming Language :: Python :: 3
|
|
13
|
+
Classifier: Operating System :: OS Independent
|
|
14
|
+
Requires-Python: <3.15,>=3.10
|
|
15
|
+
Description-Content-Type: text/markdown
|
|
16
|
+
License-File: LICENSE
|
|
17
|
+
Requires-Dist: category_encoders<3.0,>=2.8
|
|
18
|
+
Requires-Dist: numpy<3.0,>=1.26
|
|
19
|
+
Requires-Dist: pandas<3.0,>=2.2
|
|
20
|
+
Requires-Dist: scipy<2.0,>=1.15
|
|
21
|
+
Requires-Dist: scikit-learn<2.0,>=1.9
|
|
22
|
+
Requires-Dist: torch<3.0,>=2.10
|
|
23
|
+
Requires-Dist: lightning<3.0,>=2.5
|
|
24
|
+
Requires-Dist: mlflow<4.0,>=3.12
|
|
25
|
+
Requires-Dist: psutil<8.0,>=7.0
|
|
26
|
+
Requires-Dist: nvidia-ml-py<14.0,>=13.610
|
|
27
|
+
Provides-Extra: dev
|
|
28
|
+
Requires-Dist: pandas-stubs==2.2.2.240603; extra == "dev"
|
|
29
|
+
Requires-Dist: torchvision<0.29,>=0.25; extra == "dev"
|
|
30
|
+
Requires-Dist: types-pytz>=2026.2; extra == "dev"
|
|
31
|
+
Requires-Dist: mypy<2.0,>=1.11; extra == "dev"
|
|
32
|
+
Requires-Dist: pylint<4.0,>=3.2; extra == "dev"
|
|
33
|
+
Dynamic: license-file
|
|
34
|
+
|
|
35
|
+
# SSAD — Self-Supervised Anomaly Detection Library
|
|
36
|
+
|
|
37
|
+
A Python library for autoencoder-based **anomaly detection** with self-supervised training and dynamic per-sample **confidence** updates.
|
|
38
|
+
|
|
39
|
+
## Key Features
|
|
40
|
+
|
|
41
|
+
- Compute per-sample anomaly scores
|
|
42
|
+
- Estimate confidence from score distributions
|
|
43
|
+
- Recalibrate confidence intervals during training
|
|
44
|
+
- Apply confidence-aware losses (normal / abnormal / uncertain)
|
|
45
|
+
- Track experiments and artifacts with **MLflow**
|
|
46
|
+
|
|
47
|
+
## Installation
|
|
48
|
+
|
|
49
|
+
```bash
|
|
50
|
+
pip install ssad
|
|
51
|
+
```
|
|
52
|
+
|
|
53
|
+
For development setup:
|
|
54
|
+
|
|
55
|
+
```bash
|
|
56
|
+
pip install -e .[dev]
|
|
57
|
+
```
|
|
58
|
+
|
|
59
|
+
## Quick Links
|
|
60
|
+
|
|
61
|
+
- **Repository**: https://github.com/Orange-OpenSource/SSAD
|
|
62
|
+
- **Examples**: https://github.com/Orange-OpenSource/SSAD/tree/main/examples
|
|
63
|
+
- **Issues**: https://github.com/Orange-OpenSource/SSAD/issues
|
|
64
|
+
|
|
65
|
+
|
|
66
|
+
## References
|
|
67
|
+
|
|
68
|
+
1. N. Najari, S. Berlemont, G. Lefebvre, S. Duffner, C. Garcia,
|
|
69
|
+
*Robust Variational Autoencoders and Normalizing Flows for Unsupervised Network Anomaly Detection*,
|
|
70
|
+
AINA 2022, doi: 10.1007/978-3-030-99587-4_24
|
|
71
|
+
|
|
72
|
+
2. N. Najari, S. Berlemont, G. Lefebvre, S. Duffner, C. Garcia,
|
|
73
|
+
*RADON: Robust Autoencoder for Unsupervised Anomaly Detection*,
|
|
74
|
+
SIN 2021, doi: 10.1109/SIN54109.2021.9699174
|
ssad-0.1.2/README.md
ADDED
|
@@ -0,0 +1,208 @@
|
|
|
1
|
+
# SSAD — Self-Supervised Anomaly Detection Library
|
|
2
|
+
|
|
3
|
+
A Python library for autoencoder-based **anomaly detection**, built on self-supervised training with dynamic **sample confidence** updates.
|
|
4
|
+
|
|
5
|
+
## Purpose
|
|
6
|
+
|
|
7
|
+
This library is designed to:
|
|
8
|
+
|
|
9
|
+
- train a model that produces an **anomaly score**;
|
|
10
|
+
- estimate per-sample **confidence** from that score;
|
|
11
|
+
- analyze score distributions to periodically recalibrate confidence intervals corresponding to normal, abnormal and unknown samples;
|
|
12
|
+
- apply different losses depending on confidence regions, which can take confidence and intervals into account to reweight samples;
|
|
13
|
+
- track experiments, metrics, and artifacts with **MLflow** and an SQL backend store.
|
|
14
|
+
|
|
15
|
+
NB: examples are provided in the [examples folder](examples). They correspond to the implementation of the RADON and GRAnD anomaly detection models.
|
|
16
|
+
|
|
17
|
+
[1] N. Najari, S. Berlemont, G. Lefebvre, S. Duffner, and C. Garcia, "Robust Variational Autoencoders and Normalizing Flows for Unsupervised Network Anomaly Detection," in Advanced Information Networking and Applications, L. Barolli, F. Hussain, and T. Enokido, Eds., Lecture Notes in Networks and Systems, vol. 450. Cham: Springer International Publishing, 2022, pp. 281–292. doi: 10.1007/978-3-030-99587-4_24.
|
|
18
|
+
|
|
19
|
+
[2] N. Najari, S. Berlemont, G. Lefebvre, S. Duffner, and C. Garcia, "RADON: Robust Autoencoder for Unsupervised Anomaly Detection," in 2021 14th International Conference on Security of Information and Networks (SIN), Dec. 2021, pp. 1–8. doi: 10.1109/SIN54109.2021.9699174.
|
|
20
|
+
|
|
21
|
+
---
|
|
22
|
+
|
|
23
|
+
## Installation
|
|
24
|
+
|
|
25
|
+
```
|
|
26
|
+
pip install -e .
|
|
27
|
+
```
|
|
28
|
+
|
|
29
|
+
or, to launch examples,
|
|
30
|
+
|
|
31
|
+
```
|
|
32
|
+
pip install -e .[dev]
|
|
33
|
+
```
|
|
34
|
+
|
|
35
|
+
---
|
|
36
|
+
|
|
37
|
+
## Visualize results
|
|
38
|
+
Define the tracking URI in the MLflow configuration of the experiment:
|
|
39
|
+
|
|
40
|
+
```
|
|
41
|
+
tracking_uri=f"sqlite:///<path-to>/mlflow.db",
|
|
42
|
+
```
|
|
43
|
+
|
|
44
|
+
then start the server to visualize the results and artifacts:
|
|
45
|
+
|
|
46
|
+
```
|
|
47
|
+
mlflow ui --backend-store-uri sqlite:///<path-to>/mlflow.db
|
|
48
|
+
```
|
|
49
|
+
|
|
50
|
+
## High-Level Architecture
|
|
51
|
+
|
|
52
|
+
The codebase is split into focused modules:
|
|
53
|
+
|
|
54
|
+
- `confidence_estimators`: confidence estimation logic from model scores.
|
|
55
|
+
- `distribution_analyzers`: score distribution analysis and interval extraction.
|
|
56
|
+
- `datamodules`: Lightning data wrapping with confidence-aware datasets.
|
|
57
|
+
- `datasets`: dataset types and confidence I/O helpers.
|
|
58
|
+
- `modules`: self-supervision training module and callback.
|
|
59
|
+
- `models`: PyTorch model definitions (e.g., autoencoder).
|
|
60
|
+
- `loggers`: MLflow utility functions for artifacts and metrics logging.
|
|
61
|
+
|
|
62
|
+
Consolidated class diagram:
|
|
63
|
+

|
|
64
|
+
|
|
65
|
+
---
|
|
66
|
+
|
|
67
|
+
## Class Diagrams
|
|
68
|
+
|
|
69
|
+
All PlantUML source files are in:
|
|
70
|
+
|
|
71
|
+
- `docs/diagrams/*.plantuml`
|
|
72
|
+
|
|
73
|
+
### 1) Confidence Estimators
|
|
74
|
+
- UML source: `docs/diagrams/confidence_estimators.plantuml`
|
|
75
|
+
- Figure:
|
|
76
|
+

|
|
77
|
+
|
|
78
|
+
Main elements:
|
|
79
|
+
|
|
80
|
+
- `SupportsConfidenceEstimation` (Protocol)
|
|
81
|
+
- `BaseConfidenceEstimator` (abstract)
|
|
82
|
+
- `ConfidenceIntervalsConfiguration`
|
|
83
|
+
- `Interval` (extends `pandas.Interval`)
|
|
84
|
+
|
|
85
|
+
---
|
|
86
|
+
|
|
87
|
+
### 2) Data Modules
|
|
88
|
+
- UML source: `docs/diagrams/datamodules.plantuml`
|
|
89
|
+
- Figure:
|
|
90
|
+

|
|
91
|
+
|
|
92
|
+
Main elements:
|
|
93
|
+
|
|
94
|
+
- `SelfSupervisionDataModule` wrapping a Lightning datamodule
|
|
95
|
+
- Integration with `DatasetWithConfidence`
|
|
96
|
+
|
|
97
|
+
---
|
|
98
|
+
|
|
99
|
+
### 3) Datasets
|
|
100
|
+
- UML source: `docs/diagrams/datasets.plantuml`
|
|
101
|
+
- Figure:
|
|
102
|
+

|
|
103
|
+
|
|
104
|
+
Main elements:
|
|
105
|
+
|
|
106
|
+
- `DataFrameWithLabels`
|
|
107
|
+
- `DatasetWithLabels` / `DatasetWithInputDim` (Protocols)
|
|
108
|
+
- `DatasetWithConfidence`
|
|
109
|
+
- Utility functions:
|
|
110
|
+
- `init_confidence_from_csv`
|
|
111
|
+
- `save_confidence_to_csv`
|
|
112
|
+
|
|
113
|
+
---
|
|
114
|
+
|
|
115
|
+
### 4) Distribution Analyzers
|
|
116
|
+
- UML source: `docs/diagrams/distribution_analyzers.plantuml`
|
|
117
|
+
- Figure:
|
|
118
|
+

|
|
119
|
+
|
|
120
|
+
Main elements:
|
|
121
|
+
|
|
122
|
+
- `SupportsDistributionAnalysis` (Protocol)
|
|
123
|
+
- Concrete analyzer implementations (e.g., thresholding strategies)
|
|
124
|
+
|
|
125
|
+
---
|
|
126
|
+
|
|
127
|
+
### 5) Models
|
|
128
|
+
- UML source: `docs/diagrams/models.plantuml`
|
|
129
|
+
- Figure:
|
|
130
|
+

|
|
131
|
+
|
|
132
|
+
Main elements:
|
|
133
|
+
|
|
134
|
+
- `torch.nn.Module`
|
|
135
|
+
- `Autoencoder`
|
|
136
|
+
|
|
137
|
+
---
|
|
138
|
+
|
|
139
|
+
### 6) Self-Supervision Modules
|
|
140
|
+
- UML source: `docs/diagrams/modules.plantuml`
|
|
141
|
+
- Figure:
|
|
142
|
+

|
|
143
|
+
|
|
144
|
+
Main elements:
|
|
145
|
+
|
|
146
|
+
- `SupportsSelfSupervision` (Protocol)
|
|
147
|
+
- `SelfSupervisionModule` (abstract Lightning module)
|
|
148
|
+
- `SelfSupervisionCallback`
|
|
149
|
+
- Dependency injection of:
|
|
150
|
+
- `SupportsConfidenceEstimation`
|
|
151
|
+
- `SupportsDistributionAnalysis`
|
|
152
|
+
|
|
153
|
+
---
|
|
154
|
+
|
|
155
|
+
## Training Loop (Conceptual)
|
|
156
|
+
|
|
157
|
+
1. The model computes per-sample scores (`score` / `_prediction_score`).
|
|
158
|
+
2. Distribution analysis derives confidence intervals.
|
|
159
|
+
3. Confidence estimator maps scores to confidence values.
|
|
160
|
+
4. Training dataset is refreshed with updated confidence.
|
|
161
|
+
5. Loss computation uses confidence-aware behavior (normal/abnormal/uncertain).
|
|
162
|
+
6. Confidence and intervals are recalibrated every `every_n_epochs`.
|
|
163
|
+
7. Metrics and artifacts are logged.
|
|
164
|
+
|
|
165
|
+
---
|
|
166
|
+
|
|
167
|
+
## Global workflow
|
|
168
|
+
|
|
169
|
+

|
|
170
|
+
|
|
171
|
+
---
|
|
172
|
+
|
|
173
|
+
## Logging
|
|
174
|
+
|
|
175
|
+
`ssad/loggers/mlflow_logger.py` provides helper functions to log:
|
|
176
|
+
|
|
177
|
+
- confidence CSV snapshots (`confidence_epoch_*.csv`)
|
|
178
|
+
- confidence interval JSON files (`confidence_intervals_epoch_*.json`)
|
|
179
|
+
- distribution analysis figures (`confidence_analysis_epoch_*.svg`)
|
|
180
|
+
- system metrics (CPU / RAM / GPU)
|
|
181
|
+
- test metrics by threshold (`test_metrics_threshold=*.json`)
|
|
182
|
+
|
|
183
|
+
---
|
|
184
|
+
|
|
185
|
+
## Main Dependencies
|
|
186
|
+
|
|
187
|
+
- Python 3.10+
|
|
188
|
+
- PyTorch
|
|
189
|
+
- Lightning
|
|
190
|
+
- NumPy / pandas / scikit-learn / matplotlib
|
|
191
|
+
- MLflow
|
|
192
|
+
- psutil
|
|
193
|
+
|
|
194
|
+
---
|
|
195
|
+
|
|
196
|
+
## Typical Usage (High-Level)
|
|
197
|
+
|
|
198
|
+
1. Prepare a dataset compatible with:
|
|
199
|
+
- `DatasetWithLabels`
|
|
200
|
+
- `DatasetWithInputDim`
|
|
201
|
+
2. Build your base `LightningDataModule`.
|
|
202
|
+
3. Wrap it with `SelfSupervisionDataModule`.
|
|
203
|
+
4. Instantiate:
|
|
204
|
+
- a model (`nn.Module`)
|
|
205
|
+
- a confidence estimator (`SupportsConfidenceEstimation`)
|
|
206
|
+
- a distribution analyzer (`SupportsDistributionAnalysis`)
|
|
207
|
+
- a concrete `SelfSupervisionModule`
|
|
208
|
+
5. Train/evaluate with Lightning `Trainer`.
|
|
@@ -0,0 +1,40 @@
|
|
|
1
|
+
# SSAD — Self-Supervised Anomaly Detection Library
|
|
2
|
+
|
|
3
|
+
A Python library for autoencoder-based **anomaly detection** with self-supervised training and dynamic per-sample **confidence** updates.
|
|
4
|
+
|
|
5
|
+
## Key Features
|
|
6
|
+
|
|
7
|
+
- Compute per-sample anomaly scores
|
|
8
|
+
- Estimate confidence from score distributions
|
|
9
|
+
- Recalibrate confidence intervals during training
|
|
10
|
+
- Apply confidence-aware losses (normal / abnormal / uncertain)
|
|
11
|
+
- Track experiments and artifacts with **MLflow**
|
|
12
|
+
|
|
13
|
+
## Installation
|
|
14
|
+
|
|
15
|
+
```bash
|
|
16
|
+
pip install ssad
|
|
17
|
+
```
|
|
18
|
+
|
|
19
|
+
For development setup:
|
|
20
|
+
|
|
21
|
+
```bash
|
|
22
|
+
pip install -e .[dev]
|
|
23
|
+
```
|
|
24
|
+
|
|
25
|
+
## Quick Links
|
|
26
|
+
|
|
27
|
+
- **Repository**: https://github.com/Orange-OpenSource/SSAD
|
|
28
|
+
- **Examples**: https://github.com/Orange-OpenSource/SSAD/tree/main/examples
|
|
29
|
+
- **Issues**: https://github.com/Orange-OpenSource/SSAD/issues
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
## References
|
|
33
|
+
|
|
34
|
+
1. N. Najari, S. Berlemont, G. Lefebvre, S. Duffner, C. Garcia,
|
|
35
|
+
*Robust Variational Autoencoders and Normalizing Flows for Unsupervised Network Anomaly Detection*,
|
|
36
|
+
AINA 2022, doi: 10.1007/978-3-030-99587-4_24
|
|
37
|
+
|
|
38
|
+
2. N. Najari, S. Berlemont, G. Lefebvre, S. Duffner, C. Garcia,
|
|
39
|
+
*RADON: Robust Autoencoder for Unsupervised Anomaly Detection*,
|
|
40
|
+
SIN 2021, doi: 10.1109/SIN54109.2021.9699174
|
|
@@ -0,0 +1,111 @@
|
|
|
1
|
+
[project]
|
|
2
|
+
name = "ssad"
|
|
3
|
+
version = "0.1.2"
|
|
4
|
+
description = "Framework for self-supervised training of reconstruction-based autoencoder models for anomaly detection."
|
|
5
|
+
readme = "README_PYPI.md"
|
|
6
|
+
requires-python = ">=3.10,<3.15"
|
|
7
|
+
license = { text = "MIT" }
|
|
8
|
+
authors = [
|
|
9
|
+
{ name = "Samuel Berlemont", email = "samuel.berlemont@orange.com" }
|
|
10
|
+
]
|
|
11
|
+
maintainers = [
|
|
12
|
+
{ name = "Julien Cumin", email = "julien1.cumin@orange.com" },
|
|
13
|
+
{ name = "Mohammed Achraf El Khamlichi" },
|
|
14
|
+
]
|
|
15
|
+
classifiers = [
|
|
16
|
+
"Programming Language :: Python :: 3",
|
|
17
|
+
"Operating System :: OS Independent",
|
|
18
|
+
]
|
|
19
|
+
dependencies = [
|
|
20
|
+
"category_encoders>=2.8,<3.0",
|
|
21
|
+
"numpy>=1.26,<3.0",
|
|
22
|
+
"pandas>=2.2,<3.0",
|
|
23
|
+
"scipy>=1.15,<2.0",
|
|
24
|
+
"scikit-learn>=1.9,<2.0",
|
|
25
|
+
"torch>=2.10,<3.0",
|
|
26
|
+
"lightning>=2.5,<3.0",
|
|
27
|
+
"mlflow>=3.12,<4.0",
|
|
28
|
+
"psutil>=7.0,<8.0",
|
|
29
|
+
"nvidia-ml-py>=13.610,<14.0"
|
|
30
|
+
]
|
|
31
|
+
|
|
32
|
+
[project.urls]
|
|
33
|
+
Homepage = "https://github.com/Orange-OpenSource/SSAD"
|
|
34
|
+
Issues = "https://github.com/Orange-OpenSource/SSAD/issues"
|
|
35
|
+
Repository = "https://github.com/Orange-OpenSource/SSAD"
|
|
36
|
+
|
|
37
|
+
[project.optional-dependencies]
|
|
38
|
+
dev = [
|
|
39
|
+
"pandas-stubs==2.2.2.240603",
|
|
40
|
+
"torchvision>=0.25,<0.29",
|
|
41
|
+
"types-pytz>=2026.2",
|
|
42
|
+
"mypy>=1.11,<2.0",
|
|
43
|
+
"pylint>=3.2,<4.0",
|
|
44
|
+
]
|
|
45
|
+
|
|
46
|
+
[build-system]
|
|
47
|
+
requires = ["setuptools >= 77.0.3", "wheel"]
|
|
48
|
+
build-backend = "setuptools.build_meta"
|
|
49
|
+
|
|
50
|
+
[tool.setuptools.packages.find]
|
|
51
|
+
where = ["src"]
|
|
52
|
+
include = ["ssad*"]
|
|
53
|
+
|
|
54
|
+
[tool.setuptools.package-data]
|
|
55
|
+
ssad = ["py.typed"]
|
|
56
|
+
|
|
57
|
+
[tool.mypy]
|
|
58
|
+
python_version = "3.10"
|
|
59
|
+
files = ["ssad"]
|
|
60
|
+
pretty = true
|
|
61
|
+
show_error_codes = true
|
|
62
|
+
warn_unused_configs = true
|
|
63
|
+
|
|
64
|
+
# Progressive strictness - active baseline (Tier 1)
|
|
65
|
+
ignore_missing_imports = true
|
|
66
|
+
check_untyped_defs = true
|
|
67
|
+
no_implicit_optional = true
|
|
68
|
+
|
|
69
|
+
# warn_redundant_casts = true
|
|
70
|
+
# warn_unused_ignores = true
|
|
71
|
+
# strict_equality = true
|
|
72
|
+
# disallow_incomplete_defs = true
|
|
73
|
+
|
|
74
|
+
# disallow_untyped_defs = true
|
|
75
|
+
# warn_return_any = true
|
|
76
|
+
# disallow_any_generics = true
|
|
77
|
+
|
|
78
|
+
[[tool.mypy.overrides]]
|
|
79
|
+
module = [
|
|
80
|
+
"category_encoders.*",
|
|
81
|
+
"lightning.*",
|
|
82
|
+
"mlflow.*",
|
|
83
|
+
"torchvision.*",
|
|
84
|
+
]
|
|
85
|
+
ignore_missing_imports = true
|
|
86
|
+
|
|
87
|
+
[tool.pylint.main]
|
|
88
|
+
py-version = "3.10"
|
|
89
|
+
jobs = 0
|
|
90
|
+
recursive = true
|
|
91
|
+
ignore = ["build", "dist", ".venv"]
|
|
92
|
+
|
|
93
|
+
[tool.pylint.format]
|
|
94
|
+
max-line-length = 100
|
|
95
|
+
|
|
96
|
+
[tool.pylint."messages control"]
|
|
97
|
+
disable = [
|
|
98
|
+
"C0114", # missing-module-docstring
|
|
99
|
+
"C0115", # missing-class-docstring
|
|
100
|
+
"C0116", # missing-function-docstring
|
|
101
|
+
"R0903", # too-few-public-methods
|
|
102
|
+
]
|
|
103
|
+
|
|
104
|
+
[tool.pylint.design]
|
|
105
|
+
max-args = 10
|
|
106
|
+
max-locals = 25
|
|
107
|
+
max-branches = 20
|
|
108
|
+
max-statements = 80
|
|
109
|
+
|
|
110
|
+
[tool.pylint.typecheck]
|
|
111
|
+
ignored-modules = ["torch", "lightning", "mlflow"]
|
ssad-0.1.2/setup.cfg
ADDED
|
@@ -0,0 +1,39 @@
|
|
|
1
|
+
# Software Name : Self-Supervised Anomaly Detection
|
|
2
|
+
# SPDX-FileCopyrightText: Copyright (c) Orange SA
|
|
3
|
+
# SPDX-License-Identifier: MIT
|
|
4
|
+
#
|
|
5
|
+
# This software is distributed under the MIT License,
|
|
6
|
+
# see the "LICENSE.txt" file for more details or https://spdx.org/licenses/MIT.html
|
|
7
|
+
#
|
|
8
|
+
# Authors: see CONTRIBUTORS
|
|
9
|
+
# Software description: A Python library for autoencoder-based anomaly detection
|
|
10
|
+
# based on self-supervised training with dynamic sample confidence updates.
|
|
11
|
+
"""
|
|
12
|
+
Initializes the main package API by exposing public interfaces from submodules.
|
|
13
|
+
|
|
14
|
+
This module re-exports the core components of the package to simplify access and
|
|
15
|
+
maintain a clean and consistent public API.
|
|
16
|
+
|
|
17
|
+
Available namespaces:
|
|
18
|
+
- confidence_estimators
|
|
19
|
+
- datamodules
|
|
20
|
+
- datasets
|
|
21
|
+
- distribution_analyzers
|
|
22
|
+
- models
|
|
23
|
+
- modules
|
|
24
|
+
- loggers
|
|
25
|
+
|
|
26
|
+
Usage:
|
|
27
|
+
from ssad import BinaryConfidence, ConfidenceIntervalsConfiguration
|
|
28
|
+
|
|
29
|
+
Note:
|
|
30
|
+
This file uses wildcard imports (`*`) to expose only the public symbols defined
|
|
31
|
+
in each submodule's `__all__` list.
|
|
32
|
+
"""
|
|
33
|
+
from .confidence_estimators.api import *
|
|
34
|
+
from .datamodules.api import *
|
|
35
|
+
from .datasets.api import *
|
|
36
|
+
from .distribution_analyzers.api import *
|
|
37
|
+
from .models.api import *
|
|
38
|
+
from .modules.api import *
|
|
39
|
+
from .loggers.api import *
|
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
# Software Name : Self-Supervised Anomaly Detection
|
|
2
|
+
# SPDX-FileCopyrightText: Copyright (c) Orange SA
|
|
3
|
+
# SPDX-License-Identifier: MIT
|
|
4
|
+
#
|
|
5
|
+
# This software is distributed under the MIT License,
|
|
6
|
+
# see the "LICENSE.txt" file for more details or https://spdx.org/licenses/MIT.html
|
|
7
|
+
#
|
|
8
|
+
# Authors: see CONTRIBUTORS
|
|
9
|
+
# Software description: A Python library for autoencoder-based anomaly detection
|
|
10
|
+
# based on self-supervised training with dynamic sample confidence updates.
|
|
11
|
+
r"""
|
|
12
|
+
# What is a confidence estimator
|
|
13
|
+
A confidence estimator is a module that, given a value such as a reconstruction error,
|
|
14
|
+
or a gradient value, returns a confidence score between -1 and 1.
|
|
15
|
+
"""
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
# Software Name : Self-Supervised Anomaly Detection
|
|
2
|
+
# SPDX-FileCopyrightText: Copyright (c) Orange SA
|
|
3
|
+
# SPDX-License-Identifier: MIT
|
|
4
|
+
#
|
|
5
|
+
# This software is distributed under the MIT License,
|
|
6
|
+
# see the "LICENSE.txt" file for more details or https://spdx.org/licenses/MIT.html
|
|
7
|
+
#
|
|
8
|
+
# Authors: see CONTRIBUTORS
|
|
9
|
+
# Software description: A Python library for autoencoder-based anomaly detection
|
|
10
|
+
# based on self-supervised training with dynamic sample confidence updates.
|
|
11
|
+
"""Public API for self-supervision confidence estimators."""
|
|
12
|
+
from .binary_confidence import BinaryConfidence
|
|
13
|
+
from .supports_confidence_estimation import SupportsConfidenceEstimation
|
|
14
|
+
from .confidence_intervals_configuration import ConfidenceIntervalsConfiguration
|
|
15
|
+
|
|
16
|
+
__all__ = ["SupportsConfidenceEstimation", "BinaryConfidence", "ConfidenceIntervalsConfiguration"]
|
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
# Software Name : Self-Supervised Anomaly Detection
|
|
2
|
+
# SPDX-FileCopyrightText: Copyright (c) Orange SA
|
|
3
|
+
# SPDX-License-Identifier: MIT
|
|
4
|
+
#
|
|
5
|
+
# This software is distributed under the MIT License,
|
|
6
|
+
# see the "LICENSE.txt" file for more details or https://spdx.org/licenses/MIT.html
|
|
7
|
+
#
|
|
8
|
+
# Authors: see CONTRIBUTORS
|
|
9
|
+
# Software description: A Python library for autoencoder-based anomaly detection
|
|
10
|
+
# based on self-supervised training with dynamic sample confidence updates.
|
|
11
|
+
"""
|
|
12
|
+
Implements a binary confidence estimator.
|
|
13
|
+
A sample is either normal or abnormal, otherwise it is omitted (zero confidence).
|
|
14
|
+
"""
|
|
15
|
+
|
|
16
|
+
import torch
|
|
17
|
+
from .confidence_estimator import BaseConfidenceEstimator
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
class BinaryConfidence(BaseConfidenceEstimator):
    """Confidence estimator whose three hooks each return a constant.

    Every hook ignores the values in ``score`` and only uses it as a template
    for the output tensor: normal scores map to 1, abnormal scores map to -1,
    and unknown scores map to 0.
    """

    def _confidence_normal(self, score):
        # Constant 1 for every element, shaped like ``score``.
        return torch.full_like(score, 1)

    def _confidence_abnormal(self, score):
        # Constant -1 for every element, shaped like ``score``.
        return torch.full_like(score, fill_value=-1)

    def _confidence_unknown(self, score):
        # Constant 0 for every element, shaped like ``score``.
        return torch.full_like(score, 0)
|
|
@@ -0,0 +1,94 @@
|
|
|
1
|
+
# Software Name : Self-Supervised Anomaly Detection
|
|
2
|
+
# SPDX-FileCopyrightText: Copyright (c) Orange SA
|
|
3
|
+
# SPDX-License-Identifier: MIT
|
|
4
|
+
#
|
|
5
|
+
# This software is distributed under the MIT License,
|
|
6
|
+
# see the "LICENSE.txt" file for more details or https://spdx.org/licenses/MIT.html
|
|
7
|
+
#
|
|
8
|
+
# Authors: see CONTRIBUTORS
|
|
9
|
+
# Software description: A Python library for autoencoder-based anomaly detection
|
|
10
|
+
# based on self-supervised training with dynamic sample confidence updates.
|
|
11
|
+
"""
|
|
12
|
+
Provides the base class for confidence estimators.
|
|
13
|
+
"""
|
|
14
|
+
|
|
15
|
+
from abc import ABC, abstractmethod
|
|
16
|
+
from typing import Optional
|
|
17
|
+
import torch
|
|
18
|
+
|
|
19
|
+
from .confidence_intervals_configuration import (
|
|
20
|
+
ConfidenceIntervalsConfiguration,
|
|
21
|
+
)
|
|
22
|
+
from .supports_confidence_estimation import SupportsConfidenceEstimation
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
class BaseConfidenceEstimator(ABC, SupportsConfidenceEstimation):
    """Base class for confidence estimators.

    A confidence estimator relies on intervals with associated scoring functions
    to provide a confidence score given a criterion score for a sample.

    The criterion score can be for instance a reconstruction score, or the norm of
    the gradient of the reconstruction error.

    The intervals define the domain for model scores for the four different
    confidence behaviors:
    - normal: samples with model scores in this interval are considered as normal
    - abnormal: samples with model scores in this interval are considered as abnormal
    - unknown_positive: samples with model scores in this interval are
      considered as unknown, but leaning towards a normal sample.
    - unknown_negative: samples with model scores in this interval are
      considered as unknown, but leaning towards an abnormal sample.

    The "criterion score to confidence score" conversion functions are supplied
    by subclasses through ``_confidence_normal``, ``_confidence_abnormal`` and
    ``_confidence_unknown``.

    NOTE(review): ``estimate_confidence`` only consults the ``normal`` and
    ``abnormal`` intervals of the configuration. Every score outside both
    receives the value of ``_confidence_unknown``, so the two unknown intervals
    are not distinguished in this class.
    """

    def __init__(self) -> None:
        super().__init__()
        # Interval configuration; must be assigned before estimate_confidence
        # is called, otherwise that method raises ValueError.
        self.configuration: Optional[ConfidenceIntervalsConfiguration] = None
        # Not read anywhere in this class; presumably set and used by
        # subclasses or callers -- TODO confirm.
        self.distribution: Optional[torch.Tensor] = None

    @abstractmethod
    def _confidence_normal(self, score: torch.Tensor) -> torch.Tensor:
        """Return the confidence assigned to scores in the normal interval."""
        raise NotImplementedError()

    @abstractmethod
    def _confidence_abnormal(self, score: torch.Tensor) -> torch.Tensor:
        """Return the confidence assigned to scores in the abnormal interval."""
        raise NotImplementedError()

    @abstractmethod
    def _confidence_unknown(self, score: torch.Tensor) -> torch.Tensor:
        """Return the confidence assigned to scores in neither interval."""
        raise NotImplementedError()

    @torch.no_grad()
    def estimate_confidence(self, scores_batch: torch.Tensor) -> torch.Tensor:
        """Translate a batch of criterion scores into confidence scores.

        Args:
            scores_batch (torch.Tensor): criterion scores whose confidence is
                to be estimated.

        Returns:
            torch.Tensor: confidence scores, selected element-wise from the
            three conversion functions.

        Raises:
            ValueError: if no interval configuration has been set.
        """
        # Fail fast: validate the configuration before doing any scoring work.
        configuration = self.configuration
        if configuration is None:
            raise ValueError("Confidence estimator configuration is None")

        # TODO: rework signatures of confidence normal/abnormal/unknown
        unknown_confidences = self._confidence_unknown(scores_batch)
        normal_confidences = self._confidence_normal(scores_batch)
        abnormal_confidences = self._confidence_abnormal(scores_batch)

        # Precedence (lowest to highest): unknown < normal < abnormal.
        # The abnormal mask is applied last, so a score contained in both
        # intervals ends up with the abnormal confidence.
        confidence = torch.where(
            configuration.normal.contains_tensor_mask(scores_batch),
            normal_confidences,
            unknown_confidences,
        )
        confidence = torch.where(
            configuration.abnormal.contains_tensor_mask(scores_batch),
            abnormal_confidences,
            confidence,
        )

        return confidence
|