truthound-dashboard 1.3.1-py3-none-any.whl → 1.4.1-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- truthound_dashboard/api/alerts.py +258 -0
- truthound_dashboard/api/anomaly.py +1302 -0
- truthound_dashboard/api/cross_alerts.py +352 -0
- truthound_dashboard/api/deps.py +143 -0
- truthound_dashboard/api/drift_monitor.py +540 -0
- truthound_dashboard/api/lineage.py +1151 -0
- truthound_dashboard/api/maintenance.py +363 -0
- truthound_dashboard/api/middleware.py +373 -1
- truthound_dashboard/api/model_monitoring.py +805 -0
- truthound_dashboard/api/notifications_advanced.py +2452 -0
- truthound_dashboard/api/plugins.py +2096 -0
- truthound_dashboard/api/profile.py +211 -14
- truthound_dashboard/api/reports.py +853 -0
- truthound_dashboard/api/router.py +147 -0
- truthound_dashboard/api/rule_suggestions.py +310 -0
- truthound_dashboard/api/schema_evolution.py +231 -0
- truthound_dashboard/api/sources.py +47 -3
- truthound_dashboard/api/triggers.py +190 -0
- truthound_dashboard/api/validations.py +13 -0
- truthound_dashboard/api/validators.py +333 -4
- truthound_dashboard/api/versioning.py +309 -0
- truthound_dashboard/api/websocket.py +301 -0
- truthound_dashboard/core/__init__.py +27 -0
- truthound_dashboard/core/anomaly.py +1395 -0
- truthound_dashboard/core/anomaly_explainer.py +633 -0
- truthound_dashboard/core/cache.py +206 -0
- truthound_dashboard/core/cached_services.py +422 -0
- truthound_dashboard/core/charts.py +352 -0
- truthound_dashboard/core/connections.py +1069 -42
- truthound_dashboard/core/cross_alerts.py +837 -0
- truthound_dashboard/core/drift_monitor.py +1477 -0
- truthound_dashboard/core/drift_sampling.py +669 -0
- truthound_dashboard/core/i18n/__init__.py +42 -0
- truthound_dashboard/core/i18n/detector.py +173 -0
- truthound_dashboard/core/i18n/messages.py +564 -0
- truthound_dashboard/core/lineage.py +971 -0
- truthound_dashboard/core/maintenance.py +443 -5
- truthound_dashboard/core/model_monitoring.py +1043 -0
- truthound_dashboard/core/notifications/channels.py +1020 -1
- truthound_dashboard/core/notifications/deduplication/__init__.py +143 -0
- truthound_dashboard/core/notifications/deduplication/policies.py +274 -0
- truthound_dashboard/core/notifications/deduplication/service.py +400 -0
- truthound_dashboard/core/notifications/deduplication/stores.py +2365 -0
- truthound_dashboard/core/notifications/deduplication/strategies.py +422 -0
- truthound_dashboard/core/notifications/dispatcher.py +43 -0
- truthound_dashboard/core/notifications/escalation/__init__.py +149 -0
- truthound_dashboard/core/notifications/escalation/backends.py +1384 -0
- truthound_dashboard/core/notifications/escalation/engine.py +429 -0
- truthound_dashboard/core/notifications/escalation/models.py +336 -0
- truthound_dashboard/core/notifications/escalation/scheduler.py +1187 -0
- truthound_dashboard/core/notifications/escalation/state_machine.py +330 -0
- truthound_dashboard/core/notifications/escalation/stores.py +2896 -0
- truthound_dashboard/core/notifications/events.py +49 -0
- truthound_dashboard/core/notifications/metrics/__init__.py +115 -0
- truthound_dashboard/core/notifications/metrics/base.py +528 -0
- truthound_dashboard/core/notifications/metrics/collectors.py +583 -0
- truthound_dashboard/core/notifications/routing/__init__.py +169 -0
- truthound_dashboard/core/notifications/routing/combinators.py +184 -0
- truthound_dashboard/core/notifications/routing/config.py +375 -0
- truthound_dashboard/core/notifications/routing/config_parser.py +867 -0
- truthound_dashboard/core/notifications/routing/engine.py +382 -0
- truthound_dashboard/core/notifications/routing/expression_engine.py +1269 -0
- truthound_dashboard/core/notifications/routing/jinja2_engine.py +774 -0
- truthound_dashboard/core/notifications/routing/rules.py +625 -0
- truthound_dashboard/core/notifications/routing/validator.py +678 -0
- truthound_dashboard/core/notifications/service.py +2 -0
- truthound_dashboard/core/notifications/stats_aggregator.py +850 -0
- truthound_dashboard/core/notifications/throttling/__init__.py +83 -0
- truthound_dashboard/core/notifications/throttling/builder.py +311 -0
- truthound_dashboard/core/notifications/throttling/stores.py +1859 -0
- truthound_dashboard/core/notifications/throttling/throttlers.py +633 -0
- truthound_dashboard/core/openlineage.py +1028 -0
- truthound_dashboard/core/plugins/__init__.py +39 -0
- truthound_dashboard/core/plugins/docs/__init__.py +39 -0
- truthound_dashboard/core/plugins/docs/extractor.py +703 -0
- truthound_dashboard/core/plugins/docs/renderers.py +804 -0
- truthound_dashboard/core/plugins/hooks/__init__.py +63 -0
- truthound_dashboard/core/plugins/hooks/decorators.py +367 -0
- truthound_dashboard/core/plugins/hooks/manager.py +403 -0
- truthound_dashboard/core/plugins/hooks/protocols.py +265 -0
- truthound_dashboard/core/plugins/lifecycle/__init__.py +41 -0
- truthound_dashboard/core/plugins/lifecycle/hot_reload.py +584 -0
- truthound_dashboard/core/plugins/lifecycle/machine.py +419 -0
- truthound_dashboard/core/plugins/lifecycle/states.py +266 -0
- truthound_dashboard/core/plugins/loader.py +504 -0
- truthound_dashboard/core/plugins/registry.py +810 -0
- truthound_dashboard/core/plugins/reporter_executor.py +588 -0
- truthound_dashboard/core/plugins/sandbox/__init__.py +59 -0
- truthound_dashboard/core/plugins/sandbox/code_validator.py +243 -0
- truthound_dashboard/core/plugins/sandbox/engines.py +770 -0
- truthound_dashboard/core/plugins/sandbox/protocols.py +194 -0
- truthound_dashboard/core/plugins/sandbox.py +617 -0
- truthound_dashboard/core/plugins/security/__init__.py +68 -0
- truthound_dashboard/core/plugins/security/analyzer.py +535 -0
- truthound_dashboard/core/plugins/security/policies.py +311 -0
- truthound_dashboard/core/plugins/security/protocols.py +296 -0
- truthound_dashboard/core/plugins/security/signing.py +842 -0
- truthound_dashboard/core/plugins/security.py +446 -0
- truthound_dashboard/core/plugins/validator_executor.py +401 -0
- truthound_dashboard/core/plugins/versioning/__init__.py +51 -0
- truthound_dashboard/core/plugins/versioning/constraints.py +377 -0
- truthound_dashboard/core/plugins/versioning/dependencies.py +541 -0
- truthound_dashboard/core/plugins/versioning/semver.py +266 -0
- truthound_dashboard/core/profile_comparison.py +601 -0
- truthound_dashboard/core/report_history.py +570 -0
- truthound_dashboard/core/reporters/__init__.py +57 -0
- truthound_dashboard/core/reporters/base.py +296 -0
- truthound_dashboard/core/reporters/csv_reporter.py +155 -0
- truthound_dashboard/core/reporters/html_reporter.py +598 -0
- truthound_dashboard/core/reporters/i18n/__init__.py +65 -0
- truthound_dashboard/core/reporters/i18n/base.py +494 -0
- truthound_dashboard/core/reporters/i18n/catalogs.py +930 -0
- truthound_dashboard/core/reporters/json_reporter.py +160 -0
- truthound_dashboard/core/reporters/junit_reporter.py +233 -0
- truthound_dashboard/core/reporters/markdown_reporter.py +207 -0
- truthound_dashboard/core/reporters/pdf_reporter.py +209 -0
- truthound_dashboard/core/reporters/registry.py +272 -0
- truthound_dashboard/core/rule_generator.py +2088 -0
- truthound_dashboard/core/scheduler.py +822 -12
- truthound_dashboard/core/schema_evolution.py +858 -0
- truthound_dashboard/core/services.py +152 -9
- truthound_dashboard/core/statistics.py +718 -0
- truthound_dashboard/core/streaming_anomaly.py +883 -0
- truthound_dashboard/core/triggers/__init__.py +45 -0
- truthound_dashboard/core/triggers/base.py +226 -0
- truthound_dashboard/core/triggers/evaluators.py +609 -0
- truthound_dashboard/core/triggers/factory.py +363 -0
- truthound_dashboard/core/unified_alerts.py +870 -0
- truthound_dashboard/core/validation_limits.py +509 -0
- truthound_dashboard/core/versioning.py +709 -0
- truthound_dashboard/core/websocket/__init__.py +59 -0
- truthound_dashboard/core/websocket/manager.py +512 -0
- truthound_dashboard/core/websocket/messages.py +130 -0
- truthound_dashboard/db/__init__.py +30 -0
- truthound_dashboard/db/models.py +3375 -3
- truthound_dashboard/main.py +22 -0
- truthound_dashboard/schemas/__init__.py +396 -1
- truthound_dashboard/schemas/anomaly.py +1258 -0
- truthound_dashboard/schemas/base.py +4 -0
- truthound_dashboard/schemas/cross_alerts.py +334 -0
- truthound_dashboard/schemas/drift_monitor.py +890 -0
- truthound_dashboard/schemas/lineage.py +428 -0
- truthound_dashboard/schemas/maintenance.py +154 -0
- truthound_dashboard/schemas/model_monitoring.py +374 -0
- truthound_dashboard/schemas/notifications_advanced.py +1363 -0
- truthound_dashboard/schemas/openlineage.py +704 -0
- truthound_dashboard/schemas/plugins.py +1293 -0
- truthound_dashboard/schemas/profile.py +420 -34
- truthound_dashboard/schemas/profile_comparison.py +242 -0
- truthound_dashboard/schemas/reports.py +285 -0
- truthound_dashboard/schemas/rule_suggestion.py +434 -0
- truthound_dashboard/schemas/schema_evolution.py +164 -0
- truthound_dashboard/schemas/source.py +117 -2
- truthound_dashboard/schemas/triggers.py +511 -0
- truthound_dashboard/schemas/unified_alerts.py +223 -0
- truthound_dashboard/schemas/validation.py +25 -1
- truthound_dashboard/schemas/validators/__init__.py +11 -0
- truthound_dashboard/schemas/validators/base.py +151 -0
- truthound_dashboard/schemas/versioning.py +152 -0
- truthound_dashboard/static/index.html +2 -2
- {truthound_dashboard-1.3.1.dist-info → truthound_dashboard-1.4.1.dist-info}/METADATA +147 -23
- truthound_dashboard-1.4.1.dist-info/RECORD +239 -0
- truthound_dashboard/static/assets/index-BZG20KuF.js +0 -586
- truthound_dashboard/static/assets/index-D_HyZ3pb.css +0 -1
- truthound_dashboard/static/assets/unmerged_dictionaries-CtpqQBm0.js +0 -1
- truthound_dashboard-1.3.1.dist-info/RECORD +0 -110
- {truthound_dashboard-1.3.1.dist-info → truthound_dashboard-1.4.1.dist-info}/WHEEL +0 -0
- {truthound_dashboard-1.3.1.dist-info → truthound_dashboard-1.4.1.dist-info}/entry_points.txt +0 -0
- {truthound_dashboard-1.3.1.dist-info → truthound_dashboard-1.4.1.dist-info}/licenses/LICENSE +0 -0
truthound_dashboard/schemas/anomaly.py (new file)

@@ -0,0 +1,1258 @@
```python
"""Anomaly detection Pydantic schemas.

This module defines schemas for ML-based anomaly detection API operations.
"""

from __future__ import annotations

from enum import Enum
from typing import Any, Literal

from pydantic import Field

from .base import BaseSchema, IDMixin, ListResponseWrapper


# =============================================================================
# Enums and Types
# =============================================================================


class AnomalyAlgorithm(str, Enum):
    """Supported anomaly detection algorithms."""

    ISOLATION_FOREST = "isolation_forest"
    LOF = "lof"
    ONE_CLASS_SVM = "one_class_svm"
    DBSCAN = "dbscan"
    STATISTICAL = "statistical"
    AUTOENCODER = "autoencoder"


class AnomalyStatus(str, Enum):
    """Status of an anomaly detection run."""

    PENDING = "pending"
    RUNNING = "running"
    SUCCESS = "success"
    ERROR = "error"


# Algorithm categories for UI grouping
AlgorithmCategory = Literal["tree", "density", "svm", "clustering", "statistical", "neural"]


# =============================================================================
# Algorithm Configuration Schemas
# =============================================================================


class IsolationForestConfig(BaseSchema):
    """Configuration for Isolation Forest algorithm."""

    n_estimators: int = Field(
        default=100,
        ge=10,
        le=500,
        description="Number of isolation trees",
    )
    contamination: float = Field(
        default=0.1,
        ge=0.01,
        le=0.5,
        description="Expected proportion of anomalies in the dataset",
    )
    max_samples: int | str = Field(
        default="auto",
        description="Number of samples to draw (int or 'auto')",
    )
    random_state: int | None = Field(
        default=42,
        description="Random seed for reproducibility",
    )


class LOFConfig(BaseSchema):
    """Configuration for Local Outlier Factor algorithm."""

    n_neighbors: int = Field(
        default=20,
        ge=5,
        le=100,
        description="Number of neighbors for LOF computation",
    )
    contamination: float = Field(
        default=0.1,
        ge=0.01,
        le=0.5,
        description="Expected proportion of anomalies",
    )
    algorithm: Literal["auto", "ball_tree", "kd_tree", "brute"] = Field(
        default="auto",
        description="Algorithm for nearest neighbors",
    )


class OneClassSVMConfig(BaseSchema):
    """Configuration for One-Class SVM algorithm."""

    kernel: Literal["rbf", "linear", "poly", "sigmoid"] = Field(
        default="rbf",
        description="Kernel type for SVM",
    )
    nu: float = Field(
        default=0.1,
        ge=0.01,
        le=0.5,
        description="Upper bound on fraction of anomalies",
    )
    gamma: str | float = Field(
        default="scale",
        description="Kernel coefficient ('scale', 'auto', or float)",
    )


class DBSCANConfig(BaseSchema):
    """Configuration for DBSCAN algorithm."""

    eps: float = Field(
        default=0.5,
        ge=0.01,
        le=10.0,
        description="Maximum distance between samples in a cluster",
    )
    min_samples: int = Field(
        default=5,
        ge=2,
        le=50,
        description="Minimum samples in a core neighborhood",
    )
    metric: Literal["euclidean", "manhattan", "cosine"] = Field(
        default="euclidean",
        description="Distance metric",
    )


class StatisticalConfig(BaseSchema):
    """Configuration for Statistical anomaly detection."""

    method: Literal["zscore", "iqr", "mad"] = Field(
        default="zscore",
        description="Statistical method (z-score, IQR, or MAD)",
    )
    threshold: float = Field(
        default=3.0,
        ge=1.0,
        le=5.0,
        description="Threshold for anomaly detection (e.g., 3 std devs)",
    )


class AutoencoderConfig(BaseSchema):
    """Configuration for Autoencoder-based detection."""

    encoding_dim: int = Field(
        default=32,
        ge=8,
        le=256,
        description="Dimension of the encoding layer",
    )
    epochs: int = Field(
        default=50,
        ge=10,
        le=200,
        description="Number of training epochs",
    )
    threshold_percentile: float = Field(
        default=95,
        ge=90,
        le=99,
        description="Percentile for anomaly threshold",
    )
    batch_size: int = Field(
        default=32,
        ge=8,
        le=256,
        description="Training batch size",
    )


# =============================================================================
# Request Schemas
# =============================================================================


class AnomalyDetectionRequest(BaseSchema):
    """Request to run anomaly detection."""

    algorithm: AnomalyAlgorithm = Field(
        default=AnomalyAlgorithm.ISOLATION_FOREST,
        description="Detection algorithm to use",
    )
    columns: list[str] | None = Field(
        default=None,
        description="Columns to analyze (None = all numeric columns)",
    )
    config: dict[str, Any] | None = Field(
        default=None,
        description="Algorithm-specific configuration",
    )
    sample_size: int | None = Field(
        default=None,
        ge=100,
        description="Sample size for large datasets",
    )


# =============================================================================
# Result Schemas
# =============================================================================


class AnomalyRecord(BaseSchema):
    """Single anomaly record."""

    row_index: int = Field(..., description="Row index in the dataset")
    anomaly_score: float = Field(
        ...,
        description="Anomaly score (higher = more anomalous)",
    )
    column_values: dict[str, Any] = Field(
        default_factory=dict,
        description="Values of analyzed columns for this row",
    )
    is_anomaly: bool = Field(..., description="Whether classified as anomaly")


class ColumnAnomalySummary(BaseSchema):
    """Anomaly summary for a single column."""

    column: str = Field(..., description="Column name")
    dtype: str = Field(..., description="Data type")
    anomaly_count: int = Field(..., description="Number of anomalies in this column")
    anomaly_rate: float = Field(..., description="Rate of anomalies (0-1)")
    mean_anomaly_score: float = Field(..., description="Mean anomaly score")
    min_value: float | None = Field(default=None, description="Minimum value")
    max_value: float | None = Field(default=None, description="Maximum value")
    top_anomaly_indices: list[int] = Field(
        default_factory=list,
        description="Row indices of top anomalies",
    )


class AnomalyDetectionResponse(IDMixin, BaseSchema):
    """Response for anomaly detection results."""

    source_id: str = Field(..., description="Source ID that was analyzed")
    status: AnomalyStatus = Field(..., description="Detection status")
    algorithm: AnomalyAlgorithm = Field(..., description="Algorithm used")
    config: dict[str, Any] | None = Field(
        default=None,
        description="Configuration used",
    )

    # Results
    total_rows: int | None = Field(default=None, description="Total rows analyzed")
    anomaly_count: int | None = Field(default=None, description="Number of anomalies found")
    anomaly_rate: float | None = Field(
        default=None,
        description="Rate of anomalies (0-1)",
    )
    columns_analyzed: list[str] | None = Field(
        default=None,
        description="Columns that were analyzed",
    )
    column_summaries: list[ColumnAnomalySummary] | None = Field(
        default=None,
        description="Per-column anomaly summaries",
    )
    anomalies: list[AnomalyRecord] | None = Field(
        default=None,
        description="Top anomaly records (limited to 100)",
    )

    # Timing
    duration_ms: int | None = Field(default=None, description="Execution time in ms")
    error_message: str | None = Field(default=None, description="Error message if failed")

    # Timestamps
    created_at: str = Field(..., description="When detection was created")
    started_at: str | None = Field(default=None, description="When detection started")
    completed_at: str | None = Field(default=None, description="When detection completed")


class AnomalyDetectionListResponse(ListResponseWrapper[AnomalyDetectionResponse]):
    """Paginated anomaly detection list response."""

    pass


# =============================================================================
# Batch Detection Schemas
# =============================================================================


class BatchDetectionStatus(str, Enum):
    """Status of a batch anomaly detection job."""

    PENDING = "pending"
    RUNNING = "running"
    COMPLETED = "completed"
    PARTIAL = "partial"
    ERROR = "error"
    CANCELLED = "cancelled"


class BatchDetectionRequest(BaseSchema):
    """Request to run batch anomaly detection across multiple sources."""

    source_ids: list[str] = Field(
        ...,
        min_length=1,
        description="List of source IDs to run detection on",
    )
    name: str | None = Field(
        default=None,
        max_length=255,
        description="Optional name for this batch job",
    )
    algorithm: AnomalyAlgorithm = Field(
        default=AnomalyAlgorithm.ISOLATION_FOREST,
        description="Detection algorithm to use for all sources",
    )
    config: dict[str, Any] | None = Field(
        default=None,
        description="Algorithm-specific configuration",
    )
    sample_size: int | None = Field(
        default=None,
        ge=100,
        description="Sample size for large datasets",
    )


class BatchSourceResult(BaseSchema):
    """Result for a single source in a batch job."""

    source_id: str = Field(..., description="Source ID")
    source_name: str | None = Field(default=None, description="Source name")
    detection_id: str | None = Field(default=None, description="Detection ID if completed")
    status: str = Field(..., description="Status: pending, running, success, error")
    anomaly_count: int | None = Field(default=None, description="Number of anomalies found")
    anomaly_rate: float | None = Field(default=None, description="Rate of anomalies (0-1)")
    total_rows: int | None = Field(default=None, description="Total rows analyzed")
    error_message: str | None = Field(default=None, description="Error message if failed")


class BatchDetectionResponse(IDMixin, BaseSchema):
    """Response for batch anomaly detection job."""

    name: str | None = Field(default=None, description="Job name")
    status: BatchDetectionStatus = Field(..., description="Batch job status")
    algorithm: AnomalyAlgorithm = Field(..., description="Algorithm used")
    config: dict[str, Any] | None = Field(default=None, description="Configuration used")

    # Progress
    total_sources: int = Field(..., description="Total number of sources")
    completed_sources: int = Field(..., description="Number of completed sources")
    failed_sources: int = Field(default=0, description="Number of failed sources")
    progress_percent: float = Field(..., description="Progress percentage (0-100)")
    current_source_id: str | None = Field(
        default=None,
        description="Currently processing source ID",
    )

    # Aggregate results
    total_anomalies: int = Field(default=0, description="Total anomalies found")
    total_rows_analyzed: int = Field(default=0, description="Total rows analyzed")
    average_anomaly_rate: float = Field(
        default=0.0,
        description="Average anomaly rate across sources",
    )

    # Per-source results
    results: list[BatchSourceResult] | None = Field(
        default=None,
        description="Results for each source",
    )

    # Timing
    duration_ms: int | None = Field(default=None, description="Total execution time in ms")
    error_message: str | None = Field(default=None, description="Error message if failed")

    # Timestamps
    created_at: str = Field(..., description="When batch job was created")
    started_at: str | None = Field(default=None, description="When batch job started")
    completed_at: str | None = Field(default=None, description="When batch job completed")


class BatchDetectionListResponse(ListResponseWrapper[BatchDetectionResponse]):
    """Paginated batch detection list response."""

    pass


# =============================================================================
# Algorithm Comparison Schemas
# =============================================================================


class AlgorithmComparisonRequest(BaseSchema):
    """Request to compare multiple anomaly detection algorithms."""

    algorithms: list[AnomalyAlgorithm] = Field(
        ...,
        min_length=2,
        max_length=6,
        description="List of algorithms to compare (2-6 algorithms)",
    )
    columns: list[str] | None = Field(
        default=None,
        description="Columns to analyze (None = all numeric columns)",
    )
    config: dict[str, dict[str, Any]] | None = Field(
        default=None,
        description="Algorithm-specific configurations keyed by algorithm name",
    )
    sample_size: int | None = Field(
        default=None,
        ge=100,
        description="Sample size for large datasets",
    )


class AlgorithmComparisonResultItem(BaseSchema):
    """Single algorithm result within a comparison."""

    algorithm: AnomalyAlgorithm = Field(..., description="Algorithm name")
    display_name: str = Field(..., description="Human-readable algorithm name")
    status: AnomalyStatus = Field(..., description="Execution status")
    anomaly_count: int | None = Field(default=None, description="Number of anomalies found")
    anomaly_rate: float | None = Field(default=None, description="Rate of anomalies (0-1)")
    duration_ms: int | None = Field(default=None, description="Execution time in ms")
    error_message: str | None = Field(default=None, description="Error message if failed")
    anomaly_indices: list[int] = Field(
        default_factory=list,
        description="Row indices flagged as anomalies",
    )


class AgreementLevel(str, Enum):
    """Level of agreement among algorithms."""

    ALL = "all"
    MAJORITY = "majority"
    SOME = "some"
    ONE = "one"


class AgreementRecord(BaseSchema):
    """A row with its agreement information across algorithms."""

    row_index: int = Field(..., description="Row index in the dataset")
    detected_by: list[AnomalyAlgorithm] = Field(
        ...,
        description="Algorithms that flagged this row as anomaly",
    )
    detection_count: int = Field(..., description="Number of algorithms that detected this row")
    agreement_level: AgreementLevel = Field(..., description="Level of agreement")
    confidence_score: float = Field(
        ...,
        ge=0.0,
        le=1.0,
        description="Confidence score based on agreement (0-1)",
    )
    column_values: dict[str, Any] = Field(
        default_factory=dict,
        description="Values of analyzed columns for this row",
    )


class AgreementSummary(BaseSchema):
    """Summary of algorithm agreement."""

    total_algorithms: int = Field(..., description="Number of algorithms compared")
    total_unique_anomalies: int = Field(
        ...,
        description="Total unique rows flagged by at least one algorithm",
    )
    all_agree_count: int = Field(
        ...,
        description="Rows flagged by all algorithms",
    )
    majority_agree_count: int = Field(
        ...,
        description="Rows flagged by majority (>50%) of algorithms",
    )
    some_agree_count: int = Field(
        ...,
        description="Rows flagged by at least 2 algorithms",
    )
    one_only_count: int = Field(
        ...,
        description="Rows flagged by only 1 algorithm",
    )
    agreement_matrix: list[list[int]] = Field(
        default_factory=list,
        description="Pairwise overlap matrix between algorithms",
    )


class AlgorithmComparisonResult(IDMixin, BaseSchema):
    """Response for algorithm comparison results."""

    source_id: str = Field(..., description="Source ID that was analyzed")
    status: AnomalyStatus = Field(..., description="Overall comparison status")
    total_rows: int | None = Field(default=None, description="Total rows analyzed")
    columns_analyzed: list[str] | None = Field(
        default=None,
        description="Columns that were analyzed",
    )

    # Individual algorithm results
    algorithm_results: list[AlgorithmComparisonResultItem] = Field(
        default_factory=list,
        description="Results from each algorithm",
    )

    # Agreement analysis
    agreement_summary: AgreementSummary | None = Field(
        default=None,
        description="Summary of agreement between algorithms",
    )
    agreement_records: list[AgreementRecord] | None = Field(
        default=None,
        description="Records with their agreement information (limited to top 100)",
    )

    # Timing
    total_duration_ms: int | None = Field(
        default=None,
        description="Total execution time in ms",
    )
    error_message: str | None = Field(default=None, description="Error message if failed")

    # Timestamps
    created_at: str = Field(..., description="When comparison was created")
    completed_at: str | None = Field(default=None, description="When comparison completed")


class AlgorithmComparisonListResponse(ListResponseWrapper[AlgorithmComparisonResult]):
    """Paginated algorithm comparison list response."""

    pass


# =============================================================================
# Algorithm Info Schemas
# =============================================================================


class AlgorithmParameter(BaseSchema):
    """Parameter definition for an algorithm."""

    name: str = Field(..., description="Parameter name")
    label: str = Field(..., description="Display label")
    type: Literal["integer", "float", "string", "select", "boolean"] = Field(
        ...,
        description="Parameter type",
    )
    default: Any = Field(..., description="Default value")
    min_value: float | None = Field(default=None, description="Minimum value (for numeric)")
    max_value: float | None = Field(default=None, description="Maximum value (for numeric)")
    options: list[str] | None = Field(default=None, description="Options for select type")
    description: str = Field(..., description="Parameter description")


class AlgorithmInfo(BaseSchema):
    """Information about an anomaly detection algorithm."""

    name: AnomalyAlgorithm = Field(..., description="Algorithm identifier")
    display_name: str = Field(..., description="Human-readable name")
    description: str = Field(..., description="Algorithm description")
    category: AlgorithmCategory = Field(..., description="Algorithm category")
    parameters: list[AlgorithmParameter] = Field(
        default_factory=list,
        description="Configurable parameters",
    )
    pros: list[str] = Field(default_factory=list, description="Algorithm advantages")
    cons: list[str] = Field(default_factory=list, description="Algorithm limitations")
    best_for: str = Field(..., description="Best use case description")
    requires_scaling: bool = Field(
        default=True,
        description="Whether data scaling is recommended",
    )


class AlgorithmListResponse(BaseSchema):
    """Response containing all available algorithms."""

    algorithms: list[AlgorithmInfo] = Field(
        ...,
        description="Available anomaly detection algorithms",
    )
    total: int = Field(..., description="Total number of algorithms")


# =============================================================================
# Helper function to get algorithm info
# =============================================================================


def get_algorithm_info_list() -> list[AlgorithmInfo]:
    """Get information about all supported algorithms."""
    return [
        AlgorithmInfo(
            name=AnomalyAlgorithm.ISOLATION_FOREST,
            display_name="Isolation Forest",
            description="Tree-based algorithm that isolates anomalies by random partitioning. "
            "Works by building trees that isolate observations - anomalies require fewer "
            "partitions to isolate.",
            category="tree",
            parameters=[
                AlgorithmParameter(
                    name="n_estimators",
                    label="Number of Trees",
                    type="integer",
                    default=100,
                    min_value=10,
                    max_value=500,
                    description="Number of isolation trees in the ensemble",
                ),
                AlgorithmParameter(
                    name="contamination",
                    label="Contamination",
                    type="float",
                    default=0.1,
                    min_value=0.01,
                    max_value=0.5,
                    description="Expected proportion of anomalies in the dataset",
                ),
            ],
            pros=["Fast training and prediction", "Scales well to large datasets", "No distribution assumptions"],
            cons=["May miss clustered anomalies", "Sensitive to contamination parameter"],
            best_for="Large datasets with global anomalies, high-dimensional data",
            requires_scaling=False,
        ),
        AlgorithmInfo(
            name=AnomalyAlgorithm.LOF,
            display_name="Local Outlier Factor",
            description="Density-based algorithm that compares local density of a point with "
            "its neighbors. Points with substantially lower density are considered outliers.",
            category="density",
            parameters=[
                AlgorithmParameter(
                    name="n_neighbors",
                    label="Number of Neighbors",
                    type="integer",
                    default=20,
                    min_value=5,
                    max_value=100,
                    description="Number of neighbors for local density estimation",
                ),
                AlgorithmParameter(
                    name="contamination",
                    label="Contamination",
                    type="float",
                    default=0.1,
                    min_value=0.01,
                    max_value=0.5,
                    description="Expected proportion of anomalies",
                ),
            ],
            pros=["Detects local anomalies", "Works well with varying densities", "Intuitive interpretation"],
            cons=["Computationally expensive for large datasets", "Sensitive to n_neighbors"],
            best_for="Datasets with varying cluster densities, local outlier detection",
            requires_scaling=True,
        ),
        AlgorithmInfo(
            name=AnomalyAlgorithm.ONE_CLASS_SVM,
            display_name="One-Class SVM",
            description="Support Vector Machine trained only on normal data. Creates a "
            "decision boundary around normal observations, flagging points outside as anomalies.",
            category="svm",
            parameters=[
                AlgorithmParameter(
                    name="kernel",
                    label="Kernel",
                    type="select",
                    default="rbf",
                    options=["rbf", "linear", "poly", "sigmoid"],
                    description="Kernel function for the SVM",
                ),
                AlgorithmParameter(
                    name="nu",
                    label="Nu",
                    type="float",
                    default=0.1,
                    min_value=0.01,
                    max_value=0.5,
                    description="Upper bound on fraction of training errors",
                ),
            ],
            pros=["Effective in high dimensions", "Flexible via kernel choice", "Memory efficient"],
            cons=["Slow for large datasets", "Sensitive to kernel and parameters"],
            best_for="High-dimensional data, when data fits in memory",
            requires_scaling=True,
        ),
        AlgorithmInfo(
            name=AnomalyAlgorithm.DBSCAN,
            display_name="DBSCAN",
            description="Density-based clustering that identifies points not belonging to any "
            "cluster as anomalies. Works by grouping points that are close together.",
            category="clustering",
            parameters=[
                AlgorithmParameter(
                    name="eps",
                    label="Epsilon (eps)",
                    type="float",
                    default=0.5,
                    min_value=0.01,
                    max_value=10.0,
                    description="Maximum distance between two samples in a cluster",
                ),
                AlgorithmParameter(
                    name="min_samples",
                    label="Minimum Samples",
                    type="integer",
                    default=5,
                    min_value=2,
                    max_value=50,
                    description="Minimum samples in a core neighborhood",
                ),
            ],
            pros=["No contamination parameter needed", "Finds arbitrarily shaped clusters", "Robust to noise"],
            cons=["Sensitive to eps parameter", "Struggles with varying densities"],
            best_for="Datasets with clear cluster structure, spatial data",
            requires_scaling=True,
        ),
        AlgorithmInfo(
            name=AnomalyAlgorithm.STATISTICAL,
            display_name="Statistical",
            description="Traditional statistical methods including Z-score (standard deviations "
            "from mean), IQR (interquartile range), and MAD (median absolute deviation).",
            category="statistical",
            parameters=[
                AlgorithmParameter(
                    name="method",
                    label="Method",
                    type="select",
                    default="zscore",
                    options=["zscore", "iqr", "mad"],
                    description="Statistical method for detection",
                ),
                AlgorithmParameter(
                    name="threshold",
                    label="Threshold",
                    type="float",
                    default=3.0,
                    min_value=1.0,
                    max_value=5.0,
                    description="Number of standard deviations/IQR multiplier",
                ),
            ],
            pros=["Simple and interpretable", "Fast computation", "Works on univariate data"],
            cons=["Assumes normal distribution (for z-score)", "May miss complex anomalies"],
            best_for="Univariate data, quick analysis, interpretable results",
            requires_scaling=False,
        ),
        AlgorithmInfo(
            name=AnomalyAlgorithm.AUTOENCODER,
            display_name="Autoencoder",
            description="Neural network that learns to compress and reconstruct data. "
            "Anomalies have high reconstruction error as they differ from normal patterns.",
            category="neural",
            parameters=[
                AlgorithmParameter(
                    name="encoding_dim",
                    label="Encoding Dimension",
                    type="integer",
                    default=32,
                    min_value=8,
                    max_value=256,
                    description="Dimension of the encoding (bottleneck) layer",
                ),
                AlgorithmParameter(
                    name="epochs",
                    label="Training Epochs",
                    type="integer",
                    default=50,
                    min_value=10,
                    max_value=200,
                    description="Number of training epochs",
                ),
                AlgorithmParameter(
                    name="threshold_percentile",
                    label="Threshold Percentile",
                    type="float",
                    default=95,
                    min_value=90,
                    max_value=99,
                    description="Percentile of reconstruction error for anomaly threshold",
                ),
            ],
            pros=["Captures complex patterns", "Learns data representation", "Works with high dimensions"],
            cons=["Requires more data", "Computationally expensive", "Black box"],
            best_for="Complex patterns, large datasets, multivariate anomalies",
            requires_scaling=True,
        ),
    ]


# =============================================================================
# Explainability Schemas
# =============================================================================


class FeatureContribution(BaseSchema):
    """Feature contribution to anomaly score (SHAP value)."""

    feature: str = Field(..., description="Feature/column name")
    value: float = Field(..., description="Actual feature value for this row")
    shap_value: float = Field(
        ...,
        description="SHAP value indicating contribution direction and magnitude",
    )
    contribution: float = Field(
        ...,
        ge=0,
        description="Absolute contribution (|shap_value|)",
    )


class AnomalyExplanationResult(BaseSchema):
    """Explanation for a single anomalous row."""

    row_index: int = Field(..., description="Row index in the dataset")
    anomaly_score: float = Field(
        ...,
        ge=0,
        le=1,
        description="Anomaly score for this row",
    )
    feature_contributions: list[FeatureContribution] = Field(
        ...,
        description="Feature contributions sorted by importance",
    )
    total_shap: float = Field(
        ...,
        description="Sum of all SHAP values for this row",
    )
    summary: str = Field(
        ...,
        description="Human-readable summary of why this row is anomalous",
    )


class ExplainabilityRequest(BaseSchema):
    """Request to generate explanations for anomalies."""

    row_indices: list[int] = Field(
        ...,
        min_length=1,
        max_length=100,
        description="Row indices to explain (max 100)",
    )
    max_features: int = Field(
        default=10,
        ge=1,
        le=50,
        description="Maximum number of features to include in explanation",
    )
    sample_background: int = Field(
        default=100,
        ge=10,
        le=500,
        description="Number of background samples for SHAP KernelExplainer",
    )


class ExplainabilityResponse(BaseSchema):
    """Response containing anomaly explanations."""

    detection_id: str = Field(..., description="ID of the anomaly detection run")
    algorithm: str = Field(..., description="Detection algorithm used")
    row_indices: list[int] = Field(..., description="Row indices that were explained")
    feature_names: list[str] = Field(..., description="Feature/column names analyzed")
    explanations: list[AnomalyExplanationResult] = Field(
        ...,
        description="Explanations for each requested row",
    )
    generated_at: str = Field(..., description="When explanations were generated")
    error: str | None = Field(default=None, description="Error message if generation failed")


class CachedExplanationResponse(IDMixin, BaseSchema):
    """Response for a cached explanation from database."""

    detection_id: str = Field(..., description="ID of the anomaly detection run")
    row_index: int = Field(..., description="Row index in the dataset")
    anomaly_score: float = Field(..., description="Anomaly score for this row")
    feature_contributions: list[FeatureContribution] = Field(
        ...,
        description="Feature contributions",
    )
    total_shap: float = Field(..., description="Sum of all SHAP values")
    summary: str = Field(..., description="Human-readable explanation summary")
    generated_at: str | None = Field(
        default=None,
        description="When this explanation was generated",
    )


class CachedExplanationsListResponse(BaseSchema):
    """Response containing list of cached explanations."""

    detection_id: str = Field(..., description="ID of the anomaly detection run")
    explanations: list[CachedExplanationResponse] = Field(
        ...,
        description="Cached explanations",
    )
    total: int = Field(..., description="Total number of explanations")


# =============================================================================
# Streaming Anomaly Detection Schemas
# =============================================================================


class StreamingAlgorithm(str, Enum):
    """Supported streaming anomaly detection algorithms."""

    ZSCORE_ROLLING = "zscore_rolling"
    EXPONENTIAL_MOVING_AVERAGE = "ema"
    ISOLATION_FOREST_INCREMENTAL = "isolation_forest_incremental"
    HALF_SPACE_TREES = "half_space_trees"
    ROBUST_RANDOM_CUT_FOREST = "rrcf"


class StreamingSessionStatus(str, Enum):
    """Status of a streaming session."""

    CREATED = "created"
    RUNNING = "running"
    PAUSED = "paused"
    STOPPED = "stopped"
    ERROR = "error"


class StreamingSessionCreate(BaseSchema):
    """Request to create a streaming session."""

    source_id: str | None = Field(
        default=None,
        description="Optional source ID to associate with",
    )
    algorithm: StreamingAlgorithm = Field(
        default=StreamingAlgorithm.ZSCORE_ROLLING,
        description="Streaming detection algorithm",
    )
    window_size: int = Field(
        default=100,
        ge=10,
        le=10000,
        description="Size of the sliding window",
    )
    threshold: float = Field(
        default=3.0,
        ge=1.0,
        le=10.0,
        description="Anomaly detection threshold",
    )
    columns: list[str] | None = Field(
        default=None,
        description="Columns to monitor (None = all numeric)",
    )
    config: dict[str, Any] | None = Field(
        default=None,
        description="Additional algorithm configuration",
    )


class StreamingStatistics(BaseSchema):
    """Rolling statistics for a column."""

    count: int = Field(..., description="Number of data points")
    mean: float = Field(..., description="Rolling mean")
    std: float = Field(..., description="Rolling standard deviation")
    min: float | None = Field(default=None, description="Minimum value")
    max: float | None = Field(default=None, description="Maximum value")
    anomaly_count: int = Field(..., description="Number of anomalies detected")
    anomaly_rate: float = Field(..., description="Rate of anomalies (0-1)")


class StreamingSessionResponse(IDMixin, BaseSchema):
    """Response for a streaming session."""

    source_id: str | None = Field(default=None, description="Associated source ID")
    algorithm: StreamingAlgorithm = Field(..., description="Detection algorithm")
    window_size: int = Field(..., description="Sliding window size")
    threshold: float = Field(..., description="Detection threshold")
    columns: list[str] = Field(..., description="Columns being monitored")
    status: StreamingSessionStatus = Field(..., description="Session status")
    config: dict[str, Any] | None = Field(default=None, description="Algorithm config")
    statistics: dict[str, StreamingStatistics] | None = Field(
        default=None,
        description="Per-column statistics",
    )
    total_points: int = Field(default=0, description="Total data points processed")
    total_alerts: int = Field(default=0, description="Total alerts generated")
    created_at: str = Field(..., description="When session was created")
    started_at: str | None = Field(default=None, description="When session started")
    stopped_at: str | None = Field(default=None, description="When session stopped")


class StreamingSessionListResponse(ListResponseWrapper[StreamingSessionResponse]):
    """Paginated streaming session list response."""

    pass


class StreamingDataPoint(BaseSchema):
    """A single data point to push to streaming session."""

    data: dict[str, Any] = Field(..., description="Column name to value mapping")
    timestamp: str | None = Field(
        default=None,
        description="ISO format timestamp (defaults to now)",
    )


class StreamingDataBatch(BaseSchema):
    """A batch of data points to push."""

    data_points: list[StreamingDataPoint] = Field(
        ...,
        min_length=1,
        max_length=1000,
        description="List of data points",
    )


class StreamingAlert(BaseSchema):
    """An anomaly alert from streaming detection."""

    id: str = Field(..., description="Alert unique identifier")
    session_id: str = Field(..., description="Session that generated this alert")
    timestamp: str = Field(..., description="When anomaly was detected")
    data_point: dict[str, Any] = Field(..., description="The data point that triggered alert")
    anomaly_score: float = Field(..., description="Anomaly score (higher = more anomalous)")
    is_anomaly: bool = Field(..., description="Whether classified as anomaly")
    algorithm: StreamingAlgorithm = Field(..., description="Algorithm that detected it")
    details: dict[str, Any] = Field(
        default_factory=dict,
        description="Additional detection details",
    )


class StreamingAlertListResponse(ListResponseWrapper[StreamingAlert]):
    """Paginated streaming alert list response."""

    pass


class StreamingStatusResponse(BaseSchema):
    """Status response for a streaming session."""

    session_id: str = Field(..., description="Session ID")
    status: StreamingSessionStatus = Field(..., description="Current status")
    total_points: int = Field(..., description="Total data points processed")
    total_alerts: int = Field(..., description="Total alerts generated")
    buffer_utilization: float = Field(
        ...,
        ge=0,
        le=1,
        description="Buffer utilization (0-1)",
    )
    statistics: dict[str, StreamingStatistics] = Field(
        ...,
        description="Per-column statistics",
    )
    recent_alerts: list[StreamingAlert] = Field(
        default_factory=list,
        description="Recent alerts (last 10)",
    )


class StreamingRecentDataResponse(BaseSchema):
    """Response containing recent data points."""

    session_id: str = Field(..., description="Session ID")
    data_points: list[dict[str, Any]] = Field(
        ...,
        description="Recent data points with timestamps",
    )
    total: int = Field(..., description="Total points in response")


class StreamingAlgorithmInfo(BaseSchema):
    """Information about a streaming algorithm."""

    name: StreamingAlgorithm = Field(..., description="Algorithm identifier")
    display_name: str = Field(..., description="Human-readable name")
    description: str = Field(..., description="Algorithm description")
    supports_online_learning: bool = Field(
        ...,
        description="Whether algorithm supports online model updates",
    )
    parameters: list[AlgorithmParameter] = Field(
        default_factory=list,
        description="Configurable parameters",
    )
    best_for: str = Field(..., description="Best use case description")


class StreamingAlgorithmListResponse(BaseSchema):
    """Response containing streaming algorithms."""

    algorithms: list[StreamingAlgorithmInfo] = Field(
        ...,
        description="Available streaming algorithms",
    )
    total: int = Field(..., description="Total number of algorithms")


def get_streaming_algorithm_info_list() -> list[StreamingAlgorithmInfo]:
    """Get information about all supported streaming algorithms."""
    return [
        StreamingAlgorithmInfo(
            name=StreamingAlgorithm.ZSCORE_ROLLING,
            display_name="Rolling Z-Score",
            description="Detects anomalies based on rolling z-scores computed over a sliding window. "
            "Simple and effective for stationary data streams.",
            supports_online_learning=True,
            parameters=[
                AlgorithmParameter(
                    name="window_size",
                    label="Window Size",
                    type="integer",
                    default=100,
                    min_value=10,
                    max_value=10000,
                    description="Number of recent points to use for statistics",
                ),
                AlgorithmParameter(
                    name="threshold",
                    label="Z-Score Threshold",
                    type="float",
                    default=3.0,
                    min_value=1.0,
                    max_value=5.0,
                    description="Number of standard deviations for anomaly threshold",
                ),
            ],
            best_for="Simple time series with stationary patterns, quick setup",
        ),
        StreamingAlgorithmInfo(
            name=StreamingAlgorithm.EXPONENTIAL_MOVING_AVERAGE,
            display_name="Exponential Moving Average",
            description="Uses exponentially weighted moving average to track trends and detect "
            "deviations. Adapts quickly to recent changes in the data.",
            supports_online_learning=True,
            parameters=[
                AlgorithmParameter(
                    name="alpha",
                    label="Smoothing Factor (Alpha)",
                    type="float",
                    default=0.1,
                    min_value=0.01,
                    max_value=0.5,
                    description="Weight for recent observations (higher = more responsive)",
                ),
                AlgorithmParameter(
                    name="threshold_multiplier",
                    label="Threshold Multiplier",
                    type="float",
                    default=2.0,
                    min_value=1.0,
                    max_value=5.0,
                    description="Multiplier for deviation threshold",
                ),
            ],
            best_for="Non-stationary data with changing trends, sensor data",
        ),
        StreamingAlgorithmInfo(
            name=StreamingAlgorithm.ISOLATION_FOREST_INCREMENTAL,
            display_name="Incremental Isolation Forest",
            description="Periodically retrains Isolation Forest on recent window data. "
            "Good for detecting global anomalies in multi-dimensional streams.",
            supports_online_learning=False,
            parameters=[
                AlgorithmParameter(
                    name="contamination",
                    label="Contamination",
                    type="float",
                    default=0.1,
                    min_value=0.01,
                    max_value=0.5,
                    description="Expected proportion of anomalies",
                ),
                AlgorithmParameter(
                    name="window_size",
                    label="Training Window",
                    type="integer",
                    default=100,
                    min_value=50,
                    max_value=1000,
                    description="Window size for periodic retraining",
                ),
            ],
            best_for="Multi-dimensional streams, complex patterns",
        ),
        StreamingAlgorithmInfo(
            name=StreamingAlgorithm.HALF_SPACE_TREES,
            display_name="Half-Space Trees",
            description="Streaming variant of Isolation Forest using half-space partitioning. "
            "Efficient for high-dimensional streaming data.",
            supports_online_learning=True,
            parameters=[
                AlgorithmParameter(
                    name="n_trees",
                    label="Number of Trees",
                    type="integer",
                    default=25,
                    min_value=5,
                    max_value=100,
                    description="Number of half-space trees",
                ),
                AlgorithmParameter(
                    name="height",
                    label="Tree Height",
                    type="integer",
                    default=8,
                    min_value=4,
                    max_value=15,
                    description="Maximum depth of each tree",
                ),
            ],
            best_for="High-dimensional streaming data, real-time requirements",
        ),
        StreamingAlgorithmInfo(
            name=StreamingAlgorithm.ROBUST_RANDOM_CUT_FOREST,
            display_name="Robust Random Cut Forest",
            description="Uses collusive displacement for anomaly scoring. "
            "Robust to noise and concept drift in streaming data.",
            supports_online_learning=True,
            parameters=[
                AlgorithmParameter(
                    name="num_trees",
                    label="Number of Trees",
                    type="integer",
                    default=40,
                    min_value=10,
                    max_value=100,
                    description="Number of random cut trees",
                ),
                AlgorithmParameter(
                    name="tree_size",
                    label="Tree Size",
                    type="integer",
                    default=256,
                    min_value=64,
                    max_value=1024,
                    description="Maximum number of points per tree",
                ),
            ],
            best_for="Complex streaming data with concept drift, AWS Kinesis integration",
        ),
    ]
```
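
For orientation, a minimal sketch of how the request schemas above compose. This assumes `BaseSchema` in `truthound_dashboard/schemas/base.py` is a Pydantic v2 `BaseModel` subclass (that file is not part of this diff), and the column names are hypothetical:

```python
from truthound_dashboard.schemas.anomaly import (
    AnomalyAlgorithm,
    AnomalyDetectionRequest,
    IsolationForestConfig,
)

# Build a typed algorithm config, then pass it as the free-form dict
# that AnomalyDetectionRequest.config accepts.
config = IsolationForestConfig(n_estimators=200, contamination=0.05)

request = AnomalyDetectionRequest(
    algorithm=AnomalyAlgorithm.ISOLATION_FOREST,
    columns=["amount", "latency_ms"],  # hypothetical column names
    config=config.model_dump(),  # assumes Pydantic v2 API on BaseSchema
    sample_size=10_000,
)

# Field constraints (e.g. sample_size >= 100, 0.01 <= contamination <= 0.5)
# are validated at construction time; this prints the JSON payload a client
# would send to the anomaly detection API.
print(request.model_dump_json(exclude_none=True))
```

The same pattern would apply to `BatchDetectionRequest` and `AlgorithmComparisonRequest`, whose `config` fields likewise take plain dicts (keyed by algorithm name in the comparison case).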