truthound 1.0.8__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- truthound/__init__.py +162 -0
- truthound/adapters.py +100 -0
- truthound/api.py +365 -0
- truthound/audit/__init__.py +248 -0
- truthound/audit/core.py +967 -0
- truthound/audit/filters.py +620 -0
- truthound/audit/formatters.py +707 -0
- truthound/audit/logger.py +902 -0
- truthound/audit/middleware.py +571 -0
- truthound/audit/storage.py +1083 -0
- truthound/benchmark/__init__.py +123 -0
- truthound/benchmark/base.py +757 -0
- truthound/benchmark/comparison.py +635 -0
- truthound/benchmark/generators.py +706 -0
- truthound/benchmark/reporters.py +718 -0
- truthound/benchmark/runner.py +635 -0
- truthound/benchmark/scenarios.py +712 -0
- truthound/cache.py +252 -0
- truthound/checkpoint/__init__.py +136 -0
- truthound/checkpoint/actions/__init__.py +164 -0
- truthound/checkpoint/actions/base.py +324 -0
- truthound/checkpoint/actions/custom.py +234 -0
- truthound/checkpoint/actions/discord_notify.py +290 -0
- truthound/checkpoint/actions/email_notify.py +405 -0
- truthound/checkpoint/actions/github_action.py +406 -0
- truthound/checkpoint/actions/opsgenie.py +1499 -0
- truthound/checkpoint/actions/pagerduty.py +226 -0
- truthound/checkpoint/actions/slack_notify.py +233 -0
- truthound/checkpoint/actions/store_result.py +249 -0
- truthound/checkpoint/actions/teams_notify.py +1570 -0
- truthound/checkpoint/actions/telegram_notify.py +419 -0
- truthound/checkpoint/actions/update_docs.py +552 -0
- truthound/checkpoint/actions/webhook.py +293 -0
- truthound/checkpoint/analytics/__init__.py +147 -0
- truthound/checkpoint/analytics/aggregations/__init__.py +23 -0
- truthound/checkpoint/analytics/aggregations/rollup.py +481 -0
- truthound/checkpoint/analytics/aggregations/time_bucket.py +306 -0
- truthound/checkpoint/analytics/analyzers/__init__.py +17 -0
- truthound/checkpoint/analytics/analyzers/anomaly.py +386 -0
- truthound/checkpoint/analytics/analyzers/base.py +270 -0
- truthound/checkpoint/analytics/analyzers/forecast.py +421 -0
- truthound/checkpoint/analytics/analyzers/trend.py +314 -0
- truthound/checkpoint/analytics/models.py +292 -0
- truthound/checkpoint/analytics/protocols.py +549 -0
- truthound/checkpoint/analytics/service.py +718 -0
- truthound/checkpoint/analytics/stores/__init__.py +16 -0
- truthound/checkpoint/analytics/stores/base.py +306 -0
- truthound/checkpoint/analytics/stores/memory_store.py +353 -0
- truthound/checkpoint/analytics/stores/sqlite_store.py +557 -0
- truthound/checkpoint/analytics/stores/timescale_store.py +501 -0
- truthound/checkpoint/async_actions.py +794 -0
- truthound/checkpoint/async_base.py +708 -0
- truthound/checkpoint/async_checkpoint.py +617 -0
- truthound/checkpoint/async_runner.py +639 -0
- truthound/checkpoint/checkpoint.py +527 -0
- truthound/checkpoint/ci/__init__.py +61 -0
- truthound/checkpoint/ci/detector.py +355 -0
- truthound/checkpoint/ci/reporter.py +436 -0
- truthound/checkpoint/ci/templates.py +454 -0
- truthound/checkpoint/circuitbreaker/__init__.py +133 -0
- truthound/checkpoint/circuitbreaker/breaker.py +542 -0
- truthound/checkpoint/circuitbreaker/core.py +252 -0
- truthound/checkpoint/circuitbreaker/detection.py +459 -0
- truthound/checkpoint/circuitbreaker/middleware.py +389 -0
- truthound/checkpoint/circuitbreaker/registry.py +357 -0
- truthound/checkpoint/distributed/__init__.py +139 -0
- truthound/checkpoint/distributed/backends/__init__.py +35 -0
- truthound/checkpoint/distributed/backends/celery_backend.py +503 -0
- truthound/checkpoint/distributed/backends/kubernetes_backend.py +696 -0
- truthound/checkpoint/distributed/backends/local_backend.py +397 -0
- truthound/checkpoint/distributed/backends/ray_backend.py +625 -0
- truthound/checkpoint/distributed/base.py +774 -0
- truthound/checkpoint/distributed/orchestrator.py +765 -0
- truthound/checkpoint/distributed/protocols.py +842 -0
- truthound/checkpoint/distributed/registry.py +449 -0
- truthound/checkpoint/idempotency/__init__.py +120 -0
- truthound/checkpoint/idempotency/core.py +295 -0
- truthound/checkpoint/idempotency/fingerprint.py +454 -0
- truthound/checkpoint/idempotency/locking.py +604 -0
- truthound/checkpoint/idempotency/service.py +592 -0
- truthound/checkpoint/idempotency/stores.py +653 -0
- truthound/checkpoint/monitoring/__init__.py +134 -0
- truthound/checkpoint/monitoring/aggregators/__init__.py +15 -0
- truthound/checkpoint/monitoring/aggregators/base.py +372 -0
- truthound/checkpoint/monitoring/aggregators/realtime.py +300 -0
- truthound/checkpoint/monitoring/aggregators/window.py +493 -0
- truthound/checkpoint/monitoring/collectors/__init__.py +17 -0
- truthound/checkpoint/monitoring/collectors/base.py +257 -0
- truthound/checkpoint/monitoring/collectors/memory_collector.py +617 -0
- truthound/checkpoint/monitoring/collectors/prometheus_collector.py +451 -0
- truthound/checkpoint/monitoring/collectors/redis_collector.py +518 -0
- truthound/checkpoint/monitoring/events.py +410 -0
- truthound/checkpoint/monitoring/protocols.py +636 -0
- truthound/checkpoint/monitoring/service.py +578 -0
- truthound/checkpoint/monitoring/views/__init__.py +17 -0
- truthound/checkpoint/monitoring/views/base.py +172 -0
- truthound/checkpoint/monitoring/views/queue_view.py +220 -0
- truthound/checkpoint/monitoring/views/task_view.py +240 -0
- truthound/checkpoint/monitoring/views/worker_view.py +263 -0
- truthound/checkpoint/registry.py +337 -0
- truthound/checkpoint/runner.py +356 -0
- truthound/checkpoint/transaction/__init__.py +133 -0
- truthound/checkpoint/transaction/base.py +389 -0
- truthound/checkpoint/transaction/compensatable.py +537 -0
- truthound/checkpoint/transaction/coordinator.py +576 -0
- truthound/checkpoint/transaction/executor.py +622 -0
- truthound/checkpoint/transaction/idempotency.py +534 -0
- truthound/checkpoint/transaction/saga/__init__.py +143 -0
- truthound/checkpoint/transaction/saga/builder.py +584 -0
- truthound/checkpoint/transaction/saga/definition.py +515 -0
- truthound/checkpoint/transaction/saga/event_store.py +542 -0
- truthound/checkpoint/transaction/saga/patterns.py +833 -0
- truthound/checkpoint/transaction/saga/runner.py +718 -0
- truthound/checkpoint/transaction/saga/state_machine.py +793 -0
- truthound/checkpoint/transaction/saga/strategies.py +780 -0
- truthound/checkpoint/transaction/saga/testing.py +886 -0
- truthound/checkpoint/triggers/__init__.py +58 -0
- truthound/checkpoint/triggers/base.py +237 -0
- truthound/checkpoint/triggers/event.py +385 -0
- truthound/checkpoint/triggers/schedule.py +355 -0
- truthound/cli.py +2358 -0
- truthound/cli_modules/__init__.py +124 -0
- truthound/cli_modules/advanced/__init__.py +45 -0
- truthound/cli_modules/advanced/benchmark.py +343 -0
- truthound/cli_modules/advanced/docs.py +225 -0
- truthound/cli_modules/advanced/lineage.py +209 -0
- truthound/cli_modules/advanced/ml.py +320 -0
- truthound/cli_modules/advanced/realtime.py +196 -0
- truthound/cli_modules/checkpoint/__init__.py +46 -0
- truthound/cli_modules/checkpoint/init.py +114 -0
- truthound/cli_modules/checkpoint/list.py +71 -0
- truthound/cli_modules/checkpoint/run.py +159 -0
- truthound/cli_modules/checkpoint/validate.py +67 -0
- truthound/cli_modules/common/__init__.py +71 -0
- truthound/cli_modules/common/errors.py +414 -0
- truthound/cli_modules/common/options.py +419 -0
- truthound/cli_modules/common/output.py +507 -0
- truthound/cli_modules/common/protocol.py +552 -0
- truthound/cli_modules/core/__init__.py +48 -0
- truthound/cli_modules/core/check.py +123 -0
- truthound/cli_modules/core/compare.py +104 -0
- truthound/cli_modules/core/learn.py +57 -0
- truthound/cli_modules/core/mask.py +77 -0
- truthound/cli_modules/core/profile.py +65 -0
- truthound/cli_modules/core/scan.py +61 -0
- truthound/cli_modules/profiler/__init__.py +51 -0
- truthound/cli_modules/profiler/auto_profile.py +175 -0
- truthound/cli_modules/profiler/metadata.py +107 -0
- truthound/cli_modules/profiler/suite.py +283 -0
- truthound/cli_modules/registry.py +431 -0
- truthound/cli_modules/scaffolding/__init__.py +89 -0
- truthound/cli_modules/scaffolding/base.py +631 -0
- truthound/cli_modules/scaffolding/commands.py +545 -0
- truthound/cli_modules/scaffolding/plugins.py +1072 -0
- truthound/cli_modules/scaffolding/reporters.py +594 -0
- truthound/cli_modules/scaffolding/validators.py +1127 -0
- truthound/common/__init__.py +18 -0
- truthound/common/resilience/__init__.py +130 -0
- truthound/common/resilience/bulkhead.py +266 -0
- truthound/common/resilience/circuit_breaker.py +516 -0
- truthound/common/resilience/composite.py +332 -0
- truthound/common/resilience/config.py +292 -0
- truthound/common/resilience/protocols.py +217 -0
- truthound/common/resilience/rate_limiter.py +404 -0
- truthound/common/resilience/retry.py +341 -0
- truthound/datadocs/__init__.py +260 -0
- truthound/datadocs/base.py +571 -0
- truthound/datadocs/builder.py +761 -0
- truthound/datadocs/charts.py +764 -0
- truthound/datadocs/dashboard/__init__.py +63 -0
- truthound/datadocs/dashboard/app.py +576 -0
- truthound/datadocs/dashboard/components.py +584 -0
- truthound/datadocs/dashboard/state.py +240 -0
- truthound/datadocs/engine/__init__.py +46 -0
- truthound/datadocs/engine/context.py +376 -0
- truthound/datadocs/engine/pipeline.py +618 -0
- truthound/datadocs/engine/registry.py +469 -0
- truthound/datadocs/exporters/__init__.py +49 -0
- truthound/datadocs/exporters/base.py +198 -0
- truthound/datadocs/exporters/html.py +178 -0
- truthound/datadocs/exporters/json_exporter.py +253 -0
- truthound/datadocs/exporters/markdown.py +284 -0
- truthound/datadocs/exporters/pdf.py +392 -0
- truthound/datadocs/i18n/__init__.py +86 -0
- truthound/datadocs/i18n/catalog.py +960 -0
- truthound/datadocs/i18n/formatting.py +505 -0
- truthound/datadocs/i18n/loader.py +256 -0
- truthound/datadocs/i18n/plurals.py +378 -0
- truthound/datadocs/renderers/__init__.py +42 -0
- truthound/datadocs/renderers/base.py +401 -0
- truthound/datadocs/renderers/custom.py +342 -0
- truthound/datadocs/renderers/jinja.py +697 -0
- truthound/datadocs/sections.py +736 -0
- truthound/datadocs/styles.py +931 -0
- truthound/datadocs/themes/__init__.py +101 -0
- truthound/datadocs/themes/base.py +336 -0
- truthound/datadocs/themes/default.py +417 -0
- truthound/datadocs/themes/enterprise.py +419 -0
- truthound/datadocs/themes/loader.py +336 -0
- truthound/datadocs/themes.py +301 -0
- truthound/datadocs/transformers/__init__.py +57 -0
- truthound/datadocs/transformers/base.py +268 -0
- truthound/datadocs/transformers/enrichers.py +544 -0
- truthound/datadocs/transformers/filters.py +447 -0
- truthound/datadocs/transformers/i18n.py +468 -0
- truthound/datadocs/versioning/__init__.py +62 -0
- truthound/datadocs/versioning/diff.py +639 -0
- truthound/datadocs/versioning/storage.py +497 -0
- truthound/datadocs/versioning/version.py +358 -0
- truthound/datasources/__init__.py +223 -0
- truthound/datasources/_async_protocols.py +222 -0
- truthound/datasources/_protocols.py +159 -0
- truthound/datasources/adapters.py +428 -0
- truthound/datasources/async_base.py +599 -0
- truthound/datasources/async_factory.py +511 -0
- truthound/datasources/base.py +516 -0
- truthound/datasources/factory.py +433 -0
- truthound/datasources/nosql/__init__.py +47 -0
- truthound/datasources/nosql/base.py +487 -0
- truthound/datasources/nosql/elasticsearch.py +801 -0
- truthound/datasources/nosql/mongodb.py +636 -0
- truthound/datasources/pandas_optimized.py +582 -0
- truthound/datasources/pandas_source.py +216 -0
- truthound/datasources/polars_source.py +395 -0
- truthound/datasources/spark_source.py +479 -0
- truthound/datasources/sql/__init__.py +154 -0
- truthound/datasources/sql/base.py +710 -0
- truthound/datasources/sql/bigquery.py +410 -0
- truthound/datasources/sql/cloud_base.py +199 -0
- truthound/datasources/sql/databricks.py +471 -0
- truthound/datasources/sql/mysql.py +316 -0
- truthound/datasources/sql/oracle.py +427 -0
- truthound/datasources/sql/postgresql.py +321 -0
- truthound/datasources/sql/redshift.py +479 -0
- truthound/datasources/sql/snowflake.py +439 -0
- truthound/datasources/sql/sqlite.py +286 -0
- truthound/datasources/sql/sqlserver.py +437 -0
- truthound/datasources/streaming/__init__.py +47 -0
- truthound/datasources/streaming/base.py +350 -0
- truthound/datasources/streaming/kafka.py +670 -0
- truthound/decorators.py +98 -0
- truthound/docs/__init__.py +69 -0
- truthound/docs/extractor.py +971 -0
- truthound/docs/generator.py +601 -0
- truthound/docs/parser.py +1037 -0
- truthound/docs/renderer.py +999 -0
- truthound/drift/__init__.py +22 -0
- truthound/drift/compare.py +189 -0
- truthound/drift/detectors.py +464 -0
- truthound/drift/report.py +160 -0
- truthound/execution/__init__.py +65 -0
- truthound/execution/_protocols.py +324 -0
- truthound/execution/base.py +576 -0
- truthound/execution/distributed/__init__.py +179 -0
- truthound/execution/distributed/aggregations.py +731 -0
- truthound/execution/distributed/arrow_bridge.py +817 -0
- truthound/execution/distributed/base.py +550 -0
- truthound/execution/distributed/dask_engine.py +976 -0
- truthound/execution/distributed/mixins.py +766 -0
- truthound/execution/distributed/protocols.py +756 -0
- truthound/execution/distributed/ray_engine.py +1127 -0
- truthound/execution/distributed/registry.py +446 -0
- truthound/execution/distributed/spark_engine.py +1011 -0
- truthound/execution/distributed/validator_adapter.py +682 -0
- truthound/execution/pandas_engine.py +401 -0
- truthound/execution/polars_engine.py +497 -0
- truthound/execution/pushdown/__init__.py +230 -0
- truthound/execution/pushdown/ast.py +1550 -0
- truthound/execution/pushdown/builder.py +1550 -0
- truthound/execution/pushdown/dialects.py +1072 -0
- truthound/execution/pushdown/executor.py +829 -0
- truthound/execution/pushdown/optimizer.py +1041 -0
- truthound/execution/sql_engine.py +518 -0
- truthound/infrastructure/__init__.py +189 -0
- truthound/infrastructure/audit.py +1515 -0
- truthound/infrastructure/config.py +1133 -0
- truthound/infrastructure/encryption.py +1132 -0
- truthound/infrastructure/logging.py +1503 -0
- truthound/infrastructure/metrics.py +1220 -0
- truthound/lineage/__init__.py +89 -0
- truthound/lineage/base.py +746 -0
- truthound/lineage/impact_analysis.py +474 -0
- truthound/lineage/integrations/__init__.py +22 -0
- truthound/lineage/integrations/openlineage.py +548 -0
- truthound/lineage/tracker.py +512 -0
- truthound/lineage/visualization/__init__.py +33 -0
- truthound/lineage/visualization/protocols.py +145 -0
- truthound/lineage/visualization/renderers/__init__.py +20 -0
- truthound/lineage/visualization/renderers/cytoscape.py +329 -0
- truthound/lineage/visualization/renderers/d3.py +331 -0
- truthound/lineage/visualization/renderers/graphviz.py +276 -0
- truthound/lineage/visualization/renderers/mermaid.py +308 -0
- truthound/maskers.py +113 -0
- truthound/ml/__init__.py +124 -0
- truthound/ml/anomaly_models/__init__.py +31 -0
- truthound/ml/anomaly_models/ensemble.py +362 -0
- truthound/ml/anomaly_models/isolation_forest.py +444 -0
- truthound/ml/anomaly_models/statistical.py +392 -0
- truthound/ml/base.py +1178 -0
- truthound/ml/drift_detection/__init__.py +26 -0
- truthound/ml/drift_detection/concept.py +381 -0
- truthound/ml/drift_detection/distribution.py +361 -0
- truthound/ml/drift_detection/feature.py +442 -0
- truthound/ml/drift_detection/multivariate.py +495 -0
- truthound/ml/monitoring/__init__.py +88 -0
- truthound/ml/monitoring/alerting/__init__.py +33 -0
- truthound/ml/monitoring/alerting/handlers.py +427 -0
- truthound/ml/monitoring/alerting/rules.py +508 -0
- truthound/ml/monitoring/collectors/__init__.py +19 -0
- truthound/ml/monitoring/collectors/composite.py +105 -0
- truthound/ml/monitoring/collectors/drift.py +324 -0
- truthound/ml/monitoring/collectors/performance.py +179 -0
- truthound/ml/monitoring/collectors/quality.py +369 -0
- truthound/ml/monitoring/monitor.py +536 -0
- truthound/ml/monitoring/protocols.py +451 -0
- truthound/ml/monitoring/stores/__init__.py +15 -0
- truthound/ml/monitoring/stores/memory.py +201 -0
- truthound/ml/monitoring/stores/prometheus.py +296 -0
- truthound/ml/rule_learning/__init__.py +25 -0
- truthound/ml/rule_learning/constraint_miner.py +443 -0
- truthound/ml/rule_learning/pattern_learner.py +499 -0
- truthound/ml/rule_learning/profile_learner.py +462 -0
- truthound/multitenancy/__init__.py +326 -0
- truthound/multitenancy/core.py +852 -0
- truthound/multitenancy/integration.py +597 -0
- truthound/multitenancy/isolation.py +630 -0
- truthound/multitenancy/manager.py +770 -0
- truthound/multitenancy/middleware.py +765 -0
- truthound/multitenancy/quota.py +537 -0
- truthound/multitenancy/resolvers.py +603 -0
- truthound/multitenancy/storage.py +703 -0
- truthound/observability/__init__.py +307 -0
- truthound/observability/context.py +531 -0
- truthound/observability/instrumentation.py +611 -0
- truthound/observability/logging.py +887 -0
- truthound/observability/metrics.py +1157 -0
- truthound/observability/tracing/__init__.py +178 -0
- truthound/observability/tracing/baggage.py +310 -0
- truthound/observability/tracing/config.py +426 -0
- truthound/observability/tracing/exporter.py +787 -0
- truthound/observability/tracing/integration.py +1018 -0
- truthound/observability/tracing/otel/__init__.py +146 -0
- truthound/observability/tracing/otel/adapter.py +982 -0
- truthound/observability/tracing/otel/bridge.py +1177 -0
- truthound/observability/tracing/otel/compat.py +681 -0
- truthound/observability/tracing/otel/config.py +691 -0
- truthound/observability/tracing/otel/detection.py +327 -0
- truthound/observability/tracing/otel/protocols.py +426 -0
- truthound/observability/tracing/processor.py +561 -0
- truthound/observability/tracing/propagator.py +757 -0
- truthound/observability/tracing/provider.py +569 -0
- truthound/observability/tracing/resource.py +515 -0
- truthound/observability/tracing/sampler.py +487 -0
- truthound/observability/tracing/span.py +676 -0
- truthound/plugins/__init__.py +198 -0
- truthound/plugins/base.py +599 -0
- truthound/plugins/cli.py +680 -0
- truthound/plugins/dependencies/__init__.py +42 -0
- truthound/plugins/dependencies/graph.py +422 -0
- truthound/plugins/dependencies/resolver.py +417 -0
- truthound/plugins/discovery.py +379 -0
- truthound/plugins/docs/__init__.py +46 -0
- truthound/plugins/docs/extractor.py +444 -0
- truthound/plugins/docs/renderer.py +499 -0
- truthound/plugins/enterprise_manager.py +877 -0
- truthound/plugins/examples/__init__.py +19 -0
- truthound/plugins/examples/custom_validators.py +317 -0
- truthound/plugins/examples/slack_notifier.py +312 -0
- truthound/plugins/examples/xml_reporter.py +254 -0
- truthound/plugins/hooks.py +558 -0
- truthound/plugins/lifecycle/__init__.py +43 -0
- truthound/plugins/lifecycle/hot_reload.py +402 -0
- truthound/plugins/lifecycle/manager.py +371 -0
- truthound/plugins/manager.py +736 -0
- truthound/plugins/registry.py +338 -0
- truthound/plugins/security/__init__.py +93 -0
- truthound/plugins/security/exceptions.py +332 -0
- truthound/plugins/security/policies.py +348 -0
- truthound/plugins/security/protocols.py +643 -0
- truthound/plugins/security/sandbox/__init__.py +45 -0
- truthound/plugins/security/sandbox/context.py +158 -0
- truthound/plugins/security/sandbox/engines/__init__.py +19 -0
- truthound/plugins/security/sandbox/engines/container.py +379 -0
- truthound/plugins/security/sandbox/engines/noop.py +144 -0
- truthound/plugins/security/sandbox/engines/process.py +336 -0
- truthound/plugins/security/sandbox/factory.py +211 -0
- truthound/plugins/security/signing/__init__.py +57 -0
- truthound/plugins/security/signing/service.py +330 -0
- truthound/plugins/security/signing/trust_store.py +368 -0
- truthound/plugins/security/signing/verifier.py +459 -0
- truthound/plugins/versioning/__init__.py +41 -0
- truthound/plugins/versioning/constraints.py +297 -0
- truthound/plugins/versioning/resolver.py +329 -0
- truthound/profiler/__init__.py +1729 -0
- truthound/profiler/_lazy.py +452 -0
- truthound/profiler/ab_testing/__init__.py +80 -0
- truthound/profiler/ab_testing/analysis.py +449 -0
- truthound/profiler/ab_testing/base.py +257 -0
- truthound/profiler/ab_testing/experiment.py +395 -0
- truthound/profiler/ab_testing/tracking.py +368 -0
- truthound/profiler/auto_threshold.py +1170 -0
- truthound/profiler/base.py +579 -0
- truthound/profiler/cache_patterns.py +911 -0
- truthound/profiler/caching.py +1303 -0
- truthound/profiler/column_profiler.py +712 -0
- truthound/profiler/comparison.py +1007 -0
- truthound/profiler/custom_patterns.py +1170 -0
- truthound/profiler/dashboard/__init__.py +50 -0
- truthound/profiler/dashboard/app.py +476 -0
- truthound/profiler/dashboard/components.py +457 -0
- truthound/profiler/dashboard/config.py +72 -0
- truthound/profiler/distributed/__init__.py +83 -0
- truthound/profiler/distributed/base.py +281 -0
- truthound/profiler/distributed/dask_backend.py +498 -0
- truthound/profiler/distributed/local_backend.py +293 -0
- truthound/profiler/distributed/profiler.py +304 -0
- truthound/profiler/distributed/ray_backend.py +374 -0
- truthound/profiler/distributed/spark_backend.py +375 -0
- truthound/profiler/distributed.py +1366 -0
- truthound/profiler/enterprise_sampling.py +1065 -0
- truthound/profiler/errors.py +488 -0
- truthound/profiler/evolution/__init__.py +91 -0
- truthound/profiler/evolution/alerts.py +426 -0
- truthound/profiler/evolution/changes.py +206 -0
- truthound/profiler/evolution/compatibility.py +365 -0
- truthound/profiler/evolution/detector.py +372 -0
- truthound/profiler/evolution/protocols.py +121 -0
- truthound/profiler/generators/__init__.py +48 -0
- truthound/profiler/generators/base.py +384 -0
- truthound/profiler/generators/ml_rules.py +375 -0
- truthound/profiler/generators/pattern_rules.py +384 -0
- truthound/profiler/generators/schema_rules.py +267 -0
- truthound/profiler/generators/stats_rules.py +324 -0
- truthound/profiler/generators/suite_generator.py +857 -0
- truthound/profiler/i18n.py +1542 -0
- truthound/profiler/incremental.py +554 -0
- truthound/profiler/incremental_validation.py +1710 -0
- truthound/profiler/integration/__init__.py +73 -0
- truthound/profiler/integration/adapters.py +345 -0
- truthound/profiler/integration/context.py +371 -0
- truthound/profiler/integration/executor.py +527 -0
- truthound/profiler/integration/naming.py +75 -0
- truthound/profiler/integration/protocols.py +243 -0
- truthound/profiler/memory.py +1185 -0
- truthound/profiler/migration/__init__.py +60 -0
- truthound/profiler/migration/base.py +345 -0
- truthound/profiler/migration/manager.py +444 -0
- truthound/profiler/migration/v1_0_to_v1_1.py +484 -0
- truthound/profiler/ml/__init__.py +73 -0
- truthound/profiler/ml/base.py +244 -0
- truthound/profiler/ml/classifier.py +507 -0
- truthound/profiler/ml/feature_extraction.py +604 -0
- truthound/profiler/ml/pretrained.py +448 -0
- truthound/profiler/ml_inference.py +1276 -0
- truthound/profiler/native_patterns.py +815 -0
- truthound/profiler/observability.py +1184 -0
- truthound/profiler/process_timeout.py +1566 -0
- truthound/profiler/progress.py +568 -0
- truthound/profiler/progress_callbacks.py +1734 -0
- truthound/profiler/quality.py +1345 -0
- truthound/profiler/resilience.py +1180 -0
- truthound/profiler/sampled_matcher.py +794 -0
- truthound/profiler/sampling.py +1288 -0
- truthound/profiler/scheduling/__init__.py +82 -0
- truthound/profiler/scheduling/protocols.py +214 -0
- truthound/profiler/scheduling/scheduler.py +474 -0
- truthound/profiler/scheduling/storage.py +457 -0
- truthound/profiler/scheduling/triggers.py +449 -0
- truthound/profiler/schema.py +603 -0
- truthound/profiler/streaming.py +685 -0
- truthound/profiler/streaming_patterns.py +1354 -0
- truthound/profiler/suite_cli.py +625 -0
- truthound/profiler/suite_config.py +789 -0
- truthound/profiler/suite_export.py +1268 -0
- truthound/profiler/table_profiler.py +547 -0
- truthound/profiler/timeout.py +565 -0
- truthound/profiler/validation.py +1532 -0
- truthound/profiler/visualization/__init__.py +118 -0
- truthound/profiler/visualization/base.py +346 -0
- truthound/profiler/visualization/generator.py +1259 -0
- truthound/profiler/visualization/plotly_renderer.py +811 -0
- truthound/profiler/visualization/renderers.py +669 -0
- truthound/profiler/visualization/sections.py +540 -0
- truthound/profiler/visualization.py +2122 -0
- truthound/profiler/yaml_validation.py +1151 -0
- truthound/py.typed +0 -0
- truthound/ratelimit/__init__.py +248 -0
- truthound/ratelimit/algorithms.py +1108 -0
- truthound/ratelimit/core.py +573 -0
- truthound/ratelimit/integration.py +532 -0
- truthound/ratelimit/limiter.py +663 -0
- truthound/ratelimit/middleware.py +700 -0
- truthound/ratelimit/policy.py +792 -0
- truthound/ratelimit/storage.py +763 -0
- truthound/rbac/__init__.py +340 -0
- truthound/rbac/core.py +976 -0
- truthound/rbac/integration.py +760 -0
- truthound/rbac/manager.py +1052 -0
- truthound/rbac/middleware.py +842 -0
- truthound/rbac/policy.py +954 -0
- truthound/rbac/storage.py +878 -0
- truthound/realtime/__init__.py +141 -0
- truthound/realtime/adapters/__init__.py +43 -0
- truthound/realtime/adapters/base.py +533 -0
- truthound/realtime/adapters/kafka.py +487 -0
- truthound/realtime/adapters/kinesis.py +479 -0
- truthound/realtime/adapters/mock.py +243 -0
- truthound/realtime/base.py +553 -0
- truthound/realtime/factory.py +382 -0
- truthound/realtime/incremental.py +660 -0
- truthound/realtime/processing/__init__.py +67 -0
- truthound/realtime/processing/exactly_once.py +575 -0
- truthound/realtime/processing/state.py +547 -0
- truthound/realtime/processing/windows.py +647 -0
- truthound/realtime/protocols.py +569 -0
- truthound/realtime/streaming.py +605 -0
- truthound/realtime/testing/__init__.py +32 -0
- truthound/realtime/testing/containers.py +615 -0
- truthound/realtime/testing/fixtures.py +484 -0
- truthound/report.py +280 -0
- truthound/reporters/__init__.py +46 -0
- truthound/reporters/_protocols.py +30 -0
- truthound/reporters/base.py +324 -0
- truthound/reporters/ci/__init__.py +66 -0
- truthound/reporters/ci/azure.py +436 -0
- truthound/reporters/ci/base.py +509 -0
- truthound/reporters/ci/bitbucket.py +567 -0
- truthound/reporters/ci/circleci.py +547 -0
- truthound/reporters/ci/detection.py +364 -0
- truthound/reporters/ci/factory.py +182 -0
- truthound/reporters/ci/github.py +388 -0
- truthound/reporters/ci/gitlab.py +471 -0
- truthound/reporters/ci/jenkins.py +525 -0
- truthound/reporters/console_reporter.py +299 -0
- truthound/reporters/factory.py +211 -0
- truthound/reporters/html_reporter.py +524 -0
- truthound/reporters/json_reporter.py +256 -0
- truthound/reporters/markdown_reporter.py +280 -0
- truthound/reporters/sdk/__init__.py +174 -0
- truthound/reporters/sdk/builder.py +558 -0
- truthound/reporters/sdk/mixins.py +1150 -0
- truthound/reporters/sdk/schema.py +1493 -0
- truthound/reporters/sdk/templates.py +666 -0
- truthound/reporters/sdk/testing.py +968 -0
- truthound/scanners.py +170 -0
- truthound/scheduling/__init__.py +122 -0
- truthound/scheduling/cron.py +1136 -0
- truthound/scheduling/presets.py +212 -0
- truthound/schema.py +275 -0
- truthound/secrets/__init__.py +173 -0
- truthound/secrets/base.py +618 -0
- truthound/secrets/cloud.py +682 -0
- truthound/secrets/integration.py +507 -0
- truthound/secrets/manager.py +633 -0
- truthound/secrets/oidc/__init__.py +172 -0
- truthound/secrets/oidc/base.py +902 -0
- truthound/secrets/oidc/credential_provider.py +623 -0
- truthound/secrets/oidc/exchangers.py +1001 -0
- truthound/secrets/oidc/github/__init__.py +110 -0
- truthound/secrets/oidc/github/claims.py +718 -0
- truthound/secrets/oidc/github/enhanced_provider.py +693 -0
- truthound/secrets/oidc/github/trust_policy.py +742 -0
- truthound/secrets/oidc/github/verification.py +723 -0
- truthound/secrets/oidc/github/workflow.py +691 -0
- truthound/secrets/oidc/providers.py +825 -0
- truthound/secrets/providers.py +506 -0
- truthound/secrets/resolver.py +495 -0
- truthound/stores/__init__.py +177 -0
- truthound/stores/backends/__init__.py +18 -0
- truthound/stores/backends/_protocols.py +340 -0
- truthound/stores/backends/azure_blob.py +530 -0
- truthound/stores/backends/concurrent_filesystem.py +915 -0
- truthound/stores/backends/connection_pool.py +1365 -0
- truthound/stores/backends/database.py +743 -0
- truthound/stores/backends/filesystem.py +538 -0
- truthound/stores/backends/gcs.py +399 -0
- truthound/stores/backends/memory.py +354 -0
- truthound/stores/backends/s3.py +434 -0
- truthound/stores/backpressure/__init__.py +84 -0
- truthound/stores/backpressure/base.py +375 -0
- truthound/stores/backpressure/circuit_breaker.py +434 -0
- truthound/stores/backpressure/monitor.py +376 -0
- truthound/stores/backpressure/strategies.py +677 -0
- truthound/stores/base.py +551 -0
- truthound/stores/batching/__init__.py +65 -0
- truthound/stores/batching/base.py +305 -0
- truthound/stores/batching/buffer.py +370 -0
- truthound/stores/batching/store.py +248 -0
- truthound/stores/batching/writer.py +521 -0
- truthound/stores/caching/__init__.py +60 -0
- truthound/stores/caching/backends.py +684 -0
- truthound/stores/caching/base.py +356 -0
- truthound/stores/caching/store.py +305 -0
- truthound/stores/compression/__init__.py +193 -0
- truthound/stores/compression/adaptive.py +694 -0
- truthound/stores/compression/base.py +514 -0
- truthound/stores/compression/pipeline.py +868 -0
- truthound/stores/compression/providers.py +672 -0
- truthound/stores/compression/streaming.py +832 -0
- truthound/stores/concurrency/__init__.py +81 -0
- truthound/stores/concurrency/atomic.py +556 -0
- truthound/stores/concurrency/index.py +775 -0
- truthound/stores/concurrency/locks.py +576 -0
- truthound/stores/concurrency/manager.py +482 -0
- truthound/stores/encryption/__init__.py +297 -0
- truthound/stores/encryption/base.py +952 -0
- truthound/stores/encryption/keys.py +1191 -0
- truthound/stores/encryption/pipeline.py +903 -0
- truthound/stores/encryption/providers.py +953 -0
- truthound/stores/encryption/streaming.py +950 -0
- truthound/stores/expectations.py +227 -0
- truthound/stores/factory.py +246 -0
- truthound/stores/migration/__init__.py +75 -0
- truthound/stores/migration/base.py +480 -0
- truthound/stores/migration/manager.py +347 -0
- truthound/stores/migration/registry.py +382 -0
- truthound/stores/migration/store.py +559 -0
- truthound/stores/observability/__init__.py +106 -0
- truthound/stores/observability/audit.py +718 -0
- truthound/stores/observability/config.py +270 -0
- truthound/stores/observability/factory.py +208 -0
- truthound/stores/observability/metrics.py +636 -0
- truthound/stores/observability/protocols.py +410 -0
- truthound/stores/observability/store.py +570 -0
- truthound/stores/observability/tracing.py +784 -0
- truthound/stores/replication/__init__.py +76 -0
- truthound/stores/replication/base.py +260 -0
- truthound/stores/replication/monitor.py +269 -0
- truthound/stores/replication/store.py +439 -0
- truthound/stores/replication/syncer.py +391 -0
- truthound/stores/results.py +359 -0
- truthound/stores/retention/__init__.py +77 -0
- truthound/stores/retention/base.py +378 -0
- truthound/stores/retention/policies.py +621 -0
- truthound/stores/retention/scheduler.py +279 -0
- truthound/stores/retention/store.py +526 -0
- truthound/stores/streaming/__init__.py +138 -0
- truthound/stores/streaming/base.py +801 -0
- truthound/stores/streaming/database.py +984 -0
- truthound/stores/streaming/filesystem.py +719 -0
- truthound/stores/streaming/reader.py +629 -0
- truthound/stores/streaming/s3.py +843 -0
- truthound/stores/streaming/writer.py +790 -0
- truthound/stores/tiering/__init__.py +108 -0
- truthound/stores/tiering/base.py +462 -0
- truthound/stores/tiering/manager.py +249 -0
- truthound/stores/tiering/policies.py +692 -0
- truthound/stores/tiering/store.py +526 -0
- truthound/stores/versioning/__init__.py +56 -0
- truthound/stores/versioning/base.py +376 -0
- truthound/stores/versioning/store.py +660 -0
- truthound/stores/versioning/strategies.py +353 -0
- truthound/types.py +56 -0
- truthound/validators/__init__.py +774 -0
- truthound/validators/aggregate/__init__.py +27 -0
- truthound/validators/aggregate/central.py +116 -0
- truthound/validators/aggregate/extremes.py +116 -0
- truthound/validators/aggregate/spread.py +118 -0
- truthound/validators/aggregate/sum.py +64 -0
- truthound/validators/aggregate/type.py +78 -0
- truthound/validators/anomaly/__init__.py +93 -0
- truthound/validators/anomaly/base.py +431 -0
- truthound/validators/anomaly/ml_based.py +1190 -0
- truthound/validators/anomaly/multivariate.py +647 -0
- truthound/validators/anomaly/statistical.py +599 -0
- truthound/validators/base.py +1089 -0
- truthound/validators/business_rule/__init__.py +46 -0
- truthound/validators/business_rule/base.py +147 -0
- truthound/validators/business_rule/checksum.py +509 -0
- truthound/validators/business_rule/financial.py +526 -0
- truthound/validators/cache.py +733 -0
- truthound/validators/completeness/__init__.py +39 -0
- truthound/validators/completeness/conditional.py +73 -0
- truthound/validators/completeness/default.py +98 -0
- truthound/validators/completeness/empty.py +103 -0
- truthound/validators/completeness/nan.py +337 -0
- truthound/validators/completeness/null.py +152 -0
- truthound/validators/cross_table/__init__.py +17 -0
- truthound/validators/cross_table/aggregate.py +333 -0
- truthound/validators/cross_table/row_count.py +122 -0
- truthound/validators/datetime/__init__.py +29 -0
- truthound/validators/datetime/format.py +78 -0
- truthound/validators/datetime/freshness.py +269 -0
- truthound/validators/datetime/order.py +73 -0
- truthound/validators/datetime/parseable.py +185 -0
- truthound/validators/datetime/range.py +202 -0
- truthound/validators/datetime/timezone.py +69 -0
- truthound/validators/distribution/__init__.py +49 -0
- truthound/validators/distribution/distribution.py +128 -0
- truthound/validators/distribution/monotonic.py +119 -0
- truthound/validators/distribution/outlier.py +178 -0
- truthound/validators/distribution/quantile.py +80 -0
- truthound/validators/distribution/range.py +254 -0
- truthound/validators/distribution/set.py +125 -0
- truthound/validators/distribution/statistical.py +459 -0
- truthound/validators/drift/__init__.py +79 -0
- truthound/validators/drift/base.py +427 -0
- truthound/validators/drift/multi_feature.py +401 -0
- truthound/validators/drift/numeric.py +395 -0
- truthound/validators/drift/psi.py +446 -0
- truthound/validators/drift/statistical.py +510 -0
- truthound/validators/enterprise.py +1658 -0
- truthound/validators/geospatial/__init__.py +80 -0
- truthound/validators/geospatial/base.py +97 -0
- truthound/validators/geospatial/boundary.py +238 -0
- truthound/validators/geospatial/coordinate.py +351 -0
- truthound/validators/geospatial/distance.py +399 -0
- truthound/validators/geospatial/polygon.py +665 -0
- truthound/validators/i18n/__init__.py +308 -0
- truthound/validators/i18n/bidi.py +571 -0
- truthound/validators/i18n/catalogs.py +570 -0
- truthound/validators/i18n/dialects.py +763 -0
- truthound/validators/i18n/extended_catalogs.py +549 -0
- truthound/validators/i18n/formatting.py +1434 -0
- truthound/validators/i18n/loader.py +1020 -0
- truthound/validators/i18n/messages.py +521 -0
- truthound/validators/i18n/plural.py +683 -0
- truthound/validators/i18n/protocols.py +855 -0
- truthound/validators/i18n/tms.py +1162 -0
- truthound/validators/localization/__init__.py +53 -0
- truthound/validators/localization/base.py +122 -0
- truthound/validators/localization/chinese.py +362 -0
- truthound/validators/localization/japanese.py +275 -0
- truthound/validators/localization/korean.py +524 -0
- truthound/validators/memory/__init__.py +94 -0
- truthound/validators/memory/approximate_knn.py +506 -0
- truthound/validators/memory/base.py +547 -0
- truthound/validators/memory/sgd_online.py +719 -0
- truthound/validators/memory/streaming_ecdf.py +753 -0
- truthound/validators/ml_feature/__init__.py +54 -0
- truthound/validators/ml_feature/base.py +249 -0
- truthound/validators/ml_feature/correlation.py +299 -0
- truthound/validators/ml_feature/leakage.py +344 -0
- truthound/validators/ml_feature/null_impact.py +270 -0
- truthound/validators/ml_feature/scale.py +264 -0
- truthound/validators/multi_column/__init__.py +89 -0
- truthound/validators/multi_column/arithmetic.py +284 -0
- truthound/validators/multi_column/base.py +231 -0
- truthound/validators/multi_column/comparison.py +273 -0
- truthound/validators/multi_column/consistency.py +312 -0
- truthound/validators/multi_column/statistical.py +299 -0
- truthound/validators/optimization/__init__.py +164 -0
- truthound/validators/optimization/aggregation.py +563 -0
- truthound/validators/optimization/covariance.py +556 -0
- truthound/validators/optimization/geo.py +626 -0
- truthound/validators/optimization/graph.py +587 -0
- truthound/validators/optimization/orchestrator.py +970 -0
- truthound/validators/optimization/profiling.py +1312 -0
- truthound/validators/privacy/__init__.py +223 -0
- truthound/validators/privacy/base.py +635 -0
- truthound/validators/privacy/ccpa.py +670 -0
- truthound/validators/privacy/gdpr.py +728 -0
- truthound/validators/privacy/global_patterns.py +604 -0
- truthound/validators/privacy/plugins.py +867 -0
- truthound/validators/profiling/__init__.py +52 -0
- truthound/validators/profiling/base.py +175 -0
- truthound/validators/profiling/cardinality.py +312 -0
- truthound/validators/profiling/entropy.py +391 -0
- truthound/validators/profiling/frequency.py +455 -0
- truthound/validators/pushdown_support.py +660 -0
- truthound/validators/query/__init__.py +91 -0
- truthound/validators/query/aggregate.py +346 -0
- truthound/validators/query/base.py +246 -0
- truthound/validators/query/column.py +249 -0
- truthound/validators/query/expression.py +274 -0
- truthound/validators/query/result.py +323 -0
- truthound/validators/query/row_count.py +264 -0
- truthound/validators/referential/__init__.py +80 -0
- truthound/validators/referential/base.py +395 -0
- truthound/validators/referential/cascade.py +391 -0
- truthound/validators/referential/circular.py +563 -0
- truthound/validators/referential/foreign_key.py +624 -0
- truthound/validators/referential/orphan.py +485 -0
- truthound/validators/registry.py +112 -0
- truthound/validators/schema/__init__.py +41 -0
- truthound/validators/schema/column_count.py +142 -0
- truthound/validators/schema/column_exists.py +80 -0
- truthound/validators/schema/column_order.py +82 -0
- truthound/validators/schema/column_pair.py +85 -0
- truthound/validators/schema/column_pair_set.py +195 -0
- truthound/validators/schema/column_type.py +94 -0
- truthound/validators/schema/multi_column.py +53 -0
- truthound/validators/schema/multi_column_aggregate.py +175 -0
- truthound/validators/schema/referential.py +274 -0
- truthound/validators/schema/table_schema.py +91 -0
- truthound/validators/schema_validator.py +219 -0
- truthound/validators/sdk/__init__.py +250 -0
- truthound/validators/sdk/builder.py +680 -0
- truthound/validators/sdk/decorators.py +474 -0
- truthound/validators/sdk/enterprise/__init__.py +211 -0
- truthound/validators/sdk/enterprise/docs.py +725 -0
- truthound/validators/sdk/enterprise/fuzzing.py +659 -0
- truthound/validators/sdk/enterprise/licensing.py +709 -0
- truthound/validators/sdk/enterprise/manager.py +543 -0
- truthound/validators/sdk/enterprise/resources.py +628 -0
- truthound/validators/sdk/enterprise/sandbox.py +766 -0
- truthound/validators/sdk/enterprise/signing.py +603 -0
- truthound/validators/sdk/enterprise/templates.py +865 -0
- truthound/validators/sdk/enterprise/versioning.py +659 -0
- truthound/validators/sdk/templates.py +757 -0
- truthound/validators/sdk/testing.py +807 -0
- truthound/validators/security/__init__.py +181 -0
- truthound/validators/security/redos/__init__.py +182 -0
- truthound/validators/security/redos/core.py +861 -0
- truthound/validators/security/redos/cpu_monitor.py +593 -0
- truthound/validators/security/redos/cve_database.py +791 -0
- truthound/validators/security/redos/ml/__init__.py +155 -0
- truthound/validators/security/redos/ml/base.py +785 -0
- truthound/validators/security/redos/ml/datasets.py +618 -0
- truthound/validators/security/redos/ml/features.py +359 -0
- truthound/validators/security/redos/ml/models.py +1000 -0
- truthound/validators/security/redos/ml/predictor.py +507 -0
- truthound/validators/security/redos/ml/storage.py +632 -0
- truthound/validators/security/redos/ml/training.py +571 -0
- truthound/validators/security/redos/ml_analyzer.py +937 -0
- truthound/validators/security/redos/optimizer.py +674 -0
- truthound/validators/security/redos/profiler.py +682 -0
- truthound/validators/security/redos/re2_engine.py +709 -0
- truthound/validators/security/redos.py +886 -0
- truthound/validators/security/sql_security.py +1247 -0
- truthound/validators/streaming/__init__.py +126 -0
- truthound/validators/streaming/base.py +292 -0
- truthound/validators/streaming/completeness.py +210 -0
- truthound/validators/streaming/mixin.py +575 -0
- truthound/validators/streaming/range.py +308 -0
- truthound/validators/streaming/sources.py +846 -0
- truthound/validators/string/__init__.py +57 -0
- truthound/validators/string/casing.py +158 -0
- truthound/validators/string/charset.py +96 -0
- truthound/validators/string/format.py +501 -0
- truthound/validators/string/json.py +77 -0
- truthound/validators/string/json_schema.py +184 -0
- truthound/validators/string/length.py +104 -0
- truthound/validators/string/like_pattern.py +237 -0
- truthound/validators/string/regex.py +202 -0
- truthound/validators/string/regex_extended.py +435 -0
- truthound/validators/table/__init__.py +88 -0
- truthound/validators/table/base.py +78 -0
- truthound/validators/table/column_count.py +198 -0
- truthound/validators/table/freshness.py +362 -0
- truthound/validators/table/row_count.py +251 -0
- truthound/validators/table/schema.py +333 -0
- truthound/validators/table/size.py +285 -0
- truthound/validators/timeout/__init__.py +102 -0
- truthound/validators/timeout/advanced/__init__.py +247 -0
- truthound/validators/timeout/advanced/circuit_breaker.py +675 -0
- truthound/validators/timeout/advanced/prediction.py +773 -0
- truthound/validators/timeout/advanced/priority.py +618 -0
- truthound/validators/timeout/advanced/redis_backend.py +770 -0
- truthound/validators/timeout/advanced/retry.py +721 -0
- truthound/validators/timeout/advanced/sampling.py +788 -0
- truthound/validators/timeout/advanced/sla.py +661 -0
- truthound/validators/timeout/advanced/telemetry.py +804 -0
- truthound/validators/timeout/cascade.py +477 -0
- truthound/validators/timeout/deadline.py +657 -0
- truthound/validators/timeout/degradation.py +525 -0
- truthound/validators/timeout/distributed.py +597 -0
- truthound/validators/timeseries/__init__.py +89 -0
- truthound/validators/timeseries/base.py +326 -0
- truthound/validators/timeseries/completeness.py +617 -0
- truthound/validators/timeseries/gap.py +485 -0
- truthound/validators/timeseries/monotonic.py +310 -0
- truthound/validators/timeseries/seasonality.py +422 -0
- truthound/validators/timeseries/trend.py +510 -0
- truthound/validators/uniqueness/__init__.py +59 -0
- truthound/validators/uniqueness/approximate.py +475 -0
- truthound/validators/uniqueness/distinct_values.py +253 -0
- truthound/validators/uniqueness/duplicate.py +118 -0
- truthound/validators/uniqueness/primary_key.py +140 -0
- truthound/validators/uniqueness/unique.py +191 -0
- truthound/validators/uniqueness/within_record.py +599 -0
- truthound/validators/utils.py +756 -0
- truthound-1.0.8.dist-info/METADATA +474 -0
- truthound-1.0.8.dist-info/RECORD +877 -0
- truthound-1.0.8.dist-info/WHEEL +4 -0
- truthound-1.0.8.dist-info/entry_points.txt +2 -0
- truthound-1.0.8.dist-info/licenses/LICENSE +190 -0
truthound/ml/base.py
ADDED
|
@@ -0,0 +1,1178 @@
|
|
|
1
|
+
"""Base classes and core abstractions for ML-based validation.
|
|
2
|
+
|
|
3
|
+
This module provides the foundational abstractions for the ML system:
|
|
4
|
+
- MLModel: Abstract base class for all ML models
|
|
5
|
+
- ModelRegistry: Dynamic registration and management of ML models
|
|
6
|
+
- AnomalyDetector: Base class for anomaly detection models
|
|
7
|
+
- DriftDetector: Base class for drift detection models
|
|
8
|
+
- RuleLearner: Base class for rule learning models
|
|
9
|
+
"""
|
|
10
|
+
|
|
11
|
+
from __future__ import annotations
|
|
12
|
+
|
|
13
|
+
from abc import ABC, abstractmethod
|
|
14
|
+
from dataclasses import dataclass, field
|
|
15
|
+
from datetime import datetime
|
|
16
|
+
from enum import Enum
|
|
17
|
+
from pathlib import Path
|
|
18
|
+
from typing import (
|
|
19
|
+
TYPE_CHECKING,
|
|
20
|
+
Any,
|
|
21
|
+
Callable,
|
|
22
|
+
Generic,
|
|
23
|
+
Iterator,
|
|
24
|
+
Protocol,
|
|
25
|
+
TypeVar,
|
|
26
|
+
runtime_checkable,
|
|
27
|
+
)
|
|
28
|
+
import threading
|
|
29
|
+
import json
|
|
30
|
+
|
|
31
|
+
import polars as pl
|
|
32
|
+
|
|
33
|
+
if TYPE_CHECKING:
|
|
34
|
+
from truthound.validators.base import ValidationIssue
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
# =============================================================================
|
|
38
|
+
# Enums
|
|
39
|
+
# =============================================================================
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
class ModelType(str, Enum):
    """Categories of ML model known to the ML subsystem.

    Every member is also a ``str``, so it compares equal to its raw
    string value (e.g. ``ModelType.CUSTOM == "custom"``).
    """

    # Detection-oriented models.
    ANOMALY_DETECTOR = "anomaly_detector"
    DRIFT_DETECTOR = "drift_detector"

    # Models that derive validation rules from data.
    RULE_LEARNER = "rule_learner"

    # Generic supervised model categories.
    CLASSIFIER = "classifier"
    REGRESSOR = "regressor"

    # Anything that does not fit the categories above.
    CUSTOM = "custom"
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
class ModelState(str, Enum):
    """Lifecycle states an ML model can be in.

    Members double as plain strings, so a state compares equal to its
    lowercase value (e.g. ``ModelState.READY == "ready"``).
    """

    # No training has happened yet (initial state set by MLModel.__init__).
    UNTRAINED = "untrained"
    TRAINING = "training"
    TRAINED = "trained"
    VALIDATING = "validating"
    READY = "ready"

    # Terminal / exceptional states.
    ERROR = "error"
    DEPRECATED = "deprecated"
|
|
63
|
+
|
|
64
|
+
|
|
65
|
+
class AnomalyType(str, Enum):
    """Kinds of anomaly a detector can report.

    Members are ``str`` subclasses and compare equal to their raw value.
    """

    # A single data point is anomalous on its own.
    POINT = "point"
    # A point is anomalous only within its context.
    CONTEXTUAL = "contextual"
    # A group of data points is anomalous together.
    COLLECTIVE = "collective"
    # Anomaly identified from a pattern.
    PATTERN = "pattern"
    # Deviation from a trend.
    TREND = "trend"
    # Violation of a seasonal pattern.
    SEASONAL = "seasonal"
|
|
74
|
+
|
|
75
|
+
|
|
76
|
+
class SeverityLevel(str, Enum):
    """Severity attached to issues found by ML-based checks.

    Declared from least to most severe. Members are plain strings as well,
    so note that ``<``/``>`` compare them alphabetically, not by severity.
    """

    INFO = "info"
    LOW = "low"
    MEDIUM = "medium"
    HIGH = "high"
    CRITICAL = "critical"
|
|
84
|
+
|
|
85
|
+
|
|
86
|
+
# =============================================================================
|
|
87
|
+
# Exceptions
|
|
88
|
+
# =============================================================================
|
|
89
|
+
|
|
90
|
+
|
|
91
|
+
class MLError(Exception):
    """Root of the exception hierarchy for ML-related failures.

    Attributes:
        model_name: Name of the model the error relates to, or None when
            the caller did not supply one.
    """

    def __init__(self, message: str, model_name: str | None = None):
        """Create the error.

        Args:
            message: Human-readable description; becomes ``str(error)``.
            model_name: Optional name of the model involved.
        """
        super().__init__(message)
        self.model_name = model_name
|
|
97
|
+
|
|
98
|
+
|
|
99
|
+
class ModelNotTrainedError(MLError):
    """Signals that a model was used before it had been trained."""
|
|
103
|
+
|
|
104
|
+
|
|
105
|
+
class ModelTrainingError(MLError):
    """Signals that training a model failed."""
|
|
109
|
+
|
|
110
|
+
|
|
111
|
+
class ModelLoadError(MLError):
    """Signals that loading a model failed."""
|
|
115
|
+
|
|
116
|
+
|
|
117
|
+
class InsufficientDataError(MLError):
    """Signals that too little data was available for an ML operation."""
|
|
121
|
+
|
|
122
|
+
|
|
123
|
+
# =============================================================================
|
|
124
|
+
# Configuration Classes
|
|
125
|
+
# =============================================================================
|
|
126
|
+
|
|
127
|
+
|
|
128
|
+
@dataclass
class MLConfig:
    """Settings shared by every ML operation; base for the specific configs.

    Attributes:
        sample_size: Upper bound on the samples used for training
            (None by default).
        random_seed: Seed used for reproducibility.
        n_jobs: How many parallel jobs to run; -1 means all cores.
        cache_predictions: Whether prediction results are cached.
        verbose: Verbosity level: 0 = silent, 1 = progress, 2 = debug.
        metadata: Free-form key/value mapping; each instance gets its own
            empty dict by default.
    """

    sample_size: int | None = None
    random_seed: int = 42
    n_jobs: int = 1
    cache_predictions: bool = True
    verbose: int = 0
    # default_factory so instances never share one dict.
    metadata: dict[str, Any] = field(default_factory=dict)
|
|
146
|
+
|
|
147
|
+
|
|
148
|
+
@dataclass
class AnomalyConfig(MLConfig):
    """Settings for anomaly detection, on top of the shared ``MLConfig`` ones.

    Attributes:
        contamination: Expected share of outliers, from 0.0 to 0.5.
        sensitivity: Detection sensitivity, from 0.0 to 1.0.
        min_samples: Fewest samples needed before detection is attempted.
        window_size: Window size for temporal anomaly detection
            (None by default).
        score_threshold: Optional explicit score cut-off; not described in
            the original docs - presumably overrides a derived threshold,
            confirm against the detectors that read it.
        columns: Columns to analyze; None means all numeric columns.
    """

    contamination: float = 0.1
    sensitivity: float = 0.5
    min_samples: int = 100
    window_size: int | None = None
    score_threshold: float | None = None
    columns: list[str] | None = None
|
|
166
|
+
|
|
167
|
+
|
|
168
|
+
@dataclass
class DriftConfig(MLConfig):
    """Settings for drift detection, on top of the shared ``MLConfig`` ones.

    Attributes:
        reference_window: Size of the reference window.
        detection_window: Size of the detection window.
        threshold: Threshold used to decide that drift occurred.
        min_samples_per_window: Fewest samples required in each window.
        detect_gradual: Flag for gradual drift; presumably enables that
            detection mode - confirm against the detectors that read it.
        detect_sudden: Flag for sudden drift; same caveat as above.
    """

    reference_window: int = 1000
    detection_window: int = 100
    threshold: float = 0.05
    min_samples_per_window: int = 30
    detect_gradual: bool = True
    detect_sudden: bool = True
|
|
185
|
+
|
|
186
|
+
|
|
187
|
+
@dataclass
class RuleLearningConfig(MLConfig):
    """Settings for rule learning, on top of the shared ``MLConfig`` ones.

    Attributes:
        min_support: Lowest support a rule may have.
        min_confidence: Lowest confidence a rule may have.
        max_rules: Cap on the number of rules generated.
        max_antecedent_length: Cap on the length of a rule's antecedent.
        include_negations: Flag, off by default; presumably allows negated
            conditions in learned rules - confirm against the learners.
    """

    min_support: float = 0.1
    min_confidence: float = 0.8
    max_rules: int = 100
    max_antecedent_length: int = 3
    include_negations: bool = False
|
|
203
|
+
|
|
204
|
+
|
|
205
|
+
# Type variable for the config type of a generic model; bound to MLConfig so
# only MLConfig or its subclasses can parameterize MLModel[ConfigT].
ConfigT = TypeVar("ConfigT", bound=MLConfig)
|
|
206
|
+
|
|
207
|
+
|
|
208
|
+
# =============================================================================
|
|
209
|
+
# Result Classes
|
|
210
|
+
# =============================================================================
|
|
211
|
+
|
|
212
|
+
|
|
213
|
+
@dataclass(frozen=True)
class AnomalyScore:
    """Immutable anomaly score for one data point or window.

    Attributes:
        index: Index or identifier of the scored data point.
        score: Anomaly score; a higher value means more anomalous.
        is_anomaly: Whether the point was classified as an anomaly.
        anomaly_type: Kind of anomaly; defaults to ``AnomalyType.POINT``.
        confidence: Confidence in the classification (defaults to 1.0).
        contributing_features: Names of features contributing to the anomaly.
        details: Optional free-text details (None by default).
    """

    index: int | str
    score: float
    is_anomaly: bool
    anomaly_type: AnomalyType = AnomalyType.POINT
    confidence: float = 1.0
    contributing_features: tuple[str, ...] = field(default_factory=tuple)
    details: str | None = None

    def to_dict(self) -> dict[str, Any]:
        """Return a plain-dict view of this score.

        ``score`` is rounded to 6 decimals and ``confidence`` to 4; the
        anomaly type is emitted as its string value and the feature tuple
        as a list.
        """
        return dict(
            index=self.index,
            score=round(self.score, 6),
            is_anomaly=self.is_anomaly,
            anomaly_type=self.anomaly_type.value,
            confidence=round(self.confidence, 4),
            contributing_features=list(self.contributing_features),
            details=self.details,
        )
|
|
244
|
+
|
|
245
|
+
|
|
246
|
+
@dataclass(frozen=True)
class AnomalyResult:
    """Immutable outcome of one anomaly detection run.

    Iterating the result yields every entry of ``scores``.

    Attributes:
        scores: Individual anomaly scores.
        anomaly_count: Total number of anomalies detected.
        anomaly_ratio: Ratio of anomalies to total points.
        total_points: Number of points examined.
        model_name: Name of the model that produced the result.
        detection_time_ms: Detection time in milliseconds.
        threshold_used: Threshold applied, or None when not recorded.
        detected_at: Creation timestamp (``datetime.now()`` by default).
    """

    scores: tuple[AnomalyScore, ...] = field(default_factory=tuple)
    anomaly_count: int = 0
    anomaly_ratio: float = 0.0
    total_points: int = 0
    model_name: str = ""
    detection_time_ms: float = 0.0
    threshold_used: float | None = None
    detected_at: datetime = field(default_factory=datetime.now)

    def __iter__(self) -> Iterator[AnomalyScore]:
        """Iterate over all scores, anomalous or not."""
        return iter(self.scores)

    def get_anomalies(self) -> tuple[AnomalyScore, ...]:
        """Return only the scores flagged as anomalies, in original order."""
        flagged = []
        for entry in self.scores:
            if entry.is_anomaly:
                flagged.append(entry)
        return tuple(flagged)

    def to_dict(self) -> dict[str, Any]:
        """Return a plain-dict summary of the run.

        Only anomalous scores are included (under ``"anomalies"``); the
        ratio is rounded to 4 decimals and the timing to 2.
        """
        anomalies = []
        for entry in self.get_anomalies():
            anomalies.append(entry.to_dict())

        summary: dict[str, Any] = {}
        summary["anomaly_count"] = self.anomaly_count
        summary["anomaly_ratio"] = round(self.anomaly_ratio, 4)
        summary["total_points"] = self.total_points
        summary["model_name"] = self.model_name
        summary["detection_time_ms"] = round(self.detection_time_ms, 2)
        summary["threshold_used"] = self.threshold_used
        summary["detected_at"] = self.detected_at.isoformat()
        summary["anomalies"] = anomalies
        return summary
|
|
285
|
+
|
|
286
|
+
|
|
287
|
+
@dataclass(frozen=True)
class DriftResult:
    """Immutable outcome of a drift detection analysis.

    Attributes:
        is_drifted: Whether drift was detected.
        drift_score: Overall drift score.
        column_scores: Per-column drift scores as ``(column, score)`` pairs.
        drift_type: Type of drift (gradual, sudden, etc.); ``"none"`` by
            default.
        p_value: p-value attached to the result, or None when not set.
        confidence: Confidence in the result (defaults to 1.0).
        details: Optional free-text details (None by default).
        detected_at: Creation timestamp (``datetime.now()`` by default).
    """

    is_drifted: bool = False
    drift_score: float = 0.0
    column_scores: tuple[tuple[str, float], ...] = field(default_factory=tuple)
    drift_type: str = "none"
    p_value: float | None = None
    confidence: float = 1.0
    details: str | None = None
    detected_at: datetime = field(default_factory=datetime.now)

    def get_drifted_columns(self, threshold: float = 0.5) -> list[str]:
        """Return the columns whose drift score reaches ``threshold``.

        Args:
            threshold: Inclusive lower bound on the per-column score.

        Returns:
            Column names in the order they appear in ``column_scores``.
        """
        return [col for col, score in self.column_scores if score >= threshold]

    def to_dict(self) -> dict[str, Any]:
        """Return a plain-dict view of this result.

        Scores and the p-value are rounded to 6 decimals, the confidence
        to 4, and ``column_scores`` becomes a ``{column: score}`` mapping.
        """
        # Compare against None explicitly: a p-value of exactly 0.0 is a
        # real (and highly significant) value, not a missing one.
        p_value = None if self.p_value is None else round(self.p_value, 6)
        return {
            "is_drifted": self.is_drifted,
            "drift_score": round(self.drift_score, 6),
            "drift_type": self.drift_type,
            "p_value": p_value,
            "confidence": round(self.confidence, 4),
            "column_scores": {col: round(score, 6) for col, score in self.column_scores},
            "details": self.details,
            "detected_at": self.detected_at.isoformat(),
        }
|
|
322
|
+
|
|
323
|
+
|
|
324
|
+
@dataclass(frozen=True)
class LearnedRule:
    """Immutable validation rule derived from data.

    Attributes:
        name: Rule name/identifier.
        rule_type: Kind of rule, e.g. 'range', 'pattern', 'constraint'.
        column: Target column name, or a tuple of names for multi-column rules.
        condition: Rule condition expression.
        support: Proportion of the data supporting the rule.
        confidence: Rule confidence.
        validator_config: Extra key/values merged into the validator spec.
        description: Optional human-readable description.
    """

    name: str
    rule_type: str
    column: str | tuple[str, ...]
    condition: str
    support: float
    confidence: float
    validator_config: dict[str, Any] = field(default_factory=dict)
    description: str = ""

    def to_dict(self) -> dict[str, Any]:
        """Return a plain-dict view of the rule.

        A single column stays a string, a tuple of columns becomes a list;
        ``support`` and ``confidence`` are rounded to 4 decimals.
        """
        if isinstance(self.column, str):
            column_value: Any = self.column
        else:
            column_value = list(self.column)

        return dict(
            name=self.name,
            rule_type=self.rule_type,
            column=column_value,
            condition=self.condition,
            support=round(self.support, 4),
            confidence=round(self.confidence, 4),
            validator_config=self.validator_config,
            description=self.description,
        )

    def to_validator_spec(self) -> dict[str, Any]:
        """Build the validator specification for use with Truthound.

        ``columns`` is always a list. Entries of ``validator_config`` are
        merged last, so they win over ``type``/``columns`` on a key clash.
        """
        if isinstance(self.column, str):
            target_columns = [self.column]
        else:
            target_columns = list(self.column)

        spec: dict[str, Any] = {"type": self.rule_type, "columns": target_columns}
        spec.update(self.validator_config)
        return spec
|
|
365
|
+
|
|
366
|
+
|
|
367
|
+
@dataclass(frozen=True)
class RuleLearningResult:
    """Immutable outcome of a rule learning run.

    Iterating the result yields every entry of ``rules``.

    Attributes:
        rules: Learned validation rules.
        total_rules: Rule count as reported by the learner.
        filtered_rules: Rules filtered out by min_confidence/support.
        learning_time_ms: Time taken for learning, in milliseconds.
        data_profile: Profile of the data used for learning.
        learned_at: Creation timestamp (``datetime.now()`` by default).
    """

    rules: tuple[LearnedRule, ...] = field(default_factory=tuple)
    total_rules: int = 0
    filtered_rules: int = 0  # Rules filtered by min_confidence/support
    learning_time_ms: float = 0.0
    data_profile: dict[str, Any] = field(default_factory=dict)
    learned_at: datetime = field(default_factory=datetime.now)

    def __iter__(self) -> Iterator[LearnedRule]:
        """Iterate over the learned rules."""
        return iter(self.rules)

    def get_rules_by_type(self, rule_type: str) -> list[LearnedRule]:
        """Return the rules whose ``rule_type`` equals ``rule_type``."""
        selected = []
        for rule in self.rules:
            if rule.rule_type == rule_type:
                selected.append(rule)
        return selected

    def get_rules_for_column(self, column: str) -> list[LearnedRule]:
        """Return the rules that target ``column``.

        A rule matches when ``column`` is its single column or is one of
        its columns for a multi-column rule.
        """
        selected = []
        for rule in self.rules:
            # Wrap a lone name in a list so the membership test is an exact
            # match rather than a substring search on the string.
            if isinstance(rule.column, str):
                targets = [rule.column]
            else:
                targets = list(rule.column)
            if column in targets:
                selected.append(rule)
        return selected

    def to_validation_suite(self) -> dict[str, Any]:
        """Return the rules in validation suite format.

        Contains one validator spec per rule plus the learning timestamp
        and the reported rule total.
        """
        validators = [rule.to_validator_spec() for rule in self.rules]
        return dict(
            validators=validators,
            generated_at=self.learned_at.isoformat(),
            total_rules=self.total_rules,
        )

    def to_dict(self) -> dict[str, Any]:
        """Return a plain-dict view of the result.

        ``learning_time_ms`` is rounded to 2 decimals and every rule is
        serialized through its own ``to_dict``.
        """
        serialized_rules = [rule.to_dict() for rule in self.rules]
        return dict(
            total_rules=self.total_rules,
            filtered_rules=self.filtered_rules,
            learning_time_ms=round(self.learning_time_ms, 2),
            rules=serialized_rules,
            data_profile=self.data_profile,
            learned_at=self.learned_at.isoformat(),
        )
|
|
414
|
+
|
|
415
|
+
|
|
416
|
+
# =============================================================================
|
|
417
|
+
# Model Metadata
|
|
418
|
+
# =============================================================================
|
|
419
|
+
|
|
420
|
+
|
|
421
|
+
@dataclass(frozen=True)
class ModelInfo:
    """Immutable descriptive metadata for an ML model.

    Attributes:
        name: Unique model identifier.
        version: Model version.
        model_type: Type of model.
        description: Human-readable description.
        author: Model author.
        created_at: Creation timestamp (``datetime.now()`` by default).
        input_features: Names of the input features.
        supports_incremental: Incremental-training capability flag
            (presumably; not described upstream - confirm with implementers).
        supports_online_learning: Online-learning capability flag; checked
            by ``MLModel.partial_fit`` before it accepts new data.
        min_samples_required: Minimum number of samples the model requires.
        tags: Free-form tags.
    """

    name: str
    version: str
    model_type: ModelType
    description: str = ""
    author: str = ""
    created_at: datetime = field(default_factory=datetime.now)
    input_features: tuple[str, ...] = field(default_factory=tuple)
    supports_incremental: bool = False
    supports_online_learning: bool = False
    min_samples_required: int = 10
    tags: tuple[str, ...] = field(default_factory=tuple)

    def to_dict(self) -> dict[str, Any]:
        """Return a plain-dict view of the metadata.

        The model type is emitted as its ``.value``, the timestamp as an
        ISO-8601 string, and the tuple fields as lists.
        """
        return dict(
            name=self.name,
            version=self.version,
            model_type=self.model_type.value,
            description=self.description,
            author=self.author,
            created_at=self.created_at.isoformat(),
            input_features=list(self.input_features),
            supports_incremental=self.supports_incremental,
            supports_online_learning=self.supports_online_learning,
            min_samples_required=self.min_samples_required,
            tags=list(self.tags),
        )
|
|
462
|
+
|
|
463
|
+
|
|
464
|
+
# =============================================================================
|
|
465
|
+
# Base ML Model Class
|
|
466
|
+
# =============================================================================
|
|
467
|
+
|
|
468
|
+
|
|
469
|
+
class MLModel(ABC, Generic[ConfigT]):
    """Abstract base class for all ML models.

    This provides the foundational interface for ML models in Truthound.
    Subclasses must implement the ``info`` property and the ``fit()`` and
    ``predict()`` methods.

    Example:
        class MyAnomalyModel(MLModel[AnomalyConfig]):
            @property
            def info(self) -> ModelInfo:
                return ModelInfo(
                    name="my-anomaly",
                    version="1.0.0",
                    model_type=ModelType.ANOMALY_DETECTOR,
                )

            def fit(self, data: pl.LazyFrame) -> None:
                # Train the model
                ...

            def predict(self, data: pl.LazyFrame) -> Any:
                # Make predictions
                ...
    """

    def __init__(self, config: ConfigT | None = None, **kwargs: Any):
        """Initialize the model.

        Args:
            config: Model configuration. When ``None``, the value returned
                by ``_default_config()`` is used.
            **kwargs: Additional parameters that override config. Only keys
                that already exist as attributes on the config are applied;
                any other key is ignored.
        """
        # Fall back to the default only when no config was supplied at all, so
        # a caller-provided config is never discarded for merely being falsy.
        self._config: ConfigT = config if config is not None else self._default_config()  # type: ignore
        self._state: ModelState = ModelState.UNTRAINED
        self._error: Exception | None = None
        self._trained_at: datetime | None = None
        self._training_samples: int = 0
        # Re-entrant lock; none of the base-class methods below acquire it.
        self._lock = threading.RLock()

        # Apply kwargs overrides.
        # NOTE(review): this writes onto the config object that was passed
        # in, so a config shared between models is modified for all of them.
        # object.__setattr__ bypasses any custom __setattr__ on the config
        # class (presumably a frozen dataclass -- confirm).
        for key, value in kwargs.items():
            if hasattr(self._config, key):
                object.__setattr__(self._config, key, value)

    @property
    @abstractmethod
    def info(self) -> ModelInfo:
        """Return model metadata.

        Returns:
            ModelInfo with model name, version, type, etc.
        """
        ...

    @abstractmethod
    def fit(self, data: pl.LazyFrame) -> None:
        """Train the model on data.

        Args:
            data: Training data as LazyFrame

        Raises:
            ModelTrainingError: If training fails
            InsufficientDataError: If not enough data
        """
        ...

    @abstractmethod
    def predict(self, data: pl.LazyFrame) -> Any:
        """Make predictions on new data.

        Args:
            data: Data to predict on

        Returns:
            Predictions (type depends on model)

        Raises:
            ModelNotTrainedError: If model not trained
        """
        ...

    def fit_predict(self, data: pl.LazyFrame) -> Any:
        """Train and predict in one step.

        Args:
            data: Data to train on and predict

        Returns:
            Predictions on the training data
        """
        self.fit(data)
        return self.predict(data)

    def partial_fit(self, data: pl.LazyFrame) -> None:
        """Incrementally update the model with new data.

        Override this for models that support online learning. The default
        implementation performs a full ``fit()`` on ``data``.

        Args:
            data: New data to learn from

        Raises:
            NotImplementedError: If ``info.supports_online_learning`` is falsy
        """
        if not self.info.supports_online_learning:
            raise NotImplementedError(
                f"{self.info.name} does not support online learning"
            )
        # Default implementation: just refit
        self.fit(data)

    def save(self, path: str | Path) -> None:
        """Save the model to disk as JSON.

        Missing parent directories of ``path`` are created.

        Args:
            path: Path to save the model

        Raises:
            ModelNotTrainedError: If model not trained
        """
        if self._state not in (ModelState.TRAINED, ModelState.READY):
            raise ModelNotTrainedError(
                "Cannot save untrained model",
                model_name=self.info.name,
            )

        path = Path(path)
        path.parent.mkdir(parents=True, exist_ok=True)

        model_data = self._serialize()
        # Explicit encoding keeps the file independent of the platform locale.
        with open(path, "w", encoding="utf-8") as f:
            # default=str: values json cannot encode natively are written as
            # their str() form instead of raising.
            json.dump(model_data, f, indent=2, default=str)

    def load(self, path: str | Path) -> None:
        """Load the model from disk.

        On success the state becomes ``ModelState.READY``. If reading or
        deserializing fails, the state becomes ``ModelState.ERROR`` and the
        original exception is kept in ``error``.

        Args:
            path: Path to load the model from

        Raises:
            ModelLoadError: If the file does not exist or loading fails
        """
        path = Path(path)
        if not path.exists():
            raise ModelLoadError(
                f"Model file not found: {path}",
                model_name=self.info.name,
            )

        try:
            with open(path, encoding="utf-8") as f:
                model_data = json.load(f)
            self._deserialize(model_data)
            self._state = ModelState.READY
        except Exception as e:
            self._state = ModelState.ERROR
            self._error = e
            raise ModelLoadError(
                f"Failed to load model: {e}",
                model_name=self.info.name,
            ) from e

    def _serialize(self) -> dict[str, Any]:
        """Serialize model state for saving.

        Override in subclasses to save model-specific state.

        Returns:
            Dict with the model info, state value, training timestamp
            (ISO format or ``None``) and training sample count.
        """
        return {
            "info": self.info.to_dict(),
            "state": self._state.value,
            "trained_at": self._trained_at.isoformat() if self._trained_at else None,
            "training_samples": self._training_samples,
        }

    def _deserialize(self, data: dict[str, Any]) -> None:
        """Deserialize model state from saved data.

        Override in subclasses to restore model-specific state. The base
        implementation restores only ``trained_at`` and ``training_samples``.

        Args:
            data: Dict as produced by ``_serialize()``
        """
        self._trained_at = (
            datetime.fromisoformat(data["trained_at"])
            if data.get("trained_at")
            else None
        )
        self._training_samples = data.get("training_samples", 0)

    def _default_config(self) -> MLConfig:
        """Return default configuration.

        Override in subclasses with specific config types.
        """
        return MLConfig()

    def _validate_data(self, data: pl.LazyFrame, min_samples: int | None = None) -> int:
        """Validate input data and return row count.

        Args:
            data: Data to validate
            min_samples: Minimum required samples. ``None`` means use
                ``info.min_samples_required``; an explicit ``0`` disables
                the minimum.

        Returns:
            Number of rows in data

        Raises:
            InsufficientDataError: If not enough data
        """
        # Collecting the length executes the lazy query once.
        row_count = data.select(pl.len()).collect().item()
        # Compare against None so an explicit min_samples=0 is honoured
        # rather than replaced by the model default.
        min_required = (
            min_samples if min_samples is not None else self.info.min_samples_required
        )

        if row_count < min_required:
            raise InsufficientDataError(
                f"Need at least {min_required} samples, got {row_count}",
                model_name=self.info.name,
            )

        return row_count

    def _maybe_sample(self, data: pl.LazyFrame) -> pl.LazyFrame:
        """Apply sampling if configured.

        When ``config.sample_size`` is set, the first ``sample_size`` rows
        are kept (``head``, not a random sample); otherwise ``data`` is
        returned unchanged.
        """
        if self._config.sample_size is not None:
            return data.head(self._config.sample_size)
        return data

    @property
    def config(self) -> ConfigT:
        """Get model configuration."""
        return self._config

    @property
    def state(self) -> ModelState:
        """Get current model state."""
        return self._state

    @property
    def is_trained(self) -> bool:
        """Check if model is trained and ready (state TRAINED or READY)."""
        return self._state in (ModelState.TRAINED, ModelState.READY)

    @property
    def error(self) -> Exception | None:
        """Get error if model is in error state."""
        return self._error

    @property
    def training_info(self) -> dict[str, Any]:
        """Get training information (timestamp, sample count, state)."""
        return {
            "trained_at": self._trained_at.isoformat() if self._trained_at else None,
            "training_samples": self._training_samples,
            "state": self._state.value,
        }

    def __repr__(self) -> str:
        return (
            f"<{self.__class__.__name__} "
            f"name={self.info.name!r} "
            f"state={self.state.value!r}>"
        )
|
|
728
|
+
|
|
729
|
+
|
|
730
|
+
# =============================================================================
|
|
731
|
+
# Specialized Base Classes
|
|
732
|
+
# =============================================================================
|
|
733
|
+
|
|
734
|
+
|
|
735
|
+
class AnomalyDetector(MLModel[AnomalyConfig]):
    """Abstract base class for anomaly detection models.

    Provides specialized interface for anomaly detection including
    score computation and threshold-based classification. Subclasses
    implement ``score()`` (and ``fit()``); ``predict()`` is derived from
    the scores and the configured threshold.
    """

    @property
    def info(self) -> ModelInfo:
        """Return metadata built from the ``_get_*`` hooks below."""
        return ModelInfo(
            name=self._get_model_name(),
            version=self._get_model_version(),
            model_type=ModelType.ANOMALY_DETECTOR,
            description=self._get_description(),
        )

    def _get_model_name(self) -> str:
        """Override to provide model name.

        Default: the lower-cased class name with "detector" removed.
        """
        return self.__class__.__name__.lower().replace("detector", "")

    def _get_model_version(self) -> str:
        """Override to provide model version."""
        return "1.0.0"

    def _get_description(self) -> str:
        """Override to provide description (defaults to the class docstring)."""
        return self.__class__.__doc__ or ""

    @abstractmethod
    def score(self, data: pl.LazyFrame) -> pl.Series:
        """Compute anomaly scores for data.

        Args:
            data: Data to score

        Returns:
            Series of anomaly scores (higher = more anomalous)
        """
        ...

    def predict(self, data: pl.LazyFrame) -> AnomalyResult:
        """Detect anomalies in data.

        A row is flagged as an anomaly when its score is greater than or
        equal to the threshold returned by ``_get_threshold()``.

        Args:
            data: Data to analyze

        Returns:
            AnomalyResult with detected anomalies

        Raises:
            ModelNotTrainedError: If the model has not been trained
        """
        import time

        start = time.perf_counter()

        if not self.is_trained:
            raise ModelNotTrainedError(
                "Model must be trained before prediction",
                model_name=self.info.name,
            )

        scores = self.score(data)
        threshold = self._get_threshold()

        anomaly_scores = []
        anomaly_count = 0
        for idx, score in enumerate(scores.to_list()):
            is_anomaly = score >= threshold
            if is_anomaly:
                anomaly_count += 1
            if threshold > 0:
                # Clamp to [0, 1]: a negative score must not produce a
                # negative confidence.
                confidence = max(0.0, min(1.0, score / threshold))
            else:
                # The ratio is undefined for a non-positive threshold.
                confidence = 1.0
            anomaly_scores.append(
                AnomalyScore(
                    index=idx,
                    score=score,
                    is_anomaly=is_anomaly,
                    anomaly_type=AnomalyType.POINT,
                    confidence=confidence,
                )
            )

        total_points = len(anomaly_scores)

        # Elapsed wall time in milliseconds, including the is_trained check.
        elapsed = (time.perf_counter() - start) * 1000

        return AnomalyResult(
            scores=tuple(anomaly_scores),
            anomaly_count=anomaly_count,
            anomaly_ratio=anomaly_count / total_points if total_points > 0 else 0.0,
            total_points=total_points,
            model_name=self.info.name,
            detection_time_ms=elapsed,
            threshold_used=threshold,
        )

    def _get_threshold(self) -> float:
        """Get the threshold for anomaly classification.

        Returns ``config.score_threshold`` when it is set, otherwise
        ``1.0 - config.contamination``.
        """
        if self.config.score_threshold is not None:
            return self.config.score_threshold
        # Default: use contamination to determine threshold
        return 1.0 - self.config.contamination

    def _default_config(self) -> AnomalyConfig:
        """Return a default ``AnomalyConfig``."""
        return AnomalyConfig()
|
|
833
|
+
|
|
834
|
+
|
|
835
|
+
class MLDriftDetector(MLModel[DriftConfig]):
    """Abstract base class for ML-based drift detection.

    Extends the statistical drift detection in truthound.drift
    with ML-based approaches. ``fit()`` stores the reference data and
    ``predict()`` compares new data against it via ``detect()``.
    """

    @property
    def info(self) -> ModelInfo:
        """Return metadata built from the ``_get_*`` hooks below."""
        return ModelInfo(
            name=self._get_model_name(),
            version=self._get_model_version(),
            model_type=ModelType.DRIFT_DETECTOR,
            description=self._get_description(),
            supports_incremental=True,
        )

    def _get_model_name(self) -> str:
        """Return the lower-cased class name with "detector" removed."""
        return self.__class__.__name__.lower().replace("detector", "")

    def _get_model_version(self) -> str:
        """Return the model version string."""
        return "1.0.0"

    def _get_description(self) -> str:
        """Return the class docstring, or an empty string if there is none."""
        return self.__class__.__doc__ or ""

    @abstractmethod
    def detect(
        self,
        reference: pl.LazyFrame,
        current: pl.LazyFrame,
        columns: list[str] | None = None,
    ) -> DriftResult:
        """Detect drift between reference and current data.

        Args:
            reference: Reference (baseline) data
            current: Current data to compare
            columns: Specific columns to check (None = all)

        Returns:
            DriftResult with drift analysis
        """
        ...

    def predict(self, data: pl.LazyFrame) -> DriftResult:
        """Predict drift using stored reference data.

        Requires that fit() was called to store reference data.

        Args:
            data: Current data to compare against the stored reference

        Returns:
            DriftResult from ``detect(reference, data)``

        Raises:
            ModelNotTrainedError: If the model is not trained or holds no
                reference data
        """
        # _reference_data is only assigned by fit(). A model whose state
        # was set by load() reports is_trained without it, so check both
        # instead of failing with AttributeError.
        reference = getattr(self, "_reference_data", None)
        if not self.is_trained or reference is None:
            raise ModelNotTrainedError(
                "Model must be trained with reference data first",
                model_name=self.info.name,
            )
        return self.detect(reference, data)

    def fit(self, data: pl.LazyFrame) -> None:
        """Store reference data for drift detection.

        Args:
            data: Reference data to store

        Raises:
            ModelTrainingError: If validation or storing fails; the original
                exception is chained and also kept in ``error``
        """
        self._state = ModelState.TRAINING

        try:
            row_count = self._validate_data(data)
            # The stored reference may be truncated by _maybe_sample, while
            # _training_samples records the full validated row count.
            self._reference_data = self._maybe_sample(data)
            self._training_samples = row_count
            self._trained_at = datetime.now()
            self._state = ModelState.TRAINED
        except Exception as e:
            self._state = ModelState.ERROR
            self._error = e
            raise ModelTrainingError(
                f"Failed to store reference data: {e}",
                model_name=self.info.name,
            ) from e

    def _default_config(self) -> DriftConfig:
        """Return a default ``DriftConfig``."""
        return DriftConfig()
|
|
919
|
+
|
|
920
|
+
|
|
921
|
+
class RuleLearner(MLModel[RuleLearningConfig]):
    """Abstract base class for rule learning models.

    Learns validation rules from data characteristics. Subclasses
    implement ``learn_rules()``; ``fit()`` runs it and keeps the result.
    """

    @property
    def info(self) -> ModelInfo:
        """Return metadata built from the ``_get_*`` hooks below."""
        return ModelInfo(
            name=self._get_model_name(),
            version=self._get_model_version(),
            model_type=ModelType.RULE_LEARNER,
            description=self._get_description(),
        )

    def _get_model_name(self) -> str:
        """Return the lower-cased class name with "learner" removed."""
        return self.__class__.__name__.lower().replace("learner", "")

    def _get_model_version(self) -> str:
        """Return the model version string."""
        return "1.0.0"

    def _get_description(self) -> str:
        """Return the class docstring, or an empty string if there is none."""
        return self.__class__.__doc__ or ""

    @abstractmethod
    def learn_rules(self, data: pl.LazyFrame) -> RuleLearningResult:
        """Learn validation rules from data.

        Args:
            data: Data to analyze

        Returns:
            RuleLearningResult with learned rules
        """
        ...

    def fit(self, data: pl.LazyFrame) -> None:
        """Learn rules from data and store the result.

        Validates the data, calls ``learn_rules()`` and records the
        training metadata.

        Args:
            data: Training data

        Raises:
            ModelTrainingError: If validation or rule learning fails; the
                original exception is chained and also kept in ``error``
        """
        self._state = ModelState.TRAINING

        try:
            row_count = self._validate_data(data)
            self._learned_rules = self.learn_rules(data)
            self._training_samples = row_count
            self._trained_at = datetime.now()
            self._state = ModelState.TRAINED
        except Exception as e:
            self._state = ModelState.ERROR
            self._error = e
            raise ModelTrainingError(
                f"Failed to learn rules: {e}",
                model_name=self.info.name,
            ) from e

    def predict(self, data: pl.LazyFrame) -> RuleLearningResult:
        """Return learned rules (rules don't make predictions per se).

        Args:
            data: Unused; accepted to satisfy the ``MLModel`` interface

        Raises:
            ModelNotTrainedError: If the model is not trained or holds no
                learned rules
        """
        # _learned_rules is only assigned by fit(). A model whose state was
        # set by load() reports is_trained without it, so check both instead
        # of failing with AttributeError.
        learned = getattr(self, "_learned_rules", None)
        if not self.is_trained or learned is None:
            raise ModelNotTrainedError(
                "Model must be trained first",
                model_name=self.info.name,
            )
        return learned

    def get_rules(self) -> tuple[LearnedRule, ...]:
        """Get learned rules (an empty tuple when none are available)."""
        learned = getattr(self, "_learned_rules", None)
        if not self.is_trained or learned is None:
            return ()
        return learned.rules

    def _default_config(self) -> RuleLearningConfig:
        """Return a default ``RuleLearningConfig``."""
        return RuleLearningConfig()
|
|
999
|
+
|
|
1000
|
+
|
|
1001
|
+
# =============================================================================
|
|
1002
|
+
# Model Registry
|
|
1003
|
+
# =============================================================================
|
|
1004
|
+
|
|
1005
|
+
|
|
1006
|
+
class ModelRegistry:
    """Registry for ML model registration and discovery.

    Provides a centralized way to register and retrieve ML models.
    The class is a process-wide singleton: every ``ModelRegistry()`` call
    returns the same object. Creation and all operations are guarded by
    locks.

    Example:
        registry = ModelRegistry()
        registry.register(IsolationForestDetector)

        # Later
        model_cls = registry.get("isolation_forest")
        model = model_cls()
    """

    _instance: "ModelRegistry | None" = None
    # Guards first-time construction so two threads cannot each build and
    # publish their own registry.
    _instance_lock = threading.Lock()

    def __new__(cls) -> "ModelRegistry":
        """Return the shared registry, creating it on first use."""
        with cls._instance_lock:
            if cls._instance is None:
                instance = super().__new__(cls)
                instance._models: dict[str, type[MLModel]] = {}
                instance._by_type: dict[ModelType, dict[str, type[MLModel]]] = {}
                instance._lock = threading.RLock()
                instance._initialized = False
                # Publish only once every attribute is in place.
                cls._instance = instance
        return cls._instance

    def register(
        self,
        model_class: type[MLModel],
        name: str | None = None,
    ) -> None:
        """Register a model class.

        Registering under a name that is already taken replaces the
        previous entry, including its entry in the per-type index.

        Args:
            model_class: Model class to register
            name: Optional name override (defaults to ``info.name``)
        """
        with self._lock:
            # Build a bare instance without running __init__, only to read
            # the class's metadata; info may consult the config, so give it
            # the default one.
            probe = model_class.__new__(model_class)
            probe._config = probe._default_config()
            info = probe.info
            model_name = name or info.name
            model_type = info.model_type

            # Drop any earlier registration of this name from every type
            # bucket, so re-registering under a different model type does
            # not leave a stale entry behind.
            for type_index in self._by_type.values():
                type_index.pop(model_name, None)

            self._models[model_name] = model_class
            self._by_type.setdefault(model_type, {})[model_name] = model_class

    def unregister(self, name: str) -> None:
        """Unregister a model. Unknown names are ignored.

        Args:
            name: Model name to unregister
        """
        with self._lock:
            if self._models.pop(name, None) is None:
                return
            # Remove from type index
            for type_index in self._by_type.values():
                type_index.pop(name, None)

    def get(self, name: str) -> type[MLModel]:
        """Get a registered model class by name.

        Args:
            name: Model name

        Returns:
            Model class

        Raises:
            KeyError: If model not found
        """
        with self._lock:
            if name not in self._models:
                raise KeyError(
                    f"Model '{name}' not found. "
                    f"Available: {list(self._models.keys())}"
                )
            return self._models[name]

    def get_by_type(self, model_type: ModelType) -> dict[str, type[MLModel]]:
        """Get all models of a specific type.

        Args:
            model_type: Type of models to retrieve

        Returns:
            Dict of model name to model class (a copy; mutating it does not
            affect the registry)
        """
        with self._lock:
            return dict(self._by_type.get(model_type, {}))

    def list_all(self) -> list[str]:
        """List all registered model names."""
        with self._lock:
            return list(self._models.keys())

    def list_by_type(self, model_type: ModelType) -> list[str]:
        """List model names of a specific type."""
        with self._lock:
            return list(self._by_type.get(model_type, {}).keys())

    def clear(self) -> None:
        """Clear all registered models."""
        with self._lock:
            self._models.clear()
            self._by_type.clear()
            self._initialized = False
|
|
1118
|
+
|
|
1119
|
+
|
|
1120
|
+
# Global registry instance. ModelRegistry() always returns the same shared
# object, so this name and any later ModelRegistry() call refer to one registry.
model_registry = ModelRegistry()
|
|
1122
|
+
|
|
1123
|
+
|
|
1124
|
+
def register_model(
    name: str | None = None,
) -> Callable[[type[MLModel]], type[MLModel]]:
    """Build a class decorator that adds the class to ``model_registry``.

    Args:
        name: Registration name passed on to ``model_registry.register``;
            ``None`` lets the registry choose the name.

    Returns:
        A decorator that registers the class and returns it unchanged.

    Example:
        @register_model("my_detector")
        class MyAnomalyDetector(AnomalyDetector):
            ...
    """

    def _register(model_cls: type[MLModel]) -> type[MLModel]:
        # Registration is a side effect only; the class itself is untouched.
        model_registry.register(model_cls, name)
        return model_cls

    return _register
|
|
1138
|
+
|
|
1139
|
+
|
|
1140
|
+
# =============================================================================
|
|
1141
|
+
# Protocols
|
|
1142
|
+
# =============================================================================
|
|
1143
|
+
|
|
1144
|
+
|
|
1145
|
+
@runtime_checkable
class MLModelProtocol(Protocol):
    """Structural interface for ML models (duck typing support).

    Any object exposing ``info``, ``fit``, ``predict`` and ``is_trained``
    satisfies this protocol; inheriting from it is not required.
    """

    @property
    def info(self) -> ModelInfo:
        """Model metadata."""
        ...

    def fit(self, data: pl.LazyFrame) -> None:
        """Train the model on ``data``."""
        ...

    def predict(self, data: pl.LazyFrame) -> Any:
        """Produce predictions for ``data``."""
        ...

    @property
    def is_trained(self) -> bool:
        """Whether the model has been trained."""
        ...
|
|
1158
|
+
|
|
1159
|
+
|
|
1160
|
+
@runtime_checkable
class AnomalyDetectorProtocol(Protocol):
    """Structural interface for anomaly detectors.

    Satisfied by any object that provides both ``score`` and ``predict``.
    """

    def score(self, data: pl.LazyFrame) -> pl.Series:
        """Compute anomaly scores for ``data``."""
        ...

    def predict(self, data: pl.LazyFrame) -> AnomalyResult:
        """Detect anomalies in ``data``."""
        ...
|
|
1167
|
+
|
|
1168
|
+
|
|
1169
|
+
@runtime_checkable
|
|
1170
|
+
class DriftDetectorProtocol(Protocol):
|
|
1171
|
+
"""Protocol for drift detectors."""
|
|
1172
|
+
|
|
1173
|
+
def detect(
|
|
1174
|
+
self,
|
|
1175
|
+
reference: pl.LazyFrame,
|
|
1176
|
+
current: pl.LazyFrame,
|
|
1177
|
+
columns: list[str] | None = None,
|
|
1178
|
+
) -> DriftResult: ...
|