truthound 1.0.8__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- truthound/__init__.py +162 -0
- truthound/adapters.py +100 -0
- truthound/api.py +365 -0
- truthound/audit/__init__.py +248 -0
- truthound/audit/core.py +967 -0
- truthound/audit/filters.py +620 -0
- truthound/audit/formatters.py +707 -0
- truthound/audit/logger.py +902 -0
- truthound/audit/middleware.py +571 -0
- truthound/audit/storage.py +1083 -0
- truthound/benchmark/__init__.py +123 -0
- truthound/benchmark/base.py +757 -0
- truthound/benchmark/comparison.py +635 -0
- truthound/benchmark/generators.py +706 -0
- truthound/benchmark/reporters.py +718 -0
- truthound/benchmark/runner.py +635 -0
- truthound/benchmark/scenarios.py +712 -0
- truthound/cache.py +252 -0
- truthound/checkpoint/__init__.py +136 -0
- truthound/checkpoint/actions/__init__.py +164 -0
- truthound/checkpoint/actions/base.py +324 -0
- truthound/checkpoint/actions/custom.py +234 -0
- truthound/checkpoint/actions/discord_notify.py +290 -0
- truthound/checkpoint/actions/email_notify.py +405 -0
- truthound/checkpoint/actions/github_action.py +406 -0
- truthound/checkpoint/actions/opsgenie.py +1499 -0
- truthound/checkpoint/actions/pagerduty.py +226 -0
- truthound/checkpoint/actions/slack_notify.py +233 -0
- truthound/checkpoint/actions/store_result.py +249 -0
- truthound/checkpoint/actions/teams_notify.py +1570 -0
- truthound/checkpoint/actions/telegram_notify.py +419 -0
- truthound/checkpoint/actions/update_docs.py +552 -0
- truthound/checkpoint/actions/webhook.py +293 -0
- truthound/checkpoint/analytics/__init__.py +147 -0
- truthound/checkpoint/analytics/aggregations/__init__.py +23 -0
- truthound/checkpoint/analytics/aggregations/rollup.py +481 -0
- truthound/checkpoint/analytics/aggregations/time_bucket.py +306 -0
- truthound/checkpoint/analytics/analyzers/__init__.py +17 -0
- truthound/checkpoint/analytics/analyzers/anomaly.py +386 -0
- truthound/checkpoint/analytics/analyzers/base.py +270 -0
- truthound/checkpoint/analytics/analyzers/forecast.py +421 -0
- truthound/checkpoint/analytics/analyzers/trend.py +314 -0
- truthound/checkpoint/analytics/models.py +292 -0
- truthound/checkpoint/analytics/protocols.py +549 -0
- truthound/checkpoint/analytics/service.py +718 -0
- truthound/checkpoint/analytics/stores/__init__.py +16 -0
- truthound/checkpoint/analytics/stores/base.py +306 -0
- truthound/checkpoint/analytics/stores/memory_store.py +353 -0
- truthound/checkpoint/analytics/stores/sqlite_store.py +557 -0
- truthound/checkpoint/analytics/stores/timescale_store.py +501 -0
- truthound/checkpoint/async_actions.py +794 -0
- truthound/checkpoint/async_base.py +708 -0
- truthound/checkpoint/async_checkpoint.py +617 -0
- truthound/checkpoint/async_runner.py +639 -0
- truthound/checkpoint/checkpoint.py +527 -0
- truthound/checkpoint/ci/__init__.py +61 -0
- truthound/checkpoint/ci/detector.py +355 -0
- truthound/checkpoint/ci/reporter.py +436 -0
- truthound/checkpoint/ci/templates.py +454 -0
- truthound/checkpoint/circuitbreaker/__init__.py +133 -0
- truthound/checkpoint/circuitbreaker/breaker.py +542 -0
- truthound/checkpoint/circuitbreaker/core.py +252 -0
- truthound/checkpoint/circuitbreaker/detection.py +459 -0
- truthound/checkpoint/circuitbreaker/middleware.py +389 -0
- truthound/checkpoint/circuitbreaker/registry.py +357 -0
- truthound/checkpoint/distributed/__init__.py +139 -0
- truthound/checkpoint/distributed/backends/__init__.py +35 -0
- truthound/checkpoint/distributed/backends/celery_backend.py +503 -0
- truthound/checkpoint/distributed/backends/kubernetes_backend.py +696 -0
- truthound/checkpoint/distributed/backends/local_backend.py +397 -0
- truthound/checkpoint/distributed/backends/ray_backend.py +625 -0
- truthound/checkpoint/distributed/base.py +774 -0
- truthound/checkpoint/distributed/orchestrator.py +765 -0
- truthound/checkpoint/distributed/protocols.py +842 -0
- truthound/checkpoint/distributed/registry.py +449 -0
- truthound/checkpoint/idempotency/__init__.py +120 -0
- truthound/checkpoint/idempotency/core.py +295 -0
- truthound/checkpoint/idempotency/fingerprint.py +454 -0
- truthound/checkpoint/idempotency/locking.py +604 -0
- truthound/checkpoint/idempotency/service.py +592 -0
- truthound/checkpoint/idempotency/stores.py +653 -0
- truthound/checkpoint/monitoring/__init__.py +134 -0
- truthound/checkpoint/monitoring/aggregators/__init__.py +15 -0
- truthound/checkpoint/monitoring/aggregators/base.py +372 -0
- truthound/checkpoint/monitoring/aggregators/realtime.py +300 -0
- truthound/checkpoint/monitoring/aggregators/window.py +493 -0
- truthound/checkpoint/monitoring/collectors/__init__.py +17 -0
- truthound/checkpoint/monitoring/collectors/base.py +257 -0
- truthound/checkpoint/monitoring/collectors/memory_collector.py +617 -0
- truthound/checkpoint/monitoring/collectors/prometheus_collector.py +451 -0
- truthound/checkpoint/monitoring/collectors/redis_collector.py +518 -0
- truthound/checkpoint/monitoring/events.py +410 -0
- truthound/checkpoint/monitoring/protocols.py +636 -0
- truthound/checkpoint/monitoring/service.py +578 -0
- truthound/checkpoint/monitoring/views/__init__.py +17 -0
- truthound/checkpoint/monitoring/views/base.py +172 -0
- truthound/checkpoint/monitoring/views/queue_view.py +220 -0
- truthound/checkpoint/monitoring/views/task_view.py +240 -0
- truthound/checkpoint/monitoring/views/worker_view.py +263 -0
- truthound/checkpoint/registry.py +337 -0
- truthound/checkpoint/runner.py +356 -0
- truthound/checkpoint/transaction/__init__.py +133 -0
- truthound/checkpoint/transaction/base.py +389 -0
- truthound/checkpoint/transaction/compensatable.py +537 -0
- truthound/checkpoint/transaction/coordinator.py +576 -0
- truthound/checkpoint/transaction/executor.py +622 -0
- truthound/checkpoint/transaction/idempotency.py +534 -0
- truthound/checkpoint/transaction/saga/__init__.py +143 -0
- truthound/checkpoint/transaction/saga/builder.py +584 -0
- truthound/checkpoint/transaction/saga/definition.py +515 -0
- truthound/checkpoint/transaction/saga/event_store.py +542 -0
- truthound/checkpoint/transaction/saga/patterns.py +833 -0
- truthound/checkpoint/transaction/saga/runner.py +718 -0
- truthound/checkpoint/transaction/saga/state_machine.py +793 -0
- truthound/checkpoint/transaction/saga/strategies.py +780 -0
- truthound/checkpoint/transaction/saga/testing.py +886 -0
- truthound/checkpoint/triggers/__init__.py +58 -0
- truthound/checkpoint/triggers/base.py +237 -0
- truthound/checkpoint/triggers/event.py +385 -0
- truthound/checkpoint/triggers/schedule.py +355 -0
- truthound/cli.py +2358 -0
- truthound/cli_modules/__init__.py +124 -0
- truthound/cli_modules/advanced/__init__.py +45 -0
- truthound/cli_modules/advanced/benchmark.py +343 -0
- truthound/cli_modules/advanced/docs.py +225 -0
- truthound/cli_modules/advanced/lineage.py +209 -0
- truthound/cli_modules/advanced/ml.py +320 -0
- truthound/cli_modules/advanced/realtime.py +196 -0
- truthound/cli_modules/checkpoint/__init__.py +46 -0
- truthound/cli_modules/checkpoint/init.py +114 -0
- truthound/cli_modules/checkpoint/list.py +71 -0
- truthound/cli_modules/checkpoint/run.py +159 -0
- truthound/cli_modules/checkpoint/validate.py +67 -0
- truthound/cli_modules/common/__init__.py +71 -0
- truthound/cli_modules/common/errors.py +414 -0
- truthound/cli_modules/common/options.py +419 -0
- truthound/cli_modules/common/output.py +507 -0
- truthound/cli_modules/common/protocol.py +552 -0
- truthound/cli_modules/core/__init__.py +48 -0
- truthound/cli_modules/core/check.py +123 -0
- truthound/cli_modules/core/compare.py +104 -0
- truthound/cli_modules/core/learn.py +57 -0
- truthound/cli_modules/core/mask.py +77 -0
- truthound/cli_modules/core/profile.py +65 -0
- truthound/cli_modules/core/scan.py +61 -0
- truthound/cli_modules/profiler/__init__.py +51 -0
- truthound/cli_modules/profiler/auto_profile.py +175 -0
- truthound/cli_modules/profiler/metadata.py +107 -0
- truthound/cli_modules/profiler/suite.py +283 -0
- truthound/cli_modules/registry.py +431 -0
- truthound/cli_modules/scaffolding/__init__.py +89 -0
- truthound/cli_modules/scaffolding/base.py +631 -0
- truthound/cli_modules/scaffolding/commands.py +545 -0
- truthound/cli_modules/scaffolding/plugins.py +1072 -0
- truthound/cli_modules/scaffolding/reporters.py +594 -0
- truthound/cli_modules/scaffolding/validators.py +1127 -0
- truthound/common/__init__.py +18 -0
- truthound/common/resilience/__init__.py +130 -0
- truthound/common/resilience/bulkhead.py +266 -0
- truthound/common/resilience/circuit_breaker.py +516 -0
- truthound/common/resilience/composite.py +332 -0
- truthound/common/resilience/config.py +292 -0
- truthound/common/resilience/protocols.py +217 -0
- truthound/common/resilience/rate_limiter.py +404 -0
- truthound/common/resilience/retry.py +341 -0
- truthound/datadocs/__init__.py +260 -0
- truthound/datadocs/base.py +571 -0
- truthound/datadocs/builder.py +761 -0
- truthound/datadocs/charts.py +764 -0
- truthound/datadocs/dashboard/__init__.py +63 -0
- truthound/datadocs/dashboard/app.py +576 -0
- truthound/datadocs/dashboard/components.py +584 -0
- truthound/datadocs/dashboard/state.py +240 -0
- truthound/datadocs/engine/__init__.py +46 -0
- truthound/datadocs/engine/context.py +376 -0
- truthound/datadocs/engine/pipeline.py +618 -0
- truthound/datadocs/engine/registry.py +469 -0
- truthound/datadocs/exporters/__init__.py +49 -0
- truthound/datadocs/exporters/base.py +198 -0
- truthound/datadocs/exporters/html.py +178 -0
- truthound/datadocs/exporters/json_exporter.py +253 -0
- truthound/datadocs/exporters/markdown.py +284 -0
- truthound/datadocs/exporters/pdf.py +392 -0
- truthound/datadocs/i18n/__init__.py +86 -0
- truthound/datadocs/i18n/catalog.py +960 -0
- truthound/datadocs/i18n/formatting.py +505 -0
- truthound/datadocs/i18n/loader.py +256 -0
- truthound/datadocs/i18n/plurals.py +378 -0
- truthound/datadocs/renderers/__init__.py +42 -0
- truthound/datadocs/renderers/base.py +401 -0
- truthound/datadocs/renderers/custom.py +342 -0
- truthound/datadocs/renderers/jinja.py +697 -0
- truthound/datadocs/sections.py +736 -0
- truthound/datadocs/styles.py +931 -0
- truthound/datadocs/themes/__init__.py +101 -0
- truthound/datadocs/themes/base.py +336 -0
- truthound/datadocs/themes/default.py +417 -0
- truthound/datadocs/themes/enterprise.py +419 -0
- truthound/datadocs/themes/loader.py +336 -0
- truthound/datadocs/themes.py +301 -0
- truthound/datadocs/transformers/__init__.py +57 -0
- truthound/datadocs/transformers/base.py +268 -0
- truthound/datadocs/transformers/enrichers.py +544 -0
- truthound/datadocs/transformers/filters.py +447 -0
- truthound/datadocs/transformers/i18n.py +468 -0
- truthound/datadocs/versioning/__init__.py +62 -0
- truthound/datadocs/versioning/diff.py +639 -0
- truthound/datadocs/versioning/storage.py +497 -0
- truthound/datadocs/versioning/version.py +358 -0
- truthound/datasources/__init__.py +223 -0
- truthound/datasources/_async_protocols.py +222 -0
- truthound/datasources/_protocols.py +159 -0
- truthound/datasources/adapters.py +428 -0
- truthound/datasources/async_base.py +599 -0
- truthound/datasources/async_factory.py +511 -0
- truthound/datasources/base.py +516 -0
- truthound/datasources/factory.py +433 -0
- truthound/datasources/nosql/__init__.py +47 -0
- truthound/datasources/nosql/base.py +487 -0
- truthound/datasources/nosql/elasticsearch.py +801 -0
- truthound/datasources/nosql/mongodb.py +636 -0
- truthound/datasources/pandas_optimized.py +582 -0
- truthound/datasources/pandas_source.py +216 -0
- truthound/datasources/polars_source.py +395 -0
- truthound/datasources/spark_source.py +479 -0
- truthound/datasources/sql/__init__.py +154 -0
- truthound/datasources/sql/base.py +710 -0
- truthound/datasources/sql/bigquery.py +410 -0
- truthound/datasources/sql/cloud_base.py +199 -0
- truthound/datasources/sql/databricks.py +471 -0
- truthound/datasources/sql/mysql.py +316 -0
- truthound/datasources/sql/oracle.py +427 -0
- truthound/datasources/sql/postgresql.py +321 -0
- truthound/datasources/sql/redshift.py +479 -0
- truthound/datasources/sql/snowflake.py +439 -0
- truthound/datasources/sql/sqlite.py +286 -0
- truthound/datasources/sql/sqlserver.py +437 -0
- truthound/datasources/streaming/__init__.py +47 -0
- truthound/datasources/streaming/base.py +350 -0
- truthound/datasources/streaming/kafka.py +670 -0
- truthound/decorators.py +98 -0
- truthound/docs/__init__.py +69 -0
- truthound/docs/extractor.py +971 -0
- truthound/docs/generator.py +601 -0
- truthound/docs/parser.py +1037 -0
- truthound/docs/renderer.py +999 -0
- truthound/drift/__init__.py +22 -0
- truthound/drift/compare.py +189 -0
- truthound/drift/detectors.py +464 -0
- truthound/drift/report.py +160 -0
- truthound/execution/__init__.py +65 -0
- truthound/execution/_protocols.py +324 -0
- truthound/execution/base.py +576 -0
- truthound/execution/distributed/__init__.py +179 -0
- truthound/execution/distributed/aggregations.py +731 -0
- truthound/execution/distributed/arrow_bridge.py +817 -0
- truthound/execution/distributed/base.py +550 -0
- truthound/execution/distributed/dask_engine.py +976 -0
- truthound/execution/distributed/mixins.py +766 -0
- truthound/execution/distributed/protocols.py +756 -0
- truthound/execution/distributed/ray_engine.py +1127 -0
- truthound/execution/distributed/registry.py +446 -0
- truthound/execution/distributed/spark_engine.py +1011 -0
- truthound/execution/distributed/validator_adapter.py +682 -0
- truthound/execution/pandas_engine.py +401 -0
- truthound/execution/polars_engine.py +497 -0
- truthound/execution/pushdown/__init__.py +230 -0
- truthound/execution/pushdown/ast.py +1550 -0
- truthound/execution/pushdown/builder.py +1550 -0
- truthound/execution/pushdown/dialects.py +1072 -0
- truthound/execution/pushdown/executor.py +829 -0
- truthound/execution/pushdown/optimizer.py +1041 -0
- truthound/execution/sql_engine.py +518 -0
- truthound/infrastructure/__init__.py +189 -0
- truthound/infrastructure/audit.py +1515 -0
- truthound/infrastructure/config.py +1133 -0
- truthound/infrastructure/encryption.py +1132 -0
- truthound/infrastructure/logging.py +1503 -0
- truthound/infrastructure/metrics.py +1220 -0
- truthound/lineage/__init__.py +89 -0
- truthound/lineage/base.py +746 -0
- truthound/lineage/impact_analysis.py +474 -0
- truthound/lineage/integrations/__init__.py +22 -0
- truthound/lineage/integrations/openlineage.py +548 -0
- truthound/lineage/tracker.py +512 -0
- truthound/lineage/visualization/__init__.py +33 -0
- truthound/lineage/visualization/protocols.py +145 -0
- truthound/lineage/visualization/renderers/__init__.py +20 -0
- truthound/lineage/visualization/renderers/cytoscape.py +329 -0
- truthound/lineage/visualization/renderers/d3.py +331 -0
- truthound/lineage/visualization/renderers/graphviz.py +276 -0
- truthound/lineage/visualization/renderers/mermaid.py +308 -0
- truthound/maskers.py +113 -0
- truthound/ml/__init__.py +124 -0
- truthound/ml/anomaly_models/__init__.py +31 -0
- truthound/ml/anomaly_models/ensemble.py +362 -0
- truthound/ml/anomaly_models/isolation_forest.py +444 -0
- truthound/ml/anomaly_models/statistical.py +392 -0
- truthound/ml/base.py +1178 -0
- truthound/ml/drift_detection/__init__.py +26 -0
- truthound/ml/drift_detection/concept.py +381 -0
- truthound/ml/drift_detection/distribution.py +361 -0
- truthound/ml/drift_detection/feature.py +442 -0
- truthound/ml/drift_detection/multivariate.py +495 -0
- truthound/ml/monitoring/__init__.py +88 -0
- truthound/ml/monitoring/alerting/__init__.py +33 -0
- truthound/ml/monitoring/alerting/handlers.py +427 -0
- truthound/ml/monitoring/alerting/rules.py +508 -0
- truthound/ml/monitoring/collectors/__init__.py +19 -0
- truthound/ml/monitoring/collectors/composite.py +105 -0
- truthound/ml/monitoring/collectors/drift.py +324 -0
- truthound/ml/monitoring/collectors/performance.py +179 -0
- truthound/ml/monitoring/collectors/quality.py +369 -0
- truthound/ml/monitoring/monitor.py +536 -0
- truthound/ml/monitoring/protocols.py +451 -0
- truthound/ml/monitoring/stores/__init__.py +15 -0
- truthound/ml/monitoring/stores/memory.py +201 -0
- truthound/ml/monitoring/stores/prometheus.py +296 -0
- truthound/ml/rule_learning/__init__.py +25 -0
- truthound/ml/rule_learning/constraint_miner.py +443 -0
- truthound/ml/rule_learning/pattern_learner.py +499 -0
- truthound/ml/rule_learning/profile_learner.py +462 -0
- truthound/multitenancy/__init__.py +326 -0
- truthound/multitenancy/core.py +852 -0
- truthound/multitenancy/integration.py +597 -0
- truthound/multitenancy/isolation.py +630 -0
- truthound/multitenancy/manager.py +770 -0
- truthound/multitenancy/middleware.py +765 -0
- truthound/multitenancy/quota.py +537 -0
- truthound/multitenancy/resolvers.py +603 -0
- truthound/multitenancy/storage.py +703 -0
- truthound/observability/__init__.py +307 -0
- truthound/observability/context.py +531 -0
- truthound/observability/instrumentation.py +611 -0
- truthound/observability/logging.py +887 -0
- truthound/observability/metrics.py +1157 -0
- truthound/observability/tracing/__init__.py +178 -0
- truthound/observability/tracing/baggage.py +310 -0
- truthound/observability/tracing/config.py +426 -0
- truthound/observability/tracing/exporter.py +787 -0
- truthound/observability/tracing/integration.py +1018 -0
- truthound/observability/tracing/otel/__init__.py +146 -0
- truthound/observability/tracing/otel/adapter.py +982 -0
- truthound/observability/tracing/otel/bridge.py +1177 -0
- truthound/observability/tracing/otel/compat.py +681 -0
- truthound/observability/tracing/otel/config.py +691 -0
- truthound/observability/tracing/otel/detection.py +327 -0
- truthound/observability/tracing/otel/protocols.py +426 -0
- truthound/observability/tracing/processor.py +561 -0
- truthound/observability/tracing/propagator.py +757 -0
- truthound/observability/tracing/provider.py +569 -0
- truthound/observability/tracing/resource.py +515 -0
- truthound/observability/tracing/sampler.py +487 -0
- truthound/observability/tracing/span.py +676 -0
- truthound/plugins/__init__.py +198 -0
- truthound/plugins/base.py +599 -0
- truthound/plugins/cli.py +680 -0
- truthound/plugins/dependencies/__init__.py +42 -0
- truthound/plugins/dependencies/graph.py +422 -0
- truthound/plugins/dependencies/resolver.py +417 -0
- truthound/plugins/discovery.py +379 -0
- truthound/plugins/docs/__init__.py +46 -0
- truthound/plugins/docs/extractor.py +444 -0
- truthound/plugins/docs/renderer.py +499 -0
- truthound/plugins/enterprise_manager.py +877 -0
- truthound/plugins/examples/__init__.py +19 -0
- truthound/plugins/examples/custom_validators.py +317 -0
- truthound/plugins/examples/slack_notifier.py +312 -0
- truthound/plugins/examples/xml_reporter.py +254 -0
- truthound/plugins/hooks.py +558 -0
- truthound/plugins/lifecycle/__init__.py +43 -0
- truthound/plugins/lifecycle/hot_reload.py +402 -0
- truthound/plugins/lifecycle/manager.py +371 -0
- truthound/plugins/manager.py +736 -0
- truthound/plugins/registry.py +338 -0
- truthound/plugins/security/__init__.py +93 -0
- truthound/plugins/security/exceptions.py +332 -0
- truthound/plugins/security/policies.py +348 -0
- truthound/plugins/security/protocols.py +643 -0
- truthound/plugins/security/sandbox/__init__.py +45 -0
- truthound/plugins/security/sandbox/context.py +158 -0
- truthound/plugins/security/sandbox/engines/__init__.py +19 -0
- truthound/plugins/security/sandbox/engines/container.py +379 -0
- truthound/plugins/security/sandbox/engines/noop.py +144 -0
- truthound/plugins/security/sandbox/engines/process.py +336 -0
- truthound/plugins/security/sandbox/factory.py +211 -0
- truthound/plugins/security/signing/__init__.py +57 -0
- truthound/plugins/security/signing/service.py +330 -0
- truthound/plugins/security/signing/trust_store.py +368 -0
- truthound/plugins/security/signing/verifier.py +459 -0
- truthound/plugins/versioning/__init__.py +41 -0
- truthound/plugins/versioning/constraints.py +297 -0
- truthound/plugins/versioning/resolver.py +329 -0
- truthound/profiler/__init__.py +1729 -0
- truthound/profiler/_lazy.py +452 -0
- truthound/profiler/ab_testing/__init__.py +80 -0
- truthound/profiler/ab_testing/analysis.py +449 -0
- truthound/profiler/ab_testing/base.py +257 -0
- truthound/profiler/ab_testing/experiment.py +395 -0
- truthound/profiler/ab_testing/tracking.py +368 -0
- truthound/profiler/auto_threshold.py +1170 -0
- truthound/profiler/base.py +579 -0
- truthound/profiler/cache_patterns.py +911 -0
- truthound/profiler/caching.py +1303 -0
- truthound/profiler/column_profiler.py +712 -0
- truthound/profiler/comparison.py +1007 -0
- truthound/profiler/custom_patterns.py +1170 -0
- truthound/profiler/dashboard/__init__.py +50 -0
- truthound/profiler/dashboard/app.py +476 -0
- truthound/profiler/dashboard/components.py +457 -0
- truthound/profiler/dashboard/config.py +72 -0
- truthound/profiler/distributed/__init__.py +83 -0
- truthound/profiler/distributed/base.py +281 -0
- truthound/profiler/distributed/dask_backend.py +498 -0
- truthound/profiler/distributed/local_backend.py +293 -0
- truthound/profiler/distributed/profiler.py +304 -0
- truthound/profiler/distributed/ray_backend.py +374 -0
- truthound/profiler/distributed/spark_backend.py +375 -0
- truthound/profiler/distributed.py +1366 -0
- truthound/profiler/enterprise_sampling.py +1065 -0
- truthound/profiler/errors.py +488 -0
- truthound/profiler/evolution/__init__.py +91 -0
- truthound/profiler/evolution/alerts.py +426 -0
- truthound/profiler/evolution/changes.py +206 -0
- truthound/profiler/evolution/compatibility.py +365 -0
- truthound/profiler/evolution/detector.py +372 -0
- truthound/profiler/evolution/protocols.py +121 -0
- truthound/profiler/generators/__init__.py +48 -0
- truthound/profiler/generators/base.py +384 -0
- truthound/profiler/generators/ml_rules.py +375 -0
- truthound/profiler/generators/pattern_rules.py +384 -0
- truthound/profiler/generators/schema_rules.py +267 -0
- truthound/profiler/generators/stats_rules.py +324 -0
- truthound/profiler/generators/suite_generator.py +857 -0
- truthound/profiler/i18n.py +1542 -0
- truthound/profiler/incremental.py +554 -0
- truthound/profiler/incremental_validation.py +1710 -0
- truthound/profiler/integration/__init__.py +73 -0
- truthound/profiler/integration/adapters.py +345 -0
- truthound/profiler/integration/context.py +371 -0
- truthound/profiler/integration/executor.py +527 -0
- truthound/profiler/integration/naming.py +75 -0
- truthound/profiler/integration/protocols.py +243 -0
- truthound/profiler/memory.py +1185 -0
- truthound/profiler/migration/__init__.py +60 -0
- truthound/profiler/migration/base.py +345 -0
- truthound/profiler/migration/manager.py +444 -0
- truthound/profiler/migration/v1_0_to_v1_1.py +484 -0
- truthound/profiler/ml/__init__.py +73 -0
- truthound/profiler/ml/base.py +244 -0
- truthound/profiler/ml/classifier.py +507 -0
- truthound/profiler/ml/feature_extraction.py +604 -0
- truthound/profiler/ml/pretrained.py +448 -0
- truthound/profiler/ml_inference.py +1276 -0
- truthound/profiler/native_patterns.py +815 -0
- truthound/profiler/observability.py +1184 -0
- truthound/profiler/process_timeout.py +1566 -0
- truthound/profiler/progress.py +568 -0
- truthound/profiler/progress_callbacks.py +1734 -0
- truthound/profiler/quality.py +1345 -0
- truthound/profiler/resilience.py +1180 -0
- truthound/profiler/sampled_matcher.py +794 -0
- truthound/profiler/sampling.py +1288 -0
- truthound/profiler/scheduling/__init__.py +82 -0
- truthound/profiler/scheduling/protocols.py +214 -0
- truthound/profiler/scheduling/scheduler.py +474 -0
- truthound/profiler/scheduling/storage.py +457 -0
- truthound/profiler/scheduling/triggers.py +449 -0
- truthound/profiler/schema.py +603 -0
- truthound/profiler/streaming.py +685 -0
- truthound/profiler/streaming_patterns.py +1354 -0
- truthound/profiler/suite_cli.py +625 -0
- truthound/profiler/suite_config.py +789 -0
- truthound/profiler/suite_export.py +1268 -0
- truthound/profiler/table_profiler.py +547 -0
- truthound/profiler/timeout.py +565 -0
- truthound/profiler/validation.py +1532 -0
- truthound/profiler/visualization/__init__.py +118 -0
- truthound/profiler/visualization/base.py +346 -0
- truthound/profiler/visualization/generator.py +1259 -0
- truthound/profiler/visualization/plotly_renderer.py +811 -0
- truthound/profiler/visualization/renderers.py +669 -0
- truthound/profiler/visualization/sections.py +540 -0
- truthound/profiler/visualization.py +2122 -0
- truthound/profiler/yaml_validation.py +1151 -0
- truthound/py.typed +0 -0
- truthound/ratelimit/__init__.py +248 -0
- truthound/ratelimit/algorithms.py +1108 -0
- truthound/ratelimit/core.py +573 -0
- truthound/ratelimit/integration.py +532 -0
- truthound/ratelimit/limiter.py +663 -0
- truthound/ratelimit/middleware.py +700 -0
- truthound/ratelimit/policy.py +792 -0
- truthound/ratelimit/storage.py +763 -0
- truthound/rbac/__init__.py +340 -0
- truthound/rbac/core.py +976 -0
- truthound/rbac/integration.py +760 -0
- truthound/rbac/manager.py +1052 -0
- truthound/rbac/middleware.py +842 -0
- truthound/rbac/policy.py +954 -0
- truthound/rbac/storage.py +878 -0
- truthound/realtime/__init__.py +141 -0
- truthound/realtime/adapters/__init__.py +43 -0
- truthound/realtime/adapters/base.py +533 -0
- truthound/realtime/adapters/kafka.py +487 -0
- truthound/realtime/adapters/kinesis.py +479 -0
- truthound/realtime/adapters/mock.py +243 -0
- truthound/realtime/base.py +553 -0
- truthound/realtime/factory.py +382 -0
- truthound/realtime/incremental.py +660 -0
- truthound/realtime/processing/__init__.py +67 -0
- truthound/realtime/processing/exactly_once.py +575 -0
- truthound/realtime/processing/state.py +547 -0
- truthound/realtime/processing/windows.py +647 -0
- truthound/realtime/protocols.py +569 -0
- truthound/realtime/streaming.py +605 -0
- truthound/realtime/testing/__init__.py +32 -0
- truthound/realtime/testing/containers.py +615 -0
- truthound/realtime/testing/fixtures.py +484 -0
- truthound/report.py +280 -0
- truthound/reporters/__init__.py +46 -0
- truthound/reporters/_protocols.py +30 -0
- truthound/reporters/base.py +324 -0
- truthound/reporters/ci/__init__.py +66 -0
- truthound/reporters/ci/azure.py +436 -0
- truthound/reporters/ci/base.py +509 -0
- truthound/reporters/ci/bitbucket.py +567 -0
- truthound/reporters/ci/circleci.py +547 -0
- truthound/reporters/ci/detection.py +364 -0
- truthound/reporters/ci/factory.py +182 -0
- truthound/reporters/ci/github.py +388 -0
- truthound/reporters/ci/gitlab.py +471 -0
- truthound/reporters/ci/jenkins.py +525 -0
- truthound/reporters/console_reporter.py +299 -0
- truthound/reporters/factory.py +211 -0
- truthound/reporters/html_reporter.py +524 -0
- truthound/reporters/json_reporter.py +256 -0
- truthound/reporters/markdown_reporter.py +280 -0
- truthound/reporters/sdk/__init__.py +174 -0
- truthound/reporters/sdk/builder.py +558 -0
- truthound/reporters/sdk/mixins.py +1150 -0
- truthound/reporters/sdk/schema.py +1493 -0
- truthound/reporters/sdk/templates.py +666 -0
- truthound/reporters/sdk/testing.py +968 -0
- truthound/scanners.py +170 -0
- truthound/scheduling/__init__.py +122 -0
- truthound/scheduling/cron.py +1136 -0
- truthound/scheduling/presets.py +212 -0
- truthound/schema.py +275 -0
- truthound/secrets/__init__.py +173 -0
- truthound/secrets/base.py +618 -0
- truthound/secrets/cloud.py +682 -0
- truthound/secrets/integration.py +507 -0
- truthound/secrets/manager.py +633 -0
- truthound/secrets/oidc/__init__.py +172 -0
- truthound/secrets/oidc/base.py +902 -0
- truthound/secrets/oidc/credential_provider.py +623 -0
- truthound/secrets/oidc/exchangers.py +1001 -0
- truthound/secrets/oidc/github/__init__.py +110 -0
- truthound/secrets/oidc/github/claims.py +718 -0
- truthound/secrets/oidc/github/enhanced_provider.py +693 -0
- truthound/secrets/oidc/github/trust_policy.py +742 -0
- truthound/secrets/oidc/github/verification.py +723 -0
- truthound/secrets/oidc/github/workflow.py +691 -0
- truthound/secrets/oidc/providers.py +825 -0
- truthound/secrets/providers.py +506 -0
- truthound/secrets/resolver.py +495 -0
- truthound/stores/__init__.py +177 -0
- truthound/stores/backends/__init__.py +18 -0
- truthound/stores/backends/_protocols.py +340 -0
- truthound/stores/backends/azure_blob.py +530 -0
- truthound/stores/backends/concurrent_filesystem.py +915 -0
- truthound/stores/backends/connection_pool.py +1365 -0
- truthound/stores/backends/database.py +743 -0
- truthound/stores/backends/filesystem.py +538 -0
- truthound/stores/backends/gcs.py +399 -0
- truthound/stores/backends/memory.py +354 -0
- truthound/stores/backends/s3.py +434 -0
- truthound/stores/backpressure/__init__.py +84 -0
- truthound/stores/backpressure/base.py +375 -0
- truthound/stores/backpressure/circuit_breaker.py +434 -0
- truthound/stores/backpressure/monitor.py +376 -0
- truthound/stores/backpressure/strategies.py +677 -0
- truthound/stores/base.py +551 -0
- truthound/stores/batching/__init__.py +65 -0
- truthound/stores/batching/base.py +305 -0
- truthound/stores/batching/buffer.py +370 -0
- truthound/stores/batching/store.py +248 -0
- truthound/stores/batching/writer.py +521 -0
- truthound/stores/caching/__init__.py +60 -0
- truthound/stores/caching/backends.py +684 -0
- truthound/stores/caching/base.py +356 -0
- truthound/stores/caching/store.py +305 -0
- truthound/stores/compression/__init__.py +193 -0
- truthound/stores/compression/adaptive.py +694 -0
- truthound/stores/compression/base.py +514 -0
- truthound/stores/compression/pipeline.py +868 -0
- truthound/stores/compression/providers.py +672 -0
- truthound/stores/compression/streaming.py +832 -0
- truthound/stores/concurrency/__init__.py +81 -0
- truthound/stores/concurrency/atomic.py +556 -0
- truthound/stores/concurrency/index.py +775 -0
- truthound/stores/concurrency/locks.py +576 -0
- truthound/stores/concurrency/manager.py +482 -0
- truthound/stores/encryption/__init__.py +297 -0
- truthound/stores/encryption/base.py +952 -0
- truthound/stores/encryption/keys.py +1191 -0
- truthound/stores/encryption/pipeline.py +903 -0
- truthound/stores/encryption/providers.py +953 -0
- truthound/stores/encryption/streaming.py +950 -0
- truthound/stores/expectations.py +227 -0
- truthound/stores/factory.py +246 -0
- truthound/stores/migration/__init__.py +75 -0
- truthound/stores/migration/base.py +480 -0
- truthound/stores/migration/manager.py +347 -0
- truthound/stores/migration/registry.py +382 -0
- truthound/stores/migration/store.py +559 -0
- truthound/stores/observability/__init__.py +106 -0
- truthound/stores/observability/audit.py +718 -0
- truthound/stores/observability/config.py +270 -0
- truthound/stores/observability/factory.py +208 -0
- truthound/stores/observability/metrics.py +636 -0
- truthound/stores/observability/protocols.py +410 -0
- truthound/stores/observability/store.py +570 -0
- truthound/stores/observability/tracing.py +784 -0
- truthound/stores/replication/__init__.py +76 -0
- truthound/stores/replication/base.py +260 -0
- truthound/stores/replication/monitor.py +269 -0
- truthound/stores/replication/store.py +439 -0
- truthound/stores/replication/syncer.py +391 -0
- truthound/stores/results.py +359 -0
- truthound/stores/retention/__init__.py +77 -0
- truthound/stores/retention/base.py +378 -0
- truthound/stores/retention/policies.py +621 -0
- truthound/stores/retention/scheduler.py +279 -0
- truthound/stores/retention/store.py +526 -0
- truthound/stores/streaming/__init__.py +138 -0
- truthound/stores/streaming/base.py +801 -0
- truthound/stores/streaming/database.py +984 -0
- truthound/stores/streaming/filesystem.py +719 -0
- truthound/stores/streaming/reader.py +629 -0
- truthound/stores/streaming/s3.py +843 -0
- truthound/stores/streaming/writer.py +790 -0
- truthound/stores/tiering/__init__.py +108 -0
- truthound/stores/tiering/base.py +462 -0
- truthound/stores/tiering/manager.py +249 -0
- truthound/stores/tiering/policies.py +692 -0
- truthound/stores/tiering/store.py +526 -0
- truthound/stores/versioning/__init__.py +56 -0
- truthound/stores/versioning/base.py +376 -0
- truthound/stores/versioning/store.py +660 -0
- truthound/stores/versioning/strategies.py +353 -0
- truthound/types.py +56 -0
- truthound/validators/__init__.py +774 -0
- truthound/validators/aggregate/__init__.py +27 -0
- truthound/validators/aggregate/central.py +116 -0
- truthound/validators/aggregate/extremes.py +116 -0
- truthound/validators/aggregate/spread.py +118 -0
- truthound/validators/aggregate/sum.py +64 -0
- truthound/validators/aggregate/type.py +78 -0
- truthound/validators/anomaly/__init__.py +93 -0
- truthound/validators/anomaly/base.py +431 -0
- truthound/validators/anomaly/ml_based.py +1190 -0
- truthound/validators/anomaly/multivariate.py +647 -0
- truthound/validators/anomaly/statistical.py +599 -0
- truthound/validators/base.py +1089 -0
- truthound/validators/business_rule/__init__.py +46 -0
- truthound/validators/business_rule/base.py +147 -0
- truthound/validators/business_rule/checksum.py +509 -0
- truthound/validators/business_rule/financial.py +526 -0
- truthound/validators/cache.py +733 -0
- truthound/validators/completeness/__init__.py +39 -0
- truthound/validators/completeness/conditional.py +73 -0
- truthound/validators/completeness/default.py +98 -0
- truthound/validators/completeness/empty.py +103 -0
- truthound/validators/completeness/nan.py +337 -0
- truthound/validators/completeness/null.py +152 -0
- truthound/validators/cross_table/__init__.py +17 -0
- truthound/validators/cross_table/aggregate.py +333 -0
- truthound/validators/cross_table/row_count.py +122 -0
- truthound/validators/datetime/__init__.py +29 -0
- truthound/validators/datetime/format.py +78 -0
- truthound/validators/datetime/freshness.py +269 -0
- truthound/validators/datetime/order.py +73 -0
- truthound/validators/datetime/parseable.py +185 -0
- truthound/validators/datetime/range.py +202 -0
- truthound/validators/datetime/timezone.py +69 -0
- truthound/validators/distribution/__init__.py +49 -0
- truthound/validators/distribution/distribution.py +128 -0
- truthound/validators/distribution/monotonic.py +119 -0
- truthound/validators/distribution/outlier.py +178 -0
- truthound/validators/distribution/quantile.py +80 -0
- truthound/validators/distribution/range.py +254 -0
- truthound/validators/distribution/set.py +125 -0
- truthound/validators/distribution/statistical.py +459 -0
- truthound/validators/drift/__init__.py +79 -0
- truthound/validators/drift/base.py +427 -0
- truthound/validators/drift/multi_feature.py +401 -0
- truthound/validators/drift/numeric.py +395 -0
- truthound/validators/drift/psi.py +446 -0
- truthound/validators/drift/statistical.py +510 -0
- truthound/validators/enterprise.py +1658 -0
- truthound/validators/geospatial/__init__.py +80 -0
- truthound/validators/geospatial/base.py +97 -0
- truthound/validators/geospatial/boundary.py +238 -0
- truthound/validators/geospatial/coordinate.py +351 -0
- truthound/validators/geospatial/distance.py +399 -0
- truthound/validators/geospatial/polygon.py +665 -0
- truthound/validators/i18n/__init__.py +308 -0
- truthound/validators/i18n/bidi.py +571 -0
- truthound/validators/i18n/catalogs.py +570 -0
- truthound/validators/i18n/dialects.py +763 -0
- truthound/validators/i18n/extended_catalogs.py +549 -0
- truthound/validators/i18n/formatting.py +1434 -0
- truthound/validators/i18n/loader.py +1020 -0
- truthound/validators/i18n/messages.py +521 -0
- truthound/validators/i18n/plural.py +683 -0
- truthound/validators/i18n/protocols.py +855 -0
- truthound/validators/i18n/tms.py +1162 -0
- truthound/validators/localization/__init__.py +53 -0
- truthound/validators/localization/base.py +122 -0
- truthound/validators/localization/chinese.py +362 -0
- truthound/validators/localization/japanese.py +275 -0
- truthound/validators/localization/korean.py +524 -0
- truthound/validators/memory/__init__.py +94 -0
- truthound/validators/memory/approximate_knn.py +506 -0
- truthound/validators/memory/base.py +547 -0
- truthound/validators/memory/sgd_online.py +719 -0
- truthound/validators/memory/streaming_ecdf.py +753 -0
- truthound/validators/ml_feature/__init__.py +54 -0
- truthound/validators/ml_feature/base.py +249 -0
- truthound/validators/ml_feature/correlation.py +299 -0
- truthound/validators/ml_feature/leakage.py +344 -0
- truthound/validators/ml_feature/null_impact.py +270 -0
- truthound/validators/ml_feature/scale.py +264 -0
- truthound/validators/multi_column/__init__.py +89 -0
- truthound/validators/multi_column/arithmetic.py +284 -0
- truthound/validators/multi_column/base.py +231 -0
- truthound/validators/multi_column/comparison.py +273 -0
- truthound/validators/multi_column/consistency.py +312 -0
- truthound/validators/multi_column/statistical.py +299 -0
- truthound/validators/optimization/__init__.py +164 -0
- truthound/validators/optimization/aggregation.py +563 -0
- truthound/validators/optimization/covariance.py +556 -0
- truthound/validators/optimization/geo.py +626 -0
- truthound/validators/optimization/graph.py +587 -0
- truthound/validators/optimization/orchestrator.py +970 -0
- truthound/validators/optimization/profiling.py +1312 -0
- truthound/validators/privacy/__init__.py +223 -0
- truthound/validators/privacy/base.py +635 -0
- truthound/validators/privacy/ccpa.py +670 -0
- truthound/validators/privacy/gdpr.py +728 -0
- truthound/validators/privacy/global_patterns.py +604 -0
- truthound/validators/privacy/plugins.py +867 -0
- truthound/validators/profiling/__init__.py +52 -0
- truthound/validators/profiling/base.py +175 -0
- truthound/validators/profiling/cardinality.py +312 -0
- truthound/validators/profiling/entropy.py +391 -0
- truthound/validators/profiling/frequency.py +455 -0
- truthound/validators/pushdown_support.py +660 -0
- truthound/validators/query/__init__.py +91 -0
- truthound/validators/query/aggregate.py +346 -0
- truthound/validators/query/base.py +246 -0
- truthound/validators/query/column.py +249 -0
- truthound/validators/query/expression.py +274 -0
- truthound/validators/query/result.py +323 -0
- truthound/validators/query/row_count.py +264 -0
- truthound/validators/referential/__init__.py +80 -0
- truthound/validators/referential/base.py +395 -0
- truthound/validators/referential/cascade.py +391 -0
- truthound/validators/referential/circular.py +563 -0
- truthound/validators/referential/foreign_key.py +624 -0
- truthound/validators/referential/orphan.py +485 -0
- truthound/validators/registry.py +112 -0
- truthound/validators/schema/__init__.py +41 -0
- truthound/validators/schema/column_count.py +142 -0
- truthound/validators/schema/column_exists.py +80 -0
- truthound/validators/schema/column_order.py +82 -0
- truthound/validators/schema/column_pair.py +85 -0
- truthound/validators/schema/column_pair_set.py +195 -0
- truthound/validators/schema/column_type.py +94 -0
- truthound/validators/schema/multi_column.py +53 -0
- truthound/validators/schema/multi_column_aggregate.py +175 -0
- truthound/validators/schema/referential.py +274 -0
- truthound/validators/schema/table_schema.py +91 -0
- truthound/validators/schema_validator.py +219 -0
- truthound/validators/sdk/__init__.py +250 -0
- truthound/validators/sdk/builder.py +680 -0
- truthound/validators/sdk/decorators.py +474 -0
- truthound/validators/sdk/enterprise/__init__.py +211 -0
- truthound/validators/sdk/enterprise/docs.py +725 -0
- truthound/validators/sdk/enterprise/fuzzing.py +659 -0
- truthound/validators/sdk/enterprise/licensing.py +709 -0
- truthound/validators/sdk/enterprise/manager.py +543 -0
- truthound/validators/sdk/enterprise/resources.py +628 -0
- truthound/validators/sdk/enterprise/sandbox.py +766 -0
- truthound/validators/sdk/enterprise/signing.py +603 -0
- truthound/validators/sdk/enterprise/templates.py +865 -0
- truthound/validators/sdk/enterprise/versioning.py +659 -0
- truthound/validators/sdk/templates.py +757 -0
- truthound/validators/sdk/testing.py +807 -0
- truthound/validators/security/__init__.py +181 -0
- truthound/validators/security/redos/__init__.py +182 -0
- truthound/validators/security/redos/core.py +861 -0
- truthound/validators/security/redos/cpu_monitor.py +593 -0
- truthound/validators/security/redos/cve_database.py +791 -0
- truthound/validators/security/redos/ml/__init__.py +155 -0
- truthound/validators/security/redos/ml/base.py +785 -0
- truthound/validators/security/redos/ml/datasets.py +618 -0
- truthound/validators/security/redos/ml/features.py +359 -0
- truthound/validators/security/redos/ml/models.py +1000 -0
- truthound/validators/security/redos/ml/predictor.py +507 -0
- truthound/validators/security/redos/ml/storage.py +632 -0
- truthound/validators/security/redos/ml/training.py +571 -0
- truthound/validators/security/redos/ml_analyzer.py +937 -0
- truthound/validators/security/redos/optimizer.py +674 -0
- truthound/validators/security/redos/profiler.py +682 -0
- truthound/validators/security/redos/re2_engine.py +709 -0
- truthound/validators/security/redos.py +886 -0
- truthound/validators/security/sql_security.py +1247 -0
- truthound/validators/streaming/__init__.py +126 -0
- truthound/validators/streaming/base.py +292 -0
- truthound/validators/streaming/completeness.py +210 -0
- truthound/validators/streaming/mixin.py +575 -0
- truthound/validators/streaming/range.py +308 -0
- truthound/validators/streaming/sources.py +846 -0
- truthound/validators/string/__init__.py +57 -0
- truthound/validators/string/casing.py +158 -0
- truthound/validators/string/charset.py +96 -0
- truthound/validators/string/format.py +501 -0
- truthound/validators/string/json.py +77 -0
- truthound/validators/string/json_schema.py +184 -0
- truthound/validators/string/length.py +104 -0
- truthound/validators/string/like_pattern.py +237 -0
- truthound/validators/string/regex.py +202 -0
- truthound/validators/string/regex_extended.py +435 -0
- truthound/validators/table/__init__.py +88 -0
- truthound/validators/table/base.py +78 -0
- truthound/validators/table/column_count.py +198 -0
- truthound/validators/table/freshness.py +362 -0
- truthound/validators/table/row_count.py +251 -0
- truthound/validators/table/schema.py +333 -0
- truthound/validators/table/size.py +285 -0
- truthound/validators/timeout/__init__.py +102 -0
- truthound/validators/timeout/advanced/__init__.py +247 -0
- truthound/validators/timeout/advanced/circuit_breaker.py +675 -0
- truthound/validators/timeout/advanced/prediction.py +773 -0
- truthound/validators/timeout/advanced/priority.py +618 -0
- truthound/validators/timeout/advanced/redis_backend.py +770 -0
- truthound/validators/timeout/advanced/retry.py +721 -0
- truthound/validators/timeout/advanced/sampling.py +788 -0
- truthound/validators/timeout/advanced/sla.py +661 -0
- truthound/validators/timeout/advanced/telemetry.py +804 -0
- truthound/validators/timeout/cascade.py +477 -0
- truthound/validators/timeout/deadline.py +657 -0
- truthound/validators/timeout/degradation.py +525 -0
- truthound/validators/timeout/distributed.py +597 -0
- truthound/validators/timeseries/__init__.py +89 -0
- truthound/validators/timeseries/base.py +326 -0
- truthound/validators/timeseries/completeness.py +617 -0
- truthound/validators/timeseries/gap.py +485 -0
- truthound/validators/timeseries/monotonic.py +310 -0
- truthound/validators/timeseries/seasonality.py +422 -0
- truthound/validators/timeseries/trend.py +510 -0
- truthound/validators/uniqueness/__init__.py +59 -0
- truthound/validators/uniqueness/approximate.py +475 -0
- truthound/validators/uniqueness/distinct_values.py +253 -0
- truthound/validators/uniqueness/duplicate.py +118 -0
- truthound/validators/uniqueness/primary_key.py +140 -0
- truthound/validators/uniqueness/unique.py +191 -0
- truthound/validators/uniqueness/within_record.py +599 -0
- truthound/validators/utils.py +756 -0
- truthound-1.0.8.dist-info/METADATA +474 -0
- truthound-1.0.8.dist-info/RECORD +877 -0
- truthound-1.0.8.dist-info/WHEEL +4 -0
- truthound-1.0.8.dist-info/entry_points.txt +2 -0
- truthound-1.0.8.dist-info/licenses/LICENSE +190 -0
|
@@ -0,0 +1,1000 @@
|
|
|
1
|
+
"""ML model implementations for ReDoS prediction.
|
|
2
|
+
|
|
3
|
+
This module provides concrete implementations of ReDoS prediction models,
|
|
4
|
+
including rule-based baseline, scikit-learn models, and ensemble methods.
|
|
5
|
+
|
|
6
|
+
Available Models:
|
|
7
|
+
- RuleBasedReDoSModel: Deterministic rule-based classifier (no ML deps)
|
|
8
|
+
- RandomForestReDoSModel: Random Forest classifier
|
|
9
|
+
- GradientBoostingReDoSModel: Gradient Boosting classifier
|
|
10
|
+
- LogisticRegressionReDoSModel: Logistic Regression classifier
|
|
11
|
+
- EnsembleReDoSModel: Combines multiple models for robust predictions
|
|
12
|
+
|
|
13
|
+
Example:
|
|
14
|
+
>>> from truthound.validators.security.redos.ml.models import (
|
|
15
|
+
... RandomForestReDoSModel,
|
|
16
|
+
... create_model,
|
|
17
|
+
... )
|
|
18
|
+
>>> model = create_model("random_forest")
|
|
19
|
+
>>> model.train(training_data)
|
|
20
|
+
>>> probability, confidence = model.predict(feature_vector)
|
|
21
|
+
"""
|
|
22
|
+
|
|
23
|
+
from __future__ import annotations
|
|
24
|
+
|
|
25
|
+
import logging
|
|
26
|
+
import math
|
|
27
|
+
import time
|
|
28
|
+
from pathlib import Path
|
|
29
|
+
from typing import Any, Dict, List, Optional, Tuple, Type
|
|
30
|
+
|
|
31
|
+
from truthound.validators.security.redos.ml.base import (
|
|
32
|
+
BaseReDoSModel,
|
|
33
|
+
ModelConfig,
|
|
34
|
+
ModelType,
|
|
35
|
+
PatternFeatures,
|
|
36
|
+
ReDoSModelMetrics,
|
|
37
|
+
ReDoSTrainingData,
|
|
38
|
+
)
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
logger = logging.getLogger(__name__)
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
# =============================================================================
|
|
45
|
+
# Rule-Based Model (No ML Dependencies)
|
|
46
|
+
# =============================================================================
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
class RuleBasedReDoSModel(BaseReDoSModel):
    """Rule-based model for ReDoS risk prediction.

    This model uses hand-crafted rules based on known ReDoS patterns
    to estimate risk. It serves as a baseline and fallback when
    ML models are not available or not trained.

    The model assigns weights to various pattern features and combines
    them using a logistic function to produce a probability.

    Attributes:
        FEATURE_WEIGHTS: Dictionary mapping feature names to weights
        BIAS: Bias term for logistic function
    """

    name = "rule_based"
    version = "1.0.0"

    # Per-feature weights of the linear score; features that are not listed
    # here contribute nothing to the prediction.
    FEATURE_WEIGHTS: Dict[str, float] = {
        "nested_quantifier_count": 5.0,
        "quantified_backreference_count": 4.0,
        "quantified_alternation_count": 3.5,
        "adjacent_quantifier_count": 2.5,
        "unbounded_quantifier_count": 1.5,
        "max_nesting_depth": 0.8,
        "star_count": 0.5,
        "plus_count": 0.5,
        "alternation_count": 0.3,
        "quantifier_density": 2.0,
        "backtracking_potential": 0.1,
    }

    BIAS = -2.0

    def __init__(self, config: ModelConfig | None = None):
        """Initialize the rule-based model."""
        super().__init__(config)
        self._trained = True  # Always ready

    def predict(self, features: List[float]) -> Tuple[float, float]:
        """Predict risk probability using rules.

        Args:
            features: Feature vector, positionally aligned with
                ``self._feature_names``.

        Returns:
            Tuple of (risk_probability, confidence)
        """
        feature_dict = dict(zip(self._feature_names, features))

        # Linear score: bias plus the weighted value of every known feature.
        # Kept as an explicit left-to-right accumulation so the floating-point
        # result does not depend on how ``sum()`` is implemented.
        weighted_sum = self.BIAS
        for feature_name, weight in self.FEATURE_WEIGHTS.items():
            if feature_name in feature_dict:
                weighted_sum += feature_dict[feature_name] * weight

        # Logistic function. math.exp() raises OverflowError once its argument
        # exceeds roughly 709, i.e. for strongly negative scores; the limit of
        # the logistic there is 0, so report that instead of crashing.
        try:
            probability = 1.0 / (1.0 + math.exp(-weighted_sum))
        except OverflowError:
            probability = 0.0

        # Confidence grows with the distance of the probability from 0.5:
        # 0.0 at exactly 0.5, 1.0 at either extreme.
        confidence = abs(2 * probability - 1)

        return probability, confidence

    def predict_batch(
        self, features: List[List[float]]
    ) -> List[Tuple[float, float]]:
        """Predict for multiple samples, one ``predict`` call per vector."""
        return [self.predict(f) for f in features]

    def train(self, data: ReDoSTrainingData) -> ReDoSModelMetrics:
        """Evaluate the fixed rules against labelled data.

        The rules are never fitted, so nothing is learned here. The method
        scores every sample in ``data`` with the current weights, compares
        the thresholded prediction (>= 0.5 means vulnerable) with the label,
        and stores and returns the resulting metrics.

        Args:
            data: Training data container. When ``data.features`` is None the
                feature vectors are extracted from ``data.patterns``.

        Returns:
            Metrics of the rule-based classifier on ``data``.
        """
        if data.features is None:
            from truthound.validators.security.redos.ml.features import (
                PatternFeatureExtractor,
            )

            extractor = PatternFeatureExtractor()
            features = [extractor.extract(p).to_vector() for p in data.patterns]
        else:
            features = data.features

        predictions = [self.predict(f)[0] for f in features]
        predicted_labels = [1 if p >= 0.5 else 0 for p in predictions]

        # Build the confusion counts in a single pass. Pairs whose label is
        # neither 0 nor 1 fall through every branch and are not counted.
        tp = tn = fp = fn = 0
        for predicted, actual in zip(predicted_labels, data.labels):
            if predicted == 1 and actual == 1:
                tp += 1
            elif predicted == 0 and actual == 0:
                tn += 1
            elif predicted == 1 and actual == 0:
                fp += 1
            elif predicted == 0 and actual == 1:
                fn += 1

        # The max(..., 1) / max(..., 1e-10) guards keep the ratios defined
        # when a denominator would otherwise be zero.
        accuracy = (tp + tn) / max(len(data), 1)
        precision = tp / max(tp + fp, 1)
        recall = tp / max(tp + fn, 1)
        f1 = 2 * precision * recall / max(precision + recall, 1e-10)
        specificity = tn / max(tn + fp, 1)

        self._metrics = ReDoSModelMetrics(
            accuracy=accuracy,
            precision=precision,
            recall=recall,
            f1_score=f1,
            specificity=specificity,
            confusion_matrix=[[tn, fp], [fn, tp]],
            training_samples=len(data),
        )

        return self._metrics

    def get_feature_importance(self) -> List[float]:
        """Get feature importance based on rule weights.

        Returns one value per entry of ``self._feature_names``; features
        without a rule weight get 0.0.
        """
        return [self.FEATURE_WEIGHTS.get(name, 0.0) for name in self._feature_names]

    def _save_model_data(self) -> Dict[str, Any]:
        """Save rule weights.

        The weights are copied so that a caller mutating the returned
        payload cannot alter the weights shared through the class attribute.
        """
        return {"weights": dict(self.FEATURE_WEIGHTS), "bias": self.BIAS}

    def _load_model_data(self, data: Dict[str, Any]) -> None:
        """Load rule weights.

        Values are stored on the instance, shadowing the class-level
        defaults; keys missing from ``data`` leave the current value as is.
        """
        if "weights" in data:
            # Copy so later changes to the caller's dict do not leak in.
            self.FEATURE_WEIGHTS = dict(data["weights"])
        if "bias" in data:
            self.BIAS = data["bias"]
|
|
181
|
+
|
|
182
|
+
|
|
183
|
+
# =============================================================================
|
|
184
|
+
# Scikit-Learn Based Models
|
|
185
|
+
# =============================================================================
|
|
186
|
+
|
|
187
|
+
|
|
188
|
+
def _check_sklearn_available() -> bool:
|
|
189
|
+
"""Check if scikit-learn is available."""
|
|
190
|
+
try:
|
|
191
|
+
import sklearn
|
|
192
|
+
|
|
193
|
+
return True
|
|
194
|
+
except ImportError:
|
|
195
|
+
return False
|
|
196
|
+
|
|
197
|
+
|
|
198
|
+
class RandomForestReDoSModel(BaseReDoSModel):
    """Random Forest classifier for ReDoS prediction.

    This model wraps scikit-learn's RandomForestClassifier. Until it has
    been trained (or when scikit-learn cannot be imported) predictions are
    delegated to RuleBasedReDoSModel.
    """

    name = "random_forest"
    version = "1.0.0"

    def __init__(self, config: ModelConfig | None = None):
        """Initialize the Random Forest model."""
        super().__init__(config)
        self._model: Any = None
        self._sklearn_available = _check_sklearn_available()
        # Set by train() when scikit-learn is unavailable; while it is set,
        # every prediction is answered by this rule-based model.
        self._rule_fallback: Optional[RuleBasedReDoSModel] = None

    def _vulnerable_probability(self, proba: Any) -> float:
        """Pick P(vulnerable) out of one row of ``predict_proba`` output.

        The columns of ``predict_proba`` follow the estimator's ``classes_``,
        so the column is looked up by label instead of by fixed position.
        A model fitted on a single class has only one column, and that column
        is not necessarily the vulnerable class.

        Assumes the vulnerable class is labelled 1, matching the ``== 1``
        comparisons used by the rule-based metrics in this file.
        """
        classes = getattr(self._model, "classes_", None)
        if classes is None:
            # Estimator does not expose its classes: keep positional lookup.
            return float(proba[1]) if len(proba) > 1 else float(proba[0])
        for index, label in enumerate(classes):
            if label == 1:
                return float(proba[index])
        # The model never saw a vulnerable sample, so it cannot predict one.
        return 0.0

    def predict(self, features: List[float]) -> Tuple[float, float]:
        """Predict risk probability.

        Falls back to rule-based model if not trained or sklearn unavailable.

        Args:
            features: Feature vector for a single pattern.

        Returns:
            Tuple of (risk_probability, confidence)
        """
        # getattr keeps this safe for instances created without __init__.
        rule_fallback = getattr(self, "_rule_fallback", None)
        if rule_fallback is not None:
            return rule_fallback.predict(features)

        if not self._trained or self._model is None:
            return RuleBasedReDoSModel(self._config).predict(features)

        import numpy as np

        X = np.array([features])
        proba = self._model.predict_proba(X)[0]

        probability = self._vulnerable_probability(proba)
        confidence = abs(probability - 0.5) * 2  # Confidence from certainty

        return probability, confidence

    def predict_batch(
        self, features: List[List[float]]
    ) -> List[Tuple[float, float]]:
        """Predict for multiple samples.

        Uses the same fallback rules as ``predict``.
        """
        rule_fallback = getattr(self, "_rule_fallback", None)
        if rule_fallback is not None:
            return rule_fallback.predict_batch(features)

        if not self._trained or self._model is None:
            fallback = RuleBasedReDoSModel(self._config)
            return fallback.predict_batch(features)

        import numpy as np

        X = np.array(features)
        probas = self._model.predict_proba(X)

        results = []
        for proba in probas:
            probability = self._vulnerable_probability(proba)
            confidence = abs(probability - 0.5) * 2
            results.append((probability, confidence))

        return results

    def train(self, data: ReDoSTrainingData) -> ReDoSModelMetrics:
        """Train the Random Forest model.

        Args:
            data: Training data container. When ``data.features`` is None the
                feature vectors are extracted from ``data.patterns``.

        Returns:
            Training metrics, measured on the held-out validation split.
            When scikit-learn is unavailable these are the rule-based
            model's metrics instead.
        """
        if not self._sklearn_available:
            logger.warning("scikit-learn not available, using rule-based fallback")
            # Use rule-based model internally but mark as trained
            self._rule_fallback = RuleBasedReDoSModel(self._config)
            metrics = self._rule_fallback.train(data)
            self._trained = True
            self._metrics = metrics
            return metrics

        import numpy as np
        from sklearn.ensemble import RandomForestClassifier
        from sklearn.metrics import (
            accuracy_score,
            confusion_matrix,
            f1_score,
            precision_score,
            recall_score,
            roc_auc_score,
        )
        from sklearn.model_selection import cross_val_score, train_test_split

        start_time = time.time()

        # Prepare features
        if data.features is None:
            from truthound.validators.security.redos.ml.features import (
                PatternFeatureExtractor,
            )

            extractor = PatternFeatureExtractor()
            X = np.array([extractor.extract(p).to_vector() for p in data.patterns])
        else:
            X = np.array(data.features)

        y = np.array(data.labels)

        # Split data; stratify only when more than one class is present.
        X_train, X_test, y_train, y_test = train_test_split(
            X,
            y,
            test_size=self._config.validation_split,
            random_state=self._config.random_state,
            stratify=y if len(set(y)) > 1 else None,
        )

        # Create and train model
        self._model = RandomForestClassifier(
            n_estimators=self._config.n_estimators,
            max_depth=self._config.max_depth,
            min_samples_split=self._config.min_samples_split,
            min_samples_leaf=self._config.min_samples_leaf,
            random_state=self._config.random_state,
            n_jobs=self._config.n_jobs,
            class_weight=self._config.class_weight,
        )

        self._model.fit(X_train, y_train)

        # Evaluate
        y_pred = self._model.predict(X_test)
        y_proba = self._model.predict_proba(X_test)

        # Cross-validation. The fold count is limited by the size of the
        # rarest class (each stratified fold needs a member of it), not by
        # the number of distinct labels. k-fold needs at least two folds, so
        # cross-validation is skipped when that cannot be satisfied.
        _, class_counts = np.unique(y, return_counts=True)
        n_folds = min(self._config.cross_validation_folds, int(class_counts.min()))
        if n_folds >= 2:
            cv_scores = cross_val_score(
                self._model,
                X,
                y,
                cv=n_folds,
                n_jobs=self._config.n_jobs,
            ).tolist()
        else:
            logger.warning(
                "Skipping cross-validation: need at least 2 folds, got %d", n_folds
            )
            cv_scores = []

        # Calculate metrics
        accuracy = accuracy_score(y_test, y_pred)
        precision = precision_score(y_test, y_pred, zero_division=0)
        recall = recall_score(y_test, y_pred, zero_division=0)
        f1 = f1_score(y_test, y_pred, zero_division=0)
        cm = confusion_matrix(y_test, y_pred)
        # Row 0 of the matrix holds the negative-class samples; with a 1x1
        # matrix (single class in the test split) specificity is reported as 0.
        specificity = cm[0, 0] / max(cm[0].sum(), 1) if len(cm) > 1 else 0.0

        # AUC-ROC needs both classes in the test split; it is best-effort,
        # so any failure leaves it unset rather than aborting training.
        try:
            auc = roc_auc_score(y_test, y_proba[:, 1]) if len(set(y_test)) > 1 else None
        except Exception:
            logger.debug("AUC-ROC could not be computed", exc_info=True)
            auc = None

        # Feature importances
        importances = dict(
            zip(self._feature_names, self._model.feature_importances_.tolist())
        )

        training_time = time.time() - start_time
        self._trained = True

        self._metrics = ReDoSModelMetrics(
            accuracy=accuracy,
            precision=precision,
            recall=recall,
            f1_score=f1,
            specificity=specificity,
            auc_roc=auc,
            confusion_matrix=cm.tolist(),
            feature_importances=importances,
            cross_val_scores=cv_scores,
            training_samples=len(data),
            training_time_seconds=training_time,
        )

        logger.info(f"Random Forest training complete. Accuracy: {accuracy:.2%}, F1: {f1:.2%}")

        return self._metrics

    def get_feature_importance(self) -> List[float]:
        """Get feature importance from trained model.

        Returns zeros (one per feature name) when no fitted model exposing
        ``feature_importances_`` is present.
        """
        if self._model is not None and hasattr(self._model, "feature_importances_"):
            return self._model.feature_importances_.tolist()
        return [0.0] * len(self._feature_names)

    def _save_model_data(self) -> Dict[str, Any]:
        """Save the sklearn model (None when no model has been fitted)."""
        return {"sklearn_model": self._model}

    def _load_model_data(self, data: Dict[str, Any]) -> None:
        """Load the sklearn model and mark the instance trained if present."""
        self._model = data.get("sklearn_model")
        if self._model is not None:
            self._trained = True
|
|
403
|
+
|
|
404
|
+
|
|
405
|
+
class GradientBoostingReDoSModel(BaseReDoSModel):
|
|
406
|
+
"""Gradient Boosting classifier for ReDoS prediction.
|
|
407
|
+
|
|
408
|
+
This model uses scikit-learn's GradientBoostingClassifier which
|
|
409
|
+
often provides higher accuracy than Random Forest through
|
|
410
|
+
sequential boosting.
|
|
411
|
+
|
|
412
|
+
Gradient Boosting provides:
|
|
413
|
+
- Often higher accuracy than Random Forest
|
|
414
|
+
- Good handling of imbalanced classes
|
|
415
|
+
- Built-in feature importance
|
|
416
|
+
"""
|
|
417
|
+
|
|
418
|
+
name = "gradient_boosting"
|
|
419
|
+
version = "1.0.0"
|
|
420
|
+
|
|
421
|
+
def __init__(self, config: ModelConfig | None = None):
|
|
422
|
+
"""Initialize the Gradient Boosting model."""
|
|
423
|
+
super().__init__(config)
|
|
424
|
+
self._model: Any = None
|
|
425
|
+
self._sklearn_available = _check_sklearn_available()
|
|
426
|
+
|
|
427
|
+
def predict(self, features: List[float]) -> Tuple[float, float]:
|
|
428
|
+
"""Predict risk probability."""
|
|
429
|
+
# Use rule fallback if sklearn wasn't available
|
|
430
|
+
if hasattr(self, "_rule_fallback") and self._rule_fallback is not None:
|
|
431
|
+
return self._rule_fallback.predict(features)
|
|
432
|
+
|
|
433
|
+
if not self._trained or self._model is None:
|
|
434
|
+
return RuleBasedReDoSModel(self._config).predict(features)
|
|
435
|
+
|
|
436
|
+
import numpy as np
|
|
437
|
+
|
|
438
|
+
X = np.array([features])
|
|
439
|
+
proba = self._model.predict_proba(X)[0]
|
|
440
|
+
|
|
441
|
+
probability = float(proba[1]) if len(proba) > 1 else float(proba[0])
|
|
442
|
+
confidence = abs(probability - 0.5) * 2
|
|
443
|
+
|
|
444
|
+
return probability, confidence
|
|
445
|
+
|
|
446
|
+
def predict_batch(
|
|
447
|
+
self, features: List[List[float]]
|
|
448
|
+
) -> List[Tuple[float, float]]:
|
|
449
|
+
"""Predict for multiple samples."""
|
|
450
|
+
# Use rule fallback if sklearn wasn't available
|
|
451
|
+
if hasattr(self, "_rule_fallback") and self._rule_fallback is not None:
|
|
452
|
+
return self._rule_fallback.predict_batch(features)
|
|
453
|
+
|
|
454
|
+
if not self._trained or self._model is None:
|
|
455
|
+
fallback = RuleBasedReDoSModel(self._config)
|
|
456
|
+
return fallback.predict_batch(features)
|
|
457
|
+
|
|
458
|
+
import numpy as np
|
|
459
|
+
|
|
460
|
+
X = np.array(features)
|
|
461
|
+
probas = self._model.predict_proba(X)
|
|
462
|
+
|
|
463
|
+
results = []
|
|
464
|
+
for proba in probas:
|
|
465
|
+
probability = float(proba[1]) if len(proba) > 1 else float(proba[0])
|
|
466
|
+
confidence = abs(probability - 0.5) * 2
|
|
467
|
+
results.append((probability, confidence))
|
|
468
|
+
|
|
469
|
+
return results
|
|
470
|
+
|
|
471
|
+
def train(self, data: ReDoSTrainingData) -> ReDoSModelMetrics:
    """Train the Gradient Boosting model and evaluate it on a hold-out split.

    Args:
        data: Training data. ``data.features`` is used when it is not
            ``None``; otherwise features are extracted from
            ``data.patterns``. ``data.labels`` supplies the targets.

    Returns:
        Metrics computed on the hold-out split together with the
        cross-validation scores. When scikit-learn is unavailable the
        metrics of the rule-based fallback are returned instead.
    """
    if not self._sklearn_available:
        logger.warning("scikit-learn not available, using rule-based fallback")
        self._rule_fallback = RuleBasedReDoSModel(self._config)
        metrics = self._rule_fallback.train(data)
        self._trained = True
        self._metrics = metrics
        return metrics

    import numpy as np
    from sklearn.ensemble import GradientBoostingClassifier
    from sklearn.metrics import (
        accuracy_score,
        confusion_matrix,
        f1_score,
        precision_score,
        recall_score,
        roc_auc_score,
    )
    from sklearn.model_selection import cross_val_score, train_test_split

    start_time = time.time()

    # Prepare features
    if data.features is None:
        from truthound.validators.security.redos.ml.features import (
            PatternFeatureExtractor,
        )

        extractor = PatternFeatureExtractor()
        X = np.array([extractor.extract(p).to_vector() for p in data.patterns])
    else:
        X = np.array(data.features)

    y = np.array(data.labels)

    # Split data (stratified only when more than one class is present)
    X_train, X_test, y_train, y_test = train_test_split(
        X,
        y,
        test_size=self._config.validation_split,
        random_state=self._config.random_state,
        stratify=y if len(set(y)) > 1 else None,
    )

    # Create and train model (GB works better with shallow trees)
    self._model = GradientBoostingClassifier(
        n_estimators=self._config.n_estimators,
        max_depth=min(self._config.max_depth, 5),
        min_samples_split=self._config.min_samples_split,
        min_samples_leaf=self._config.min_samples_leaf,
        learning_rate=self._config.learning_rate,
        random_state=self._config.random_state,
    )

    self._model.fit(X_train, y_train)

    # Evaluate
    y_pred = self._model.predict(X_test)
    y_proba = self._model.predict_proba(X_test)

    # Cross-validation. The number of folds is limited by the size of the
    # smallest class (not by the number of classes), so that every
    # stratified fold can contain each class. Fewer than two folds is not a
    # valid cross-validation, so it is skipped in that case.
    _, class_counts = np.unique(y, return_counts=True)
    n_folds = min(self._config.cross_validation_folds, int(class_counts.min()))
    if n_folds >= 2:
        cv_scores = cross_val_score(self._model, X, y, cv=n_folds)
    else:
        cv_scores = np.array([])

    # Calculate metrics
    accuracy = accuracy_score(y_test, y_pred)
    precision = precision_score(y_test, y_pred, zero_division=0)
    recall = recall_score(y_test, y_pred, zero_division=0)
    f1 = f1_score(y_test, y_pred, zero_division=0)
    cm = confusion_matrix(y_test, y_pred)
    # First row of the confusion matrix: share of class-0 samples that were
    # predicted as class 0. Undefined (reported as 0.0) for a 1x1 matrix.
    specificity = float(cm[0, 0] / max(cm[0].sum(), 1)) if len(cm) > 1 else 0.0

    # AUC is best-effort: it needs both classes in the hold-out split.
    auc = None
    if len(set(y_test)) > 1:
        try:
            auc = roc_auc_score(y_test, y_proba[:, 1])
        except Exception as exc:
            logger.debug("Could not compute ROC AUC: %s", exc)
            auc = None

    importances = dict(
        zip(self._feature_names, self._model.feature_importances_.tolist())
    )

    training_time = time.time() - start_time
    self._trained = True

    self._metrics = ReDoSModelMetrics(
        accuracy=accuracy,
        precision=precision,
        recall=recall,
        f1_score=f1,
        specificity=specificity,
        auc_roc=auc,
        confusion_matrix=cm.tolist(),
        feature_importances=importances,
        cross_val_scores=cv_scores.tolist(),
        training_samples=len(data),
        training_time_seconds=training_time,
    )

    logger.info(f"Gradient Boosting training complete. Accuracy: {accuracy:.2%}, F1: {f1:.2%}")

    return self._metrics
|
|
578
|
+
|
|
579
|
+
def get_feature_importance(self) -> List[float]:
    """Return the per-feature importances of the current estimator.

    Returns:
        The estimator's ``feature_importances_`` as a plain list, or a
        list of zeros (one per feature name) when no estimator exposing
        that attribute is set.
    """
    estimator = self._model
    if estimator is None or not hasattr(estimator, "feature_importances_"):
        zero_count = len(self._feature_names)
        return [0.0] * zero_count
    return estimator.feature_importances_.tolist()
|
|
584
|
+
|
|
585
|
+
def _save_model_data(self) -> Dict[str, Any]:
    """Return a dict holding the current estimator under ``sklearn_model``."""
    payload: Dict[str, Any] = {}
    payload["sklearn_model"] = self._model
    return payload
|
|
588
|
+
|
|
589
|
+
def _load_model_data(self, data: Dict[str, Any]) -> None:
    """Restore the estimator from a dict produced by ``_save_model_data``.

    Args:
        data: Mapping that may contain the estimator under
            ``sklearn_model``. A missing key leaves the model as ``None``.
    """
    restored = data.get("sklearn_model")
    self._model = restored
    if restored is None:
        return
    # A restored estimator counts as trained.
    self._trained = True
|
|
594
|
+
|
|
595
|
+
|
|
596
|
+
class LogisticRegressionReDoSModel(BaseReDoSModel):
    """Logistic Regression classifier for ReDoS prediction.

    Simple linear model that provides interpretable coefficients
    and fast training/inference. Features are standardised with a
    ``StandardScaler`` during training, and the same scaler is applied
    to every feature vector at prediction time.
    """

    name = "logistic_regression"
    version = "1.0.0"

    def __init__(self, config: ModelConfig | None = None):
        """Initialize the Logistic Regression model.

        Args:
            config: Optional model configuration, forwarded to the base class.
        """
        super().__init__(config)
        self._model: Any = None
        # Scaler fitted by train(); None until trained or loaded.
        self._scaler: Any = None
        # Rule-based stand-in installed by train() when sklearn is missing.
        self._rule_fallback: Any = None
        self._sklearn_available = _check_sklearn_available()

    def _scale(self, X: Any) -> Any:
        """Apply the training-time scaler to ``X`` if one is available."""
        scaler = getattr(self, "_scaler", None)
        if scaler is None:
            # e.g. model data loaded without a scaler: use features as-is.
            return X
        return scaler.transform(X)

    def predict(self, features: List[float]) -> Tuple[float, float]:
        """Predict risk probability for a single sample.

        Args:
            features: Feature vector of one sample.

        Returns:
            Tuple of ``(probability, confidence)``.
        """
        # Use rule fallback if sklearn wasn't available
        rule_fallback = getattr(self, "_rule_fallback", None)
        if rule_fallback is not None:
            return rule_fallback.predict(features)

        if not self._trained or self._model is None:
            return RuleBasedReDoSModel(self._config).predict(features)

        import numpy as np

        # The model was fitted on scaled features, so inference must use
        # the same transformation.
        X = self._scale(np.array([features]))
        proba = self._model.predict_proba(X)[0]

        probability = float(proba[1]) if len(proba) > 1 else float(proba[0])
        # Distance from the 0.5 decision boundary, rescaled to [0, 1].
        confidence = abs(probability - 0.5) * 2

        return probability, confidence

    def predict_batch(
        self, features: List[List[float]]
    ) -> List[Tuple[float, float]]:
        """Predict for multiple samples.

        Args:
            features: One feature vector per sample.

        Returns:
            One ``(probability, confidence)`` tuple per sample, in input
            order. An empty batch yields an empty list.
        """
        # Use rule fallback if sklearn wasn't available
        rule_fallback = getattr(self, "_rule_fallback", None)
        if rule_fallback is not None:
            return rule_fallback.predict_batch(features)

        if not self._trained or self._model is None:
            fallback = RuleBasedReDoSModel(self._config)
            return fallback.predict_batch(features)

        # An empty batch becomes a 1-D array, which the scaler and
        # predict_proba reject; there is nothing to score.
        if len(features) == 0:
            return []

        import numpy as np

        X = self._scale(np.array(features))
        probas = self._model.predict_proba(X)

        results = []
        for proba in probas:
            probability = float(proba[1]) if len(proba) > 1 else float(proba[0])
            confidence = abs(probability - 0.5) * 2
            results.append((probability, confidence))

        return results

    def train(self, data: ReDoSTrainingData) -> ReDoSModelMetrics:
        """Train the Logistic Regression model and evaluate it.

        Args:
            data: Training data. ``data.features`` is used when it is not
                ``None``; otherwise features are extracted from
                ``data.patterns``. ``data.labels`` supplies the targets.

        Returns:
            Metrics computed on the hold-out split together with the
            cross-validation scores. When scikit-learn is unavailable the
            metrics of the rule-based fallback are returned instead.
        """
        if not self._sklearn_available:
            logger.warning("scikit-learn not available, using rule-based fallback")
            self._rule_fallback = RuleBasedReDoSModel(self._config)
            metrics = self._rule_fallback.train(data)
            self._trained = True
            self._metrics = metrics
            return metrics

        import numpy as np
        from sklearn.linear_model import LogisticRegression
        from sklearn.metrics import (
            accuracy_score,
            confusion_matrix,
            f1_score,
            precision_score,
            recall_score,
            roc_auc_score,
        )
        from sklearn.model_selection import cross_val_score, train_test_split
        from sklearn.preprocessing import StandardScaler

        start_time = time.time()

        # Prepare features
        if data.features is None:
            from truthound.validators.security.redos.ml.features import (
                PatternFeatureExtractor,
            )

            extractor = PatternFeatureExtractor()
            X = np.array([extractor.extract(p).to_vector() for p in data.patterns])
        else:
            X = np.array(data.features)

        y = np.array(data.labels)

        # Scale features for logistic regression. The fitted scaler is kept
        # so that predict()/predict_batch() can apply the same transform.
        # NOTE(review): the scaler is fitted on the full dataset before the
        # split, so hold-out rows influence the scaling statistics.
        self._scaler = StandardScaler()
        X_scaled = self._scaler.fit_transform(X)

        # Split data (stratified only when more than one class is present)
        X_train, X_test, y_train, y_test = train_test_split(
            X_scaled,
            y,
            test_size=self._config.validation_split,
            random_state=self._config.random_state,
            stratify=y if len(set(y)) > 1 else None,
        )

        # Create and train model
        self._model = LogisticRegression(
            random_state=self._config.random_state,
            class_weight=self._config.class_weight,
            max_iter=1000,
            n_jobs=self._config.n_jobs,
        )

        self._model.fit(X_train, y_train)

        # Evaluate
        y_pred = self._model.predict(X_test)
        y_proba = self._model.predict_proba(X_test)

        # Cross-validation. The number of folds is limited by the size of
        # the smallest class (not by the number of classes), so that every
        # stratified fold can contain each class. Fewer than two folds is
        # not a valid cross-validation, so it is skipped in that case.
        _, class_counts = np.unique(y, return_counts=True)
        n_folds = min(self._config.cross_validation_folds, int(class_counts.min()))
        if n_folds >= 2:
            cv_scores = cross_val_score(self._model, X_scaled, y, cv=n_folds)
        else:
            cv_scores = np.array([])

        # Calculate metrics
        accuracy = accuracy_score(y_test, y_pred)
        precision = precision_score(y_test, y_pred, zero_division=0)
        recall = recall_score(y_test, y_pred, zero_division=0)
        f1 = f1_score(y_test, y_pred, zero_division=0)
        cm = confusion_matrix(y_test, y_pred)
        # First row of the confusion matrix: share of class-0 samples that
        # were predicted as class 0. Reported as 0.0 for a 1x1 matrix.
        specificity = float(cm[0, 0] / max(cm[0].sum(), 1)) if len(cm) > 1 else 0.0

        # AUC is best-effort: it needs both classes in the hold-out split.
        auc = None
        if len(set(y_test)) > 1:
            try:
                auc = roc_auc_score(y_test, y_proba[:, 1])
            except Exception as exc:
                logger.debug("Could not compute ROC AUC: %s", exc)
                auc = None

        # Feature importance from coefficients
        coeffs = self._model.coef_[0] if len(self._model.coef_.shape) > 1 else self._model.coef_
        importances = dict(zip(self._feature_names, np.abs(coeffs).tolist()))

        training_time = time.time() - start_time
        self._trained = True

        self._metrics = ReDoSModelMetrics(
            accuracy=accuracy,
            precision=precision,
            recall=recall,
            f1_score=f1,
            specificity=specificity,
            auc_roc=auc,
            confusion_matrix=cm.tolist(),
            feature_importances=importances,
            cross_val_scores=cv_scores.tolist(),
            training_samples=len(data),
            training_time_seconds=training_time,
        )

        logger.info(f"Logistic Regression training complete. Accuracy: {accuracy:.2%}, F1: {f1:.2%}")

        return self._metrics

    def get_feature_importance(self) -> List[float]:
        """Get feature importance as absolute coefficient values.

        Returns:
            ``abs(coef_)`` of the fitted model as a list, or a list of
            zeros (one per feature name) when no fitted model is set.
        """
        if self._model is not None and hasattr(self._model, "coef_"):
            import numpy as np

            coeffs = self._model.coef_[0] if len(self._model.coef_.shape) > 1 else self._model.coef_
            return np.abs(coeffs).tolist()
        return [0.0] * len(self._feature_names)

    def _save_model_data(self) -> Dict[str, Any]:
        """Save the sklearn model and scaler."""
        return {
            "sklearn_model": self._model,
            "scaler": getattr(self, "_scaler", None),
        }

    def _load_model_data(self, data: Dict[str, Any]) -> None:
        """Load the sklearn model and scaler.

        Args:
            data: Mapping with optional ``sklearn_model`` and ``scaler``
                entries. A loaded model marks the instance as trained.
        """
        self._model = data.get("sklearn_model")
        self._scaler = data.get("scaler")
        if self._model is not None:
            self._trained = True
|
|
789
|
+
|
|
790
|
+
|
|
791
|
+
class EnsembleReDoSModel(BaseReDoSModel):
    """Ensemble that blends several ReDoS risk estimators.

    Three sources can contribute to a prediction:

    - the rule-based baseline model (always present),
    - an ML model, once one is attached and trained,
    - regex signatures of known dangerous constructs, when the original
      pattern text is supplied.

    The blend weights depend on which of these sources are available for
    a given call, so the ensemble still answers without any training.
    """

    name = "ensemble"
    version = "1.0.0"

    # Known dangerous pattern signatures with risk scores
    DANGEROUS_SIGNATURES: List[Tuple[str, float]] = [
        (r"\([^)]*[+*][^)]*\)[+*]", 0.95),  # Nested quantifiers
        (r"\\[1-9][+*]", 0.85),  # Quantified backreference
        (r"\([^)]*\|[^)]*\)[+*]", 0.75),  # Quantified alternation
        (r"[+*][+*]", 0.65),  # Adjacent quantifiers
        (r"\([^)]*\)\{[\d,]+\}\{", 0.70),  # Nested bounded quantifiers
    ]

    def __init__(
        self,
        config: ModelConfig | None = None,
        ml_model: BaseReDoSModel | None = None,
    ):
        """Set up the rule model, the optional ML model and the signatures.

        Args:
            config: Model configuration
            ml_model: Optional ML model to include in the ensemble
        """
        import re

        super().__init__(config)
        self._rule_model = RuleBasedReDoSModel(config)
        self._ml_model = ml_model
        # The rule-based component needs no fitting, so the ensemble is
        # usable from the start.
        self._trained = True

        # Pre-compile every signature once per instance.
        compiled = []
        for signature, risk in self.DANGEROUS_SIGNATURES:
            compiled.append((re.compile(signature), risk))
        self._compiled_signatures = compiled

    def predict(
        self, features: List[float], pattern: str = ""
    ) -> Tuple[float, float]:
        """Blend rule, ML and signature estimates into one prediction.

        Args:
            features: Feature vector
            pattern: Original pattern text; signature matching is only
                performed when this is non-empty

        Returns:
            Tuple of (risk_probability, confidence)
        """
        # Rule-based estimate is always computed.
        rule_prob, rule_conf = self._rule_model.predict(features)

        # ML estimate, only when a trained model is attached.
        ml_ready = self._ml_model is not None and self._ml_model.is_trained
        if ml_ready:
            ml_prob, ml_conf = self._ml_model.predict(features)
        else:
            ml_prob, ml_conf = 0.0, 0.0

        # Highest risk among the signatures found in the pattern text.
        matched_risks = []
        if pattern:
            matched_risks = [
                risk
                for regex, risk in self._compiled_signatures
                if regex.search(pattern)
            ]
        sig_prob = max([0.0] + matched_risks)
        signature_hit = sig_prob > 0

        if ml_ready and signature_hit:
            # All three sources available.
            return (
                0.4 * ml_prob + 0.35 * sig_prob + 0.25 * rule_prob,
                max(ml_conf, 0.9),
            )
        if ml_ready:
            # ML and rules only.
            return 0.6 * ml_prob + 0.4 * rule_prob, (ml_conf + rule_conf) / 2
        if signature_hit:
            # Signatures and rules only.
            return 0.6 * sig_prob + 0.4 * rule_prob, max(rule_conf, 0.9)
        # Rules alone.
        return rule_prob, rule_conf

    def predict_batch(
        self, features: List[List[float]]
    ) -> List[Tuple[float, float]]:
        """Predict each sample in turn, without pattern text."""
        predictions = []
        for vector in features:
            predictions.append(self.predict(vector))
        return predictions

    def predict_with_pattern(
        self, features: List[float], pattern: str
    ) -> Tuple[float, float]:
        """Predict for one sample, supplying its pattern text to ``predict``."""
        return self.predict(features, pattern)

    def train(self, data: ReDoSTrainingData) -> ReDoSModelMetrics:
        """Train the ML component of the ensemble.

        If no ML model is attached yet, a ``RandomForestReDoSModel`` is
        created with this ensemble's configuration. The rule-based
        component is not trained here.

        Returns:
            The metrics returned by the ML model's own ``train``.
        """
        if self._ml_model is None:
            self._ml_model = RandomForestReDoSModel(self._config)

        ml_metrics = self._ml_model.train(data)
        self._metrics = ml_metrics
        return ml_metrics

    def get_feature_importance(self) -> List[float]:
        """Return feature importances from the ML model, else the rule model.

        The ML model is consulted only when it is attached and trained.
        """
        ml_model = self._ml_model
        if ml_model is None or not ml_model.is_trained:
            return self._rule_model.get_feature_importance()
        return ml_model.get_feature_importance()

    def _save_model_data(self) -> Dict[str, Any]:
        """Return the ML component's data and type name for saving.

        Both entries are ``None`` when no ML model is attached.
        """
        ml_model = self._ml_model
        if ml_model is None:
            return {"ml_model_data": None, "ml_model_type": None}
        return {
            "ml_model_data": ml_model._save_model_data(),
            "ml_model_type": ml_model.name,
        }

    def _load_model_data(self, data: Dict[str, Any]) -> None:
        """Rebuild the ML component from saved ensemble data.

        Nothing happens unless both ``ml_model_type`` and
        ``ml_model_data`` are present and truthy.
        """
        if not (data.get("ml_model_type") and data.get("ml_model_data")):
            return
        self._ml_model = create_model(data["ml_model_type"], self._config)
        self._ml_model._load_model_data(data["ml_model_data"])
|
|
939
|
+
|
|
940
|
+
|
|
941
|
+
# =============================================================================
|
|
942
|
+
# Model Registry and Factory
|
|
943
|
+
# =============================================================================
|
|
944
|
+
|
|
945
|
+
|
|
946
|
+
# Maps each model-type name to the class that implements it.
MODEL_REGISTRY: Dict[str, Type[BaseReDoSModel]] = dict(
    rule_based=RuleBasedReDoSModel,
    random_forest=RandomForestReDoSModel,
    gradient_boosting=GradientBoostingReDoSModel,
    logistic_regression=LogisticRegressionReDoSModel,
    ensemble=EnsembleReDoSModel,
)
|
|
953
|
+
|
|
954
|
+
|
|
955
|
+
def create_model(
    model_type: str | ModelType,
    config: ModelConfig | None = None,
) -> BaseReDoSModel:
    """Instantiate the ReDoS model registered under ``model_type``.

    Args:
        model_type: Registry name, or a ``ModelType`` member whose value
            is the registry name
        config: Optional model configuration passed to the constructor

    Returns:
        A new instance of the registered model class

    Raises:
        ValueError: If no model is registered under that name
    """
    # Enum members are reduced to their value before the lookup.
    key = model_type.value if isinstance(model_type, ModelType) else model_type

    model_class = MODEL_REGISTRY.get(key)
    if model_class is not None:
        return model_class(config)

    available = ", ".join(MODEL_REGISTRY)
    raise ValueError(
        f"Unknown model type: {key}. Available types: {available}"
    )
|
|
982
|
+
|
|
983
|
+
|
|
984
|
+
def register_model(name: str, model_class: Type[BaseReDoSModel]) -> None:
    """Add a model class to the registry, replacing any entry of that name.

    Args:
        name: Registry key for the model
        model_class: Class to store under that key
    """
    MODEL_REGISTRY.update({name: model_class})
|
|
992
|
+
|
|
993
|
+
|
|
994
|
+
def list_available_models() -> List[str]:
    """Return the names currently present in the model registry.

    Returns:
        Registry keys as a new list, in registry order
    """
    return [*MODEL_REGISTRY]
|