truthound 1.0.8__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- truthound/__init__.py +162 -0
- truthound/adapters.py +100 -0
- truthound/api.py +365 -0
- truthound/audit/__init__.py +248 -0
- truthound/audit/core.py +967 -0
- truthound/audit/filters.py +620 -0
- truthound/audit/formatters.py +707 -0
- truthound/audit/logger.py +902 -0
- truthound/audit/middleware.py +571 -0
- truthound/audit/storage.py +1083 -0
- truthound/benchmark/__init__.py +123 -0
- truthound/benchmark/base.py +757 -0
- truthound/benchmark/comparison.py +635 -0
- truthound/benchmark/generators.py +706 -0
- truthound/benchmark/reporters.py +718 -0
- truthound/benchmark/runner.py +635 -0
- truthound/benchmark/scenarios.py +712 -0
- truthound/cache.py +252 -0
- truthound/checkpoint/__init__.py +136 -0
- truthound/checkpoint/actions/__init__.py +164 -0
- truthound/checkpoint/actions/base.py +324 -0
- truthound/checkpoint/actions/custom.py +234 -0
- truthound/checkpoint/actions/discord_notify.py +290 -0
- truthound/checkpoint/actions/email_notify.py +405 -0
- truthound/checkpoint/actions/github_action.py +406 -0
- truthound/checkpoint/actions/opsgenie.py +1499 -0
- truthound/checkpoint/actions/pagerduty.py +226 -0
- truthound/checkpoint/actions/slack_notify.py +233 -0
- truthound/checkpoint/actions/store_result.py +249 -0
- truthound/checkpoint/actions/teams_notify.py +1570 -0
- truthound/checkpoint/actions/telegram_notify.py +419 -0
- truthound/checkpoint/actions/update_docs.py +552 -0
- truthound/checkpoint/actions/webhook.py +293 -0
- truthound/checkpoint/analytics/__init__.py +147 -0
- truthound/checkpoint/analytics/aggregations/__init__.py +23 -0
- truthound/checkpoint/analytics/aggregations/rollup.py +481 -0
- truthound/checkpoint/analytics/aggregations/time_bucket.py +306 -0
- truthound/checkpoint/analytics/analyzers/__init__.py +17 -0
- truthound/checkpoint/analytics/analyzers/anomaly.py +386 -0
- truthound/checkpoint/analytics/analyzers/base.py +270 -0
- truthound/checkpoint/analytics/analyzers/forecast.py +421 -0
- truthound/checkpoint/analytics/analyzers/trend.py +314 -0
- truthound/checkpoint/analytics/models.py +292 -0
- truthound/checkpoint/analytics/protocols.py +549 -0
- truthound/checkpoint/analytics/service.py +718 -0
- truthound/checkpoint/analytics/stores/__init__.py +16 -0
- truthound/checkpoint/analytics/stores/base.py +306 -0
- truthound/checkpoint/analytics/stores/memory_store.py +353 -0
- truthound/checkpoint/analytics/stores/sqlite_store.py +557 -0
- truthound/checkpoint/analytics/stores/timescale_store.py +501 -0
- truthound/checkpoint/async_actions.py +794 -0
- truthound/checkpoint/async_base.py +708 -0
- truthound/checkpoint/async_checkpoint.py +617 -0
- truthound/checkpoint/async_runner.py +639 -0
- truthound/checkpoint/checkpoint.py +527 -0
- truthound/checkpoint/ci/__init__.py +61 -0
- truthound/checkpoint/ci/detector.py +355 -0
- truthound/checkpoint/ci/reporter.py +436 -0
- truthound/checkpoint/ci/templates.py +454 -0
- truthound/checkpoint/circuitbreaker/__init__.py +133 -0
- truthound/checkpoint/circuitbreaker/breaker.py +542 -0
- truthound/checkpoint/circuitbreaker/core.py +252 -0
- truthound/checkpoint/circuitbreaker/detection.py +459 -0
- truthound/checkpoint/circuitbreaker/middleware.py +389 -0
- truthound/checkpoint/circuitbreaker/registry.py +357 -0
- truthound/checkpoint/distributed/__init__.py +139 -0
- truthound/checkpoint/distributed/backends/__init__.py +35 -0
- truthound/checkpoint/distributed/backends/celery_backend.py +503 -0
- truthound/checkpoint/distributed/backends/kubernetes_backend.py +696 -0
- truthound/checkpoint/distributed/backends/local_backend.py +397 -0
- truthound/checkpoint/distributed/backends/ray_backend.py +625 -0
- truthound/checkpoint/distributed/base.py +774 -0
- truthound/checkpoint/distributed/orchestrator.py +765 -0
- truthound/checkpoint/distributed/protocols.py +842 -0
- truthound/checkpoint/distributed/registry.py +449 -0
- truthound/checkpoint/idempotency/__init__.py +120 -0
- truthound/checkpoint/idempotency/core.py +295 -0
- truthound/checkpoint/idempotency/fingerprint.py +454 -0
- truthound/checkpoint/idempotency/locking.py +604 -0
- truthound/checkpoint/idempotency/service.py +592 -0
- truthound/checkpoint/idempotency/stores.py +653 -0
- truthound/checkpoint/monitoring/__init__.py +134 -0
- truthound/checkpoint/monitoring/aggregators/__init__.py +15 -0
- truthound/checkpoint/monitoring/aggregators/base.py +372 -0
- truthound/checkpoint/monitoring/aggregators/realtime.py +300 -0
- truthound/checkpoint/monitoring/aggregators/window.py +493 -0
- truthound/checkpoint/monitoring/collectors/__init__.py +17 -0
- truthound/checkpoint/monitoring/collectors/base.py +257 -0
- truthound/checkpoint/monitoring/collectors/memory_collector.py +617 -0
- truthound/checkpoint/monitoring/collectors/prometheus_collector.py +451 -0
- truthound/checkpoint/monitoring/collectors/redis_collector.py +518 -0
- truthound/checkpoint/monitoring/events.py +410 -0
- truthound/checkpoint/monitoring/protocols.py +636 -0
- truthound/checkpoint/monitoring/service.py +578 -0
- truthound/checkpoint/monitoring/views/__init__.py +17 -0
- truthound/checkpoint/monitoring/views/base.py +172 -0
- truthound/checkpoint/monitoring/views/queue_view.py +220 -0
- truthound/checkpoint/monitoring/views/task_view.py +240 -0
- truthound/checkpoint/monitoring/views/worker_view.py +263 -0
- truthound/checkpoint/registry.py +337 -0
- truthound/checkpoint/runner.py +356 -0
- truthound/checkpoint/transaction/__init__.py +133 -0
- truthound/checkpoint/transaction/base.py +389 -0
- truthound/checkpoint/transaction/compensatable.py +537 -0
- truthound/checkpoint/transaction/coordinator.py +576 -0
- truthound/checkpoint/transaction/executor.py +622 -0
- truthound/checkpoint/transaction/idempotency.py +534 -0
- truthound/checkpoint/transaction/saga/__init__.py +143 -0
- truthound/checkpoint/transaction/saga/builder.py +584 -0
- truthound/checkpoint/transaction/saga/definition.py +515 -0
- truthound/checkpoint/transaction/saga/event_store.py +542 -0
- truthound/checkpoint/transaction/saga/patterns.py +833 -0
- truthound/checkpoint/transaction/saga/runner.py +718 -0
- truthound/checkpoint/transaction/saga/state_machine.py +793 -0
- truthound/checkpoint/transaction/saga/strategies.py +780 -0
- truthound/checkpoint/transaction/saga/testing.py +886 -0
- truthound/checkpoint/triggers/__init__.py +58 -0
- truthound/checkpoint/triggers/base.py +237 -0
- truthound/checkpoint/triggers/event.py +385 -0
- truthound/checkpoint/triggers/schedule.py +355 -0
- truthound/cli.py +2358 -0
- truthound/cli_modules/__init__.py +124 -0
- truthound/cli_modules/advanced/__init__.py +45 -0
- truthound/cli_modules/advanced/benchmark.py +343 -0
- truthound/cli_modules/advanced/docs.py +225 -0
- truthound/cli_modules/advanced/lineage.py +209 -0
- truthound/cli_modules/advanced/ml.py +320 -0
- truthound/cli_modules/advanced/realtime.py +196 -0
- truthound/cli_modules/checkpoint/__init__.py +46 -0
- truthound/cli_modules/checkpoint/init.py +114 -0
- truthound/cli_modules/checkpoint/list.py +71 -0
- truthound/cli_modules/checkpoint/run.py +159 -0
- truthound/cli_modules/checkpoint/validate.py +67 -0
- truthound/cli_modules/common/__init__.py +71 -0
- truthound/cli_modules/common/errors.py +414 -0
- truthound/cli_modules/common/options.py +419 -0
- truthound/cli_modules/common/output.py +507 -0
- truthound/cli_modules/common/protocol.py +552 -0
- truthound/cli_modules/core/__init__.py +48 -0
- truthound/cli_modules/core/check.py +123 -0
- truthound/cli_modules/core/compare.py +104 -0
- truthound/cli_modules/core/learn.py +57 -0
- truthound/cli_modules/core/mask.py +77 -0
- truthound/cli_modules/core/profile.py +65 -0
- truthound/cli_modules/core/scan.py +61 -0
- truthound/cli_modules/profiler/__init__.py +51 -0
- truthound/cli_modules/profiler/auto_profile.py +175 -0
- truthound/cli_modules/profiler/metadata.py +107 -0
- truthound/cli_modules/profiler/suite.py +283 -0
- truthound/cli_modules/registry.py +431 -0
- truthound/cli_modules/scaffolding/__init__.py +89 -0
- truthound/cli_modules/scaffolding/base.py +631 -0
- truthound/cli_modules/scaffolding/commands.py +545 -0
- truthound/cli_modules/scaffolding/plugins.py +1072 -0
- truthound/cli_modules/scaffolding/reporters.py +594 -0
- truthound/cli_modules/scaffolding/validators.py +1127 -0
- truthound/common/__init__.py +18 -0
- truthound/common/resilience/__init__.py +130 -0
- truthound/common/resilience/bulkhead.py +266 -0
- truthound/common/resilience/circuit_breaker.py +516 -0
- truthound/common/resilience/composite.py +332 -0
- truthound/common/resilience/config.py +292 -0
- truthound/common/resilience/protocols.py +217 -0
- truthound/common/resilience/rate_limiter.py +404 -0
- truthound/common/resilience/retry.py +341 -0
- truthound/datadocs/__init__.py +260 -0
- truthound/datadocs/base.py +571 -0
- truthound/datadocs/builder.py +761 -0
- truthound/datadocs/charts.py +764 -0
- truthound/datadocs/dashboard/__init__.py +63 -0
- truthound/datadocs/dashboard/app.py +576 -0
- truthound/datadocs/dashboard/components.py +584 -0
- truthound/datadocs/dashboard/state.py +240 -0
- truthound/datadocs/engine/__init__.py +46 -0
- truthound/datadocs/engine/context.py +376 -0
- truthound/datadocs/engine/pipeline.py +618 -0
- truthound/datadocs/engine/registry.py +469 -0
- truthound/datadocs/exporters/__init__.py +49 -0
- truthound/datadocs/exporters/base.py +198 -0
- truthound/datadocs/exporters/html.py +178 -0
- truthound/datadocs/exporters/json_exporter.py +253 -0
- truthound/datadocs/exporters/markdown.py +284 -0
- truthound/datadocs/exporters/pdf.py +392 -0
- truthound/datadocs/i18n/__init__.py +86 -0
- truthound/datadocs/i18n/catalog.py +960 -0
- truthound/datadocs/i18n/formatting.py +505 -0
- truthound/datadocs/i18n/loader.py +256 -0
- truthound/datadocs/i18n/plurals.py +378 -0
- truthound/datadocs/renderers/__init__.py +42 -0
- truthound/datadocs/renderers/base.py +401 -0
- truthound/datadocs/renderers/custom.py +342 -0
- truthound/datadocs/renderers/jinja.py +697 -0
- truthound/datadocs/sections.py +736 -0
- truthound/datadocs/styles.py +931 -0
- truthound/datadocs/themes/__init__.py +101 -0
- truthound/datadocs/themes/base.py +336 -0
- truthound/datadocs/themes/default.py +417 -0
- truthound/datadocs/themes/enterprise.py +419 -0
- truthound/datadocs/themes/loader.py +336 -0
- truthound/datadocs/themes.py +301 -0
- truthound/datadocs/transformers/__init__.py +57 -0
- truthound/datadocs/transformers/base.py +268 -0
- truthound/datadocs/transformers/enrichers.py +544 -0
- truthound/datadocs/transformers/filters.py +447 -0
- truthound/datadocs/transformers/i18n.py +468 -0
- truthound/datadocs/versioning/__init__.py +62 -0
- truthound/datadocs/versioning/diff.py +639 -0
- truthound/datadocs/versioning/storage.py +497 -0
- truthound/datadocs/versioning/version.py +358 -0
- truthound/datasources/__init__.py +223 -0
- truthound/datasources/_async_protocols.py +222 -0
- truthound/datasources/_protocols.py +159 -0
- truthound/datasources/adapters.py +428 -0
- truthound/datasources/async_base.py +599 -0
- truthound/datasources/async_factory.py +511 -0
- truthound/datasources/base.py +516 -0
- truthound/datasources/factory.py +433 -0
- truthound/datasources/nosql/__init__.py +47 -0
- truthound/datasources/nosql/base.py +487 -0
- truthound/datasources/nosql/elasticsearch.py +801 -0
- truthound/datasources/nosql/mongodb.py +636 -0
- truthound/datasources/pandas_optimized.py +582 -0
- truthound/datasources/pandas_source.py +216 -0
- truthound/datasources/polars_source.py +395 -0
- truthound/datasources/spark_source.py +479 -0
- truthound/datasources/sql/__init__.py +154 -0
- truthound/datasources/sql/base.py +710 -0
- truthound/datasources/sql/bigquery.py +410 -0
- truthound/datasources/sql/cloud_base.py +199 -0
- truthound/datasources/sql/databricks.py +471 -0
- truthound/datasources/sql/mysql.py +316 -0
- truthound/datasources/sql/oracle.py +427 -0
- truthound/datasources/sql/postgresql.py +321 -0
- truthound/datasources/sql/redshift.py +479 -0
- truthound/datasources/sql/snowflake.py +439 -0
- truthound/datasources/sql/sqlite.py +286 -0
- truthound/datasources/sql/sqlserver.py +437 -0
- truthound/datasources/streaming/__init__.py +47 -0
- truthound/datasources/streaming/base.py +350 -0
- truthound/datasources/streaming/kafka.py +670 -0
- truthound/decorators.py +98 -0
- truthound/docs/__init__.py +69 -0
- truthound/docs/extractor.py +971 -0
- truthound/docs/generator.py +601 -0
- truthound/docs/parser.py +1037 -0
- truthound/docs/renderer.py +999 -0
- truthound/drift/__init__.py +22 -0
- truthound/drift/compare.py +189 -0
- truthound/drift/detectors.py +464 -0
- truthound/drift/report.py +160 -0
- truthound/execution/__init__.py +65 -0
- truthound/execution/_protocols.py +324 -0
- truthound/execution/base.py +576 -0
- truthound/execution/distributed/__init__.py +179 -0
- truthound/execution/distributed/aggregations.py +731 -0
- truthound/execution/distributed/arrow_bridge.py +817 -0
- truthound/execution/distributed/base.py +550 -0
- truthound/execution/distributed/dask_engine.py +976 -0
- truthound/execution/distributed/mixins.py +766 -0
- truthound/execution/distributed/protocols.py +756 -0
- truthound/execution/distributed/ray_engine.py +1127 -0
- truthound/execution/distributed/registry.py +446 -0
- truthound/execution/distributed/spark_engine.py +1011 -0
- truthound/execution/distributed/validator_adapter.py +682 -0
- truthound/execution/pandas_engine.py +401 -0
- truthound/execution/polars_engine.py +497 -0
- truthound/execution/pushdown/__init__.py +230 -0
- truthound/execution/pushdown/ast.py +1550 -0
- truthound/execution/pushdown/builder.py +1550 -0
- truthound/execution/pushdown/dialects.py +1072 -0
- truthound/execution/pushdown/executor.py +829 -0
- truthound/execution/pushdown/optimizer.py +1041 -0
- truthound/execution/sql_engine.py +518 -0
- truthound/infrastructure/__init__.py +189 -0
- truthound/infrastructure/audit.py +1515 -0
- truthound/infrastructure/config.py +1133 -0
- truthound/infrastructure/encryption.py +1132 -0
- truthound/infrastructure/logging.py +1503 -0
- truthound/infrastructure/metrics.py +1220 -0
- truthound/lineage/__init__.py +89 -0
- truthound/lineage/base.py +746 -0
- truthound/lineage/impact_analysis.py +474 -0
- truthound/lineage/integrations/__init__.py +22 -0
- truthound/lineage/integrations/openlineage.py +548 -0
- truthound/lineage/tracker.py +512 -0
- truthound/lineage/visualization/__init__.py +33 -0
- truthound/lineage/visualization/protocols.py +145 -0
- truthound/lineage/visualization/renderers/__init__.py +20 -0
- truthound/lineage/visualization/renderers/cytoscape.py +329 -0
- truthound/lineage/visualization/renderers/d3.py +331 -0
- truthound/lineage/visualization/renderers/graphviz.py +276 -0
- truthound/lineage/visualization/renderers/mermaid.py +308 -0
- truthound/maskers.py +113 -0
- truthound/ml/__init__.py +124 -0
- truthound/ml/anomaly_models/__init__.py +31 -0
- truthound/ml/anomaly_models/ensemble.py +362 -0
- truthound/ml/anomaly_models/isolation_forest.py +444 -0
- truthound/ml/anomaly_models/statistical.py +392 -0
- truthound/ml/base.py +1178 -0
- truthound/ml/drift_detection/__init__.py +26 -0
- truthound/ml/drift_detection/concept.py +381 -0
- truthound/ml/drift_detection/distribution.py +361 -0
- truthound/ml/drift_detection/feature.py +442 -0
- truthound/ml/drift_detection/multivariate.py +495 -0
- truthound/ml/monitoring/__init__.py +88 -0
- truthound/ml/monitoring/alerting/__init__.py +33 -0
- truthound/ml/monitoring/alerting/handlers.py +427 -0
- truthound/ml/monitoring/alerting/rules.py +508 -0
- truthound/ml/monitoring/collectors/__init__.py +19 -0
- truthound/ml/monitoring/collectors/composite.py +105 -0
- truthound/ml/monitoring/collectors/drift.py +324 -0
- truthound/ml/monitoring/collectors/performance.py +179 -0
- truthound/ml/monitoring/collectors/quality.py +369 -0
- truthound/ml/monitoring/monitor.py +536 -0
- truthound/ml/monitoring/protocols.py +451 -0
- truthound/ml/monitoring/stores/__init__.py +15 -0
- truthound/ml/monitoring/stores/memory.py +201 -0
- truthound/ml/monitoring/stores/prometheus.py +296 -0
- truthound/ml/rule_learning/__init__.py +25 -0
- truthound/ml/rule_learning/constraint_miner.py +443 -0
- truthound/ml/rule_learning/pattern_learner.py +499 -0
- truthound/ml/rule_learning/profile_learner.py +462 -0
- truthound/multitenancy/__init__.py +326 -0
- truthound/multitenancy/core.py +852 -0
- truthound/multitenancy/integration.py +597 -0
- truthound/multitenancy/isolation.py +630 -0
- truthound/multitenancy/manager.py +770 -0
- truthound/multitenancy/middleware.py +765 -0
- truthound/multitenancy/quota.py +537 -0
- truthound/multitenancy/resolvers.py +603 -0
- truthound/multitenancy/storage.py +703 -0
- truthound/observability/__init__.py +307 -0
- truthound/observability/context.py +531 -0
- truthound/observability/instrumentation.py +611 -0
- truthound/observability/logging.py +887 -0
- truthound/observability/metrics.py +1157 -0
- truthound/observability/tracing/__init__.py +178 -0
- truthound/observability/tracing/baggage.py +310 -0
- truthound/observability/tracing/config.py +426 -0
- truthound/observability/tracing/exporter.py +787 -0
- truthound/observability/tracing/integration.py +1018 -0
- truthound/observability/tracing/otel/__init__.py +146 -0
- truthound/observability/tracing/otel/adapter.py +982 -0
- truthound/observability/tracing/otel/bridge.py +1177 -0
- truthound/observability/tracing/otel/compat.py +681 -0
- truthound/observability/tracing/otel/config.py +691 -0
- truthound/observability/tracing/otel/detection.py +327 -0
- truthound/observability/tracing/otel/protocols.py +426 -0
- truthound/observability/tracing/processor.py +561 -0
- truthound/observability/tracing/propagator.py +757 -0
- truthound/observability/tracing/provider.py +569 -0
- truthound/observability/tracing/resource.py +515 -0
- truthound/observability/tracing/sampler.py +487 -0
- truthound/observability/tracing/span.py +676 -0
- truthound/plugins/__init__.py +198 -0
- truthound/plugins/base.py +599 -0
- truthound/plugins/cli.py +680 -0
- truthound/plugins/dependencies/__init__.py +42 -0
- truthound/plugins/dependencies/graph.py +422 -0
- truthound/plugins/dependencies/resolver.py +417 -0
- truthound/plugins/discovery.py +379 -0
- truthound/plugins/docs/__init__.py +46 -0
- truthound/plugins/docs/extractor.py +444 -0
- truthound/plugins/docs/renderer.py +499 -0
- truthound/plugins/enterprise_manager.py +877 -0
- truthound/plugins/examples/__init__.py +19 -0
- truthound/plugins/examples/custom_validators.py +317 -0
- truthound/plugins/examples/slack_notifier.py +312 -0
- truthound/plugins/examples/xml_reporter.py +254 -0
- truthound/plugins/hooks.py +558 -0
- truthound/plugins/lifecycle/__init__.py +43 -0
- truthound/plugins/lifecycle/hot_reload.py +402 -0
- truthound/plugins/lifecycle/manager.py +371 -0
- truthound/plugins/manager.py +736 -0
- truthound/plugins/registry.py +338 -0
- truthound/plugins/security/__init__.py +93 -0
- truthound/plugins/security/exceptions.py +332 -0
- truthound/plugins/security/policies.py +348 -0
- truthound/plugins/security/protocols.py +643 -0
- truthound/plugins/security/sandbox/__init__.py +45 -0
- truthound/plugins/security/sandbox/context.py +158 -0
- truthound/plugins/security/sandbox/engines/__init__.py +19 -0
- truthound/plugins/security/sandbox/engines/container.py +379 -0
- truthound/plugins/security/sandbox/engines/noop.py +144 -0
- truthound/plugins/security/sandbox/engines/process.py +336 -0
- truthound/plugins/security/sandbox/factory.py +211 -0
- truthound/plugins/security/signing/__init__.py +57 -0
- truthound/plugins/security/signing/service.py +330 -0
- truthound/plugins/security/signing/trust_store.py +368 -0
- truthound/plugins/security/signing/verifier.py +459 -0
- truthound/plugins/versioning/__init__.py +41 -0
- truthound/plugins/versioning/constraints.py +297 -0
- truthound/plugins/versioning/resolver.py +329 -0
- truthound/profiler/__init__.py +1729 -0
- truthound/profiler/_lazy.py +452 -0
- truthound/profiler/ab_testing/__init__.py +80 -0
- truthound/profiler/ab_testing/analysis.py +449 -0
- truthound/profiler/ab_testing/base.py +257 -0
- truthound/profiler/ab_testing/experiment.py +395 -0
- truthound/profiler/ab_testing/tracking.py +368 -0
- truthound/profiler/auto_threshold.py +1170 -0
- truthound/profiler/base.py +579 -0
- truthound/profiler/cache_patterns.py +911 -0
- truthound/profiler/caching.py +1303 -0
- truthound/profiler/column_profiler.py +712 -0
- truthound/profiler/comparison.py +1007 -0
- truthound/profiler/custom_patterns.py +1170 -0
- truthound/profiler/dashboard/__init__.py +50 -0
- truthound/profiler/dashboard/app.py +476 -0
- truthound/profiler/dashboard/components.py +457 -0
- truthound/profiler/dashboard/config.py +72 -0
- truthound/profiler/distributed/__init__.py +83 -0
- truthound/profiler/distributed/base.py +281 -0
- truthound/profiler/distributed/dask_backend.py +498 -0
- truthound/profiler/distributed/local_backend.py +293 -0
- truthound/profiler/distributed/profiler.py +304 -0
- truthound/profiler/distributed/ray_backend.py +374 -0
- truthound/profiler/distributed/spark_backend.py +375 -0
- truthound/profiler/distributed.py +1366 -0
- truthound/profiler/enterprise_sampling.py +1065 -0
- truthound/profiler/errors.py +488 -0
- truthound/profiler/evolution/__init__.py +91 -0
- truthound/profiler/evolution/alerts.py +426 -0
- truthound/profiler/evolution/changes.py +206 -0
- truthound/profiler/evolution/compatibility.py +365 -0
- truthound/profiler/evolution/detector.py +372 -0
- truthound/profiler/evolution/protocols.py +121 -0
- truthound/profiler/generators/__init__.py +48 -0
- truthound/profiler/generators/base.py +384 -0
- truthound/profiler/generators/ml_rules.py +375 -0
- truthound/profiler/generators/pattern_rules.py +384 -0
- truthound/profiler/generators/schema_rules.py +267 -0
- truthound/profiler/generators/stats_rules.py +324 -0
- truthound/profiler/generators/suite_generator.py +857 -0
- truthound/profiler/i18n.py +1542 -0
- truthound/profiler/incremental.py +554 -0
- truthound/profiler/incremental_validation.py +1710 -0
- truthound/profiler/integration/__init__.py +73 -0
- truthound/profiler/integration/adapters.py +345 -0
- truthound/profiler/integration/context.py +371 -0
- truthound/profiler/integration/executor.py +527 -0
- truthound/profiler/integration/naming.py +75 -0
- truthound/profiler/integration/protocols.py +243 -0
- truthound/profiler/memory.py +1185 -0
- truthound/profiler/migration/__init__.py +60 -0
- truthound/profiler/migration/base.py +345 -0
- truthound/profiler/migration/manager.py +444 -0
- truthound/profiler/migration/v1_0_to_v1_1.py +484 -0
- truthound/profiler/ml/__init__.py +73 -0
- truthound/profiler/ml/base.py +244 -0
- truthound/profiler/ml/classifier.py +507 -0
- truthound/profiler/ml/feature_extraction.py +604 -0
- truthound/profiler/ml/pretrained.py +448 -0
- truthound/profiler/ml_inference.py +1276 -0
- truthound/profiler/native_patterns.py +815 -0
- truthound/profiler/observability.py +1184 -0
- truthound/profiler/process_timeout.py +1566 -0
- truthound/profiler/progress.py +568 -0
- truthound/profiler/progress_callbacks.py +1734 -0
- truthound/profiler/quality.py +1345 -0
- truthound/profiler/resilience.py +1180 -0
- truthound/profiler/sampled_matcher.py +794 -0
- truthound/profiler/sampling.py +1288 -0
- truthound/profiler/scheduling/__init__.py +82 -0
- truthound/profiler/scheduling/protocols.py +214 -0
- truthound/profiler/scheduling/scheduler.py +474 -0
- truthound/profiler/scheduling/storage.py +457 -0
- truthound/profiler/scheduling/triggers.py +449 -0
- truthound/profiler/schema.py +603 -0
- truthound/profiler/streaming.py +685 -0
- truthound/profiler/streaming_patterns.py +1354 -0
- truthound/profiler/suite_cli.py +625 -0
- truthound/profiler/suite_config.py +789 -0
- truthound/profiler/suite_export.py +1268 -0
- truthound/profiler/table_profiler.py +547 -0
- truthound/profiler/timeout.py +565 -0
- truthound/profiler/validation.py +1532 -0
- truthound/profiler/visualization/__init__.py +118 -0
- truthound/profiler/visualization/base.py +346 -0
- truthound/profiler/visualization/generator.py +1259 -0
- truthound/profiler/visualization/plotly_renderer.py +811 -0
- truthound/profiler/visualization/renderers.py +669 -0
- truthound/profiler/visualization/sections.py +540 -0
- truthound/profiler/visualization.py +2122 -0
- truthound/profiler/yaml_validation.py +1151 -0
- truthound/py.typed +0 -0
- truthound/ratelimit/__init__.py +248 -0
- truthound/ratelimit/algorithms.py +1108 -0
- truthound/ratelimit/core.py +573 -0
- truthound/ratelimit/integration.py +532 -0
- truthound/ratelimit/limiter.py +663 -0
- truthound/ratelimit/middleware.py +700 -0
- truthound/ratelimit/policy.py +792 -0
- truthound/ratelimit/storage.py +763 -0
- truthound/rbac/__init__.py +340 -0
- truthound/rbac/core.py +976 -0
- truthound/rbac/integration.py +760 -0
- truthound/rbac/manager.py +1052 -0
- truthound/rbac/middleware.py +842 -0
- truthound/rbac/policy.py +954 -0
- truthound/rbac/storage.py +878 -0
- truthound/realtime/__init__.py +141 -0
- truthound/realtime/adapters/__init__.py +43 -0
- truthound/realtime/adapters/base.py +533 -0
- truthound/realtime/adapters/kafka.py +487 -0
- truthound/realtime/adapters/kinesis.py +479 -0
- truthound/realtime/adapters/mock.py +243 -0
- truthound/realtime/base.py +553 -0
- truthound/realtime/factory.py +382 -0
- truthound/realtime/incremental.py +660 -0
- truthound/realtime/processing/__init__.py +67 -0
- truthound/realtime/processing/exactly_once.py +575 -0
- truthound/realtime/processing/state.py +547 -0
- truthound/realtime/processing/windows.py +647 -0
- truthound/realtime/protocols.py +569 -0
- truthound/realtime/streaming.py +605 -0
- truthound/realtime/testing/__init__.py +32 -0
- truthound/realtime/testing/containers.py +615 -0
- truthound/realtime/testing/fixtures.py +484 -0
- truthound/report.py +280 -0
- truthound/reporters/__init__.py +46 -0
- truthound/reporters/_protocols.py +30 -0
- truthound/reporters/base.py +324 -0
- truthound/reporters/ci/__init__.py +66 -0
- truthound/reporters/ci/azure.py +436 -0
- truthound/reporters/ci/base.py +509 -0
- truthound/reporters/ci/bitbucket.py +567 -0
- truthound/reporters/ci/circleci.py +547 -0
- truthound/reporters/ci/detection.py +364 -0
- truthound/reporters/ci/factory.py +182 -0
- truthound/reporters/ci/github.py +388 -0
- truthound/reporters/ci/gitlab.py +471 -0
- truthound/reporters/ci/jenkins.py +525 -0
- truthound/reporters/console_reporter.py +299 -0
- truthound/reporters/factory.py +211 -0
- truthound/reporters/html_reporter.py +524 -0
- truthound/reporters/json_reporter.py +256 -0
- truthound/reporters/markdown_reporter.py +280 -0
- truthound/reporters/sdk/__init__.py +174 -0
- truthound/reporters/sdk/builder.py +558 -0
- truthound/reporters/sdk/mixins.py +1150 -0
- truthound/reporters/sdk/schema.py +1493 -0
- truthound/reporters/sdk/templates.py +666 -0
- truthound/reporters/sdk/testing.py +968 -0
- truthound/scanners.py +170 -0
- truthound/scheduling/__init__.py +122 -0
- truthound/scheduling/cron.py +1136 -0
- truthound/scheduling/presets.py +212 -0
- truthound/schema.py +275 -0
- truthound/secrets/__init__.py +173 -0
- truthound/secrets/base.py +618 -0
- truthound/secrets/cloud.py +682 -0
- truthound/secrets/integration.py +507 -0
- truthound/secrets/manager.py +633 -0
- truthound/secrets/oidc/__init__.py +172 -0
- truthound/secrets/oidc/base.py +902 -0
- truthound/secrets/oidc/credential_provider.py +623 -0
- truthound/secrets/oidc/exchangers.py +1001 -0
- truthound/secrets/oidc/github/__init__.py +110 -0
- truthound/secrets/oidc/github/claims.py +718 -0
- truthound/secrets/oidc/github/enhanced_provider.py +693 -0
- truthound/secrets/oidc/github/trust_policy.py +742 -0
- truthound/secrets/oidc/github/verification.py +723 -0
- truthound/secrets/oidc/github/workflow.py +691 -0
- truthound/secrets/oidc/providers.py +825 -0
- truthound/secrets/providers.py +506 -0
- truthound/secrets/resolver.py +495 -0
- truthound/stores/__init__.py +177 -0
- truthound/stores/backends/__init__.py +18 -0
- truthound/stores/backends/_protocols.py +340 -0
- truthound/stores/backends/azure_blob.py +530 -0
- truthound/stores/backends/concurrent_filesystem.py +915 -0
- truthound/stores/backends/connection_pool.py +1365 -0
- truthound/stores/backends/database.py +743 -0
- truthound/stores/backends/filesystem.py +538 -0
- truthound/stores/backends/gcs.py +399 -0
- truthound/stores/backends/memory.py +354 -0
- truthound/stores/backends/s3.py +434 -0
- truthound/stores/backpressure/__init__.py +84 -0
- truthound/stores/backpressure/base.py +375 -0
- truthound/stores/backpressure/circuit_breaker.py +434 -0
- truthound/stores/backpressure/monitor.py +376 -0
- truthound/stores/backpressure/strategies.py +677 -0
- truthound/stores/base.py +551 -0
- truthound/stores/batching/__init__.py +65 -0
- truthound/stores/batching/base.py +305 -0
- truthound/stores/batching/buffer.py +370 -0
- truthound/stores/batching/store.py +248 -0
- truthound/stores/batching/writer.py +521 -0
- truthound/stores/caching/__init__.py +60 -0
- truthound/stores/caching/backends.py +684 -0
- truthound/stores/caching/base.py +356 -0
- truthound/stores/caching/store.py +305 -0
- truthound/stores/compression/__init__.py +193 -0
- truthound/stores/compression/adaptive.py +694 -0
- truthound/stores/compression/base.py +514 -0
- truthound/stores/compression/pipeline.py +868 -0
- truthound/stores/compression/providers.py +672 -0
- truthound/stores/compression/streaming.py +832 -0
- truthound/stores/concurrency/__init__.py +81 -0
- truthound/stores/concurrency/atomic.py +556 -0
- truthound/stores/concurrency/index.py +775 -0
- truthound/stores/concurrency/locks.py +576 -0
- truthound/stores/concurrency/manager.py +482 -0
- truthound/stores/encryption/__init__.py +297 -0
- truthound/stores/encryption/base.py +952 -0
- truthound/stores/encryption/keys.py +1191 -0
- truthound/stores/encryption/pipeline.py +903 -0
- truthound/stores/encryption/providers.py +953 -0
- truthound/stores/encryption/streaming.py +950 -0
- truthound/stores/expectations.py +227 -0
- truthound/stores/factory.py +246 -0
- truthound/stores/migration/__init__.py +75 -0
- truthound/stores/migration/base.py +480 -0
- truthound/stores/migration/manager.py +347 -0
- truthound/stores/migration/registry.py +382 -0
- truthound/stores/migration/store.py +559 -0
- truthound/stores/observability/__init__.py +106 -0
- truthound/stores/observability/audit.py +718 -0
- truthound/stores/observability/config.py +270 -0
- truthound/stores/observability/factory.py +208 -0
- truthound/stores/observability/metrics.py +636 -0
- truthound/stores/observability/protocols.py +410 -0
- truthound/stores/observability/store.py +570 -0
- truthound/stores/observability/tracing.py +784 -0
- truthound/stores/replication/__init__.py +76 -0
- truthound/stores/replication/base.py +260 -0
- truthound/stores/replication/monitor.py +269 -0
- truthound/stores/replication/store.py +439 -0
- truthound/stores/replication/syncer.py +391 -0
- truthound/stores/results.py +359 -0
- truthound/stores/retention/__init__.py +77 -0
- truthound/stores/retention/base.py +378 -0
- truthound/stores/retention/policies.py +621 -0
- truthound/stores/retention/scheduler.py +279 -0
- truthound/stores/retention/store.py +526 -0
- truthound/stores/streaming/__init__.py +138 -0
- truthound/stores/streaming/base.py +801 -0
- truthound/stores/streaming/database.py +984 -0
- truthound/stores/streaming/filesystem.py +719 -0
- truthound/stores/streaming/reader.py +629 -0
- truthound/stores/streaming/s3.py +843 -0
- truthound/stores/streaming/writer.py +790 -0
- truthound/stores/tiering/__init__.py +108 -0
- truthound/stores/tiering/base.py +462 -0
- truthound/stores/tiering/manager.py +249 -0
- truthound/stores/tiering/policies.py +692 -0
- truthound/stores/tiering/store.py +526 -0
- truthound/stores/versioning/__init__.py +56 -0
- truthound/stores/versioning/base.py +376 -0
- truthound/stores/versioning/store.py +660 -0
- truthound/stores/versioning/strategies.py +353 -0
- truthound/types.py +56 -0
- truthound/validators/__init__.py +774 -0
- truthound/validators/aggregate/__init__.py +27 -0
- truthound/validators/aggregate/central.py +116 -0
- truthound/validators/aggregate/extremes.py +116 -0
- truthound/validators/aggregate/spread.py +118 -0
- truthound/validators/aggregate/sum.py +64 -0
- truthound/validators/aggregate/type.py +78 -0
- truthound/validators/anomaly/__init__.py +93 -0
- truthound/validators/anomaly/base.py +431 -0
- truthound/validators/anomaly/ml_based.py +1190 -0
- truthound/validators/anomaly/multivariate.py +647 -0
- truthound/validators/anomaly/statistical.py +599 -0
- truthound/validators/base.py +1089 -0
- truthound/validators/business_rule/__init__.py +46 -0
- truthound/validators/business_rule/base.py +147 -0
- truthound/validators/business_rule/checksum.py +509 -0
- truthound/validators/business_rule/financial.py +526 -0
- truthound/validators/cache.py +733 -0
- truthound/validators/completeness/__init__.py +39 -0
- truthound/validators/completeness/conditional.py +73 -0
- truthound/validators/completeness/default.py +98 -0
- truthound/validators/completeness/empty.py +103 -0
- truthound/validators/completeness/nan.py +337 -0
- truthound/validators/completeness/null.py +152 -0
- truthound/validators/cross_table/__init__.py +17 -0
- truthound/validators/cross_table/aggregate.py +333 -0
- truthound/validators/cross_table/row_count.py +122 -0
- truthound/validators/datetime/__init__.py +29 -0
- truthound/validators/datetime/format.py +78 -0
- truthound/validators/datetime/freshness.py +269 -0
- truthound/validators/datetime/order.py +73 -0
- truthound/validators/datetime/parseable.py +185 -0
- truthound/validators/datetime/range.py +202 -0
- truthound/validators/datetime/timezone.py +69 -0
- truthound/validators/distribution/__init__.py +49 -0
- truthound/validators/distribution/distribution.py +128 -0
- truthound/validators/distribution/monotonic.py +119 -0
- truthound/validators/distribution/outlier.py +178 -0
- truthound/validators/distribution/quantile.py +80 -0
- truthound/validators/distribution/range.py +254 -0
- truthound/validators/distribution/set.py +125 -0
- truthound/validators/distribution/statistical.py +459 -0
- truthound/validators/drift/__init__.py +79 -0
- truthound/validators/drift/base.py +427 -0
- truthound/validators/drift/multi_feature.py +401 -0
- truthound/validators/drift/numeric.py +395 -0
- truthound/validators/drift/psi.py +446 -0
- truthound/validators/drift/statistical.py +510 -0
- truthound/validators/enterprise.py +1658 -0
- truthound/validators/geospatial/__init__.py +80 -0
- truthound/validators/geospatial/base.py +97 -0
- truthound/validators/geospatial/boundary.py +238 -0
- truthound/validators/geospatial/coordinate.py +351 -0
- truthound/validators/geospatial/distance.py +399 -0
- truthound/validators/geospatial/polygon.py +665 -0
- truthound/validators/i18n/__init__.py +308 -0
- truthound/validators/i18n/bidi.py +571 -0
- truthound/validators/i18n/catalogs.py +570 -0
- truthound/validators/i18n/dialects.py +763 -0
- truthound/validators/i18n/extended_catalogs.py +549 -0
- truthound/validators/i18n/formatting.py +1434 -0
- truthound/validators/i18n/loader.py +1020 -0
- truthound/validators/i18n/messages.py +521 -0
- truthound/validators/i18n/plural.py +683 -0
- truthound/validators/i18n/protocols.py +855 -0
- truthound/validators/i18n/tms.py +1162 -0
- truthound/validators/localization/__init__.py +53 -0
- truthound/validators/localization/base.py +122 -0
- truthound/validators/localization/chinese.py +362 -0
- truthound/validators/localization/japanese.py +275 -0
- truthound/validators/localization/korean.py +524 -0
- truthound/validators/memory/__init__.py +94 -0
- truthound/validators/memory/approximate_knn.py +506 -0
- truthound/validators/memory/base.py +547 -0
- truthound/validators/memory/sgd_online.py +719 -0
- truthound/validators/memory/streaming_ecdf.py +753 -0
- truthound/validators/ml_feature/__init__.py +54 -0
- truthound/validators/ml_feature/base.py +249 -0
- truthound/validators/ml_feature/correlation.py +299 -0
- truthound/validators/ml_feature/leakage.py +344 -0
- truthound/validators/ml_feature/null_impact.py +270 -0
- truthound/validators/ml_feature/scale.py +264 -0
- truthound/validators/multi_column/__init__.py +89 -0
- truthound/validators/multi_column/arithmetic.py +284 -0
- truthound/validators/multi_column/base.py +231 -0
- truthound/validators/multi_column/comparison.py +273 -0
- truthound/validators/multi_column/consistency.py +312 -0
- truthound/validators/multi_column/statistical.py +299 -0
- truthound/validators/optimization/__init__.py +164 -0
- truthound/validators/optimization/aggregation.py +563 -0
- truthound/validators/optimization/covariance.py +556 -0
- truthound/validators/optimization/geo.py +626 -0
- truthound/validators/optimization/graph.py +587 -0
- truthound/validators/optimization/orchestrator.py +970 -0
- truthound/validators/optimization/profiling.py +1312 -0
- truthound/validators/privacy/__init__.py +223 -0
- truthound/validators/privacy/base.py +635 -0
- truthound/validators/privacy/ccpa.py +670 -0
- truthound/validators/privacy/gdpr.py +728 -0
- truthound/validators/privacy/global_patterns.py +604 -0
- truthound/validators/privacy/plugins.py +867 -0
- truthound/validators/profiling/__init__.py +52 -0
- truthound/validators/profiling/base.py +175 -0
- truthound/validators/profiling/cardinality.py +312 -0
- truthound/validators/profiling/entropy.py +391 -0
- truthound/validators/profiling/frequency.py +455 -0
- truthound/validators/pushdown_support.py +660 -0
- truthound/validators/query/__init__.py +91 -0
- truthound/validators/query/aggregate.py +346 -0
- truthound/validators/query/base.py +246 -0
- truthound/validators/query/column.py +249 -0
- truthound/validators/query/expression.py +274 -0
- truthound/validators/query/result.py +323 -0
- truthound/validators/query/row_count.py +264 -0
- truthound/validators/referential/__init__.py +80 -0
- truthound/validators/referential/base.py +395 -0
- truthound/validators/referential/cascade.py +391 -0
- truthound/validators/referential/circular.py +563 -0
- truthound/validators/referential/foreign_key.py +624 -0
- truthound/validators/referential/orphan.py +485 -0
- truthound/validators/registry.py +112 -0
- truthound/validators/schema/__init__.py +41 -0
- truthound/validators/schema/column_count.py +142 -0
- truthound/validators/schema/column_exists.py +80 -0
- truthound/validators/schema/column_order.py +82 -0
- truthound/validators/schema/column_pair.py +85 -0
- truthound/validators/schema/column_pair_set.py +195 -0
- truthound/validators/schema/column_type.py +94 -0
- truthound/validators/schema/multi_column.py +53 -0
- truthound/validators/schema/multi_column_aggregate.py +175 -0
- truthound/validators/schema/referential.py +274 -0
- truthound/validators/schema/table_schema.py +91 -0
- truthound/validators/schema_validator.py +219 -0
- truthound/validators/sdk/__init__.py +250 -0
- truthound/validators/sdk/builder.py +680 -0
- truthound/validators/sdk/decorators.py +474 -0
- truthound/validators/sdk/enterprise/__init__.py +211 -0
- truthound/validators/sdk/enterprise/docs.py +725 -0
- truthound/validators/sdk/enterprise/fuzzing.py +659 -0
- truthound/validators/sdk/enterprise/licensing.py +709 -0
- truthound/validators/sdk/enterprise/manager.py +543 -0
- truthound/validators/sdk/enterprise/resources.py +628 -0
- truthound/validators/sdk/enterprise/sandbox.py +766 -0
- truthound/validators/sdk/enterprise/signing.py +603 -0
- truthound/validators/sdk/enterprise/templates.py +865 -0
- truthound/validators/sdk/enterprise/versioning.py +659 -0
- truthound/validators/sdk/templates.py +757 -0
- truthound/validators/sdk/testing.py +807 -0
- truthound/validators/security/__init__.py +181 -0
- truthound/validators/security/redos/__init__.py +182 -0
- truthound/validators/security/redos/core.py +861 -0
- truthound/validators/security/redos/cpu_monitor.py +593 -0
- truthound/validators/security/redos/cve_database.py +791 -0
- truthound/validators/security/redos/ml/__init__.py +155 -0
- truthound/validators/security/redos/ml/base.py +785 -0
- truthound/validators/security/redos/ml/datasets.py +618 -0
- truthound/validators/security/redos/ml/features.py +359 -0
- truthound/validators/security/redos/ml/models.py +1000 -0
- truthound/validators/security/redos/ml/predictor.py +507 -0
- truthound/validators/security/redos/ml/storage.py +632 -0
- truthound/validators/security/redos/ml/training.py +571 -0
- truthound/validators/security/redos/ml_analyzer.py +937 -0
- truthound/validators/security/redos/optimizer.py +674 -0
- truthound/validators/security/redos/profiler.py +682 -0
- truthound/validators/security/redos/re2_engine.py +709 -0
- truthound/validators/security/redos.py +886 -0
- truthound/validators/security/sql_security.py +1247 -0
- truthound/validators/streaming/__init__.py +126 -0
- truthound/validators/streaming/base.py +292 -0
- truthound/validators/streaming/completeness.py +210 -0
- truthound/validators/streaming/mixin.py +575 -0
- truthound/validators/streaming/range.py +308 -0
- truthound/validators/streaming/sources.py +846 -0
- truthound/validators/string/__init__.py +57 -0
- truthound/validators/string/casing.py +158 -0
- truthound/validators/string/charset.py +96 -0
- truthound/validators/string/format.py +501 -0
- truthound/validators/string/json.py +77 -0
- truthound/validators/string/json_schema.py +184 -0
- truthound/validators/string/length.py +104 -0
- truthound/validators/string/like_pattern.py +237 -0
- truthound/validators/string/regex.py +202 -0
- truthound/validators/string/regex_extended.py +435 -0
- truthound/validators/table/__init__.py +88 -0
- truthound/validators/table/base.py +78 -0
- truthound/validators/table/column_count.py +198 -0
- truthound/validators/table/freshness.py +362 -0
- truthound/validators/table/row_count.py +251 -0
- truthound/validators/table/schema.py +333 -0
- truthound/validators/table/size.py +285 -0
- truthound/validators/timeout/__init__.py +102 -0
- truthound/validators/timeout/advanced/__init__.py +247 -0
- truthound/validators/timeout/advanced/circuit_breaker.py +675 -0
- truthound/validators/timeout/advanced/prediction.py +773 -0
- truthound/validators/timeout/advanced/priority.py +618 -0
- truthound/validators/timeout/advanced/redis_backend.py +770 -0
- truthound/validators/timeout/advanced/retry.py +721 -0
- truthound/validators/timeout/advanced/sampling.py +788 -0
- truthound/validators/timeout/advanced/sla.py +661 -0
- truthound/validators/timeout/advanced/telemetry.py +804 -0
- truthound/validators/timeout/cascade.py +477 -0
- truthound/validators/timeout/deadline.py +657 -0
- truthound/validators/timeout/degradation.py +525 -0
- truthound/validators/timeout/distributed.py +597 -0
- truthound/validators/timeseries/__init__.py +89 -0
- truthound/validators/timeseries/base.py +326 -0
- truthound/validators/timeseries/completeness.py +617 -0
- truthound/validators/timeseries/gap.py +485 -0
- truthound/validators/timeseries/monotonic.py +310 -0
- truthound/validators/timeseries/seasonality.py +422 -0
- truthound/validators/timeseries/trend.py +510 -0
- truthound/validators/uniqueness/__init__.py +59 -0
- truthound/validators/uniqueness/approximate.py +475 -0
- truthound/validators/uniqueness/distinct_values.py +253 -0
- truthound/validators/uniqueness/duplicate.py +118 -0
- truthound/validators/uniqueness/primary_key.py +140 -0
- truthound/validators/uniqueness/unique.py +191 -0
- truthound/validators/uniqueness/within_record.py +599 -0
- truthound/validators/utils.py +756 -0
- truthound-1.0.8.dist-info/METADATA +474 -0
- truthound-1.0.8.dist-info/RECORD +877 -0
- truthound-1.0.8.dist-info/WHEEL +4 -0
- truthound-1.0.8.dist-info/entry_points.txt +2 -0
- truthound-1.0.8.dist-info/licenses/LICENSE +190 -0
|
@@ -0,0 +1,937 @@
|
|
|
1
|
+
"""ML-based ReDoS Pattern Analysis.
|
|
2
|
+
|
|
3
|
+
This module provides machine learning-based analysis for predicting
|
|
4
|
+
ReDoS vulnerability risk in regex patterns. It uses feature extraction
|
|
5
|
+
and trained models to assess pattern safety.
|
|
6
|
+
|
|
7
|
+
Architecture:
|
|
8
|
+
┌─────────────────────────────────────────────────────────────────┐
|
|
9
|
+
│ ML Pattern Analyzer │
|
|
10
|
+
└─────────────────────────────────────────────────────────────────┘
|
|
11
|
+
│
|
|
12
|
+
┌───────────────┬───────────────┼───────────────┬─────────────────┐
|
|
13
|
+
│ │ │ │ │
|
|
14
|
+
▼ ▼ ▼ ▼ ▼
|
|
15
|
+
┌─────────┐ ┌─────────┐ ┌──────────┐ ┌──────────┐ ┌─────────┐
|
|
16
|
+
│ Feature │ │ Model │ │Prediction│ │ Training │ │ Model │
|
|
17
|
+
│Extractor│ │ Manager │ │ Pipeline │ │ Pipeline │ │ Storage │
|
|
18
|
+
└─────────┘ └─────────┘ └──────────┘ └──────────┘ └─────────┘
|
|
19
|
+
|
|
20
|
+
Features extracted:
|
|
21
|
+
- Structural features (length, depth, groups, etc.)
|
|
22
|
+
- Quantifier features (count, types, positions)
|
|
23
|
+
- Alternation features (count, complexity)
|
|
24
|
+
- Character class features (ranges, negation)
|
|
25
|
+
- Backtracking potential features
|
|
26
|
+
|
|
27
|
+
Usage:
|
|
28
|
+
from truthound.validators.security.redos.ml_analyzer import (
|
|
29
|
+
MLPatternAnalyzer,
|
|
30
|
+
predict_redos_risk,
|
|
31
|
+
)
|
|
32
|
+
|
|
33
|
+
# Quick prediction
|
|
34
|
+
result = predict_redos_risk(r"(a+)+b")
|
|
35
|
+
print(result.risk_probability) # 0.95
|
|
36
|
+
print(result.risk_level) # ReDoSRisk.CRITICAL
|
|
37
|
+
|
|
38
|
+
# Full analyzer with custom model
|
|
39
|
+
analyzer = MLPatternAnalyzer()
|
|
40
|
+
analyzer.train(training_patterns, labels)
|
|
41
|
+
result = analyzer.predict(pattern)
|
|
42
|
+
"""
|
|
43
|
+
|
|
44
|
+
from __future__ import annotations
|
|
45
|
+
|
|
46
|
+
import json
|
|
47
|
+
import math
|
|
48
|
+
import re
|
|
49
|
+
from abc import ABC, abstractmethod
|
|
50
|
+
from dataclasses import dataclass, field
|
|
51
|
+
from pathlib import Path
|
|
52
|
+
from typing import Any, Protocol, Sequence
|
|
53
|
+
|
|
54
|
+
from truthound.validators.security.redos.core import ReDoSRisk
|
|
55
|
+
|
|
56
|
+
|
|
57
|
+
@dataclass
class PatternFeatures:
    """Numeric description of a regex pattern used for ML risk prediction.

    Every field has a zero/``False`` default, so ``PatternFeatures()`` is a
    valid "nothing detected" record.

    ``feature_names()`` is the single source of truth for the layout of the
    numeric vector: ``to_vector()`` and ``to_dict()`` are both derived from
    it, so names and values cannot drift apart.
    """

    # Structural features
    length: int = 0
    group_count: int = 0
    capture_group_count: int = 0
    non_capture_group_count: int = 0
    max_nesting_depth: int = 0
    alternation_count: int = 0

    # Quantifier features
    plus_count: int = 0
    star_count: int = 0
    question_count: int = 0
    bounded_quantifier_count: int = 0
    unbounded_quantifier_count: int = 0
    lazy_quantifier_count: int = 0
    possessive_quantifier_count: int = 0
    quantifier_density: float = 0.0

    # Dangerous pattern indicators
    nested_quantifier_count: int = 0
    adjacent_quantifier_count: int = 0
    quantified_alternation_count: int = 0
    quantified_backreference_count: int = 0

    # Character class features
    char_class_count: int = 0
    negated_char_class_count: int = 0
    dot_count: int = 0
    word_boundary_count: int = 0

    # Lookaround features
    lookahead_count: int = 0
    lookbehind_count: int = 0
    negative_lookaround_count: int = 0

    # Backreference features
    backreference_count: int = 0
    max_backreference_index: int = 0

    # Anchor features
    start_anchor: bool = False
    end_anchor: bool = False
    anchored: bool = False

    # Complexity metrics
    backtracking_potential: float = 0.0
    estimated_states: int = 0

    def to_vector(self) -> list[float]:
        """Convert features to a numeric vector for ML models.

        Returns:
            One ``float`` per entry of ``feature_names()``, in the same
            order. Boolean fields become ``0.0`` / ``1.0``.
        """
        # Derive the values from the name list instead of repeating every
        # attribute by hand, so the two can never fall out of step.
        return [float(getattr(self, name)) for name in self.feature_names()]

    @classmethod
    def feature_names(cls) -> list[str]:
        """Get names of all features in vector order.

        The order is spelled out explicitly (rather than taken from the field
        declaration order) because it defines the layout of the vector that
        models consume.
        """
        return [
            "length",
            "group_count",
            "capture_group_count",
            "non_capture_group_count",
            "max_nesting_depth",
            "alternation_count",
            "plus_count",
            "star_count",
            "question_count",
            "bounded_quantifier_count",
            "unbounded_quantifier_count",
            "lazy_quantifier_count",
            "possessive_quantifier_count",
            "quantifier_density",
            "nested_quantifier_count",
            "adjacent_quantifier_count",
            "quantified_alternation_count",
            "quantified_backreference_count",
            "char_class_count",
            "negated_char_class_count",
            "dot_count",
            "word_boundary_count",
            "lookahead_count",
            "lookbehind_count",
            "negative_lookaround_count",
            "backreference_count",
            "max_backreference_index",
            "start_anchor",
            "end_anchor",
            "anchored",
            "backtracking_potential",
            "estimated_states",
        ]

    def to_dict(self) -> dict[str, Any]:
        """Convert to a ``{feature name: float value}`` dictionary."""
        return dict(zip(self.feature_names(), self.to_vector()))
|
|
190
|
+
|
|
191
|
+
|
|
192
|
+
class FeatureExtractor:
    """Extracts ML-relevant features from regex patterns.

    The pattern under analysis is never compiled or executed. Its source
    text is scanned with the helper expressions below and with plain string
    counts, so every feature is a textual heuristic.

    NOTE: ``possessive_quantifier_count`` is not populated by ``extract``
    and therefore keeps its default value.

    Example:
        extractor = FeatureExtractor()
        features = extractor.extract(r"(a+)+b")
        print(features.nested_quantifier_count)  # 1
        print(features.backtracking_potential)   # high value
    """

    # Compiled patterns for feature extraction
    _PLUS_PATTERN = re.compile(r"(?<!\\)\+")
    _STAR_PATTERN = re.compile(r"(?<!\\)\*")
    # "?" that is neither escaped nor the start of a "(?=", "(?!", "(?<", "(?:" construct
    _QUESTION_PATTERN = re.compile(r"(?<!\\)\?(?![=!<:])")
    # group 1 = minimum; group 2 = text after the comma (only participates when a comma is present)
    _BOUNDED_QUANT_PATTERN = re.compile(r"\{(\d+)(?:,(\d*))?\}")
    _LAZY_QUANT_PATTERN = re.compile(r"[+*?]\?|\{[^}]+\}\?")
    _CHAR_CLASS_PATTERN = re.compile(r"\[[^\]]+\]")
    _NEGATED_CLASS_PATTERN = re.compile(r"\[\^[^\]]+\]")
    _LOOKAHEAD_PATTERN = re.compile(r"\(\?[=!]")
    _LOOKBEHIND_PATTERN = re.compile(r"\(\?<[=!]")
    _BACKREFERENCE_PATTERN = re.compile(r"\\([1-9]\d*)")
    _NESTED_QUANT_PATTERN = re.compile(r"\([^)]*[+*][^)]*\)[+*]")
    _ADJACENT_QUANT_PATTERN = re.compile(r"[+*][+*]")
    _QUANTIFIED_ALT_PATTERN = re.compile(r"\([^)]*\|[^)]*\)[+*?]")
    # A backreference immediately followed by "+", "*" or "{n[,m]}".
    # The quantifier alternatives are grouped so that a "{...}" only counts
    # when it follows a backreference; left ungrouped, the alternation would
    # match every "{...}" in the pattern on its own.
    _QUANTIFIED_BACKREF_PATTERN = re.compile(r"\\[1-9]\d*(?:[+*]|\{\d+(?:,\d*)?\})")
    _DOT_PATTERN = re.compile(r"(?<!\\)\.")
    _WORD_BOUNDARY_PATTERN = re.compile(r"\\b")
    _NON_CAPTURE_GROUP_PATTERN = re.compile(r"\(\?(?:[imsxLu]|:)")
    _CAPTURE_GROUP_PATTERN = re.compile(r"\((?!\?)")

    def extract(self, pattern: str) -> PatternFeatures:
        """Extract features from a regex pattern.

        Args:
            pattern: Regex pattern to analyze

        Returns:
            PatternFeatures with all extracted features. An empty pattern
            yields a record with every field at its default.
        """
        features = PatternFeatures()

        if not pattern:
            return features

        # Structural features
        features.length = len(pattern)
        features.max_nesting_depth = self._calculate_nesting_depth(pattern)
        features.alternation_count = pattern.count("|")

        # Group counts
        features.capture_group_count = len(self._CAPTURE_GROUP_PATTERN.findall(pattern))
        features.non_capture_group_count = len(self._NON_CAPTURE_GROUP_PATTERN.findall(pattern))
        features.group_count = features.capture_group_count + features.non_capture_group_count

        # Quantifier features
        features.plus_count = len(self._PLUS_PATTERN.findall(pattern))
        features.star_count = len(self._STAR_PATTERN.findall(pattern))
        features.question_count = len(self._QUESTION_PATTERN.findall(pattern))
        features.lazy_quantifier_count = len(self._LAZY_QUANT_PATTERN.findall(pattern))

        bounded_total, open_ended = self._count_bounded_quantifiers(pattern)
        features.bounded_quantifier_count = bounded_total

        # Unbounded quantifiers: "+", "*" and the open-ended "{n,}" form
        features.unbounded_quantifier_count = (
            features.plus_count + features.star_count + open_ended
        )

        # Quantifier density (max() guards the division; length is > 0 here)
        total_quantifiers = (
            features.plus_count
            + features.star_count
            + features.question_count
            + features.bounded_quantifier_count
        )
        features.quantifier_density = total_quantifiers / max(features.length, 1)

        # Dangerous patterns
        features.nested_quantifier_count = len(self._NESTED_QUANT_PATTERN.findall(pattern))
        features.adjacent_quantifier_count = len(self._ADJACENT_QUANT_PATTERN.findall(pattern))
        features.quantified_alternation_count = len(self._QUANTIFIED_ALT_PATTERN.findall(pattern))
        features.quantified_backreference_count = len(
            self._QUANTIFIED_BACKREF_PATTERN.findall(pattern)
        )

        # Character class features
        features.char_class_count = len(self._CHAR_CLASS_PATTERN.findall(pattern))
        features.negated_char_class_count = len(self._NEGATED_CLASS_PATTERN.findall(pattern))
        features.dot_count = len(self._DOT_PATTERN.findall(pattern))
        features.word_boundary_count = len(self._WORD_BOUNDARY_PATTERN.findall(pattern))

        # Lookaround features
        features.lookahead_count = len(self._LOOKAHEAD_PATTERN.findall(pattern))
        features.lookbehind_count = len(self._LOOKBEHIND_PATTERN.findall(pattern))
        features.negative_lookaround_count = (
            pattern.count("(?!") + pattern.count("(?<!")
        )

        # Backreference features
        backref_matches = self._BACKREFERENCE_PATTERN.findall(pattern)
        features.backreference_count = len(backref_matches)
        if backref_matches:
            features.max_backreference_index = max(int(m) for m in backref_matches)

        # Anchor features
        features.start_anchor = pattern.startswith("^") or "\\A" in pattern
        features.end_anchor = pattern.endswith("$") or "\\Z" in pattern or "\\z" in pattern
        features.anchored = features.start_anchor and features.end_anchor

        # Complexity metrics (both are computed from the features set above)
        features.backtracking_potential = self._calculate_backtracking_potential(features)
        features.estimated_states = self._estimate_nfa_states(features)

        return features

    def _count_bounded_quantifiers(self, pattern: str) -> tuple[int, int]:
        """Count ``{n}``, ``{n,}`` and ``{n,m}`` quantifiers in *pattern*.

        Returns:
            ``(total, open_ended)`` where ``open_ended`` counts only the
            ``{n,}`` form.
        """
        total = 0
        open_ended = 0
        for match in self._BOUNDED_QUANT_PATTERN.finditer(pattern):
            total += 1
            # group(2) is None for an exact "{n}" (no comma) and "" only for
            # "{n,}". findall() would report "" in both cases, which makes
            # an exact repeat look unbounded.
            if match.group(2) == "":
                open_ended += 1
        return total, open_ended

    def _calculate_nesting_depth(self, pattern: str) -> int:
        """Calculate maximum nesting depth of groups.

        Every "(" and ")" character is counted, including escaped ones;
        unmatched ")" never drive the depth below zero.
        """
        depth = 0
        max_depth = 0
        for char in pattern:
            if char == "(":
                depth += 1
                max_depth = max(max_depth, depth)
            elif char == ")":
                depth = max(0, depth - 1)
        return max_depth

    def _calculate_backtracking_potential(self, features: PatternFeatures) -> float:
        """Estimate backtracking potential based on features.

        Higher values indicate higher risk of catastrophic backtracking.
        The score is a weighted sum of the risk indicators, scaled by 1.2
        when the pattern is not anchored at both ends, and capped at 100.0.
        """
        potential = 0.0

        # Nested quantifiers are the biggest risk
        potential += features.nested_quantifier_count * 50.0

        # Quantified alternation is also risky
        potential += features.quantified_alternation_count * 30.0

        # Adjacent quantifiers
        potential += features.adjacent_quantifier_count * 20.0

        # Unbounded quantifiers increase potential
        potential += features.unbounded_quantifier_count * 5.0

        # Deep nesting increases potential
        potential += features.max_nesting_depth * 3.0

        # Backreferences with quantifiers
        potential += features.quantified_backreference_count * 40.0

        # Lack of anchoring increases potential
        if not features.anchored:
            potential *= 1.2

        return min(potential, 100.0)

    def _estimate_nfa_states(self, features: PatternFeatures) -> int:
        """Estimate number of NFA states.

        This is a rough approximation based on pattern features: the
        pattern length plus fixed per-construct increments.
        """
        # Base states from length
        states = features.length

        # Groups add states
        states += features.group_count * 2

        # Quantifiers add states
        states += features.plus_count * 2
        states += features.star_count * 2
        states += features.question_count

        # Bounded quantifiers can add many states
        states += features.bounded_quantifier_count * 5

        # Alternations add branch states
        states += features.alternation_count * 2

        return states
|
|
382
|
+
|
|
383
|
+
|
|
384
|
+
@dataclass
class MLPredictionResult:
    """Outcome of one ML-based ReDoS risk assessment.

    Attributes:
        pattern: The regex source that was analyzed
        features: Features extracted from the pattern
        risk_probability: Probability of ReDoS vulnerability (0-1)
        risk_level: Categorical risk level
        confidence: Model confidence in the prediction
        contributing_factors: ``(feature name, contribution)`` pairs for the
            features that most influenced the prediction
        model_version: Version of the model used
    """

    pattern: str
    features: PatternFeatures
    risk_probability: float
    risk_level: ReDoSRisk
    confidence: float
    contributing_factors: list[tuple[str, float]] = field(default_factory=list)
    model_version: str = "1.0.0"

    def to_dict(self) -> dict[str, Any]:
        """Build a plain-dict view of the result.

        Probability, confidence and every contribution are rounded to four
        decimal places; the risk level is reported by its ``name``.
        """
        factors = []
        for feature_name, contribution in self.contributing_factors:
            factors.append(
                {"feature": feature_name, "contribution": round(contribution, 4)}
            )

        payload: dict[str, Any] = {}
        payload["pattern"] = self.pattern
        payload["features"] = self.features.to_dict()
        payload["risk_probability"] = round(self.risk_probability, 4)
        payload["risk_level"] = self.risk_level.name
        payload["confidence"] = round(self.confidence, 4)
        payload["contributing_factors"] = factors
        payload["model_version"] = self.model_version
        return payload
|
|
420
|
+
|
|
421
|
+
|
|
422
|
+
class MLModelProtocol(Protocol):
    """Structural interface expected of any model used for ReDoS prediction."""

    def predict(self, features: list[float]) -> tuple[float, float]:
        """Score a single feature vector.

        Args:
            features: Feature vector

        Returns:
            A ``(risk_probability, confidence)`` pair
        """
        ...

    def get_feature_importance(self) -> list[float]:
        """Return the model's feature importance scores."""
        ...
|
|
439
|
+
|
|
440
|
+
|
|
441
|
+
class RuleBasedModel:
    """Rule-based model for ReDoS risk prediction.

    This model uses hand-crafted rules based on known ReDoS patterns
    to estimate risk. It serves as a baseline and fallback when
    ML models are not available.

    The model assigns weights to various pattern features and combines
    them using a logistic function to produce a probability.
    """

    # Feature weights (learned from known vulnerable patterns)
    FEATURE_WEIGHTS: dict[str, float] = {
        "nested_quantifier_count": 5.0,
        "quantified_backreference_count": 4.0,
        "quantified_alternation_count": 3.5,
        "adjacent_quantifier_count": 2.5,
        "unbounded_quantifier_count": 1.5,
        "max_nesting_depth": 0.8,
        "star_count": 0.5,
        "plus_count": 0.5,
        "alternation_count": 0.3,
        "quantifier_density": 2.0,
        "backtracking_potential": 0.1,  # Already composite
    }

    # Bias term
    BIAS = -2.0

    def __init__(self) -> None:
        """Initialize the rule-based model."""
        self._feature_names = PatternFeatures.feature_names()

    def predict(self, features: list[float]) -> tuple[float, float]:
        """Predict risk probability using rules.

        Args:
            features: Feature vector, paired positionally with the feature
                names captured at construction time. Features without an
                entry in ``FEATURE_WEIGHTS`` do not affect the score.

        Returns:
            Tuple of (risk_probability, confidence)
        """
        # Map features to dictionary for easier access
        feature_dict = dict(zip(self._feature_names, features))

        # Calculate weighted sum
        weighted_sum = self.BIAS
        for feature_name, weight in self.FEATURE_WEIGHTS.items():
            if feature_name in feature_dict:
                weighted_sum += feature_dict[feature_name] * weight

        # Apply logistic function. math.exp() raises OverflowError once its
        # argument is too large for a float, which happens here for strongly
        # negative scores; the logistic limit in that direction is 0.
        try:
            probability = 1.0 / (1.0 + math.exp(-weighted_sum))
        except OverflowError:
            probability = 0.0

        # Confidence based on how extreme the score is
        confidence = abs(2 * probability - 1)

        return probability, confidence

    def get_feature_importance(self) -> list[float]:
        """Get feature importance scores.

        Returns:
            The configured weight of every known feature, in feature-name
            order; features without a weight score ``0.0``.
        """
        return [self.FEATURE_WEIGHTS.get(name, 0.0) for name in self._feature_names]
|
|
506
|
+
|
|
507
|
+
|
|
508
|
+
class EnsembleModel:
    """Ensemble model combining multiple prediction strategies.

    The rule-based score is blended with a regex signature scan of the
    raw pattern text for more robust predictions.
    """

    def __init__(self):
        """Initialize ensemble model."""
        self._rule_model = RuleBasedModel()
        self._feature_names = PatternFeatures.feature_names()

        # Known dangerous pattern signatures with risk scores
        nested_quantifiers = re.compile(r"\([^)]*[+*][^)]*\)[+*]")
        quantified_backreference = re.compile(r"\\[1-9][+*]")
        quantified_alternation = re.compile(r"\([^)]*\|[^)]*\)[+*]")
        adjacent_quantifiers = re.compile(r"[+*][+*]")
        self._dangerous_signatures: list[tuple[re.Pattern, float]] = [
            (nested_quantifiers, 0.95),
            (quantified_backreference, 0.85),
            (quantified_alternation, 0.75),
            (adjacent_quantifiers, 0.65),
        ]

    def predict(self, features: list[float], pattern: str = "") -> tuple[float, float]:
        """Predict using ensemble of methods.

        Args:
            features: Feature vector
            pattern: Original pattern (optional, for signature matching)

        Returns:
            Tuple of (risk_probability, confidence)
        """
        base_prob, base_conf = self._rule_model.predict(features)

        # Risk scores of every signature found in the pattern text; an
        # empty pattern skips the scan entirely.
        hits = (
            [score for signature, score in self._dangerous_signatures if signature.search(pattern)]
            if pattern
            else []
        )
        strongest = max([0.0, *hits])

        if not strongest > 0:
            # No signature matched: the rule-based result stands as is
            return base_prob, base_conf

        # Signature match: weighted average, with confidence floored at 0.9
        blended = 0.6 * strongest + 0.4 * base_prob
        return blended, max(base_conf, 0.9)

    def get_feature_importance(self) -> list[float]:
        """Get feature importance from base model."""
        return self._rule_model.get_feature_importance()
|
|
561
|
+
|
|
562
|
+
|
|
563
|
+
class MLPatternAnalyzer:
    """Machine learning-based regex pattern analyzer.

    This analyzer uses ML models to predict ReDoS vulnerability risk.
    It supports multiple model backends and can be trained on custom data.

    Example:
        analyzer = MLPatternAnalyzer()

        # Predict risk
        result = analyzer.predict(r"(a+)+b")
        print(result.risk_level)  # ReDoSRisk.CRITICAL

        # Get detailed features
        features = analyzer.extract_features(r"^[a-z]+$")
        print(features.quantifier_density)

        # Train custom model
        patterns = ["(a+)+", "^[a-z]+$", ...]
        labels = [1, 0, ...]  # 1 = vulnerable, 0 = safe
        analyzer.train(patterns, labels)
    """

    VERSION = "1.0.0"

    # Risk thresholds: minimum probability at which each level applies
    RISK_THRESHOLDS: dict[ReDoSRisk, float] = {
        ReDoSRisk.NONE: 0.1,
        ReDoSRisk.LOW: 0.3,
        ReDoSRisk.MEDIUM: 0.5,
        ReDoSRisk.HIGH: 0.7,
        ReDoSRisk.CRITICAL: 0.85,
    }

    # Attributes copied one by one when a predictor's feature object is
    # converted to this module's PatternFeatures (see _convert_features).
    _LEGACY_FEATURE_FIELDS: tuple[str, ...] = (
        "length",
        "group_count",
        "capture_group_count",
        "non_capture_group_count",
        "max_nesting_depth",
        "alternation_count",
        "plus_count",
        "star_count",
        "question_count",
        "bounded_quantifier_count",
        "unbounded_quantifier_count",
        "lazy_quantifier_count",
        "possessive_quantifier_count",
        "quantifier_density",
        "nested_quantifier_count",
        "adjacent_quantifier_count",
        "quantified_alternation_count",
        "quantified_backreference_count",
        "char_class_count",
        "negated_char_class_count",
        "dot_count",
        "word_boundary_count",
        "lookahead_count",
        "lookbehind_count",
        "negative_lookaround_count",
        "backreference_count",
        "max_backreference_index",
        "start_anchor",
        "end_anchor",
        "anchored",
        "backtracking_potential",
        "estimated_states",
    )

    def __init__(
        self,
        model: MLModelProtocol | None = None,
        feature_extractor: FeatureExtractor | None = None,
    ):
        """Initialize the analyzer.

        Args:
            model: ML model to use (defaults to EnsembleModel)
            feature_extractor: Feature extractor (defaults to FeatureExtractor)
        """
        self.extractor = feature_extractor or FeatureExtractor()
        self._model: Any = model or EnsembleModel()
        self._trained = False
        # Set by train() / load_model(). Declared here so every instance
        # has them and the rest of the class need not probe with hasattr.
        self._ml_predictor: Any = None
        self._metrics: Any = None

    def extract_features(self, pattern: str) -> PatternFeatures:
        """Extract features from a pattern.

        Args:
            pattern: Regex pattern

        Returns:
            PatternFeatures object
        """
        return self.extractor.extract(pattern)

    def predict(self, pattern: str) -> MLPredictionResult:
        """Predict ReDoS risk for a pattern.

        Uses the trained ML predictor if one has been set by train() or
        load_model(); otherwise falls back to the model given at
        construction time (EnsembleModel by default).

        Args:
            pattern: Regex pattern to analyze

        Returns:
            MLPredictionResult with prediction details
        """
        # getattr keeps instances created without __init__ working.
        ml_predictor = getattr(self, "_ml_predictor", None)
        if ml_predictor is not None:
            prediction = ml_predictor.predict(pattern)
            # Convert to legacy MLPredictionResult format
            return MLPredictionResult(
                pattern=prediction.pattern,
                features=self._convert_features(prediction.features),
                risk_probability=prediction.risk_probability,
                risk_level=prediction.risk_level,
                confidence=prediction.confidence,
                contributing_factors=prediction.contributing_factors,
                model_version=prediction.model_version,
            )

        # Fallback to original implementation
        features = self.extractor.extract(pattern)
        feature_vector = features.to_vector()

        # Only EnsembleModel.predict accepts the raw pattern text
        if isinstance(self._model, EnsembleModel):
            probability, confidence = self._model.predict(feature_vector, pattern)
        else:
            probability, confidence = self._model.predict(feature_vector)

        return MLPredictionResult(
            pattern=pattern,
            features=features,
            risk_probability=probability,
            risk_level=self._probability_to_risk_level(probability),
            confidence=confidence,
            contributing_factors=self._get_contributing_factors(features),
            model_version=self.VERSION,
        )

    def _convert_features(self, new_features: Any) -> PatternFeatures:
        """Convert new PatternFeatures format to legacy format.

        This is for backward compatibility with existing code that
        expects this module's PatternFeatures dataclass. Every attribute
        named in ``_LEGACY_FEATURE_FIELDS`` is read from ``new_features``
        and passed as a keyword argument to a fresh PatternFeatures.
        """
        return PatternFeatures(
            **{name: getattr(new_features, name) for name in self._LEGACY_FEATURE_FIELDS}
        )

    def predict_batch(self, patterns: Sequence[str]) -> list[MLPredictionResult]:
        """Predict risk for multiple patterns.

        Args:
            patterns: Sequence of patterns to analyze

        Returns:
            List of MLPredictionResult objects, in input order
        """
        return [self.predict(pattern) for pattern in patterns]

    def train(
        self,
        patterns: Sequence[str],
        labels: Sequence[int],
        validation_split: float = 0.2,
    ) -> dict[str, float]:
        """Train the model on labeled data.

        Runs the package's TrainingPipeline with a random-forest model
        type and stores the resulting predictor, which predict() then
        uses for all subsequent calls.

        NOTE(review): earlier documentation stated that training falls
        back to a rule-based model when scikit-learn is unavailable; no
        such fallback is visible in this method -- confirm it is handled
        inside TrainingPipeline.

        Args:
            patterns: Training patterns
            labels: Labels (1 = vulnerable, 0 = safe)
            validation_split: Fraction of data for validation

        Returns:
            Training metrics dictionary containing accuracy, precision,
            recall, f1, and sample count.

        Raises:
            ValueError: If patterns and labels have different lengths

        Example:
            >>> analyzer = MLPatternAnalyzer()
            >>> patterns = ["(a+)+", "^[a-z]+$", "(.*)+", "\\d+"]
            >>> labels = [1, 0, 1, 0]  # 1=vulnerable, 0=safe
            >>> metrics = analyzer.train(patterns, labels)
            >>> print(f"Accuracy: {metrics['accuracy']:.2%}")
        """
        if len(patterns) != len(labels):
            raise ValueError("Patterns and labels must have same length")

        # Import the new ML framework
        from truthound.validators.security.redos.ml import (
            ReDoSTrainingData,
            TrainingPipeline,
            TrainingConfig,
            ModelType,
            ReDoSMLPredictor,
        )

        # Create training data
        training_data = ReDoSTrainingData(
            patterns=list(patterns),
            labels=list(labels),
        )

        # Configure training
        config = TrainingConfig(
            model_type=ModelType.RANDOM_FOREST,
            test_split=validation_split,
            cv_folds=5,
            verbose=0,
        )

        # Train using the pipeline
        pipeline = TrainingPipeline(config=config)
        result = pipeline.train(training_data)

        # Store the trained model for predictions
        self._ml_predictor = ReDoSMLPredictor(model=result.model)
        self._trained = True
        self._metrics = result.metrics

        # Return metrics as dictionary for backward compatibility
        return {
            "accuracy": result.metrics.accuracy,
            "precision": result.metrics.precision,
            "recall": result.metrics.recall,
            "f1": result.metrics.f1_score,
            "samples": float(len(patterns)),
        }

    def save_model(self, path: str | Path) -> None:
        """Save the model to disk.

        If a trained ML predictor is present, its model is written with
        the package's ``save_model`` storage helper. Otherwise a small
        JSON descriptor (version, trained flag, model type name and risk
        thresholds) is written instead; no exception is raised for an
        untrained analyzer.

        Args:
            path: Path to save the model (recommended: .pkl extension)

        Example:
            >>> analyzer = MLPatternAnalyzer()
            >>> analyzer.train(patterns, labels)
            >>> analyzer.save_model("redos_model.pkl")
        """
        from truthound.validators.security.redos.ml.storage import save_model

        ml_predictor = getattr(self, "_ml_predictor", None)
        if ml_predictor is not None:
            save_model(ml_predictor.model, path)
            return

        # Fallback for legacy format
        model_data = {
            "version": self.VERSION,
            "trained": self._trained,
            "model_type": type(self._model).__name__,
            "thresholds": {k.name: v for k, v in self.RISK_THRESHOLDS.items()},
        }
        Path(path).write_text(json.dumps(model_data, indent=2))

    def load_model(self, path: str | Path) -> None:
        """Load a trained model from disk.

        The file is first read with the package's ``load_model`` storage
        helper; on success the loaded model is used for all subsequent
        predictions. If that fails, the file is read as the legacy JSON
        descriptor, which only restores the ``trained`` flag. Loading is
        best-effort: failures are logged rather than raised, and leave the
        analyzer marked as untrained.

        NOTE(review): save_model's earlier documentation describes the
        new format as pickle/joblib serialization; if so, only load files
        from trusted sources -- confirm against the storage module.

        Args:
            path: Path to the saved model

        Example:
            >>> analyzer = MLPatternAnalyzer()
            >>> analyzer.load_model("redos_model.pkl")
            >>> result = analyzer.predict("(a+)+b")
        """
        import logging

        from truthound.validators.security.redos.ml import ReDoSMLPredictor
        from truthound.validators.security.redos.ml.storage import load_model

        logger = logging.getLogger(__name__)
        path = Path(path)

        # Try loading as new format first
        try:
            predictor = ReDoSMLPredictor(model=load_model(path))
        except Exception as exc:
            # Deliberately broad: any failure here triggers the legacy path
            logger.debug("New-format model load failed for %s: %s", path, exc)
        else:
            self._ml_predictor = predictor
            self._trained = True
            return

        # Fallback to legacy JSON format
        try:
            model_data = json.loads(path.read_text())
            self._trained = model_data.get("trained", False)
        except Exception as exc:
            logger.warning("Could not load ReDoS model from %s: %s", path, exc)
            self._trained = False

    def _probability_to_risk_level(self, probability: float) -> ReDoSRisk:
        """Convert probability to risk level.

        Levels are checked from most to least severe and the first whose
        threshold is met wins; anything below the LOW threshold is NONE.
        """
        for level in (
            ReDoSRisk.CRITICAL,
            ReDoSRisk.HIGH,
            ReDoSRisk.MEDIUM,
            ReDoSRisk.LOW,
        ):
            if probability >= self.RISK_THRESHOLDS[level]:
                return level
        return ReDoSRisk.NONE

    def _get_contributing_factors(
        self,
        features: PatternFeatures,
    ) -> list[tuple[str, float]]:
        """Get features that contribute most to the risk prediction.

        A feature's contribution is its importance (from the fallback
        model) multiplied by its value; only positive contributions are
        reported.

        Args:
            features: Extracted pattern features

        Returns:
            Up to five (feature_name, contribution) tuples, sorted by
            contribution in descending order
        """
        feature_importance = self._model.get_feature_importance()
        feature_values = features.to_vector()
        feature_names = PatternFeatures.feature_names()

        # zip() stops at the shortest of the three sequences
        contributions: list[tuple[str, float]] = [
            (name, importance * value)
            for name, importance, value in zip(feature_names, feature_importance, feature_values)
            if importance * value > 0
        ]

        # Sort by contribution (descending) and keep the top 5
        contributions.sort(key=lambda item: item[1], reverse=True)
        return contributions[:5]
|
|
910
|
+
|
|
911
|
+
|
|
912
|
+
# ============================================================================
|
|
913
|
+
# Convenience functions
|
|
914
|
+
# ============================================================================
|
|
915
|
+
|
|
916
|
+
|
|
917
|
+
def predict_redos_risk(
    pattern: str,
    analyzer: MLPatternAnalyzer | None = None,
) -> MLPredictionResult:
    """Predict ReDoS risk for a regex pattern using ML.

    Thin wrapper around ``MLPatternAnalyzer.predict``; a fresh analyzer
    with default settings is created when none is supplied.

    Args:
        pattern: Regex pattern to analyze
        analyzer: Optional custom analyzer

    Returns:
        MLPredictionResult with prediction details

    Example:
        result = predict_redos_risk(r"(a+)+b")
        print(result.risk_level)  # ReDoSRisk.CRITICAL
        print(result.risk_probability)  # ~0.95
    """
    active_analyzer = MLPatternAnalyzer() if analyzer is None else analyzer
    return active_analyzer.predict(pattern)
|