truthound 1.0.8__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- truthound/__init__.py +162 -0
- truthound/adapters.py +100 -0
- truthound/api.py +365 -0
- truthound/audit/__init__.py +248 -0
- truthound/audit/core.py +967 -0
- truthound/audit/filters.py +620 -0
- truthound/audit/formatters.py +707 -0
- truthound/audit/logger.py +902 -0
- truthound/audit/middleware.py +571 -0
- truthound/audit/storage.py +1083 -0
- truthound/benchmark/__init__.py +123 -0
- truthound/benchmark/base.py +757 -0
- truthound/benchmark/comparison.py +635 -0
- truthound/benchmark/generators.py +706 -0
- truthound/benchmark/reporters.py +718 -0
- truthound/benchmark/runner.py +635 -0
- truthound/benchmark/scenarios.py +712 -0
- truthound/cache.py +252 -0
- truthound/checkpoint/__init__.py +136 -0
- truthound/checkpoint/actions/__init__.py +164 -0
- truthound/checkpoint/actions/base.py +324 -0
- truthound/checkpoint/actions/custom.py +234 -0
- truthound/checkpoint/actions/discord_notify.py +290 -0
- truthound/checkpoint/actions/email_notify.py +405 -0
- truthound/checkpoint/actions/github_action.py +406 -0
- truthound/checkpoint/actions/opsgenie.py +1499 -0
- truthound/checkpoint/actions/pagerduty.py +226 -0
- truthound/checkpoint/actions/slack_notify.py +233 -0
- truthound/checkpoint/actions/store_result.py +249 -0
- truthound/checkpoint/actions/teams_notify.py +1570 -0
- truthound/checkpoint/actions/telegram_notify.py +419 -0
- truthound/checkpoint/actions/update_docs.py +552 -0
- truthound/checkpoint/actions/webhook.py +293 -0
- truthound/checkpoint/analytics/__init__.py +147 -0
- truthound/checkpoint/analytics/aggregations/__init__.py +23 -0
- truthound/checkpoint/analytics/aggregations/rollup.py +481 -0
- truthound/checkpoint/analytics/aggregations/time_bucket.py +306 -0
- truthound/checkpoint/analytics/analyzers/__init__.py +17 -0
- truthound/checkpoint/analytics/analyzers/anomaly.py +386 -0
- truthound/checkpoint/analytics/analyzers/base.py +270 -0
- truthound/checkpoint/analytics/analyzers/forecast.py +421 -0
- truthound/checkpoint/analytics/analyzers/trend.py +314 -0
- truthound/checkpoint/analytics/models.py +292 -0
- truthound/checkpoint/analytics/protocols.py +549 -0
- truthound/checkpoint/analytics/service.py +718 -0
- truthound/checkpoint/analytics/stores/__init__.py +16 -0
- truthound/checkpoint/analytics/stores/base.py +306 -0
- truthound/checkpoint/analytics/stores/memory_store.py +353 -0
- truthound/checkpoint/analytics/stores/sqlite_store.py +557 -0
- truthound/checkpoint/analytics/stores/timescale_store.py +501 -0
- truthound/checkpoint/async_actions.py +794 -0
- truthound/checkpoint/async_base.py +708 -0
- truthound/checkpoint/async_checkpoint.py +617 -0
- truthound/checkpoint/async_runner.py +639 -0
- truthound/checkpoint/checkpoint.py +527 -0
- truthound/checkpoint/ci/__init__.py +61 -0
- truthound/checkpoint/ci/detector.py +355 -0
- truthound/checkpoint/ci/reporter.py +436 -0
- truthound/checkpoint/ci/templates.py +454 -0
- truthound/checkpoint/circuitbreaker/__init__.py +133 -0
- truthound/checkpoint/circuitbreaker/breaker.py +542 -0
- truthound/checkpoint/circuitbreaker/core.py +252 -0
- truthound/checkpoint/circuitbreaker/detection.py +459 -0
- truthound/checkpoint/circuitbreaker/middleware.py +389 -0
- truthound/checkpoint/circuitbreaker/registry.py +357 -0
- truthound/checkpoint/distributed/__init__.py +139 -0
- truthound/checkpoint/distributed/backends/__init__.py +35 -0
- truthound/checkpoint/distributed/backends/celery_backend.py +503 -0
- truthound/checkpoint/distributed/backends/kubernetes_backend.py +696 -0
- truthound/checkpoint/distributed/backends/local_backend.py +397 -0
- truthound/checkpoint/distributed/backends/ray_backend.py +625 -0
- truthound/checkpoint/distributed/base.py +774 -0
- truthound/checkpoint/distributed/orchestrator.py +765 -0
- truthound/checkpoint/distributed/protocols.py +842 -0
- truthound/checkpoint/distributed/registry.py +449 -0
- truthound/checkpoint/idempotency/__init__.py +120 -0
- truthound/checkpoint/idempotency/core.py +295 -0
- truthound/checkpoint/idempotency/fingerprint.py +454 -0
- truthound/checkpoint/idempotency/locking.py +604 -0
- truthound/checkpoint/idempotency/service.py +592 -0
- truthound/checkpoint/idempotency/stores.py +653 -0
- truthound/checkpoint/monitoring/__init__.py +134 -0
- truthound/checkpoint/monitoring/aggregators/__init__.py +15 -0
- truthound/checkpoint/monitoring/aggregators/base.py +372 -0
- truthound/checkpoint/monitoring/aggregators/realtime.py +300 -0
- truthound/checkpoint/monitoring/aggregators/window.py +493 -0
- truthound/checkpoint/monitoring/collectors/__init__.py +17 -0
- truthound/checkpoint/monitoring/collectors/base.py +257 -0
- truthound/checkpoint/monitoring/collectors/memory_collector.py +617 -0
- truthound/checkpoint/monitoring/collectors/prometheus_collector.py +451 -0
- truthound/checkpoint/monitoring/collectors/redis_collector.py +518 -0
- truthound/checkpoint/monitoring/events.py +410 -0
- truthound/checkpoint/monitoring/protocols.py +636 -0
- truthound/checkpoint/monitoring/service.py +578 -0
- truthound/checkpoint/monitoring/views/__init__.py +17 -0
- truthound/checkpoint/monitoring/views/base.py +172 -0
- truthound/checkpoint/monitoring/views/queue_view.py +220 -0
- truthound/checkpoint/monitoring/views/task_view.py +240 -0
- truthound/checkpoint/monitoring/views/worker_view.py +263 -0
- truthound/checkpoint/registry.py +337 -0
- truthound/checkpoint/runner.py +356 -0
- truthound/checkpoint/transaction/__init__.py +133 -0
- truthound/checkpoint/transaction/base.py +389 -0
- truthound/checkpoint/transaction/compensatable.py +537 -0
- truthound/checkpoint/transaction/coordinator.py +576 -0
- truthound/checkpoint/transaction/executor.py +622 -0
- truthound/checkpoint/transaction/idempotency.py +534 -0
- truthound/checkpoint/transaction/saga/__init__.py +143 -0
- truthound/checkpoint/transaction/saga/builder.py +584 -0
- truthound/checkpoint/transaction/saga/definition.py +515 -0
- truthound/checkpoint/transaction/saga/event_store.py +542 -0
- truthound/checkpoint/transaction/saga/patterns.py +833 -0
- truthound/checkpoint/transaction/saga/runner.py +718 -0
- truthound/checkpoint/transaction/saga/state_machine.py +793 -0
- truthound/checkpoint/transaction/saga/strategies.py +780 -0
- truthound/checkpoint/transaction/saga/testing.py +886 -0
- truthound/checkpoint/triggers/__init__.py +58 -0
- truthound/checkpoint/triggers/base.py +237 -0
- truthound/checkpoint/triggers/event.py +385 -0
- truthound/checkpoint/triggers/schedule.py +355 -0
- truthound/cli.py +2358 -0
- truthound/cli_modules/__init__.py +124 -0
- truthound/cli_modules/advanced/__init__.py +45 -0
- truthound/cli_modules/advanced/benchmark.py +343 -0
- truthound/cli_modules/advanced/docs.py +225 -0
- truthound/cli_modules/advanced/lineage.py +209 -0
- truthound/cli_modules/advanced/ml.py +320 -0
- truthound/cli_modules/advanced/realtime.py +196 -0
- truthound/cli_modules/checkpoint/__init__.py +46 -0
- truthound/cli_modules/checkpoint/init.py +114 -0
- truthound/cli_modules/checkpoint/list.py +71 -0
- truthound/cli_modules/checkpoint/run.py +159 -0
- truthound/cli_modules/checkpoint/validate.py +67 -0
- truthound/cli_modules/common/__init__.py +71 -0
- truthound/cli_modules/common/errors.py +414 -0
- truthound/cli_modules/common/options.py +419 -0
- truthound/cli_modules/common/output.py +507 -0
- truthound/cli_modules/common/protocol.py +552 -0
- truthound/cli_modules/core/__init__.py +48 -0
- truthound/cli_modules/core/check.py +123 -0
- truthound/cli_modules/core/compare.py +104 -0
- truthound/cli_modules/core/learn.py +57 -0
- truthound/cli_modules/core/mask.py +77 -0
- truthound/cli_modules/core/profile.py +65 -0
- truthound/cli_modules/core/scan.py +61 -0
- truthound/cli_modules/profiler/__init__.py +51 -0
- truthound/cli_modules/profiler/auto_profile.py +175 -0
- truthound/cli_modules/profiler/metadata.py +107 -0
- truthound/cli_modules/profiler/suite.py +283 -0
- truthound/cli_modules/registry.py +431 -0
- truthound/cli_modules/scaffolding/__init__.py +89 -0
- truthound/cli_modules/scaffolding/base.py +631 -0
- truthound/cli_modules/scaffolding/commands.py +545 -0
- truthound/cli_modules/scaffolding/plugins.py +1072 -0
- truthound/cli_modules/scaffolding/reporters.py +594 -0
- truthound/cli_modules/scaffolding/validators.py +1127 -0
- truthound/common/__init__.py +18 -0
- truthound/common/resilience/__init__.py +130 -0
- truthound/common/resilience/bulkhead.py +266 -0
- truthound/common/resilience/circuit_breaker.py +516 -0
- truthound/common/resilience/composite.py +332 -0
- truthound/common/resilience/config.py +292 -0
- truthound/common/resilience/protocols.py +217 -0
- truthound/common/resilience/rate_limiter.py +404 -0
- truthound/common/resilience/retry.py +341 -0
- truthound/datadocs/__init__.py +260 -0
- truthound/datadocs/base.py +571 -0
- truthound/datadocs/builder.py +761 -0
- truthound/datadocs/charts.py +764 -0
- truthound/datadocs/dashboard/__init__.py +63 -0
- truthound/datadocs/dashboard/app.py +576 -0
- truthound/datadocs/dashboard/components.py +584 -0
- truthound/datadocs/dashboard/state.py +240 -0
- truthound/datadocs/engine/__init__.py +46 -0
- truthound/datadocs/engine/context.py +376 -0
- truthound/datadocs/engine/pipeline.py +618 -0
- truthound/datadocs/engine/registry.py +469 -0
- truthound/datadocs/exporters/__init__.py +49 -0
- truthound/datadocs/exporters/base.py +198 -0
- truthound/datadocs/exporters/html.py +178 -0
- truthound/datadocs/exporters/json_exporter.py +253 -0
- truthound/datadocs/exporters/markdown.py +284 -0
- truthound/datadocs/exporters/pdf.py +392 -0
- truthound/datadocs/i18n/__init__.py +86 -0
- truthound/datadocs/i18n/catalog.py +960 -0
- truthound/datadocs/i18n/formatting.py +505 -0
- truthound/datadocs/i18n/loader.py +256 -0
- truthound/datadocs/i18n/plurals.py +378 -0
- truthound/datadocs/renderers/__init__.py +42 -0
- truthound/datadocs/renderers/base.py +401 -0
- truthound/datadocs/renderers/custom.py +342 -0
- truthound/datadocs/renderers/jinja.py +697 -0
- truthound/datadocs/sections.py +736 -0
- truthound/datadocs/styles.py +931 -0
- truthound/datadocs/themes/__init__.py +101 -0
- truthound/datadocs/themes/base.py +336 -0
- truthound/datadocs/themes/default.py +417 -0
- truthound/datadocs/themes/enterprise.py +419 -0
- truthound/datadocs/themes/loader.py +336 -0
- truthound/datadocs/themes.py +301 -0
- truthound/datadocs/transformers/__init__.py +57 -0
- truthound/datadocs/transformers/base.py +268 -0
- truthound/datadocs/transformers/enrichers.py +544 -0
- truthound/datadocs/transformers/filters.py +447 -0
- truthound/datadocs/transformers/i18n.py +468 -0
- truthound/datadocs/versioning/__init__.py +62 -0
- truthound/datadocs/versioning/diff.py +639 -0
- truthound/datadocs/versioning/storage.py +497 -0
- truthound/datadocs/versioning/version.py +358 -0
- truthound/datasources/__init__.py +223 -0
- truthound/datasources/_async_protocols.py +222 -0
- truthound/datasources/_protocols.py +159 -0
- truthound/datasources/adapters.py +428 -0
- truthound/datasources/async_base.py +599 -0
- truthound/datasources/async_factory.py +511 -0
- truthound/datasources/base.py +516 -0
- truthound/datasources/factory.py +433 -0
- truthound/datasources/nosql/__init__.py +47 -0
- truthound/datasources/nosql/base.py +487 -0
- truthound/datasources/nosql/elasticsearch.py +801 -0
- truthound/datasources/nosql/mongodb.py +636 -0
- truthound/datasources/pandas_optimized.py +582 -0
- truthound/datasources/pandas_source.py +216 -0
- truthound/datasources/polars_source.py +395 -0
- truthound/datasources/spark_source.py +479 -0
- truthound/datasources/sql/__init__.py +154 -0
- truthound/datasources/sql/base.py +710 -0
- truthound/datasources/sql/bigquery.py +410 -0
- truthound/datasources/sql/cloud_base.py +199 -0
- truthound/datasources/sql/databricks.py +471 -0
- truthound/datasources/sql/mysql.py +316 -0
- truthound/datasources/sql/oracle.py +427 -0
- truthound/datasources/sql/postgresql.py +321 -0
- truthound/datasources/sql/redshift.py +479 -0
- truthound/datasources/sql/snowflake.py +439 -0
- truthound/datasources/sql/sqlite.py +286 -0
- truthound/datasources/sql/sqlserver.py +437 -0
- truthound/datasources/streaming/__init__.py +47 -0
- truthound/datasources/streaming/base.py +350 -0
- truthound/datasources/streaming/kafka.py +670 -0
- truthound/decorators.py +98 -0
- truthound/docs/__init__.py +69 -0
- truthound/docs/extractor.py +971 -0
- truthound/docs/generator.py +601 -0
- truthound/docs/parser.py +1037 -0
- truthound/docs/renderer.py +999 -0
- truthound/drift/__init__.py +22 -0
- truthound/drift/compare.py +189 -0
- truthound/drift/detectors.py +464 -0
- truthound/drift/report.py +160 -0
- truthound/execution/__init__.py +65 -0
- truthound/execution/_protocols.py +324 -0
- truthound/execution/base.py +576 -0
- truthound/execution/distributed/__init__.py +179 -0
- truthound/execution/distributed/aggregations.py +731 -0
- truthound/execution/distributed/arrow_bridge.py +817 -0
- truthound/execution/distributed/base.py +550 -0
- truthound/execution/distributed/dask_engine.py +976 -0
- truthound/execution/distributed/mixins.py +766 -0
- truthound/execution/distributed/protocols.py +756 -0
- truthound/execution/distributed/ray_engine.py +1127 -0
- truthound/execution/distributed/registry.py +446 -0
- truthound/execution/distributed/spark_engine.py +1011 -0
- truthound/execution/distributed/validator_adapter.py +682 -0
- truthound/execution/pandas_engine.py +401 -0
- truthound/execution/polars_engine.py +497 -0
- truthound/execution/pushdown/__init__.py +230 -0
- truthound/execution/pushdown/ast.py +1550 -0
- truthound/execution/pushdown/builder.py +1550 -0
- truthound/execution/pushdown/dialects.py +1072 -0
- truthound/execution/pushdown/executor.py +829 -0
- truthound/execution/pushdown/optimizer.py +1041 -0
- truthound/execution/sql_engine.py +518 -0
- truthound/infrastructure/__init__.py +189 -0
- truthound/infrastructure/audit.py +1515 -0
- truthound/infrastructure/config.py +1133 -0
- truthound/infrastructure/encryption.py +1132 -0
- truthound/infrastructure/logging.py +1503 -0
- truthound/infrastructure/metrics.py +1220 -0
- truthound/lineage/__init__.py +89 -0
- truthound/lineage/base.py +746 -0
- truthound/lineage/impact_analysis.py +474 -0
- truthound/lineage/integrations/__init__.py +22 -0
- truthound/lineage/integrations/openlineage.py +548 -0
- truthound/lineage/tracker.py +512 -0
- truthound/lineage/visualization/__init__.py +33 -0
- truthound/lineage/visualization/protocols.py +145 -0
- truthound/lineage/visualization/renderers/__init__.py +20 -0
- truthound/lineage/visualization/renderers/cytoscape.py +329 -0
- truthound/lineage/visualization/renderers/d3.py +331 -0
- truthound/lineage/visualization/renderers/graphviz.py +276 -0
- truthound/lineage/visualization/renderers/mermaid.py +308 -0
- truthound/maskers.py +113 -0
- truthound/ml/__init__.py +124 -0
- truthound/ml/anomaly_models/__init__.py +31 -0
- truthound/ml/anomaly_models/ensemble.py +362 -0
- truthound/ml/anomaly_models/isolation_forest.py +444 -0
- truthound/ml/anomaly_models/statistical.py +392 -0
- truthound/ml/base.py +1178 -0
- truthound/ml/drift_detection/__init__.py +26 -0
- truthound/ml/drift_detection/concept.py +381 -0
- truthound/ml/drift_detection/distribution.py +361 -0
- truthound/ml/drift_detection/feature.py +442 -0
- truthound/ml/drift_detection/multivariate.py +495 -0
- truthound/ml/monitoring/__init__.py +88 -0
- truthound/ml/monitoring/alerting/__init__.py +33 -0
- truthound/ml/monitoring/alerting/handlers.py +427 -0
- truthound/ml/monitoring/alerting/rules.py +508 -0
- truthound/ml/monitoring/collectors/__init__.py +19 -0
- truthound/ml/monitoring/collectors/composite.py +105 -0
- truthound/ml/monitoring/collectors/drift.py +324 -0
- truthound/ml/monitoring/collectors/performance.py +179 -0
- truthound/ml/monitoring/collectors/quality.py +369 -0
- truthound/ml/monitoring/monitor.py +536 -0
- truthound/ml/monitoring/protocols.py +451 -0
- truthound/ml/monitoring/stores/__init__.py +15 -0
- truthound/ml/monitoring/stores/memory.py +201 -0
- truthound/ml/monitoring/stores/prometheus.py +296 -0
- truthound/ml/rule_learning/__init__.py +25 -0
- truthound/ml/rule_learning/constraint_miner.py +443 -0
- truthound/ml/rule_learning/pattern_learner.py +499 -0
- truthound/ml/rule_learning/profile_learner.py +462 -0
- truthound/multitenancy/__init__.py +326 -0
- truthound/multitenancy/core.py +852 -0
- truthound/multitenancy/integration.py +597 -0
- truthound/multitenancy/isolation.py +630 -0
- truthound/multitenancy/manager.py +770 -0
- truthound/multitenancy/middleware.py +765 -0
- truthound/multitenancy/quota.py +537 -0
- truthound/multitenancy/resolvers.py +603 -0
- truthound/multitenancy/storage.py +703 -0
- truthound/observability/__init__.py +307 -0
- truthound/observability/context.py +531 -0
- truthound/observability/instrumentation.py +611 -0
- truthound/observability/logging.py +887 -0
- truthound/observability/metrics.py +1157 -0
- truthound/observability/tracing/__init__.py +178 -0
- truthound/observability/tracing/baggage.py +310 -0
- truthound/observability/tracing/config.py +426 -0
- truthound/observability/tracing/exporter.py +787 -0
- truthound/observability/tracing/integration.py +1018 -0
- truthound/observability/tracing/otel/__init__.py +146 -0
- truthound/observability/tracing/otel/adapter.py +982 -0
- truthound/observability/tracing/otel/bridge.py +1177 -0
- truthound/observability/tracing/otel/compat.py +681 -0
- truthound/observability/tracing/otel/config.py +691 -0
- truthound/observability/tracing/otel/detection.py +327 -0
- truthound/observability/tracing/otel/protocols.py +426 -0
- truthound/observability/tracing/processor.py +561 -0
- truthound/observability/tracing/propagator.py +757 -0
- truthound/observability/tracing/provider.py +569 -0
- truthound/observability/tracing/resource.py +515 -0
- truthound/observability/tracing/sampler.py +487 -0
- truthound/observability/tracing/span.py +676 -0
- truthound/plugins/__init__.py +198 -0
- truthound/plugins/base.py +599 -0
- truthound/plugins/cli.py +680 -0
- truthound/plugins/dependencies/__init__.py +42 -0
- truthound/plugins/dependencies/graph.py +422 -0
- truthound/plugins/dependencies/resolver.py +417 -0
- truthound/plugins/discovery.py +379 -0
- truthound/plugins/docs/__init__.py +46 -0
- truthound/plugins/docs/extractor.py +444 -0
- truthound/plugins/docs/renderer.py +499 -0
- truthound/plugins/enterprise_manager.py +877 -0
- truthound/plugins/examples/__init__.py +19 -0
- truthound/plugins/examples/custom_validators.py +317 -0
- truthound/plugins/examples/slack_notifier.py +312 -0
- truthound/plugins/examples/xml_reporter.py +254 -0
- truthound/plugins/hooks.py +558 -0
- truthound/plugins/lifecycle/__init__.py +43 -0
- truthound/plugins/lifecycle/hot_reload.py +402 -0
- truthound/plugins/lifecycle/manager.py +371 -0
- truthound/plugins/manager.py +736 -0
- truthound/plugins/registry.py +338 -0
- truthound/plugins/security/__init__.py +93 -0
- truthound/plugins/security/exceptions.py +332 -0
- truthound/plugins/security/policies.py +348 -0
- truthound/plugins/security/protocols.py +643 -0
- truthound/plugins/security/sandbox/__init__.py +45 -0
- truthound/plugins/security/sandbox/context.py +158 -0
- truthound/plugins/security/sandbox/engines/__init__.py +19 -0
- truthound/plugins/security/sandbox/engines/container.py +379 -0
- truthound/plugins/security/sandbox/engines/noop.py +144 -0
- truthound/plugins/security/sandbox/engines/process.py +336 -0
- truthound/plugins/security/sandbox/factory.py +211 -0
- truthound/plugins/security/signing/__init__.py +57 -0
- truthound/plugins/security/signing/service.py +330 -0
- truthound/plugins/security/signing/trust_store.py +368 -0
- truthound/plugins/security/signing/verifier.py +459 -0
- truthound/plugins/versioning/__init__.py +41 -0
- truthound/plugins/versioning/constraints.py +297 -0
- truthound/plugins/versioning/resolver.py +329 -0
- truthound/profiler/__init__.py +1729 -0
- truthound/profiler/_lazy.py +452 -0
- truthound/profiler/ab_testing/__init__.py +80 -0
- truthound/profiler/ab_testing/analysis.py +449 -0
- truthound/profiler/ab_testing/base.py +257 -0
- truthound/profiler/ab_testing/experiment.py +395 -0
- truthound/profiler/ab_testing/tracking.py +368 -0
- truthound/profiler/auto_threshold.py +1170 -0
- truthound/profiler/base.py +579 -0
- truthound/profiler/cache_patterns.py +911 -0
- truthound/profiler/caching.py +1303 -0
- truthound/profiler/column_profiler.py +712 -0
- truthound/profiler/comparison.py +1007 -0
- truthound/profiler/custom_patterns.py +1170 -0
- truthound/profiler/dashboard/__init__.py +50 -0
- truthound/profiler/dashboard/app.py +476 -0
- truthound/profiler/dashboard/components.py +457 -0
- truthound/profiler/dashboard/config.py +72 -0
- truthound/profiler/distributed/__init__.py +83 -0
- truthound/profiler/distributed/base.py +281 -0
- truthound/profiler/distributed/dask_backend.py +498 -0
- truthound/profiler/distributed/local_backend.py +293 -0
- truthound/profiler/distributed/profiler.py +304 -0
- truthound/profiler/distributed/ray_backend.py +374 -0
- truthound/profiler/distributed/spark_backend.py +375 -0
- truthound/profiler/distributed.py +1366 -0
- truthound/profiler/enterprise_sampling.py +1065 -0
- truthound/profiler/errors.py +488 -0
- truthound/profiler/evolution/__init__.py +91 -0
- truthound/profiler/evolution/alerts.py +426 -0
- truthound/profiler/evolution/changes.py +206 -0
- truthound/profiler/evolution/compatibility.py +365 -0
- truthound/profiler/evolution/detector.py +372 -0
- truthound/profiler/evolution/protocols.py +121 -0
- truthound/profiler/generators/__init__.py +48 -0
- truthound/profiler/generators/base.py +384 -0
- truthound/profiler/generators/ml_rules.py +375 -0
- truthound/profiler/generators/pattern_rules.py +384 -0
- truthound/profiler/generators/schema_rules.py +267 -0
- truthound/profiler/generators/stats_rules.py +324 -0
- truthound/profiler/generators/suite_generator.py +857 -0
- truthound/profiler/i18n.py +1542 -0
- truthound/profiler/incremental.py +554 -0
- truthound/profiler/incremental_validation.py +1710 -0
- truthound/profiler/integration/__init__.py +73 -0
- truthound/profiler/integration/adapters.py +345 -0
- truthound/profiler/integration/context.py +371 -0
- truthound/profiler/integration/executor.py +527 -0
- truthound/profiler/integration/naming.py +75 -0
- truthound/profiler/integration/protocols.py +243 -0
- truthound/profiler/memory.py +1185 -0
- truthound/profiler/migration/__init__.py +60 -0
- truthound/profiler/migration/base.py +345 -0
- truthound/profiler/migration/manager.py +444 -0
- truthound/profiler/migration/v1_0_to_v1_1.py +484 -0
- truthound/profiler/ml/__init__.py +73 -0
- truthound/profiler/ml/base.py +244 -0
- truthound/profiler/ml/classifier.py +507 -0
- truthound/profiler/ml/feature_extraction.py +604 -0
- truthound/profiler/ml/pretrained.py +448 -0
- truthound/profiler/ml_inference.py +1276 -0
- truthound/profiler/native_patterns.py +815 -0
- truthound/profiler/observability.py +1184 -0
- truthound/profiler/process_timeout.py +1566 -0
- truthound/profiler/progress.py +568 -0
- truthound/profiler/progress_callbacks.py +1734 -0
- truthound/profiler/quality.py +1345 -0
- truthound/profiler/resilience.py +1180 -0
- truthound/profiler/sampled_matcher.py +794 -0
- truthound/profiler/sampling.py +1288 -0
- truthound/profiler/scheduling/__init__.py +82 -0
- truthound/profiler/scheduling/protocols.py +214 -0
- truthound/profiler/scheduling/scheduler.py +474 -0
- truthound/profiler/scheduling/storage.py +457 -0
- truthound/profiler/scheduling/triggers.py +449 -0
- truthound/profiler/schema.py +603 -0
- truthound/profiler/streaming.py +685 -0
- truthound/profiler/streaming_patterns.py +1354 -0
- truthound/profiler/suite_cli.py +625 -0
- truthound/profiler/suite_config.py +789 -0
- truthound/profiler/suite_export.py +1268 -0
- truthound/profiler/table_profiler.py +547 -0
- truthound/profiler/timeout.py +565 -0
- truthound/profiler/validation.py +1532 -0
- truthound/profiler/visualization/__init__.py +118 -0
- truthound/profiler/visualization/base.py +346 -0
- truthound/profiler/visualization/generator.py +1259 -0
- truthound/profiler/visualization/plotly_renderer.py +811 -0
- truthound/profiler/visualization/renderers.py +669 -0
- truthound/profiler/visualization/sections.py +540 -0
- truthound/profiler/visualization.py +2122 -0
- truthound/profiler/yaml_validation.py +1151 -0
- truthound/py.typed +0 -0
- truthound/ratelimit/__init__.py +248 -0
- truthound/ratelimit/algorithms.py +1108 -0
- truthound/ratelimit/core.py +573 -0
- truthound/ratelimit/integration.py +532 -0
- truthound/ratelimit/limiter.py +663 -0
- truthound/ratelimit/middleware.py +700 -0
- truthound/ratelimit/policy.py +792 -0
- truthound/ratelimit/storage.py +763 -0
- truthound/rbac/__init__.py +340 -0
- truthound/rbac/core.py +976 -0
- truthound/rbac/integration.py +760 -0
- truthound/rbac/manager.py +1052 -0
- truthound/rbac/middleware.py +842 -0
- truthound/rbac/policy.py +954 -0
- truthound/rbac/storage.py +878 -0
- truthound/realtime/__init__.py +141 -0
- truthound/realtime/adapters/__init__.py +43 -0
- truthound/realtime/adapters/base.py +533 -0
- truthound/realtime/adapters/kafka.py +487 -0
- truthound/realtime/adapters/kinesis.py +479 -0
- truthound/realtime/adapters/mock.py +243 -0
- truthound/realtime/base.py +553 -0
- truthound/realtime/factory.py +382 -0
- truthound/realtime/incremental.py +660 -0
- truthound/realtime/processing/__init__.py +67 -0
- truthound/realtime/processing/exactly_once.py +575 -0
- truthound/realtime/processing/state.py +547 -0
- truthound/realtime/processing/windows.py +647 -0
- truthound/realtime/protocols.py +569 -0
- truthound/realtime/streaming.py +605 -0
- truthound/realtime/testing/__init__.py +32 -0
- truthound/realtime/testing/containers.py +615 -0
- truthound/realtime/testing/fixtures.py +484 -0
- truthound/report.py +280 -0
- truthound/reporters/__init__.py +46 -0
- truthound/reporters/_protocols.py +30 -0
- truthound/reporters/base.py +324 -0
- truthound/reporters/ci/__init__.py +66 -0
- truthound/reporters/ci/azure.py +436 -0
- truthound/reporters/ci/base.py +509 -0
- truthound/reporters/ci/bitbucket.py +567 -0
- truthound/reporters/ci/circleci.py +547 -0
- truthound/reporters/ci/detection.py +364 -0
- truthound/reporters/ci/factory.py +182 -0
- truthound/reporters/ci/github.py +388 -0
- truthound/reporters/ci/gitlab.py +471 -0
- truthound/reporters/ci/jenkins.py +525 -0
- truthound/reporters/console_reporter.py +299 -0
- truthound/reporters/factory.py +211 -0
- truthound/reporters/html_reporter.py +524 -0
- truthound/reporters/json_reporter.py +256 -0
- truthound/reporters/markdown_reporter.py +280 -0
- truthound/reporters/sdk/__init__.py +174 -0
- truthound/reporters/sdk/builder.py +558 -0
- truthound/reporters/sdk/mixins.py +1150 -0
- truthound/reporters/sdk/schema.py +1493 -0
- truthound/reporters/sdk/templates.py +666 -0
- truthound/reporters/sdk/testing.py +968 -0
- truthound/scanners.py +170 -0
- truthound/scheduling/__init__.py +122 -0
- truthound/scheduling/cron.py +1136 -0
- truthound/scheduling/presets.py +212 -0
- truthound/schema.py +275 -0
- truthound/secrets/__init__.py +173 -0
- truthound/secrets/base.py +618 -0
- truthound/secrets/cloud.py +682 -0
- truthound/secrets/integration.py +507 -0
- truthound/secrets/manager.py +633 -0
- truthound/secrets/oidc/__init__.py +172 -0
- truthound/secrets/oidc/base.py +902 -0
- truthound/secrets/oidc/credential_provider.py +623 -0
- truthound/secrets/oidc/exchangers.py +1001 -0
- truthound/secrets/oidc/github/__init__.py +110 -0
- truthound/secrets/oidc/github/claims.py +718 -0
- truthound/secrets/oidc/github/enhanced_provider.py +693 -0
- truthound/secrets/oidc/github/trust_policy.py +742 -0
- truthound/secrets/oidc/github/verification.py +723 -0
- truthound/secrets/oidc/github/workflow.py +691 -0
- truthound/secrets/oidc/providers.py +825 -0
- truthound/secrets/providers.py +506 -0
- truthound/secrets/resolver.py +495 -0
- truthound/stores/__init__.py +177 -0
- truthound/stores/backends/__init__.py +18 -0
- truthound/stores/backends/_protocols.py +340 -0
- truthound/stores/backends/azure_blob.py +530 -0
- truthound/stores/backends/concurrent_filesystem.py +915 -0
- truthound/stores/backends/connection_pool.py +1365 -0
- truthound/stores/backends/database.py +743 -0
- truthound/stores/backends/filesystem.py +538 -0
- truthound/stores/backends/gcs.py +399 -0
- truthound/stores/backends/memory.py +354 -0
- truthound/stores/backends/s3.py +434 -0
- truthound/stores/backpressure/__init__.py +84 -0
- truthound/stores/backpressure/base.py +375 -0
- truthound/stores/backpressure/circuit_breaker.py +434 -0
- truthound/stores/backpressure/monitor.py +376 -0
- truthound/stores/backpressure/strategies.py +677 -0
- truthound/stores/base.py +551 -0
- truthound/stores/batching/__init__.py +65 -0
- truthound/stores/batching/base.py +305 -0
- truthound/stores/batching/buffer.py +370 -0
- truthound/stores/batching/store.py +248 -0
- truthound/stores/batching/writer.py +521 -0
- truthound/stores/caching/__init__.py +60 -0
- truthound/stores/caching/backends.py +684 -0
- truthound/stores/caching/base.py +356 -0
- truthound/stores/caching/store.py +305 -0
- truthound/stores/compression/__init__.py +193 -0
- truthound/stores/compression/adaptive.py +694 -0
- truthound/stores/compression/base.py +514 -0
- truthound/stores/compression/pipeline.py +868 -0
- truthound/stores/compression/providers.py +672 -0
- truthound/stores/compression/streaming.py +832 -0
- truthound/stores/concurrency/__init__.py +81 -0
- truthound/stores/concurrency/atomic.py +556 -0
- truthound/stores/concurrency/index.py +775 -0
- truthound/stores/concurrency/locks.py +576 -0
- truthound/stores/concurrency/manager.py +482 -0
- truthound/stores/encryption/__init__.py +297 -0
- truthound/stores/encryption/base.py +952 -0
- truthound/stores/encryption/keys.py +1191 -0
- truthound/stores/encryption/pipeline.py +903 -0
- truthound/stores/encryption/providers.py +953 -0
- truthound/stores/encryption/streaming.py +950 -0
- truthound/stores/expectations.py +227 -0
- truthound/stores/factory.py +246 -0
- truthound/stores/migration/__init__.py +75 -0
- truthound/stores/migration/base.py +480 -0
- truthound/stores/migration/manager.py +347 -0
- truthound/stores/migration/registry.py +382 -0
- truthound/stores/migration/store.py +559 -0
- truthound/stores/observability/__init__.py +106 -0
- truthound/stores/observability/audit.py +718 -0
- truthound/stores/observability/config.py +270 -0
- truthound/stores/observability/factory.py +208 -0
- truthound/stores/observability/metrics.py +636 -0
- truthound/stores/observability/protocols.py +410 -0
- truthound/stores/observability/store.py +570 -0
- truthound/stores/observability/tracing.py +784 -0
- truthound/stores/replication/__init__.py +76 -0
- truthound/stores/replication/base.py +260 -0
- truthound/stores/replication/monitor.py +269 -0
- truthound/stores/replication/store.py +439 -0
- truthound/stores/replication/syncer.py +391 -0
- truthound/stores/results.py +359 -0
- truthound/stores/retention/__init__.py +77 -0
- truthound/stores/retention/base.py +378 -0
- truthound/stores/retention/policies.py +621 -0
- truthound/stores/retention/scheduler.py +279 -0
- truthound/stores/retention/store.py +526 -0
- truthound/stores/streaming/__init__.py +138 -0
- truthound/stores/streaming/base.py +801 -0
- truthound/stores/streaming/database.py +984 -0
- truthound/stores/streaming/filesystem.py +719 -0
- truthound/stores/streaming/reader.py +629 -0
- truthound/stores/streaming/s3.py +843 -0
- truthound/stores/streaming/writer.py +790 -0
- truthound/stores/tiering/__init__.py +108 -0
- truthound/stores/tiering/base.py +462 -0
- truthound/stores/tiering/manager.py +249 -0
- truthound/stores/tiering/policies.py +692 -0
- truthound/stores/tiering/store.py +526 -0
- truthound/stores/versioning/__init__.py +56 -0
- truthound/stores/versioning/base.py +376 -0
- truthound/stores/versioning/store.py +660 -0
- truthound/stores/versioning/strategies.py +353 -0
- truthound/types.py +56 -0
- truthound/validators/__init__.py +774 -0
- truthound/validators/aggregate/__init__.py +27 -0
- truthound/validators/aggregate/central.py +116 -0
- truthound/validators/aggregate/extremes.py +116 -0
- truthound/validators/aggregate/spread.py +118 -0
- truthound/validators/aggregate/sum.py +64 -0
- truthound/validators/aggregate/type.py +78 -0
- truthound/validators/anomaly/__init__.py +93 -0
- truthound/validators/anomaly/base.py +431 -0
- truthound/validators/anomaly/ml_based.py +1190 -0
- truthound/validators/anomaly/multivariate.py +647 -0
- truthound/validators/anomaly/statistical.py +599 -0
- truthound/validators/base.py +1089 -0
- truthound/validators/business_rule/__init__.py +46 -0
- truthound/validators/business_rule/base.py +147 -0
- truthound/validators/business_rule/checksum.py +509 -0
- truthound/validators/business_rule/financial.py +526 -0
- truthound/validators/cache.py +733 -0
- truthound/validators/completeness/__init__.py +39 -0
- truthound/validators/completeness/conditional.py +73 -0
- truthound/validators/completeness/default.py +98 -0
- truthound/validators/completeness/empty.py +103 -0
- truthound/validators/completeness/nan.py +337 -0
- truthound/validators/completeness/null.py +152 -0
- truthound/validators/cross_table/__init__.py +17 -0
- truthound/validators/cross_table/aggregate.py +333 -0
- truthound/validators/cross_table/row_count.py +122 -0
- truthound/validators/datetime/__init__.py +29 -0
- truthound/validators/datetime/format.py +78 -0
- truthound/validators/datetime/freshness.py +269 -0
- truthound/validators/datetime/order.py +73 -0
- truthound/validators/datetime/parseable.py +185 -0
- truthound/validators/datetime/range.py +202 -0
- truthound/validators/datetime/timezone.py +69 -0
- truthound/validators/distribution/__init__.py +49 -0
- truthound/validators/distribution/distribution.py +128 -0
- truthound/validators/distribution/monotonic.py +119 -0
- truthound/validators/distribution/outlier.py +178 -0
- truthound/validators/distribution/quantile.py +80 -0
- truthound/validators/distribution/range.py +254 -0
- truthound/validators/distribution/set.py +125 -0
- truthound/validators/distribution/statistical.py +459 -0
- truthound/validators/drift/__init__.py +79 -0
- truthound/validators/drift/base.py +427 -0
- truthound/validators/drift/multi_feature.py +401 -0
- truthound/validators/drift/numeric.py +395 -0
- truthound/validators/drift/psi.py +446 -0
- truthound/validators/drift/statistical.py +510 -0
- truthound/validators/enterprise.py +1658 -0
- truthound/validators/geospatial/__init__.py +80 -0
- truthound/validators/geospatial/base.py +97 -0
- truthound/validators/geospatial/boundary.py +238 -0
- truthound/validators/geospatial/coordinate.py +351 -0
- truthound/validators/geospatial/distance.py +399 -0
- truthound/validators/geospatial/polygon.py +665 -0
- truthound/validators/i18n/__init__.py +308 -0
- truthound/validators/i18n/bidi.py +571 -0
- truthound/validators/i18n/catalogs.py +570 -0
- truthound/validators/i18n/dialects.py +763 -0
- truthound/validators/i18n/extended_catalogs.py +549 -0
- truthound/validators/i18n/formatting.py +1434 -0
- truthound/validators/i18n/loader.py +1020 -0
- truthound/validators/i18n/messages.py +521 -0
- truthound/validators/i18n/plural.py +683 -0
- truthound/validators/i18n/protocols.py +855 -0
- truthound/validators/i18n/tms.py +1162 -0
- truthound/validators/localization/__init__.py +53 -0
- truthound/validators/localization/base.py +122 -0
- truthound/validators/localization/chinese.py +362 -0
- truthound/validators/localization/japanese.py +275 -0
- truthound/validators/localization/korean.py +524 -0
- truthound/validators/memory/__init__.py +94 -0
- truthound/validators/memory/approximate_knn.py +506 -0
- truthound/validators/memory/base.py +547 -0
- truthound/validators/memory/sgd_online.py +719 -0
- truthound/validators/memory/streaming_ecdf.py +753 -0
- truthound/validators/ml_feature/__init__.py +54 -0
- truthound/validators/ml_feature/base.py +249 -0
- truthound/validators/ml_feature/correlation.py +299 -0
- truthound/validators/ml_feature/leakage.py +344 -0
- truthound/validators/ml_feature/null_impact.py +270 -0
- truthound/validators/ml_feature/scale.py +264 -0
- truthound/validators/multi_column/__init__.py +89 -0
- truthound/validators/multi_column/arithmetic.py +284 -0
- truthound/validators/multi_column/base.py +231 -0
- truthound/validators/multi_column/comparison.py +273 -0
- truthound/validators/multi_column/consistency.py +312 -0
- truthound/validators/multi_column/statistical.py +299 -0
- truthound/validators/optimization/__init__.py +164 -0
- truthound/validators/optimization/aggregation.py +563 -0
- truthound/validators/optimization/covariance.py +556 -0
- truthound/validators/optimization/geo.py +626 -0
- truthound/validators/optimization/graph.py +587 -0
- truthound/validators/optimization/orchestrator.py +970 -0
- truthound/validators/optimization/profiling.py +1312 -0
- truthound/validators/privacy/__init__.py +223 -0
- truthound/validators/privacy/base.py +635 -0
- truthound/validators/privacy/ccpa.py +670 -0
- truthound/validators/privacy/gdpr.py +728 -0
- truthound/validators/privacy/global_patterns.py +604 -0
- truthound/validators/privacy/plugins.py +867 -0
- truthound/validators/profiling/__init__.py +52 -0
- truthound/validators/profiling/base.py +175 -0
- truthound/validators/profiling/cardinality.py +312 -0
- truthound/validators/profiling/entropy.py +391 -0
- truthound/validators/profiling/frequency.py +455 -0
- truthound/validators/pushdown_support.py +660 -0
- truthound/validators/query/__init__.py +91 -0
- truthound/validators/query/aggregate.py +346 -0
- truthound/validators/query/base.py +246 -0
- truthound/validators/query/column.py +249 -0
- truthound/validators/query/expression.py +274 -0
- truthound/validators/query/result.py +323 -0
- truthound/validators/query/row_count.py +264 -0
- truthound/validators/referential/__init__.py +80 -0
- truthound/validators/referential/base.py +395 -0
- truthound/validators/referential/cascade.py +391 -0
- truthound/validators/referential/circular.py +563 -0
- truthound/validators/referential/foreign_key.py +624 -0
- truthound/validators/referential/orphan.py +485 -0
- truthound/validators/registry.py +112 -0
- truthound/validators/schema/__init__.py +41 -0
- truthound/validators/schema/column_count.py +142 -0
- truthound/validators/schema/column_exists.py +80 -0
- truthound/validators/schema/column_order.py +82 -0
- truthound/validators/schema/column_pair.py +85 -0
- truthound/validators/schema/column_pair_set.py +195 -0
- truthound/validators/schema/column_type.py +94 -0
- truthound/validators/schema/multi_column.py +53 -0
- truthound/validators/schema/multi_column_aggregate.py +175 -0
- truthound/validators/schema/referential.py +274 -0
- truthound/validators/schema/table_schema.py +91 -0
- truthound/validators/schema_validator.py +219 -0
- truthound/validators/sdk/__init__.py +250 -0
- truthound/validators/sdk/builder.py +680 -0
- truthound/validators/sdk/decorators.py +474 -0
- truthound/validators/sdk/enterprise/__init__.py +211 -0
- truthound/validators/sdk/enterprise/docs.py +725 -0
- truthound/validators/sdk/enterprise/fuzzing.py +659 -0
- truthound/validators/sdk/enterprise/licensing.py +709 -0
- truthound/validators/sdk/enterprise/manager.py +543 -0
- truthound/validators/sdk/enterprise/resources.py +628 -0
- truthound/validators/sdk/enterprise/sandbox.py +766 -0
- truthound/validators/sdk/enterprise/signing.py +603 -0
- truthound/validators/sdk/enterprise/templates.py +865 -0
- truthound/validators/sdk/enterprise/versioning.py +659 -0
- truthound/validators/sdk/templates.py +757 -0
- truthound/validators/sdk/testing.py +807 -0
- truthound/validators/security/__init__.py +181 -0
- truthound/validators/security/redos/__init__.py +182 -0
- truthound/validators/security/redos/core.py +861 -0
- truthound/validators/security/redos/cpu_monitor.py +593 -0
- truthound/validators/security/redos/cve_database.py +791 -0
- truthound/validators/security/redos/ml/__init__.py +155 -0
- truthound/validators/security/redos/ml/base.py +785 -0
- truthound/validators/security/redos/ml/datasets.py +618 -0
- truthound/validators/security/redos/ml/features.py +359 -0
- truthound/validators/security/redos/ml/models.py +1000 -0
- truthound/validators/security/redos/ml/predictor.py +507 -0
- truthound/validators/security/redos/ml/storage.py +632 -0
- truthound/validators/security/redos/ml/training.py +571 -0
- truthound/validators/security/redos/ml_analyzer.py +937 -0
- truthound/validators/security/redos/optimizer.py +674 -0
- truthound/validators/security/redos/profiler.py +682 -0
- truthound/validators/security/redos/re2_engine.py +709 -0
- truthound/validators/security/redos.py +886 -0
- truthound/validators/security/sql_security.py +1247 -0
- truthound/validators/streaming/__init__.py +126 -0
- truthound/validators/streaming/base.py +292 -0
- truthound/validators/streaming/completeness.py +210 -0
- truthound/validators/streaming/mixin.py +575 -0
- truthound/validators/streaming/range.py +308 -0
- truthound/validators/streaming/sources.py +846 -0
- truthound/validators/string/__init__.py +57 -0
- truthound/validators/string/casing.py +158 -0
- truthound/validators/string/charset.py +96 -0
- truthound/validators/string/format.py +501 -0
- truthound/validators/string/json.py +77 -0
- truthound/validators/string/json_schema.py +184 -0
- truthound/validators/string/length.py +104 -0
- truthound/validators/string/like_pattern.py +237 -0
- truthound/validators/string/regex.py +202 -0
- truthound/validators/string/regex_extended.py +435 -0
- truthound/validators/table/__init__.py +88 -0
- truthound/validators/table/base.py +78 -0
- truthound/validators/table/column_count.py +198 -0
- truthound/validators/table/freshness.py +362 -0
- truthound/validators/table/row_count.py +251 -0
- truthound/validators/table/schema.py +333 -0
- truthound/validators/table/size.py +285 -0
- truthound/validators/timeout/__init__.py +102 -0
- truthound/validators/timeout/advanced/__init__.py +247 -0
- truthound/validators/timeout/advanced/circuit_breaker.py +675 -0
- truthound/validators/timeout/advanced/prediction.py +773 -0
- truthound/validators/timeout/advanced/priority.py +618 -0
- truthound/validators/timeout/advanced/redis_backend.py +770 -0
- truthound/validators/timeout/advanced/retry.py +721 -0
- truthound/validators/timeout/advanced/sampling.py +788 -0
- truthound/validators/timeout/advanced/sla.py +661 -0
- truthound/validators/timeout/advanced/telemetry.py +804 -0
- truthound/validators/timeout/cascade.py +477 -0
- truthound/validators/timeout/deadline.py +657 -0
- truthound/validators/timeout/degradation.py +525 -0
- truthound/validators/timeout/distributed.py +597 -0
- truthound/validators/timeseries/__init__.py +89 -0
- truthound/validators/timeseries/base.py +326 -0
- truthound/validators/timeseries/completeness.py +617 -0
- truthound/validators/timeseries/gap.py +485 -0
- truthound/validators/timeseries/monotonic.py +310 -0
- truthound/validators/timeseries/seasonality.py +422 -0
- truthound/validators/timeseries/trend.py +510 -0
- truthound/validators/uniqueness/__init__.py +59 -0
- truthound/validators/uniqueness/approximate.py +475 -0
- truthound/validators/uniqueness/distinct_values.py +253 -0
- truthound/validators/uniqueness/duplicate.py +118 -0
- truthound/validators/uniqueness/primary_key.py +140 -0
- truthound/validators/uniqueness/unique.py +191 -0
- truthound/validators/uniqueness/within_record.py +599 -0
- truthound/validators/utils.py +756 -0
- truthound-1.0.8.dist-info/METADATA +474 -0
- truthound-1.0.8.dist-info/RECORD +877 -0
- truthound-1.0.8.dist-info/WHEEL +4 -0
- truthound-1.0.8.dist-info/entry_points.txt +2 -0
- truthound-1.0.8.dist-info/licenses/LICENSE +190 -0
|
@@ -0,0 +1,861 @@
|
|
|
1
|
+
"""Core ReDoS Protection - Static Analysis and Safe Execution.
|
|
2
|
+
|
|
3
|
+
This module provides the foundational ReDoS protection:
|
|
4
|
+
- Static analysis of regex patterns for dangerous constructs
|
|
5
|
+
- Complexity estimation for potential exponential backtracking
|
|
6
|
+
- Safe regex compilation with configurable limits
|
|
7
|
+
- Runtime execution monitoring with timeout
|
|
8
|
+
|
|
9
|
+
This is the base module that other advanced features build upon.
|
|
10
|
+
"""
|
|
11
|
+
|
|
12
|
+
from __future__ import annotations
|
|
13
|
+
|
|
14
|
+
import re
|
|
15
|
+
import time
|
|
16
|
+
import threading
|
|
17
|
+
from dataclasses import dataclass, field
|
|
18
|
+
from enum import Enum, auto
|
|
19
|
+
from typing import Any, Callable, Protocol
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
class ReDoSRisk(Enum):
    """Severity scale describing how exposed a pattern is to ReDoS.

    Members carry ascending integer values (1 through 5), so ``value`` can
    be used to rank one level against another.
    """

    NONE = 1  # No known vulnerability patterns
    LOW = 2  # Minor concerns, likely safe
    MEDIUM = 3  # Some concerning patterns, use with caution
    HIGH = 4  # Dangerous patterns detected, avoid
    CRITICAL = 5  # Known ReDoS pattern, reject
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
@dataclass(frozen=True)
class SafeRegexConfig:
    """Immutable set of limits applied to regex patterns and their inputs.

    Attributes:
        max_pattern_length: Maximum pattern length (chars)
        max_groups: Maximum capture groups allowed
        max_quantifier_range: Maximum {n,m} range (m-n)
        max_alternations: Maximum alternation branches
        max_nested_depth: Maximum nesting depth
        allow_backreferences: Whether to allow backreferences
        allow_lookaround: Whether to allow lookahead/lookbehind
        timeout_seconds: Max execution time for matching
        max_input_length: Maximum input string length to match
    """

    max_pattern_length: int = 1000
    max_groups: int = 20
    max_quantifier_range: int = 100
    max_alternations: int = 50
    max_nested_depth: int = 10
    allow_backreferences: bool = False
    allow_lookaround: bool = True
    timeout_seconds: float = 1.0
    max_input_length: int = 100_000

    @classmethod
    def strict(cls) -> "SafeRegexConfig":
        """Build the tight preset meant for untrusted patterns."""
        limits = {
            "max_pattern_length": 500,
            "max_groups": 10,
            "max_quantifier_range": 50,
            "max_alternations": 20,
            "max_nested_depth": 5,
            "allow_backreferences": False,
            "allow_lookaround": False,
            "timeout_seconds": 0.5,
            "max_input_length": 10_000,
        }
        return cls(**limits)

    @classmethod
    def lenient(cls) -> "SafeRegexConfig":
        """Build the relaxed preset meant for trusted patterns."""
        limits = {
            "max_pattern_length": 5000,
            "max_groups": 50,
            "max_quantifier_range": 1000,
            "max_alternations": 100,
            "max_nested_depth": 20,
            "allow_backreferences": True,
            "allow_lookaround": True,
            "timeout_seconds": 5.0,
            "max_input_length": 1_000_000,
        }
        return cls(**limits)
|
|
87
|
+
|
|
88
|
+
|
|
89
|
+
@dataclass
class RegexAnalysisResult:
    """Outcome of analyzing a single regex pattern.

    Attributes:
        pattern: The analyzed pattern
        risk_level: Overall ReDoS risk level
        complexity_score: Numeric complexity estimate (0-100)
        warnings: List of warning messages
        dangerous_constructs: List of detected dangerous constructs
        metrics: Detailed pattern metrics
        is_safe: Whether the pattern is considered safe
        recommendation: Suggested action or alternative
    """

    pattern: str
    risk_level: ReDoSRisk
    complexity_score: float
    warnings: list[str] = field(default_factory=list)
    dangerous_constructs: list[str] = field(default_factory=list)
    metrics: dict[str, Any] = field(default_factory=dict)
    is_safe: bool = True
    recommendation: str = ""

    def to_dict(self) -> dict[str, Any]:
        """Return the result as a plain dictionary.

        The risk level is emitted by member name and the complexity score
        is rounded to two decimals; every other field is passed through
        unchanged.
        """
        payload: dict[str, Any] = {"pattern": self.pattern}
        payload["risk_level"] = self.risk_level.name
        payload["complexity_score"] = round(self.complexity_score, 2)
        payload["warnings"] = self.warnings
        payload["dangerous_constructs"] = self.dangerous_constructs
        payload["metrics"] = self.metrics
        payload["is_safe"] = self.is_safe
        payload["recommendation"] = self.recommendation
        return payload
|
|
125
|
+
|
|
126
|
+
|
|
127
|
+
class PatternAnalyzerProtocol(Protocol):
    """Structural interface for objects that can analyze regex patterns."""

    def analyze(self, pattern: str) -> RegexAnalysisResult:
        """Analyze a regex pattern.

        Args:
            pattern: Regex pattern to analyze

        Returns:
            The analysis result for the pattern
        """
        ...
|
|
133
|
+
|
|
134
|
+
|
|
135
|
+
class RegexComplexityAnalyzer:
    """Analyzes regex patterns for complexity and ReDoS vulnerability.

    This analyzer performs static analysis on regex patterns to detect
    potentially dangerous constructs that could lead to exponential
    backtracking (ReDoS attacks). Detection is heuristic: the pattern text
    is scanned with signature regexes and simple character counts, it is
    not parsed.

    Detection Categories:
        1. Nested Quantifiers: (a+)+ - exponential backtracking
        2. Overlapping Alternation: (a|a)+ - ambiguous matching
        3. Polynomial Backtracking: a*b*c*d* on non-matching input
        4. Atomic Group Absence: Patterns that would benefit from atomic groups
        5. Catastrophic Backreference: (a+)\\1+ with long inputs

    Example:
        analyzer = RegexComplexityAnalyzer()
        result = analyzer.analyze(r"(a+)+b")
        print(result.risk_level)  # ReDoSRisk.CRITICAL
        print(result.dangerous_constructs)  # ["nested_quantifiers"]
    """

    # Dangerous pattern signatures: (detector regex, construct name, risk).
    # Each detector is searched against the *text* of the analyzed pattern.
    DANGEROUS_PATTERNS: list[tuple[str, str, ReDoSRisk]] = [
        # Nested quantifiers - exponential
        (r"\([^)]*[+*][^)]*\)[+*]", "nested_quantifiers", ReDoSRisk.CRITICAL),
        (r"\([^)]*[+*][^)]*\)\{[0-9]+,\}", "nested_quantifiers_bounded", ReDoSRisk.CRITICAL),

        # Nested groups with quantifiers
        (r"\(\([^)]*\)[+*]\)[+*]", "deeply_nested_quantifiers", ReDoSRisk.CRITICAL),

        # Overlapping character classes in alternation
        (r"\([^)]*\|[^)]*\)[+*]", "alternation_with_quantifier", ReDoSRisk.HIGH),

        # Backreference with quantifier
        (r"\\[0-9]+[+*]", "quantified_backreference", ReDoSRisk.HIGH),
        (r"\\[0-9]+\{[0-9]+,\}", "bounded_quantified_backreference", ReDoSRisk.HIGH),

        # Multiple adjacent quantifiers (greedy conflict)
        (r"[+*][+*]", "adjacent_quantifiers", ReDoSRisk.MEDIUM),

        # Long alternation chains
        (r"(?:\|[^|)]+){10,}", "long_alternation_chain", ReDoSRisk.MEDIUM),

        # Greedy quantifier followed by same pattern.
        # The "+" is escaped so the detector matches the literal text ".+."
        # (an unescaped "+" would instead match two or more literal dots).
        (r"\.\+\.", "greedy_dot_conflict", ReDoSRisk.MEDIUM),
        (r"\.\*\.", "greedy_dotstar_conflict", ReDoSRisk.MEDIUM),

        # Unbounded repetition at start
        (r"^[+*]", "start_with_quantifier", ReDoSRisk.LOW),

        # Atomic group syntax "(?>...)"; the stdlib `re` module only accepts
        # it from Python 3.11 onward.
        (r"\(\?\>", "atomic_group_attempt", ReDoSRisk.LOW),
    ]

    # Quantifier patterns for extraction. Capture groups, in order:
    # 1 = n of {n}, 2 = n of {n,}, 3 and 4 = n and m of {n,m}.
    QUANTIFIER_PATTERN = re.compile(
        r"""
        (?:
            \+\?? |           # + or +?
            \*\?? |           # * or *?
            \?\?? |           # ? or ??
            \{(\d+)\} |       # {n}
            \{(\d+),\} |      # {n,}
            \{(\d+),(\d+)\}   # {n,m}
        )
        """,
        re.VERBOSE,
    )

    def __init__(self, config: SafeRegexConfig | None = None):
        """Initialize the analyzer.

        Args:
            config: Safety configuration; a default ``SafeRegexConfig`` is
                used when omitted
        """
        self.config = config or SafeRegexConfig()
        self._compile_dangerous_patterns()

    def _compile_dangerous_patterns(self) -> None:
        """Pre-compile dangerous pattern detectors."""
        self._compiled_patterns: list[tuple[re.Pattern, str, ReDoSRisk]] = []
        for pattern_str, name, risk in self.DANGEROUS_PATTERNS:
            try:
                compiled = re.compile(pattern_str)
            except re.error:
                # Best effort: a detector that does not compile on this
                # interpreter is skipped rather than failing construction.
                continue
            self._compiled_patterns.append((compiled, name, risk))

    @staticmethod
    def _escalate(current: ReDoSRisk, candidate: ReDoSRisk) -> ReDoSRisk:
        """Return whichever of the two risk levels is more severe."""
        return max(current, candidate, key=lambda r: r.value)

    def analyze(self, pattern: str) -> RegexAnalysisResult:
        """Analyze a regex pattern for ReDoS vulnerability.

        Args:
            pattern: Regex pattern to analyze

        Returns:
            RegexAnalysisResult with risk assessment. ``is_safe`` is True
            only when the overall risk is NONE or LOW, and the complexity
            score is capped at 100.
        """
        warnings: list[str] = []
        dangerous_constructs: list[str] = []
        max_risk = ReDoSRisk.NONE
        complexity_score = 0.0

        # Basic validation: an empty pattern is trivially safe
        if not pattern:
            return RegexAnalysisResult(
                pattern=pattern,
                risk_level=ReDoSRisk.NONE,
                complexity_score=0.0,
                is_safe=True,
            )

        # Check pattern length
        if len(pattern) > self.config.max_pattern_length:
            warnings.append(
                f"Pattern length ({len(pattern)}) exceeds limit "
                f"({self.config.max_pattern_length})"
            )
            max_risk = self._escalate(max_risk, ReDoSRisk.MEDIUM)
            complexity_score += 10

        # Extract metrics
        metrics = self._extract_metrics(pattern)

        # NOTE(review): exceeding the group-count, lookaround or
        # quantifier-range limits below only adds a warning and score; it
        # does not raise the risk level, so is_safe is unaffected by them.
        # Confirm this is the intended enforcement.

        # Check group count
        if metrics["group_count"] > self.config.max_groups:
            warnings.append(
                f"Too many groups ({metrics['group_count']} > {self.config.max_groups})"
            )
            complexity_score += 5

        # Check nesting depth
        if metrics["max_nesting"] > self.config.max_nested_depth:
            warnings.append(
                f"Nesting too deep ({metrics['max_nesting']} > {self.config.max_nested_depth})"
            )
            complexity_score += 15
            max_risk = self._escalate(max_risk, ReDoSRisk.MEDIUM)

        # Check for backreferences
        if metrics["has_backreference"] and not self.config.allow_backreferences:
            warnings.append("Backreferences not allowed")
            dangerous_constructs.append("backreference")
            complexity_score += 20
            max_risk = self._escalate(max_risk, ReDoSRisk.HIGH)

        # Check for lookaround
        if metrics["has_lookaround"] and not self.config.allow_lookaround:
            warnings.append("Lookaround assertions not allowed")
            complexity_score += 5

        # Check quantifier ranges ({n,} has no upper bound and is skipped)
        for qmin, qmax in metrics.get("quantifier_ranges", []):
            if qmax is not None and qmax - qmin > self.config.max_quantifier_range:
                warnings.append(
                    f"Quantifier range too large: {{{qmin},{qmax}}}"
                )
                complexity_score += 10

        # Check for dangerous patterns
        for compiled, name, risk in self._compiled_patterns:
            if compiled.search(pattern):
                dangerous_constructs.append(name)
                max_risk = self._escalate(max_risk, risk)
                complexity_score += self._risk_to_score(risk)

        # Additional heuristic checks
        complexity_score += self._analyze_quantifier_density(pattern)
        complexity_score += self._analyze_alternation_complexity(pattern)

        # Determine if safe
        is_safe = max_risk.value <= ReDoSRisk.LOW.value

        # Generate recommendation
        recommendation = self._generate_recommendation(
            max_risk, dangerous_constructs, warnings
        )

        return RegexAnalysisResult(
            pattern=pattern,
            risk_level=max_risk,
            complexity_score=min(complexity_score, 100),
            warnings=warnings,
            dangerous_constructs=dangerous_constructs,
            metrics=metrics,
            is_safe=is_safe,
            recommendation=recommendation,
        )

    def _extract_metrics(self, pattern: str) -> dict[str, Any]:
        """Extract metrics from pattern.

        The counts are textual heuristics: parentheses and ``|`` are counted
        as characters, without accounting for escapes or character classes.

        Args:
            pattern: Regex pattern

        Returns:
            Dictionary of metrics
        """
        metrics: dict[str, Any] = {
            "length": len(pattern),
            "group_count": 0,
            "max_nesting": 0,
            "quantifier_count": 0,
            "alternation_count": pattern.count("|"),
            # Numeric (\1 .. \9) and named ((?P=name)) backreferences
            "has_backreference": bool(re.search(r"\\[1-9]|\(\?P=", pattern)),
            "has_lookaround": bool(re.search(r"\(\?[=!<]", pattern)),
            "has_atomic": bool(re.search(r"\(\?>", pattern)),
            "quantifier_ranges": [],
        }

        # Count groups and nesting
        depth = 0
        max_depth = 0
        for char in pattern:
            if char == "(":
                depth += 1
                max_depth = max(max_depth, depth)
            elif char == ")":
                # Clamp at zero so unbalanced ")" cannot drive depth negative
                depth = max(0, depth - 1)

        # Every "(" that is not followed by "?" is counted as a group
        metrics["group_count"] = pattern.count("(") - pattern.count("(?")
        metrics["max_nesting"] = max_depth

        # Extract quantifier information
        for match in self.QUANTIFIER_PATTERN.finditer(pattern):
            metrics["quantifier_count"] += 1
            exact, open_min, range_min, range_max = match.groups()
            if exact is not None:  # {n}
                n = int(exact)
                metrics["quantifier_ranges"].append((n, n))
            elif open_min is not None:  # {n,}
                metrics["quantifier_ranges"].append((int(open_min), None))
            elif range_min is not None and range_max is not None:  # {n,m}
                metrics["quantifier_ranges"].append((int(range_min), int(range_max)))

        return metrics

    def _analyze_quantifier_density(self, pattern: str) -> float:
        """Analyze quantifier density for complexity.

        High density of quantifiers increases backtracking potential.

        Returns:
            The share of characters in ``+*?{}`` scaled to the range 0-20
        """
        if not pattern:
            return 0.0
        quantifier_chars = sum(1 for c in pattern if c in "+*?{}")
        density = quantifier_chars / len(pattern)
        return density * 20  # Scale to 0-20

    def _analyze_alternation_complexity(self, pattern: str) -> float:
        """Analyze alternation complexity.

        Overlapping alternatives can cause exponential matching.

        Returns:
            15 for each quantified group that contains an alternation
        """
        if "|" not in pattern:
            return 0

        # Count alternations in quantified groups
        quantified_alt_pattern = r"\([^)]*\|[^)]*\)[+*?]"
        matches = re.findall(quantified_alt_pattern, pattern)

        return len(matches) * 15  # Each quantified alternation adds risk

    def _risk_to_score(self, risk: ReDoSRisk) -> float:
        """Convert risk level to complexity score contribution."""
        scores = {
            ReDoSRisk.NONE: 0,
            ReDoSRisk.LOW: 5,
            ReDoSRisk.MEDIUM: 15,
            ReDoSRisk.HIGH: 30,
            ReDoSRisk.CRITICAL: 50,
        }
        return scores.get(risk, 0)

    def _generate_recommendation(
        self,
        risk: ReDoSRisk,
        constructs: list[str],
        warnings: list[str],
    ) -> str:
        """Generate recommendation based on analysis.

        Args:
            risk: Overall risk level
            constructs: Dangerous constructs found
            warnings: Warning messages

        Returns:
            Recommendation string
        """
        if risk == ReDoSRisk.NONE:
            return "Pattern appears safe."

        if risk == ReDoSRisk.LOW:
            return "Pattern has minor concerns but is likely safe for typical inputs."

        if risk == ReDoSRisk.MEDIUM:
            return (
                "Pattern has moderate risk. Consider simplifying or adding input "
                "length limits."
            )

        if risk == ReDoSRisk.HIGH:
            recommendations = ["Pattern has high ReDoS risk. Consider:"]
            if "nested_quantifiers" in constructs:
                recommendations.append("- Avoid nested quantifiers like (a+)+")
            if "alternation_with_quantifier" in constructs:
                recommendations.append("- Avoid quantified alternation like (a|b)+")
            if "quantified_backreference" in constructs:
                recommendations.append("- Avoid quantified backreferences like (a+)\\1+")
            recommendations.append("- Use possessive quantifiers if available")
            recommendations.append("- Limit input length strictly")
            return "\n".join(recommendations)

        # CRITICAL
        return (
            "CRITICAL: Pattern contains known ReDoS vulnerability. "
            "Do NOT use with untrusted input. Rewrite the pattern to avoid "
            "nested quantifiers and overlapping alternatives."
        )
|
|
456
|
+
|
|
457
|
+
|
|
458
|
+
class RegexSafetyChecker:
|
|
459
|
+
"""High-level API for checking regex pattern safety.
|
|
460
|
+
|
|
461
|
+
This class provides a simple interface for validating regex patterns
|
|
462
|
+
before use. It combines static analysis with optional runtime testing.
|
|
463
|
+
|
|
464
|
+
Example:
|
|
465
|
+
checker = RegexSafetyChecker()
|
|
466
|
+
|
|
467
|
+
# Quick check
|
|
468
|
+
is_safe, warning = checker.check(r"^[a-z]+$")
|
|
469
|
+
# is_safe = True, warning = None
|
|
470
|
+
|
|
471
|
+
# Check dangerous pattern
|
|
472
|
+
is_safe, warning = checker.check(r"(a+)+b")
|
|
473
|
+
# is_safe = False, warning = "ReDoS risk (CRITICAL): nested_quantifiers"
|
|
474
|
+
|
|
475
|
+
# Check with custom config
|
|
476
|
+
config = SafeRegexConfig.strict()
|
|
477
|
+
checker = RegexSafetyChecker(config)
|
|
478
|
+
"""
|
|
479
|
+
|
|
480
|
+
def __init__(self, config: SafeRegexConfig | None = None):
    """Set up the checker and the analyzer it delegates to.

    Args:
        config: Safety configuration; a default ``SafeRegexConfig`` is
            created when none is supplied
    """
    settings = config or SafeRegexConfig()
    self.config = settings
    # The analyzer shares the same configuration object as the checker.
    self.analyzer = RegexComplexityAnalyzer(settings)
|
|
487
|
+
self.analyzer = RegexComplexityAnalyzer(self.config)
|
|
488
|
+
|
|
489
|
+
def check(self, pattern: str) -> tuple[bool, str | None]:
|
|
490
|
+
"""Check if a regex pattern is safe to use.
|
|
491
|
+
|
|
492
|
+
Args:
|
|
493
|
+
pattern: Regex pattern to check
|
|
494
|
+
|
|
495
|
+
Returns:
|
|
496
|
+
Tuple of (is_safe, warning_message)
|
|
497
|
+
"""
|
|
498
|
+
# Length check
|
|
499
|
+
if len(pattern) > self.config.max_pattern_length:
|
|
500
|
+
return False, f"Pattern too long ({len(pattern)} > {self.config.max_pattern_length})"
|
|
501
|
+
|
|
502
|
+
# Syntax validation
|
|
503
|
+
try:
|
|
504
|
+
re.compile(pattern)
|
|
505
|
+
except re.error as e:
|
|
506
|
+
return False, f"Invalid regex syntax: {e}"
|
|
507
|
+
|
|
508
|
+
# Analyze for ReDoS
|
|
509
|
+
result = self.analyzer.analyze(pattern)
|
|
510
|
+
|
|
511
|
+
if not result.is_safe:
|
|
512
|
+
warnings = "; ".join(result.warnings) if result.warnings else ""
|
|
513
|
+
constructs = ", ".join(result.dangerous_constructs)
|
|
514
|
+
message = f"ReDoS risk ({result.risk_level.name})"
|
|
515
|
+
if constructs:
|
|
516
|
+
message += f": {constructs}"
|
|
517
|
+
if warnings:
|
|
518
|
+
message += f". {warnings}"
|
|
519
|
+
return False, message
|
|
520
|
+
|
|
521
|
+
return True, None
|
|
522
|
+
|
|
523
|
+
def check_pattern(self, pattern: str) -> tuple[bool, str | None]:
|
|
524
|
+
"""Alias for check() for backward compatibility."""
|
|
525
|
+
return self.check(pattern)
|
|
526
|
+
|
|
527
|
+
def analyze(self, pattern: str) -> RegexAnalysisResult:
|
|
528
|
+
"""Get detailed analysis of a pattern.
|
|
529
|
+
|
|
530
|
+
Args:
|
|
531
|
+
pattern: Regex pattern to analyze
|
|
532
|
+
|
|
533
|
+
Returns:
|
|
534
|
+
RegexAnalysisResult with full details
|
|
535
|
+
"""
|
|
536
|
+
return self.analyzer.analyze(pattern)
|
|
537
|
+
|
|
538
|
+
def validate_and_compile(
|
|
539
|
+
self,
|
|
540
|
+
pattern: str,
|
|
541
|
+
flags: int = 0,
|
|
542
|
+
) -> re.Pattern:
|
|
543
|
+
"""Validate pattern and compile if safe.
|
|
544
|
+
|
|
545
|
+
Args:
|
|
546
|
+
pattern: Regex pattern
|
|
547
|
+
flags: Regex flags
|
|
548
|
+
|
|
549
|
+
Returns:
|
|
550
|
+
Compiled pattern
|
|
551
|
+
|
|
552
|
+
Raises:
|
|
553
|
+
ValueError: If pattern is unsafe or invalid
|
|
554
|
+
"""
|
|
555
|
+
is_safe, warning = self.check(pattern)
|
|
556
|
+
if not is_safe:
|
|
557
|
+
raise ValueError(f"Unsafe regex pattern: {warning}")
|
|
558
|
+
|
|
559
|
+
return re.compile(pattern, flags)
|
|
560
|
+
|
|
561
|
+
|
|
562
|
+
class SafeRegexExecutor:
    """Execute regex matching with timeout protection.

    This class wraps regex operations so that the caller stops waiting
    after ``timeout_seconds`` and inputs longer than ``max_input_length``
    are rejected up front.

    Example:
        executor = SafeRegexExecutor(timeout_seconds=1.0)

        # Safe execution
        result = executor.match(r"^[a-z]+$", "hello")
        # result = <Match object>

        # Timeout on dangerous pattern
        result = executor.match(r"(a+)+b", "a" * 30)
        # Raises TimeoutError after 1 second
    """

    def __init__(
        self,
        timeout_seconds: float = 1.0,
        max_input_length: int = 100_000,
    ):
        """Initialize the executor.

        Args:
            timeout_seconds: Maximum execution time
            max_input_length: Maximum input string length
        """
        self.timeout_seconds = timeout_seconds
        self.max_input_length = max_input_length

    def match(
        self,
        pattern: str | re.Pattern,
        string: str,
        flags: int = 0,
    ) -> re.Match | None:
        """Execute regex match with timeout.

        Args:
            pattern: Regex pattern or compiled pattern
            string: String to match
            flags: Regex flags (used only if pattern is a string)

        Returns:
            Match object or None

        Raises:
            TimeoutError: If matching exceeds timeout
            ValueError: If input exceeds max length
        """
        compiled = self._prepare(pattern, string, flags)
        return self._execute_with_timeout(compiled.match, string)

    def search(
        self,
        pattern: str | re.Pattern,
        string: str,
        flags: int = 0,
    ) -> re.Match | None:
        """Execute regex search with timeout.

        Args:
            pattern: Regex pattern or compiled pattern
            string: String to search
            flags: Regex flags (used only if pattern is a string)

        Returns:
            Match object or None

        Raises:
            TimeoutError: If search exceeds timeout
            ValueError: If input exceeds max length
        """
        compiled = self._prepare(pattern, string, flags)
        return self._execute_with_timeout(compiled.search, string)

    def findall(
        self,
        pattern: str | re.Pattern,
        string: str,
        flags: int = 0,
    ) -> list[Any]:
        """Execute regex findall with timeout.

        Args:
            pattern: Regex pattern or compiled pattern
            string: String to search
            flags: Regex flags (used only if pattern is a string)

        Returns:
            List of matches

        Raises:
            TimeoutError: If operation exceeds timeout
            ValueError: If input exceeds max length
        """
        compiled = self._prepare(pattern, string, flags)
        return self._execute_with_timeout(compiled.findall, string)

    def _prepare(
        self,
        pattern: str | re.Pattern,
        string: str,
        flags: int,
    ) -> re.Pattern:
        """Enforce the input length limit and return a compiled pattern.

        Args:
            pattern: Regex pattern or compiled pattern
            string: Input string whose length is checked
            flags: Regex flags, applied only when ``pattern`` is a string

        Returns:
            ``pattern`` compiled with ``flags`` if it was a string, otherwise
            ``pattern`` unchanged.

        Raises:
            ValueError: If input exceeds max length
        """
        # Reject oversized input before spending any time compiling.
        if len(string) > self.max_input_length:
            raise ValueError(
                f"Input too long ({len(string)} > {self.max_input_length})"
            )

        if isinstance(pattern, str):
            return re.compile(pattern, flags)
        # Already compiled: its own flags apply, ``flags`` is ignored.
        return pattern

    def _execute_with_timeout(
        self,
        func: Callable,
        *args: Any,
    ) -> Any:
        """Execute function with timeout.

        Runs ``func`` in a daemon thread and waits at most
        ``self.timeout_seconds`` for it to finish. On timeout the worker is
        not cancelled; it keeps running in the background until ``func``
        returns on its own.

        NOTE(review): CPython's ``re`` engine is believed to hold the GIL
        while matching, which could delay this timeout for backtracking-heavy
        patterns -- confirm on the target interpreter.

        Args:
            func: Function to execute
            *args: Function arguments

        Returns:
            Function result

        Raises:
            TimeoutError: If execution exceeds timeout
            Exception: Any ``Exception`` raised by ``func`` is re-raised in
                the calling thread.
        """
        # One-element lists act as mutable cells the worker can write into.
        result: list[Any] = [None]
        exception: list[Exception | None] = [None]
        completed = threading.Event()

        def target() -> None:
            try:
                result[0] = func(*args)
            except Exception as e:
                exception[0] = e
            finally:
                # Always signal, so the waiter never blocks for the full
                # timeout after the call has already ended.
                completed.set()

        thread = threading.Thread(target=target, daemon=True)
        thread.start()

        if not completed.wait(timeout=self.timeout_seconds):
            raise TimeoutError(
                f"Regex operation timed out after {self.timeout_seconds}s"
            )

        # Compare against None: an exception object may define __bool__ or
        # __len__ and evaluate as falsy, which would hide the failure.
        if exception[0] is not None:
            raise exception[0]

        return result[0]
|
|
731
|
+
|
|
732
|
+
|
|
733
|
+
# ============================================================================
|
|
734
|
+
# Module-level convenience functions
|
|
735
|
+
# ============================================================================
|
|
736
|
+
|
|
737
|
+
|
|
738
|
+
def check_regex_safety(
    pattern: str,
    config: SafeRegexConfig | None = None,
) -> tuple[bool, str | None]:
    """Report whether a regex pattern passes the safety checks.

    Convenience wrapper: builds a ``RegexSafetyChecker`` from ``config`` and
    returns the result of its ``check`` method.

    Args:
        pattern: Regex pattern to check
        config: Optional safety configuration, passed to the checker as-is

    Returns:
        Tuple of (is_safe, warning_message)

    Example:
        is_safe, warning = check_regex_safety(r"(a+)+b")
        # is_safe = False, warning = "ReDoS risk (CRITICAL): nested_quantifiers"
    """
    return RegexSafetyChecker(config).check(pattern)
|
|
757
|
+
|
|
758
|
+
|
|
759
|
+
def analyze_regex_complexity(
    pattern: str,
    config: SafeRegexConfig | None = None,
) -> RegexAnalysisResult:
    """Return the detailed complexity analysis for a regex pattern.

    Convenience wrapper: builds a ``RegexComplexityAnalyzer`` from ``config``
    and returns the result of its ``analyze`` method.

    Args:
        pattern: Regex pattern to analyze
        config: Optional safety configuration, passed to the analyzer as-is

    Returns:
        RegexAnalysisResult with full analysis

    Example:
        result = analyze_regex_complexity(r"^[a-z]+@[a-z]+\\.com$")
        print(result.risk_level)  # e.g. ReDoSRisk.LOW
        print(result.complexity_score)  # e.g. 2.5
    """
    return RegexComplexityAnalyzer(config).analyze(pattern)
|
|
779
|
+
|
|
780
|
+
|
|
781
|
+
def create_safe_regex(
    pattern: str,
    flags: int = 0,
    config: SafeRegexConfig | None = None,
) -> re.Pattern:
    """Compile a regex pattern once it has passed safety validation.

    Convenience wrapper: builds a ``RegexSafetyChecker`` from ``config`` and
    returns the result of its ``validate_and_compile`` method.

    Args:
        pattern: Regex pattern to compile
        flags: Regex flags
        config: Optional safety configuration, passed to the checker as-is

    Returns:
        Compiled regex pattern

    Raises:
        ValueError: If pattern is unsafe or invalid

    Example:
        try:
            compiled = create_safe_regex(r"^[a-z]+$")
            # Use compiled pattern...
        except ValueError as e:
            print(f"Unsafe pattern: {e}")
    """
    return RegexSafetyChecker(config).validate_and_compile(pattern, flags)
|
|
808
|
+
|
|
809
|
+
|
|
810
|
+
def safe_match(
    pattern: str,
    string: str,
    timeout: float = 1.0,
    flags: int = 0,
) -> re.Match | None:
    """Run a regex match under timeout protection.

    Convenience wrapper: creates a ``SafeRegexExecutor`` with
    ``timeout_seconds=timeout`` and returns the result of its ``match``
    method.

    Args:
        pattern: Regex pattern
        string: String to match
        timeout: Maximum execution time in seconds
        flags: Regex flags

    Returns:
        Match object or None

    Raises:
        TimeoutError: If matching exceeds timeout
        ValueError: If input is too long

    Example:
        result = safe_match(r"^[a-z]+$", "hello", timeout=0.5)
        if result:
            print("Matched!")
    """
    return SafeRegexExecutor(timeout_seconds=timeout).match(pattern, string, flags)
|
|
838
|
+
|
|
839
|
+
|
|
840
|
+
def safe_search(
    pattern: str,
    string: str,
    timeout: float = 1.0,
    flags: int = 0,
) -> re.Match | None:
    """Run a regex search under timeout protection.

    Convenience wrapper: creates a ``SafeRegexExecutor`` with
    ``timeout_seconds=timeout`` and returns the result of its ``search``
    method.

    Args:
        pattern: Regex pattern
        string: String to search
        timeout: Maximum execution time in seconds
        flags: Regex flags

    Returns:
        Match object or None

    Raises:
        TimeoutError: If search exceeds timeout
    """
    return SafeRegexExecutor(timeout_seconds=timeout).search(pattern, string, flags)
|