truthound 1.0.8__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- truthound/__init__.py +162 -0
- truthound/adapters.py +100 -0
- truthound/api.py +365 -0
- truthound/audit/__init__.py +248 -0
- truthound/audit/core.py +967 -0
- truthound/audit/filters.py +620 -0
- truthound/audit/formatters.py +707 -0
- truthound/audit/logger.py +902 -0
- truthound/audit/middleware.py +571 -0
- truthound/audit/storage.py +1083 -0
- truthound/benchmark/__init__.py +123 -0
- truthound/benchmark/base.py +757 -0
- truthound/benchmark/comparison.py +635 -0
- truthound/benchmark/generators.py +706 -0
- truthound/benchmark/reporters.py +718 -0
- truthound/benchmark/runner.py +635 -0
- truthound/benchmark/scenarios.py +712 -0
- truthound/cache.py +252 -0
- truthound/checkpoint/__init__.py +136 -0
- truthound/checkpoint/actions/__init__.py +164 -0
- truthound/checkpoint/actions/base.py +324 -0
- truthound/checkpoint/actions/custom.py +234 -0
- truthound/checkpoint/actions/discord_notify.py +290 -0
- truthound/checkpoint/actions/email_notify.py +405 -0
- truthound/checkpoint/actions/github_action.py +406 -0
- truthound/checkpoint/actions/opsgenie.py +1499 -0
- truthound/checkpoint/actions/pagerduty.py +226 -0
- truthound/checkpoint/actions/slack_notify.py +233 -0
- truthound/checkpoint/actions/store_result.py +249 -0
- truthound/checkpoint/actions/teams_notify.py +1570 -0
- truthound/checkpoint/actions/telegram_notify.py +419 -0
- truthound/checkpoint/actions/update_docs.py +552 -0
- truthound/checkpoint/actions/webhook.py +293 -0
- truthound/checkpoint/analytics/__init__.py +147 -0
- truthound/checkpoint/analytics/aggregations/__init__.py +23 -0
- truthound/checkpoint/analytics/aggregations/rollup.py +481 -0
- truthound/checkpoint/analytics/aggregations/time_bucket.py +306 -0
- truthound/checkpoint/analytics/analyzers/__init__.py +17 -0
- truthound/checkpoint/analytics/analyzers/anomaly.py +386 -0
- truthound/checkpoint/analytics/analyzers/base.py +270 -0
- truthound/checkpoint/analytics/analyzers/forecast.py +421 -0
- truthound/checkpoint/analytics/analyzers/trend.py +314 -0
- truthound/checkpoint/analytics/models.py +292 -0
- truthound/checkpoint/analytics/protocols.py +549 -0
- truthound/checkpoint/analytics/service.py +718 -0
- truthound/checkpoint/analytics/stores/__init__.py +16 -0
- truthound/checkpoint/analytics/stores/base.py +306 -0
- truthound/checkpoint/analytics/stores/memory_store.py +353 -0
- truthound/checkpoint/analytics/stores/sqlite_store.py +557 -0
- truthound/checkpoint/analytics/stores/timescale_store.py +501 -0
- truthound/checkpoint/async_actions.py +794 -0
- truthound/checkpoint/async_base.py +708 -0
- truthound/checkpoint/async_checkpoint.py +617 -0
- truthound/checkpoint/async_runner.py +639 -0
- truthound/checkpoint/checkpoint.py +527 -0
- truthound/checkpoint/ci/__init__.py +61 -0
- truthound/checkpoint/ci/detector.py +355 -0
- truthound/checkpoint/ci/reporter.py +436 -0
- truthound/checkpoint/ci/templates.py +454 -0
- truthound/checkpoint/circuitbreaker/__init__.py +133 -0
- truthound/checkpoint/circuitbreaker/breaker.py +542 -0
- truthound/checkpoint/circuitbreaker/core.py +252 -0
- truthound/checkpoint/circuitbreaker/detection.py +459 -0
- truthound/checkpoint/circuitbreaker/middleware.py +389 -0
- truthound/checkpoint/circuitbreaker/registry.py +357 -0
- truthound/checkpoint/distributed/__init__.py +139 -0
- truthound/checkpoint/distributed/backends/__init__.py +35 -0
- truthound/checkpoint/distributed/backends/celery_backend.py +503 -0
- truthound/checkpoint/distributed/backends/kubernetes_backend.py +696 -0
- truthound/checkpoint/distributed/backends/local_backend.py +397 -0
- truthound/checkpoint/distributed/backends/ray_backend.py +625 -0
- truthound/checkpoint/distributed/base.py +774 -0
- truthound/checkpoint/distributed/orchestrator.py +765 -0
- truthound/checkpoint/distributed/protocols.py +842 -0
- truthound/checkpoint/distributed/registry.py +449 -0
- truthound/checkpoint/idempotency/__init__.py +120 -0
- truthound/checkpoint/idempotency/core.py +295 -0
- truthound/checkpoint/idempotency/fingerprint.py +454 -0
- truthound/checkpoint/idempotency/locking.py +604 -0
- truthound/checkpoint/idempotency/service.py +592 -0
- truthound/checkpoint/idempotency/stores.py +653 -0
- truthound/checkpoint/monitoring/__init__.py +134 -0
- truthound/checkpoint/monitoring/aggregators/__init__.py +15 -0
- truthound/checkpoint/monitoring/aggregators/base.py +372 -0
- truthound/checkpoint/monitoring/aggregators/realtime.py +300 -0
- truthound/checkpoint/monitoring/aggregators/window.py +493 -0
- truthound/checkpoint/monitoring/collectors/__init__.py +17 -0
- truthound/checkpoint/monitoring/collectors/base.py +257 -0
- truthound/checkpoint/monitoring/collectors/memory_collector.py +617 -0
- truthound/checkpoint/monitoring/collectors/prometheus_collector.py +451 -0
- truthound/checkpoint/monitoring/collectors/redis_collector.py +518 -0
- truthound/checkpoint/monitoring/events.py +410 -0
- truthound/checkpoint/monitoring/protocols.py +636 -0
- truthound/checkpoint/monitoring/service.py +578 -0
- truthound/checkpoint/monitoring/views/__init__.py +17 -0
- truthound/checkpoint/monitoring/views/base.py +172 -0
- truthound/checkpoint/monitoring/views/queue_view.py +220 -0
- truthound/checkpoint/monitoring/views/task_view.py +240 -0
- truthound/checkpoint/monitoring/views/worker_view.py +263 -0
- truthound/checkpoint/registry.py +337 -0
- truthound/checkpoint/runner.py +356 -0
- truthound/checkpoint/transaction/__init__.py +133 -0
- truthound/checkpoint/transaction/base.py +389 -0
- truthound/checkpoint/transaction/compensatable.py +537 -0
- truthound/checkpoint/transaction/coordinator.py +576 -0
- truthound/checkpoint/transaction/executor.py +622 -0
- truthound/checkpoint/transaction/idempotency.py +534 -0
- truthound/checkpoint/transaction/saga/__init__.py +143 -0
- truthound/checkpoint/transaction/saga/builder.py +584 -0
- truthound/checkpoint/transaction/saga/definition.py +515 -0
- truthound/checkpoint/transaction/saga/event_store.py +542 -0
- truthound/checkpoint/transaction/saga/patterns.py +833 -0
- truthound/checkpoint/transaction/saga/runner.py +718 -0
- truthound/checkpoint/transaction/saga/state_machine.py +793 -0
- truthound/checkpoint/transaction/saga/strategies.py +780 -0
- truthound/checkpoint/transaction/saga/testing.py +886 -0
- truthound/checkpoint/triggers/__init__.py +58 -0
- truthound/checkpoint/triggers/base.py +237 -0
- truthound/checkpoint/triggers/event.py +385 -0
- truthound/checkpoint/triggers/schedule.py +355 -0
- truthound/cli.py +2358 -0
- truthound/cli_modules/__init__.py +124 -0
- truthound/cli_modules/advanced/__init__.py +45 -0
- truthound/cli_modules/advanced/benchmark.py +343 -0
- truthound/cli_modules/advanced/docs.py +225 -0
- truthound/cli_modules/advanced/lineage.py +209 -0
- truthound/cli_modules/advanced/ml.py +320 -0
- truthound/cli_modules/advanced/realtime.py +196 -0
- truthound/cli_modules/checkpoint/__init__.py +46 -0
- truthound/cli_modules/checkpoint/init.py +114 -0
- truthound/cli_modules/checkpoint/list.py +71 -0
- truthound/cli_modules/checkpoint/run.py +159 -0
- truthound/cli_modules/checkpoint/validate.py +67 -0
- truthound/cli_modules/common/__init__.py +71 -0
- truthound/cli_modules/common/errors.py +414 -0
- truthound/cli_modules/common/options.py +419 -0
- truthound/cli_modules/common/output.py +507 -0
- truthound/cli_modules/common/protocol.py +552 -0
- truthound/cli_modules/core/__init__.py +48 -0
- truthound/cli_modules/core/check.py +123 -0
- truthound/cli_modules/core/compare.py +104 -0
- truthound/cli_modules/core/learn.py +57 -0
- truthound/cli_modules/core/mask.py +77 -0
- truthound/cli_modules/core/profile.py +65 -0
- truthound/cli_modules/core/scan.py +61 -0
- truthound/cli_modules/profiler/__init__.py +51 -0
- truthound/cli_modules/profiler/auto_profile.py +175 -0
- truthound/cli_modules/profiler/metadata.py +107 -0
- truthound/cli_modules/profiler/suite.py +283 -0
- truthound/cli_modules/registry.py +431 -0
- truthound/cli_modules/scaffolding/__init__.py +89 -0
- truthound/cli_modules/scaffolding/base.py +631 -0
- truthound/cli_modules/scaffolding/commands.py +545 -0
- truthound/cli_modules/scaffolding/plugins.py +1072 -0
- truthound/cli_modules/scaffolding/reporters.py +594 -0
- truthound/cli_modules/scaffolding/validators.py +1127 -0
- truthound/common/__init__.py +18 -0
- truthound/common/resilience/__init__.py +130 -0
- truthound/common/resilience/bulkhead.py +266 -0
- truthound/common/resilience/circuit_breaker.py +516 -0
- truthound/common/resilience/composite.py +332 -0
- truthound/common/resilience/config.py +292 -0
- truthound/common/resilience/protocols.py +217 -0
- truthound/common/resilience/rate_limiter.py +404 -0
- truthound/common/resilience/retry.py +341 -0
- truthound/datadocs/__init__.py +260 -0
- truthound/datadocs/base.py +571 -0
- truthound/datadocs/builder.py +761 -0
- truthound/datadocs/charts.py +764 -0
- truthound/datadocs/dashboard/__init__.py +63 -0
- truthound/datadocs/dashboard/app.py +576 -0
- truthound/datadocs/dashboard/components.py +584 -0
- truthound/datadocs/dashboard/state.py +240 -0
- truthound/datadocs/engine/__init__.py +46 -0
- truthound/datadocs/engine/context.py +376 -0
- truthound/datadocs/engine/pipeline.py +618 -0
- truthound/datadocs/engine/registry.py +469 -0
- truthound/datadocs/exporters/__init__.py +49 -0
- truthound/datadocs/exporters/base.py +198 -0
- truthound/datadocs/exporters/html.py +178 -0
- truthound/datadocs/exporters/json_exporter.py +253 -0
- truthound/datadocs/exporters/markdown.py +284 -0
- truthound/datadocs/exporters/pdf.py +392 -0
- truthound/datadocs/i18n/__init__.py +86 -0
- truthound/datadocs/i18n/catalog.py +960 -0
- truthound/datadocs/i18n/formatting.py +505 -0
- truthound/datadocs/i18n/loader.py +256 -0
- truthound/datadocs/i18n/plurals.py +378 -0
- truthound/datadocs/renderers/__init__.py +42 -0
- truthound/datadocs/renderers/base.py +401 -0
- truthound/datadocs/renderers/custom.py +342 -0
- truthound/datadocs/renderers/jinja.py +697 -0
- truthound/datadocs/sections.py +736 -0
- truthound/datadocs/styles.py +931 -0
- truthound/datadocs/themes/__init__.py +101 -0
- truthound/datadocs/themes/base.py +336 -0
- truthound/datadocs/themes/default.py +417 -0
- truthound/datadocs/themes/enterprise.py +419 -0
- truthound/datadocs/themes/loader.py +336 -0
- truthound/datadocs/themes.py +301 -0
- truthound/datadocs/transformers/__init__.py +57 -0
- truthound/datadocs/transformers/base.py +268 -0
- truthound/datadocs/transformers/enrichers.py +544 -0
- truthound/datadocs/transformers/filters.py +447 -0
- truthound/datadocs/transformers/i18n.py +468 -0
- truthound/datadocs/versioning/__init__.py +62 -0
- truthound/datadocs/versioning/diff.py +639 -0
- truthound/datadocs/versioning/storage.py +497 -0
- truthound/datadocs/versioning/version.py +358 -0
- truthound/datasources/__init__.py +223 -0
- truthound/datasources/_async_protocols.py +222 -0
- truthound/datasources/_protocols.py +159 -0
- truthound/datasources/adapters.py +428 -0
- truthound/datasources/async_base.py +599 -0
- truthound/datasources/async_factory.py +511 -0
- truthound/datasources/base.py +516 -0
- truthound/datasources/factory.py +433 -0
- truthound/datasources/nosql/__init__.py +47 -0
- truthound/datasources/nosql/base.py +487 -0
- truthound/datasources/nosql/elasticsearch.py +801 -0
- truthound/datasources/nosql/mongodb.py +636 -0
- truthound/datasources/pandas_optimized.py +582 -0
- truthound/datasources/pandas_source.py +216 -0
- truthound/datasources/polars_source.py +395 -0
- truthound/datasources/spark_source.py +479 -0
- truthound/datasources/sql/__init__.py +154 -0
- truthound/datasources/sql/base.py +710 -0
- truthound/datasources/sql/bigquery.py +410 -0
- truthound/datasources/sql/cloud_base.py +199 -0
- truthound/datasources/sql/databricks.py +471 -0
- truthound/datasources/sql/mysql.py +316 -0
- truthound/datasources/sql/oracle.py +427 -0
- truthound/datasources/sql/postgresql.py +321 -0
- truthound/datasources/sql/redshift.py +479 -0
- truthound/datasources/sql/snowflake.py +439 -0
- truthound/datasources/sql/sqlite.py +286 -0
- truthound/datasources/sql/sqlserver.py +437 -0
- truthound/datasources/streaming/__init__.py +47 -0
- truthound/datasources/streaming/base.py +350 -0
- truthound/datasources/streaming/kafka.py +670 -0
- truthound/decorators.py +98 -0
- truthound/docs/__init__.py +69 -0
- truthound/docs/extractor.py +971 -0
- truthound/docs/generator.py +601 -0
- truthound/docs/parser.py +1037 -0
- truthound/docs/renderer.py +999 -0
- truthound/drift/__init__.py +22 -0
- truthound/drift/compare.py +189 -0
- truthound/drift/detectors.py +464 -0
- truthound/drift/report.py +160 -0
- truthound/execution/__init__.py +65 -0
- truthound/execution/_protocols.py +324 -0
- truthound/execution/base.py +576 -0
- truthound/execution/distributed/__init__.py +179 -0
- truthound/execution/distributed/aggregations.py +731 -0
- truthound/execution/distributed/arrow_bridge.py +817 -0
- truthound/execution/distributed/base.py +550 -0
- truthound/execution/distributed/dask_engine.py +976 -0
- truthound/execution/distributed/mixins.py +766 -0
- truthound/execution/distributed/protocols.py +756 -0
- truthound/execution/distributed/ray_engine.py +1127 -0
- truthound/execution/distributed/registry.py +446 -0
- truthound/execution/distributed/spark_engine.py +1011 -0
- truthound/execution/distributed/validator_adapter.py +682 -0
- truthound/execution/pandas_engine.py +401 -0
- truthound/execution/polars_engine.py +497 -0
- truthound/execution/pushdown/__init__.py +230 -0
- truthound/execution/pushdown/ast.py +1550 -0
- truthound/execution/pushdown/builder.py +1550 -0
- truthound/execution/pushdown/dialects.py +1072 -0
- truthound/execution/pushdown/executor.py +829 -0
- truthound/execution/pushdown/optimizer.py +1041 -0
- truthound/execution/sql_engine.py +518 -0
- truthound/infrastructure/__init__.py +189 -0
- truthound/infrastructure/audit.py +1515 -0
- truthound/infrastructure/config.py +1133 -0
- truthound/infrastructure/encryption.py +1132 -0
- truthound/infrastructure/logging.py +1503 -0
- truthound/infrastructure/metrics.py +1220 -0
- truthound/lineage/__init__.py +89 -0
- truthound/lineage/base.py +746 -0
- truthound/lineage/impact_analysis.py +474 -0
- truthound/lineage/integrations/__init__.py +22 -0
- truthound/lineage/integrations/openlineage.py +548 -0
- truthound/lineage/tracker.py +512 -0
- truthound/lineage/visualization/__init__.py +33 -0
- truthound/lineage/visualization/protocols.py +145 -0
- truthound/lineage/visualization/renderers/__init__.py +20 -0
- truthound/lineage/visualization/renderers/cytoscape.py +329 -0
- truthound/lineage/visualization/renderers/d3.py +331 -0
- truthound/lineage/visualization/renderers/graphviz.py +276 -0
- truthound/lineage/visualization/renderers/mermaid.py +308 -0
- truthound/maskers.py +113 -0
- truthound/ml/__init__.py +124 -0
- truthound/ml/anomaly_models/__init__.py +31 -0
- truthound/ml/anomaly_models/ensemble.py +362 -0
- truthound/ml/anomaly_models/isolation_forest.py +444 -0
- truthound/ml/anomaly_models/statistical.py +392 -0
- truthound/ml/base.py +1178 -0
- truthound/ml/drift_detection/__init__.py +26 -0
- truthound/ml/drift_detection/concept.py +381 -0
- truthound/ml/drift_detection/distribution.py +361 -0
- truthound/ml/drift_detection/feature.py +442 -0
- truthound/ml/drift_detection/multivariate.py +495 -0
- truthound/ml/monitoring/__init__.py +88 -0
- truthound/ml/monitoring/alerting/__init__.py +33 -0
- truthound/ml/monitoring/alerting/handlers.py +427 -0
- truthound/ml/monitoring/alerting/rules.py +508 -0
- truthound/ml/monitoring/collectors/__init__.py +19 -0
- truthound/ml/monitoring/collectors/composite.py +105 -0
- truthound/ml/monitoring/collectors/drift.py +324 -0
- truthound/ml/monitoring/collectors/performance.py +179 -0
- truthound/ml/monitoring/collectors/quality.py +369 -0
- truthound/ml/monitoring/monitor.py +536 -0
- truthound/ml/monitoring/protocols.py +451 -0
- truthound/ml/monitoring/stores/__init__.py +15 -0
- truthound/ml/monitoring/stores/memory.py +201 -0
- truthound/ml/monitoring/stores/prometheus.py +296 -0
- truthound/ml/rule_learning/__init__.py +25 -0
- truthound/ml/rule_learning/constraint_miner.py +443 -0
- truthound/ml/rule_learning/pattern_learner.py +499 -0
- truthound/ml/rule_learning/profile_learner.py +462 -0
- truthound/multitenancy/__init__.py +326 -0
- truthound/multitenancy/core.py +852 -0
- truthound/multitenancy/integration.py +597 -0
- truthound/multitenancy/isolation.py +630 -0
- truthound/multitenancy/manager.py +770 -0
- truthound/multitenancy/middleware.py +765 -0
- truthound/multitenancy/quota.py +537 -0
- truthound/multitenancy/resolvers.py +603 -0
- truthound/multitenancy/storage.py +703 -0
- truthound/observability/__init__.py +307 -0
- truthound/observability/context.py +531 -0
- truthound/observability/instrumentation.py +611 -0
- truthound/observability/logging.py +887 -0
- truthound/observability/metrics.py +1157 -0
- truthound/observability/tracing/__init__.py +178 -0
- truthound/observability/tracing/baggage.py +310 -0
- truthound/observability/tracing/config.py +426 -0
- truthound/observability/tracing/exporter.py +787 -0
- truthound/observability/tracing/integration.py +1018 -0
- truthound/observability/tracing/otel/__init__.py +146 -0
- truthound/observability/tracing/otel/adapter.py +982 -0
- truthound/observability/tracing/otel/bridge.py +1177 -0
- truthound/observability/tracing/otel/compat.py +681 -0
- truthound/observability/tracing/otel/config.py +691 -0
- truthound/observability/tracing/otel/detection.py +327 -0
- truthound/observability/tracing/otel/protocols.py +426 -0
- truthound/observability/tracing/processor.py +561 -0
- truthound/observability/tracing/propagator.py +757 -0
- truthound/observability/tracing/provider.py +569 -0
- truthound/observability/tracing/resource.py +515 -0
- truthound/observability/tracing/sampler.py +487 -0
- truthound/observability/tracing/span.py +676 -0
- truthound/plugins/__init__.py +198 -0
- truthound/plugins/base.py +599 -0
- truthound/plugins/cli.py +680 -0
- truthound/plugins/dependencies/__init__.py +42 -0
- truthound/plugins/dependencies/graph.py +422 -0
- truthound/plugins/dependencies/resolver.py +417 -0
- truthound/plugins/discovery.py +379 -0
- truthound/plugins/docs/__init__.py +46 -0
- truthound/plugins/docs/extractor.py +444 -0
- truthound/plugins/docs/renderer.py +499 -0
- truthound/plugins/enterprise_manager.py +877 -0
- truthound/plugins/examples/__init__.py +19 -0
- truthound/plugins/examples/custom_validators.py +317 -0
- truthound/plugins/examples/slack_notifier.py +312 -0
- truthound/plugins/examples/xml_reporter.py +254 -0
- truthound/plugins/hooks.py +558 -0
- truthound/plugins/lifecycle/__init__.py +43 -0
- truthound/plugins/lifecycle/hot_reload.py +402 -0
- truthound/plugins/lifecycle/manager.py +371 -0
- truthound/plugins/manager.py +736 -0
- truthound/plugins/registry.py +338 -0
- truthound/plugins/security/__init__.py +93 -0
- truthound/plugins/security/exceptions.py +332 -0
- truthound/plugins/security/policies.py +348 -0
- truthound/plugins/security/protocols.py +643 -0
- truthound/plugins/security/sandbox/__init__.py +45 -0
- truthound/plugins/security/sandbox/context.py +158 -0
- truthound/plugins/security/sandbox/engines/__init__.py +19 -0
- truthound/plugins/security/sandbox/engines/container.py +379 -0
- truthound/plugins/security/sandbox/engines/noop.py +144 -0
- truthound/plugins/security/sandbox/engines/process.py +336 -0
- truthound/plugins/security/sandbox/factory.py +211 -0
- truthound/plugins/security/signing/__init__.py +57 -0
- truthound/plugins/security/signing/service.py +330 -0
- truthound/plugins/security/signing/trust_store.py +368 -0
- truthound/plugins/security/signing/verifier.py +459 -0
- truthound/plugins/versioning/__init__.py +41 -0
- truthound/plugins/versioning/constraints.py +297 -0
- truthound/plugins/versioning/resolver.py +329 -0
- truthound/profiler/__init__.py +1729 -0
- truthound/profiler/_lazy.py +452 -0
- truthound/profiler/ab_testing/__init__.py +80 -0
- truthound/profiler/ab_testing/analysis.py +449 -0
- truthound/profiler/ab_testing/base.py +257 -0
- truthound/profiler/ab_testing/experiment.py +395 -0
- truthound/profiler/ab_testing/tracking.py +368 -0
- truthound/profiler/auto_threshold.py +1170 -0
- truthound/profiler/base.py +579 -0
- truthound/profiler/cache_patterns.py +911 -0
- truthound/profiler/caching.py +1303 -0
- truthound/profiler/column_profiler.py +712 -0
- truthound/profiler/comparison.py +1007 -0
- truthound/profiler/custom_patterns.py +1170 -0
- truthound/profiler/dashboard/__init__.py +50 -0
- truthound/profiler/dashboard/app.py +476 -0
- truthound/profiler/dashboard/components.py +457 -0
- truthound/profiler/dashboard/config.py +72 -0
- truthound/profiler/distributed/__init__.py +83 -0
- truthound/profiler/distributed/base.py +281 -0
- truthound/profiler/distributed/dask_backend.py +498 -0
- truthound/profiler/distributed/local_backend.py +293 -0
- truthound/profiler/distributed/profiler.py +304 -0
- truthound/profiler/distributed/ray_backend.py +374 -0
- truthound/profiler/distributed/spark_backend.py +375 -0
- truthound/profiler/distributed.py +1366 -0
- truthound/profiler/enterprise_sampling.py +1065 -0
- truthound/profiler/errors.py +488 -0
- truthound/profiler/evolution/__init__.py +91 -0
- truthound/profiler/evolution/alerts.py +426 -0
- truthound/profiler/evolution/changes.py +206 -0
- truthound/profiler/evolution/compatibility.py +365 -0
- truthound/profiler/evolution/detector.py +372 -0
- truthound/profiler/evolution/protocols.py +121 -0
- truthound/profiler/generators/__init__.py +48 -0
- truthound/profiler/generators/base.py +384 -0
- truthound/profiler/generators/ml_rules.py +375 -0
- truthound/profiler/generators/pattern_rules.py +384 -0
- truthound/profiler/generators/schema_rules.py +267 -0
- truthound/profiler/generators/stats_rules.py +324 -0
- truthound/profiler/generators/suite_generator.py +857 -0
- truthound/profiler/i18n.py +1542 -0
- truthound/profiler/incremental.py +554 -0
- truthound/profiler/incremental_validation.py +1710 -0
- truthound/profiler/integration/__init__.py +73 -0
- truthound/profiler/integration/adapters.py +345 -0
- truthound/profiler/integration/context.py +371 -0
- truthound/profiler/integration/executor.py +527 -0
- truthound/profiler/integration/naming.py +75 -0
- truthound/profiler/integration/protocols.py +243 -0
- truthound/profiler/memory.py +1185 -0
- truthound/profiler/migration/__init__.py +60 -0
- truthound/profiler/migration/base.py +345 -0
- truthound/profiler/migration/manager.py +444 -0
- truthound/profiler/migration/v1_0_to_v1_1.py +484 -0
- truthound/profiler/ml/__init__.py +73 -0
- truthound/profiler/ml/base.py +244 -0
- truthound/profiler/ml/classifier.py +507 -0
- truthound/profiler/ml/feature_extraction.py +604 -0
- truthound/profiler/ml/pretrained.py +448 -0
- truthound/profiler/ml_inference.py +1276 -0
- truthound/profiler/native_patterns.py +815 -0
- truthound/profiler/observability.py +1184 -0
- truthound/profiler/process_timeout.py +1566 -0
- truthound/profiler/progress.py +568 -0
- truthound/profiler/progress_callbacks.py +1734 -0
- truthound/profiler/quality.py +1345 -0
- truthound/profiler/resilience.py +1180 -0
- truthound/profiler/sampled_matcher.py +794 -0
- truthound/profiler/sampling.py +1288 -0
- truthound/profiler/scheduling/__init__.py +82 -0
- truthound/profiler/scheduling/protocols.py +214 -0
- truthound/profiler/scheduling/scheduler.py +474 -0
- truthound/profiler/scheduling/storage.py +457 -0
- truthound/profiler/scheduling/triggers.py +449 -0
- truthound/profiler/schema.py +603 -0
- truthound/profiler/streaming.py +685 -0
- truthound/profiler/streaming_patterns.py +1354 -0
- truthound/profiler/suite_cli.py +625 -0
- truthound/profiler/suite_config.py +789 -0
- truthound/profiler/suite_export.py +1268 -0
- truthound/profiler/table_profiler.py +547 -0
- truthound/profiler/timeout.py +565 -0
- truthound/profiler/validation.py +1532 -0
- truthound/profiler/visualization/__init__.py +118 -0
- truthound/profiler/visualization/base.py +346 -0
- truthound/profiler/visualization/generator.py +1259 -0
- truthound/profiler/visualization/plotly_renderer.py +811 -0
- truthound/profiler/visualization/renderers.py +669 -0
- truthound/profiler/visualization/sections.py +540 -0
- truthound/profiler/visualization.py +2122 -0
- truthound/profiler/yaml_validation.py +1151 -0
- truthound/py.typed +0 -0
- truthound/ratelimit/__init__.py +248 -0
- truthound/ratelimit/algorithms.py +1108 -0
- truthound/ratelimit/core.py +573 -0
- truthound/ratelimit/integration.py +532 -0
- truthound/ratelimit/limiter.py +663 -0
- truthound/ratelimit/middleware.py +700 -0
- truthound/ratelimit/policy.py +792 -0
- truthound/ratelimit/storage.py +763 -0
- truthound/rbac/__init__.py +340 -0
- truthound/rbac/core.py +976 -0
- truthound/rbac/integration.py +760 -0
- truthound/rbac/manager.py +1052 -0
- truthound/rbac/middleware.py +842 -0
- truthound/rbac/policy.py +954 -0
- truthound/rbac/storage.py +878 -0
- truthound/realtime/__init__.py +141 -0
- truthound/realtime/adapters/__init__.py +43 -0
- truthound/realtime/adapters/base.py +533 -0
- truthound/realtime/adapters/kafka.py +487 -0
- truthound/realtime/adapters/kinesis.py +479 -0
- truthound/realtime/adapters/mock.py +243 -0
- truthound/realtime/base.py +553 -0
- truthound/realtime/factory.py +382 -0
- truthound/realtime/incremental.py +660 -0
- truthound/realtime/processing/__init__.py +67 -0
- truthound/realtime/processing/exactly_once.py +575 -0
- truthound/realtime/processing/state.py +547 -0
- truthound/realtime/processing/windows.py +647 -0
- truthound/realtime/protocols.py +569 -0
- truthound/realtime/streaming.py +605 -0
- truthound/realtime/testing/__init__.py +32 -0
- truthound/realtime/testing/containers.py +615 -0
- truthound/realtime/testing/fixtures.py +484 -0
- truthound/report.py +280 -0
- truthound/reporters/__init__.py +46 -0
- truthound/reporters/_protocols.py +30 -0
- truthound/reporters/base.py +324 -0
- truthound/reporters/ci/__init__.py +66 -0
- truthound/reporters/ci/azure.py +436 -0
- truthound/reporters/ci/base.py +509 -0
- truthound/reporters/ci/bitbucket.py +567 -0
- truthound/reporters/ci/circleci.py +547 -0
- truthound/reporters/ci/detection.py +364 -0
- truthound/reporters/ci/factory.py +182 -0
- truthound/reporters/ci/github.py +388 -0
- truthound/reporters/ci/gitlab.py +471 -0
- truthound/reporters/ci/jenkins.py +525 -0
- truthound/reporters/console_reporter.py +299 -0
- truthound/reporters/factory.py +211 -0
- truthound/reporters/html_reporter.py +524 -0
- truthound/reporters/json_reporter.py +256 -0
- truthound/reporters/markdown_reporter.py +280 -0
- truthound/reporters/sdk/__init__.py +174 -0
- truthound/reporters/sdk/builder.py +558 -0
- truthound/reporters/sdk/mixins.py +1150 -0
- truthound/reporters/sdk/schema.py +1493 -0
- truthound/reporters/sdk/templates.py +666 -0
- truthound/reporters/sdk/testing.py +968 -0
- truthound/scanners.py +170 -0
- truthound/scheduling/__init__.py +122 -0
- truthound/scheduling/cron.py +1136 -0
- truthound/scheduling/presets.py +212 -0
- truthound/schema.py +275 -0
- truthound/secrets/__init__.py +173 -0
- truthound/secrets/base.py +618 -0
- truthound/secrets/cloud.py +682 -0
- truthound/secrets/integration.py +507 -0
- truthound/secrets/manager.py +633 -0
- truthound/secrets/oidc/__init__.py +172 -0
- truthound/secrets/oidc/base.py +902 -0
- truthound/secrets/oidc/credential_provider.py +623 -0
- truthound/secrets/oidc/exchangers.py +1001 -0
- truthound/secrets/oidc/github/__init__.py +110 -0
- truthound/secrets/oidc/github/claims.py +718 -0
- truthound/secrets/oidc/github/enhanced_provider.py +693 -0
- truthound/secrets/oidc/github/trust_policy.py +742 -0
- truthound/secrets/oidc/github/verification.py +723 -0
- truthound/secrets/oidc/github/workflow.py +691 -0
- truthound/secrets/oidc/providers.py +825 -0
- truthound/secrets/providers.py +506 -0
- truthound/secrets/resolver.py +495 -0
- truthound/stores/__init__.py +177 -0
- truthound/stores/backends/__init__.py +18 -0
- truthound/stores/backends/_protocols.py +340 -0
- truthound/stores/backends/azure_blob.py +530 -0
- truthound/stores/backends/concurrent_filesystem.py +915 -0
- truthound/stores/backends/connection_pool.py +1365 -0
- truthound/stores/backends/database.py +743 -0
- truthound/stores/backends/filesystem.py +538 -0
- truthound/stores/backends/gcs.py +399 -0
- truthound/stores/backends/memory.py +354 -0
- truthound/stores/backends/s3.py +434 -0
- truthound/stores/backpressure/__init__.py +84 -0
- truthound/stores/backpressure/base.py +375 -0
- truthound/stores/backpressure/circuit_breaker.py +434 -0
- truthound/stores/backpressure/monitor.py +376 -0
- truthound/stores/backpressure/strategies.py +677 -0
- truthound/stores/base.py +551 -0
- truthound/stores/batching/__init__.py +65 -0
- truthound/stores/batching/base.py +305 -0
- truthound/stores/batching/buffer.py +370 -0
- truthound/stores/batching/store.py +248 -0
- truthound/stores/batching/writer.py +521 -0
- truthound/stores/caching/__init__.py +60 -0
- truthound/stores/caching/backends.py +684 -0
- truthound/stores/caching/base.py +356 -0
- truthound/stores/caching/store.py +305 -0
- truthound/stores/compression/__init__.py +193 -0
- truthound/stores/compression/adaptive.py +694 -0
- truthound/stores/compression/base.py +514 -0
- truthound/stores/compression/pipeline.py +868 -0
- truthound/stores/compression/providers.py +672 -0
- truthound/stores/compression/streaming.py +832 -0
- truthound/stores/concurrency/__init__.py +81 -0
- truthound/stores/concurrency/atomic.py +556 -0
- truthound/stores/concurrency/index.py +775 -0
- truthound/stores/concurrency/locks.py +576 -0
- truthound/stores/concurrency/manager.py +482 -0
- truthound/stores/encryption/__init__.py +297 -0
- truthound/stores/encryption/base.py +952 -0
- truthound/stores/encryption/keys.py +1191 -0
- truthound/stores/encryption/pipeline.py +903 -0
- truthound/stores/encryption/providers.py +953 -0
- truthound/stores/encryption/streaming.py +950 -0
- truthound/stores/expectations.py +227 -0
- truthound/stores/factory.py +246 -0
- truthound/stores/migration/__init__.py +75 -0
- truthound/stores/migration/base.py +480 -0
- truthound/stores/migration/manager.py +347 -0
- truthound/stores/migration/registry.py +382 -0
- truthound/stores/migration/store.py +559 -0
- truthound/stores/observability/__init__.py +106 -0
- truthound/stores/observability/audit.py +718 -0
- truthound/stores/observability/config.py +270 -0
- truthound/stores/observability/factory.py +208 -0
- truthound/stores/observability/metrics.py +636 -0
- truthound/stores/observability/protocols.py +410 -0
- truthound/stores/observability/store.py +570 -0
- truthound/stores/observability/tracing.py +784 -0
- truthound/stores/replication/__init__.py +76 -0
- truthound/stores/replication/base.py +260 -0
- truthound/stores/replication/monitor.py +269 -0
- truthound/stores/replication/store.py +439 -0
- truthound/stores/replication/syncer.py +391 -0
- truthound/stores/results.py +359 -0
- truthound/stores/retention/__init__.py +77 -0
- truthound/stores/retention/base.py +378 -0
- truthound/stores/retention/policies.py +621 -0
- truthound/stores/retention/scheduler.py +279 -0
- truthound/stores/retention/store.py +526 -0
- truthound/stores/streaming/__init__.py +138 -0
- truthound/stores/streaming/base.py +801 -0
- truthound/stores/streaming/database.py +984 -0
- truthound/stores/streaming/filesystem.py +719 -0
- truthound/stores/streaming/reader.py +629 -0
- truthound/stores/streaming/s3.py +843 -0
- truthound/stores/streaming/writer.py +790 -0
- truthound/stores/tiering/__init__.py +108 -0
- truthound/stores/tiering/base.py +462 -0
- truthound/stores/tiering/manager.py +249 -0
- truthound/stores/tiering/policies.py +692 -0
- truthound/stores/tiering/store.py +526 -0
- truthound/stores/versioning/__init__.py +56 -0
- truthound/stores/versioning/base.py +376 -0
- truthound/stores/versioning/store.py +660 -0
- truthound/stores/versioning/strategies.py +353 -0
- truthound/types.py +56 -0
- truthound/validators/__init__.py +774 -0
- truthound/validators/aggregate/__init__.py +27 -0
- truthound/validators/aggregate/central.py +116 -0
- truthound/validators/aggregate/extremes.py +116 -0
- truthound/validators/aggregate/spread.py +118 -0
- truthound/validators/aggregate/sum.py +64 -0
- truthound/validators/aggregate/type.py +78 -0
- truthound/validators/anomaly/__init__.py +93 -0
- truthound/validators/anomaly/base.py +431 -0
- truthound/validators/anomaly/ml_based.py +1190 -0
- truthound/validators/anomaly/multivariate.py +647 -0
- truthound/validators/anomaly/statistical.py +599 -0
- truthound/validators/base.py +1089 -0
- truthound/validators/business_rule/__init__.py +46 -0
- truthound/validators/business_rule/base.py +147 -0
- truthound/validators/business_rule/checksum.py +509 -0
- truthound/validators/business_rule/financial.py +526 -0
- truthound/validators/cache.py +733 -0
- truthound/validators/completeness/__init__.py +39 -0
- truthound/validators/completeness/conditional.py +73 -0
- truthound/validators/completeness/default.py +98 -0
- truthound/validators/completeness/empty.py +103 -0
- truthound/validators/completeness/nan.py +337 -0
- truthound/validators/completeness/null.py +152 -0
- truthound/validators/cross_table/__init__.py +17 -0
- truthound/validators/cross_table/aggregate.py +333 -0
- truthound/validators/cross_table/row_count.py +122 -0
- truthound/validators/datetime/__init__.py +29 -0
- truthound/validators/datetime/format.py +78 -0
- truthound/validators/datetime/freshness.py +269 -0
- truthound/validators/datetime/order.py +73 -0
- truthound/validators/datetime/parseable.py +185 -0
- truthound/validators/datetime/range.py +202 -0
- truthound/validators/datetime/timezone.py +69 -0
- truthound/validators/distribution/__init__.py +49 -0
- truthound/validators/distribution/distribution.py +128 -0
- truthound/validators/distribution/monotonic.py +119 -0
- truthound/validators/distribution/outlier.py +178 -0
- truthound/validators/distribution/quantile.py +80 -0
- truthound/validators/distribution/range.py +254 -0
- truthound/validators/distribution/set.py +125 -0
- truthound/validators/distribution/statistical.py +459 -0
- truthound/validators/drift/__init__.py +79 -0
- truthound/validators/drift/base.py +427 -0
- truthound/validators/drift/multi_feature.py +401 -0
- truthound/validators/drift/numeric.py +395 -0
- truthound/validators/drift/psi.py +446 -0
- truthound/validators/drift/statistical.py +510 -0
- truthound/validators/enterprise.py +1658 -0
- truthound/validators/geospatial/__init__.py +80 -0
- truthound/validators/geospatial/base.py +97 -0
- truthound/validators/geospatial/boundary.py +238 -0
- truthound/validators/geospatial/coordinate.py +351 -0
- truthound/validators/geospatial/distance.py +399 -0
- truthound/validators/geospatial/polygon.py +665 -0
- truthound/validators/i18n/__init__.py +308 -0
- truthound/validators/i18n/bidi.py +571 -0
- truthound/validators/i18n/catalogs.py +570 -0
- truthound/validators/i18n/dialects.py +763 -0
- truthound/validators/i18n/extended_catalogs.py +549 -0
- truthound/validators/i18n/formatting.py +1434 -0
- truthound/validators/i18n/loader.py +1020 -0
- truthound/validators/i18n/messages.py +521 -0
- truthound/validators/i18n/plural.py +683 -0
- truthound/validators/i18n/protocols.py +855 -0
- truthound/validators/i18n/tms.py +1162 -0
- truthound/validators/localization/__init__.py +53 -0
- truthound/validators/localization/base.py +122 -0
- truthound/validators/localization/chinese.py +362 -0
- truthound/validators/localization/japanese.py +275 -0
- truthound/validators/localization/korean.py +524 -0
- truthound/validators/memory/__init__.py +94 -0
- truthound/validators/memory/approximate_knn.py +506 -0
- truthound/validators/memory/base.py +547 -0
- truthound/validators/memory/sgd_online.py +719 -0
- truthound/validators/memory/streaming_ecdf.py +753 -0
- truthound/validators/ml_feature/__init__.py +54 -0
- truthound/validators/ml_feature/base.py +249 -0
- truthound/validators/ml_feature/correlation.py +299 -0
- truthound/validators/ml_feature/leakage.py +344 -0
- truthound/validators/ml_feature/null_impact.py +270 -0
- truthound/validators/ml_feature/scale.py +264 -0
- truthound/validators/multi_column/__init__.py +89 -0
- truthound/validators/multi_column/arithmetic.py +284 -0
- truthound/validators/multi_column/base.py +231 -0
- truthound/validators/multi_column/comparison.py +273 -0
- truthound/validators/multi_column/consistency.py +312 -0
- truthound/validators/multi_column/statistical.py +299 -0
- truthound/validators/optimization/__init__.py +164 -0
- truthound/validators/optimization/aggregation.py +563 -0
- truthound/validators/optimization/covariance.py +556 -0
- truthound/validators/optimization/geo.py +626 -0
- truthound/validators/optimization/graph.py +587 -0
- truthound/validators/optimization/orchestrator.py +970 -0
- truthound/validators/optimization/profiling.py +1312 -0
- truthound/validators/privacy/__init__.py +223 -0
- truthound/validators/privacy/base.py +635 -0
- truthound/validators/privacy/ccpa.py +670 -0
- truthound/validators/privacy/gdpr.py +728 -0
- truthound/validators/privacy/global_patterns.py +604 -0
- truthound/validators/privacy/plugins.py +867 -0
- truthound/validators/profiling/__init__.py +52 -0
- truthound/validators/profiling/base.py +175 -0
- truthound/validators/profiling/cardinality.py +312 -0
- truthound/validators/profiling/entropy.py +391 -0
- truthound/validators/profiling/frequency.py +455 -0
- truthound/validators/pushdown_support.py +660 -0
- truthound/validators/query/__init__.py +91 -0
- truthound/validators/query/aggregate.py +346 -0
- truthound/validators/query/base.py +246 -0
- truthound/validators/query/column.py +249 -0
- truthound/validators/query/expression.py +274 -0
- truthound/validators/query/result.py +323 -0
- truthound/validators/query/row_count.py +264 -0
- truthound/validators/referential/__init__.py +80 -0
- truthound/validators/referential/base.py +395 -0
- truthound/validators/referential/cascade.py +391 -0
- truthound/validators/referential/circular.py +563 -0
- truthound/validators/referential/foreign_key.py +624 -0
- truthound/validators/referential/orphan.py +485 -0
- truthound/validators/registry.py +112 -0
- truthound/validators/schema/__init__.py +41 -0
- truthound/validators/schema/column_count.py +142 -0
- truthound/validators/schema/column_exists.py +80 -0
- truthound/validators/schema/column_order.py +82 -0
- truthound/validators/schema/column_pair.py +85 -0
- truthound/validators/schema/column_pair_set.py +195 -0
- truthound/validators/schema/column_type.py +94 -0
- truthound/validators/schema/multi_column.py +53 -0
- truthound/validators/schema/multi_column_aggregate.py +175 -0
- truthound/validators/schema/referential.py +274 -0
- truthound/validators/schema/table_schema.py +91 -0
- truthound/validators/schema_validator.py +219 -0
- truthound/validators/sdk/__init__.py +250 -0
- truthound/validators/sdk/builder.py +680 -0
- truthound/validators/sdk/decorators.py +474 -0
- truthound/validators/sdk/enterprise/__init__.py +211 -0
- truthound/validators/sdk/enterprise/docs.py +725 -0
- truthound/validators/sdk/enterprise/fuzzing.py +659 -0
- truthound/validators/sdk/enterprise/licensing.py +709 -0
- truthound/validators/sdk/enterprise/manager.py +543 -0
- truthound/validators/sdk/enterprise/resources.py +628 -0
- truthound/validators/sdk/enterprise/sandbox.py +766 -0
- truthound/validators/sdk/enterprise/signing.py +603 -0
- truthound/validators/sdk/enterprise/templates.py +865 -0
- truthound/validators/sdk/enterprise/versioning.py +659 -0
- truthound/validators/sdk/templates.py +757 -0
- truthound/validators/sdk/testing.py +807 -0
- truthound/validators/security/__init__.py +181 -0
- truthound/validators/security/redos/__init__.py +182 -0
- truthound/validators/security/redos/core.py +861 -0
- truthound/validators/security/redos/cpu_monitor.py +593 -0
- truthound/validators/security/redos/cve_database.py +791 -0
- truthound/validators/security/redos/ml/__init__.py +155 -0
- truthound/validators/security/redos/ml/base.py +785 -0
- truthound/validators/security/redos/ml/datasets.py +618 -0
- truthound/validators/security/redos/ml/features.py +359 -0
- truthound/validators/security/redos/ml/models.py +1000 -0
- truthound/validators/security/redos/ml/predictor.py +507 -0
- truthound/validators/security/redos/ml/storage.py +632 -0
- truthound/validators/security/redos/ml/training.py +571 -0
- truthound/validators/security/redos/ml_analyzer.py +937 -0
- truthound/validators/security/redos/optimizer.py +674 -0
- truthound/validators/security/redos/profiler.py +682 -0
- truthound/validators/security/redos/re2_engine.py +709 -0
- truthound/validators/security/redos.py +886 -0
- truthound/validators/security/sql_security.py +1247 -0
- truthound/validators/streaming/__init__.py +126 -0
- truthound/validators/streaming/base.py +292 -0
- truthound/validators/streaming/completeness.py +210 -0
- truthound/validators/streaming/mixin.py +575 -0
- truthound/validators/streaming/range.py +308 -0
- truthound/validators/streaming/sources.py +846 -0
- truthound/validators/string/__init__.py +57 -0
- truthound/validators/string/casing.py +158 -0
- truthound/validators/string/charset.py +96 -0
- truthound/validators/string/format.py +501 -0
- truthound/validators/string/json.py +77 -0
- truthound/validators/string/json_schema.py +184 -0
- truthound/validators/string/length.py +104 -0
- truthound/validators/string/like_pattern.py +237 -0
- truthound/validators/string/regex.py +202 -0
- truthound/validators/string/regex_extended.py +435 -0
- truthound/validators/table/__init__.py +88 -0
- truthound/validators/table/base.py +78 -0
- truthound/validators/table/column_count.py +198 -0
- truthound/validators/table/freshness.py +362 -0
- truthound/validators/table/row_count.py +251 -0
- truthound/validators/table/schema.py +333 -0
- truthound/validators/table/size.py +285 -0
- truthound/validators/timeout/__init__.py +102 -0
- truthound/validators/timeout/advanced/__init__.py +247 -0
- truthound/validators/timeout/advanced/circuit_breaker.py +675 -0
- truthound/validators/timeout/advanced/prediction.py +773 -0
- truthound/validators/timeout/advanced/priority.py +618 -0
- truthound/validators/timeout/advanced/redis_backend.py +770 -0
- truthound/validators/timeout/advanced/retry.py +721 -0
- truthound/validators/timeout/advanced/sampling.py +788 -0
- truthound/validators/timeout/advanced/sla.py +661 -0
- truthound/validators/timeout/advanced/telemetry.py +804 -0
- truthound/validators/timeout/cascade.py +477 -0
- truthound/validators/timeout/deadline.py +657 -0
- truthound/validators/timeout/degradation.py +525 -0
- truthound/validators/timeout/distributed.py +597 -0
- truthound/validators/timeseries/__init__.py +89 -0
- truthound/validators/timeseries/base.py +326 -0
- truthound/validators/timeseries/completeness.py +617 -0
- truthound/validators/timeseries/gap.py +485 -0
- truthound/validators/timeseries/monotonic.py +310 -0
- truthound/validators/timeseries/seasonality.py +422 -0
- truthound/validators/timeseries/trend.py +510 -0
- truthound/validators/uniqueness/__init__.py +59 -0
- truthound/validators/uniqueness/approximate.py +475 -0
- truthound/validators/uniqueness/distinct_values.py +253 -0
- truthound/validators/uniqueness/duplicate.py +118 -0
- truthound/validators/uniqueness/primary_key.py +140 -0
- truthound/validators/uniqueness/unique.py +191 -0
- truthound/validators/uniqueness/within_record.py +599 -0
- truthound/validators/utils.py +756 -0
- truthound-1.0.8.dist-info/METADATA +474 -0
- truthound-1.0.8.dist-info/RECORD +877 -0
- truthound-1.0.8.dist-info/WHEEL +4 -0
- truthound-1.0.8.dist-info/entry_points.txt +2 -0
- truthound-1.0.8.dist-info/licenses/LICENSE +190 -0
|
@@ -0,0 +1,674 @@
|
|
|
1
|
+
"""Pattern Optimizer - Automatic ReDoS Pattern Transformation.
|
|
2
|
+
|
|
3
|
+
This module provides automatic optimization of dangerous regex patterns
|
|
4
|
+
to safer alternatives while preserving matching semantics.
|
|
5
|
+
|
|
6
|
+
Architecture:
|
|
7
|
+
┌─────────────────────────────────────────────────────────────────┐
|
|
8
|
+
│ Pattern Optimizer │
|
|
9
|
+
└─────────────────────────────────────────────────────────────────┘
|
|
10
|
+
│
|
|
11
|
+
┌───────────────┬───────────────┼───────────────┬─────────────────┐
|
|
12
|
+
│ │ │ │ │
|
|
13
|
+
▼ ▼ ▼ ▼ ▼
|
|
14
|
+
┌─────────┐ ┌─────────┐ ┌──────────┐ ┌──────────┐ ┌─────────┐
|
|
15
|
+
│ Rule │ │Transform│ │ Semantic │ │ Safety │ │ Report │
|
|
16
|
+
│ Engine │ │ Pipeline│ │ Verifier │ │ Validator│ │Generator│
|
|
17
|
+
└─────────┘ └─────────┘ └──────────┘ └──────────┘ └─────────┘
|
|
18
|
+
|
|
19
|
+
Optimization strategies:
|
|
20
|
+
1. Nested Quantifier Flattening: (a+)+ → a+
|
|
21
|
+
2. Alternation Simplification: (a|a)+ → a+
|
|
22
|
+
3. Possessive Quantifier Simulation: a++ equivalent
|
|
23
|
+
4. Atomic Group Insertion (where applicable)
|
|
24
|
+
5. Anchor Addition: Reduce backtracking scope
|
|
25
|
+
6. Character Class Optimization: [a-zA-Z] → \\w (when appropriate)
|
|
26
|
+
|
|
27
|
+
Usage:
|
|
28
|
+
from truthound.validators.security.redos.optimizer import (
|
|
29
|
+
PatternOptimizer,
|
|
30
|
+
optimize_pattern,
|
|
31
|
+
)
|
|
32
|
+
|
|
33
|
+
# Quick optimization
|
|
34
|
+
result = optimize_pattern(r"(a+)+b")
|
|
35
|
+
print(result.optimized_pattern) # "a+b"
|
|
36
|
+
print(result.applied_rules) # ["flatten_nested_quantifiers"]
|
|
37
|
+
|
|
38
|
+
# Full optimizer with custom rules
|
|
39
|
+
optimizer = PatternOptimizer()
|
|
40
|
+
result = optimizer.optimize(pattern)
|
|
41
|
+
"""
|
|
42
|
+
|
|
43
|
+
from __future__ import annotations
|
|
44
|
+
|
|
45
|
+
import re
|
|
46
|
+
from abc import ABC, abstractmethod
|
|
47
|
+
from dataclasses import dataclass, field
|
|
48
|
+
from enum import Enum, auto
|
|
49
|
+
from typing import Any, Callable, Protocol, Sequence
|
|
50
|
+
|
|
51
|
+
from truthound.validators.security.redos.core import (
|
|
52
|
+
ReDoSRisk,
|
|
53
|
+
RegexComplexityAnalyzer,
|
|
54
|
+
SafeRegexConfig,
|
|
55
|
+
)
|
|
56
|
+
|
|
57
|
+
|
|
58
|
+
class OptimizationType(Enum):
    """Categories used to tag a pattern transformation.

    Member values come from ``auto()`` and therefore follow declaration
    order (starting at 1); append new members at the end if the numeric
    values need to stay stable.
    """

    # Rewrites of quantified groups and alternations.
    FLATTEN_NESTED_QUANTIFIERS = auto()
    SIMPLIFY_ALTERNATION = auto()

    # Rewrites that restrict where or how often something may match.
    ADD_ANCHORS = auto()
    LIMIT_QUANTIFIERS = auto()

    # Structural clean-ups.
    SIMPLIFY_CHARACTER_CLASS = auto()
    REMOVE_REDUNDANT_GROUPS = auto()

    # Emulation of engine features (possessive quantifiers / atomic groups).
    POSSESSIVE_SIMULATION = auto()
    ATOMIC_GROUP_SIMULATION = auto()

    # Further alternation and quantifier rewrites.
    FACTOR_COMMON_PREFIX = auto()
    LAZY_TO_POSSESSIVE = auto()
|
|
71
|
+
|
|
72
|
+
|
|
73
|
+
@dataclass
class OptimizationRule:
    """Declarative description of one regex rewrite rule.

    Attributes:
        name: Identifier of the rule.
        description: Human-readable explanation of what the rule does.
        pattern: Regex that locates the construct to rewrite. A plain string
            is compiled during ``__post_init__``, so after construction this
            attribute always holds a compiled pattern.
        replacement: Replacement template, or a callable that receives the
            match object and returns the replacement text.
        risk_reduction: Expected risk reduction, in the range 0-1.
        preserves_semantics: Whether applying the rule keeps exact matching
            behaviour.
        optimization_type: Category of the rewrite.
    """

    name: str
    description: str
    pattern: re.Pattern | str
    replacement: str | Callable[[re.Match], str]
    risk_reduction: float
    preserves_semantics: bool = True
    optimization_type: OptimizationType = OptimizationType.FLATTEN_NESTED_QUANTIFIERS

    def __post_init__(self):
        """Compile ``pattern`` when it was supplied as a string."""
        source = self.pattern
        if not isinstance(source, str):
            # Already compiled (or some other object): leave untouched.
            return
        self.pattern = re.compile(source)
|
|
99
|
+
|
|
100
|
+
|
|
101
|
+
@dataclass
class OptimizationResult:
    """Outcome of running the optimizer on a single pattern.

    Attributes:
        original_pattern: Pattern as it was passed in.
        optimized_pattern: Pattern after all accepted transformations.
        applied_rules: Names of the rules that were applied.
        risk_before: Risk level measured before optimization.
        risk_after: Risk level measured after optimization.
        semantics_preserved: Whether matching semantics are preserved.
        warnings: Warnings produced while optimizing.
        transformations: Detailed log of the individual transformations.
    """

    original_pattern: str
    optimized_pattern: str
    applied_rules: list[str] = field(default_factory=list)
    risk_before: ReDoSRisk = ReDoSRisk.NONE
    risk_after: ReDoSRisk = ReDoSRisk.NONE
    semantics_preserved: bool = True
    warnings: list[str] = field(default_factory=list)
    transformations: list[dict[str, Any]] = field(default_factory=list)

    @property
    def was_optimized(self) -> bool:
        """Return True when the optimized pattern differs from the original."""
        return self.optimized_pattern != self.original_pattern

    @property
    def risk_reduced(self) -> bool:
        """Return True when ``risk_after`` has a lower value than ``risk_before``."""
        return self.risk_before.value > self.risk_after.value

    def to_dict(self) -> dict[str, Any]:
        """Return the result as a plain dictionary.

        Risk levels are exported by enum member name; the two derived
        properties are included alongside the stored fields. The list values
        are the same objects held by this instance, not copies.
        """
        payload: dict[str, Any] = {}
        payload["original_pattern"] = self.original_pattern
        payload["optimized_pattern"] = self.optimized_pattern
        payload["applied_rules"] = self.applied_rules
        payload["risk_before"] = self.risk_before.name
        payload["risk_after"] = self.risk_after.name
        payload["was_optimized"] = self.was_optimized
        payload["risk_reduced"] = self.risk_reduced
        payload["semantics_preserved"] = self.semantics_preserved
        payload["warnings"] = self.warnings
        payload["transformations"] = self.transformations
        return payload
|
|
149
|
+
|
|
150
|
+
|
|
151
|
+
class TransformationStrategyProtocol(Protocol):
    """Structural interface that every transformation strategy satisfies."""

    def can_apply(self, pattern: str) -> bool:
        """Report whether this strategy is applicable to ``pattern``."""

    def apply(self, pattern: str) -> tuple[str, bool]:
        """Transform ``pattern``.

        Returns:
            Tuple of (transformed_pattern, semantics_preserved)
        """
|
|
165
|
+
|
|
166
|
+
|
|
167
|
+
class BaseTransformation(ABC):
    """Abstract base for pattern transformations.

    Subclasses override the three class attributes below and implement
    ``can_apply`` and ``apply``.
    """

    # Identifier of the transformation.
    name: str = "base"
    # Human-readable summary of the transformation.
    description: str = ""
    # Category this transformation belongs to.
    optimization_type: OptimizationType = OptimizationType.FLATTEN_NESTED_QUANTIFIERS

    @abstractmethod
    def can_apply(self, pattern: str) -> bool:
        """Report whether this transformation is applicable to ``pattern``."""

    @abstractmethod
    def apply(self, pattern: str) -> tuple[str, bool]:
        """Transform ``pattern``.

        Returns:
            Tuple of (transformed_pattern, semantics_preserved)
        """
|
|
187
|
+
|
|
188
|
+
|
|
189
|
+
class FlattenNestedQuantifiers(BaseTransformation):
    """Flatten a quantified group that wraps a single quantified atom.

    ``(a+)+`` becomes ``(a)+``: the group is kept (so group numbering is
    unchanged) and only the redundant inner quantifier is removed.

    The rewrite is deliberately restricted to cases where it is provably
    language-preserving:

    * the group body must be exactly one atom (a literal character, an
      escape such as ``\\d`` or ``\\+``, or a bracketed character class)
      followed by ``+`` or ``*``.  Bodies such as ``ab+`` are left alone,
      because ``(ab+)+`` and ``(ab)+`` match different strings;
    * the combined quantifier is ``+`` only when both quantifiers are ``+``;
      any combination involving ``*`` must stay ``*`` so the empty string
      still matches;
    * an opening parenthesis preceded by a backslash is skipped (it is most
      likely a literal), as is an outer quantifier that carries a lazy
      (``?``) or possessive (``+``) suffix.

    The set of matched strings is preserved; the text captured by the group
    is not (it captures one repetition of the atom instead of a run).

    NOTE(review): text inside a character class is not recognised as such,
    so a class that literally contains ``(a+)*`` could still be rewritten.
    """

    name = "flatten_nested_quantifiers"
    description = "Flatten nested quantifiers (a+)+ to a+"
    optimization_type = OptimizationType.FLATTEN_NESTED_QUANTIFIERS

    # One regex atom: escape sequence, character class, or plain character.
    _ATOM = (
        r"(?:"
        r"\\."                          # escaped char / class shorthand
        r"|\[\^?\]?(?:\\.|[^\]\\])*\]"  # bracketed character class
        r"|[^\\()\[\]{}|*+?^$]"         # single non-special character
        r")"
    )

    # group(1): optional "?:" prefix, group(2): atom,
    # group(3): inner quantifier, group(4): outer quantifier.
    _PATTERN = re.compile(
        r"(?<!\\)\((\?:)?(" + _ATOM + r")([+*])\)([+*])(?![+?])"
    )

    def can_apply(self, pattern: str) -> bool:
        """Return True if ``pattern`` contains a flattenable nested quantifier."""
        return self._PATTERN.search(pattern) is not None

    def apply(self, pattern: str) -> tuple[str, bool]:
        """Flatten every nested quantifier that can be rewritten safely.

        Args:
            pattern: Regex source text.

        Returns:
            Tuple of (transformed_pattern, True). The flag is always True
            because only language-preserving rewrites are performed.
        """

        def replacer(match: re.Match) -> str:
            group_prefix = match.group(1) or ""
            atom = match.group(2)
            inner, outer = match.group(3), match.group(4)
            # x+ repeated one-or-more times is x+; every other combination
            # can match the empty string and therefore collapses to x*.
            combined = "+" if inner == outer == "+" else "*"
            return f"({group_prefix}{atom}){combined}"

        return self._PATTERN.sub(replacer, pattern), True
|
|
227
|
+
|
|
228
|
+
|
|
229
|
+
class SimplifyAlternation(BaseTransformation):
    """Simplify quantified alternations where one branch prefixes the other.

    A group such as ``(a|ab)+`` - two lowercase-letter branches where the
    first is a prefix of the second - is rewritten to ``(a[a-z]*)+``.  The
    outer quantifier is carried over unchanged, so ``(a|ab)*`` becomes
    ``(a[a-z]*)*`` and keeps matching the empty string.

    The rewritten group accepts any lowercase continuation of the prefix,
    which is broader than the original alternation, so a changed pattern is
    always reported as not semantics-preserving.

    NOTE(review): the replacement itself nests ``[a-z]*`` inside a
    quantified group; confirm that a later pass or the risk analysis deals
    with that before relying on this rule to lower risk.
    """

    name = "simplify_alternation"
    description = "Simplify overlapping alternations"
    optimization_type = OptimizationType.SIMPLIFY_ALTERNATION

    # Any group containing "|" that is followed by + or *.
    _QUANTIFIED_ALTERNATION = re.compile(r"\([^)]*\|[^)]*\)[+*]")
    # (prefix|prefix<more>) followed by + or *; group(2) is that quantifier.
    _PREFIX_ALTERNATION = re.compile(r"\(([a-z]+)\|\1[a-z]+\)([+*])")

    def can_apply(self, pattern: str) -> bool:
        """Return True if ``pattern`` contains a quantified alternation."""
        return self._QUANTIFIED_ALTERNATION.search(pattern) is not None

    def apply(self, pattern: str) -> tuple[str, bool]:
        """Rewrite prefix-overlapping alternations.

        Args:
            pattern: Regex source text.

        Returns:
            Tuple of (transformed_pattern, semantics_preserved). The flag is
            True only when nothing was rewritten.
        """
        # Re-emit the original outer quantifier (\2) instead of forcing "+".
        result = self._PREFIX_ALTERNATION.sub(r"(\1[a-z]*)\2", pattern)

        # Conservative: any change is assumed to alter semantics.
        return result, result == pattern
|
|
257
|
+
|
|
258
|
+
|
|
259
|
+
class AddAnchors(BaseTransformation):
    """Add ``^`` / ``$`` anchors to limit where a pattern can match.

    Two details matter for correctness:

    * a pattern containing ``|`` is wrapped in a non-capturing group before
      anchors are added, so ``a|b`` becomes ``^(?:a|b)$`` rather than
      ``^a|b$`` (which would anchor each branch separately).  The wrapper is
      non-capturing, so group numbering is unaffected;
    * a trailing ``$`` or ``Z`` only counts as an end anchor when the
      backslashes in front of it say so: ``\\$`` is a literal dollar sign and
      ``\\\\Z`` is a literal backslash followed by ``Z``.

    Anchoring always changes matching semantics, so ``apply`` reports
    ``False`` for the semantics flag.
    """

    name = "add_anchors"
    description = "Add anchors to limit matching scope"
    optimization_type = OptimizationType.ADD_ANCHORS

    @staticmethod
    def _has_start_anchor(pattern: str) -> bool:
        """Return True if ``pattern`` begins with ``^`` or ``\\A``."""
        return pattern.startswith(("^", "\\A"))

    @staticmethod
    def _has_end_anchor(pattern: str) -> bool:
        """Return True if ``pattern`` ends with an unescaped ``$`` or a real ``\\Z``."""
        if not pattern.endswith(("$", "Z")):
            return False
        head = pattern[:-1]
        trailing_backslashes = len(head) - len(head.rstrip("\\"))
        if pattern[-1] == "$":
            # An even number of backslashes leaves the "$" unescaped.
            return trailing_backslashes % 2 == 0
        # "Z" is an anchor only when an unescaped backslash precedes it.
        return trailing_backslashes % 2 == 1

    def can_apply(self, pattern: str) -> bool:
        """Return True if ``pattern`` lacks a start or an end anchor."""
        return not (
            self._has_start_anchor(pattern) and self._has_end_anchor(pattern)
        )

    def apply(self, pattern: str) -> tuple[str, bool]:
        """Add whichever anchors are missing.

        Args:
            pattern: Regex source text.

        Returns:
            Tuple of (transformed_pattern, False).
        """
        has_start = self._has_start_anchor(pattern)
        has_end = self._has_end_anchor(pattern)
        if has_start and has_end:
            return pattern, False

        # Group alternations so the new anchor(s) bind to every branch.
        body = f"(?:{pattern})" if "|" in pattern else pattern
        prefix = "" if has_start else "^"
        suffix = "" if has_end else "$"

        # Anchors change matching semantics
        return f"{prefix}{body}{suffix}", False
|
|
286
|
+
|
|
287
|
+
|
|
288
|
+
class LimitQuantifiers(BaseTransformation):
    """Convert unbounded quantifiers to bounded ones.

    ``a*`` becomes ``a{0,limit}`` and ``a+`` becomes ``a{1,limit}``.  Only
    real quantifiers are rewritten; the following are left untouched:

    * escaped characters such as ``\\*`` and ``\\+`` (literals);
    * ``*`` / ``+`` inside a character class such as ``[+*]`` (literals);
    * lazy quantifiers (``*?`` / ``+?``);
    * a ``+`` that directly follows another quantifier (possessive marker).

    Bounding a quantifier changes what very long inputs match, so ``apply``
    reports ``False`` for the semantics flag.
    """

    name = "limit_quantifiers"
    description = "Convert unbounded quantifiers to bounded"
    optimization_type = OptimizationType.LIMIT_QUANTIFIERS

    DEFAULT_LIMIT = 1000

    def __init__(self, limit: int = DEFAULT_LIMIT):
        """Initialize with the upper repetition bound to substitute.

        Args:
            limit: Maximum repetition count written into ``{min,limit}``.
        """
        self.limit = limit

    def can_apply(self, pattern: str) -> bool:
        """Return True if ``apply`` would change ``pattern``."""
        return self._rewrite(pattern) != pattern

    def apply(self, pattern: str) -> tuple[str, bool]:
        """Bound every rewritable ``*`` / ``+`` quantifier.

        Args:
            pattern: Regex source text.

        Returns:
            Tuple of (transformed_pattern, False).
        """
        # This changes semantics for very long inputs
        return self._rewrite(pattern), False

    def _rewrite(self, pattern: str) -> str:
        """Scan ``pattern`` once and replace unbounded greedy quantifiers."""
        bounded = {"*": f"{{0,{self.limit}}}", "+": f"{{1,{self.limit}}}"}
        pieces: list[str] = []
        in_class = False
        after_quantifier = False  # previous token was an unescaped * + or ?
        pos, size = 0, len(pattern)

        while pos < size:
            char = pattern[pos]

            # Escape sequence: copy both characters verbatim.
            if char == "\\" and pos + 1 < size:
                pieces.append(pattern[pos:pos + 2])
                pos += 2
                after_quantifier = False
                continue

            # Inside [...] every character is a literal member.
            if in_class:
                if char == "]":
                    in_class = False
                pieces.append(char)
                pos += 1
                continue

            if char == "[":
                # A "]" right after "[" or "[^" is a member, not the end.
                end = pos + 1
                if end < size and pattern[end] == "^":
                    end += 1
                if end < size and pattern[end] == "]":
                    end += 1
                pieces.append(pattern[pos:end])
                pos = end
                in_class = True
                after_quantifier = False
                continue

            if char in bounded:
                if after_quantifier:
                    # Possessive marker on the previous quantifier.
                    pieces.append(char)
                    after_quantifier = False
                else:
                    lazy = pos + 1 < size and pattern[pos + 1] == "?"
                    pieces.append(char if lazy else bounded[char])
                    after_quantifier = True
                pos += 1
                continue

            if char == "?":
                # Either a quantifier of its own or a lazy marker.
                pieces.append(char)
                after_quantifier = not after_quantifier
                pos += 1
                continue

            pieces.append(char)
            after_quantifier = False
            pos += 1

        return "".join(pieces)
|
|
317
|
+
|
|
318
|
+
|
|
319
|
+
class RemoveRedundantGroups(BaseTransformation):
    """Convert unnecessary capturing groups to non-capturing groups.

    Only innermost, unquantified capturing groups are converted, and only
    when the pattern contains no numeric backreference (``\\1`` .. ``\\9``)
    or conditional group reference (``(?(...)``), since converting a group
    renumbers the remaining ones.

    An opening parenthesis preceded by a backslash is never treated as a
    group: ``\\(abc\\)`` matches literal parentheses and must not be turned
    into ``\\(?:abc\\)``.

    The set of matched strings is preserved; the captured groups reported
    by a match are not.

    NOTE(review): parentheses inside a character class are not recognised
    as literals by this rule.
    """

    name = "remove_redundant_groups"
    description = "Convert unnecessary capturing groups to non-capturing"
    optimization_type = OptimizationType.REMOVE_REDUNDANT_GROUPS

    # Unescaped "(" that does not start a "(?..." construct.
    _CAPTURING_GROUP = re.compile(r"(?<!\\)\((?!\?)")
    # Numeric backreference or conditional group reference.
    _GROUP_REFERENCE = re.compile(r"\\[1-9]|\(\?\(")
    # Innermost capturing group that is not followed by a quantifier.
    _CONVERTIBLE_GROUP = re.compile(r"(?<!\\)\((?!\?)([^()]+)\)(?![+*?{])")

    def can_apply(self, pattern: str) -> bool:
        """Return True if ``pattern`` has capturing groups and no references to them."""
        if self._CAPTURING_GROUP.search(pattern) is None:
            return False
        return self._GROUP_REFERENCE.search(pattern) is None

    def apply(self, pattern: str) -> tuple[str, bool]:
        """Convert eligible capturing groups to ``(?:...)``.

        Args:
            pattern: Regex source text.

        Returns:
            Tuple of (transformed_pattern, True).
        """
        # Only convert groups that aren't followed by quantifiers
        # (quantified groups are more likely to be intentional)
        result = self._CONVERTIBLE_GROUP.sub(r"(?:\1)", pattern)

        return result, True  # Matching semantics preserved
|
|
344
|
+
|
|
345
|
+
|
|
346
|
+
class FactorCommonPrefix(BaseTransformation):
    """Factor out common prefixes from alternations.

    ``(abc|abd)`` becomes ``ab(c|d)``; ``(?:abc|abd)`` becomes
    ``ab(?:c|d)``.

    The common prefix is computed over regex *tokens* (an atom plus its
    optional quantifier), never over raw characters, so it cannot cut an
    escape (``\\d``), a character class (``[ab]``) or an atom/quantifier
    pair (``b+``) in half.  A group is left unchanged when:

    * any branch contains something that cannot be tokenised (nested group
      syntax, a ``|`` inside a character class, an escaped ``|``, ...);
    * the branches share no leading token;
    * the group is followed by a quantifier - ``(ab|ac)+`` is not
      equivalent to ``a(b|c)+``;
    * the opening parenthesis is preceded by a backslash.

    The set of matched strings is preserved; the text captured by the group
    shrinks to the part after the prefix.
    """

    name = "factor_common_prefix"
    description = "Factor common prefixes from alternations"
    optimization_type = OptimizationType.FACTOR_COMMON_PREFIX

    # Innermost, unquantified group containing "|".
    # group(1): optional "?:" prefix, group(2): the alternation body.
    _GROUP = re.compile(r"(?<!\\)\((\?:)?([^()]+\|[^()]+)\)(?![*+?{])")

    # One atom (escape, character class or plain character) followed by an
    # optional quantifier with an optional lazy/possessive suffix.
    _TOKEN = re.compile(
        r"(?:\\.|\[\^?\]?(?:\\.|[^\]\\])*\]|[^\\\[\](){}|*+?])"
        r"(?:(?:[*+?]|\{\d*(?:,\d*)?\})[?+]?)?"
    )

    def can_apply(self, pattern: str) -> bool:
        """Return True if ``pattern`` contains an alternation bar."""
        return "|" in pattern

    def apply(self, pattern: str) -> tuple[str, bool]:
        """Factor common token prefixes out of eligible alternation groups.

        Args:
            pattern: Regex source text.

        Returns:
            Tuple of (transformed_pattern, True).
        """
        return self._GROUP.sub(self._factor_group, pattern), True

    def _tokenize(self, alternative: str) -> list[str] | None:
        """Split one branch into tokens, or return None if that is not possible."""
        tokens: list[str] = []
        pos = 0
        while pos < len(alternative):
            found = self._TOKEN.match(alternative, pos)
            if found is None:
                return None
            tokens.append(found.group(0))
            pos = found.end()
        return tokens

    def _factor_group(self, match: re.Match) -> str:
        """Return the replacement text for one matched alternation group."""
        group_prefix = match.group(1) or ""

        branches = []
        for alternative in match.group(2).split("|"):
            tokens = self._tokenize(alternative)
            if tokens is None:
                # Unknown syntax in this branch: leave the group untouched.
                return match.group(0)
            branches.append(tokens)

        # Longest run of leading tokens shared by every branch.
        common: list[str] = []
        for column in zip(*branches):
            if any(token != column[0] for token in column):
                break
            common.append(column[0])

        if not common:
            return match.group(0)

        # An exhausted branch becomes the empty group so it still matches "".
        remainders = [
            "".join(tokens[len(common):]) or "(?:)" for tokens in branches
        ]
        return f"{''.join(common)}({group_prefix}{'|'.join(remainders)})"
|
|
385
|
+
|
|
386
|
+
|
|
387
|
+
class PatternOptimizer:
|
|
388
|
+
"""Main optimizer for dangerous regex patterns.
|
|
389
|
+
|
|
390
|
+
This optimizer applies a series of transformations to convert
|
|
391
|
+
potentially dangerous regex patterns into safer alternatives.
|
|
392
|
+
|
|
393
|
+
Example:
|
|
394
|
+
optimizer = PatternOptimizer()
|
|
395
|
+
|
|
396
|
+
# Basic optimization
|
|
397
|
+
result = optimizer.optimize(r"(a+)+b")
|
|
398
|
+
print(result.optimized_pattern) # "a+b"
|
|
399
|
+
|
|
400
|
+
# Aggressive optimization (may change semantics)
|
|
401
|
+
result = optimizer.optimize(r".*foo.*", aggressive=True)
|
|
402
|
+
print(result.optimized_pattern) # "^.*foo.*$"
|
|
403
|
+
|
|
404
|
+
# Custom transformation pipeline
|
|
405
|
+
optimizer = PatternOptimizer(
|
|
406
|
+
transformations=[
|
|
407
|
+
FlattenNestedQuantifiers(),
|
|
408
|
+
RemoveRedundantGroups(),
|
|
409
|
+
]
|
|
410
|
+
)
|
|
411
|
+
"""
|
|
412
|
+
|
|
413
|
+
# Default transformations in order of application
|
|
414
|
+
DEFAULT_TRANSFORMATIONS: list[type[BaseTransformation]] = [
|
|
415
|
+
FlattenNestedQuantifiers,
|
|
416
|
+
SimplifyAlternation,
|
|
417
|
+
RemoveRedundantGroups,
|
|
418
|
+
FactorCommonPrefix,
|
|
419
|
+
]
|
|
420
|
+
|
|
421
|
+
# Aggressive transformations (may change semantics)
|
|
422
|
+
AGGRESSIVE_TRANSFORMATIONS: list[type[BaseTransformation]] = [
|
|
423
|
+
LimitQuantifiers,
|
|
424
|
+
AddAnchors,
|
|
425
|
+
]
|
|
426
|
+
|
|
427
|
+
def __init__(
|
|
428
|
+
self,
|
|
429
|
+
transformations: Sequence[BaseTransformation] | None = None,
|
|
430
|
+
config: SafeRegexConfig | None = None,
|
|
431
|
+
max_iterations: int = 10,
|
|
432
|
+
):
|
|
433
|
+
"""Initialize the optimizer.
|
|
434
|
+
|
|
435
|
+
Args:
|
|
436
|
+
transformations: Custom transformations to apply
|
|
437
|
+
config: Safety configuration
|
|
438
|
+
max_iterations: Maximum optimization passes
|
|
439
|
+
"""
|
|
440
|
+
if transformations is not None:
|
|
441
|
+
self._transformations = list(transformations)
|
|
442
|
+
else:
|
|
443
|
+
self._transformations = [t() for t in self.DEFAULT_TRANSFORMATIONS]
|
|
444
|
+
|
|
445
|
+
self._aggressive_transformations = [t() for t in self.AGGRESSIVE_TRANSFORMATIONS]
|
|
446
|
+
self.config = config or SafeRegexConfig()
|
|
447
|
+
self.max_iterations = max_iterations
|
|
448
|
+
self._analyzer = RegexComplexityAnalyzer(self.config)
|
|
449
|
+
|
|
450
|
+
def optimize(
    self,
    pattern: str,
    aggressive: bool = False,
    preserve_semantics: bool = True,
) -> OptimizationResult:
    """Repeatedly rewrite ``pattern`` with the configured transformations.

    Every pass offers the current pattern to each transformation in turn.
    A rewrite is kept only when it compiles, its analyzed risk level is not
    higher than the risk level of the original pattern, and it differs from
    the current pattern. Passes repeat until one pass changes nothing or
    ``self.max_iterations`` passes have run.

    Args:
        pattern: Regular expression source to optimize.
        aggressive: Also offer the pattern to the aggressive
            transformations, after the standard ones.
        preserve_semantics: Discard every rewrite whose transformation
            reports that it does not preserve matching semantics.

    Returns:
        An ``OptimizationResult``. If ``pattern`` does not compile, the
        result echoes it unchanged with a single "Invalid regex" warning.
        Otherwise it holds the final pattern, the names of the applied
        rules, the risk level before and after, the semantics flag, any
        warnings, and a per-step transformation log.
    """
    # A pattern that does not compile can be neither analyzed nor rewritten.
    try:
        re.compile(pattern)
    except re.error as exc:
        return OptimizationResult(
            original_pattern=pattern,
            optimized_pattern=pattern,
            warnings=[f"Invalid regex: {exc}"],
        )

    # Risk of the untouched pattern; every candidate is measured against it.
    baseline = self._analyzer.analyze(pattern)
    risk_before = baseline.risk_level

    # Standard transformations first, aggressive ones (if requested) last.
    pipeline = list(self._transformations)
    if aggressive:
        pipeline += self._aggressive_transformations

    current = pattern
    rules_used: list[str] = []
    history: list[dict[str, Any]] = []
    semantics_preserved = True

    for pass_index in range(self.max_iterations):
        changed = False

        for rule in pipeline:
            if not rule.can_apply(current):
                continue

            candidate, keeps_semantics = rule.apply(current)

            # Reject semantic changes when the caller forbids them.
            if preserve_semantics and not keeps_semantics:
                continue

            # Reject rewrites that are not valid regular expressions.
            try:
                re.compile(candidate)
            except re.error:
                continue

            # Reject rewrites that are riskier than the original pattern.
            candidate_risk = self._analyzer.analyze(candidate).risk_level
            if candidate_risk.value > baseline.risk_level.value:
                continue

            # A rewrite identical to the current pattern is not a change.
            if candidate == current:
                continue

            history.append({
                "iteration": pass_index,
                "rule": rule.name,
                "before": current,
                "after": candidate,
                "preserves_semantics": keeps_semantics,
            })
            current = candidate
            rules_used.append(rule.name)
            changed = True

            # Only an applied, non-preserving rewrite clears the flag.
            if not keeps_semantics:
                semantics_preserved = False

        # Fixed point reached: a full pass left the pattern untouched.
        if not changed:
            break

    risk_after = self._analyzer.analyze(current).risk_level

    notes: list[str] = []
    if not semantics_preserved:
        notes.append(
            "Optimization changed matching semantics. "
            "Verify behavior with test cases."
        )
    still_risky = risk_before.value >= ReDoSRisk.HIGH.value
    if risk_after == risk_before and still_risky:
        notes.append(
            "Could not reduce risk. Consider rewriting the pattern manually."
        )

    return OptimizationResult(
        original_pattern=pattern,
        optimized_pattern=current,
        applied_rules=rules_used,
        risk_before=risk_before,
        risk_after=risk_after,
        semantics_preserved=semantics_preserved,
        warnings=notes,
        transformations=history,
    )
|
|
558
|
+
|
|
559
|
+
def add_transformation(self, transformation: BaseTransformation) -> None:
    """Append a transformation to the end of the standard pipeline.

    Args:
        transformation: Transformation instance to register. It is stored
            in ``self._transformations`` after the existing entries.
    """
    self._transformations.extend((transformation,))
|
|
566
|
+
|
|
567
|
+
def suggest_alternatives(self, pattern: str, count: int = 3) -> list[str]:
    """Collect distinct optimized variants of ``pattern``.

    ``optimize`` is run under each combination of the ``aggressive`` and
    ``preserve_semantics`` flags, most conservative first. Every result
    that reports ``was_optimized`` and yields a pattern not seen before is
    collected, until ``count`` alternatives have been found.

    Args:
        pattern: Original pattern.
        count: Maximum number of alternatives to return. A value of zero
            or less returns an empty list without running any optimization.

    Returns:
        Up to ``count`` distinct alternative patterns, in discovery order.
    """
    alternatives: list[str] = []

    # Without this guard the limit is only checked after the first append,
    # so a request for zero alternatives would still return one.
    if count <= 0:
        return alternatives

    # (aggressive, preserve_semantics) pairs, from safest to most invasive.
    strategies = (
        (False, True),
        (False, False),
        (True, True),
        (True, False),
    )

    for aggressive, preserve in strategies:
        result = self.optimize(
            pattern,
            aggressive=aggressive,
            preserve_semantics=preserve,
        )
        if not result.was_optimized:
            continue
        if result.optimized_pattern in alternatives:
            continue

        alternatives.append(result.optimized_pattern)
        if len(alternatives) >= count:
            break

    return alternatives
|
|
593
|
+
|
|
594
|
+
def explain_optimization(self, pattern: str) -> str:
    """Describe which transformations fit ``pattern`` and what they yield.

    The report lists every standard and aggressive transformation whose
    ``can_apply`` accepts the pattern, then shows the outcome of
    ``self.optimize(pattern)`` called with its default arguments.

    Args:
        pattern: Pattern to analyze.

    Returns:
        A newline-joined, human-readable report.
    """
    report = [f"Pattern: {pattern}", ""]

    # Check both pipelines, standard transformations first.
    candidates = self._transformations + self._aggressive_transformations
    usable = [rule for rule in candidates if rule.can_apply(pattern)]

    if usable:
        report.append("Applicable optimizations:")
        report.extend(f" - {rule.name}: {rule.description}" for rule in usable)
    else:
        report.append("No optimizations applicable.")

    # Outcome of an actual run with default settings.
    outcome = self.optimize(pattern)
    report.append("")
    if not outcome.was_optimized:
        report.append("No optimizations applied.")
    else:
        report.append(f"Optimized pattern: {outcome.optimized_pattern}")
        report.append(
            f"Risk reduction: {outcome.risk_before.name} → {outcome.risk_after.name}"
        )

    return "\n".join(report)
|
|
628
|
+
|
|
629
|
+
|
|
630
|
+
# ============================================================================
|
|
631
|
+
# Convenience functions
|
|
632
|
+
# ============================================================================
|
|
633
|
+
|
|
634
|
+
|
|
635
|
+
def optimize_pattern(
    pattern: str,
    aggressive: bool = False,
    preserve_semantics: bool = True,
) -> OptimizationResult:
    """Optimize a regex pattern with a default-configured optimizer.

    Creates a ``PatternOptimizer`` with no arguments and calls its
    ``optimize`` method once.

    Args:
        pattern: Pattern to optimize.
        aggressive: Apply aggressive optimizations as well.
        preserve_semantics: Only apply semantics-preserving transformations.

    Returns:
        OptimizationResult with optimization details.

    Example:
        result = optimize_pattern(r"(a+)+b")
        print(result.optimized_pattern)  # "a+b"
        print(result.risk_reduced)  # True
    """
    return PatternOptimizer().optimize(
        pattern,
        aggressive=aggressive,
        preserve_semantics=preserve_semantics,
    )
|
|
657
|
+
|
|
658
|
+
|
|
659
|
+
def suggest_safe_alternatives(pattern: str, count: int = 3) -> list[str]:
    """Suggest safer alternatives for a pattern.

    Creates a ``PatternOptimizer`` with no arguments and returns the result
    of its ``suggest_alternatives`` method.

    Args:
        pattern: Original pattern.
        count: Number of alternatives requested.

    Returns:
        List of alternative patterns.

    Example:
        alternatives = suggest_safe_alternatives(r"(a+)+")
        # Returns ["a+", "a{1,1000}", ...]
    """
    return PatternOptimizer().suggest_alternatives(pattern, count)
|