truthound 1.0.8__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- truthound/__init__.py +162 -0
- truthound/adapters.py +100 -0
- truthound/api.py +365 -0
- truthound/audit/__init__.py +248 -0
- truthound/audit/core.py +967 -0
- truthound/audit/filters.py +620 -0
- truthound/audit/formatters.py +707 -0
- truthound/audit/logger.py +902 -0
- truthound/audit/middleware.py +571 -0
- truthound/audit/storage.py +1083 -0
- truthound/benchmark/__init__.py +123 -0
- truthound/benchmark/base.py +757 -0
- truthound/benchmark/comparison.py +635 -0
- truthound/benchmark/generators.py +706 -0
- truthound/benchmark/reporters.py +718 -0
- truthound/benchmark/runner.py +635 -0
- truthound/benchmark/scenarios.py +712 -0
- truthound/cache.py +252 -0
- truthound/checkpoint/__init__.py +136 -0
- truthound/checkpoint/actions/__init__.py +164 -0
- truthound/checkpoint/actions/base.py +324 -0
- truthound/checkpoint/actions/custom.py +234 -0
- truthound/checkpoint/actions/discord_notify.py +290 -0
- truthound/checkpoint/actions/email_notify.py +405 -0
- truthound/checkpoint/actions/github_action.py +406 -0
- truthound/checkpoint/actions/opsgenie.py +1499 -0
- truthound/checkpoint/actions/pagerduty.py +226 -0
- truthound/checkpoint/actions/slack_notify.py +233 -0
- truthound/checkpoint/actions/store_result.py +249 -0
- truthound/checkpoint/actions/teams_notify.py +1570 -0
- truthound/checkpoint/actions/telegram_notify.py +419 -0
- truthound/checkpoint/actions/update_docs.py +552 -0
- truthound/checkpoint/actions/webhook.py +293 -0
- truthound/checkpoint/analytics/__init__.py +147 -0
- truthound/checkpoint/analytics/aggregations/__init__.py +23 -0
- truthound/checkpoint/analytics/aggregations/rollup.py +481 -0
- truthound/checkpoint/analytics/aggregations/time_bucket.py +306 -0
- truthound/checkpoint/analytics/analyzers/__init__.py +17 -0
- truthound/checkpoint/analytics/analyzers/anomaly.py +386 -0
- truthound/checkpoint/analytics/analyzers/base.py +270 -0
- truthound/checkpoint/analytics/analyzers/forecast.py +421 -0
- truthound/checkpoint/analytics/analyzers/trend.py +314 -0
- truthound/checkpoint/analytics/models.py +292 -0
- truthound/checkpoint/analytics/protocols.py +549 -0
- truthound/checkpoint/analytics/service.py +718 -0
- truthound/checkpoint/analytics/stores/__init__.py +16 -0
- truthound/checkpoint/analytics/stores/base.py +306 -0
- truthound/checkpoint/analytics/stores/memory_store.py +353 -0
- truthound/checkpoint/analytics/stores/sqlite_store.py +557 -0
- truthound/checkpoint/analytics/stores/timescale_store.py +501 -0
- truthound/checkpoint/async_actions.py +794 -0
- truthound/checkpoint/async_base.py +708 -0
- truthound/checkpoint/async_checkpoint.py +617 -0
- truthound/checkpoint/async_runner.py +639 -0
- truthound/checkpoint/checkpoint.py +527 -0
- truthound/checkpoint/ci/__init__.py +61 -0
- truthound/checkpoint/ci/detector.py +355 -0
- truthound/checkpoint/ci/reporter.py +436 -0
- truthound/checkpoint/ci/templates.py +454 -0
- truthound/checkpoint/circuitbreaker/__init__.py +133 -0
- truthound/checkpoint/circuitbreaker/breaker.py +542 -0
- truthound/checkpoint/circuitbreaker/core.py +252 -0
- truthound/checkpoint/circuitbreaker/detection.py +459 -0
- truthound/checkpoint/circuitbreaker/middleware.py +389 -0
- truthound/checkpoint/circuitbreaker/registry.py +357 -0
- truthound/checkpoint/distributed/__init__.py +139 -0
- truthound/checkpoint/distributed/backends/__init__.py +35 -0
- truthound/checkpoint/distributed/backends/celery_backend.py +503 -0
- truthound/checkpoint/distributed/backends/kubernetes_backend.py +696 -0
- truthound/checkpoint/distributed/backends/local_backend.py +397 -0
- truthound/checkpoint/distributed/backends/ray_backend.py +625 -0
- truthound/checkpoint/distributed/base.py +774 -0
- truthound/checkpoint/distributed/orchestrator.py +765 -0
- truthound/checkpoint/distributed/protocols.py +842 -0
- truthound/checkpoint/distributed/registry.py +449 -0
- truthound/checkpoint/idempotency/__init__.py +120 -0
- truthound/checkpoint/idempotency/core.py +295 -0
- truthound/checkpoint/idempotency/fingerprint.py +454 -0
- truthound/checkpoint/idempotency/locking.py +604 -0
- truthound/checkpoint/idempotency/service.py +592 -0
- truthound/checkpoint/idempotency/stores.py +653 -0
- truthound/checkpoint/monitoring/__init__.py +134 -0
- truthound/checkpoint/monitoring/aggregators/__init__.py +15 -0
- truthound/checkpoint/monitoring/aggregators/base.py +372 -0
- truthound/checkpoint/monitoring/aggregators/realtime.py +300 -0
- truthound/checkpoint/monitoring/aggregators/window.py +493 -0
- truthound/checkpoint/monitoring/collectors/__init__.py +17 -0
- truthound/checkpoint/monitoring/collectors/base.py +257 -0
- truthound/checkpoint/monitoring/collectors/memory_collector.py +617 -0
- truthound/checkpoint/monitoring/collectors/prometheus_collector.py +451 -0
- truthound/checkpoint/monitoring/collectors/redis_collector.py +518 -0
- truthound/checkpoint/monitoring/events.py +410 -0
- truthound/checkpoint/monitoring/protocols.py +636 -0
- truthound/checkpoint/monitoring/service.py +578 -0
- truthound/checkpoint/monitoring/views/__init__.py +17 -0
- truthound/checkpoint/monitoring/views/base.py +172 -0
- truthound/checkpoint/monitoring/views/queue_view.py +220 -0
- truthound/checkpoint/monitoring/views/task_view.py +240 -0
- truthound/checkpoint/monitoring/views/worker_view.py +263 -0
- truthound/checkpoint/registry.py +337 -0
- truthound/checkpoint/runner.py +356 -0
- truthound/checkpoint/transaction/__init__.py +133 -0
- truthound/checkpoint/transaction/base.py +389 -0
- truthound/checkpoint/transaction/compensatable.py +537 -0
- truthound/checkpoint/transaction/coordinator.py +576 -0
- truthound/checkpoint/transaction/executor.py +622 -0
- truthound/checkpoint/transaction/idempotency.py +534 -0
- truthound/checkpoint/transaction/saga/__init__.py +143 -0
- truthound/checkpoint/transaction/saga/builder.py +584 -0
- truthound/checkpoint/transaction/saga/definition.py +515 -0
- truthound/checkpoint/transaction/saga/event_store.py +542 -0
- truthound/checkpoint/transaction/saga/patterns.py +833 -0
- truthound/checkpoint/transaction/saga/runner.py +718 -0
- truthound/checkpoint/transaction/saga/state_machine.py +793 -0
- truthound/checkpoint/transaction/saga/strategies.py +780 -0
- truthound/checkpoint/transaction/saga/testing.py +886 -0
- truthound/checkpoint/triggers/__init__.py +58 -0
- truthound/checkpoint/triggers/base.py +237 -0
- truthound/checkpoint/triggers/event.py +385 -0
- truthound/checkpoint/triggers/schedule.py +355 -0
- truthound/cli.py +2358 -0
- truthound/cli_modules/__init__.py +124 -0
- truthound/cli_modules/advanced/__init__.py +45 -0
- truthound/cli_modules/advanced/benchmark.py +343 -0
- truthound/cli_modules/advanced/docs.py +225 -0
- truthound/cli_modules/advanced/lineage.py +209 -0
- truthound/cli_modules/advanced/ml.py +320 -0
- truthound/cli_modules/advanced/realtime.py +196 -0
- truthound/cli_modules/checkpoint/__init__.py +46 -0
- truthound/cli_modules/checkpoint/init.py +114 -0
- truthound/cli_modules/checkpoint/list.py +71 -0
- truthound/cli_modules/checkpoint/run.py +159 -0
- truthound/cli_modules/checkpoint/validate.py +67 -0
- truthound/cli_modules/common/__init__.py +71 -0
- truthound/cli_modules/common/errors.py +414 -0
- truthound/cli_modules/common/options.py +419 -0
- truthound/cli_modules/common/output.py +507 -0
- truthound/cli_modules/common/protocol.py +552 -0
- truthound/cli_modules/core/__init__.py +48 -0
- truthound/cli_modules/core/check.py +123 -0
- truthound/cli_modules/core/compare.py +104 -0
- truthound/cli_modules/core/learn.py +57 -0
- truthound/cli_modules/core/mask.py +77 -0
- truthound/cli_modules/core/profile.py +65 -0
- truthound/cli_modules/core/scan.py +61 -0
- truthound/cli_modules/profiler/__init__.py +51 -0
- truthound/cli_modules/profiler/auto_profile.py +175 -0
- truthound/cli_modules/profiler/metadata.py +107 -0
- truthound/cli_modules/profiler/suite.py +283 -0
- truthound/cli_modules/registry.py +431 -0
- truthound/cli_modules/scaffolding/__init__.py +89 -0
- truthound/cli_modules/scaffolding/base.py +631 -0
- truthound/cli_modules/scaffolding/commands.py +545 -0
- truthound/cli_modules/scaffolding/plugins.py +1072 -0
- truthound/cli_modules/scaffolding/reporters.py +594 -0
- truthound/cli_modules/scaffolding/validators.py +1127 -0
- truthound/common/__init__.py +18 -0
- truthound/common/resilience/__init__.py +130 -0
- truthound/common/resilience/bulkhead.py +266 -0
- truthound/common/resilience/circuit_breaker.py +516 -0
- truthound/common/resilience/composite.py +332 -0
- truthound/common/resilience/config.py +292 -0
- truthound/common/resilience/protocols.py +217 -0
- truthound/common/resilience/rate_limiter.py +404 -0
- truthound/common/resilience/retry.py +341 -0
- truthound/datadocs/__init__.py +260 -0
- truthound/datadocs/base.py +571 -0
- truthound/datadocs/builder.py +761 -0
- truthound/datadocs/charts.py +764 -0
- truthound/datadocs/dashboard/__init__.py +63 -0
- truthound/datadocs/dashboard/app.py +576 -0
- truthound/datadocs/dashboard/components.py +584 -0
- truthound/datadocs/dashboard/state.py +240 -0
- truthound/datadocs/engine/__init__.py +46 -0
- truthound/datadocs/engine/context.py +376 -0
- truthound/datadocs/engine/pipeline.py +618 -0
- truthound/datadocs/engine/registry.py +469 -0
- truthound/datadocs/exporters/__init__.py +49 -0
- truthound/datadocs/exporters/base.py +198 -0
- truthound/datadocs/exporters/html.py +178 -0
- truthound/datadocs/exporters/json_exporter.py +253 -0
- truthound/datadocs/exporters/markdown.py +284 -0
- truthound/datadocs/exporters/pdf.py +392 -0
- truthound/datadocs/i18n/__init__.py +86 -0
- truthound/datadocs/i18n/catalog.py +960 -0
- truthound/datadocs/i18n/formatting.py +505 -0
- truthound/datadocs/i18n/loader.py +256 -0
- truthound/datadocs/i18n/plurals.py +378 -0
- truthound/datadocs/renderers/__init__.py +42 -0
- truthound/datadocs/renderers/base.py +401 -0
- truthound/datadocs/renderers/custom.py +342 -0
- truthound/datadocs/renderers/jinja.py +697 -0
- truthound/datadocs/sections.py +736 -0
- truthound/datadocs/styles.py +931 -0
- truthound/datadocs/themes/__init__.py +101 -0
- truthound/datadocs/themes/base.py +336 -0
- truthound/datadocs/themes/default.py +417 -0
- truthound/datadocs/themes/enterprise.py +419 -0
- truthound/datadocs/themes/loader.py +336 -0
- truthound/datadocs/themes.py +301 -0
- truthound/datadocs/transformers/__init__.py +57 -0
- truthound/datadocs/transformers/base.py +268 -0
- truthound/datadocs/transformers/enrichers.py +544 -0
- truthound/datadocs/transformers/filters.py +447 -0
- truthound/datadocs/transformers/i18n.py +468 -0
- truthound/datadocs/versioning/__init__.py +62 -0
- truthound/datadocs/versioning/diff.py +639 -0
- truthound/datadocs/versioning/storage.py +497 -0
- truthound/datadocs/versioning/version.py +358 -0
- truthound/datasources/__init__.py +223 -0
- truthound/datasources/_async_protocols.py +222 -0
- truthound/datasources/_protocols.py +159 -0
- truthound/datasources/adapters.py +428 -0
- truthound/datasources/async_base.py +599 -0
- truthound/datasources/async_factory.py +511 -0
- truthound/datasources/base.py +516 -0
- truthound/datasources/factory.py +433 -0
- truthound/datasources/nosql/__init__.py +47 -0
- truthound/datasources/nosql/base.py +487 -0
- truthound/datasources/nosql/elasticsearch.py +801 -0
- truthound/datasources/nosql/mongodb.py +636 -0
- truthound/datasources/pandas_optimized.py +582 -0
- truthound/datasources/pandas_source.py +216 -0
- truthound/datasources/polars_source.py +395 -0
- truthound/datasources/spark_source.py +479 -0
- truthound/datasources/sql/__init__.py +154 -0
- truthound/datasources/sql/base.py +710 -0
- truthound/datasources/sql/bigquery.py +410 -0
- truthound/datasources/sql/cloud_base.py +199 -0
- truthound/datasources/sql/databricks.py +471 -0
- truthound/datasources/sql/mysql.py +316 -0
- truthound/datasources/sql/oracle.py +427 -0
- truthound/datasources/sql/postgresql.py +321 -0
- truthound/datasources/sql/redshift.py +479 -0
- truthound/datasources/sql/snowflake.py +439 -0
- truthound/datasources/sql/sqlite.py +286 -0
- truthound/datasources/sql/sqlserver.py +437 -0
- truthound/datasources/streaming/__init__.py +47 -0
- truthound/datasources/streaming/base.py +350 -0
- truthound/datasources/streaming/kafka.py +670 -0
- truthound/decorators.py +98 -0
- truthound/docs/__init__.py +69 -0
- truthound/docs/extractor.py +971 -0
- truthound/docs/generator.py +601 -0
- truthound/docs/parser.py +1037 -0
- truthound/docs/renderer.py +999 -0
- truthound/drift/__init__.py +22 -0
- truthound/drift/compare.py +189 -0
- truthound/drift/detectors.py +464 -0
- truthound/drift/report.py +160 -0
- truthound/execution/__init__.py +65 -0
- truthound/execution/_protocols.py +324 -0
- truthound/execution/base.py +576 -0
- truthound/execution/distributed/__init__.py +179 -0
- truthound/execution/distributed/aggregations.py +731 -0
- truthound/execution/distributed/arrow_bridge.py +817 -0
- truthound/execution/distributed/base.py +550 -0
- truthound/execution/distributed/dask_engine.py +976 -0
- truthound/execution/distributed/mixins.py +766 -0
- truthound/execution/distributed/protocols.py +756 -0
- truthound/execution/distributed/ray_engine.py +1127 -0
- truthound/execution/distributed/registry.py +446 -0
- truthound/execution/distributed/spark_engine.py +1011 -0
- truthound/execution/distributed/validator_adapter.py +682 -0
- truthound/execution/pandas_engine.py +401 -0
- truthound/execution/polars_engine.py +497 -0
- truthound/execution/pushdown/__init__.py +230 -0
- truthound/execution/pushdown/ast.py +1550 -0
- truthound/execution/pushdown/builder.py +1550 -0
- truthound/execution/pushdown/dialects.py +1072 -0
- truthound/execution/pushdown/executor.py +829 -0
- truthound/execution/pushdown/optimizer.py +1041 -0
- truthound/execution/sql_engine.py +518 -0
- truthound/infrastructure/__init__.py +189 -0
- truthound/infrastructure/audit.py +1515 -0
- truthound/infrastructure/config.py +1133 -0
- truthound/infrastructure/encryption.py +1132 -0
- truthound/infrastructure/logging.py +1503 -0
- truthound/infrastructure/metrics.py +1220 -0
- truthound/lineage/__init__.py +89 -0
- truthound/lineage/base.py +746 -0
- truthound/lineage/impact_analysis.py +474 -0
- truthound/lineage/integrations/__init__.py +22 -0
- truthound/lineage/integrations/openlineage.py +548 -0
- truthound/lineage/tracker.py +512 -0
- truthound/lineage/visualization/__init__.py +33 -0
- truthound/lineage/visualization/protocols.py +145 -0
- truthound/lineage/visualization/renderers/__init__.py +20 -0
- truthound/lineage/visualization/renderers/cytoscape.py +329 -0
- truthound/lineage/visualization/renderers/d3.py +331 -0
- truthound/lineage/visualization/renderers/graphviz.py +276 -0
- truthound/lineage/visualization/renderers/mermaid.py +308 -0
- truthound/maskers.py +113 -0
- truthound/ml/__init__.py +124 -0
- truthound/ml/anomaly_models/__init__.py +31 -0
- truthound/ml/anomaly_models/ensemble.py +362 -0
- truthound/ml/anomaly_models/isolation_forest.py +444 -0
- truthound/ml/anomaly_models/statistical.py +392 -0
- truthound/ml/base.py +1178 -0
- truthound/ml/drift_detection/__init__.py +26 -0
- truthound/ml/drift_detection/concept.py +381 -0
- truthound/ml/drift_detection/distribution.py +361 -0
- truthound/ml/drift_detection/feature.py +442 -0
- truthound/ml/drift_detection/multivariate.py +495 -0
- truthound/ml/monitoring/__init__.py +88 -0
- truthound/ml/monitoring/alerting/__init__.py +33 -0
- truthound/ml/monitoring/alerting/handlers.py +427 -0
- truthound/ml/monitoring/alerting/rules.py +508 -0
- truthound/ml/monitoring/collectors/__init__.py +19 -0
- truthound/ml/monitoring/collectors/composite.py +105 -0
- truthound/ml/monitoring/collectors/drift.py +324 -0
- truthound/ml/monitoring/collectors/performance.py +179 -0
- truthound/ml/monitoring/collectors/quality.py +369 -0
- truthound/ml/monitoring/monitor.py +536 -0
- truthound/ml/monitoring/protocols.py +451 -0
- truthound/ml/monitoring/stores/__init__.py +15 -0
- truthound/ml/monitoring/stores/memory.py +201 -0
- truthound/ml/monitoring/stores/prometheus.py +296 -0
- truthound/ml/rule_learning/__init__.py +25 -0
- truthound/ml/rule_learning/constraint_miner.py +443 -0
- truthound/ml/rule_learning/pattern_learner.py +499 -0
- truthound/ml/rule_learning/profile_learner.py +462 -0
- truthound/multitenancy/__init__.py +326 -0
- truthound/multitenancy/core.py +852 -0
- truthound/multitenancy/integration.py +597 -0
- truthound/multitenancy/isolation.py +630 -0
- truthound/multitenancy/manager.py +770 -0
- truthound/multitenancy/middleware.py +765 -0
- truthound/multitenancy/quota.py +537 -0
- truthound/multitenancy/resolvers.py +603 -0
- truthound/multitenancy/storage.py +703 -0
- truthound/observability/__init__.py +307 -0
- truthound/observability/context.py +531 -0
- truthound/observability/instrumentation.py +611 -0
- truthound/observability/logging.py +887 -0
- truthound/observability/metrics.py +1157 -0
- truthound/observability/tracing/__init__.py +178 -0
- truthound/observability/tracing/baggage.py +310 -0
- truthound/observability/tracing/config.py +426 -0
- truthound/observability/tracing/exporter.py +787 -0
- truthound/observability/tracing/integration.py +1018 -0
- truthound/observability/tracing/otel/__init__.py +146 -0
- truthound/observability/tracing/otel/adapter.py +982 -0
- truthound/observability/tracing/otel/bridge.py +1177 -0
- truthound/observability/tracing/otel/compat.py +681 -0
- truthound/observability/tracing/otel/config.py +691 -0
- truthound/observability/tracing/otel/detection.py +327 -0
- truthound/observability/tracing/otel/protocols.py +426 -0
- truthound/observability/tracing/processor.py +561 -0
- truthound/observability/tracing/propagator.py +757 -0
- truthound/observability/tracing/provider.py +569 -0
- truthound/observability/tracing/resource.py +515 -0
- truthound/observability/tracing/sampler.py +487 -0
- truthound/observability/tracing/span.py +676 -0
- truthound/plugins/__init__.py +198 -0
- truthound/plugins/base.py +599 -0
- truthound/plugins/cli.py +680 -0
- truthound/plugins/dependencies/__init__.py +42 -0
- truthound/plugins/dependencies/graph.py +422 -0
- truthound/plugins/dependencies/resolver.py +417 -0
- truthound/plugins/discovery.py +379 -0
- truthound/plugins/docs/__init__.py +46 -0
- truthound/plugins/docs/extractor.py +444 -0
- truthound/plugins/docs/renderer.py +499 -0
- truthound/plugins/enterprise_manager.py +877 -0
- truthound/plugins/examples/__init__.py +19 -0
- truthound/plugins/examples/custom_validators.py +317 -0
- truthound/plugins/examples/slack_notifier.py +312 -0
- truthound/plugins/examples/xml_reporter.py +254 -0
- truthound/plugins/hooks.py +558 -0
- truthound/plugins/lifecycle/__init__.py +43 -0
- truthound/plugins/lifecycle/hot_reload.py +402 -0
- truthound/plugins/lifecycle/manager.py +371 -0
- truthound/plugins/manager.py +736 -0
- truthound/plugins/registry.py +338 -0
- truthound/plugins/security/__init__.py +93 -0
- truthound/plugins/security/exceptions.py +332 -0
- truthound/plugins/security/policies.py +348 -0
- truthound/plugins/security/protocols.py +643 -0
- truthound/plugins/security/sandbox/__init__.py +45 -0
- truthound/plugins/security/sandbox/context.py +158 -0
- truthound/plugins/security/sandbox/engines/__init__.py +19 -0
- truthound/plugins/security/sandbox/engines/container.py +379 -0
- truthound/plugins/security/sandbox/engines/noop.py +144 -0
- truthound/plugins/security/sandbox/engines/process.py +336 -0
- truthound/plugins/security/sandbox/factory.py +211 -0
- truthound/plugins/security/signing/__init__.py +57 -0
- truthound/plugins/security/signing/service.py +330 -0
- truthound/plugins/security/signing/trust_store.py +368 -0
- truthound/plugins/security/signing/verifier.py +459 -0
- truthound/plugins/versioning/__init__.py +41 -0
- truthound/plugins/versioning/constraints.py +297 -0
- truthound/plugins/versioning/resolver.py +329 -0
- truthound/profiler/__init__.py +1729 -0
- truthound/profiler/_lazy.py +452 -0
- truthound/profiler/ab_testing/__init__.py +80 -0
- truthound/profiler/ab_testing/analysis.py +449 -0
- truthound/profiler/ab_testing/base.py +257 -0
- truthound/profiler/ab_testing/experiment.py +395 -0
- truthound/profiler/ab_testing/tracking.py +368 -0
- truthound/profiler/auto_threshold.py +1170 -0
- truthound/profiler/base.py +579 -0
- truthound/profiler/cache_patterns.py +911 -0
- truthound/profiler/caching.py +1303 -0
- truthound/profiler/column_profiler.py +712 -0
- truthound/profiler/comparison.py +1007 -0
- truthound/profiler/custom_patterns.py +1170 -0
- truthound/profiler/dashboard/__init__.py +50 -0
- truthound/profiler/dashboard/app.py +476 -0
- truthound/profiler/dashboard/components.py +457 -0
- truthound/profiler/dashboard/config.py +72 -0
- truthound/profiler/distributed/__init__.py +83 -0
- truthound/profiler/distributed/base.py +281 -0
- truthound/profiler/distributed/dask_backend.py +498 -0
- truthound/profiler/distributed/local_backend.py +293 -0
- truthound/profiler/distributed/profiler.py +304 -0
- truthound/profiler/distributed/ray_backend.py +374 -0
- truthound/profiler/distributed/spark_backend.py +375 -0
- truthound/profiler/distributed.py +1366 -0
- truthound/profiler/enterprise_sampling.py +1065 -0
- truthound/profiler/errors.py +488 -0
- truthound/profiler/evolution/__init__.py +91 -0
- truthound/profiler/evolution/alerts.py +426 -0
- truthound/profiler/evolution/changes.py +206 -0
- truthound/profiler/evolution/compatibility.py +365 -0
- truthound/profiler/evolution/detector.py +372 -0
- truthound/profiler/evolution/protocols.py +121 -0
- truthound/profiler/generators/__init__.py +48 -0
- truthound/profiler/generators/base.py +384 -0
- truthound/profiler/generators/ml_rules.py +375 -0
- truthound/profiler/generators/pattern_rules.py +384 -0
- truthound/profiler/generators/schema_rules.py +267 -0
- truthound/profiler/generators/stats_rules.py +324 -0
- truthound/profiler/generators/suite_generator.py +857 -0
- truthound/profiler/i18n.py +1542 -0
- truthound/profiler/incremental.py +554 -0
- truthound/profiler/incremental_validation.py +1710 -0
- truthound/profiler/integration/__init__.py +73 -0
- truthound/profiler/integration/adapters.py +345 -0
- truthound/profiler/integration/context.py +371 -0
- truthound/profiler/integration/executor.py +527 -0
- truthound/profiler/integration/naming.py +75 -0
- truthound/profiler/integration/protocols.py +243 -0
- truthound/profiler/memory.py +1185 -0
- truthound/profiler/migration/__init__.py +60 -0
- truthound/profiler/migration/base.py +345 -0
- truthound/profiler/migration/manager.py +444 -0
- truthound/profiler/migration/v1_0_to_v1_1.py +484 -0
- truthound/profiler/ml/__init__.py +73 -0
- truthound/profiler/ml/base.py +244 -0
- truthound/profiler/ml/classifier.py +507 -0
- truthound/profiler/ml/feature_extraction.py +604 -0
- truthound/profiler/ml/pretrained.py +448 -0
- truthound/profiler/ml_inference.py +1276 -0
- truthound/profiler/native_patterns.py +815 -0
- truthound/profiler/observability.py +1184 -0
- truthound/profiler/process_timeout.py +1566 -0
- truthound/profiler/progress.py +568 -0
- truthound/profiler/progress_callbacks.py +1734 -0
- truthound/profiler/quality.py +1345 -0
- truthound/profiler/resilience.py +1180 -0
- truthound/profiler/sampled_matcher.py +794 -0
- truthound/profiler/sampling.py +1288 -0
- truthound/profiler/scheduling/__init__.py +82 -0
- truthound/profiler/scheduling/protocols.py +214 -0
- truthound/profiler/scheduling/scheduler.py +474 -0
- truthound/profiler/scheduling/storage.py +457 -0
- truthound/profiler/scheduling/triggers.py +449 -0
- truthound/profiler/schema.py +603 -0
- truthound/profiler/streaming.py +685 -0
- truthound/profiler/streaming_patterns.py +1354 -0
- truthound/profiler/suite_cli.py +625 -0
- truthound/profiler/suite_config.py +789 -0
- truthound/profiler/suite_export.py +1268 -0
- truthound/profiler/table_profiler.py +547 -0
- truthound/profiler/timeout.py +565 -0
- truthound/profiler/validation.py +1532 -0
- truthound/profiler/visualization/__init__.py +118 -0
- truthound/profiler/visualization/base.py +346 -0
- truthound/profiler/visualization/generator.py +1259 -0
- truthound/profiler/visualization/plotly_renderer.py +811 -0
- truthound/profiler/visualization/renderers.py +669 -0
- truthound/profiler/visualization/sections.py +540 -0
- truthound/profiler/visualization.py +2122 -0
- truthound/profiler/yaml_validation.py +1151 -0
- truthound/py.typed +0 -0
- truthound/ratelimit/__init__.py +248 -0
- truthound/ratelimit/algorithms.py +1108 -0
- truthound/ratelimit/core.py +573 -0
- truthound/ratelimit/integration.py +532 -0
- truthound/ratelimit/limiter.py +663 -0
- truthound/ratelimit/middleware.py +700 -0
- truthound/ratelimit/policy.py +792 -0
- truthound/ratelimit/storage.py +763 -0
- truthound/rbac/__init__.py +340 -0
- truthound/rbac/core.py +976 -0
- truthound/rbac/integration.py +760 -0
- truthound/rbac/manager.py +1052 -0
- truthound/rbac/middleware.py +842 -0
- truthound/rbac/policy.py +954 -0
- truthound/rbac/storage.py +878 -0
- truthound/realtime/__init__.py +141 -0
- truthound/realtime/adapters/__init__.py +43 -0
- truthound/realtime/adapters/base.py +533 -0
- truthound/realtime/adapters/kafka.py +487 -0
- truthound/realtime/adapters/kinesis.py +479 -0
- truthound/realtime/adapters/mock.py +243 -0
- truthound/realtime/base.py +553 -0
- truthound/realtime/factory.py +382 -0
- truthound/realtime/incremental.py +660 -0
- truthound/realtime/processing/__init__.py +67 -0
- truthound/realtime/processing/exactly_once.py +575 -0
- truthound/realtime/processing/state.py +547 -0
- truthound/realtime/processing/windows.py +647 -0
- truthound/realtime/protocols.py +569 -0
- truthound/realtime/streaming.py +605 -0
- truthound/realtime/testing/__init__.py +32 -0
- truthound/realtime/testing/containers.py +615 -0
- truthound/realtime/testing/fixtures.py +484 -0
- truthound/report.py +280 -0
- truthound/reporters/__init__.py +46 -0
- truthound/reporters/_protocols.py +30 -0
- truthound/reporters/base.py +324 -0
- truthound/reporters/ci/__init__.py +66 -0
- truthound/reporters/ci/azure.py +436 -0
- truthound/reporters/ci/base.py +509 -0
- truthound/reporters/ci/bitbucket.py +567 -0
- truthound/reporters/ci/circleci.py +547 -0
- truthound/reporters/ci/detection.py +364 -0
- truthound/reporters/ci/factory.py +182 -0
- truthound/reporters/ci/github.py +388 -0
- truthound/reporters/ci/gitlab.py +471 -0
- truthound/reporters/ci/jenkins.py +525 -0
- truthound/reporters/console_reporter.py +299 -0
- truthound/reporters/factory.py +211 -0
- truthound/reporters/html_reporter.py +524 -0
- truthound/reporters/json_reporter.py +256 -0
- truthound/reporters/markdown_reporter.py +280 -0
- truthound/reporters/sdk/__init__.py +174 -0
- truthound/reporters/sdk/builder.py +558 -0
- truthound/reporters/sdk/mixins.py +1150 -0
- truthound/reporters/sdk/schema.py +1493 -0
- truthound/reporters/sdk/templates.py +666 -0
- truthound/reporters/sdk/testing.py +968 -0
- truthound/scanners.py +170 -0
- truthound/scheduling/__init__.py +122 -0
- truthound/scheduling/cron.py +1136 -0
- truthound/scheduling/presets.py +212 -0
- truthound/schema.py +275 -0
- truthound/secrets/__init__.py +173 -0
- truthound/secrets/base.py +618 -0
- truthound/secrets/cloud.py +682 -0
- truthound/secrets/integration.py +507 -0
- truthound/secrets/manager.py +633 -0
- truthound/secrets/oidc/__init__.py +172 -0
- truthound/secrets/oidc/base.py +902 -0
- truthound/secrets/oidc/credential_provider.py +623 -0
- truthound/secrets/oidc/exchangers.py +1001 -0
- truthound/secrets/oidc/github/__init__.py +110 -0
- truthound/secrets/oidc/github/claims.py +718 -0
- truthound/secrets/oidc/github/enhanced_provider.py +693 -0
- truthound/secrets/oidc/github/trust_policy.py +742 -0
- truthound/secrets/oidc/github/verification.py +723 -0
- truthound/secrets/oidc/github/workflow.py +691 -0
- truthound/secrets/oidc/providers.py +825 -0
- truthound/secrets/providers.py +506 -0
- truthound/secrets/resolver.py +495 -0
- truthound/stores/__init__.py +177 -0
- truthound/stores/backends/__init__.py +18 -0
- truthound/stores/backends/_protocols.py +340 -0
- truthound/stores/backends/azure_blob.py +530 -0
- truthound/stores/backends/concurrent_filesystem.py +915 -0
- truthound/stores/backends/connection_pool.py +1365 -0
- truthound/stores/backends/database.py +743 -0
- truthound/stores/backends/filesystem.py +538 -0
- truthound/stores/backends/gcs.py +399 -0
- truthound/stores/backends/memory.py +354 -0
- truthound/stores/backends/s3.py +434 -0
- truthound/stores/backpressure/__init__.py +84 -0
- truthound/stores/backpressure/base.py +375 -0
- truthound/stores/backpressure/circuit_breaker.py +434 -0
- truthound/stores/backpressure/monitor.py +376 -0
- truthound/stores/backpressure/strategies.py +677 -0
- truthound/stores/base.py +551 -0
- truthound/stores/batching/__init__.py +65 -0
- truthound/stores/batching/base.py +305 -0
- truthound/stores/batching/buffer.py +370 -0
- truthound/stores/batching/store.py +248 -0
- truthound/stores/batching/writer.py +521 -0
- truthound/stores/caching/__init__.py +60 -0
- truthound/stores/caching/backends.py +684 -0
- truthound/stores/caching/base.py +356 -0
- truthound/stores/caching/store.py +305 -0
- truthound/stores/compression/__init__.py +193 -0
- truthound/stores/compression/adaptive.py +694 -0
- truthound/stores/compression/base.py +514 -0
- truthound/stores/compression/pipeline.py +868 -0
- truthound/stores/compression/providers.py +672 -0
- truthound/stores/compression/streaming.py +832 -0
- truthound/stores/concurrency/__init__.py +81 -0
- truthound/stores/concurrency/atomic.py +556 -0
- truthound/stores/concurrency/index.py +775 -0
- truthound/stores/concurrency/locks.py +576 -0
- truthound/stores/concurrency/manager.py +482 -0
- truthound/stores/encryption/__init__.py +297 -0
- truthound/stores/encryption/base.py +952 -0
- truthound/stores/encryption/keys.py +1191 -0
- truthound/stores/encryption/pipeline.py +903 -0
- truthound/stores/encryption/providers.py +953 -0
- truthound/stores/encryption/streaming.py +950 -0
- truthound/stores/expectations.py +227 -0
- truthound/stores/factory.py +246 -0
- truthound/stores/migration/__init__.py +75 -0
- truthound/stores/migration/base.py +480 -0
- truthound/stores/migration/manager.py +347 -0
- truthound/stores/migration/registry.py +382 -0
- truthound/stores/migration/store.py +559 -0
- truthound/stores/observability/__init__.py +106 -0
- truthound/stores/observability/audit.py +718 -0
- truthound/stores/observability/config.py +270 -0
- truthound/stores/observability/factory.py +208 -0
- truthound/stores/observability/metrics.py +636 -0
- truthound/stores/observability/protocols.py +410 -0
- truthound/stores/observability/store.py +570 -0
- truthound/stores/observability/tracing.py +784 -0
- truthound/stores/replication/__init__.py +76 -0
- truthound/stores/replication/base.py +260 -0
- truthound/stores/replication/monitor.py +269 -0
- truthound/stores/replication/store.py +439 -0
- truthound/stores/replication/syncer.py +391 -0
- truthound/stores/results.py +359 -0
- truthound/stores/retention/__init__.py +77 -0
- truthound/stores/retention/base.py +378 -0
- truthound/stores/retention/policies.py +621 -0
- truthound/stores/retention/scheduler.py +279 -0
- truthound/stores/retention/store.py +526 -0
- truthound/stores/streaming/__init__.py +138 -0
- truthound/stores/streaming/base.py +801 -0
- truthound/stores/streaming/database.py +984 -0
- truthound/stores/streaming/filesystem.py +719 -0
- truthound/stores/streaming/reader.py +629 -0
- truthound/stores/streaming/s3.py +843 -0
- truthound/stores/streaming/writer.py +790 -0
- truthound/stores/tiering/__init__.py +108 -0
- truthound/stores/tiering/base.py +462 -0
- truthound/stores/tiering/manager.py +249 -0
- truthound/stores/tiering/policies.py +692 -0
- truthound/stores/tiering/store.py +526 -0
- truthound/stores/versioning/__init__.py +56 -0
- truthound/stores/versioning/base.py +376 -0
- truthound/stores/versioning/store.py +660 -0
- truthound/stores/versioning/strategies.py +353 -0
- truthound/types.py +56 -0
- truthound/validators/__init__.py +774 -0
- truthound/validators/aggregate/__init__.py +27 -0
- truthound/validators/aggregate/central.py +116 -0
- truthound/validators/aggregate/extremes.py +116 -0
- truthound/validators/aggregate/spread.py +118 -0
- truthound/validators/aggregate/sum.py +64 -0
- truthound/validators/aggregate/type.py +78 -0
- truthound/validators/anomaly/__init__.py +93 -0
- truthound/validators/anomaly/base.py +431 -0
- truthound/validators/anomaly/ml_based.py +1190 -0
- truthound/validators/anomaly/multivariate.py +647 -0
- truthound/validators/anomaly/statistical.py +599 -0
- truthound/validators/base.py +1089 -0
- truthound/validators/business_rule/__init__.py +46 -0
- truthound/validators/business_rule/base.py +147 -0
- truthound/validators/business_rule/checksum.py +509 -0
- truthound/validators/business_rule/financial.py +526 -0
- truthound/validators/cache.py +733 -0
- truthound/validators/completeness/__init__.py +39 -0
- truthound/validators/completeness/conditional.py +73 -0
- truthound/validators/completeness/default.py +98 -0
- truthound/validators/completeness/empty.py +103 -0
- truthound/validators/completeness/nan.py +337 -0
- truthound/validators/completeness/null.py +152 -0
- truthound/validators/cross_table/__init__.py +17 -0
- truthound/validators/cross_table/aggregate.py +333 -0
- truthound/validators/cross_table/row_count.py +122 -0
- truthound/validators/datetime/__init__.py +29 -0
- truthound/validators/datetime/format.py +78 -0
- truthound/validators/datetime/freshness.py +269 -0
- truthound/validators/datetime/order.py +73 -0
- truthound/validators/datetime/parseable.py +185 -0
- truthound/validators/datetime/range.py +202 -0
- truthound/validators/datetime/timezone.py +69 -0
- truthound/validators/distribution/__init__.py +49 -0
- truthound/validators/distribution/distribution.py +128 -0
- truthound/validators/distribution/monotonic.py +119 -0
- truthound/validators/distribution/outlier.py +178 -0
- truthound/validators/distribution/quantile.py +80 -0
- truthound/validators/distribution/range.py +254 -0
- truthound/validators/distribution/set.py +125 -0
- truthound/validators/distribution/statistical.py +459 -0
- truthound/validators/drift/__init__.py +79 -0
- truthound/validators/drift/base.py +427 -0
- truthound/validators/drift/multi_feature.py +401 -0
- truthound/validators/drift/numeric.py +395 -0
- truthound/validators/drift/psi.py +446 -0
- truthound/validators/drift/statistical.py +510 -0
- truthound/validators/enterprise.py +1658 -0
- truthound/validators/geospatial/__init__.py +80 -0
- truthound/validators/geospatial/base.py +97 -0
- truthound/validators/geospatial/boundary.py +238 -0
- truthound/validators/geospatial/coordinate.py +351 -0
- truthound/validators/geospatial/distance.py +399 -0
- truthound/validators/geospatial/polygon.py +665 -0
- truthound/validators/i18n/__init__.py +308 -0
- truthound/validators/i18n/bidi.py +571 -0
- truthound/validators/i18n/catalogs.py +570 -0
- truthound/validators/i18n/dialects.py +763 -0
- truthound/validators/i18n/extended_catalogs.py +549 -0
- truthound/validators/i18n/formatting.py +1434 -0
- truthound/validators/i18n/loader.py +1020 -0
- truthound/validators/i18n/messages.py +521 -0
- truthound/validators/i18n/plural.py +683 -0
- truthound/validators/i18n/protocols.py +855 -0
- truthound/validators/i18n/tms.py +1162 -0
- truthound/validators/localization/__init__.py +53 -0
- truthound/validators/localization/base.py +122 -0
- truthound/validators/localization/chinese.py +362 -0
- truthound/validators/localization/japanese.py +275 -0
- truthound/validators/localization/korean.py +524 -0
- truthound/validators/memory/__init__.py +94 -0
- truthound/validators/memory/approximate_knn.py +506 -0
- truthound/validators/memory/base.py +547 -0
- truthound/validators/memory/sgd_online.py +719 -0
- truthound/validators/memory/streaming_ecdf.py +753 -0
- truthound/validators/ml_feature/__init__.py +54 -0
- truthound/validators/ml_feature/base.py +249 -0
- truthound/validators/ml_feature/correlation.py +299 -0
- truthound/validators/ml_feature/leakage.py +344 -0
- truthound/validators/ml_feature/null_impact.py +270 -0
- truthound/validators/ml_feature/scale.py +264 -0
- truthound/validators/multi_column/__init__.py +89 -0
- truthound/validators/multi_column/arithmetic.py +284 -0
- truthound/validators/multi_column/base.py +231 -0
- truthound/validators/multi_column/comparison.py +273 -0
- truthound/validators/multi_column/consistency.py +312 -0
- truthound/validators/multi_column/statistical.py +299 -0
- truthound/validators/optimization/__init__.py +164 -0
- truthound/validators/optimization/aggregation.py +563 -0
- truthound/validators/optimization/covariance.py +556 -0
- truthound/validators/optimization/geo.py +626 -0
- truthound/validators/optimization/graph.py +587 -0
- truthound/validators/optimization/orchestrator.py +970 -0
- truthound/validators/optimization/profiling.py +1312 -0
- truthound/validators/privacy/__init__.py +223 -0
- truthound/validators/privacy/base.py +635 -0
- truthound/validators/privacy/ccpa.py +670 -0
- truthound/validators/privacy/gdpr.py +728 -0
- truthound/validators/privacy/global_patterns.py +604 -0
- truthound/validators/privacy/plugins.py +867 -0
- truthound/validators/profiling/__init__.py +52 -0
- truthound/validators/profiling/base.py +175 -0
- truthound/validators/profiling/cardinality.py +312 -0
- truthound/validators/profiling/entropy.py +391 -0
- truthound/validators/profiling/frequency.py +455 -0
- truthound/validators/pushdown_support.py +660 -0
- truthound/validators/query/__init__.py +91 -0
- truthound/validators/query/aggregate.py +346 -0
- truthound/validators/query/base.py +246 -0
- truthound/validators/query/column.py +249 -0
- truthound/validators/query/expression.py +274 -0
- truthound/validators/query/result.py +323 -0
- truthound/validators/query/row_count.py +264 -0
- truthound/validators/referential/__init__.py +80 -0
- truthound/validators/referential/base.py +395 -0
- truthound/validators/referential/cascade.py +391 -0
- truthound/validators/referential/circular.py +563 -0
- truthound/validators/referential/foreign_key.py +624 -0
- truthound/validators/referential/orphan.py +485 -0
- truthound/validators/registry.py +112 -0
- truthound/validators/schema/__init__.py +41 -0
- truthound/validators/schema/column_count.py +142 -0
- truthound/validators/schema/column_exists.py +80 -0
- truthound/validators/schema/column_order.py +82 -0
- truthound/validators/schema/column_pair.py +85 -0
- truthound/validators/schema/column_pair_set.py +195 -0
- truthound/validators/schema/column_type.py +94 -0
- truthound/validators/schema/multi_column.py +53 -0
- truthound/validators/schema/multi_column_aggregate.py +175 -0
- truthound/validators/schema/referential.py +274 -0
- truthound/validators/schema/table_schema.py +91 -0
- truthound/validators/schema_validator.py +219 -0
- truthound/validators/sdk/__init__.py +250 -0
- truthound/validators/sdk/builder.py +680 -0
- truthound/validators/sdk/decorators.py +474 -0
- truthound/validators/sdk/enterprise/__init__.py +211 -0
- truthound/validators/sdk/enterprise/docs.py +725 -0
- truthound/validators/sdk/enterprise/fuzzing.py +659 -0
- truthound/validators/sdk/enterprise/licensing.py +709 -0
- truthound/validators/sdk/enterprise/manager.py +543 -0
- truthound/validators/sdk/enterprise/resources.py +628 -0
- truthound/validators/sdk/enterprise/sandbox.py +766 -0
- truthound/validators/sdk/enterprise/signing.py +603 -0
- truthound/validators/sdk/enterprise/templates.py +865 -0
- truthound/validators/sdk/enterprise/versioning.py +659 -0
- truthound/validators/sdk/templates.py +757 -0
- truthound/validators/sdk/testing.py +807 -0
- truthound/validators/security/__init__.py +181 -0
- truthound/validators/security/redos/__init__.py +182 -0
- truthound/validators/security/redos/core.py +861 -0
- truthound/validators/security/redos/cpu_monitor.py +593 -0
- truthound/validators/security/redos/cve_database.py +791 -0
- truthound/validators/security/redos/ml/__init__.py +155 -0
- truthound/validators/security/redos/ml/base.py +785 -0
- truthound/validators/security/redos/ml/datasets.py +618 -0
- truthound/validators/security/redos/ml/features.py +359 -0
- truthound/validators/security/redos/ml/models.py +1000 -0
- truthound/validators/security/redos/ml/predictor.py +507 -0
- truthound/validators/security/redos/ml/storage.py +632 -0
- truthound/validators/security/redos/ml/training.py +571 -0
- truthound/validators/security/redos/ml_analyzer.py +937 -0
- truthound/validators/security/redos/optimizer.py +674 -0
- truthound/validators/security/redos/profiler.py +682 -0
- truthound/validators/security/redos/re2_engine.py +709 -0
- truthound/validators/security/redos.py +886 -0
- truthound/validators/security/sql_security.py +1247 -0
- truthound/validators/streaming/__init__.py +126 -0
- truthound/validators/streaming/base.py +292 -0
- truthound/validators/streaming/completeness.py +210 -0
- truthound/validators/streaming/mixin.py +575 -0
- truthound/validators/streaming/range.py +308 -0
- truthound/validators/streaming/sources.py +846 -0
- truthound/validators/string/__init__.py +57 -0
- truthound/validators/string/casing.py +158 -0
- truthound/validators/string/charset.py +96 -0
- truthound/validators/string/format.py +501 -0
- truthound/validators/string/json.py +77 -0
- truthound/validators/string/json_schema.py +184 -0
- truthound/validators/string/length.py +104 -0
- truthound/validators/string/like_pattern.py +237 -0
- truthound/validators/string/regex.py +202 -0
- truthound/validators/string/regex_extended.py +435 -0
- truthound/validators/table/__init__.py +88 -0
- truthound/validators/table/base.py +78 -0
- truthound/validators/table/column_count.py +198 -0
- truthound/validators/table/freshness.py +362 -0
- truthound/validators/table/row_count.py +251 -0
- truthound/validators/table/schema.py +333 -0
- truthound/validators/table/size.py +285 -0
- truthound/validators/timeout/__init__.py +102 -0
- truthound/validators/timeout/advanced/__init__.py +247 -0
- truthound/validators/timeout/advanced/circuit_breaker.py +675 -0
- truthound/validators/timeout/advanced/prediction.py +773 -0
- truthound/validators/timeout/advanced/priority.py +618 -0
- truthound/validators/timeout/advanced/redis_backend.py +770 -0
- truthound/validators/timeout/advanced/retry.py +721 -0
- truthound/validators/timeout/advanced/sampling.py +788 -0
- truthound/validators/timeout/advanced/sla.py +661 -0
- truthound/validators/timeout/advanced/telemetry.py +804 -0
- truthound/validators/timeout/cascade.py +477 -0
- truthound/validators/timeout/deadline.py +657 -0
- truthound/validators/timeout/degradation.py +525 -0
- truthound/validators/timeout/distributed.py +597 -0
- truthound/validators/timeseries/__init__.py +89 -0
- truthound/validators/timeseries/base.py +326 -0
- truthound/validators/timeseries/completeness.py +617 -0
- truthound/validators/timeseries/gap.py +485 -0
- truthound/validators/timeseries/monotonic.py +310 -0
- truthound/validators/timeseries/seasonality.py +422 -0
- truthound/validators/timeseries/trend.py +510 -0
- truthound/validators/uniqueness/__init__.py +59 -0
- truthound/validators/uniqueness/approximate.py +475 -0
- truthound/validators/uniqueness/distinct_values.py +253 -0
- truthound/validators/uniqueness/duplicate.py +118 -0
- truthound/validators/uniqueness/primary_key.py +140 -0
- truthound/validators/uniqueness/unique.py +191 -0
- truthound/validators/uniqueness/within_record.py +599 -0
- truthound/validators/utils.py +756 -0
- truthound-1.0.8.dist-info/METADATA +474 -0
- truthound-1.0.8.dist-info/RECORD +877 -0
- truthound-1.0.8.dist-info/WHEEL +4 -0
- truthound-1.0.8.dist-info/entry_points.txt +2 -0
- truthound-1.0.8.dist-info/licenses/LICENSE +190 -0
|
@@ -0,0 +1,785 @@
|
|
|
1
|
+
"""Base abstractions for ReDoS ML framework.
|
|
2
|
+
|
|
3
|
+
This module defines the core protocols, data classes, and base types used
|
|
4
|
+
throughout the ML framework. It follows the principle of dependency inversion
|
|
5
|
+
by defining abstractions that concrete implementations depend on.
|
|
6
|
+
|
|
7
|
+
Design Principles:
|
|
8
|
+
- Protocol-based design for loose coupling
|
|
9
|
+
- Immutable data classes for thread safety
|
|
10
|
+
- Clear separation of concerns
|
|
11
|
+
- Extensibility through composition
|
|
12
|
+
"""
|
|
13
|
+
|
|
14
|
+
from __future__ import annotations
|
|
15
|
+
|
|
16
|
+
import json
|
|
17
|
+
from abc import ABC, abstractmethod
|
|
18
|
+
from dataclasses import dataclass, field
|
|
19
|
+
from datetime import datetime
|
|
20
|
+
from enum import Enum
|
|
21
|
+
from pathlib import Path
|
|
22
|
+
from typing import Any, Dict, List, Optional, Protocol, Sequence, Tuple, runtime_checkable
|
|
23
|
+
|
|
24
|
+
from truthound.validators.security.redos.core import ReDoSRisk
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
# =============================================================================
|
|
28
|
+
# Enums
|
|
29
|
+
# =============================================================================
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
class ModelType(str, Enum):
    """Model families that can be selected for ReDoS prediction.

    Every member is also a ``str`` instance, so a member compares equal to
    its plain string value (for example ``ModelType.SVM == "svm"``).
    """

    RULE_BASED = "rule_based"
    RANDOM_FOREST = "random_forest"
    GRADIENT_BOOSTING = "gradient_boosting"
    LOGISTIC_REGRESSION = "logistic_regression"
    SVM = "svm"
    NEURAL_NETWORK = "neural_network"
    ENSEMBLE = "ensemble"

    def __str__(self) -> str:
        """Render the member as its raw string value."""
        raw_value: str = self.value
        return raw_value
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
# =============================================================================
|
|
48
|
+
# Data Classes
|
|
49
|
+
# =============================================================================
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
@dataclass(frozen=True)
class PatternFeatures:
    """Frozen record of the features extracted from one regex pattern.

    The fields fall into structural, quantifier, dangerous-pattern,
    character-class, lookaround, backreference, anchor and complexity
    groups. Every field defaults to zero / ``False``, and instances cannot
    be mutated after construction.

    Attributes:
        length: Total length of the pattern string
        group_count: Total number of groups (capture + non-capture)
        capture_group_count: Number of capturing groups
        non_capture_group_count: Number of non-capturing groups
        max_nesting_depth: Maximum depth of nested parentheses
        alternation_count: Number of alternation operators (|)
        plus_count: Number of + quantifiers
        star_count: Number of * quantifiers
        question_count: Number of ? quantifiers (non-lookahead)
        bounded_quantifier_count: Number of {n,m} quantifiers
        unbounded_quantifier_count: Number of unbounded quantifiers (+, *, {n,})
        lazy_quantifier_count: Number of lazy quantifiers (+?, *?, etc.)
        possessive_quantifier_count: Number of possessive quantifiers
        quantifier_density: Ratio of quantifiers to pattern length
        nested_quantifier_count: Number of nested quantifier patterns
        adjacent_quantifier_count: Number of adjacent quantifiers
        quantified_alternation_count: Number of alternations with quantifiers
        quantified_backreference_count: Number of backreferences with quantifiers
        char_class_count: Number of character classes []
        negated_char_class_count: Number of negated character classes [^]
        dot_count: Number of dot metacharacters
        word_boundary_count: Number of word boundary assertions
        lookahead_count: Number of lookahead assertions
        lookbehind_count: Number of lookbehind assertions
        negative_lookaround_count: Number of negative lookaround assertions
        backreference_count: Number of backreferences
        max_backreference_index: Highest backreference index used
        start_anchor: Whether pattern starts with ^ or \\A
        end_anchor: Whether pattern ends with $ or \\Z
        anchored: Whether pattern is fully anchored (both ends)
        backtracking_potential: Estimated backtracking risk score (0-100)
        estimated_states: Estimated number of NFA states
    """

    # Structural features
    length: int = 0
    group_count: int = 0
    capture_group_count: int = 0
    non_capture_group_count: int = 0
    max_nesting_depth: int = 0
    alternation_count: int = 0

    # Quantifier features
    plus_count: int = 0
    star_count: int = 0
    question_count: int = 0
    bounded_quantifier_count: int = 0
    unbounded_quantifier_count: int = 0
    lazy_quantifier_count: int = 0
    possessive_quantifier_count: int = 0
    quantifier_density: float = 0.0

    # Dangerous pattern indicators
    nested_quantifier_count: int = 0
    adjacent_quantifier_count: int = 0
    quantified_alternation_count: int = 0
    quantified_backreference_count: int = 0

    # Character class features
    char_class_count: int = 0
    negated_char_class_count: int = 0
    dot_count: int = 0
    word_boundary_count: int = 0

    # Lookaround features
    lookahead_count: int = 0
    lookbehind_count: int = 0
    negative_lookaround_count: int = 0

    # Backreference features
    backreference_count: int = 0
    max_backreference_index: int = 0

    # Anchor features
    start_anchor: bool = False
    end_anchor: bool = False
    anchored: bool = False

    # Complexity metrics
    backtracking_potential: float = 0.0
    estimated_states: int = 0

    def to_vector(self) -> List[float]:
        """Flatten the features into a list of floats.

        Returns:
            One float per feature, ordered exactly like ``feature_names()``.
            Boolean fields become ``1.0`` / ``0.0``.
        """
        # Walking the name list keeps the vector and its labels in lockstep.
        return [float(getattr(self, name)) for name in self.feature_names()]

    @classmethod
    def feature_names(cls) -> List[str]:
        """List the feature names in vector order.

        Returns:
            Names in the same order as the values from ``to_vector()``.
        """
        return [
            # Structural
            "length",
            "group_count",
            "capture_group_count",
            "non_capture_group_count",
            "max_nesting_depth",
            "alternation_count",
            # Quantifiers
            "plus_count",
            "star_count",
            "question_count",
            "bounded_quantifier_count",
            "unbounded_quantifier_count",
            "lazy_quantifier_count",
            "possessive_quantifier_count",
            "quantifier_density",
            # Dangerous pattern indicators
            "nested_quantifier_count",
            "adjacent_quantifier_count",
            "quantified_alternation_count",
            "quantified_backreference_count",
            # Character classes
            "char_class_count",
            "negated_char_class_count",
            "dot_count",
            "word_boundary_count",
            # Lookarounds
            "lookahead_count",
            "lookbehind_count",
            "negative_lookaround_count",
            # Backreferences
            "backreference_count",
            "max_backreference_index",
            # Anchors
            "start_anchor",
            "end_anchor",
            "anchored",
            # Complexity metrics
            "backtracking_potential",
            "estimated_states",
        ]

    @classmethod
    def num_features(cls) -> int:
        """Return how many entries a feature vector has."""
        names = cls.feature_names()
        return len(names)

    def to_dict(self) -> Dict[str, Any]:
        """Map each feature name to its float value from ``to_vector()``."""
        return dict(zip(self.feature_names(), self.to_vector()))
|
|
234
|
+
|
|
235
|
+
|
|
236
|
+
@dataclass
class ReDoSPrediction:
    """Outcome of scoring one regex pattern for ReDoS risk.

    Attributes:
        pattern: The analyzed regex pattern
        features: Extracted feature values
        risk_probability: Probability of ReDoS vulnerability (0.0 to 1.0)
        risk_level: Categorical risk level based on probability
        confidence: Model confidence in prediction (0.0 to 1.0)
        contributing_factors: ``(feature name, contribution)`` pairs for the
            features influencing the prediction
        model_type: Type of model used for prediction
        model_version: Version of the model used
        inference_time_ms: Time taken for inference in milliseconds
    """

    pattern: str
    features: PatternFeatures
    risk_probability: float
    risk_level: ReDoSRisk
    confidence: float
    contributing_factors: List[Tuple[str, float]] = field(default_factory=list)
    model_type: str = ""
    model_version: str = ""
    inference_time_ms: float = 0.0

    def to_dict(self) -> Dict[str, Any]:
        """Build a plain dictionary suitable for serialization.

        Probabilities, confidence and contributions are rounded to four
        decimals, the inference time to three. The risk level is emitted
        by member name.
        """
        factors = []
        for feature_name, weight in self.contributing_factors:
            factors.append({"feature": feature_name, "contribution": round(weight, 4)})

        payload: Dict[str, Any] = {}
        payload["pattern"] = self.pattern
        payload["features"] = self.features.to_dict()
        payload["risk_probability"] = round(self.risk_probability, 4)
        payload["risk_level"] = self.risk_level.name
        payload["confidence"] = round(self.confidence, 4)
        payload["contributing_factors"] = factors
        payload["model_type"] = self.model_type
        payload["model_version"] = self.model_version
        payload["inference_time_ms"] = round(self.inference_time_ms, 3)
        return payload

    def to_json(self) -> str:
        """Serialize ``to_dict()`` as JSON indented by two spaces."""
        serializable = self.to_dict()
        return json.dumps(serializable, indent=2)
|
|
282
|
+
|
|
283
|
+
|
|
284
|
+
@dataclass
class ReDoSTrainingData:
    """Container for a labelled set of regex patterns used for training.

    Attributes:
        patterns: List of regex patterns
        labels: Corresponding labels (0=safe, 1=vulnerable)
        features: Pre-extracted features (optional, can be computed)
        feature_names: Names of features in feature vectors; when left
            empty it is filled from ``PatternFeatures.feature_names()``
        sample_weights: Optional per-sample weights
        metadata: Additional metadata about the dataset

    Raises:
        ValueError: If ``labels``, ``features`` or ``sample_weights`` (when
            given) do not have one entry per pattern.
    """

    patterns: List[str]
    labels: List[int]
    features: Optional[List[List[float]]] = None
    feature_names: List[str] = field(default_factory=list)
    sample_weights: Optional[List[float]] = None
    metadata: Dict[str, Any] = field(default_factory=dict)

    def __post_init__(self) -> None:
        """Validate that all per-sample sequences line up with ``patterns``."""
        if len(self.patterns) != len(self.labels):
            raise ValueError(
                f"Number of patterns ({len(self.patterns)}) must match "
                f"number of labels ({len(self.labels)})"
            )
        if self.features is not None and len(self.features) != len(self.patterns):
            raise ValueError(
                f"Number of feature vectors ({len(self.features)}) must match "
                f"number of patterns ({len(self.patterns)})"
            )
        # Weights are per sample, so they get the same length check as the
        # labels and feature vectors above.
        if self.sample_weights is not None and len(self.sample_weights) != len(self.patterns):
            raise ValueError(
                f"Number of sample weights ({len(self.sample_weights)}) must match "
                f"number of patterns ({len(self.patterns)})"
            )
        if not self.feature_names:
            self.feature_names = PatternFeatures.feature_names()

    def __len__(self) -> int:
        """Return the number of patterns in the dataset."""
        return len(self.patterns)

    @property
    def num_vulnerable(self) -> int:
        """Count of vulnerable patterns (sum of the labels)."""
        return sum(self.labels)

    @property
    def num_safe(self) -> int:
        """Count of safe patterns (label count minus the label sum)."""
        return len(self.labels) - sum(self.labels)

    @property
    def class_balance(self) -> float:
        """Ratio of vulnerable to total samples; ``0.0`` for an empty set."""
        return self.num_vulnerable / len(self) if len(self) > 0 else 0.0
|
|
335
|
+
|
|
336
|
+
|
|
337
|
+
@dataclass
class ReDoSModelMetrics:
    """Evaluation metrics recorded for a trained model.

    Attributes:
        accuracy: Overall classification accuracy
        precision: Precision for vulnerable class
        recall: Recall for vulnerable class (sensitivity)
        f1_score: F1 score (harmonic mean of precision and recall)
        specificity: True negative rate
        auc_roc: Area under ROC curve (if available)
        confusion_matrix: [[TN, FP], [FN, TP]]
        feature_importances: Feature importance scores (if available)
        cross_val_scores: Cross-validation scores (if available)
        training_samples: Number of training samples
        training_time_seconds: Time taken for training
        trained_at: Timestamp of training completion; defaults to
            ``datetime.now()`` (naive, no timezone) at construction
    """

    accuracy: float
    precision: float
    recall: float
    f1_score: float
    specificity: float = 0.0
    auc_roc: Optional[float] = None
    confusion_matrix: Optional[List[List[int]]] = None
    feature_importances: Optional[Dict[str, float]] = None
    cross_val_scores: Optional[List[float]] = None
    training_samples: int = 0
    training_time_seconds: float = 0.0
    trained_at: datetime = field(default_factory=datetime.now)

    def to_dict(self) -> Dict[str, Any]:
        """Convert to a dictionary.

        Scores are rounded to four decimals and the training time to three.
        Optional metrics are included only when they are not ``None``.
        """
        result: Dict[str, Any] = {
            "accuracy": round(self.accuracy, 4),
            "precision": round(self.precision, 4),
            "recall": round(self.recall, 4),
            "f1_score": round(self.f1_score, 4),
            "specificity": round(self.specificity, 4),
            "training_samples": self.training_samples,
            "training_time_seconds": round(self.training_time_seconds, 3),
            "trained_at": self.trained_at.isoformat(),
        }
        if self.auc_roc is not None:
            result["auc_roc"] = round(self.auc_roc, 4)
        if self.confusion_matrix is not None:
            result["confusion_matrix"] = self.confusion_matrix
        if self.feature_importances is not None:
            result["feature_importances"] = {
                k: round(v, 4) for k, v in self.feature_importances.items()
            }
        if self.cross_val_scores is not None:
            result["cross_val_scores"] = [round(s, 4) for s in self.cross_val_scores]
        return result

    def summary(self) -> str:
        """Get a human-readable, newline-separated summary of the metrics.

        The AUC-ROC line appears only when a value is set. The CV mean line
        appears only when at least one cross-validation score is present.
        """
        lines = [
            f"Accuracy: {self.accuracy:.2%}",
            f"Precision: {self.precision:.2%}",
            f"Recall: {self.recall:.2%}",
            f"F1 Score: {self.f1_score:.2%}",
            f"Specificity: {self.specificity:.2%}",
        ]
        if self.auc_roc is not None:
            lines.append(f"AUC-ROC: {self.auc_roc:.4f}")
        # Truthiness check: an empty score list has no mean, and dividing by
        # its length would raise ZeroDivisionError.
        if self.cross_val_scores:
            mean_cv = sum(self.cross_val_scores) / len(self.cross_val_scores)
            lines.append(f"CV Mean: {mean_cv:.2%}")
        return "\n".join(lines)
|
|
408
|
+
|
|
409
|
+
|
|
410
|
+
@dataclass
|
|
411
|
+
class ModelConfig:
|
|
412
|
+
"""Configuration for ReDoS ML models.
|
|
413
|
+
|
|
414
|
+
This configuration class controls all aspects of model training and
|
|
415
|
+
inference, from algorithm-specific hyperparameters to general training
|
|
416
|
+
settings.
|
|
417
|
+
|
|
418
|
+
Attributes:
|
|
419
|
+
model_type: Type of model to use
|
|
420
|
+
n_estimators: Number of estimators for ensemble methods
|
|
421
|
+
max_depth: Maximum tree depth
|
|
422
|
+
min_samples_split: Minimum samples required to split an internal node
|
|
423
|
+
min_samples_leaf: Minimum samples required at a leaf node
|
|
424
|
+
learning_rate: Learning rate for gradient-based methods
|
|
425
|
+
random_state: Random seed for reproducibility
|
|
426
|
+
n_jobs: Number of parallel jobs (-1 for all cores)
|
|
427
|
+
class_weight: How to handle class imbalance
|
|
428
|
+
feature_selection: Whether to perform feature selection
|
|
429
|
+
max_features: Maximum number of features to use
|
|
430
|
+
cross_validation_folds: Number of CV folds
|
|
431
|
+
validation_split: Fraction of data for validation
|
|
432
|
+
early_stopping: Whether to use early stopping
|
|
433
|
+
model_version: Version string for the model
|
|
434
|
+
"""
|
|
435
|
+
|
|
436
|
+
model_type: ModelType = ModelType.RANDOM_FOREST
|
|
437
|
+
n_estimators: int = 100
|
|
438
|
+
max_depth: int = 10
|
|
439
|
+
min_samples_split: int = 5
|
|
440
|
+
min_samples_leaf: int = 2
|
|
441
|
+
learning_rate: float = 0.1
|
|
442
|
+
random_state: int = 42
|
|
443
|
+
n_jobs: int = -1
|
|
444
|
+
class_weight: str = "balanced"
|
|
445
|
+
feature_selection: bool = True
|
|
446
|
+
max_features: int = 50
|
|
447
|
+
cross_validation_folds: int = 5
|
|
448
|
+
validation_split: float = 0.2
|
|
449
|
+
early_stopping: bool = True
|
|
450
|
+
model_version: str = "1.0.0"
|
|
451
|
+
|
|
452
|
+
def to_dict(self) -> Dict[str, Any]:
|
|
453
|
+
"""Convert to dictionary."""
|
|
454
|
+
return {
|
|
455
|
+
"model_type": self.model_type.value,
|
|
456
|
+
"n_estimators": self.n_estimators,
|
|
457
|
+
"max_depth": self.max_depth,
|
|
458
|
+
"min_samples_split": self.min_samples_split,
|
|
459
|
+
"min_samples_leaf": self.min_samples_leaf,
|
|
460
|
+
"learning_rate": self.learning_rate,
|
|
461
|
+
"random_state": self.random_state,
|
|
462
|
+
"n_jobs": self.n_jobs,
|
|
463
|
+
"class_weight": self.class_weight,
|
|
464
|
+
"feature_selection": self.feature_selection,
|
|
465
|
+
"max_features": self.max_features,
|
|
466
|
+
"cross_validation_folds": self.cross_validation_folds,
|
|
467
|
+
"validation_split": self.validation_split,
|
|
468
|
+
"early_stopping": self.early_stopping,
|
|
469
|
+
"model_version": self.model_version,
|
|
470
|
+
}
|
|
471
|
+
|
|
472
|
+
@classmethod
def from_dict(cls, data: Dict[str, Any]) -> "ModelConfig":
    """Build a configuration from a dictionary of field values.

    A string ``model_type`` entry is converted with ``ModelType(...)``
    before construction; any other value is passed through untouched.
    The caller's dictionary is never modified.

    Args:
        data: Mapping of field names to values, forwarded as keyword
            arguments to the constructor.

    Returns:
        New configuration instance.
    """
    raw_type = data.get("model_type")
    if not isinstance(raw_type, str):
        # Nothing to convert: forward the mapping unchanged.
        return cls(**data)

    # Merge into a fresh dict so the conversion does not leak to the caller.
    converted = {**data, "model_type": ModelType(raw_type)}
    return cls(**converted)
|
|
479
|
+
|
|
480
|
+
@classmethod
def default(cls) -> "ModelConfig":
    """Return a configuration built with no overrides.

    Every field keeps the default declared on the class.
    """
    baseline = cls()
    return baseline
|
|
484
|
+
|
|
485
|
+
@classmethod
def fast_training(cls) -> "ModelConfig":
    """Return a preset intended for quick training runs.

    Uses 50 estimators, a maximum depth of 5 and 3 cross-validation
    folds; all other fields keep their declared defaults.
    """
    overrides = {
        "n_estimators": 50,
        "max_depth": 5,
        "cross_validation_folds": 3,
    }
    return cls(**overrides)
|
|
493
|
+
|
|
494
|
+
@classmethod
def high_accuracy(cls) -> "ModelConfig":
    """Return a preset intended to favour accuracy over training time.

    Selects ``ModelType.GRADIENT_BOOSTING`` with 200 estimators, a maximum
    depth of 15 and 10 cross-validation folds; all other fields keep their
    declared defaults.
    """
    overrides = {
        "model_type": ModelType.GRADIENT_BOOSTING,
        "n_estimators": 200,
        "max_depth": 15,
        "cross_validation_folds": 10,
    }
    return cls(**overrides)
|
|
503
|
+
|
|
504
|
+
|
|
505
|
+
# =============================================================================
|
|
506
|
+
# Protocols
|
|
507
|
+
# =============================================================================
|
|
508
|
+
|
|
509
|
+
|
|
510
|
+
@runtime_checkable
class FeatureExtractorProtocol(Protocol):
    """Structural interface for regex feature extractors.

    An extractor turns a raw regex pattern string into a
    ``PatternFeatures`` value that ML models can consume. Any object that
    provides both methods below satisfies the protocol; no inheritance is
    required.
    """

    def extract(self, pattern: str) -> PatternFeatures:
        """Compute the features of a single regex pattern.

        Args:
            pattern: Regex pattern string

        Returns:
            PatternFeatures instance describing the pattern
        """
        ...

    def extract_batch(self, patterns: Sequence[str]) -> List[PatternFeatures]:
        """Compute the features of several regex patterns.

        Args:
            patterns: Sequence of regex pattern strings

        Returns:
            List of PatternFeatures instances
        """
        ...
|
|
539
|
+
|
|
540
|
+
|
|
541
|
+
@runtime_checkable
class ReDoSModelProtocol(Protocol):
    """Structural interface for ReDoS prediction models.

    Any object exposing the members below can be used wherever a ReDoS
    model is expected, so implementations can be swapped without sharing
    a base class.
    """

    @property
    def is_trained(self) -> bool:
        """Whether the model has been trained."""
        ...

    @property
    def config(self) -> ModelConfig:
        """The configuration the model is using."""
        ...

    def predict(self, features: List[float]) -> Tuple[float, float]:
        """Score a single feature vector.

        Args:
            features: Feature vector

        Returns:
            Tuple of (risk_probability, confidence)
        """
        ...

    def predict_batch(
        self, features: List[List[float]]
    ) -> List[Tuple[float, float]]:
        """Score several feature vectors.

        Args:
            features: List of feature vectors

        Returns:
            List of (risk_probability, confidence) tuples
        """
        ...

    def train(self, data: ReDoSTrainingData) -> ReDoSModelMetrics:
        """Fit the model on labeled data.

        Args:
            data: Training data container

        Returns:
            Training metrics
        """
        ...

    def get_feature_importance(self) -> List[float]:
        """Report how much each feature contributes.

        Returns:
            List of importance scores for each feature
        """
        ...

    def save(self, path: str | Path) -> None:
        """Persist the model to disk.

        Args:
            path: Path to save the model
        """
        ...

    def load(self, path: str | Path) -> None:
        """Restore the model from disk.

        Args:
            path: Path to the saved model
        """
        ...
|
|
617
|
+
|
|
618
|
+
|
|
619
|
+
# =============================================================================
|
|
620
|
+
# Base Classes
|
|
621
|
+
# =============================================================================
|
|
622
|
+
|
|
623
|
+
|
|
624
|
+
class BaseReDoSModel(ABC):
    """Abstract base class for ReDoS ML models.

    Holds the state shared by all model implementations (configuration,
    trained flag, last training metrics, feature names) and implements
    pickle-based persistence through :meth:`save` and :meth:`load`.

    Subclasses must implement:
        - predict(): Single sample prediction
        - predict_batch(): Batch prediction
        - train(): Model training
        - get_feature_importance(): Per-feature importance scores
        - _save_model_data(): Model-specific save logic
        - _load_model_data(): Model-specific load logic
    """

    name: str = "base"
    version: str = "1.0.0"

    def __init__(self, config: ModelConfig | None = None):
        """Initialize the model.

        Args:
            config: Model configuration (uses ``ModelConfig.default()`` if
                None or otherwise falsy)
        """
        self._config = config or ModelConfig.default()
        self._trained = False
        self._metrics: Optional[ReDoSModelMetrics] = None
        self._feature_names: List[str] = PatternFeatures.feature_names()

    @property
    def is_trained(self) -> bool:
        """Check if the model has been trained."""
        return self._trained

    @property
    def config(self) -> ModelConfig:
        """Get the model configuration."""
        return self._config

    @property
    def metrics(self) -> Optional[ReDoSModelMetrics]:
        """Get training metrics if available, otherwise None."""
        return self._metrics

    @property
    def feature_names(self) -> List[str]:
        """Get feature names (the internal list itself, not a copy)."""
        return self._feature_names

    @abstractmethod
    def predict(self, features: List[float]) -> Tuple[float, float]:
        """Predict risk probability and confidence.

        Args:
            features: Feature vector

        Returns:
            Tuple of (risk_probability, confidence)
        """
        pass

    @abstractmethod
    def predict_batch(
        self, features: List[List[float]]
    ) -> List[Tuple[float, float]]:
        """Predict for multiple feature vectors.

        Args:
            features: List of feature vectors

        Returns:
            List of (risk_probability, confidence) tuples
        """
        pass

    @abstractmethod
    def train(self, data: ReDoSTrainingData) -> ReDoSModelMetrics:
        """Train the model on labeled data.

        Args:
            data: Training data container

        Returns:
            Training metrics
        """
        pass

    @abstractmethod
    def get_feature_importance(self) -> List[float]:
        """Get feature importance scores.

        Returns:
            List of importance scores, one per feature
        """
        pass

    @abstractmethod
    def _save_model_data(self) -> Dict[str, Any]:
        """Get model-specific data for saving.

        Returns:
            Dictionary of data to serialize; it is stored under the
            ``"model_data"`` key by :meth:`save`
        """
        pass

    @abstractmethod
    def _load_model_data(self, data: Dict[str, Any]) -> None:
        """Load model-specific data.

        Args:
            data: Dictionary of serialized data as produced by
                :meth:`_save_model_data` (empty if the file had none)
        """
        pass

    def save(self, path: str | Path) -> None:
        """Save model to disk as a pickled dictionary.

        Missing parent directories of ``path`` are created first.

        Args:
            path: Path to save the model
        """
        import pickle

        path = Path(path)
        data = {
            "name": self.name,
            "version": self.version,
            "config": self._config.to_dict(),
            "trained": self._trained,
            "metrics": self._metrics.to_dict() if self._metrics else None,
            "feature_names": self._feature_names,
            "model_data": self._save_model_data(),
        }

        path.parent.mkdir(parents=True, exist_ok=True)
        with open(path, "wb") as f:
            pickle.dump(data, f)

    def load(self, path: str | Path) -> None:
        """Load model from disk, replacing this instance's state.

        Warning:
            The file is read with :mod:`pickle`, which can execute arbitrary
            code during deserialization. Only load files you trust.

        Args:
            path: Path to the saved model

        Raises:
            KeyError: If the file lacks ``"config"`` or ``"trained"``, or
                its metrics lack accuracy/precision/recall/f1_score.
        """
        import pickle

        path = Path(path)
        # SECURITY: pickle.load must never be pointed at untrusted input.
        with open(path, "rb") as f:
            data = pickle.load(f)

        self._config = ModelConfig.from_dict(data["config"])
        self._trained = data["trained"]
        if "feature_names" in data:
            self._feature_names = data["feature_names"]
        else:
            # Fall back to the current feature list only when the file has none.
            self._feature_names = PatternFeatures.feature_names()

        metrics_dict = data.get("metrics")
        if metrics_dict:
            # Reconstruct metrics from dict; optional entries get defaults.
            self._metrics = ReDoSModelMetrics(
                accuracy=metrics_dict["accuracy"],
                precision=metrics_dict["precision"],
                recall=metrics_dict["recall"],
                f1_score=metrics_dict["f1_score"],
                specificity=metrics_dict.get("specificity", 0.0),
                auc_roc=metrics_dict.get("auc_roc"),
                confusion_matrix=metrics_dict.get("confusion_matrix"),
                feature_importances=metrics_dict.get("feature_importances"),
                cross_val_scores=metrics_dict.get("cross_val_scores"),
                training_samples=metrics_dict.get("training_samples", 0),
                training_time_seconds=metrics_dict.get(
                    "training_time_seconds", 0.0
                ),
            )
        else:
            # The file carries no metrics: drop any left over from earlier
            # training so they cannot be mistaken for the loaded model's.
            self._metrics = None

        self._load_model_data(data.get("model_data", {}))

    def get_feature_importance_dict(self) -> Dict[str, float]:
        """Get feature importance as a dictionary.

        Names and scores are paired positionally; if the two lists differ
        in length, the extra entries of the longer one are ignored.

        Returns:
            Dictionary mapping feature names to importance scores
        """
        importance = self.get_feature_importance()
        return dict(zip(self._feature_names, importance))
|