truthound 1.0.8__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- truthound/__init__.py +162 -0
- truthound/adapters.py +100 -0
- truthound/api.py +365 -0
- truthound/audit/__init__.py +248 -0
- truthound/audit/core.py +967 -0
- truthound/audit/filters.py +620 -0
- truthound/audit/formatters.py +707 -0
- truthound/audit/logger.py +902 -0
- truthound/audit/middleware.py +571 -0
- truthound/audit/storage.py +1083 -0
- truthound/benchmark/__init__.py +123 -0
- truthound/benchmark/base.py +757 -0
- truthound/benchmark/comparison.py +635 -0
- truthound/benchmark/generators.py +706 -0
- truthound/benchmark/reporters.py +718 -0
- truthound/benchmark/runner.py +635 -0
- truthound/benchmark/scenarios.py +712 -0
- truthound/cache.py +252 -0
- truthound/checkpoint/__init__.py +136 -0
- truthound/checkpoint/actions/__init__.py +164 -0
- truthound/checkpoint/actions/base.py +324 -0
- truthound/checkpoint/actions/custom.py +234 -0
- truthound/checkpoint/actions/discord_notify.py +290 -0
- truthound/checkpoint/actions/email_notify.py +405 -0
- truthound/checkpoint/actions/github_action.py +406 -0
- truthound/checkpoint/actions/opsgenie.py +1499 -0
- truthound/checkpoint/actions/pagerduty.py +226 -0
- truthound/checkpoint/actions/slack_notify.py +233 -0
- truthound/checkpoint/actions/store_result.py +249 -0
- truthound/checkpoint/actions/teams_notify.py +1570 -0
- truthound/checkpoint/actions/telegram_notify.py +419 -0
- truthound/checkpoint/actions/update_docs.py +552 -0
- truthound/checkpoint/actions/webhook.py +293 -0
- truthound/checkpoint/analytics/__init__.py +147 -0
- truthound/checkpoint/analytics/aggregations/__init__.py +23 -0
- truthound/checkpoint/analytics/aggregations/rollup.py +481 -0
- truthound/checkpoint/analytics/aggregations/time_bucket.py +306 -0
- truthound/checkpoint/analytics/analyzers/__init__.py +17 -0
- truthound/checkpoint/analytics/analyzers/anomaly.py +386 -0
- truthound/checkpoint/analytics/analyzers/base.py +270 -0
- truthound/checkpoint/analytics/analyzers/forecast.py +421 -0
- truthound/checkpoint/analytics/analyzers/trend.py +314 -0
- truthound/checkpoint/analytics/models.py +292 -0
- truthound/checkpoint/analytics/protocols.py +549 -0
- truthound/checkpoint/analytics/service.py +718 -0
- truthound/checkpoint/analytics/stores/__init__.py +16 -0
- truthound/checkpoint/analytics/stores/base.py +306 -0
- truthound/checkpoint/analytics/stores/memory_store.py +353 -0
- truthound/checkpoint/analytics/stores/sqlite_store.py +557 -0
- truthound/checkpoint/analytics/stores/timescale_store.py +501 -0
- truthound/checkpoint/async_actions.py +794 -0
- truthound/checkpoint/async_base.py +708 -0
- truthound/checkpoint/async_checkpoint.py +617 -0
- truthound/checkpoint/async_runner.py +639 -0
- truthound/checkpoint/checkpoint.py +527 -0
- truthound/checkpoint/ci/__init__.py +61 -0
- truthound/checkpoint/ci/detector.py +355 -0
- truthound/checkpoint/ci/reporter.py +436 -0
- truthound/checkpoint/ci/templates.py +454 -0
- truthound/checkpoint/circuitbreaker/__init__.py +133 -0
- truthound/checkpoint/circuitbreaker/breaker.py +542 -0
- truthound/checkpoint/circuitbreaker/core.py +252 -0
- truthound/checkpoint/circuitbreaker/detection.py +459 -0
- truthound/checkpoint/circuitbreaker/middleware.py +389 -0
- truthound/checkpoint/circuitbreaker/registry.py +357 -0
- truthound/checkpoint/distributed/__init__.py +139 -0
- truthound/checkpoint/distributed/backends/__init__.py +35 -0
- truthound/checkpoint/distributed/backends/celery_backend.py +503 -0
- truthound/checkpoint/distributed/backends/kubernetes_backend.py +696 -0
- truthound/checkpoint/distributed/backends/local_backend.py +397 -0
- truthound/checkpoint/distributed/backends/ray_backend.py +625 -0
- truthound/checkpoint/distributed/base.py +774 -0
- truthound/checkpoint/distributed/orchestrator.py +765 -0
- truthound/checkpoint/distributed/protocols.py +842 -0
- truthound/checkpoint/distributed/registry.py +449 -0
- truthound/checkpoint/idempotency/__init__.py +120 -0
- truthound/checkpoint/idempotency/core.py +295 -0
- truthound/checkpoint/idempotency/fingerprint.py +454 -0
- truthound/checkpoint/idempotency/locking.py +604 -0
- truthound/checkpoint/idempotency/service.py +592 -0
- truthound/checkpoint/idempotency/stores.py +653 -0
- truthound/checkpoint/monitoring/__init__.py +134 -0
- truthound/checkpoint/monitoring/aggregators/__init__.py +15 -0
- truthound/checkpoint/monitoring/aggregators/base.py +372 -0
- truthound/checkpoint/monitoring/aggregators/realtime.py +300 -0
- truthound/checkpoint/monitoring/aggregators/window.py +493 -0
- truthound/checkpoint/monitoring/collectors/__init__.py +17 -0
- truthound/checkpoint/monitoring/collectors/base.py +257 -0
- truthound/checkpoint/monitoring/collectors/memory_collector.py +617 -0
- truthound/checkpoint/monitoring/collectors/prometheus_collector.py +451 -0
- truthound/checkpoint/monitoring/collectors/redis_collector.py +518 -0
- truthound/checkpoint/monitoring/events.py +410 -0
- truthound/checkpoint/monitoring/protocols.py +636 -0
- truthound/checkpoint/monitoring/service.py +578 -0
- truthound/checkpoint/monitoring/views/__init__.py +17 -0
- truthound/checkpoint/monitoring/views/base.py +172 -0
- truthound/checkpoint/monitoring/views/queue_view.py +220 -0
- truthound/checkpoint/monitoring/views/task_view.py +240 -0
- truthound/checkpoint/monitoring/views/worker_view.py +263 -0
- truthound/checkpoint/registry.py +337 -0
- truthound/checkpoint/runner.py +356 -0
- truthound/checkpoint/transaction/__init__.py +133 -0
- truthound/checkpoint/transaction/base.py +389 -0
- truthound/checkpoint/transaction/compensatable.py +537 -0
- truthound/checkpoint/transaction/coordinator.py +576 -0
- truthound/checkpoint/transaction/executor.py +622 -0
- truthound/checkpoint/transaction/idempotency.py +534 -0
- truthound/checkpoint/transaction/saga/__init__.py +143 -0
- truthound/checkpoint/transaction/saga/builder.py +584 -0
- truthound/checkpoint/transaction/saga/definition.py +515 -0
- truthound/checkpoint/transaction/saga/event_store.py +542 -0
- truthound/checkpoint/transaction/saga/patterns.py +833 -0
- truthound/checkpoint/transaction/saga/runner.py +718 -0
- truthound/checkpoint/transaction/saga/state_machine.py +793 -0
- truthound/checkpoint/transaction/saga/strategies.py +780 -0
- truthound/checkpoint/transaction/saga/testing.py +886 -0
- truthound/checkpoint/triggers/__init__.py +58 -0
- truthound/checkpoint/triggers/base.py +237 -0
- truthound/checkpoint/triggers/event.py +385 -0
- truthound/checkpoint/triggers/schedule.py +355 -0
- truthound/cli.py +2358 -0
- truthound/cli_modules/__init__.py +124 -0
- truthound/cli_modules/advanced/__init__.py +45 -0
- truthound/cli_modules/advanced/benchmark.py +343 -0
- truthound/cli_modules/advanced/docs.py +225 -0
- truthound/cli_modules/advanced/lineage.py +209 -0
- truthound/cli_modules/advanced/ml.py +320 -0
- truthound/cli_modules/advanced/realtime.py +196 -0
- truthound/cli_modules/checkpoint/__init__.py +46 -0
- truthound/cli_modules/checkpoint/init.py +114 -0
- truthound/cli_modules/checkpoint/list.py +71 -0
- truthound/cli_modules/checkpoint/run.py +159 -0
- truthound/cli_modules/checkpoint/validate.py +67 -0
- truthound/cli_modules/common/__init__.py +71 -0
- truthound/cli_modules/common/errors.py +414 -0
- truthound/cli_modules/common/options.py +419 -0
- truthound/cli_modules/common/output.py +507 -0
- truthound/cli_modules/common/protocol.py +552 -0
- truthound/cli_modules/core/__init__.py +48 -0
- truthound/cli_modules/core/check.py +123 -0
- truthound/cli_modules/core/compare.py +104 -0
- truthound/cli_modules/core/learn.py +57 -0
- truthound/cli_modules/core/mask.py +77 -0
- truthound/cli_modules/core/profile.py +65 -0
- truthound/cli_modules/core/scan.py +61 -0
- truthound/cli_modules/profiler/__init__.py +51 -0
- truthound/cli_modules/profiler/auto_profile.py +175 -0
- truthound/cli_modules/profiler/metadata.py +107 -0
- truthound/cli_modules/profiler/suite.py +283 -0
- truthound/cli_modules/registry.py +431 -0
- truthound/cli_modules/scaffolding/__init__.py +89 -0
- truthound/cli_modules/scaffolding/base.py +631 -0
- truthound/cli_modules/scaffolding/commands.py +545 -0
- truthound/cli_modules/scaffolding/plugins.py +1072 -0
- truthound/cli_modules/scaffolding/reporters.py +594 -0
- truthound/cli_modules/scaffolding/validators.py +1127 -0
- truthound/common/__init__.py +18 -0
- truthound/common/resilience/__init__.py +130 -0
- truthound/common/resilience/bulkhead.py +266 -0
- truthound/common/resilience/circuit_breaker.py +516 -0
- truthound/common/resilience/composite.py +332 -0
- truthound/common/resilience/config.py +292 -0
- truthound/common/resilience/protocols.py +217 -0
- truthound/common/resilience/rate_limiter.py +404 -0
- truthound/common/resilience/retry.py +341 -0
- truthound/datadocs/__init__.py +260 -0
- truthound/datadocs/base.py +571 -0
- truthound/datadocs/builder.py +761 -0
- truthound/datadocs/charts.py +764 -0
- truthound/datadocs/dashboard/__init__.py +63 -0
- truthound/datadocs/dashboard/app.py +576 -0
- truthound/datadocs/dashboard/components.py +584 -0
- truthound/datadocs/dashboard/state.py +240 -0
- truthound/datadocs/engine/__init__.py +46 -0
- truthound/datadocs/engine/context.py +376 -0
- truthound/datadocs/engine/pipeline.py +618 -0
- truthound/datadocs/engine/registry.py +469 -0
- truthound/datadocs/exporters/__init__.py +49 -0
- truthound/datadocs/exporters/base.py +198 -0
- truthound/datadocs/exporters/html.py +178 -0
- truthound/datadocs/exporters/json_exporter.py +253 -0
- truthound/datadocs/exporters/markdown.py +284 -0
- truthound/datadocs/exporters/pdf.py +392 -0
- truthound/datadocs/i18n/__init__.py +86 -0
- truthound/datadocs/i18n/catalog.py +960 -0
- truthound/datadocs/i18n/formatting.py +505 -0
- truthound/datadocs/i18n/loader.py +256 -0
- truthound/datadocs/i18n/plurals.py +378 -0
- truthound/datadocs/renderers/__init__.py +42 -0
- truthound/datadocs/renderers/base.py +401 -0
- truthound/datadocs/renderers/custom.py +342 -0
- truthound/datadocs/renderers/jinja.py +697 -0
- truthound/datadocs/sections.py +736 -0
- truthound/datadocs/styles.py +931 -0
- truthound/datadocs/themes/__init__.py +101 -0
- truthound/datadocs/themes/base.py +336 -0
- truthound/datadocs/themes/default.py +417 -0
- truthound/datadocs/themes/enterprise.py +419 -0
- truthound/datadocs/themes/loader.py +336 -0
- truthound/datadocs/themes.py +301 -0
- truthound/datadocs/transformers/__init__.py +57 -0
- truthound/datadocs/transformers/base.py +268 -0
- truthound/datadocs/transformers/enrichers.py +544 -0
- truthound/datadocs/transformers/filters.py +447 -0
- truthound/datadocs/transformers/i18n.py +468 -0
- truthound/datadocs/versioning/__init__.py +62 -0
- truthound/datadocs/versioning/diff.py +639 -0
- truthound/datadocs/versioning/storage.py +497 -0
- truthound/datadocs/versioning/version.py +358 -0
- truthound/datasources/__init__.py +223 -0
- truthound/datasources/_async_protocols.py +222 -0
- truthound/datasources/_protocols.py +159 -0
- truthound/datasources/adapters.py +428 -0
- truthound/datasources/async_base.py +599 -0
- truthound/datasources/async_factory.py +511 -0
- truthound/datasources/base.py +516 -0
- truthound/datasources/factory.py +433 -0
- truthound/datasources/nosql/__init__.py +47 -0
- truthound/datasources/nosql/base.py +487 -0
- truthound/datasources/nosql/elasticsearch.py +801 -0
- truthound/datasources/nosql/mongodb.py +636 -0
- truthound/datasources/pandas_optimized.py +582 -0
- truthound/datasources/pandas_source.py +216 -0
- truthound/datasources/polars_source.py +395 -0
- truthound/datasources/spark_source.py +479 -0
- truthound/datasources/sql/__init__.py +154 -0
- truthound/datasources/sql/base.py +710 -0
- truthound/datasources/sql/bigquery.py +410 -0
- truthound/datasources/sql/cloud_base.py +199 -0
- truthound/datasources/sql/databricks.py +471 -0
- truthound/datasources/sql/mysql.py +316 -0
- truthound/datasources/sql/oracle.py +427 -0
- truthound/datasources/sql/postgresql.py +321 -0
- truthound/datasources/sql/redshift.py +479 -0
- truthound/datasources/sql/snowflake.py +439 -0
- truthound/datasources/sql/sqlite.py +286 -0
- truthound/datasources/sql/sqlserver.py +437 -0
- truthound/datasources/streaming/__init__.py +47 -0
- truthound/datasources/streaming/base.py +350 -0
- truthound/datasources/streaming/kafka.py +670 -0
- truthound/decorators.py +98 -0
- truthound/docs/__init__.py +69 -0
- truthound/docs/extractor.py +971 -0
- truthound/docs/generator.py +601 -0
- truthound/docs/parser.py +1037 -0
- truthound/docs/renderer.py +999 -0
- truthound/drift/__init__.py +22 -0
- truthound/drift/compare.py +189 -0
- truthound/drift/detectors.py +464 -0
- truthound/drift/report.py +160 -0
- truthound/execution/__init__.py +65 -0
- truthound/execution/_protocols.py +324 -0
- truthound/execution/base.py +576 -0
- truthound/execution/distributed/__init__.py +179 -0
- truthound/execution/distributed/aggregations.py +731 -0
- truthound/execution/distributed/arrow_bridge.py +817 -0
- truthound/execution/distributed/base.py +550 -0
- truthound/execution/distributed/dask_engine.py +976 -0
- truthound/execution/distributed/mixins.py +766 -0
- truthound/execution/distributed/protocols.py +756 -0
- truthound/execution/distributed/ray_engine.py +1127 -0
- truthound/execution/distributed/registry.py +446 -0
- truthound/execution/distributed/spark_engine.py +1011 -0
- truthound/execution/distributed/validator_adapter.py +682 -0
- truthound/execution/pandas_engine.py +401 -0
- truthound/execution/polars_engine.py +497 -0
- truthound/execution/pushdown/__init__.py +230 -0
- truthound/execution/pushdown/ast.py +1550 -0
- truthound/execution/pushdown/builder.py +1550 -0
- truthound/execution/pushdown/dialects.py +1072 -0
- truthound/execution/pushdown/executor.py +829 -0
- truthound/execution/pushdown/optimizer.py +1041 -0
- truthound/execution/sql_engine.py +518 -0
- truthound/infrastructure/__init__.py +189 -0
- truthound/infrastructure/audit.py +1515 -0
- truthound/infrastructure/config.py +1133 -0
- truthound/infrastructure/encryption.py +1132 -0
- truthound/infrastructure/logging.py +1503 -0
- truthound/infrastructure/metrics.py +1220 -0
- truthound/lineage/__init__.py +89 -0
- truthound/lineage/base.py +746 -0
- truthound/lineage/impact_analysis.py +474 -0
- truthound/lineage/integrations/__init__.py +22 -0
- truthound/lineage/integrations/openlineage.py +548 -0
- truthound/lineage/tracker.py +512 -0
- truthound/lineage/visualization/__init__.py +33 -0
- truthound/lineage/visualization/protocols.py +145 -0
- truthound/lineage/visualization/renderers/__init__.py +20 -0
- truthound/lineage/visualization/renderers/cytoscape.py +329 -0
- truthound/lineage/visualization/renderers/d3.py +331 -0
- truthound/lineage/visualization/renderers/graphviz.py +276 -0
- truthound/lineage/visualization/renderers/mermaid.py +308 -0
- truthound/maskers.py +113 -0
- truthound/ml/__init__.py +124 -0
- truthound/ml/anomaly_models/__init__.py +31 -0
- truthound/ml/anomaly_models/ensemble.py +362 -0
- truthound/ml/anomaly_models/isolation_forest.py +444 -0
- truthound/ml/anomaly_models/statistical.py +392 -0
- truthound/ml/base.py +1178 -0
- truthound/ml/drift_detection/__init__.py +26 -0
- truthound/ml/drift_detection/concept.py +381 -0
- truthound/ml/drift_detection/distribution.py +361 -0
- truthound/ml/drift_detection/feature.py +442 -0
- truthound/ml/drift_detection/multivariate.py +495 -0
- truthound/ml/monitoring/__init__.py +88 -0
- truthound/ml/monitoring/alerting/__init__.py +33 -0
- truthound/ml/monitoring/alerting/handlers.py +427 -0
- truthound/ml/monitoring/alerting/rules.py +508 -0
- truthound/ml/monitoring/collectors/__init__.py +19 -0
- truthound/ml/monitoring/collectors/composite.py +105 -0
- truthound/ml/monitoring/collectors/drift.py +324 -0
- truthound/ml/monitoring/collectors/performance.py +179 -0
- truthound/ml/monitoring/collectors/quality.py +369 -0
- truthound/ml/monitoring/monitor.py +536 -0
- truthound/ml/monitoring/protocols.py +451 -0
- truthound/ml/monitoring/stores/__init__.py +15 -0
- truthound/ml/monitoring/stores/memory.py +201 -0
- truthound/ml/monitoring/stores/prometheus.py +296 -0
- truthound/ml/rule_learning/__init__.py +25 -0
- truthound/ml/rule_learning/constraint_miner.py +443 -0
- truthound/ml/rule_learning/pattern_learner.py +499 -0
- truthound/ml/rule_learning/profile_learner.py +462 -0
- truthound/multitenancy/__init__.py +326 -0
- truthound/multitenancy/core.py +852 -0
- truthound/multitenancy/integration.py +597 -0
- truthound/multitenancy/isolation.py +630 -0
- truthound/multitenancy/manager.py +770 -0
- truthound/multitenancy/middleware.py +765 -0
- truthound/multitenancy/quota.py +537 -0
- truthound/multitenancy/resolvers.py +603 -0
- truthound/multitenancy/storage.py +703 -0
- truthound/observability/__init__.py +307 -0
- truthound/observability/context.py +531 -0
- truthound/observability/instrumentation.py +611 -0
- truthound/observability/logging.py +887 -0
- truthound/observability/metrics.py +1157 -0
- truthound/observability/tracing/__init__.py +178 -0
- truthound/observability/tracing/baggage.py +310 -0
- truthound/observability/tracing/config.py +426 -0
- truthound/observability/tracing/exporter.py +787 -0
- truthound/observability/tracing/integration.py +1018 -0
- truthound/observability/tracing/otel/__init__.py +146 -0
- truthound/observability/tracing/otel/adapter.py +982 -0
- truthound/observability/tracing/otel/bridge.py +1177 -0
- truthound/observability/tracing/otel/compat.py +681 -0
- truthound/observability/tracing/otel/config.py +691 -0
- truthound/observability/tracing/otel/detection.py +327 -0
- truthound/observability/tracing/otel/protocols.py +426 -0
- truthound/observability/tracing/processor.py +561 -0
- truthound/observability/tracing/propagator.py +757 -0
- truthound/observability/tracing/provider.py +569 -0
- truthound/observability/tracing/resource.py +515 -0
- truthound/observability/tracing/sampler.py +487 -0
- truthound/observability/tracing/span.py +676 -0
- truthound/plugins/__init__.py +198 -0
- truthound/plugins/base.py +599 -0
- truthound/plugins/cli.py +680 -0
- truthound/plugins/dependencies/__init__.py +42 -0
- truthound/plugins/dependencies/graph.py +422 -0
- truthound/plugins/dependencies/resolver.py +417 -0
- truthound/plugins/discovery.py +379 -0
- truthound/plugins/docs/__init__.py +46 -0
- truthound/plugins/docs/extractor.py +444 -0
- truthound/plugins/docs/renderer.py +499 -0
- truthound/plugins/enterprise_manager.py +877 -0
- truthound/plugins/examples/__init__.py +19 -0
- truthound/plugins/examples/custom_validators.py +317 -0
- truthound/plugins/examples/slack_notifier.py +312 -0
- truthound/plugins/examples/xml_reporter.py +254 -0
- truthound/plugins/hooks.py +558 -0
- truthound/plugins/lifecycle/__init__.py +43 -0
- truthound/plugins/lifecycle/hot_reload.py +402 -0
- truthound/plugins/lifecycle/manager.py +371 -0
- truthound/plugins/manager.py +736 -0
- truthound/plugins/registry.py +338 -0
- truthound/plugins/security/__init__.py +93 -0
- truthound/plugins/security/exceptions.py +332 -0
- truthound/plugins/security/policies.py +348 -0
- truthound/plugins/security/protocols.py +643 -0
- truthound/plugins/security/sandbox/__init__.py +45 -0
- truthound/plugins/security/sandbox/context.py +158 -0
- truthound/plugins/security/sandbox/engines/__init__.py +19 -0
- truthound/plugins/security/sandbox/engines/container.py +379 -0
- truthound/plugins/security/sandbox/engines/noop.py +144 -0
- truthound/plugins/security/sandbox/engines/process.py +336 -0
- truthound/plugins/security/sandbox/factory.py +211 -0
- truthound/plugins/security/signing/__init__.py +57 -0
- truthound/plugins/security/signing/service.py +330 -0
- truthound/plugins/security/signing/trust_store.py +368 -0
- truthound/plugins/security/signing/verifier.py +459 -0
- truthound/plugins/versioning/__init__.py +41 -0
- truthound/plugins/versioning/constraints.py +297 -0
- truthound/plugins/versioning/resolver.py +329 -0
- truthound/profiler/__init__.py +1729 -0
- truthound/profiler/_lazy.py +452 -0
- truthound/profiler/ab_testing/__init__.py +80 -0
- truthound/profiler/ab_testing/analysis.py +449 -0
- truthound/profiler/ab_testing/base.py +257 -0
- truthound/profiler/ab_testing/experiment.py +395 -0
- truthound/profiler/ab_testing/tracking.py +368 -0
- truthound/profiler/auto_threshold.py +1170 -0
- truthound/profiler/base.py +579 -0
- truthound/profiler/cache_patterns.py +911 -0
- truthound/profiler/caching.py +1303 -0
- truthound/profiler/column_profiler.py +712 -0
- truthound/profiler/comparison.py +1007 -0
- truthound/profiler/custom_patterns.py +1170 -0
- truthound/profiler/dashboard/__init__.py +50 -0
- truthound/profiler/dashboard/app.py +476 -0
- truthound/profiler/dashboard/components.py +457 -0
- truthound/profiler/dashboard/config.py +72 -0
- truthound/profiler/distributed/__init__.py +83 -0
- truthound/profiler/distributed/base.py +281 -0
- truthound/profiler/distributed/dask_backend.py +498 -0
- truthound/profiler/distributed/local_backend.py +293 -0
- truthound/profiler/distributed/profiler.py +304 -0
- truthound/profiler/distributed/ray_backend.py +374 -0
- truthound/profiler/distributed/spark_backend.py +375 -0
- truthound/profiler/distributed.py +1366 -0
- truthound/profiler/enterprise_sampling.py +1065 -0
- truthound/profiler/errors.py +488 -0
- truthound/profiler/evolution/__init__.py +91 -0
- truthound/profiler/evolution/alerts.py +426 -0
- truthound/profiler/evolution/changes.py +206 -0
- truthound/profiler/evolution/compatibility.py +365 -0
- truthound/profiler/evolution/detector.py +372 -0
- truthound/profiler/evolution/protocols.py +121 -0
- truthound/profiler/generators/__init__.py +48 -0
- truthound/profiler/generators/base.py +384 -0
- truthound/profiler/generators/ml_rules.py +375 -0
- truthound/profiler/generators/pattern_rules.py +384 -0
- truthound/profiler/generators/schema_rules.py +267 -0
- truthound/profiler/generators/stats_rules.py +324 -0
- truthound/profiler/generators/suite_generator.py +857 -0
- truthound/profiler/i18n.py +1542 -0
- truthound/profiler/incremental.py +554 -0
- truthound/profiler/incremental_validation.py +1710 -0
- truthound/profiler/integration/__init__.py +73 -0
- truthound/profiler/integration/adapters.py +345 -0
- truthound/profiler/integration/context.py +371 -0
- truthound/profiler/integration/executor.py +527 -0
- truthound/profiler/integration/naming.py +75 -0
- truthound/profiler/integration/protocols.py +243 -0
- truthound/profiler/memory.py +1185 -0
- truthound/profiler/migration/__init__.py +60 -0
- truthound/profiler/migration/base.py +345 -0
- truthound/profiler/migration/manager.py +444 -0
- truthound/profiler/migration/v1_0_to_v1_1.py +484 -0
- truthound/profiler/ml/__init__.py +73 -0
- truthound/profiler/ml/base.py +244 -0
- truthound/profiler/ml/classifier.py +507 -0
- truthound/profiler/ml/feature_extraction.py +604 -0
- truthound/profiler/ml/pretrained.py +448 -0
- truthound/profiler/ml_inference.py +1276 -0
- truthound/profiler/native_patterns.py +815 -0
- truthound/profiler/observability.py +1184 -0
- truthound/profiler/process_timeout.py +1566 -0
- truthound/profiler/progress.py +568 -0
- truthound/profiler/progress_callbacks.py +1734 -0
- truthound/profiler/quality.py +1345 -0
- truthound/profiler/resilience.py +1180 -0
- truthound/profiler/sampled_matcher.py +794 -0
- truthound/profiler/sampling.py +1288 -0
- truthound/profiler/scheduling/__init__.py +82 -0
- truthound/profiler/scheduling/protocols.py +214 -0
- truthound/profiler/scheduling/scheduler.py +474 -0
- truthound/profiler/scheduling/storage.py +457 -0
- truthound/profiler/scheduling/triggers.py +449 -0
- truthound/profiler/schema.py +603 -0
- truthound/profiler/streaming.py +685 -0
- truthound/profiler/streaming_patterns.py +1354 -0
- truthound/profiler/suite_cli.py +625 -0
- truthound/profiler/suite_config.py +789 -0
- truthound/profiler/suite_export.py +1268 -0
- truthound/profiler/table_profiler.py +547 -0
- truthound/profiler/timeout.py +565 -0
- truthound/profiler/validation.py +1532 -0
- truthound/profiler/visualization/__init__.py +118 -0
- truthound/profiler/visualization/base.py +346 -0
- truthound/profiler/visualization/generator.py +1259 -0
- truthound/profiler/visualization/plotly_renderer.py +811 -0
- truthound/profiler/visualization/renderers.py +669 -0
- truthound/profiler/visualization/sections.py +540 -0
- truthound/profiler/visualization.py +2122 -0
- truthound/profiler/yaml_validation.py +1151 -0
- truthound/py.typed +0 -0
- truthound/ratelimit/__init__.py +248 -0
- truthound/ratelimit/algorithms.py +1108 -0
- truthound/ratelimit/core.py +573 -0
- truthound/ratelimit/integration.py +532 -0
- truthound/ratelimit/limiter.py +663 -0
- truthound/ratelimit/middleware.py +700 -0
- truthound/ratelimit/policy.py +792 -0
- truthound/ratelimit/storage.py +763 -0
- truthound/rbac/__init__.py +340 -0
- truthound/rbac/core.py +976 -0
- truthound/rbac/integration.py +760 -0
- truthound/rbac/manager.py +1052 -0
- truthound/rbac/middleware.py +842 -0
- truthound/rbac/policy.py +954 -0
- truthound/rbac/storage.py +878 -0
- truthound/realtime/__init__.py +141 -0
- truthound/realtime/adapters/__init__.py +43 -0
- truthound/realtime/adapters/base.py +533 -0
- truthound/realtime/adapters/kafka.py +487 -0
- truthound/realtime/adapters/kinesis.py +479 -0
- truthound/realtime/adapters/mock.py +243 -0
- truthound/realtime/base.py +553 -0
- truthound/realtime/factory.py +382 -0
- truthound/realtime/incremental.py +660 -0
- truthound/realtime/processing/__init__.py +67 -0
- truthound/realtime/processing/exactly_once.py +575 -0
- truthound/realtime/processing/state.py +547 -0
- truthound/realtime/processing/windows.py +647 -0
- truthound/realtime/protocols.py +569 -0
- truthound/realtime/streaming.py +605 -0
- truthound/realtime/testing/__init__.py +32 -0
- truthound/realtime/testing/containers.py +615 -0
- truthound/realtime/testing/fixtures.py +484 -0
- truthound/report.py +280 -0
- truthound/reporters/__init__.py +46 -0
- truthound/reporters/_protocols.py +30 -0
- truthound/reporters/base.py +324 -0
- truthound/reporters/ci/__init__.py +66 -0
- truthound/reporters/ci/azure.py +436 -0
- truthound/reporters/ci/base.py +509 -0
- truthound/reporters/ci/bitbucket.py +567 -0
- truthound/reporters/ci/circleci.py +547 -0
- truthound/reporters/ci/detection.py +364 -0
- truthound/reporters/ci/factory.py +182 -0
- truthound/reporters/ci/github.py +388 -0
- truthound/reporters/ci/gitlab.py +471 -0
- truthound/reporters/ci/jenkins.py +525 -0
- truthound/reporters/console_reporter.py +299 -0
- truthound/reporters/factory.py +211 -0
- truthound/reporters/html_reporter.py +524 -0
- truthound/reporters/json_reporter.py +256 -0
- truthound/reporters/markdown_reporter.py +280 -0
- truthound/reporters/sdk/__init__.py +174 -0
- truthound/reporters/sdk/builder.py +558 -0
- truthound/reporters/sdk/mixins.py +1150 -0
- truthound/reporters/sdk/schema.py +1493 -0
- truthound/reporters/sdk/templates.py +666 -0
- truthound/reporters/sdk/testing.py +968 -0
- truthound/scanners.py +170 -0
- truthound/scheduling/__init__.py +122 -0
- truthound/scheduling/cron.py +1136 -0
- truthound/scheduling/presets.py +212 -0
- truthound/schema.py +275 -0
- truthound/secrets/__init__.py +173 -0
- truthound/secrets/base.py +618 -0
- truthound/secrets/cloud.py +682 -0
- truthound/secrets/integration.py +507 -0
- truthound/secrets/manager.py +633 -0
- truthound/secrets/oidc/__init__.py +172 -0
- truthound/secrets/oidc/base.py +902 -0
- truthound/secrets/oidc/credential_provider.py +623 -0
- truthound/secrets/oidc/exchangers.py +1001 -0
- truthound/secrets/oidc/github/__init__.py +110 -0
- truthound/secrets/oidc/github/claims.py +718 -0
- truthound/secrets/oidc/github/enhanced_provider.py +693 -0
- truthound/secrets/oidc/github/trust_policy.py +742 -0
- truthound/secrets/oidc/github/verification.py +723 -0
- truthound/secrets/oidc/github/workflow.py +691 -0
- truthound/secrets/oidc/providers.py +825 -0
- truthound/secrets/providers.py +506 -0
- truthound/secrets/resolver.py +495 -0
- truthound/stores/__init__.py +177 -0
- truthound/stores/backends/__init__.py +18 -0
- truthound/stores/backends/_protocols.py +340 -0
- truthound/stores/backends/azure_blob.py +530 -0
- truthound/stores/backends/concurrent_filesystem.py +915 -0
- truthound/stores/backends/connection_pool.py +1365 -0
- truthound/stores/backends/database.py +743 -0
- truthound/stores/backends/filesystem.py +538 -0
- truthound/stores/backends/gcs.py +399 -0
- truthound/stores/backends/memory.py +354 -0
- truthound/stores/backends/s3.py +434 -0
- truthound/stores/backpressure/__init__.py +84 -0
- truthound/stores/backpressure/base.py +375 -0
- truthound/stores/backpressure/circuit_breaker.py +434 -0
- truthound/stores/backpressure/monitor.py +376 -0
- truthound/stores/backpressure/strategies.py +677 -0
- truthound/stores/base.py +551 -0
- truthound/stores/batching/__init__.py +65 -0
- truthound/stores/batching/base.py +305 -0
- truthound/stores/batching/buffer.py +370 -0
- truthound/stores/batching/store.py +248 -0
- truthound/stores/batching/writer.py +521 -0
- truthound/stores/caching/__init__.py +60 -0
- truthound/stores/caching/backends.py +684 -0
- truthound/stores/caching/base.py +356 -0
- truthound/stores/caching/store.py +305 -0
- truthound/stores/compression/__init__.py +193 -0
- truthound/stores/compression/adaptive.py +694 -0
- truthound/stores/compression/base.py +514 -0
- truthound/stores/compression/pipeline.py +868 -0
- truthound/stores/compression/providers.py +672 -0
- truthound/stores/compression/streaming.py +832 -0
- truthound/stores/concurrency/__init__.py +81 -0
- truthound/stores/concurrency/atomic.py +556 -0
- truthound/stores/concurrency/index.py +775 -0
- truthound/stores/concurrency/locks.py +576 -0
- truthound/stores/concurrency/manager.py +482 -0
- truthound/stores/encryption/__init__.py +297 -0
- truthound/stores/encryption/base.py +952 -0
- truthound/stores/encryption/keys.py +1191 -0
- truthound/stores/encryption/pipeline.py +903 -0
- truthound/stores/encryption/providers.py +953 -0
- truthound/stores/encryption/streaming.py +950 -0
- truthound/stores/expectations.py +227 -0
- truthound/stores/factory.py +246 -0
- truthound/stores/migration/__init__.py +75 -0
- truthound/stores/migration/base.py +480 -0
- truthound/stores/migration/manager.py +347 -0
- truthound/stores/migration/registry.py +382 -0
- truthound/stores/migration/store.py +559 -0
- truthound/stores/observability/__init__.py +106 -0
- truthound/stores/observability/audit.py +718 -0
- truthound/stores/observability/config.py +270 -0
- truthound/stores/observability/factory.py +208 -0
- truthound/stores/observability/metrics.py +636 -0
- truthound/stores/observability/protocols.py +410 -0
- truthound/stores/observability/store.py +570 -0
- truthound/stores/observability/tracing.py +784 -0
- truthound/stores/replication/__init__.py +76 -0
- truthound/stores/replication/base.py +260 -0
- truthound/stores/replication/monitor.py +269 -0
- truthound/stores/replication/store.py +439 -0
- truthound/stores/replication/syncer.py +391 -0
- truthound/stores/results.py +359 -0
- truthound/stores/retention/__init__.py +77 -0
- truthound/stores/retention/base.py +378 -0
- truthound/stores/retention/policies.py +621 -0
- truthound/stores/retention/scheduler.py +279 -0
- truthound/stores/retention/store.py +526 -0
- truthound/stores/streaming/__init__.py +138 -0
- truthound/stores/streaming/base.py +801 -0
- truthound/stores/streaming/database.py +984 -0
- truthound/stores/streaming/filesystem.py +719 -0
- truthound/stores/streaming/reader.py +629 -0
- truthound/stores/streaming/s3.py +843 -0
- truthound/stores/streaming/writer.py +790 -0
- truthound/stores/tiering/__init__.py +108 -0
- truthound/stores/tiering/base.py +462 -0
- truthound/stores/tiering/manager.py +249 -0
- truthound/stores/tiering/policies.py +692 -0
- truthound/stores/tiering/store.py +526 -0
- truthound/stores/versioning/__init__.py +56 -0
- truthound/stores/versioning/base.py +376 -0
- truthound/stores/versioning/store.py +660 -0
- truthound/stores/versioning/strategies.py +353 -0
- truthound/types.py +56 -0
- truthound/validators/__init__.py +774 -0
- truthound/validators/aggregate/__init__.py +27 -0
- truthound/validators/aggregate/central.py +116 -0
- truthound/validators/aggregate/extremes.py +116 -0
- truthound/validators/aggregate/spread.py +118 -0
- truthound/validators/aggregate/sum.py +64 -0
- truthound/validators/aggregate/type.py +78 -0
- truthound/validators/anomaly/__init__.py +93 -0
- truthound/validators/anomaly/base.py +431 -0
- truthound/validators/anomaly/ml_based.py +1190 -0
- truthound/validators/anomaly/multivariate.py +647 -0
- truthound/validators/anomaly/statistical.py +599 -0
- truthound/validators/base.py +1089 -0
- truthound/validators/business_rule/__init__.py +46 -0
- truthound/validators/business_rule/base.py +147 -0
- truthound/validators/business_rule/checksum.py +509 -0
- truthound/validators/business_rule/financial.py +526 -0
- truthound/validators/cache.py +733 -0
- truthound/validators/completeness/__init__.py +39 -0
- truthound/validators/completeness/conditional.py +73 -0
- truthound/validators/completeness/default.py +98 -0
- truthound/validators/completeness/empty.py +103 -0
- truthound/validators/completeness/nan.py +337 -0
- truthound/validators/completeness/null.py +152 -0
- truthound/validators/cross_table/__init__.py +17 -0
- truthound/validators/cross_table/aggregate.py +333 -0
- truthound/validators/cross_table/row_count.py +122 -0
- truthound/validators/datetime/__init__.py +29 -0
- truthound/validators/datetime/format.py +78 -0
- truthound/validators/datetime/freshness.py +269 -0
- truthound/validators/datetime/order.py +73 -0
- truthound/validators/datetime/parseable.py +185 -0
- truthound/validators/datetime/range.py +202 -0
- truthound/validators/datetime/timezone.py +69 -0
- truthound/validators/distribution/__init__.py +49 -0
- truthound/validators/distribution/distribution.py +128 -0
- truthound/validators/distribution/monotonic.py +119 -0
- truthound/validators/distribution/outlier.py +178 -0
- truthound/validators/distribution/quantile.py +80 -0
- truthound/validators/distribution/range.py +254 -0
- truthound/validators/distribution/set.py +125 -0
- truthound/validators/distribution/statistical.py +459 -0
- truthound/validators/drift/__init__.py +79 -0
- truthound/validators/drift/base.py +427 -0
- truthound/validators/drift/multi_feature.py +401 -0
- truthound/validators/drift/numeric.py +395 -0
- truthound/validators/drift/psi.py +446 -0
- truthound/validators/drift/statistical.py +510 -0
- truthound/validators/enterprise.py +1658 -0
- truthound/validators/geospatial/__init__.py +80 -0
- truthound/validators/geospatial/base.py +97 -0
- truthound/validators/geospatial/boundary.py +238 -0
- truthound/validators/geospatial/coordinate.py +351 -0
- truthound/validators/geospatial/distance.py +399 -0
- truthound/validators/geospatial/polygon.py +665 -0
- truthound/validators/i18n/__init__.py +308 -0
- truthound/validators/i18n/bidi.py +571 -0
- truthound/validators/i18n/catalogs.py +570 -0
- truthound/validators/i18n/dialects.py +763 -0
- truthound/validators/i18n/extended_catalogs.py +549 -0
- truthound/validators/i18n/formatting.py +1434 -0
- truthound/validators/i18n/loader.py +1020 -0
- truthound/validators/i18n/messages.py +521 -0
- truthound/validators/i18n/plural.py +683 -0
- truthound/validators/i18n/protocols.py +855 -0
- truthound/validators/i18n/tms.py +1162 -0
- truthound/validators/localization/__init__.py +53 -0
- truthound/validators/localization/base.py +122 -0
- truthound/validators/localization/chinese.py +362 -0
- truthound/validators/localization/japanese.py +275 -0
- truthound/validators/localization/korean.py +524 -0
- truthound/validators/memory/__init__.py +94 -0
- truthound/validators/memory/approximate_knn.py +506 -0
- truthound/validators/memory/base.py +547 -0
- truthound/validators/memory/sgd_online.py +719 -0
- truthound/validators/memory/streaming_ecdf.py +753 -0
- truthound/validators/ml_feature/__init__.py +54 -0
- truthound/validators/ml_feature/base.py +249 -0
- truthound/validators/ml_feature/correlation.py +299 -0
- truthound/validators/ml_feature/leakage.py +344 -0
- truthound/validators/ml_feature/null_impact.py +270 -0
- truthound/validators/ml_feature/scale.py +264 -0
- truthound/validators/multi_column/__init__.py +89 -0
- truthound/validators/multi_column/arithmetic.py +284 -0
- truthound/validators/multi_column/base.py +231 -0
- truthound/validators/multi_column/comparison.py +273 -0
- truthound/validators/multi_column/consistency.py +312 -0
- truthound/validators/multi_column/statistical.py +299 -0
- truthound/validators/optimization/__init__.py +164 -0
- truthound/validators/optimization/aggregation.py +563 -0
- truthound/validators/optimization/covariance.py +556 -0
- truthound/validators/optimization/geo.py +626 -0
- truthound/validators/optimization/graph.py +587 -0
- truthound/validators/optimization/orchestrator.py +970 -0
- truthound/validators/optimization/profiling.py +1312 -0
- truthound/validators/privacy/__init__.py +223 -0
- truthound/validators/privacy/base.py +635 -0
- truthound/validators/privacy/ccpa.py +670 -0
- truthound/validators/privacy/gdpr.py +728 -0
- truthound/validators/privacy/global_patterns.py +604 -0
- truthound/validators/privacy/plugins.py +867 -0
- truthound/validators/profiling/__init__.py +52 -0
- truthound/validators/profiling/base.py +175 -0
- truthound/validators/profiling/cardinality.py +312 -0
- truthound/validators/profiling/entropy.py +391 -0
- truthound/validators/profiling/frequency.py +455 -0
- truthound/validators/pushdown_support.py +660 -0
- truthound/validators/query/__init__.py +91 -0
- truthound/validators/query/aggregate.py +346 -0
- truthound/validators/query/base.py +246 -0
- truthound/validators/query/column.py +249 -0
- truthound/validators/query/expression.py +274 -0
- truthound/validators/query/result.py +323 -0
- truthound/validators/query/row_count.py +264 -0
- truthound/validators/referential/__init__.py +80 -0
- truthound/validators/referential/base.py +395 -0
- truthound/validators/referential/cascade.py +391 -0
- truthound/validators/referential/circular.py +563 -0
- truthound/validators/referential/foreign_key.py +624 -0
- truthound/validators/referential/orphan.py +485 -0
- truthound/validators/registry.py +112 -0
- truthound/validators/schema/__init__.py +41 -0
- truthound/validators/schema/column_count.py +142 -0
- truthound/validators/schema/column_exists.py +80 -0
- truthound/validators/schema/column_order.py +82 -0
- truthound/validators/schema/column_pair.py +85 -0
- truthound/validators/schema/column_pair_set.py +195 -0
- truthound/validators/schema/column_type.py +94 -0
- truthound/validators/schema/multi_column.py +53 -0
- truthound/validators/schema/multi_column_aggregate.py +175 -0
- truthound/validators/schema/referential.py +274 -0
- truthound/validators/schema/table_schema.py +91 -0
- truthound/validators/schema_validator.py +219 -0
- truthound/validators/sdk/__init__.py +250 -0
- truthound/validators/sdk/builder.py +680 -0
- truthound/validators/sdk/decorators.py +474 -0
- truthound/validators/sdk/enterprise/__init__.py +211 -0
- truthound/validators/sdk/enterprise/docs.py +725 -0
- truthound/validators/sdk/enterprise/fuzzing.py +659 -0
- truthound/validators/sdk/enterprise/licensing.py +709 -0
- truthound/validators/sdk/enterprise/manager.py +543 -0
- truthound/validators/sdk/enterprise/resources.py +628 -0
- truthound/validators/sdk/enterprise/sandbox.py +766 -0
- truthound/validators/sdk/enterprise/signing.py +603 -0
- truthound/validators/sdk/enterprise/templates.py +865 -0
- truthound/validators/sdk/enterprise/versioning.py +659 -0
- truthound/validators/sdk/templates.py +757 -0
- truthound/validators/sdk/testing.py +807 -0
- truthound/validators/security/__init__.py +181 -0
- truthound/validators/security/redos/__init__.py +182 -0
- truthound/validators/security/redos/core.py +861 -0
- truthound/validators/security/redos/cpu_monitor.py +593 -0
- truthound/validators/security/redos/cve_database.py +791 -0
- truthound/validators/security/redos/ml/__init__.py +155 -0
- truthound/validators/security/redos/ml/base.py +785 -0
- truthound/validators/security/redos/ml/datasets.py +618 -0
- truthound/validators/security/redos/ml/features.py +359 -0
- truthound/validators/security/redos/ml/models.py +1000 -0
- truthound/validators/security/redos/ml/predictor.py +507 -0
- truthound/validators/security/redos/ml/storage.py +632 -0
- truthound/validators/security/redos/ml/training.py +571 -0
- truthound/validators/security/redos/ml_analyzer.py +937 -0
- truthound/validators/security/redos/optimizer.py +674 -0
- truthound/validators/security/redos/profiler.py +682 -0
- truthound/validators/security/redos/re2_engine.py +709 -0
- truthound/validators/security/redos.py +886 -0
- truthound/validators/security/sql_security.py +1247 -0
- truthound/validators/streaming/__init__.py +126 -0
- truthound/validators/streaming/base.py +292 -0
- truthound/validators/streaming/completeness.py +210 -0
- truthound/validators/streaming/mixin.py +575 -0
- truthound/validators/streaming/range.py +308 -0
- truthound/validators/streaming/sources.py +846 -0
- truthound/validators/string/__init__.py +57 -0
- truthound/validators/string/casing.py +158 -0
- truthound/validators/string/charset.py +96 -0
- truthound/validators/string/format.py +501 -0
- truthound/validators/string/json.py +77 -0
- truthound/validators/string/json_schema.py +184 -0
- truthound/validators/string/length.py +104 -0
- truthound/validators/string/like_pattern.py +237 -0
- truthound/validators/string/regex.py +202 -0
- truthound/validators/string/regex_extended.py +435 -0
- truthound/validators/table/__init__.py +88 -0
- truthound/validators/table/base.py +78 -0
- truthound/validators/table/column_count.py +198 -0
- truthound/validators/table/freshness.py +362 -0
- truthound/validators/table/row_count.py +251 -0
- truthound/validators/table/schema.py +333 -0
- truthound/validators/table/size.py +285 -0
- truthound/validators/timeout/__init__.py +102 -0
- truthound/validators/timeout/advanced/__init__.py +247 -0
- truthound/validators/timeout/advanced/circuit_breaker.py +675 -0
- truthound/validators/timeout/advanced/prediction.py +773 -0
- truthound/validators/timeout/advanced/priority.py +618 -0
- truthound/validators/timeout/advanced/redis_backend.py +770 -0
- truthound/validators/timeout/advanced/retry.py +721 -0
- truthound/validators/timeout/advanced/sampling.py +788 -0
- truthound/validators/timeout/advanced/sla.py +661 -0
- truthound/validators/timeout/advanced/telemetry.py +804 -0
- truthound/validators/timeout/cascade.py +477 -0
- truthound/validators/timeout/deadline.py +657 -0
- truthound/validators/timeout/degradation.py +525 -0
- truthound/validators/timeout/distributed.py +597 -0
- truthound/validators/timeseries/__init__.py +89 -0
- truthound/validators/timeseries/base.py +326 -0
- truthound/validators/timeseries/completeness.py +617 -0
- truthound/validators/timeseries/gap.py +485 -0
- truthound/validators/timeseries/monotonic.py +310 -0
- truthound/validators/timeseries/seasonality.py +422 -0
- truthound/validators/timeseries/trend.py +510 -0
- truthound/validators/uniqueness/__init__.py +59 -0
- truthound/validators/uniqueness/approximate.py +475 -0
- truthound/validators/uniqueness/distinct_values.py +253 -0
- truthound/validators/uniqueness/duplicate.py +118 -0
- truthound/validators/uniqueness/primary_key.py +140 -0
- truthound/validators/uniqueness/unique.py +191 -0
- truthound/validators/uniqueness/within_record.py +599 -0
- truthound/validators/utils.py +756 -0
- truthound-1.0.8.dist-info/METADATA +474 -0
- truthound-1.0.8.dist-info/RECORD +877 -0
- truthound-1.0.8.dist-info/WHEEL +4 -0
- truthound-1.0.8.dist-info/entry_points.txt +2 -0
- truthound-1.0.8.dist-info/licenses/LICENSE +190 -0
|
@@ -0,0 +1,970 @@
|
|
|
1
|
+
"""DAG-based Validator Orchestration System.
|
|
2
|
+
|
|
3
|
+
This module provides a dependency-aware execution framework for validators
|
|
4
|
+
with support for parallel execution, caching, and multiple execution strategies.
|
|
5
|
+
|
|
6
|
+
Key Features:
|
|
7
|
+
- Dependency-based topological ordering
|
|
8
|
+
- Parallel execution of independent validators
|
|
9
|
+
- Result caching for dependent validators
|
|
10
|
+
- Multiple execution strategies (Sequential, Parallel, Adaptive)
|
|
11
|
+
- Execution metrics and profiling
|
|
12
|
+
|
|
13
|
+
Usage:
|
|
14
|
+
from truthound.validators.optimization.orchestrator import (
|
|
15
|
+
ValidatorDAG,
|
|
16
|
+
ExecutionPlan,
|
|
17
|
+
ParallelExecutionStrategy,
|
|
18
|
+
)
|
|
19
|
+
|
|
20
|
+
# Build DAG from validators
|
|
21
|
+
dag = ValidatorDAG()
|
|
22
|
+
dag.add_validators(validators)
|
|
23
|
+
|
|
24
|
+
# Create execution plan
|
|
25
|
+
plan = dag.build_execution_plan()
|
|
26
|
+
|
|
27
|
+
# Execute with parallel strategy
|
|
28
|
+
strategy = ParallelExecutionStrategy(max_workers=4)
|
|
29
|
+
results = plan.execute(lf, strategy)
|
|
30
|
+
|
|
31
|
+
Architecture:
|
|
32
|
+
ValidatorDAG
|
|
33
|
+
│
|
|
34
|
+
├── ValidatorNode (wraps Validator with metadata)
|
|
35
|
+
│ ├── dependencies: set[str]
|
|
36
|
+
│ ├── provides: set[str]
|
|
37
|
+
│ └── priority: int
|
|
38
|
+
│
|
|
39
|
+
├── build_execution_plan() -> ExecutionPlan
|
|
40
|
+
│ └── Topological sort into execution levels
|
|
41
|
+
│
|
|
42
|
+
└── ExecutionPlan
|
|
43
|
+
├── levels: list[ExecutionLevel]
|
|
44
|
+
│ └── validators in same level can run in parallel
|
|
45
|
+
│
|
|
46
|
+
└── execute(lf, strategy) -> ExecutionResult
|
|
47
|
+
├── SequentialExecutionStrategy
|
|
48
|
+
├── ParallelExecutionStrategy
|
|
49
|
+
└── AdaptiveExecutionStrategy
|
|
50
|
+
"""
|
|
51
|
+
|
|
52
|
+
from __future__ import annotations
|
|
53
|
+
|
|
54
|
+
import time
|
|
55
|
+
import logging
|
|
56
|
+
from abc import ABC, abstractmethod
|
|
57
|
+
from concurrent.futures import ThreadPoolExecutor, as_completed
|
|
58
|
+
from dataclasses import dataclass, field
|
|
59
|
+
from enum import Enum, auto
|
|
60
|
+
from typing import Any, Callable, TypeVar, Generic, Iterator
|
|
61
|
+
|
|
62
|
+
import polars as pl
|
|
63
|
+
|
|
64
|
+
from truthound.validators.base import (
|
|
65
|
+
Validator,
|
|
66
|
+
ValidationIssue,
|
|
67
|
+
ValidatorExecutionResult,
|
|
68
|
+
ValidationResult,
|
|
69
|
+
ErrorContext,
|
|
70
|
+
_validate_safe,
|
|
71
|
+
)
|
|
72
|
+
from truthound.validators.optimization.graph import TopologicalSort
|
|
73
|
+
|
|
74
|
+
|
|
75
|
+
# Module-level logger. The name is the fixed string "truthound.orchestrator"
# rather than one derived from __name__.
logger = logging.getLogger("truthound.orchestrator")
|
|
76
|
+
|
|
77
|
+
|
|
78
|
+
# ============================================================================
|
|
79
|
+
# Validator Categories for Dependency Resolution
|
|
80
|
+
# ============================================================================
|
|
81
|
+
|
|
82
|
+
class ValidatorPhase(Enum):
    """Coarse execution stages used to order validators.

    Members are declared from the earliest stage to the latest, so their
    ``auto()`` values increase in the same order (SCHEMA is 1, CUSTOM is 8).

    The intended contract is that validators of an earlier phase complete
    before a later phase begins, while validators sharing a phase may run
    in parallel as long as they have no explicit dependencies on each other.
    """

    # Schema validation (column existence, types).
    SCHEMA = auto()

    # Null checks, missing values.
    COMPLETENESS = auto()

    # Duplicate detection, key validation.
    UNIQUENESS = auto()

    # Pattern matching, format validation.
    FORMAT = auto()

    # Value range, distribution checks.
    RANGE = auto()

    # Aggregate statistics, outliers.
    STATISTICAL = auto()

    # Multi-table validation.
    CROSS_TABLE = auto()

    # User-defined validators.
    CUSTOM = auto()
|
|
97
|
+
|
|
98
|
+
|
|
99
|
+
# Default phase for each built-in validator category, keyed by the
# validator's ``category`` string. Several categories may share one phase.
CATEGORY_TO_PHASE: dict[str, ValidatorPhase] = {
    # Structural checks.
    "schema": ValidatorPhase.SCHEMA,
    "completeness": ValidatorPhase.COMPLETENESS,
    "uniqueness": ValidatorPhase.UNIQUENESS,
    # Text and timestamp formats share the FORMAT phase.
    "string": ValidatorPhase.FORMAT,
    "datetime": ValidatorPhase.FORMAT,
    # Value-level and statistical checks.
    "distribution": ValidatorPhase.RANGE,
    "aggregate": ValidatorPhase.STATISTICAL,
    "anomaly": ValidatorPhase.STATISTICAL,
    # Checks that involve more than one table.
    "cross_table": ValidatorPhase.CROSS_TABLE,
    "referential": ValidatorPhase.CROSS_TABLE,
    # Fallback category.
    "general": ValidatorPhase.CUSTOM,
}
|
|
113
|
+
|
|
114
|
+
|
|
115
|
+
# ============================================================================
|
|
116
|
+
# Validator Node (Wrapper with Dependency Metadata)
|
|
117
|
+
# ============================================================================
|
|
118
|
+
|
|
119
|
+
@dataclass
class ValidatorNode:
    """A validator bundled with the metadata needed to schedule it.

    Two nodes compare equal, and hash alike, exactly when their ``node_id``
    values match; no other field takes part in the comparison.

    Attributes:
        validator: The wrapped Validator instance.
        node_id: Unique identifier. An empty value is replaced by
            ``validator.name`` during initialization.
        dependencies: node_ids this validator depends on.
        provides: Capabilities this validator provides. An empty set is
            replaced by ``{node_id}`` during initialization.
        phase: Execution phase used for ordering.
        priority: Priority within the phase (lower = earlier).
        estimated_cost: Estimated execution cost (for adaptive scheduling).
    """

    validator: Validator
    node_id: str = ""
    dependencies: set[str] = field(default_factory=set)
    provides: set[str] = field(default_factory=set)
    phase: ValidatorPhase = ValidatorPhase.CUSTOM
    priority: int = 100
    estimated_cost: float = 1.0

    def __post_init__(self) -> None:
        """Derive the defaults that depend on the wrapped validator."""
        # Fall back to the validator's own name when no id was supplied.
        self.node_id = self.node_id or self.validator.name

        # Only a phase left at CUSTOM is remapped from the validator's
        # category; any other phase is treated as an explicit choice.
        category = getattr(self.validator, "category", "general")
        phase_is_unset = self.phase == ValidatorPhase.CUSTOM
        if phase_is_unset and category in CATEGORY_TO_PHASE:
            self.phase = CATEGORY_TO_PHASE[category]

        # A node with no declared capabilities provides itself.
        if not self.provides:
            self.provides = {self.node_id}

    def __hash__(self) -> int:
        """Hash on ``node_id`` only, consistent with ``__eq__``."""
        return hash(self.node_id)

    def __eq__(self, other: object) -> bool:
        """Return True only for another ValidatorNode with the same id."""
        if not isinstance(other, ValidatorNode):
            return False
        return self.node_id == other.node_id
|
|
160
|
+
|
|
161
|
+
|
|
162
|
+
# ============================================================================
|
|
163
|
+
# Execution Level (Group of Parallel-Safe Validators)
|
|
164
|
+
# ============================================================================
|
|
165
|
+
|
|
166
|
+
@dataclass
class ExecutionLevel:
    """One stage of a plan: validators that may execute in parallel.

    Nodes within a level are expected to have no dependencies on each
    other, only on nodes placed in previous levels.

    Attributes:
        level_index: Index of this level.
        nodes: The validator nodes that belong to this level.
        phase: Phase associated with this level.
    """

    level_index: int
    nodes: list[ValidatorNode]
    phase: ValidatorPhase

    @property
    def size(self) -> int:
        """Number of nodes in this level."""
        return len(self.nodes)

    @property
    def node_ids(self) -> list[str]:
        """Identifiers of the nodes, in stored order."""
        ids: list[str] = []
        for member in self.nodes:
            ids.append(member.node_id)
        return ids

    def __iter__(self) -> Iterator[ValidatorNode]:
        """Iterate over the nodes in stored order."""
        return iter(self.nodes)

    def __len__(self) -> int:
        """Number of nodes in this level (same value as ``size``)."""
        return len(self.nodes)
|
|
190
|
+
|
|
191
|
+
|
|
192
|
+
# ============================================================================
|
|
193
|
+
# Execution Result
|
|
194
|
+
# ============================================================================
|
|
195
|
+
|
|
196
|
+
@dataclass
class NodeExecutionResult:
    """Outcome and timing of running a single validator node.

    Attributes:
        node_id: Identifier of the node that was executed.
        result: The validator's execution result.
        start_time: Timestamp taken before the run, in seconds.
        end_time: Timestamp taken after the run, in seconds.
    """

    node_id: str
    result: ValidatorExecutionResult
    start_time: float
    end_time: float

    @property
    def duration_ms(self) -> float:
        """Elapsed time between start and end, in milliseconds."""
        elapsed_seconds = self.end_time - self.start_time
        return elapsed_seconds * 1000

    @property
    def issues(self) -> list[ValidationIssue]:
        """Issues reported by the wrapped result."""
        wrapped = self.result
        return wrapped.issues

    @property
    def status(self) -> ValidationResult:
        """Status reported by the wrapped result."""
        wrapped = self.result
        return wrapped.status
|
|
215
|
+
|
|
216
|
+
|
|
217
|
+
@dataclass
class LevelExecutionResult:
    """Outcome and timing of running every node of one level.

    Attributes:
        level_index: Index of the level that was executed.
        node_results: One result per executed node.
        start_time: Timestamp taken before the level ran, in seconds.
        end_time: Timestamp taken after the level ran, in seconds.
    """

    level_index: int
    node_results: list[NodeExecutionResult]
    start_time: float
    end_time: float

    @property
    def duration_ms(self) -> float:
        """Elapsed time between start and end, in milliseconds."""
        elapsed_seconds = self.end_time - self.start_time
        return elapsed_seconds * 1000

    @property
    def all_issues(self) -> list[ValidationIssue]:
        """Issues of every node result, flattened in node-result order."""
        return [
            issue
            for node_result in self.node_results
            for issue in node_result.issues
        ]

    @property
    def success_count(self) -> int:
        """Number of node results whose status is SUCCESS."""
        succeeded = [
            r for r in self.node_results
            if r.status == ValidationResult.SUCCESS
        ]
        return len(succeeded)

    @property
    def failure_count(self) -> int:
        """Number of node results whose status is FAILED."""
        failed = [
            r for r in self.node_results
            if r.status == ValidationResult.FAILED
        ]
        return len(failed)
|
|
243
|
+
|
|
244
|
+
|
|
245
|
+
@dataclass
class ExecutionResult:
    """Complete result of executing the entire DAG.

    Attributes:
        level_results: Per-level execution results.
        total_start_time: Timestamp taken before execution, in seconds.
        total_end_time: Timestamp taken after execution, in seconds.
        strategy_name: Name of the strategy that ran the plan.
    """

    level_results: list[LevelExecutionResult]
    total_start_time: float
    total_end_time: float
    strategy_name: str

    @property
    def total_duration_ms(self) -> float:
        """Elapsed time between total start and total end, in milliseconds."""
        return (self.total_end_time - self.total_start_time) * 1000

    @property
    def all_issues(self) -> list[ValidationIssue]:
        """Issues of every level, flattened in level order."""
        issues: list[ValidationIssue] = []
        for level_result in self.level_results:
            issues.extend(level_result.all_issues)
        return issues

    @property
    def node_results(self) -> list[NodeExecutionResult]:
        """Node results of every level, flattened in level order."""
        results: list[NodeExecutionResult] = []
        for level_result in self.level_results:
            results.extend(level_result.node_results)
        return results

    @property
    def total_validators(self) -> int:
        """Total number of node results across all levels."""
        return sum(len(lr.node_results) for lr in self.level_results)

    @property
    def success_count(self) -> int:
        """Sum of the per-level success counts."""
        return sum(lr.success_count for lr in self.level_results)

    @property
    def failure_count(self) -> int:
        """Sum of the per-level failure counts."""
        return sum(lr.failure_count for lr in self.level_results)

    def get_metrics(self) -> dict[str, Any]:
        """Get execution metrics summary.

        Returns:
            A dict with the keys ``total_duration_ms``, ``total_validators``,
            ``total_issues``, ``success_count``, ``failure_count``,
            ``levels``, ``strategy`` and ``parallelism_factor``.
        """
        return {
            "total_duration_ms": self.total_duration_ms,
            "total_validators": self.total_validators,
            "total_issues": len(self.all_issues),
            "success_count": self.success_count,
            "failure_count": self.failure_count,
            "levels": len(self.level_results),
            "strategy": self.strategy_name,
            "parallelism_factor": self._compute_parallelism_factor(),
        }

    def _compute_parallelism_factor(self) -> float:
        """Return summed node time divided by total wall-clock time.

        Values above 1.0 mean node executions overlapped in time. The
        result falls back to 1.0 whenever the ratio is not meaningful:
        no node results, no recorded node time, or a non-positive total
        duration.
        """
        # An empty result list sums to 0 and takes the fallback below.
        sequential_time = sum(r.duration_ms for r in self.node_results)
        wall_time = self.total_duration_ms

        # Guard the division: identical start/end stamps would raise
        # ZeroDivisionError, and out-of-order stamps would give a negative
        # ratio.
        if sequential_time == 0 or wall_time <= 0:
            return 1.0

        return sequential_time / wall_time
|
|
306
|
+
|
|
307
|
+
|
|
308
|
+
# ============================================================================
|
|
309
|
+
# Execution Strategies
|
|
310
|
+
# ============================================================================
|
|
311
|
+
|
|
312
|
+
class ExecutionStrategy(ABC):
    """Interface implemented by every level-execution strategy."""

    # Identifier of the strategy; concrete subclasses override it.
    name: str = "base"

    @abstractmethod
    def execute_level(
        self,
        level: ExecutionLevel,
        lf: pl.LazyFrame,
        context: ExecutionContext,
    ) -> LevelExecutionResult:
        """Execute all validators in a level.

        Args:
            level: The group of validator nodes to run.
            lf: LazyFrame the validators are run against.
            context: Shared execution context.

        Returns:
            The result of executing the level.
        """
|
|
326
|
+
|
|
327
|
+
|
|
328
|
+
@dataclass
class ExecutionContext:
    """State shared between the strategies while a plan is executed.

    Attributes:
        previous_results: Node results recorded so far, keyed by node_id.
        cached_data: Free-form cache dictionary.
        skip_on_error: Flag the strategies forward to ``_validate_safe``.
        log_errors: Flag the strategies forward to ``_validate_safe``.
    """

    previous_results: dict[str, NodeExecutionResult] = field(default_factory=dict)
    cached_data: dict[str, Any] = field(default_factory=dict)
    skip_on_error: bool = True
    log_errors: bool = True

    def get_result(self, node_id: str) -> NodeExecutionResult | None:
        """Return the recorded result for ``node_id``, or None if absent."""
        recorded = self.previous_results
        return recorded.get(node_id)

    def add_result(self, result: NodeExecutionResult) -> None:
        """Record ``result`` under its node_id, replacing any earlier entry."""
        key = result.node_id
        self.previous_results[key] = result
|
|
341
|
+
|
|
342
|
+
|
|
343
|
+
class SequentialExecutionStrategy(ExecutionStrategy):
    """Run the validators of a level one after another.

    The simplest strategy; useful for debugging and low-resource
    environments.
    """

    name = "sequential"

    def execute_level(
        self,
        level: ExecutionLevel,
        lf: pl.LazyFrame,
        context: ExecutionContext,
    ) -> LevelExecutionResult:
        """Execute every node of ``level`` in iteration order.

        Each node result is appended to the returned list and also
        recorded on ``context`` before the next node starts.

        Args:
            level: The group of validator nodes to run.
            lf: LazyFrame the validators are run against.
            context: Shared context; supplies the error-handling flags and
                receives each node result.

        Returns:
            The level result, with node results in execution order.
        """
        started_at = time.time()
        collected: list[NodeExecutionResult] = []

        for current in level:
            node_started = time.time()
            outcome = _validate_safe(
                current.validator,
                lf,
                skip_on_error=context.skip_on_error,
                log_errors=context.log_errors,
            )
            node_finished = time.time()

            record = NodeExecutionResult(
                node_id=current.node_id,
                result=outcome,
                start_time=node_started,
                end_time=node_finished,
            )
            collected.append(record)
            context.add_result(record)

        return LevelExecutionResult(
            level_index=level.level_index,
            node_results=collected,
            start_time=started_at,
            end_time=time.time(),
        )
|
|
385
|
+
|
|
386
|
+
|
|
387
|
+
class ParallelExecutionStrategy(ExecutionStrategy):
    """Execute validators in parallel using ThreadPoolExecutor.

    Best for I/O-bound validators or when using Polars' streaming mode.

    Node results are returned in the level's own node order, regardless of
    which worker finishes first, so the output is deterministic and lines
    up with what SequentialExecutionStrategy returns for the same level.
    """

    name = "parallel"

    def __init__(self, max_workers: int | None = None):
        """Initialize parallel strategy.

        Args:
            max_workers: Maximum number of worker threads. None defers to
                ThreadPoolExecutor's own default.
        """
        self.max_workers = max_workers

    def execute_level(
        self,
        level: ExecutionLevel,
        lf: pl.LazyFrame,
        context: ExecutionContext,
    ) -> LevelExecutionResult:
        """Execute every node of ``level``, using a pool when it helps.

        Args:
            level: The group of validator nodes to run.
            lf: LazyFrame the validators are run against.
            context: Shared context; supplies the error-handling flags and
                receives each node result as soon as it is available.

        Returns:
            The level result, with node results in the level's node order.
            A node whose worker raised is reported as a FAILED result
            carrying the error message instead of propagating.
        """
        level_start = time.time()
        nodes = list(level)
        node_results: list[NodeExecutionResult] = []

        if len(nodes) <= 1:
            # For zero or one validator there is no need for a thread pool.
            for node in nodes:
                node_result = self._execute_node(
                    node,
                    lf,
                    context.skip_on_error,
                    context.log_errors,
                )
                node_results.append(node_result)
                context.add_result(node_result)
        else:
            # Results are stored by submission position so the final list
            # does not depend on thread completion order.
            by_position: dict[int, NodeExecutionResult] = {}

            with ThreadPoolExecutor(max_workers=self.max_workers) as executor:
                future_to_slot = {
                    executor.submit(
                        self._execute_node,
                        node,
                        lf,
                        context.skip_on_error,
                        context.log_errors,
                    ): (position, node)
                    for position, node in enumerate(nodes)
                }

                for future in as_completed(future_to_slot):
                    position, node = future_to_slot[future]
                    try:
                        # Only this call can surface the worker's exception.
                        node_result = future.result()
                    except Exception as e:
                        logger.error(f"Error executing {node.node_id}: {e}")
                        # One timestamp for both ends: the real run time of
                        # the failed worker is not known here.
                        failed_at = time.time()
                        node_result = NodeExecutionResult(
                            node_id=node.node_id,
                            result=ValidatorExecutionResult(
                                validator_name=node.validator.name,
                                status=ValidationResult.FAILED,
                                issues=[],
                                error_message=str(e),
                                error_context=ErrorContext(type(e).__name__, str(e)),
                            ),
                            start_time=failed_at,
                            end_time=failed_at,
                        )
                    by_position[position] = node_result
                    context.add_result(node_result)

            node_results = [by_position[i] for i in range(len(nodes))]

        return LevelExecutionResult(
            level_index=level.level_index,
            node_results=node_results,
            start_time=level_start,
            end_time=time.time(),
        )

    def _execute_node(
        self,
        node: ValidatorNode,
        lf: pl.LazyFrame,
        skip_on_error: bool,
        log_errors: bool,
    ) -> NodeExecutionResult:
        """Execute a single node and time it (also used as the pool task).

        Args:
            node: The validator node to run.
            lf: LazyFrame the validator is run against.
            skip_on_error: Forwarded to ``_validate_safe``.
            log_errors: Forwarded to ``_validate_safe``.

        Returns:
            The node result with start and end timestamps.
        """
        start = time.time()
        result = _validate_safe(
            node.validator,
            lf,
            skip_on_error=skip_on_error,
            log_errors=log_errors,
        )
        end = time.time()

        return NodeExecutionResult(
            node_id=node.node_id,
            result=result,
            start_time=start,
            end_time=end,
        )
|
|
501
|
+
|
|
502
|
+
|
|
503
|
+
class AdaptiveExecutionStrategy(ExecutionStrategy):
    """Pick sequential or parallel execution separately for each level.

    The choice is made from the number of validators in the level: levels
    with at least ``parallel_threshold`` validators are handed to a
    ParallelExecutionStrategy, smaller ones to a
    SequentialExecutionStrategy.
    """

    name = "adaptive"

    def __init__(
        self,
        parallel_threshold: int = 3,
        max_workers: int | None = None,
    ):
        """Initialize adaptive strategy.

        Args:
            parallel_threshold: Minimum validators in level to use parallel
            max_workers: Maximum workers for parallel execution
        """
        self.parallel_threshold = parallel_threshold
        # Both delegates are created once and reused for every level.
        self.sequential = SequentialExecutionStrategy()
        self.parallel = ParallelExecutionStrategy(max_workers=max_workers)

    def execute_level(
        self,
        level: ExecutionLevel,
        lf: pl.LazyFrame,
        context: ExecutionContext,
    ) -> LevelExecutionResult:
        """Delegate ``level`` to the strategy selected by its size.

        Args:
            level: The group of validator nodes to run.
            lf: LazyFrame the validators are run against.
            context: Shared execution context, passed through unchanged.

        Returns:
            Whatever the chosen delegate strategy returns.
        """
        wants_parallel = len(level) >= self.parallel_threshold
        delegate = self.parallel if wants_parallel else self.sequential
        return delegate.execute_level(level, lf, context)
|
|
539
|
+
|
|
540
|
+
|
|
541
|
+
# ============================================================================
|
|
542
|
+
# Execution Plan
|
|
543
|
+
# ============================================================================
|
|
544
|
+
|
|
545
|
+
@dataclass
class ExecutionPlan:
    """Ordered list of execution levels for a set of validators.

    Each level groups validators that can run in parallel; the levels
    themselves are run one after another in list order.
    """

    # Levels in the order they are executed.
    levels: list[ExecutionLevel]
    # Node count reported by the plan's builder.
    total_nodes: int
    # True when the builder hit a dependency cycle.
    has_cycles: bool = False
    # Text describing the cycle, when one was reported.
    cycle_info: str | None = None

    def execute(
        self,
        lf: pl.LazyFrame,
        strategy: ExecutionStrategy | None = None,
        skip_on_error: bool = True,
        log_errors: bool = True,
    ) -> ExecutionResult:
        """Run every level of the plan in order.

        Args:
            lf: LazyFrame to validate
            strategy: Execution strategy (default: AdaptiveExecutionStrategy)
            skip_on_error: Continue on validator errors
            log_errors: Log validation errors

        Returns:
            ExecutionResult with the per-level results, overall start/end
            timestamps and the name of the strategy used
        """
        active_strategy = AdaptiveExecutionStrategy() if strategy is None else strategy

        started_at = time.time()
        ctx = ExecutionContext(
            skip_on_error=skip_on_error,
            log_errors=log_errors,
        )

        # One shared context is threaded through all levels, in plan order.
        per_level = [
            active_strategy.execute_level(level, lf, ctx) for level in self.levels
        ]

        return ExecutionResult(
            level_results=per_level,
            total_start_time=started_at,
            total_end_time=time.time(),
            strategy_name=active_strategy.name,
        )

    def get_summary(self) -> dict[str, Any]:
        """Return a plain-dict description of the plan and its levels."""
        level_entries = []
        for lvl in self.levels:
            level_entries.append(
                {
                    "index": lvl.level_index,
                    "size": lvl.size,
                    "phase": lvl.phase.name,
                    "nodes": lvl.node_ids,
                }
            )

        return {
            "total_nodes": self.total_nodes,
            "total_levels": len(self.levels),
            "has_cycles": self.has_cycles,
            "levels": level_entries,
        }

    def __repr__(self) -> str:
        level_count = len(self.levels)
        return (
            f"ExecutionPlan(nodes={self.total_nodes}, levels={level_count}, "
            f"has_cycles={self.has_cycles})"
        )
|
|
618
|
+
|
|
619
|
+
|
|
620
|
+
# ============================================================================
|
|
621
|
+
# Validator DAG
|
|
622
|
+
# ============================================================================
|
|
623
|
+
|
|
624
|
+
class ValidatorDAG:
    """Directed Acyclic Graph for validator dependency management.

    Builds an execution plan from a set of validators based on:
    1. Explicit dependencies (validator.dependencies)
    2. Phase ordering (schema -> completeness -> uniqueness -> ...)
    3. Priority within phase

    Nodes are keyed by ``validator.name``. Adding a second validator with
    the same name replaces the earlier node (a warning is logged).

    Example:
        dag = ValidatorDAG()

        # Add validators with automatic dependency detection
        dag.add_validator(NullValidator())
        dag.add_validator(DuplicateValidator())

        # Add with explicit dependencies
        dag.add_validator(
            RangeValidator(),
            dependencies={"null"},  # Must run after NullValidator
        )

        # Build and execute plan
        plan = dag.build_execution_plan()
        result = plan.execute(lf)
    """

    def __init__(self):
        self.nodes: dict[str, ValidatorNode] = {}
        # Not referenced by the methods of this class; kept so existing
        # attribute access keeps working.
        self._dependency_graph: dict[str, set[str]] = {}

    def add_validator(
        self,
        validator: Validator,
        dependencies: set[str] | None = None,
        provides: set[str] | None = None,
        phase: ValidatorPhase | None = None,
        priority: int = 100,
        estimated_cost: float = 1.0,
    ) -> ValidatorNode:
        """Add a validator to the DAG.

        Args:
            validator: Validator instance; its ``name`` becomes the node id
            dependencies: Set of node_ids this depends on. When omitted, the
                validator's own ``dependencies`` attribute is used if present.
            provides: Set of capabilities this provides (default: the node id)
            phase: Execution phase override. When omitted, the phase is
                looked up from the validator's ``category`` attribute.
            priority: Priority within phase (lower = earlier)
            estimated_cost: Estimated execution cost

        Returns:
            The created ValidatorNode
        """
        node_id = validator.name

        # Fall back to dependencies declared on the validator itself.
        if dependencies is None:
            dependencies = getattr(validator, "dependencies", set())
        if dependencies is None:
            dependencies = set()

        # Auto-detect phase from category
        if phase is None:
            category = getattr(validator, "category", "general")
            phase = CATEGORY_TO_PHASE.get(category, ValidatorPhase.CUSTOM)

        if node_id in self.nodes:
            # Nodes are keyed by name, so the earlier validator is dropped
            # from the plan; make that visible instead of silent.
            logger.warning(
                "Validator node '%s' is already registered; replacing it",
                node_id,
            )

        node = ValidatorNode(
            validator=validator,
            node_id=node_id,
            # Copy both sets so later mutation of the node (e.g. via
            # add_dependency) never leaks into caller-owned objects.
            dependencies=set(dependencies),
            provides=set(provides) if provides else {node_id},
            phase=phase,
            priority=priority,
            estimated_cost=estimated_cost,
        )

        self.nodes[node_id] = node
        return node

    def add_validators(
        self,
        validators: list[Validator],
    ) -> list[ValidatorNode]:
        """Add multiple validators.

        Args:
            validators: List of Validator instances

        Returns:
            List of created ValidatorNodes
        """
        return [self.add_validator(v) for v in validators]

    def add_dependency(self, from_id: str, to_id: str) -> None:
        """Add a dependency edge.

        Does nothing when ``from_id`` is not a registered node. ``to_id`` is
        recorded even if it is not registered.

        Args:
            from_id: Node that depends
            to_id: Node that is depended upon
        """
        if from_id in self.nodes:
            self.nodes[from_id].dependencies.add(to_id)

    def build_execution_plan(self) -> ExecutionPlan:
        """Build an execution plan from the DAG.

        If the topological sort raises ``ValueError`` (reported here as a
        dependency cycle), the plan falls back to phase/priority ordering
        and is flagged with ``has_cycles=True``.

        Returns:
            ExecutionPlan with validators organized into levels
        """
        if not self.nodes:
            return ExecutionPlan(levels=[], total_nodes=0)

        # Build full dependency graph including phase dependencies
        adjacency = self._build_adjacency_with_phases()

        # Check for cycles
        try:
            sorter = TopologicalSort(adjacency)
            sorted_ids = sorter.sort()
        except ValueError as e:
            logger.warning(f"Cycle detected in validator dependencies: {e}")
            # Fallback to phase-only ordering
            sorted_ids = self._sort_by_phase_only()
            return ExecutionPlan(
                levels=self._group_into_levels(sorted_ids),
                total_nodes=len(self.nodes),
                has_cycles=True,
                cycle_info=str(e),
            )

        # Group into execution levels
        levels = self._group_into_levels(sorted_ids)

        return ExecutionPlan(
            levels=levels,
            total_nodes=len(self.nodes),
        )

    def _build_adjacency_with_phases(self) -> dict[str, list[str]]:
        """Build adjacency list including implicit phase dependencies.

        The result maps each node id to the ids that must run after it.
        """
        adjacency: dict[str, list[str]] = {node_id: [] for node_id in self.nodes}

        # Add explicit dependencies (unknown dependency ids are ignored)
        for node_id, node in self.nodes.items():
            for dep in node.dependencies:
                if dep in self.nodes:
                    adjacency[dep].append(node_id)

        # Add implicit phase dependencies:
        # validators in later phases depend on validators in earlier phases
        phase_to_nodes: dict[ValidatorPhase, list[str]] = {}
        for node_id, node in self.nodes.items():
            phase_to_nodes.setdefault(node.phase, []).append(node_id)

        # Sort phases by value
        sorted_phases = sorted(phase_to_nodes, key=lambda p: p.value)

        # Link each populated phase to the next populated one; ordering
        # across more distant phases follows transitively.
        for current_phase, next_phase in zip(sorted_phases, sorted_phases[1:]):
            next_nodes = phase_to_nodes[next_phase]
            for current_node in phase_to_nodes[current_phase]:
                successors = adjacency[current_node]
                # Set lookup avoids a linear scan of the list per candidate.
                already_linked = set(successors)
                successors.extend(
                    nid for nid in next_nodes if nid not in already_linked
                )

        return adjacency

    def _sort_by_phase_only(self) -> list[str]:
        """Fallback sort using only phases (ignores explicit dependencies)."""
        sorted_nodes = sorted(
            self.nodes.values(),
            key=lambda n: (n.phase.value, n.priority, n.node_id),
        )
        return [n.node_id for n in sorted_nodes]

    def _group_into_levels(self, sorted_ids: list[str]) -> list[ExecutionLevel]:
        """Group sorted node IDs into execution levels.

        Nodes with no dependencies on each other can be in the same level.
        A level only ever contains nodes of a single phase: the phase of the
        first ready node found in ``sorted_ids`` order.
        """
        if not sorted_ids:
            return []

        levels: list[ExecutionLevel] = []
        assigned: set[str] = set()
        remaining = list(sorted_ids)

        while remaining:
            # Find all nodes whose dependencies are already assigned
            current_level_nodes: list[ValidatorNode] = []
            current_phase = None

            for node_id in remaining:
                node = self.nodes[node_id]
                deps_satisfied = all(
                    dep in assigned or dep not in self.nodes
                    for dep in node.dependencies
                )
                if not deps_satisfied:
                    continue

                # Check phase compatibility - only group same phase
                if current_phase is None:
                    current_phase = node.phase
                if node.phase == current_phase:
                    current_level_nodes.append(node)

            if not current_level_nodes:
                # Shouldn't happen if graph is acyclic, but handle gracefully
                # by forcing progress with the first remaining node.
                logger.warning("Could not find nodes for next level")
                forced = self.nodes[remaining[0]]
                current_level_nodes = [forced]
                current_phase = forced.phase

            # Sort within level by priority
            current_level_nodes.sort(key=lambda n: (n.priority, n.node_id))

            levels.append(
                ExecutionLevel(
                    level_index=len(levels),
                    nodes=current_level_nodes,
                    phase=current_phase or ValidatorPhase.CUSTOM,
                )
            )

            # Mark as assigned; rebuild the remainder in one pass rather
            # than calling list.remove() once per placed node.
            placed = {node.node_id for node in current_level_nodes}
            assigned |= placed
            remaining = [nid for nid in remaining if nid not in placed]

        return levels

    def get_dependency_chain(self, node_id: str) -> list[str]:
        """Get the full dependency chain for a node.

        Dependencies are visited in sorted order so the result is the same
        on every run (set iteration order of strings is not stable across
        interpreter invocations). Unregistered dependency ids are skipped.

        Args:
            node_id: Node to get dependencies for

        Returns:
            List of node_ids in dependency order, ending with ``node_id``;
            empty if ``node_id`` is not registered
        """
        if node_id not in self.nodes:
            return []

        visited: set[str] = set()
        chain: list[str] = []

        def visit(nid: str) -> None:
            if nid in visited or nid not in self.nodes:
                return
            visited.add(nid)

            for dep in sorted(self.nodes[nid].dependencies):
                visit(dep)

            # Post-order: a node is emitted after everything it depends on.
            chain.append(nid)

        visit(node_id)
        return chain

    def visualize(self) -> str:
        """Create ASCII visualization of the DAG.

        Returns:
            Text listing nodes grouped by phase with their dependencies
        """
        if not self.nodes:
            return "Empty DAG"

        lines = ["ValidatorDAG:"]

        # Group by phase
        phase_to_nodes: dict[ValidatorPhase, list[ValidatorNode]] = {}
        for node in self.nodes.values():
            phase_to_nodes.setdefault(node.phase, []).append(node)

        for phase in sorted(phase_to_nodes, key=lambda p: p.value):
            lines.append(f"\n [{phase.name}]")
            nodes = sorted(phase_to_nodes[phase], key=lambda n: n.priority)

            for node in nodes:
                deps = ", ".join(sorted(node.dependencies)) if node.dependencies else "none"
                lines.append(f" - {node.node_id} (deps: {deps})")

        return "\n".join(lines)

    def __repr__(self) -> str:
        return f"ValidatorDAG(nodes={len(self.nodes)})"

    def __len__(self) -> int:
        return len(self.nodes)
|
|
922
|
+
|
|
923
|
+
|
|
924
|
+
# ============================================================================
|
|
925
|
+
# Convenience Functions
|
|
926
|
+
# ============================================================================
|
|
927
|
+
|
|
928
|
+
def create_execution_plan(
    validators: list[Validator],
    dependencies: dict[str, set[str]] | None = None,
) -> ExecutionPlan:
    """Build an ExecutionPlan for the given validators.

    Each validator is registered in a fresh ValidatorDAG. When
    ``dependencies`` has an entry for a validator's name, that entry is
    passed as the node's explicit dependencies; otherwise ``None`` is
    passed and the DAG applies its own default.

    Args:
        validators: List of validators
        dependencies: Optional explicit dependencies {validator_name: {dep_names}}

    Returns:
        ExecutionPlan ready for execution
    """
    explicit = dependencies if dependencies else {}
    dag = ValidatorDAG()

    for validator in validators:
        dag.add_validator(validator, dependencies=explicit.get(validator.name))

    return dag.build_execution_plan()
|
|
950
|
+
|
|
951
|
+
|
|
952
|
+
def execute_validators(
    validators: list[Validator],
    lf: pl.LazyFrame,
    strategy: ExecutionStrategy | None = None,
    dependencies: dict[str, set[str]] | None = None,
) -> ExecutionResult:
    """Plan and run validators in one call, using DAG-based ordering.

    Args:
        validators: List of validators
        lf: LazyFrame to validate
        strategy: Execution strategy, passed through to the plan's
            ``execute`` (which applies its own default when None)
        dependencies: Optional explicit dependencies

    Returns:
        ExecutionResult with all validation results
    """
    return create_execution_plan(validators, dependencies).execute(lf, strategy)
|