truthound 1.0.8__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- truthound/__init__.py +162 -0
- truthound/adapters.py +100 -0
- truthound/api.py +365 -0
- truthound/audit/__init__.py +248 -0
- truthound/audit/core.py +967 -0
- truthound/audit/filters.py +620 -0
- truthound/audit/formatters.py +707 -0
- truthound/audit/logger.py +902 -0
- truthound/audit/middleware.py +571 -0
- truthound/audit/storage.py +1083 -0
- truthound/benchmark/__init__.py +123 -0
- truthound/benchmark/base.py +757 -0
- truthound/benchmark/comparison.py +635 -0
- truthound/benchmark/generators.py +706 -0
- truthound/benchmark/reporters.py +718 -0
- truthound/benchmark/runner.py +635 -0
- truthound/benchmark/scenarios.py +712 -0
- truthound/cache.py +252 -0
- truthound/checkpoint/__init__.py +136 -0
- truthound/checkpoint/actions/__init__.py +164 -0
- truthound/checkpoint/actions/base.py +324 -0
- truthound/checkpoint/actions/custom.py +234 -0
- truthound/checkpoint/actions/discord_notify.py +290 -0
- truthound/checkpoint/actions/email_notify.py +405 -0
- truthound/checkpoint/actions/github_action.py +406 -0
- truthound/checkpoint/actions/opsgenie.py +1499 -0
- truthound/checkpoint/actions/pagerduty.py +226 -0
- truthound/checkpoint/actions/slack_notify.py +233 -0
- truthound/checkpoint/actions/store_result.py +249 -0
- truthound/checkpoint/actions/teams_notify.py +1570 -0
- truthound/checkpoint/actions/telegram_notify.py +419 -0
- truthound/checkpoint/actions/update_docs.py +552 -0
- truthound/checkpoint/actions/webhook.py +293 -0
- truthound/checkpoint/analytics/__init__.py +147 -0
- truthound/checkpoint/analytics/aggregations/__init__.py +23 -0
- truthound/checkpoint/analytics/aggregations/rollup.py +481 -0
- truthound/checkpoint/analytics/aggregations/time_bucket.py +306 -0
- truthound/checkpoint/analytics/analyzers/__init__.py +17 -0
- truthound/checkpoint/analytics/analyzers/anomaly.py +386 -0
- truthound/checkpoint/analytics/analyzers/base.py +270 -0
- truthound/checkpoint/analytics/analyzers/forecast.py +421 -0
- truthound/checkpoint/analytics/analyzers/trend.py +314 -0
- truthound/checkpoint/analytics/models.py +292 -0
- truthound/checkpoint/analytics/protocols.py +549 -0
- truthound/checkpoint/analytics/service.py +718 -0
- truthound/checkpoint/analytics/stores/__init__.py +16 -0
- truthound/checkpoint/analytics/stores/base.py +306 -0
- truthound/checkpoint/analytics/stores/memory_store.py +353 -0
- truthound/checkpoint/analytics/stores/sqlite_store.py +557 -0
- truthound/checkpoint/analytics/stores/timescale_store.py +501 -0
- truthound/checkpoint/async_actions.py +794 -0
- truthound/checkpoint/async_base.py +708 -0
- truthound/checkpoint/async_checkpoint.py +617 -0
- truthound/checkpoint/async_runner.py +639 -0
- truthound/checkpoint/checkpoint.py +527 -0
- truthound/checkpoint/ci/__init__.py +61 -0
- truthound/checkpoint/ci/detector.py +355 -0
- truthound/checkpoint/ci/reporter.py +436 -0
- truthound/checkpoint/ci/templates.py +454 -0
- truthound/checkpoint/circuitbreaker/__init__.py +133 -0
- truthound/checkpoint/circuitbreaker/breaker.py +542 -0
- truthound/checkpoint/circuitbreaker/core.py +252 -0
- truthound/checkpoint/circuitbreaker/detection.py +459 -0
- truthound/checkpoint/circuitbreaker/middleware.py +389 -0
- truthound/checkpoint/circuitbreaker/registry.py +357 -0
- truthound/checkpoint/distributed/__init__.py +139 -0
- truthound/checkpoint/distributed/backends/__init__.py +35 -0
- truthound/checkpoint/distributed/backends/celery_backend.py +503 -0
- truthound/checkpoint/distributed/backends/kubernetes_backend.py +696 -0
- truthound/checkpoint/distributed/backends/local_backend.py +397 -0
- truthound/checkpoint/distributed/backends/ray_backend.py +625 -0
- truthound/checkpoint/distributed/base.py +774 -0
- truthound/checkpoint/distributed/orchestrator.py +765 -0
- truthound/checkpoint/distributed/protocols.py +842 -0
- truthound/checkpoint/distributed/registry.py +449 -0
- truthound/checkpoint/idempotency/__init__.py +120 -0
- truthound/checkpoint/idempotency/core.py +295 -0
- truthound/checkpoint/idempotency/fingerprint.py +454 -0
- truthound/checkpoint/idempotency/locking.py +604 -0
- truthound/checkpoint/idempotency/service.py +592 -0
- truthound/checkpoint/idempotency/stores.py +653 -0
- truthound/checkpoint/monitoring/__init__.py +134 -0
- truthound/checkpoint/monitoring/aggregators/__init__.py +15 -0
- truthound/checkpoint/monitoring/aggregators/base.py +372 -0
- truthound/checkpoint/monitoring/aggregators/realtime.py +300 -0
- truthound/checkpoint/monitoring/aggregators/window.py +493 -0
- truthound/checkpoint/monitoring/collectors/__init__.py +17 -0
- truthound/checkpoint/monitoring/collectors/base.py +257 -0
- truthound/checkpoint/monitoring/collectors/memory_collector.py +617 -0
- truthound/checkpoint/monitoring/collectors/prometheus_collector.py +451 -0
- truthound/checkpoint/monitoring/collectors/redis_collector.py +518 -0
- truthound/checkpoint/monitoring/events.py +410 -0
- truthound/checkpoint/monitoring/protocols.py +636 -0
- truthound/checkpoint/monitoring/service.py +578 -0
- truthound/checkpoint/monitoring/views/__init__.py +17 -0
- truthound/checkpoint/monitoring/views/base.py +172 -0
- truthound/checkpoint/monitoring/views/queue_view.py +220 -0
- truthound/checkpoint/monitoring/views/task_view.py +240 -0
- truthound/checkpoint/monitoring/views/worker_view.py +263 -0
- truthound/checkpoint/registry.py +337 -0
- truthound/checkpoint/runner.py +356 -0
- truthound/checkpoint/transaction/__init__.py +133 -0
- truthound/checkpoint/transaction/base.py +389 -0
- truthound/checkpoint/transaction/compensatable.py +537 -0
- truthound/checkpoint/transaction/coordinator.py +576 -0
- truthound/checkpoint/transaction/executor.py +622 -0
- truthound/checkpoint/transaction/idempotency.py +534 -0
- truthound/checkpoint/transaction/saga/__init__.py +143 -0
- truthound/checkpoint/transaction/saga/builder.py +584 -0
- truthound/checkpoint/transaction/saga/definition.py +515 -0
- truthound/checkpoint/transaction/saga/event_store.py +542 -0
- truthound/checkpoint/transaction/saga/patterns.py +833 -0
- truthound/checkpoint/transaction/saga/runner.py +718 -0
- truthound/checkpoint/transaction/saga/state_machine.py +793 -0
- truthound/checkpoint/transaction/saga/strategies.py +780 -0
- truthound/checkpoint/transaction/saga/testing.py +886 -0
- truthound/checkpoint/triggers/__init__.py +58 -0
- truthound/checkpoint/triggers/base.py +237 -0
- truthound/checkpoint/triggers/event.py +385 -0
- truthound/checkpoint/triggers/schedule.py +355 -0
- truthound/cli.py +2358 -0
- truthound/cli_modules/__init__.py +124 -0
- truthound/cli_modules/advanced/__init__.py +45 -0
- truthound/cli_modules/advanced/benchmark.py +343 -0
- truthound/cli_modules/advanced/docs.py +225 -0
- truthound/cli_modules/advanced/lineage.py +209 -0
- truthound/cli_modules/advanced/ml.py +320 -0
- truthound/cli_modules/advanced/realtime.py +196 -0
- truthound/cli_modules/checkpoint/__init__.py +46 -0
- truthound/cli_modules/checkpoint/init.py +114 -0
- truthound/cli_modules/checkpoint/list.py +71 -0
- truthound/cli_modules/checkpoint/run.py +159 -0
- truthound/cli_modules/checkpoint/validate.py +67 -0
- truthound/cli_modules/common/__init__.py +71 -0
- truthound/cli_modules/common/errors.py +414 -0
- truthound/cli_modules/common/options.py +419 -0
- truthound/cli_modules/common/output.py +507 -0
- truthound/cli_modules/common/protocol.py +552 -0
- truthound/cli_modules/core/__init__.py +48 -0
- truthound/cli_modules/core/check.py +123 -0
- truthound/cli_modules/core/compare.py +104 -0
- truthound/cli_modules/core/learn.py +57 -0
- truthound/cli_modules/core/mask.py +77 -0
- truthound/cli_modules/core/profile.py +65 -0
- truthound/cli_modules/core/scan.py +61 -0
- truthound/cli_modules/profiler/__init__.py +51 -0
- truthound/cli_modules/profiler/auto_profile.py +175 -0
- truthound/cli_modules/profiler/metadata.py +107 -0
- truthound/cli_modules/profiler/suite.py +283 -0
- truthound/cli_modules/registry.py +431 -0
- truthound/cli_modules/scaffolding/__init__.py +89 -0
- truthound/cli_modules/scaffolding/base.py +631 -0
- truthound/cli_modules/scaffolding/commands.py +545 -0
- truthound/cli_modules/scaffolding/plugins.py +1072 -0
- truthound/cli_modules/scaffolding/reporters.py +594 -0
- truthound/cli_modules/scaffolding/validators.py +1127 -0
- truthound/common/__init__.py +18 -0
- truthound/common/resilience/__init__.py +130 -0
- truthound/common/resilience/bulkhead.py +266 -0
- truthound/common/resilience/circuit_breaker.py +516 -0
- truthound/common/resilience/composite.py +332 -0
- truthound/common/resilience/config.py +292 -0
- truthound/common/resilience/protocols.py +217 -0
- truthound/common/resilience/rate_limiter.py +404 -0
- truthound/common/resilience/retry.py +341 -0
- truthound/datadocs/__init__.py +260 -0
- truthound/datadocs/base.py +571 -0
- truthound/datadocs/builder.py +761 -0
- truthound/datadocs/charts.py +764 -0
- truthound/datadocs/dashboard/__init__.py +63 -0
- truthound/datadocs/dashboard/app.py +576 -0
- truthound/datadocs/dashboard/components.py +584 -0
- truthound/datadocs/dashboard/state.py +240 -0
- truthound/datadocs/engine/__init__.py +46 -0
- truthound/datadocs/engine/context.py +376 -0
- truthound/datadocs/engine/pipeline.py +618 -0
- truthound/datadocs/engine/registry.py +469 -0
- truthound/datadocs/exporters/__init__.py +49 -0
- truthound/datadocs/exporters/base.py +198 -0
- truthound/datadocs/exporters/html.py +178 -0
- truthound/datadocs/exporters/json_exporter.py +253 -0
- truthound/datadocs/exporters/markdown.py +284 -0
- truthound/datadocs/exporters/pdf.py +392 -0
- truthound/datadocs/i18n/__init__.py +86 -0
- truthound/datadocs/i18n/catalog.py +960 -0
- truthound/datadocs/i18n/formatting.py +505 -0
- truthound/datadocs/i18n/loader.py +256 -0
- truthound/datadocs/i18n/plurals.py +378 -0
- truthound/datadocs/renderers/__init__.py +42 -0
- truthound/datadocs/renderers/base.py +401 -0
- truthound/datadocs/renderers/custom.py +342 -0
- truthound/datadocs/renderers/jinja.py +697 -0
- truthound/datadocs/sections.py +736 -0
- truthound/datadocs/styles.py +931 -0
- truthound/datadocs/themes/__init__.py +101 -0
- truthound/datadocs/themes/base.py +336 -0
- truthound/datadocs/themes/default.py +417 -0
- truthound/datadocs/themes/enterprise.py +419 -0
- truthound/datadocs/themes/loader.py +336 -0
- truthound/datadocs/themes.py +301 -0
- truthound/datadocs/transformers/__init__.py +57 -0
- truthound/datadocs/transformers/base.py +268 -0
- truthound/datadocs/transformers/enrichers.py +544 -0
- truthound/datadocs/transformers/filters.py +447 -0
- truthound/datadocs/transformers/i18n.py +468 -0
- truthound/datadocs/versioning/__init__.py +62 -0
- truthound/datadocs/versioning/diff.py +639 -0
- truthound/datadocs/versioning/storage.py +497 -0
- truthound/datadocs/versioning/version.py +358 -0
- truthound/datasources/__init__.py +223 -0
- truthound/datasources/_async_protocols.py +222 -0
- truthound/datasources/_protocols.py +159 -0
- truthound/datasources/adapters.py +428 -0
- truthound/datasources/async_base.py +599 -0
- truthound/datasources/async_factory.py +511 -0
- truthound/datasources/base.py +516 -0
- truthound/datasources/factory.py +433 -0
- truthound/datasources/nosql/__init__.py +47 -0
- truthound/datasources/nosql/base.py +487 -0
- truthound/datasources/nosql/elasticsearch.py +801 -0
- truthound/datasources/nosql/mongodb.py +636 -0
- truthound/datasources/pandas_optimized.py +582 -0
- truthound/datasources/pandas_source.py +216 -0
- truthound/datasources/polars_source.py +395 -0
- truthound/datasources/spark_source.py +479 -0
- truthound/datasources/sql/__init__.py +154 -0
- truthound/datasources/sql/base.py +710 -0
- truthound/datasources/sql/bigquery.py +410 -0
- truthound/datasources/sql/cloud_base.py +199 -0
- truthound/datasources/sql/databricks.py +471 -0
- truthound/datasources/sql/mysql.py +316 -0
- truthound/datasources/sql/oracle.py +427 -0
- truthound/datasources/sql/postgresql.py +321 -0
- truthound/datasources/sql/redshift.py +479 -0
- truthound/datasources/sql/snowflake.py +439 -0
- truthound/datasources/sql/sqlite.py +286 -0
- truthound/datasources/sql/sqlserver.py +437 -0
- truthound/datasources/streaming/__init__.py +47 -0
- truthound/datasources/streaming/base.py +350 -0
- truthound/datasources/streaming/kafka.py +670 -0
- truthound/decorators.py +98 -0
- truthound/docs/__init__.py +69 -0
- truthound/docs/extractor.py +971 -0
- truthound/docs/generator.py +601 -0
- truthound/docs/parser.py +1037 -0
- truthound/docs/renderer.py +999 -0
- truthound/drift/__init__.py +22 -0
- truthound/drift/compare.py +189 -0
- truthound/drift/detectors.py +464 -0
- truthound/drift/report.py +160 -0
- truthound/execution/__init__.py +65 -0
- truthound/execution/_protocols.py +324 -0
- truthound/execution/base.py +576 -0
- truthound/execution/distributed/__init__.py +179 -0
- truthound/execution/distributed/aggregations.py +731 -0
- truthound/execution/distributed/arrow_bridge.py +817 -0
- truthound/execution/distributed/base.py +550 -0
- truthound/execution/distributed/dask_engine.py +976 -0
- truthound/execution/distributed/mixins.py +766 -0
- truthound/execution/distributed/protocols.py +756 -0
- truthound/execution/distributed/ray_engine.py +1127 -0
- truthound/execution/distributed/registry.py +446 -0
- truthound/execution/distributed/spark_engine.py +1011 -0
- truthound/execution/distributed/validator_adapter.py +682 -0
- truthound/execution/pandas_engine.py +401 -0
- truthound/execution/polars_engine.py +497 -0
- truthound/execution/pushdown/__init__.py +230 -0
- truthound/execution/pushdown/ast.py +1550 -0
- truthound/execution/pushdown/builder.py +1550 -0
- truthound/execution/pushdown/dialects.py +1072 -0
- truthound/execution/pushdown/executor.py +829 -0
- truthound/execution/pushdown/optimizer.py +1041 -0
- truthound/execution/sql_engine.py +518 -0
- truthound/infrastructure/__init__.py +189 -0
- truthound/infrastructure/audit.py +1515 -0
- truthound/infrastructure/config.py +1133 -0
- truthound/infrastructure/encryption.py +1132 -0
- truthound/infrastructure/logging.py +1503 -0
- truthound/infrastructure/metrics.py +1220 -0
- truthound/lineage/__init__.py +89 -0
- truthound/lineage/base.py +746 -0
- truthound/lineage/impact_analysis.py +474 -0
- truthound/lineage/integrations/__init__.py +22 -0
- truthound/lineage/integrations/openlineage.py +548 -0
- truthound/lineage/tracker.py +512 -0
- truthound/lineage/visualization/__init__.py +33 -0
- truthound/lineage/visualization/protocols.py +145 -0
- truthound/lineage/visualization/renderers/__init__.py +20 -0
- truthound/lineage/visualization/renderers/cytoscape.py +329 -0
- truthound/lineage/visualization/renderers/d3.py +331 -0
- truthound/lineage/visualization/renderers/graphviz.py +276 -0
- truthound/lineage/visualization/renderers/mermaid.py +308 -0
- truthound/maskers.py +113 -0
- truthound/ml/__init__.py +124 -0
- truthound/ml/anomaly_models/__init__.py +31 -0
- truthound/ml/anomaly_models/ensemble.py +362 -0
- truthound/ml/anomaly_models/isolation_forest.py +444 -0
- truthound/ml/anomaly_models/statistical.py +392 -0
- truthound/ml/base.py +1178 -0
- truthound/ml/drift_detection/__init__.py +26 -0
- truthound/ml/drift_detection/concept.py +381 -0
- truthound/ml/drift_detection/distribution.py +361 -0
- truthound/ml/drift_detection/feature.py +442 -0
- truthound/ml/drift_detection/multivariate.py +495 -0
- truthound/ml/monitoring/__init__.py +88 -0
- truthound/ml/monitoring/alerting/__init__.py +33 -0
- truthound/ml/monitoring/alerting/handlers.py +427 -0
- truthound/ml/monitoring/alerting/rules.py +508 -0
- truthound/ml/monitoring/collectors/__init__.py +19 -0
- truthound/ml/monitoring/collectors/composite.py +105 -0
- truthound/ml/monitoring/collectors/drift.py +324 -0
- truthound/ml/monitoring/collectors/performance.py +179 -0
- truthound/ml/monitoring/collectors/quality.py +369 -0
- truthound/ml/monitoring/monitor.py +536 -0
- truthound/ml/monitoring/protocols.py +451 -0
- truthound/ml/monitoring/stores/__init__.py +15 -0
- truthound/ml/monitoring/stores/memory.py +201 -0
- truthound/ml/monitoring/stores/prometheus.py +296 -0
- truthound/ml/rule_learning/__init__.py +25 -0
- truthound/ml/rule_learning/constraint_miner.py +443 -0
- truthound/ml/rule_learning/pattern_learner.py +499 -0
- truthound/ml/rule_learning/profile_learner.py +462 -0
- truthound/multitenancy/__init__.py +326 -0
- truthound/multitenancy/core.py +852 -0
- truthound/multitenancy/integration.py +597 -0
- truthound/multitenancy/isolation.py +630 -0
- truthound/multitenancy/manager.py +770 -0
- truthound/multitenancy/middleware.py +765 -0
- truthound/multitenancy/quota.py +537 -0
- truthound/multitenancy/resolvers.py +603 -0
- truthound/multitenancy/storage.py +703 -0
- truthound/observability/__init__.py +307 -0
- truthound/observability/context.py +531 -0
- truthound/observability/instrumentation.py +611 -0
- truthound/observability/logging.py +887 -0
- truthound/observability/metrics.py +1157 -0
- truthound/observability/tracing/__init__.py +178 -0
- truthound/observability/tracing/baggage.py +310 -0
- truthound/observability/tracing/config.py +426 -0
- truthound/observability/tracing/exporter.py +787 -0
- truthound/observability/tracing/integration.py +1018 -0
- truthound/observability/tracing/otel/__init__.py +146 -0
- truthound/observability/tracing/otel/adapter.py +982 -0
- truthound/observability/tracing/otel/bridge.py +1177 -0
- truthound/observability/tracing/otel/compat.py +681 -0
- truthound/observability/tracing/otel/config.py +691 -0
- truthound/observability/tracing/otel/detection.py +327 -0
- truthound/observability/tracing/otel/protocols.py +426 -0
- truthound/observability/tracing/processor.py +561 -0
- truthound/observability/tracing/propagator.py +757 -0
- truthound/observability/tracing/provider.py +569 -0
- truthound/observability/tracing/resource.py +515 -0
- truthound/observability/tracing/sampler.py +487 -0
- truthound/observability/tracing/span.py +676 -0
- truthound/plugins/__init__.py +198 -0
- truthound/plugins/base.py +599 -0
- truthound/plugins/cli.py +680 -0
- truthound/plugins/dependencies/__init__.py +42 -0
- truthound/plugins/dependencies/graph.py +422 -0
- truthound/plugins/dependencies/resolver.py +417 -0
- truthound/plugins/discovery.py +379 -0
- truthound/plugins/docs/__init__.py +46 -0
- truthound/plugins/docs/extractor.py +444 -0
- truthound/plugins/docs/renderer.py +499 -0
- truthound/plugins/enterprise_manager.py +877 -0
- truthound/plugins/examples/__init__.py +19 -0
- truthound/plugins/examples/custom_validators.py +317 -0
- truthound/plugins/examples/slack_notifier.py +312 -0
- truthound/plugins/examples/xml_reporter.py +254 -0
- truthound/plugins/hooks.py +558 -0
- truthound/plugins/lifecycle/__init__.py +43 -0
- truthound/plugins/lifecycle/hot_reload.py +402 -0
- truthound/plugins/lifecycle/manager.py +371 -0
- truthound/plugins/manager.py +736 -0
- truthound/plugins/registry.py +338 -0
- truthound/plugins/security/__init__.py +93 -0
- truthound/plugins/security/exceptions.py +332 -0
- truthound/plugins/security/policies.py +348 -0
- truthound/plugins/security/protocols.py +643 -0
- truthound/plugins/security/sandbox/__init__.py +45 -0
- truthound/plugins/security/sandbox/context.py +158 -0
- truthound/plugins/security/sandbox/engines/__init__.py +19 -0
- truthound/plugins/security/sandbox/engines/container.py +379 -0
- truthound/plugins/security/sandbox/engines/noop.py +144 -0
- truthound/plugins/security/sandbox/engines/process.py +336 -0
- truthound/plugins/security/sandbox/factory.py +211 -0
- truthound/plugins/security/signing/__init__.py +57 -0
- truthound/plugins/security/signing/service.py +330 -0
- truthound/plugins/security/signing/trust_store.py +368 -0
- truthound/plugins/security/signing/verifier.py +459 -0
- truthound/plugins/versioning/__init__.py +41 -0
- truthound/plugins/versioning/constraints.py +297 -0
- truthound/plugins/versioning/resolver.py +329 -0
- truthound/profiler/__init__.py +1729 -0
- truthound/profiler/_lazy.py +452 -0
- truthound/profiler/ab_testing/__init__.py +80 -0
- truthound/profiler/ab_testing/analysis.py +449 -0
- truthound/profiler/ab_testing/base.py +257 -0
- truthound/profiler/ab_testing/experiment.py +395 -0
- truthound/profiler/ab_testing/tracking.py +368 -0
- truthound/profiler/auto_threshold.py +1170 -0
- truthound/profiler/base.py +579 -0
- truthound/profiler/cache_patterns.py +911 -0
- truthound/profiler/caching.py +1303 -0
- truthound/profiler/column_profiler.py +712 -0
- truthound/profiler/comparison.py +1007 -0
- truthound/profiler/custom_patterns.py +1170 -0
- truthound/profiler/dashboard/__init__.py +50 -0
- truthound/profiler/dashboard/app.py +476 -0
- truthound/profiler/dashboard/components.py +457 -0
- truthound/profiler/dashboard/config.py +72 -0
- truthound/profiler/distributed/__init__.py +83 -0
- truthound/profiler/distributed/base.py +281 -0
- truthound/profiler/distributed/dask_backend.py +498 -0
- truthound/profiler/distributed/local_backend.py +293 -0
- truthound/profiler/distributed/profiler.py +304 -0
- truthound/profiler/distributed/ray_backend.py +374 -0
- truthound/profiler/distributed/spark_backend.py +375 -0
- truthound/profiler/distributed.py +1366 -0
- truthound/profiler/enterprise_sampling.py +1065 -0
- truthound/profiler/errors.py +488 -0
- truthound/profiler/evolution/__init__.py +91 -0
- truthound/profiler/evolution/alerts.py +426 -0
- truthound/profiler/evolution/changes.py +206 -0
- truthound/profiler/evolution/compatibility.py +365 -0
- truthound/profiler/evolution/detector.py +372 -0
- truthound/profiler/evolution/protocols.py +121 -0
- truthound/profiler/generators/__init__.py +48 -0
- truthound/profiler/generators/base.py +384 -0
- truthound/profiler/generators/ml_rules.py +375 -0
- truthound/profiler/generators/pattern_rules.py +384 -0
- truthound/profiler/generators/schema_rules.py +267 -0
- truthound/profiler/generators/stats_rules.py +324 -0
- truthound/profiler/generators/suite_generator.py +857 -0
- truthound/profiler/i18n.py +1542 -0
- truthound/profiler/incremental.py +554 -0
- truthound/profiler/incremental_validation.py +1710 -0
- truthound/profiler/integration/__init__.py +73 -0
- truthound/profiler/integration/adapters.py +345 -0
- truthound/profiler/integration/context.py +371 -0
- truthound/profiler/integration/executor.py +527 -0
- truthound/profiler/integration/naming.py +75 -0
- truthound/profiler/integration/protocols.py +243 -0
- truthound/profiler/memory.py +1185 -0
- truthound/profiler/migration/__init__.py +60 -0
- truthound/profiler/migration/base.py +345 -0
- truthound/profiler/migration/manager.py +444 -0
- truthound/profiler/migration/v1_0_to_v1_1.py +484 -0
- truthound/profiler/ml/__init__.py +73 -0
- truthound/profiler/ml/base.py +244 -0
- truthound/profiler/ml/classifier.py +507 -0
- truthound/profiler/ml/feature_extraction.py +604 -0
- truthound/profiler/ml/pretrained.py +448 -0
- truthound/profiler/ml_inference.py +1276 -0
- truthound/profiler/native_patterns.py +815 -0
- truthound/profiler/observability.py +1184 -0
- truthound/profiler/process_timeout.py +1566 -0
- truthound/profiler/progress.py +568 -0
- truthound/profiler/progress_callbacks.py +1734 -0
- truthound/profiler/quality.py +1345 -0
- truthound/profiler/resilience.py +1180 -0
- truthound/profiler/sampled_matcher.py +794 -0
- truthound/profiler/sampling.py +1288 -0
- truthound/profiler/scheduling/__init__.py +82 -0
- truthound/profiler/scheduling/protocols.py +214 -0
- truthound/profiler/scheduling/scheduler.py +474 -0
- truthound/profiler/scheduling/storage.py +457 -0
- truthound/profiler/scheduling/triggers.py +449 -0
- truthound/profiler/schema.py +603 -0
- truthound/profiler/streaming.py +685 -0
- truthound/profiler/streaming_patterns.py +1354 -0
- truthound/profiler/suite_cli.py +625 -0
- truthound/profiler/suite_config.py +789 -0
- truthound/profiler/suite_export.py +1268 -0
- truthound/profiler/table_profiler.py +547 -0
- truthound/profiler/timeout.py +565 -0
- truthound/profiler/validation.py +1532 -0
- truthound/profiler/visualization/__init__.py +118 -0
- truthound/profiler/visualization/base.py +346 -0
- truthound/profiler/visualization/generator.py +1259 -0
- truthound/profiler/visualization/plotly_renderer.py +811 -0
- truthound/profiler/visualization/renderers.py +669 -0
- truthound/profiler/visualization/sections.py +540 -0
- truthound/profiler/visualization.py +2122 -0
- truthound/profiler/yaml_validation.py +1151 -0
- truthound/py.typed +0 -0
- truthound/ratelimit/__init__.py +248 -0
- truthound/ratelimit/algorithms.py +1108 -0
- truthound/ratelimit/core.py +573 -0
- truthound/ratelimit/integration.py +532 -0
- truthound/ratelimit/limiter.py +663 -0
- truthound/ratelimit/middleware.py +700 -0
- truthound/ratelimit/policy.py +792 -0
- truthound/ratelimit/storage.py +763 -0
- truthound/rbac/__init__.py +340 -0
- truthound/rbac/core.py +976 -0
- truthound/rbac/integration.py +760 -0
- truthound/rbac/manager.py +1052 -0
- truthound/rbac/middleware.py +842 -0
- truthound/rbac/policy.py +954 -0
- truthound/rbac/storage.py +878 -0
- truthound/realtime/__init__.py +141 -0
- truthound/realtime/adapters/__init__.py +43 -0
- truthound/realtime/adapters/base.py +533 -0
- truthound/realtime/adapters/kafka.py +487 -0
- truthound/realtime/adapters/kinesis.py +479 -0
- truthound/realtime/adapters/mock.py +243 -0
- truthound/realtime/base.py +553 -0
- truthound/realtime/factory.py +382 -0
- truthound/realtime/incremental.py +660 -0
- truthound/realtime/processing/__init__.py +67 -0
- truthound/realtime/processing/exactly_once.py +575 -0
- truthound/realtime/processing/state.py +547 -0
- truthound/realtime/processing/windows.py +647 -0
- truthound/realtime/protocols.py +569 -0
- truthound/realtime/streaming.py +605 -0
- truthound/realtime/testing/__init__.py +32 -0
- truthound/realtime/testing/containers.py +615 -0
- truthound/realtime/testing/fixtures.py +484 -0
- truthound/report.py +280 -0
- truthound/reporters/__init__.py +46 -0
- truthound/reporters/_protocols.py +30 -0
- truthound/reporters/base.py +324 -0
- truthound/reporters/ci/__init__.py +66 -0
- truthound/reporters/ci/azure.py +436 -0
- truthound/reporters/ci/base.py +509 -0
- truthound/reporters/ci/bitbucket.py +567 -0
- truthound/reporters/ci/circleci.py +547 -0
- truthound/reporters/ci/detection.py +364 -0
- truthound/reporters/ci/factory.py +182 -0
- truthound/reporters/ci/github.py +388 -0
- truthound/reporters/ci/gitlab.py +471 -0
- truthound/reporters/ci/jenkins.py +525 -0
- truthound/reporters/console_reporter.py +299 -0
- truthound/reporters/factory.py +211 -0
- truthound/reporters/html_reporter.py +524 -0
- truthound/reporters/json_reporter.py +256 -0
- truthound/reporters/markdown_reporter.py +280 -0
- truthound/reporters/sdk/__init__.py +174 -0
- truthound/reporters/sdk/builder.py +558 -0
- truthound/reporters/sdk/mixins.py +1150 -0
- truthound/reporters/sdk/schema.py +1493 -0
- truthound/reporters/sdk/templates.py +666 -0
- truthound/reporters/sdk/testing.py +968 -0
- truthound/scanners.py +170 -0
- truthound/scheduling/__init__.py +122 -0
- truthound/scheduling/cron.py +1136 -0
- truthound/scheduling/presets.py +212 -0
- truthound/schema.py +275 -0
- truthound/secrets/__init__.py +173 -0
- truthound/secrets/base.py +618 -0
- truthound/secrets/cloud.py +682 -0
- truthound/secrets/integration.py +507 -0
- truthound/secrets/manager.py +633 -0
- truthound/secrets/oidc/__init__.py +172 -0
- truthound/secrets/oidc/base.py +902 -0
- truthound/secrets/oidc/credential_provider.py +623 -0
- truthound/secrets/oidc/exchangers.py +1001 -0
- truthound/secrets/oidc/github/__init__.py +110 -0
- truthound/secrets/oidc/github/claims.py +718 -0
- truthound/secrets/oidc/github/enhanced_provider.py +693 -0
- truthound/secrets/oidc/github/trust_policy.py +742 -0
- truthound/secrets/oidc/github/verification.py +723 -0
- truthound/secrets/oidc/github/workflow.py +691 -0
- truthound/secrets/oidc/providers.py +825 -0
- truthound/secrets/providers.py +506 -0
- truthound/secrets/resolver.py +495 -0
- truthound/stores/__init__.py +177 -0
- truthound/stores/backends/__init__.py +18 -0
- truthound/stores/backends/_protocols.py +340 -0
- truthound/stores/backends/azure_blob.py +530 -0
- truthound/stores/backends/concurrent_filesystem.py +915 -0
- truthound/stores/backends/connection_pool.py +1365 -0
- truthound/stores/backends/database.py +743 -0
- truthound/stores/backends/filesystem.py +538 -0
- truthound/stores/backends/gcs.py +399 -0
- truthound/stores/backends/memory.py +354 -0
- truthound/stores/backends/s3.py +434 -0
- truthound/stores/backpressure/__init__.py +84 -0
- truthound/stores/backpressure/base.py +375 -0
- truthound/stores/backpressure/circuit_breaker.py +434 -0
- truthound/stores/backpressure/monitor.py +376 -0
- truthound/stores/backpressure/strategies.py +677 -0
- truthound/stores/base.py +551 -0
- truthound/stores/batching/__init__.py +65 -0
- truthound/stores/batching/base.py +305 -0
- truthound/stores/batching/buffer.py +370 -0
- truthound/stores/batching/store.py +248 -0
- truthound/stores/batching/writer.py +521 -0
- truthound/stores/caching/__init__.py +60 -0
- truthound/stores/caching/backends.py +684 -0
- truthound/stores/caching/base.py +356 -0
- truthound/stores/caching/store.py +305 -0
- truthound/stores/compression/__init__.py +193 -0
- truthound/stores/compression/adaptive.py +694 -0
- truthound/stores/compression/base.py +514 -0
- truthound/stores/compression/pipeline.py +868 -0
- truthound/stores/compression/providers.py +672 -0
- truthound/stores/compression/streaming.py +832 -0
- truthound/stores/concurrency/__init__.py +81 -0
- truthound/stores/concurrency/atomic.py +556 -0
- truthound/stores/concurrency/index.py +775 -0
- truthound/stores/concurrency/locks.py +576 -0
- truthound/stores/concurrency/manager.py +482 -0
- truthound/stores/encryption/__init__.py +297 -0
- truthound/stores/encryption/base.py +952 -0
- truthound/stores/encryption/keys.py +1191 -0
- truthound/stores/encryption/pipeline.py +903 -0
- truthound/stores/encryption/providers.py +953 -0
- truthound/stores/encryption/streaming.py +950 -0
- truthound/stores/expectations.py +227 -0
- truthound/stores/factory.py +246 -0
- truthound/stores/migration/__init__.py +75 -0
- truthound/stores/migration/base.py +480 -0
- truthound/stores/migration/manager.py +347 -0
- truthound/stores/migration/registry.py +382 -0
- truthound/stores/migration/store.py +559 -0
- truthound/stores/observability/__init__.py +106 -0
- truthound/stores/observability/audit.py +718 -0
- truthound/stores/observability/config.py +270 -0
- truthound/stores/observability/factory.py +208 -0
- truthound/stores/observability/metrics.py +636 -0
- truthound/stores/observability/protocols.py +410 -0
- truthound/stores/observability/store.py +570 -0
- truthound/stores/observability/tracing.py +784 -0
- truthound/stores/replication/__init__.py +76 -0
- truthound/stores/replication/base.py +260 -0
- truthound/stores/replication/monitor.py +269 -0
- truthound/stores/replication/store.py +439 -0
- truthound/stores/replication/syncer.py +391 -0
- truthound/stores/results.py +359 -0
- truthound/stores/retention/__init__.py +77 -0
- truthound/stores/retention/base.py +378 -0
- truthound/stores/retention/policies.py +621 -0
- truthound/stores/retention/scheduler.py +279 -0
- truthound/stores/retention/store.py +526 -0
- truthound/stores/streaming/__init__.py +138 -0
- truthound/stores/streaming/base.py +801 -0
- truthound/stores/streaming/database.py +984 -0
- truthound/stores/streaming/filesystem.py +719 -0
- truthound/stores/streaming/reader.py +629 -0
- truthound/stores/streaming/s3.py +843 -0
- truthound/stores/streaming/writer.py +790 -0
- truthound/stores/tiering/__init__.py +108 -0
- truthound/stores/tiering/base.py +462 -0
- truthound/stores/tiering/manager.py +249 -0
- truthound/stores/tiering/policies.py +692 -0
- truthound/stores/tiering/store.py +526 -0
- truthound/stores/versioning/__init__.py +56 -0
- truthound/stores/versioning/base.py +376 -0
- truthound/stores/versioning/store.py +660 -0
- truthound/stores/versioning/strategies.py +353 -0
- truthound/types.py +56 -0
- truthound/validators/__init__.py +774 -0
- truthound/validators/aggregate/__init__.py +27 -0
- truthound/validators/aggregate/central.py +116 -0
- truthound/validators/aggregate/extremes.py +116 -0
- truthound/validators/aggregate/spread.py +118 -0
- truthound/validators/aggregate/sum.py +64 -0
- truthound/validators/aggregate/type.py +78 -0
- truthound/validators/anomaly/__init__.py +93 -0
- truthound/validators/anomaly/base.py +431 -0
- truthound/validators/anomaly/ml_based.py +1190 -0
- truthound/validators/anomaly/multivariate.py +647 -0
- truthound/validators/anomaly/statistical.py +599 -0
- truthound/validators/base.py +1089 -0
- truthound/validators/business_rule/__init__.py +46 -0
- truthound/validators/business_rule/base.py +147 -0
- truthound/validators/business_rule/checksum.py +509 -0
- truthound/validators/business_rule/financial.py +526 -0
- truthound/validators/cache.py +733 -0
- truthound/validators/completeness/__init__.py +39 -0
- truthound/validators/completeness/conditional.py +73 -0
- truthound/validators/completeness/default.py +98 -0
- truthound/validators/completeness/empty.py +103 -0
- truthound/validators/completeness/nan.py +337 -0
- truthound/validators/completeness/null.py +152 -0
- truthound/validators/cross_table/__init__.py +17 -0
- truthound/validators/cross_table/aggregate.py +333 -0
- truthound/validators/cross_table/row_count.py +122 -0
- truthound/validators/datetime/__init__.py +29 -0
- truthound/validators/datetime/format.py +78 -0
- truthound/validators/datetime/freshness.py +269 -0
- truthound/validators/datetime/order.py +73 -0
- truthound/validators/datetime/parseable.py +185 -0
- truthound/validators/datetime/range.py +202 -0
- truthound/validators/datetime/timezone.py +69 -0
- truthound/validators/distribution/__init__.py +49 -0
- truthound/validators/distribution/distribution.py +128 -0
- truthound/validators/distribution/monotonic.py +119 -0
- truthound/validators/distribution/outlier.py +178 -0
- truthound/validators/distribution/quantile.py +80 -0
- truthound/validators/distribution/range.py +254 -0
- truthound/validators/distribution/set.py +125 -0
- truthound/validators/distribution/statistical.py +459 -0
- truthound/validators/drift/__init__.py +79 -0
- truthound/validators/drift/base.py +427 -0
- truthound/validators/drift/multi_feature.py +401 -0
- truthound/validators/drift/numeric.py +395 -0
- truthound/validators/drift/psi.py +446 -0
- truthound/validators/drift/statistical.py +510 -0
- truthound/validators/enterprise.py +1658 -0
- truthound/validators/geospatial/__init__.py +80 -0
- truthound/validators/geospatial/base.py +97 -0
- truthound/validators/geospatial/boundary.py +238 -0
- truthound/validators/geospatial/coordinate.py +351 -0
- truthound/validators/geospatial/distance.py +399 -0
- truthound/validators/geospatial/polygon.py +665 -0
- truthound/validators/i18n/__init__.py +308 -0
- truthound/validators/i18n/bidi.py +571 -0
- truthound/validators/i18n/catalogs.py +570 -0
- truthound/validators/i18n/dialects.py +763 -0
- truthound/validators/i18n/extended_catalogs.py +549 -0
- truthound/validators/i18n/formatting.py +1434 -0
- truthound/validators/i18n/loader.py +1020 -0
- truthound/validators/i18n/messages.py +521 -0
- truthound/validators/i18n/plural.py +683 -0
- truthound/validators/i18n/protocols.py +855 -0
- truthound/validators/i18n/tms.py +1162 -0
- truthound/validators/localization/__init__.py +53 -0
- truthound/validators/localization/base.py +122 -0
- truthound/validators/localization/chinese.py +362 -0
- truthound/validators/localization/japanese.py +275 -0
- truthound/validators/localization/korean.py +524 -0
- truthound/validators/memory/__init__.py +94 -0
- truthound/validators/memory/approximate_knn.py +506 -0
- truthound/validators/memory/base.py +547 -0
- truthound/validators/memory/sgd_online.py +719 -0
- truthound/validators/memory/streaming_ecdf.py +753 -0
- truthound/validators/ml_feature/__init__.py +54 -0
- truthound/validators/ml_feature/base.py +249 -0
- truthound/validators/ml_feature/correlation.py +299 -0
- truthound/validators/ml_feature/leakage.py +344 -0
- truthound/validators/ml_feature/null_impact.py +270 -0
- truthound/validators/ml_feature/scale.py +264 -0
- truthound/validators/multi_column/__init__.py +89 -0
- truthound/validators/multi_column/arithmetic.py +284 -0
- truthound/validators/multi_column/base.py +231 -0
- truthound/validators/multi_column/comparison.py +273 -0
- truthound/validators/multi_column/consistency.py +312 -0
- truthound/validators/multi_column/statistical.py +299 -0
- truthound/validators/optimization/__init__.py +164 -0
- truthound/validators/optimization/aggregation.py +563 -0
- truthound/validators/optimization/covariance.py +556 -0
- truthound/validators/optimization/geo.py +626 -0
- truthound/validators/optimization/graph.py +587 -0
- truthound/validators/optimization/orchestrator.py +970 -0
- truthound/validators/optimization/profiling.py +1312 -0
- truthound/validators/privacy/__init__.py +223 -0
- truthound/validators/privacy/base.py +635 -0
- truthound/validators/privacy/ccpa.py +670 -0
- truthound/validators/privacy/gdpr.py +728 -0
- truthound/validators/privacy/global_patterns.py +604 -0
- truthound/validators/privacy/plugins.py +867 -0
- truthound/validators/profiling/__init__.py +52 -0
- truthound/validators/profiling/base.py +175 -0
- truthound/validators/profiling/cardinality.py +312 -0
- truthound/validators/profiling/entropy.py +391 -0
- truthound/validators/profiling/frequency.py +455 -0
- truthound/validators/pushdown_support.py +660 -0
- truthound/validators/query/__init__.py +91 -0
- truthound/validators/query/aggregate.py +346 -0
- truthound/validators/query/base.py +246 -0
- truthound/validators/query/column.py +249 -0
- truthound/validators/query/expression.py +274 -0
- truthound/validators/query/result.py +323 -0
- truthound/validators/query/row_count.py +264 -0
- truthound/validators/referential/__init__.py +80 -0
- truthound/validators/referential/base.py +395 -0
- truthound/validators/referential/cascade.py +391 -0
- truthound/validators/referential/circular.py +563 -0
- truthound/validators/referential/foreign_key.py +624 -0
- truthound/validators/referential/orphan.py +485 -0
- truthound/validators/registry.py +112 -0
- truthound/validators/schema/__init__.py +41 -0
- truthound/validators/schema/column_count.py +142 -0
- truthound/validators/schema/column_exists.py +80 -0
- truthound/validators/schema/column_order.py +82 -0
- truthound/validators/schema/column_pair.py +85 -0
- truthound/validators/schema/column_pair_set.py +195 -0
- truthound/validators/schema/column_type.py +94 -0
- truthound/validators/schema/multi_column.py +53 -0
- truthound/validators/schema/multi_column_aggregate.py +175 -0
- truthound/validators/schema/referential.py +274 -0
- truthound/validators/schema/table_schema.py +91 -0
- truthound/validators/schema_validator.py +219 -0
- truthound/validators/sdk/__init__.py +250 -0
- truthound/validators/sdk/builder.py +680 -0
- truthound/validators/sdk/decorators.py +474 -0
- truthound/validators/sdk/enterprise/__init__.py +211 -0
- truthound/validators/sdk/enterprise/docs.py +725 -0
- truthound/validators/sdk/enterprise/fuzzing.py +659 -0
- truthound/validators/sdk/enterprise/licensing.py +709 -0
- truthound/validators/sdk/enterprise/manager.py +543 -0
- truthound/validators/sdk/enterprise/resources.py +628 -0
- truthound/validators/sdk/enterprise/sandbox.py +766 -0
- truthound/validators/sdk/enterprise/signing.py +603 -0
- truthound/validators/sdk/enterprise/templates.py +865 -0
- truthound/validators/sdk/enterprise/versioning.py +659 -0
- truthound/validators/sdk/templates.py +757 -0
- truthound/validators/sdk/testing.py +807 -0
- truthound/validators/security/__init__.py +181 -0
- truthound/validators/security/redos/__init__.py +182 -0
- truthound/validators/security/redos/core.py +861 -0
- truthound/validators/security/redos/cpu_monitor.py +593 -0
- truthound/validators/security/redos/cve_database.py +791 -0
- truthound/validators/security/redos/ml/__init__.py +155 -0
- truthound/validators/security/redos/ml/base.py +785 -0
- truthound/validators/security/redos/ml/datasets.py +618 -0
- truthound/validators/security/redos/ml/features.py +359 -0
- truthound/validators/security/redos/ml/models.py +1000 -0
- truthound/validators/security/redos/ml/predictor.py +507 -0
- truthound/validators/security/redos/ml/storage.py +632 -0
- truthound/validators/security/redos/ml/training.py +571 -0
- truthound/validators/security/redos/ml_analyzer.py +937 -0
- truthound/validators/security/redos/optimizer.py +674 -0
- truthound/validators/security/redos/profiler.py +682 -0
- truthound/validators/security/redos/re2_engine.py +709 -0
- truthound/validators/security/redos.py +886 -0
- truthound/validators/security/sql_security.py +1247 -0
- truthound/validators/streaming/__init__.py +126 -0
- truthound/validators/streaming/base.py +292 -0
- truthound/validators/streaming/completeness.py +210 -0
- truthound/validators/streaming/mixin.py +575 -0
- truthound/validators/streaming/range.py +308 -0
- truthound/validators/streaming/sources.py +846 -0
- truthound/validators/string/__init__.py +57 -0
- truthound/validators/string/casing.py +158 -0
- truthound/validators/string/charset.py +96 -0
- truthound/validators/string/format.py +501 -0
- truthound/validators/string/json.py +77 -0
- truthound/validators/string/json_schema.py +184 -0
- truthound/validators/string/length.py +104 -0
- truthound/validators/string/like_pattern.py +237 -0
- truthound/validators/string/regex.py +202 -0
- truthound/validators/string/regex_extended.py +435 -0
- truthound/validators/table/__init__.py +88 -0
- truthound/validators/table/base.py +78 -0
- truthound/validators/table/column_count.py +198 -0
- truthound/validators/table/freshness.py +362 -0
- truthound/validators/table/row_count.py +251 -0
- truthound/validators/table/schema.py +333 -0
- truthound/validators/table/size.py +285 -0
- truthound/validators/timeout/__init__.py +102 -0
- truthound/validators/timeout/advanced/__init__.py +247 -0
- truthound/validators/timeout/advanced/circuit_breaker.py +675 -0
- truthound/validators/timeout/advanced/prediction.py +773 -0
- truthound/validators/timeout/advanced/priority.py +618 -0
- truthound/validators/timeout/advanced/redis_backend.py +770 -0
- truthound/validators/timeout/advanced/retry.py +721 -0
- truthound/validators/timeout/advanced/sampling.py +788 -0
- truthound/validators/timeout/advanced/sla.py +661 -0
- truthound/validators/timeout/advanced/telemetry.py +804 -0
- truthound/validators/timeout/cascade.py +477 -0
- truthound/validators/timeout/deadline.py +657 -0
- truthound/validators/timeout/degradation.py +525 -0
- truthound/validators/timeout/distributed.py +597 -0
- truthound/validators/timeseries/__init__.py +89 -0
- truthound/validators/timeseries/base.py +326 -0
- truthound/validators/timeseries/completeness.py +617 -0
- truthound/validators/timeseries/gap.py +485 -0
- truthound/validators/timeseries/monotonic.py +310 -0
- truthound/validators/timeseries/seasonality.py +422 -0
- truthound/validators/timeseries/trend.py +510 -0
- truthound/validators/uniqueness/__init__.py +59 -0
- truthound/validators/uniqueness/approximate.py +475 -0
- truthound/validators/uniqueness/distinct_values.py +253 -0
- truthound/validators/uniqueness/duplicate.py +118 -0
- truthound/validators/uniqueness/primary_key.py +140 -0
- truthound/validators/uniqueness/unique.py +191 -0
- truthound/validators/uniqueness/within_record.py +599 -0
- truthound/validators/utils.py +756 -0
- truthound-1.0.8.dist-info/METADATA +474 -0
- truthound-1.0.8.dist-info/RECORD +877 -0
- truthound-1.0.8.dist-info/WHEEL +4 -0
- truthound-1.0.8.dist-info/entry_points.txt +2 -0
- truthound-1.0.8.dist-info/licenses/LICENSE +190 -0
|
@@ -0,0 +1,1658 @@
|
|
|
1
|
+
"""Enterprise features for validators.
|
|
2
|
+
|
|
3
|
+
This module provides production-ready integrations:
|
|
4
|
+
- #14: Audit logging integration (who/when/what)
|
|
5
|
+
- #15: Metrics collection (Prometheus/StatsD)
|
|
6
|
+
- #16: Reference data caching
|
|
7
|
+
- #17: Parallel processing support
|
|
8
|
+
- #18: Configuration validation
|
|
9
|
+
- #19: Polars version compatibility
|
|
10
|
+
- #20: Internationalization support
|
|
11
|
+
|
|
12
|
+
These features integrate with the existing audit, observability, and cache modules.
|
|
13
|
+
"""
|
|
14
|
+
|
|
15
|
+
from __future__ import annotations
|
|
16
|
+
|
|
17
|
+
import functools
|
|
18
|
+
import hashlib
|
|
19
|
+
import locale
|
|
20
|
+
import os
|
|
21
|
+
import sys
|
|
22
|
+
import threading
|
|
23
|
+
import time
|
|
24
|
+
import warnings
|
|
25
|
+
from abc import ABC, abstractmethod
|
|
26
|
+
from concurrent.futures import ThreadPoolExecutor, ProcessPoolExecutor, as_completed
|
|
27
|
+
from contextlib import contextmanager
|
|
28
|
+
from dataclasses import dataclass, field
|
|
29
|
+
from datetime import datetime, timedelta
|
|
30
|
+
from enum import Enum
|
|
31
|
+
from pathlib import Path
|
|
32
|
+
from typing import (
|
|
33
|
+
Any,
|
|
34
|
+
Callable,
|
|
35
|
+
Generic,
|
|
36
|
+
Iterator,
|
|
37
|
+
Literal,
|
|
38
|
+
Mapping,
|
|
39
|
+
Protocol,
|
|
40
|
+
Sequence,
|
|
41
|
+
TypeVar,
|
|
42
|
+
overload,
|
|
43
|
+
)
|
|
44
|
+
|
|
45
|
+
import polars as pl
|
|
46
|
+
|
|
47
|
+
from truthound.types import Severity
|
|
48
|
+
from truthound.validators.base import (
|
|
49
|
+
Validator,
|
|
50
|
+
ValidatorConfig,
|
|
51
|
+
ValidationIssue,
|
|
52
|
+
ValidatorExecutionResult,
|
|
53
|
+
ValidationResult,
|
|
54
|
+
ValidatorLogger,
|
|
55
|
+
ValidationErrorContext,
|
|
56
|
+
GracefulValidator,
|
|
57
|
+
)
|
|
58
|
+
|
|
59
|
+
|
|
60
|
+
# =============================================================================
|
|
61
|
+
# #14: Audit Logging Integration
|
|
62
|
+
# =============================================================================
|
|
63
|
+
|
|
64
|
+
|
|
65
|
+
@dataclass
class ValidationAuditRecord:
    """Audit record for a single validation operation.

    Tracks who ran the validation, when it ran, what was validated, and
    what the outcome was. Every field has a default, so an empty record
    can be created with ``ValidationAuditRecord()``.
    """

    # Who
    user_id: str | None = None
    user_name: str | None = None
    service_name: str | None = None
    client_ip: str | None = None

    # When
    # Naive timestamp produced by ``datetime.utcnow`` when not supplied.
    timestamp: datetime = field(default_factory=datetime.utcnow)
    duration_ms: float = 0.0

    # What
    validator_name: str = ""
    validator_category: str = ""
    data_source: str = ""
    row_count: int = 0
    column_count: int = 0
    columns_validated: tuple[str, ...] = ()

    # Results
    issues_found: int = 0
    severity_counts: dict[str, int] = field(default_factory=dict)
    status: str = "unknown"
    error_message: str | None = None

    # Context
    session_id: str | None = None
    request_id: str | None = None
    environment: str = "unknown"
    metadata: dict[str, Any] = field(default_factory=dict)

    def to_dict(self) -> dict[str, Any]:
        """Convert the record to a plain dictionary for serialization.

        The timestamp is rendered with ``isoformat()`` and
        ``columns_validated`` becomes a list. ``severity_counts`` and
        ``metadata`` are returned as shallow copies so that editing the
        result does not modify this record.

        Returns:
            Dictionary with one key per field of the record.
        """
        return {
            "user_id": self.user_id,
            "user_name": self.user_name,
            "service_name": self.service_name,
            "client_ip": self.client_ip,
            "timestamp": self.timestamp.isoformat(),
            "duration_ms": self.duration_ms,
            "validator_name": self.validator_name,
            "validator_category": self.validator_category,
            "data_source": self.data_source,
            "row_count": self.row_count,
            "column_count": self.column_count,
            "columns_validated": list(self.columns_validated),
            "issues_found": self.issues_found,
            # Copies: the caller must not be able to mutate the record
            # through the serialized view.
            "severity_counts": dict(self.severity_counts),
            "status": self.status,
            "error_message": self.error_message,
            "session_id": self.session_id,
            "request_id": self.request_id,
            "environment": self.environment,
            "metadata": dict(self.metadata),
        }
|
|
126
|
+
|
|
127
|
+
|
|
128
|
+
class ValidationAuditLogger:
    """Audit logger specifically for validation operations.

    Keeps a bounded, in-memory history of :class:`ValidationAuditRecord`
    objects. When ``truthound.audit`` can be imported and its logger
    obtained, each validation is also forwarded to that audit logger.
    """

    _instance: "ValidationAuditLogger | None" = None
    # Class-level lock: used for singleton creation and for every access
    # to the in-memory record list.
    _lock = threading.Lock()

    def __init__(self) -> None:
        self._records: list[ValidationAuditRecord] = []
        self._audit_logger: Any = None
        # Always defined so standalone mode never hits a missing attribute.
        self._audit_event_type: Any = None
        self._enabled = True
        self._max_records = 10000  # In-memory limit
        self.logger = ValidatorLogger("ValidationAuditLogger")

        # Try to integrate with main audit system
        self._init_audit_integration()

    def _init_audit_integration(self) -> None:
        """Initialize integration with truthound.audit if available.

        Any failure (import or initialisation) leaves the logger in
        standalone mode with ``_audit_logger`` set to ``None``.
        """
        try:
            from truthound.audit import get_audit_logger, AuditEventType

            self._audit_logger = get_audit_logger()
            self._audit_event_type = AuditEventType
            self.logger.debug("Integrated with truthound.audit system")
        except Exception:
            # ImportError is already an Exception subclass.
            self._audit_logger = None
            self._audit_event_type = None
            self.logger.debug("truthound.audit not available, using standalone mode")

    @classmethod
    def get_instance(cls) -> "ValidationAuditLogger":
        """Get or create the singleton instance."""
        if cls._instance is None:
            with cls._lock:
                # Re-check under the lock; another thread may have won.
                if cls._instance is None:
                    cls._instance = cls()
        return cls._instance

    def log_validation(
        self,
        validator: Validator,
        lf: pl.LazyFrame,
        result: ValidatorExecutionResult,
        user_id: str | None = None,
        session_id: str | None = None,
        data_source: str | None = None,
        **metadata: Any,
    ) -> ValidationAuditRecord:
        """Log a validation operation.

        Args:
            validator: The validator that was executed
            lf: The LazyFrame that was validated
            result: The execution result
            user_id: Optional user identifier; falls back to the ``USER``
                environment variable in the stored record
            session_id: Optional session identifier
            data_source: Description of data source
            **metadata: Additional metadata

        Returns:
            The created audit record. When logging is disabled an empty
            default record is returned and nothing is stored.
        """
        if not self._enabled:
            return ValidationAuditRecord()

        # Collect schema info. Best effort: a failing row count must not
        # discard column names that were already obtained.
        columns: list[str] = []
        row_count = 0
        try:
            columns = lf.collect_schema().names()
            row_count = lf.select(pl.len()).collect().item()
        except Exception as e:
            self.logger.debug(f"Could not inspect validated frame: {e}")

        # Build severity counts
        severity_counts: dict[str, int] = {}
        for issue in result.issues:
            sev = issue.severity.value
            severity_counts[sev] = severity_counts.get(sev, 0) + 1

        # Create audit record
        record = ValidationAuditRecord(
            user_id=user_id or os.environ.get("USER"),
            service_name=os.environ.get("SERVICE_NAME"),
            timestamp=datetime.utcnow(),
            duration_ms=result.execution_time_ms,
            validator_name=result.validator_name,
            validator_category=getattr(validator, "category", "unknown"),
            data_source=data_source or "unknown",
            row_count=row_count,
            column_count=len(columns),
            columns_validated=tuple(columns),
            issues_found=len(result.issues),
            severity_counts=severity_counts,
            status=result.status.value,
            error_message=(
                result.error_context.message if result.error_context else None
            ),
            session_id=session_id,
            environment=os.environ.get("ENVIRONMENT", "development"),
            metadata=metadata,
        )

        # Store locally, dropping the oldest records once over the limit
        with self._lock:
            self._records.append(record)
            overflow = len(self._records) - self._max_records
            if overflow > 0:
                del self._records[:overflow]

        # Forward to main audit system if available
        if self._audit_logger:
            try:
                from truthound.audit import AuditResource, AuditActor

                self._audit_logger.log(
                    event_type=self._audit_event_type.READ,
                    action=f"validate_{validator.name}",
                    actor=AuditActor(id=user_id or "system"),
                    resource=AuditResource(
                        id=data_source or "unknown",
                        type="dataset",
                    ),
                )
            except Exception as e:
                # Forwarding is best effort; the local record is kept.
                self.logger.debug(f"Audit system forwarding skipped: {e}")

        return record

    def get_records(
        self,
        validator_name: str | None = None,
        user_id: str | None = None,
        since: datetime | None = None,
        limit: int = 100,
    ) -> list[ValidationAuditRecord]:
        """Query audit records.

        Args:
            validator_name: Filter by validator name
            user_id: Filter by user ID
            since: Keep only records whose timestamp is at or after this
            limit: Maximum records to return; the most recent matches are
                kept. A value of zero or less returns an empty list.

        Returns:
            List of matching audit records, oldest first
        """
        # ``records[-0:]`` is the whole list, so a non-positive limit has
        # to be handled before slicing.
        if limit <= 0:
            return []

        with self._lock:
            records = self._records.copy()

        if validator_name:
            records = [r for r in records if r.validator_name == validator_name]
        if user_id:
            records = [r for r in records if r.user_id == user_id]
        if since:
            records = [r for r in records if r.timestamp >= since]

        return records[-limit:]

    def clear(self) -> None:
        """Clear all audit records."""
        with self._lock:
            self._records.clear()

    def enable(self) -> None:
        """Enable audit logging."""
        self._enabled = True

    def disable(self) -> None:
        """Disable audit logging."""
        self._enabled = False
|
|
300
|
+
|
|
301
|
+
|
|
302
|
+
def get_validation_audit_logger() -> ValidationAuditLogger:
    """Return the shared :class:`ValidationAuditLogger` singleton."""
    shared_logger = ValidationAuditLogger.get_instance()
    return shared_logger
|
|
305
|
+
|
|
306
|
+
|
|
307
|
+
# =============================================================================
|
|
308
|
+
# #15: Metrics Collection Integration
|
|
309
|
+
# =============================================================================
|
|
310
|
+
|
|
311
|
+
|
|
312
|
+
class MetricsCollector:
    """Collects metrics for validation operations.

    Local counters and duration sums are always maintained. When
    ``truthound.observability.metrics`` can be imported, the same events
    are also forwarded to its ``Counter``, ``Histogram`` and ``Gauge``
    objects.
    """

    _instance: "MetricsCollector | None" = None
    # Class-level lock: used for singleton creation and for the local
    # statistics dictionaries.
    _lock = threading.Lock()

    def __init__(self) -> None:
        self._metrics_system: Any = None
        self._enabled = True
        self._local_stats: dict[str, float] = {}
        self._local_counts: dict[str, int] = {}
        self.logger = ValidatorLogger("MetricsCollector")

        self._init_metrics_integration()

    def _init_metrics_integration(self) -> None:
        """Initialize metrics backend integration.

        On any failure every metric handle is reset to ``None`` so the
        collector falls back to local statistics only.
        """
        try:
            from truthound.observability.metrics import Counter, Histogram, Gauge

            self._validation_counter = Counter(
                "truthound_validations_total",
                "Total number of validations",
                labels=("validator", "status", "category"),
            )
            self._validation_duration = Histogram(
                "truthound_validation_duration_ms",
                "Validation duration in milliseconds",
                labels=("validator", "category"),
            )
            self._issues_counter = Counter(
                "truthound_issues_total",
                "Total issues found",
                labels=("validator", "severity", "category"),
            )
            self._active_validations = Gauge(
                "truthound_active_validations",
                "Currently running validations",
                labels=("category",),
            )
            self._metrics_system = True
            self.logger.debug("Integrated with truthound.observability.metrics")
        except Exception as e:
            # ImportError is already an Exception subclass.
            self._metrics_system = None
            self._validation_counter = None
            self._validation_duration = None
            self._issues_counter = None
            self._active_validations = None
            self.logger.debug(f"Metrics integration not available: {e}")

    @classmethod
    def get_instance(cls) -> "MetricsCollector":
        """Get or create the singleton instance."""
        if cls._instance is None:
            with cls._lock:
                # Re-check under the lock; another thread may have won.
                if cls._instance is None:
                    cls._instance = cls()
        return cls._instance

    def record_validation(
        self,
        validator_name: str,
        category: str,
        status: str,
        duration_ms: float,
        issues: list[ValidationIssue],
    ) -> None:
        """Record metrics for a validation.

        Args:
            validator_name: Name of the validator
            category: Validator category
            status: Execution status
            duration_ms: Duration in milliseconds
            issues: List of validation issues
        """
        if not self._enabled:
            return

        # Local stats (always available)
        count_key = f"{validator_name}_{status}"
        duration_key = f"{validator_name}_duration_sum"
        with self._lock:
            self._local_counts[count_key] = self._local_counts.get(count_key, 0) + 1
            self._local_stats[duration_key] = (
                self._local_stats.get(duration_key, 0) + duration_ms
            )

        # Forward to metrics system if available
        if self._metrics_system and self._validation_counter:
            try:
                self._validation_counter.inc(
                    validator=validator_name,
                    status=status,
                    category=category,
                )

                if self._validation_duration:
                    self._validation_duration.observe(
                        duration_ms,
                        validator=validator_name,
                        category=category,
                    )

                if self._issues_counter:
                    for issue in issues:
                        self._issues_counter.inc(
                            validator=validator_name,
                            severity=issue.severity.value,
                            category=category,
                        )
            except Exception as e:
                # Backend forwarding is best effort; local stats are kept.
                self.logger.debug(f"Metrics recording skipped: {e}")

    @contextmanager
    def track_validation(
        self,
        validator_name: str,
        category: str,
    ) -> Iterator[dict[str, Any]]:
        """Context manager to track validation execution.

        The caller fills in ``"status"`` and ``"issues"`` on the yielded
        dict; on exit (normal or via exception) the elapsed time and
        those values are passed to :meth:`record_validation`.

        Args:
            validator_name: Name of the validator
            category: Validator category

        Yields:
            Dict to store results, pre-filled with status ``"unknown"``
            and an empty issue list
        """
        # Monotonic clock: unaffected by wall-clock adjustments.
        start_time = time.perf_counter()
        result: dict[str, Any] = {"status": "unknown", "issues": []}

        # Track active validations. Remember whether the increment took
        # effect so the gauge is only decremented when it was raised.
        gauge_incremented = False
        if self._metrics_system and self._active_validations:
            try:
                self._active_validations.inc(category=category)
                gauge_incremented = True
            except Exception as e:
                self.logger.debug(f"Active validation gauge increment skipped: {e}")

        try:
            yield result
        finally:
            duration_ms = (time.perf_counter() - start_time) * 1000

            # Decrement active
            if gauge_incremented:
                try:
                    self._active_validations.dec(category=category)
                except Exception as e:
                    self.logger.debug(
                        f"Active validation gauge decrement skipped: {e}"
                    )

            # Record final metrics
            self.record_validation(
                validator_name=validator_name,
                category=category,
                status=result.get("status", "unknown"),
                duration_ms=duration_ms,
                issues=result.get("issues", []),
            )

    def get_stats(self) -> dict[str, Any]:
        """Get a snapshot (copies) of the local statistics."""
        with self._lock:
            return {
                "counts": self._local_counts.copy(),
                "stats": self._local_stats.copy(),
            }
|
|
481
|
+
|
|
482
|
+
|
|
483
|
+
def get_metrics_collector() -> MetricsCollector:
    """Return the shared :class:`MetricsCollector` singleton."""
    shared_collector = MetricsCollector.get_instance()
    return shared_collector
|
|
486
|
+
|
|
487
|
+
|
|
488
|
+
# =============================================================================
|
|
489
|
+
# #16: Reference Data Caching
|
|
490
|
+
# =============================================================================
|
|
491
|
+
|
|
492
|
+
|
|
493
|
+
@dataclass
class CacheEntry:
    """Entry in the reference data cache."""

    data: pl.LazyFrame | pl.DataFrame
    created_at: datetime
    # ``None`` means the entry never expires.
    expires_at: datetime | None
    hits: int = 0
    size_bytes: int = 0

    def is_expired(self) -> bool:
        """Check if the entry has expired.

        A naive ``expires_at`` is compared against ``datetime.utcnow()``.
        A timezone-aware ``expires_at`` is compared against the current
        time in its own timezone, because comparing naive and aware
        datetimes raises ``TypeError``.

        Returns:
            True when ``expires_at`` is set and lies strictly in the past.
        """
        deadline = self.expires_at
        if deadline is None:
            return False
        if deadline.utcoffset() is None:
            now = datetime.utcnow()
        else:
            now = datetime.now(deadline.tzinfo)
        return now > deadline
|
|
508
|
+
|
|
509
|
+
|
|
510
|
+
class ReferentialDataCache:
|
|
511
|
+
"""Cache for reference data used in referential integrity checks.
|
|
512
|
+
|
|
513
|
+
Features:
|
|
514
|
+
- LRU eviction policy
|
|
515
|
+
- TTL-based expiration
|
|
516
|
+
- Memory-aware sizing
|
|
517
|
+
- Thread-safe operations
|
|
518
|
+
"""
|
|
519
|
+
|
|
520
|
+
_instance: "ReferentialDataCache | None" = None
|
|
521
|
+
_lock = threading.Lock()
|
|
522
|
+
|
|
523
|
+
def __init__(
    self,
    max_entries: int = 100,
    max_size_mb: float = 500,
    default_ttl_seconds: float = 3600,
) -> None:
    """Create an empty cache with the given capacity limits.

    Args:
        max_entries: Maximum number of entries
        max_size_mb: Maximum total size in MB (stored internally in bytes)
        default_ttl_seconds: Default TTL in seconds
    """
    self.logger = ValidatorLogger("ReferentialDataCache")

    # Capacity limits
    self._max_entries = max_entries
    self._max_size_bytes = int(max_size_mb * 1024 * 1024)
    self._default_ttl = timedelta(seconds=default_ttl_seconds)

    # Storage and bookkeeping, all starting empty / at zero
    self._cache: dict[str, CacheEntry] = {}
    self._total_size = 0
    self._hits = self._misses = 0
|
|
544
|
+
|
|
545
|
+
@classmethod
|
|
546
|
+
def get_instance(cls) -> "ReferentialDataCache":
|
|
547
|
+
"""Get or create singleton instance."""
|
|
548
|
+
if cls._instance is None:
|
|
549
|
+
with cls._lock:
|
|
550
|
+
if cls._instance is None:
|
|
551
|
+
cls._instance = cls()
|
|
552
|
+
return cls._instance
|
|
553
|
+
|
|
554
|
+
def _generate_key(
|
|
555
|
+
self,
|
|
556
|
+
source: str,
|
|
557
|
+
column: str,
|
|
558
|
+
query_hash: str | None = None,
|
|
559
|
+
) -> str:
|
|
560
|
+
"""Generate cache key for reference data."""
|
|
561
|
+
parts = [source, column]
|
|
562
|
+
if query_hash:
|
|
563
|
+
parts.append(query_hash)
|
|
564
|
+
key_str = ":".join(parts)
|
|
565
|
+
return hashlib.sha256(key_str.encode()).hexdigest()[:32]
|
|
566
|
+
|
|
567
|
+
def _estimate_size(self, data: pl.LazyFrame | pl.DataFrame) -> int:
|
|
568
|
+
"""Estimate memory size of data in bytes."""
|
|
569
|
+
try:
|
|
570
|
+
if isinstance(data, pl.LazyFrame):
|
|
571
|
+
# Collect schema only
|
|
572
|
+
schema = data.collect_schema()
|
|
573
|
+
return len(schema.names()) * 1000 # Rough estimate
|
|
574
|
+
else:
|
|
575
|
+
return data.estimated_size()
|
|
576
|
+
except Exception:
|
|
577
|
+
return 0
|
|
578
|
+
|
|
579
|
+
def _evict_if_needed(self, new_size: int) -> None:
|
|
580
|
+
"""Evict entries if needed to make room."""
|
|
581
|
+
# Evict expired entries first
|
|
582
|
+
expired_keys = [k for k, v in self._cache.items() if v.is_expired()]
|
|
583
|
+
for key in expired_keys:
|
|
584
|
+
self._remove_entry(key)
|
|
585
|
+
|
|
586
|
+
# Check entry count
|
|
587
|
+
while len(self._cache) >= self._max_entries:
|
|
588
|
+
self._evict_lru()
|
|
589
|
+
|
|
590
|
+
# Check size
|
|
591
|
+
while self._total_size + new_size > self._max_size_bytes and self._cache:
|
|
592
|
+
self._evict_lru()
|
|
593
|
+
|
|
594
|
+
def _evict_lru(self) -> None:
|
|
595
|
+
"""Evict least recently used entry."""
|
|
596
|
+
if not self._cache:
|
|
597
|
+
return
|
|
598
|
+
|
|
599
|
+
# Find entry with lowest hit count
|
|
600
|
+
lru_key = min(self._cache.keys(), key=lambda k: self._cache[k].hits)
|
|
601
|
+
self._remove_entry(lru_key)
|
|
602
|
+
|
|
603
|
+
def _remove_entry(self, key: str) -> None:
|
|
604
|
+
"""Remove an entry from cache."""
|
|
605
|
+
if key in self._cache:
|
|
606
|
+
entry = self._cache.pop(key)
|
|
607
|
+
self._total_size -= entry.size_bytes
|
|
608
|
+
|
|
609
|
+
def get(
|
|
610
|
+
self,
|
|
611
|
+
source: str,
|
|
612
|
+
column: str,
|
|
613
|
+
query_hash: str | None = None,
|
|
614
|
+
) -> pl.LazyFrame | pl.DataFrame | None:
|
|
615
|
+
"""Get cached reference data.
|
|
616
|
+
|
|
617
|
+
Args:
|
|
618
|
+
source: Data source identifier
|
|
619
|
+
column: Reference column name
|
|
620
|
+
query_hash: Optional query hash for filtered data
|
|
621
|
+
|
|
622
|
+
Returns:
|
|
623
|
+
Cached data or None
|
|
624
|
+
"""
|
|
625
|
+
key = self._generate_key(source, column, query_hash)
|
|
626
|
+
|
|
627
|
+
with self._lock:
|
|
628
|
+
entry = self._cache.get(key)
|
|
629
|
+
if entry is None:
|
|
630
|
+
self._misses += 1
|
|
631
|
+
return None
|
|
632
|
+
|
|
633
|
+
if entry.is_expired():
|
|
634
|
+
self._remove_entry(key)
|
|
635
|
+
self._misses += 1
|
|
636
|
+
return None
|
|
637
|
+
|
|
638
|
+
entry.hits += 1
|
|
639
|
+
self._hits += 1
|
|
640
|
+
return entry.data
|
|
641
|
+
|
|
642
|
+
def set(
|
|
643
|
+
self,
|
|
644
|
+
source: str,
|
|
645
|
+
column: str,
|
|
646
|
+
data: pl.LazyFrame | pl.DataFrame,
|
|
647
|
+
ttl_seconds: float | None = None,
|
|
648
|
+
query_hash: str | None = None,
|
|
649
|
+
) -> None:
|
|
650
|
+
"""Store reference data in cache.
|
|
651
|
+
|
|
652
|
+
Args:
|
|
653
|
+
source: Data source identifier
|
|
654
|
+
column: Reference column name
|
|
655
|
+
data: Data to cache
|
|
656
|
+
ttl_seconds: Optional TTL override
|
|
657
|
+
query_hash: Optional query hash for filtered data
|
|
658
|
+
"""
|
|
659
|
+
key = self._generate_key(source, column, query_hash)
|
|
660
|
+
size = self._estimate_size(data)
|
|
661
|
+
|
|
662
|
+
# Determine expiration
|
|
663
|
+
ttl = timedelta(seconds=ttl_seconds) if ttl_seconds else self._default_ttl
|
|
664
|
+
expires_at = datetime.utcnow() + ttl
|
|
665
|
+
|
|
666
|
+
entry = CacheEntry(
|
|
667
|
+
data=data,
|
|
668
|
+
created_at=datetime.utcnow(),
|
|
669
|
+
expires_at=expires_at,
|
|
670
|
+
size_bytes=size,
|
|
671
|
+
)
|
|
672
|
+
|
|
673
|
+
with self._lock:
|
|
674
|
+
# Remove existing entry if any
|
|
675
|
+
if key in self._cache:
|
|
676
|
+
self._remove_entry(key)
|
|
677
|
+
|
|
678
|
+
# Evict if needed
|
|
679
|
+
self._evict_if_needed(size)
|
|
680
|
+
|
|
681
|
+
# Add new entry
|
|
682
|
+
self._cache[key] = entry
|
|
683
|
+
self._total_size += size
|
|
684
|
+
|
|
685
|
+
def invalidate(
|
|
686
|
+
self,
|
|
687
|
+
source: str | None = None,
|
|
688
|
+
column: str | None = None,
|
|
689
|
+
) -> int:
|
|
690
|
+
"""Invalidate cached entries.
|
|
691
|
+
|
|
692
|
+
Args:
|
|
693
|
+
source: Optional source to match
|
|
694
|
+
column: Optional column to match
|
|
695
|
+
|
|
696
|
+
Returns:
|
|
697
|
+
Number of entries invalidated
|
|
698
|
+
"""
|
|
699
|
+
with self._lock:
|
|
700
|
+
if source is None and column is None:
|
|
701
|
+
count = len(self._cache)
|
|
702
|
+
self._cache.clear()
|
|
703
|
+
self._total_size = 0
|
|
704
|
+
return count
|
|
705
|
+
|
|
706
|
+
keys_to_remove = []
|
|
707
|
+
for key in self._cache:
|
|
708
|
+
# This is a simplistic match - in production you'd want
|
|
709
|
+
# to store source/column in the entry for proper matching
|
|
710
|
+
if source and source in key:
|
|
711
|
+
keys_to_remove.append(key)
|
|
712
|
+
elif column and column in key:
|
|
713
|
+
keys_to_remove.append(key)
|
|
714
|
+
|
|
715
|
+
for key in keys_to_remove:
|
|
716
|
+
self._remove_entry(key)
|
|
717
|
+
|
|
718
|
+
return len(keys_to_remove)
|
|
719
|
+
|
|
720
|
+
def get_stats(self) -> dict[str, Any]:
|
|
721
|
+
"""Get cache statistics."""
|
|
722
|
+
with self._lock:
|
|
723
|
+
total_requests = self._hits + self._misses
|
|
724
|
+
return {
|
|
725
|
+
"entries": len(self._cache),
|
|
726
|
+
"total_size_mb": self._total_size / (1024 * 1024),
|
|
727
|
+
"max_size_mb": self._max_size_bytes / (1024 * 1024),
|
|
728
|
+
"hits": self._hits,
|
|
729
|
+
"misses": self._misses,
|
|
730
|
+
"hit_rate": self._hits / max(total_requests, 1),
|
|
731
|
+
}
|
|
732
|
+
|
|
733
|
+
|
|
734
|
+
def get_reference_cache() -> ReferentialDataCache:
    """Return the shared reference data cache.

    Thin accessor around ``ReferentialDataCache.get_instance``.
    """
    shared_cache = ReferentialDataCache.get_instance()
    return shared_cache
|
|
737
|
+
|
|
738
|
+
|
|
739
|
+
# =============================================================================
|
|
740
|
+
# #17: Parallel Processing Support
|
|
741
|
+
# =============================================================================
|
|
742
|
+
|
|
743
|
+
|
|
744
|
+
class ParallelExecutionMode(Enum):
    """Strategies available for running a batch of validators."""

    # Run validators one after another.
    SEQUENTIAL = "sequential"
    # Run validators on a thread pool.
    THREADING = "threading"
    # Run validators on a process pool.
    MULTIPROCESSING = "multiprocessing"
|
|
750
|
+
|
|
751
|
+
|
|
752
|
+
@dataclass
class ParallelExecutionConfig:
    """Settings that control how validators are executed in parallel."""

    mode: ParallelExecutionMode = ParallelExecutionMode.THREADING
    max_workers: int | None = None  # None = auto (CPU count)
    chunk_size: int = 10000
    timeout_seconds: float = 300.0
    fail_fast: bool = False  # Stop on first error

    def get_workers(self) -> int:
        """Return the number of workers to use.

        A truthy ``max_workers`` is returned as-is. Otherwise the CPU count
        is used (4 when it cannot be determined), capped at 8.
        """
        import os

        configured = self.max_workers
        if configured:
            return configured

        detected = os.cpu_count() or 4
        return min(detected, 8)
|
|
768
|
+
|
|
769
|
+
|
|
770
|
+
class ParallelValidator:
    """Executes multiple validators in parallel.

    Features:
    - Thread and process-based parallelism
    - Chunked processing for large datasets
    - Graceful error handling
    - Progress tracking
    """

    def __init__(
        self,
        validators: list[Validator],
        config: ParallelExecutionConfig | None = None,
    ) -> None:
        """Initialize parallel validator.

        Args:
            validators: List of validators to execute
            config: Execution configuration
        """
        self.validators = validators
        self.config = config or ParallelExecutionConfig()
        self.logger = ValidatorLogger("ParallelValidator")

    def validate(
        self,
        lf: pl.LazyFrame,
        progress_callback: Callable[[int, int], None] | None = None,
    ) -> list[ValidatorExecutionResult]:
        """Execute all validators.

        Args:
            lf: LazyFrame to validate
            progress_callback: Optional callback(completed, total)

        Returns:
            List of execution results
        """
        if self.config.mode == ParallelExecutionMode.SEQUENTIAL:
            return self._validate_sequential(lf, progress_callback)
        elif self.config.mode == ParallelExecutionMode.THREADING:
            return self._validate_threaded(lf, progress_callback)
        else:
            return self._validate_multiprocess(lf, progress_callback)

    @staticmethod
    def _failure_result(
        validator: Validator,
        exc: BaseException,
    ) -> ValidatorExecutionResult:
        """Build the FAILED result recorded when a validator raises."""
        return ValidatorExecutionResult(
            validator_name=validator.name,
            status=ValidationResult.FAILED,
            issues=[],
            error_context=ValidationErrorContext(
                validator_name=validator.name,
                error_type="execution_error",
                message=str(exc),
                exception=exc,
            ),
        )

    @staticmethod
    def _run_on_frame(
        validator: Validator,
        df: pl.DataFrame,
    ) -> ValidatorExecutionResult:
        """Worker entry point for process-based execution.

        Defined on the class rather than as a nested function because the
        process pool has to pickle the callable, and locally defined
        functions cannot be pickled.
        """
        try:
            return validator.validate_safe(df.lazy())
        except Exception as exc:
            return ParallelValidator._failure_result(validator, exc)

    def _should_stop(self, result: ValidatorExecutionResult) -> bool:
        """Return True when fail_fast is enabled and ``result`` is FAILED."""
        return bool(
            self.config.fail_fast and result.status == ValidationResult.FAILED
        )

    def _validate_sequential(
        self,
        lf: pl.LazyFrame,
        progress_callback: Callable[[int, int], None] | None,
    ) -> list[ValidatorExecutionResult]:
        """Execute validators sequentially, in list order."""
        results: list[ValidatorExecutionResult] = []
        total = len(self.validators)

        for done, validator in enumerate(self.validators, start=1):
            try:
                result = validator.validate_safe(lf)
            except Exception as e:
                self.logger.error(f"Validator {validator.name} failed: {e}")
                result = self._failure_result(validator, e)
            results.append(result)

            # Report progress before a possible early exit so the callback
            # sees the validator that triggered the stop, as in the
            # threaded path.
            if progress_callback:
                progress_callback(done, total)

            if self._should_stop(result):
                break

        return results

    def _validate_threaded(
        self,
        lf: pl.LazyFrame,
        progress_callback: Callable[[int, int], None] | None,
    ) -> list[ValidatorExecutionResult]:
        """Execute validators using threading.

        Results are appended in completion order, which may differ from the
        order of ``self.validators``.
        """
        results: list[ValidatorExecutionResult] = []
        total = len(self.validators)
        completed = 0

        def validate_one(validator: Validator) -> ValidatorExecutionResult:
            try:
                return validator.validate_safe(lf)
            except Exception as e:
                return self._failure_result(validator, e)

        with ThreadPoolExecutor(max_workers=self.config.get_workers()) as executor:
            futures = {
                executor.submit(validate_one, v): v for v in self.validators
            }

            for future in as_completed(futures):
                result = future.result()
                results.append(result)
                completed += 1

                if progress_callback:
                    progress_callback(completed, total)

                if self._should_stop(result):
                    # Drop work that has not started; tasks already running
                    # finish when the ``with`` block exits.
                    executor.shutdown(wait=False, cancel_futures=True)
                    break

        return results

    def _validate_multiprocess(
        self,
        lf: pl.LazyFrame,
        progress_callback: Callable[[int, int], None] | None,
    ) -> list[ValidatorExecutionResult]:
        """Execute validators using multiprocessing.

        Note: This requires validators to be picklable.
        Falls back to threading if multiprocessing fails.
        """
        try:
            # Collect DataFrame for multiprocessing
            df = lf.collect()
            results: list[ValidatorExecutionResult] = []
            total = len(self.validators)
            completed = 0

            with ProcessPoolExecutor(max_workers=self.config.get_workers()) as executor:
                futures = {
                    executor.submit(ParallelValidator._run_on_frame, v, df): v
                    for v in self.validators
                }

                for future in as_completed(futures):
                    try:
                        result = future.result(timeout=self.config.timeout_seconds)
                    except Exception as e:
                        # Raised when the task could not be sent to, run in,
                        # or returned from the worker process.
                        validator = futures[future]
                        self.logger.error(
                            f"Validator {validator.name} failed: {e}"
                        )
                        result = self._failure_result(validator, e)
                    results.append(result)

                    completed += 1
                    if progress_callback:
                        progress_callback(completed, total)

                    if self._should_stop(result):
                        executor.shutdown(wait=False, cancel_futures=True)
                        break

            return results

        except Exception as e:
            self.logger.warning(
                f"Multiprocessing failed, falling back to threading: {e}"
            )
            return self._validate_threaded(lf, progress_callback)
|
|
964
|
+
|
|
965
|
+
|
|
966
|
+
# =============================================================================
|
|
967
|
+
# #18: Configuration Validation
|
|
968
|
+
# =============================================================================
|
|
969
|
+
|
|
970
|
+
|
|
971
|
+
@dataclass
class ConfigValidationError:
    """One problem found while checking a validator configuration."""

    # Name of the configuration field the problem refers to.
    field: str
    # Human-readable description of the problem.
    message: str
    # The offending value as found in the configuration.
    value: Any
    # Optional hint on how to fix the problem.
    suggestion: str | None = None
|
|
979
|
+
|
|
980
|
+
|
|
981
|
+
class ConfigValidator:
    """Checks ValidatorConfig settings before they are used.

    Surfaces configuration mistakes early, each with a short hint.
    """

    @classmethod
    def validate(
        cls,
        config: ValidatorConfig,
        validator_name: str = "",
    ) -> list[ConfigValidationError]:
        """Inspect a configuration and collect every problem found.

        Args:
            config: Configuration to validate
            validator_name: Name of validator for context

        Returns:
            List of validation errors (empty if valid)
        """
        found: list[ConfigValidationError] = []

        def report(field: str, message: str, value: Any, suggestion: str) -> None:
            found.append(
                ConfigValidationError(
                    field=field,
                    message=message,
                    value=value,
                    suggestion=suggestion,
                )
            )

        # sample_size: negative is invalid, very large is flagged as risky
        sample_size = config.sample_size
        if sample_size < 0:
            report(
                "sample_size",
                "sample_size must be >= 0",
                sample_size,
                "Use sample_size=0 to disable sampling",
            )
        elif sample_size > 10000:
            report(
                "sample_size",
                "sample_size > 10000 may cause memory issues",
                sample_size,
                "Consider using sample_size=100 for typical use cases",
            )

        # mostly: when set, must be a fraction between 0 and 1 inclusive
        mostly = config.mostly
        if mostly is not None and not (0.0 <= mostly <= 1.0):
            report(
                "mostly",
                "mostly must be in [0.0, 1.0]",
                mostly,
                "Use mostly=0.95 for 95% pass rate",
            )

        # timeout_seconds: when set, must be positive; sub-second is flagged
        timeout = config.timeout_seconds
        if timeout is not None:
            if timeout <= 0:
                report(
                    "timeout_seconds",
                    "timeout_seconds must be > 0",
                    timeout,
                    "Use timeout_seconds=None to disable timeout",
                )
            elif timeout < 1:
                report(
                    "timeout_seconds",
                    "timeout_seconds < 1 may cause false timeouts",
                    timeout,
                    "Use at least timeout_seconds=1",
                )

        # memory_limit_mb: when set, must be positive; below 10 is flagged
        memory_limit = config.memory_limit_mb
        if memory_limit is not None:
            if memory_limit <= 0:
                report(
                    "memory_limit_mb",
                    "memory_limit_mb must be > 0",
                    memory_limit,
                    "Use memory_limit_mb=None to disable limit",
                )
            elif memory_limit < 10:
                report(
                    "memory_limit_mb",
                    "memory_limit_mb < 10 may be too restrictive",
                    memory_limit,
                    "Use at least memory_limit_mb=100",
                )

        # columns: every listed name must contain non-whitespace characters
        for col in config.columns or ():
            if not (col and col.strip()):
                report(
                    "columns",
                    "Column name cannot be empty",
                    col,
                    "Remove empty column names",
                )

        return found

    @classmethod
    def validate_or_raise(
        cls,
        config: ValidatorConfig,
        validator_name: str = "",
    ) -> None:
        """Run ``validate`` and turn any findings into an exception.

        Raises:
            ValueError: If configuration is invalid
        """
        problems = cls.validate(config, validator_name)
        if not problems:
            return

        lines = [f" - {p.field}: {p.message}" for p in problems]
        header = f"Invalid configuration for {validator_name or 'validator'}:\n"
        raise ValueError(header + "\n".join(lines))
|
|
1111
|
+
|
|
1112
|
+
|
|
1113
|
+
# =============================================================================
|
|
1114
|
+
# #19: Polars Version Compatibility
|
|
1115
|
+
# =============================================================================
|
|
1116
|
+
|
|
1117
|
+
|
|
1118
|
+
@dataclass
class PolarsVersionInfo:
    """Polars version information."""

    major: int
    minor: int
    patch: int
    # Version string exactly as it was given to the parser.
    raw: str

    @classmethod
    def from_string(cls, version_str: str) -> "PolarsVersionInfo":
        """Parse a dotted version string.

        Only the leading digits of each of the first three components are
        used, so suffixes such as ``rc2``, ``b1`` or ``-alpha`` are ignored
        instead of raising ValueError. Missing or non-numeric components
        count as 0.

        Args:
            version_str: Version string such as ``"1.2.3"`` or ``"1.0.0rc2"``

        Returns:
            Parsed version; ``raw`` holds ``version_str`` unchanged
        """
        import re

        numbers: list[int] = []
        for part in version_str.split(".")[:3]:
            digits = re.match(r"\d+", part.strip())
            numbers.append(int(digits.group()) if digits else 0)
        # Pad short versions such as "1.2" to three components.
        numbers.extend([0] * (3 - len(numbers)))

        major, minor, patch = numbers
        return cls(major=major, minor=minor, patch=patch, raw=version_str)

    @classmethod
    def current(cls) -> "PolarsVersionInfo":
        """Get current Polars version."""
        return cls.from_string(pl.__version__)

    def _as_tuple(self) -> tuple[int, int, int]:
        """Return ``(major, minor, patch)`` for comparisons."""
        return (self.major, self.minor, self.patch)

    def __ge__(self, other: tuple[int, int, int]) -> bool:
        return self._as_tuple() >= other

    def __gt__(self, other: tuple[int, int, int]) -> bool:
        return self._as_tuple() > other

    def __le__(self, other: tuple[int, int, int]) -> bool:
        return self._as_tuple() <= other

    def __lt__(self, other: tuple[int, int, int]) -> bool:
        return self._as_tuple() < other
|
|
1144
|
+
|
|
1145
|
+
|
|
1146
|
+
class PolarsCompat:
    """Polars version compatibility layer.

    Provides compatible implementations for API changes between versions.
    """

    # Parsed version of the installed Polars, filled on first use.
    _version: PolarsVersionInfo | None = None

    @classmethod
    def version(cls) -> PolarsVersionInfo:
        """Get cached Polars version."""
        if cls._version is None:
            cls._version = PolarsVersionInfo.current()
        return cls._version

    @classmethod
    def collect_schema(cls, lf: pl.LazyFrame) -> pl.Schema:
        """Get schema from LazyFrame (compatible across versions).

        Uses ``lf.collect_schema()`` when the method exists and falls back
        to the ``lf.schema`` attribute otherwise.
        """
        # Feature detection instead of a version gate: the former
        # ``>= (0, 20, 0)`` check called the method on releases that may
        # not provide it.
        # NOTE(review): Polars documents collect_schema as added in 1.0 -
        # confirm against the supported version range.
        resolver = getattr(lf, "collect_schema", None)
        if callable(resolver):
            return resolver()
        # Older versions
        return lf.schema  # type: ignore

    @classmethod
    def estimated_size(cls, df: pl.DataFrame) -> int:
        """Get estimated size in bytes (compatible across versions).

        Falls back to ``rows * columns * 8`` when the Polars call fails.
        """
        v = cls.version()
        try:
            if v >= (0, 19, 0):
                return df.estimated_size()
            else:
                return df.estimated_size("b")  # type: ignore
        except Exception:
            # Fallback estimation
            return len(df) * len(df.columns) * 8

    @classmethod
    def str_contains(
        cls,
        expr: pl.Expr,
        pattern: str,
        literal: bool = False,
    ) -> pl.Expr:
        """String contains (compatible across versions).

        Args:
            expr: Expression to test
            pattern: Pattern to look for
            literal: Passed through to ``str.contains`` as ``literal``

        Returns:
            Expression built with ``expr.str.contains``
        """
        v = cls.version()
        if v >= (0, 19, 0):
            return expr.str.contains(pattern, literal=literal)
        else:
            # Older API
            if literal:
                return expr.str.contains(pattern, literal=True)  # type: ignore
            return expr.str.contains(pattern)

    @classmethod
    def null_count(cls, lf: pl.LazyFrame, col: str) -> pl.Expr:
        """Count nulls in column (compatible across versions).

        Args:
            lf: Not used by the current implementation
            col: Column name

        Returns:
            Expression counting the nulls in ``col``
        """
        v = cls.version()
        if v >= (0, 18, 0):
            return pl.col(col).null_count()
        else:
            return pl.col(col).is_null().sum()

    @classmethod
    def check_min_version(
        cls,
        min_version: tuple[int, int, int],
        feature: str = "",
    ) -> bool:
        """Check if current Polars meets minimum version.

        Emits a UserWarning when the requirement is not met.

        Args:
            min_version: Minimum required version (major, minor, patch)
            feature: Feature name for warning message

        Returns:
            True if version requirement is met
        """
        v = cls.version()
        if v < min_version:
            ver_str = ".".join(map(str, min_version))
            msg = f"Polars {ver_str}+ required"
            if feature:
                msg += f" for {feature}"
            msg += f", current: {v.raw}"
            warnings.warn(msg, UserWarning, stacklevel=2)
            return False
        return True
|
|
1235
|
+
|
|
1236
|
+
|
|
1237
|
+
# =============================================================================
|
|
1238
|
+
# #20: Internationalization Support
|
|
1239
|
+
# =============================================================================
|
|
1240
|
+
|
|
1241
|
+
|
|
1242
|
+
class Language(Enum):
    """Language codes that validation messages can be rendered in."""

    EN = "en"  # English
    KO = "ko"  # Korean
    JA = "ja"  # Japanese
    ZH = "zh"  # Chinese
    ES = "es"  # Spanish
    FR = "fr"  # French
    DE = "de"  # German
|
|
1252
|
+
|
|
1253
|
+
|
|
1254
|
+
# Translation dictionaries
|
|
1255
|
+
# Built-in message catalogue: translation key -> {language code -> text}.
# Texts containing ``{name}`` placeholders are templates.
_TRANSLATIONS: dict[str, dict[str, str]] = {
    # ---- Issue types ----
    "null_values": {
        "en": "Null values found",
        "ko": "null 값 발견",
        "ja": "null値が見つかりました",
        "zh": "发现空值",
        "es": "Valores nulos encontrados",
        "fr": "Valeurs nulles trouvées",
        "de": "Null-Werte gefunden",
    },
    "out_of_range": {
        "en": "Values out of range",
        "ko": "범위를 벗어난 값",
        "ja": "範囲外の値",
        "zh": "超出范围的值",
        "es": "Valores fuera de rango",
        "fr": "Valeurs hors limites",
        "de": "Werte außerhalb des Bereichs",
    },
    "duplicate_values": {
        "en": "Duplicate values found",
        "ko": "중복 값 발견",
        "ja": "重複値が見つかりました",
        "zh": "发现重复值",
        "es": "Valores duplicados encontrados",
        "fr": "Valeurs en double trouvées",
        "de": "Doppelte Werte gefunden",
    },
    "invalid_format": {
        "en": "Invalid format",
        "ko": "잘못된 형식",
        "ja": "無効な形式",
        "zh": "格式无效",
        "es": "Formato inválido",
        "fr": "Format invalide",
        "de": "Ungültiges Format",
    },
    "referential_integrity_violation": {
        "en": "Referential integrity violation",
        "ko": "참조 무결성 위반",
        "ja": "参照整合性違反",
        "zh": "引用完整性违规",
        "es": "Violación de integridad referencial",
        "fr": "Violation d'intégrité référentielle",
        "de": "Referenzielle Integritätsverletzung",
    },
    "schema_mismatch": {
        "en": "Schema mismatch",
        "ko": "스키마 불일치",
        "ja": "スキーマ不一致",
        "zh": "模式不匹配",
        "es": "Desajuste de esquema",
        "fr": "Non-concordance de schéma",
        "de": "Schema-Abweichung",
    },
    # ---- Severity levels ----
    "critical": {
        "en": "Critical",
        "ko": "심각",
        "ja": "重大",
        "zh": "严重",
        "es": "Crítico",
        "fr": "Critique",
        "de": "Kritisch",
    },
    "high": {
        "en": "High",
        "ko": "높음",
        "ja": "高",
        "zh": "高",
        "es": "Alto",
        "fr": "Élevé",
        "de": "Hoch",
    },
    "medium": {
        "en": "Medium",
        "ko": "중간",
        "ja": "中",
        "zh": "中",
        "es": "Medio",
        "fr": "Moyen",
        "de": "Mittel",
    },
    "low": {
        "en": "Low",
        "ko": "낮음",
        "ja": "低",
        "zh": "低",
        "es": "Bajo",
        "fr": "Faible",
        "de": "Niedrig",
    },
    # ---- Common messages ----
    "values_found": {
        "en": "{count} values found",
        "ko": "{count}개의 값 발견",
        "ja": "{count}個の値が見つかりました",
        "zh": "发现{count}个值",
        "es": "{count} valores encontrados",
        "fr": "{count} valeurs trouvées",
        "de": "{count} Werte gefunden",
    },
    "column_not_found": {
        "en": "Column '{column}' not found",
        "ko": "'{column}' 컬럼을 찾을 수 없습니다",
        "ja": "'{column}' 列が見つかりません",
        "zh": "未找到'{column}'列",
        "es": "Columna '{column}' no encontrada",
        "fr": "Colonne '{column}' non trouvée",
        "de": "Spalte '{column}' nicht gefunden",
    },
    "validation_passed": {
        "en": "Validation passed",
        "ko": "검증 통과",
        "ja": "検証通過",
        "zh": "验证通过",
        "es": "Validación aprobada",
        "fr": "Validation réussie",
        "de": "Validierung bestanden",
    },
    "validation_failed": {
        "en": "Validation failed",
        "ko": "검증 실패",
        "ja": "検証失敗",
        "zh": "验证失败",
        "es": "Validación fallida",
        "fr": "Validation échouée",
        "de": "Validierung fehlgeschlagen",
    },
}
|
|
1386
|
+
|
|
1387
|
+
|
|
1388
|
+
class I18n:
|
|
1389
|
+
"""Internationalization support for validation messages.
|
|
1390
|
+
|
|
1391
|
+
Features:
|
|
1392
|
+
- Multiple language support
|
|
1393
|
+
- Fallback to English
|
|
1394
|
+
- Auto-detection from locale
|
|
1395
|
+
- Template interpolation
|
|
1396
|
+
"""
|
|
1397
|
+
|
|
1398
|
+
_current_language: Language = Language.EN
|
|
1399
|
+
_custom_translations: dict[str, dict[str, str]] = {}
|
|
1400
|
+
|
|
1401
|
+
@classmethod
|
|
1402
|
+
def set_language(cls, lang: Language | str) -> None:
|
|
1403
|
+
"""Set the current language.
|
|
1404
|
+
|
|
1405
|
+
Args:
|
|
1406
|
+
lang: Language enum or code string
|
|
1407
|
+
"""
|
|
1408
|
+
if isinstance(lang, str):
|
|
1409
|
+
lang = Language(lang.lower())
|
|
1410
|
+
cls._current_language = lang
|
|
1411
|
+
|
|
1412
|
+
@classmethod
|
|
1413
|
+
def get_language(cls) -> Language:
|
|
1414
|
+
"""Get the current language."""
|
|
1415
|
+
return cls._current_language
|
|
1416
|
+
|
|
1417
|
+
@classmethod
|
|
1418
|
+
def detect_language(cls) -> Language:
|
|
1419
|
+
"""Detect language from system locale."""
|
|
1420
|
+
try:
|
|
1421
|
+
loc = locale.getlocale()[0]
|
|
1422
|
+
if loc:
|
|
1423
|
+
code = loc.split("_")[0].lower()
|
|
1424
|
+
try:
|
|
1425
|
+
return Language(code)
|
|
1426
|
+
except ValueError:
|
|
1427
|
+
pass
|
|
1428
|
+
except Exception:
|
|
1429
|
+
pass
|
|
1430
|
+
return Language.EN
|
|
1431
|
+
|
|
1432
|
+
@classmethod
|
|
1433
|
+
def auto_configure(cls) -> None:
|
|
1434
|
+
"""Auto-configure language from environment."""
|
|
1435
|
+
# Check environment variable first
|
|
1436
|
+
env_lang = os.environ.get("TRUTHOUND_LANGUAGE")
|
|
1437
|
+
if env_lang:
|
|
1438
|
+
try:
|
|
1439
|
+
cls.set_language(env_lang)
|
|
1440
|
+
return
|
|
1441
|
+
except ValueError:
|
|
1442
|
+
pass
|
|
1443
|
+
|
|
1444
|
+
# Fall back to locale detection
|
|
1445
|
+
cls.set_language(cls.detect_language())
|
|
1446
|
+
|
|
1447
|
+
@classmethod
|
|
1448
|
+
def add_translations(cls, key: str, translations: dict[str, str]) -> None:
|
|
1449
|
+
"""Add custom translations.
|
|
1450
|
+
|
|
1451
|
+
Args:
|
|
1452
|
+
key: Translation key
|
|
1453
|
+
translations: Dict of language code -> text
|
|
1454
|
+
"""
|
|
1455
|
+
cls._custom_translations[key] = translations
|
|
1456
|
+
|
|
1457
|
+
@classmethod
def t(
    cls,
    key: str,
    lang: Language | None = None,
    **kwargs: Any,
) -> str:
    """Translate a key to the specified language.

    Lookup order: custom translations registered for ``key`` (these
    replace the built-in entry for that key entirely), then the built-in
    ``_TRANSLATIONS`` table. Within the chosen entry the requested
    language is tried first, then ``"en"``, and finally the key itself.

    Args:
        key: Translation key.
        lang: Language (default: current).
        **kwargs: Template interpolation values, applied with
            ``str.format``.

    Returns:
        The translated string. The key is returned unchanged when no
        translation exists, and the un-interpolated template is returned
        when interpolation fails.
    """
    lang = lang or cls._current_language
    lang_code = lang.value

    # Custom translations take precedence over the built-in table.
    if key in cls._custom_translations:
        translations = cls._custom_translations[key]
    elif key in _TRANSLATIONS:
        translations = _TRANSLATIONS[key]
    else:
        return key  # Unknown key: echo it back.

    # Requested language first, English second, the key as a last resort.
    text = translations.get(lang_code, translations.get("en", key))

    if kwargs:
        try:
            text = text.format(**kwargs)
        except (KeyError, IndexError, ValueError):
            # Interpolation is best-effort. KeyError: a named field is
            # missing from kwargs. IndexError: the template has positional
            # fields ("{}"). ValueError: malformed braces or a bad format
            # spec. In every case keep the raw template.
            pass

    return text
|
|
1496
|
+
|
|
1497
|
+
@classmethod
def translate_issue(
    cls,
    issue: ValidationIssue,
    lang: Language | None = None,
) -> ValidationIssue:
    """Build a translated copy of a validation issue.

    The issue type is always passed through ``t``. The details are
    replaced by the ``"values_found"`` message (interpolated with the
    issue count) only when the issue has details and some key of the
    built-in translation table occurs inside the lower-cased issue type;
    otherwise the original details are kept. All other fields are copied
    unchanged.

    Args:
        issue: Original validation issue.
        lang: Target language (default: current).

    Returns:
        A new ``ValidationIssue``; the input issue is not modified.
    """
    target_lang = lang or cls._current_language

    type_text = cls.t(issue.issue_type, target_lang)

    details_text = issue.details
    if issue.details and any(
        known_key in issue.issue_type.lower() for known_key in _TRANSLATIONS
    ):
        details_text = cls.t("values_found", target_lang, count=issue.count)

    return ValidationIssue(
        column=issue.column,
        issue_type=type_text,
        count=issue.count,
        severity=issue.severity,
        details=details_text,
        expected=issue.expected,
        actual=issue.actual,
        sample_values=issue.sample_values,
        error_context=issue.error_context,
        validator_name=issue.validator_name,
        execution_time_ms=issue.execution_time_ms,
    )
|
|
1544
|
+
|
|
1545
|
+
@classmethod
def translate_severity(cls, severity: Severity, lang: Language | None = None) -> str:
    """Translate a severity level.

    Args:
        severity: Severity whose lower-cased ``value`` is the translation key.
        lang: Target language, forwarded to ``t`` as given.

    Returns:
        Whatever ``t`` returns for that key.
    """
    severity_key = severity.value.lower()
    return cls.t(severity_key, lang)
|
|
1549
|
+
|
|
1550
|
+
|
|
1551
|
+
# Convenience function
|
|
1552
|
+
def translate(key: str, **kwargs: Any) -> str:
    """Module-level shortcut for ``I18n.t``.

    Args:
        key: Translation key.
        **kwargs: Forwarded unchanged to ``I18n.t``; this covers template
            values and, since everything is passed through, a ``lang``
            keyword as well.

    Returns:
        The string returned by ``I18n.t``.
    """
    translated = I18n.t(key, **kwargs)
    return translated
|
|
1555
|
+
|
|
1556
|
+
|
|
1557
|
+
# =============================================================================
|
|
1558
|
+
# Integration: Enhanced Validator with Enterprise Features
|
|
1559
|
+
# =============================================================================
|
|
1560
|
+
|
|
1561
|
+
|
|
1562
|
+
class EnterpriseValidator(Validator):
    """Validator base class that wires in the enterprise helpers.

    Depending on the class-level switches below it:
    - validates the validator configuration at construction time,
    - tracks each ``validate_safe`` run with the metrics collector,
    - records each run with the validation audit logger,
    - optionally translates the resulting issues via ``I18n``.

    Usage:
        class MyValidator(EnterpriseValidator):
            name = "my_validator"
            category = "custom"

            def validate(self, lf):
                # Your validation logic
                pass
    """

    # Feature switches; override in a subclass to turn a feature on or off.
    enable_audit: bool = True
    enable_metrics: bool = True
    validate_config: bool = True
    translate_messages: bool = False

    def __init__(self, *args: Any, **kwargs: Any) -> None:
        """Initialise the base validator, then check its configuration.

        Raises whatever ``ConfigValidator.validate_or_raise`` raises when
        ``validate_config`` is enabled and the configuration is rejected.
        """
        super().__init__(*args, **kwargs)

        if not self.validate_config:
            return
        ConfigValidator.validate_or_raise(self.config, self.name)

    def validate_safe(self, lf: pl.LazyFrame) -> ValidatorExecutionResult:
        """Run the parent ``validate_safe`` with the enabled enterprise features.

        Args:
            lf: Frame to validate; passed to the parent implementation and
                to the audit logger.

        Returns:
            The parent's execution result. When ``translate_messages`` is
            set, its ``issues`` list is replaced by translated copies.
        """
        auditor = get_validation_audit_logger() if self.enable_audit else None
        collector = get_metrics_collector() if self.enable_metrics else None

        if not collector:
            outcome = super().validate_safe(lf)
        else:
            # The tracking context receives the run's status and issues.
            with collector.track_validation(self.name, self.category) as tracking:
                outcome = super().validate_safe(lf)
                tracking["status"] = outcome.status.value
                tracking["issues"] = outcome.issues

        # The audit record is written before translation, so it sees the
        # untranslated issues.
        if auditor:
            auditor.log_validation(validator=self, lf=lf, result=outcome)

        if self.translate_messages:
            outcome.issues = list(map(I18n.translate_issue, outcome.issues))

        return outcome
|
|
1624
|
+
|
|
1625
|
+
|
|
1626
|
+
# =============================================================================
|
|
1627
|
+
# Exports
|
|
1628
|
+
# =============================================================================
|
|
1629
|
+
|
|
1630
|
+
__all__ = [
    # Validation audit logging (#14)
    "ValidationAuditRecord",
    "ValidationAuditLogger",
    "get_validation_audit_logger",
    # Metrics collection (#15)
    "MetricsCollector",
    "get_metrics_collector",
    # Reference data caching (#16)
    "CacheEntry",
    "ReferentialDataCache",
    "get_reference_cache",
    # Parallel execution (#17)
    "ParallelExecutionMode",
    "ParallelExecutionConfig",
    "ParallelValidator",
    # Configuration validation (#18)
    "ConfigValidationError",
    "ConfigValidator",
    # Polars compatibility (#19)
    "PolarsVersionInfo",
    "PolarsCompat",
    # Internationalization (#20)
    "Language",
    "I18n",
    "translate",
    # Validator base class combining the features above
    "EnterpriseValidator",
]
|