truthound 1.0.8__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- truthound/__init__.py +162 -0
- truthound/adapters.py +100 -0
- truthound/api.py +365 -0
- truthound/audit/__init__.py +248 -0
- truthound/audit/core.py +967 -0
- truthound/audit/filters.py +620 -0
- truthound/audit/formatters.py +707 -0
- truthound/audit/logger.py +902 -0
- truthound/audit/middleware.py +571 -0
- truthound/audit/storage.py +1083 -0
- truthound/benchmark/__init__.py +123 -0
- truthound/benchmark/base.py +757 -0
- truthound/benchmark/comparison.py +635 -0
- truthound/benchmark/generators.py +706 -0
- truthound/benchmark/reporters.py +718 -0
- truthound/benchmark/runner.py +635 -0
- truthound/benchmark/scenarios.py +712 -0
- truthound/cache.py +252 -0
- truthound/checkpoint/__init__.py +136 -0
- truthound/checkpoint/actions/__init__.py +164 -0
- truthound/checkpoint/actions/base.py +324 -0
- truthound/checkpoint/actions/custom.py +234 -0
- truthound/checkpoint/actions/discord_notify.py +290 -0
- truthound/checkpoint/actions/email_notify.py +405 -0
- truthound/checkpoint/actions/github_action.py +406 -0
- truthound/checkpoint/actions/opsgenie.py +1499 -0
- truthound/checkpoint/actions/pagerduty.py +226 -0
- truthound/checkpoint/actions/slack_notify.py +233 -0
- truthound/checkpoint/actions/store_result.py +249 -0
- truthound/checkpoint/actions/teams_notify.py +1570 -0
- truthound/checkpoint/actions/telegram_notify.py +419 -0
- truthound/checkpoint/actions/update_docs.py +552 -0
- truthound/checkpoint/actions/webhook.py +293 -0
- truthound/checkpoint/analytics/__init__.py +147 -0
- truthound/checkpoint/analytics/aggregations/__init__.py +23 -0
- truthound/checkpoint/analytics/aggregations/rollup.py +481 -0
- truthound/checkpoint/analytics/aggregations/time_bucket.py +306 -0
- truthound/checkpoint/analytics/analyzers/__init__.py +17 -0
- truthound/checkpoint/analytics/analyzers/anomaly.py +386 -0
- truthound/checkpoint/analytics/analyzers/base.py +270 -0
- truthound/checkpoint/analytics/analyzers/forecast.py +421 -0
- truthound/checkpoint/analytics/analyzers/trend.py +314 -0
- truthound/checkpoint/analytics/models.py +292 -0
- truthound/checkpoint/analytics/protocols.py +549 -0
- truthound/checkpoint/analytics/service.py +718 -0
- truthound/checkpoint/analytics/stores/__init__.py +16 -0
- truthound/checkpoint/analytics/stores/base.py +306 -0
- truthound/checkpoint/analytics/stores/memory_store.py +353 -0
- truthound/checkpoint/analytics/stores/sqlite_store.py +557 -0
- truthound/checkpoint/analytics/stores/timescale_store.py +501 -0
- truthound/checkpoint/async_actions.py +794 -0
- truthound/checkpoint/async_base.py +708 -0
- truthound/checkpoint/async_checkpoint.py +617 -0
- truthound/checkpoint/async_runner.py +639 -0
- truthound/checkpoint/checkpoint.py +527 -0
- truthound/checkpoint/ci/__init__.py +61 -0
- truthound/checkpoint/ci/detector.py +355 -0
- truthound/checkpoint/ci/reporter.py +436 -0
- truthound/checkpoint/ci/templates.py +454 -0
- truthound/checkpoint/circuitbreaker/__init__.py +133 -0
- truthound/checkpoint/circuitbreaker/breaker.py +542 -0
- truthound/checkpoint/circuitbreaker/core.py +252 -0
- truthound/checkpoint/circuitbreaker/detection.py +459 -0
- truthound/checkpoint/circuitbreaker/middleware.py +389 -0
- truthound/checkpoint/circuitbreaker/registry.py +357 -0
- truthound/checkpoint/distributed/__init__.py +139 -0
- truthound/checkpoint/distributed/backends/__init__.py +35 -0
- truthound/checkpoint/distributed/backends/celery_backend.py +503 -0
- truthound/checkpoint/distributed/backends/kubernetes_backend.py +696 -0
- truthound/checkpoint/distributed/backends/local_backend.py +397 -0
- truthound/checkpoint/distributed/backends/ray_backend.py +625 -0
- truthound/checkpoint/distributed/base.py +774 -0
- truthound/checkpoint/distributed/orchestrator.py +765 -0
- truthound/checkpoint/distributed/protocols.py +842 -0
- truthound/checkpoint/distributed/registry.py +449 -0
- truthound/checkpoint/idempotency/__init__.py +120 -0
- truthound/checkpoint/idempotency/core.py +295 -0
- truthound/checkpoint/idempotency/fingerprint.py +454 -0
- truthound/checkpoint/idempotency/locking.py +604 -0
- truthound/checkpoint/idempotency/service.py +592 -0
- truthound/checkpoint/idempotency/stores.py +653 -0
- truthound/checkpoint/monitoring/__init__.py +134 -0
- truthound/checkpoint/monitoring/aggregators/__init__.py +15 -0
- truthound/checkpoint/monitoring/aggregators/base.py +372 -0
- truthound/checkpoint/monitoring/aggregators/realtime.py +300 -0
- truthound/checkpoint/monitoring/aggregators/window.py +493 -0
- truthound/checkpoint/monitoring/collectors/__init__.py +17 -0
- truthound/checkpoint/monitoring/collectors/base.py +257 -0
- truthound/checkpoint/monitoring/collectors/memory_collector.py +617 -0
- truthound/checkpoint/monitoring/collectors/prometheus_collector.py +451 -0
- truthound/checkpoint/monitoring/collectors/redis_collector.py +518 -0
- truthound/checkpoint/monitoring/events.py +410 -0
- truthound/checkpoint/monitoring/protocols.py +636 -0
- truthound/checkpoint/monitoring/service.py +578 -0
- truthound/checkpoint/monitoring/views/__init__.py +17 -0
- truthound/checkpoint/monitoring/views/base.py +172 -0
- truthound/checkpoint/monitoring/views/queue_view.py +220 -0
- truthound/checkpoint/monitoring/views/task_view.py +240 -0
- truthound/checkpoint/monitoring/views/worker_view.py +263 -0
- truthound/checkpoint/registry.py +337 -0
- truthound/checkpoint/runner.py +356 -0
- truthound/checkpoint/transaction/__init__.py +133 -0
- truthound/checkpoint/transaction/base.py +389 -0
- truthound/checkpoint/transaction/compensatable.py +537 -0
- truthound/checkpoint/transaction/coordinator.py +576 -0
- truthound/checkpoint/transaction/executor.py +622 -0
- truthound/checkpoint/transaction/idempotency.py +534 -0
- truthound/checkpoint/transaction/saga/__init__.py +143 -0
- truthound/checkpoint/transaction/saga/builder.py +584 -0
- truthound/checkpoint/transaction/saga/definition.py +515 -0
- truthound/checkpoint/transaction/saga/event_store.py +542 -0
- truthound/checkpoint/transaction/saga/patterns.py +833 -0
- truthound/checkpoint/transaction/saga/runner.py +718 -0
- truthound/checkpoint/transaction/saga/state_machine.py +793 -0
- truthound/checkpoint/transaction/saga/strategies.py +780 -0
- truthound/checkpoint/transaction/saga/testing.py +886 -0
- truthound/checkpoint/triggers/__init__.py +58 -0
- truthound/checkpoint/triggers/base.py +237 -0
- truthound/checkpoint/triggers/event.py +385 -0
- truthound/checkpoint/triggers/schedule.py +355 -0
- truthound/cli.py +2358 -0
- truthound/cli_modules/__init__.py +124 -0
- truthound/cli_modules/advanced/__init__.py +45 -0
- truthound/cli_modules/advanced/benchmark.py +343 -0
- truthound/cli_modules/advanced/docs.py +225 -0
- truthound/cli_modules/advanced/lineage.py +209 -0
- truthound/cli_modules/advanced/ml.py +320 -0
- truthound/cli_modules/advanced/realtime.py +196 -0
- truthound/cli_modules/checkpoint/__init__.py +46 -0
- truthound/cli_modules/checkpoint/init.py +114 -0
- truthound/cli_modules/checkpoint/list.py +71 -0
- truthound/cli_modules/checkpoint/run.py +159 -0
- truthound/cli_modules/checkpoint/validate.py +67 -0
- truthound/cli_modules/common/__init__.py +71 -0
- truthound/cli_modules/common/errors.py +414 -0
- truthound/cli_modules/common/options.py +419 -0
- truthound/cli_modules/common/output.py +507 -0
- truthound/cli_modules/common/protocol.py +552 -0
- truthound/cli_modules/core/__init__.py +48 -0
- truthound/cli_modules/core/check.py +123 -0
- truthound/cli_modules/core/compare.py +104 -0
- truthound/cli_modules/core/learn.py +57 -0
- truthound/cli_modules/core/mask.py +77 -0
- truthound/cli_modules/core/profile.py +65 -0
- truthound/cli_modules/core/scan.py +61 -0
- truthound/cli_modules/profiler/__init__.py +51 -0
- truthound/cli_modules/profiler/auto_profile.py +175 -0
- truthound/cli_modules/profiler/metadata.py +107 -0
- truthound/cli_modules/profiler/suite.py +283 -0
- truthound/cli_modules/registry.py +431 -0
- truthound/cli_modules/scaffolding/__init__.py +89 -0
- truthound/cli_modules/scaffolding/base.py +631 -0
- truthound/cli_modules/scaffolding/commands.py +545 -0
- truthound/cli_modules/scaffolding/plugins.py +1072 -0
- truthound/cli_modules/scaffolding/reporters.py +594 -0
- truthound/cli_modules/scaffolding/validators.py +1127 -0
- truthound/common/__init__.py +18 -0
- truthound/common/resilience/__init__.py +130 -0
- truthound/common/resilience/bulkhead.py +266 -0
- truthound/common/resilience/circuit_breaker.py +516 -0
- truthound/common/resilience/composite.py +332 -0
- truthound/common/resilience/config.py +292 -0
- truthound/common/resilience/protocols.py +217 -0
- truthound/common/resilience/rate_limiter.py +404 -0
- truthound/common/resilience/retry.py +341 -0
- truthound/datadocs/__init__.py +260 -0
- truthound/datadocs/base.py +571 -0
- truthound/datadocs/builder.py +761 -0
- truthound/datadocs/charts.py +764 -0
- truthound/datadocs/dashboard/__init__.py +63 -0
- truthound/datadocs/dashboard/app.py +576 -0
- truthound/datadocs/dashboard/components.py +584 -0
- truthound/datadocs/dashboard/state.py +240 -0
- truthound/datadocs/engine/__init__.py +46 -0
- truthound/datadocs/engine/context.py +376 -0
- truthound/datadocs/engine/pipeline.py +618 -0
- truthound/datadocs/engine/registry.py +469 -0
- truthound/datadocs/exporters/__init__.py +49 -0
- truthound/datadocs/exporters/base.py +198 -0
- truthound/datadocs/exporters/html.py +178 -0
- truthound/datadocs/exporters/json_exporter.py +253 -0
- truthound/datadocs/exporters/markdown.py +284 -0
- truthound/datadocs/exporters/pdf.py +392 -0
- truthound/datadocs/i18n/__init__.py +86 -0
- truthound/datadocs/i18n/catalog.py +960 -0
- truthound/datadocs/i18n/formatting.py +505 -0
- truthound/datadocs/i18n/loader.py +256 -0
- truthound/datadocs/i18n/plurals.py +378 -0
- truthound/datadocs/renderers/__init__.py +42 -0
- truthound/datadocs/renderers/base.py +401 -0
- truthound/datadocs/renderers/custom.py +342 -0
- truthound/datadocs/renderers/jinja.py +697 -0
- truthound/datadocs/sections.py +736 -0
- truthound/datadocs/styles.py +931 -0
- truthound/datadocs/themes/__init__.py +101 -0
- truthound/datadocs/themes/base.py +336 -0
- truthound/datadocs/themes/default.py +417 -0
- truthound/datadocs/themes/enterprise.py +419 -0
- truthound/datadocs/themes/loader.py +336 -0
- truthound/datadocs/themes.py +301 -0
- truthound/datadocs/transformers/__init__.py +57 -0
- truthound/datadocs/transformers/base.py +268 -0
- truthound/datadocs/transformers/enrichers.py +544 -0
- truthound/datadocs/transformers/filters.py +447 -0
- truthound/datadocs/transformers/i18n.py +468 -0
- truthound/datadocs/versioning/__init__.py +62 -0
- truthound/datadocs/versioning/diff.py +639 -0
- truthound/datadocs/versioning/storage.py +497 -0
- truthound/datadocs/versioning/version.py +358 -0
- truthound/datasources/__init__.py +223 -0
- truthound/datasources/_async_protocols.py +222 -0
- truthound/datasources/_protocols.py +159 -0
- truthound/datasources/adapters.py +428 -0
- truthound/datasources/async_base.py +599 -0
- truthound/datasources/async_factory.py +511 -0
- truthound/datasources/base.py +516 -0
- truthound/datasources/factory.py +433 -0
- truthound/datasources/nosql/__init__.py +47 -0
- truthound/datasources/nosql/base.py +487 -0
- truthound/datasources/nosql/elasticsearch.py +801 -0
- truthound/datasources/nosql/mongodb.py +636 -0
- truthound/datasources/pandas_optimized.py +582 -0
- truthound/datasources/pandas_source.py +216 -0
- truthound/datasources/polars_source.py +395 -0
- truthound/datasources/spark_source.py +479 -0
- truthound/datasources/sql/__init__.py +154 -0
- truthound/datasources/sql/base.py +710 -0
- truthound/datasources/sql/bigquery.py +410 -0
- truthound/datasources/sql/cloud_base.py +199 -0
- truthound/datasources/sql/databricks.py +471 -0
- truthound/datasources/sql/mysql.py +316 -0
- truthound/datasources/sql/oracle.py +427 -0
- truthound/datasources/sql/postgresql.py +321 -0
- truthound/datasources/sql/redshift.py +479 -0
- truthound/datasources/sql/snowflake.py +439 -0
- truthound/datasources/sql/sqlite.py +286 -0
- truthound/datasources/sql/sqlserver.py +437 -0
- truthound/datasources/streaming/__init__.py +47 -0
- truthound/datasources/streaming/base.py +350 -0
- truthound/datasources/streaming/kafka.py +670 -0
- truthound/decorators.py +98 -0
- truthound/docs/__init__.py +69 -0
- truthound/docs/extractor.py +971 -0
- truthound/docs/generator.py +601 -0
- truthound/docs/parser.py +1037 -0
- truthound/docs/renderer.py +999 -0
- truthound/drift/__init__.py +22 -0
- truthound/drift/compare.py +189 -0
- truthound/drift/detectors.py +464 -0
- truthound/drift/report.py +160 -0
- truthound/execution/__init__.py +65 -0
- truthound/execution/_protocols.py +324 -0
- truthound/execution/base.py +576 -0
- truthound/execution/distributed/__init__.py +179 -0
- truthound/execution/distributed/aggregations.py +731 -0
- truthound/execution/distributed/arrow_bridge.py +817 -0
- truthound/execution/distributed/base.py +550 -0
- truthound/execution/distributed/dask_engine.py +976 -0
- truthound/execution/distributed/mixins.py +766 -0
- truthound/execution/distributed/protocols.py +756 -0
- truthound/execution/distributed/ray_engine.py +1127 -0
- truthound/execution/distributed/registry.py +446 -0
- truthound/execution/distributed/spark_engine.py +1011 -0
- truthound/execution/distributed/validator_adapter.py +682 -0
- truthound/execution/pandas_engine.py +401 -0
- truthound/execution/polars_engine.py +497 -0
- truthound/execution/pushdown/__init__.py +230 -0
- truthound/execution/pushdown/ast.py +1550 -0
- truthound/execution/pushdown/builder.py +1550 -0
- truthound/execution/pushdown/dialects.py +1072 -0
- truthound/execution/pushdown/executor.py +829 -0
- truthound/execution/pushdown/optimizer.py +1041 -0
- truthound/execution/sql_engine.py +518 -0
- truthound/infrastructure/__init__.py +189 -0
- truthound/infrastructure/audit.py +1515 -0
- truthound/infrastructure/config.py +1133 -0
- truthound/infrastructure/encryption.py +1132 -0
- truthound/infrastructure/logging.py +1503 -0
- truthound/infrastructure/metrics.py +1220 -0
- truthound/lineage/__init__.py +89 -0
- truthound/lineage/base.py +746 -0
- truthound/lineage/impact_analysis.py +474 -0
- truthound/lineage/integrations/__init__.py +22 -0
- truthound/lineage/integrations/openlineage.py +548 -0
- truthound/lineage/tracker.py +512 -0
- truthound/lineage/visualization/__init__.py +33 -0
- truthound/lineage/visualization/protocols.py +145 -0
- truthound/lineage/visualization/renderers/__init__.py +20 -0
- truthound/lineage/visualization/renderers/cytoscape.py +329 -0
- truthound/lineage/visualization/renderers/d3.py +331 -0
- truthound/lineage/visualization/renderers/graphviz.py +276 -0
- truthound/lineage/visualization/renderers/mermaid.py +308 -0
- truthound/maskers.py +113 -0
- truthound/ml/__init__.py +124 -0
- truthound/ml/anomaly_models/__init__.py +31 -0
- truthound/ml/anomaly_models/ensemble.py +362 -0
- truthound/ml/anomaly_models/isolation_forest.py +444 -0
- truthound/ml/anomaly_models/statistical.py +392 -0
- truthound/ml/base.py +1178 -0
- truthound/ml/drift_detection/__init__.py +26 -0
- truthound/ml/drift_detection/concept.py +381 -0
- truthound/ml/drift_detection/distribution.py +361 -0
- truthound/ml/drift_detection/feature.py +442 -0
- truthound/ml/drift_detection/multivariate.py +495 -0
- truthound/ml/monitoring/__init__.py +88 -0
- truthound/ml/monitoring/alerting/__init__.py +33 -0
- truthound/ml/monitoring/alerting/handlers.py +427 -0
- truthound/ml/monitoring/alerting/rules.py +508 -0
- truthound/ml/monitoring/collectors/__init__.py +19 -0
- truthound/ml/monitoring/collectors/composite.py +105 -0
- truthound/ml/monitoring/collectors/drift.py +324 -0
- truthound/ml/monitoring/collectors/performance.py +179 -0
- truthound/ml/monitoring/collectors/quality.py +369 -0
- truthound/ml/monitoring/monitor.py +536 -0
- truthound/ml/monitoring/protocols.py +451 -0
- truthound/ml/monitoring/stores/__init__.py +15 -0
- truthound/ml/monitoring/stores/memory.py +201 -0
- truthound/ml/monitoring/stores/prometheus.py +296 -0
- truthound/ml/rule_learning/__init__.py +25 -0
- truthound/ml/rule_learning/constraint_miner.py +443 -0
- truthound/ml/rule_learning/pattern_learner.py +499 -0
- truthound/ml/rule_learning/profile_learner.py +462 -0
- truthound/multitenancy/__init__.py +326 -0
- truthound/multitenancy/core.py +852 -0
- truthound/multitenancy/integration.py +597 -0
- truthound/multitenancy/isolation.py +630 -0
- truthound/multitenancy/manager.py +770 -0
- truthound/multitenancy/middleware.py +765 -0
- truthound/multitenancy/quota.py +537 -0
- truthound/multitenancy/resolvers.py +603 -0
- truthound/multitenancy/storage.py +703 -0
- truthound/observability/__init__.py +307 -0
- truthound/observability/context.py +531 -0
- truthound/observability/instrumentation.py +611 -0
- truthound/observability/logging.py +887 -0
- truthound/observability/metrics.py +1157 -0
- truthound/observability/tracing/__init__.py +178 -0
- truthound/observability/tracing/baggage.py +310 -0
- truthound/observability/tracing/config.py +426 -0
- truthound/observability/tracing/exporter.py +787 -0
- truthound/observability/tracing/integration.py +1018 -0
- truthound/observability/tracing/otel/__init__.py +146 -0
- truthound/observability/tracing/otel/adapter.py +982 -0
- truthound/observability/tracing/otel/bridge.py +1177 -0
- truthound/observability/tracing/otel/compat.py +681 -0
- truthound/observability/tracing/otel/config.py +691 -0
- truthound/observability/tracing/otel/detection.py +327 -0
- truthound/observability/tracing/otel/protocols.py +426 -0
- truthound/observability/tracing/processor.py +561 -0
- truthound/observability/tracing/propagator.py +757 -0
- truthound/observability/tracing/provider.py +569 -0
- truthound/observability/tracing/resource.py +515 -0
- truthound/observability/tracing/sampler.py +487 -0
- truthound/observability/tracing/span.py +676 -0
- truthound/plugins/__init__.py +198 -0
- truthound/plugins/base.py +599 -0
- truthound/plugins/cli.py +680 -0
- truthound/plugins/dependencies/__init__.py +42 -0
- truthound/plugins/dependencies/graph.py +422 -0
- truthound/plugins/dependencies/resolver.py +417 -0
- truthound/plugins/discovery.py +379 -0
- truthound/plugins/docs/__init__.py +46 -0
- truthound/plugins/docs/extractor.py +444 -0
- truthound/plugins/docs/renderer.py +499 -0
- truthound/plugins/enterprise_manager.py +877 -0
- truthound/plugins/examples/__init__.py +19 -0
- truthound/plugins/examples/custom_validators.py +317 -0
- truthound/plugins/examples/slack_notifier.py +312 -0
- truthound/plugins/examples/xml_reporter.py +254 -0
- truthound/plugins/hooks.py +558 -0
- truthound/plugins/lifecycle/__init__.py +43 -0
- truthound/plugins/lifecycle/hot_reload.py +402 -0
- truthound/plugins/lifecycle/manager.py +371 -0
- truthound/plugins/manager.py +736 -0
- truthound/plugins/registry.py +338 -0
- truthound/plugins/security/__init__.py +93 -0
- truthound/plugins/security/exceptions.py +332 -0
- truthound/plugins/security/policies.py +348 -0
- truthound/plugins/security/protocols.py +643 -0
- truthound/plugins/security/sandbox/__init__.py +45 -0
- truthound/plugins/security/sandbox/context.py +158 -0
- truthound/plugins/security/sandbox/engines/__init__.py +19 -0
- truthound/plugins/security/sandbox/engines/container.py +379 -0
- truthound/plugins/security/sandbox/engines/noop.py +144 -0
- truthound/plugins/security/sandbox/engines/process.py +336 -0
- truthound/plugins/security/sandbox/factory.py +211 -0
- truthound/plugins/security/signing/__init__.py +57 -0
- truthound/plugins/security/signing/service.py +330 -0
- truthound/plugins/security/signing/trust_store.py +368 -0
- truthound/plugins/security/signing/verifier.py +459 -0
- truthound/plugins/versioning/__init__.py +41 -0
- truthound/plugins/versioning/constraints.py +297 -0
- truthound/plugins/versioning/resolver.py +329 -0
- truthound/profiler/__init__.py +1729 -0
- truthound/profiler/_lazy.py +452 -0
- truthound/profiler/ab_testing/__init__.py +80 -0
- truthound/profiler/ab_testing/analysis.py +449 -0
- truthound/profiler/ab_testing/base.py +257 -0
- truthound/profiler/ab_testing/experiment.py +395 -0
- truthound/profiler/ab_testing/tracking.py +368 -0
- truthound/profiler/auto_threshold.py +1170 -0
- truthound/profiler/base.py +579 -0
- truthound/profiler/cache_patterns.py +911 -0
- truthound/profiler/caching.py +1303 -0
- truthound/profiler/column_profiler.py +712 -0
- truthound/profiler/comparison.py +1007 -0
- truthound/profiler/custom_patterns.py +1170 -0
- truthound/profiler/dashboard/__init__.py +50 -0
- truthound/profiler/dashboard/app.py +476 -0
- truthound/profiler/dashboard/components.py +457 -0
- truthound/profiler/dashboard/config.py +72 -0
- truthound/profiler/distributed/__init__.py +83 -0
- truthound/profiler/distributed/base.py +281 -0
- truthound/profiler/distributed/dask_backend.py +498 -0
- truthound/profiler/distributed/local_backend.py +293 -0
- truthound/profiler/distributed/profiler.py +304 -0
- truthound/profiler/distributed/ray_backend.py +374 -0
- truthound/profiler/distributed/spark_backend.py +375 -0
- truthound/profiler/distributed.py +1366 -0
- truthound/profiler/enterprise_sampling.py +1065 -0
- truthound/profiler/errors.py +488 -0
- truthound/profiler/evolution/__init__.py +91 -0
- truthound/profiler/evolution/alerts.py +426 -0
- truthound/profiler/evolution/changes.py +206 -0
- truthound/profiler/evolution/compatibility.py +365 -0
- truthound/profiler/evolution/detector.py +372 -0
- truthound/profiler/evolution/protocols.py +121 -0
- truthound/profiler/generators/__init__.py +48 -0
- truthound/profiler/generators/base.py +384 -0
- truthound/profiler/generators/ml_rules.py +375 -0
- truthound/profiler/generators/pattern_rules.py +384 -0
- truthound/profiler/generators/schema_rules.py +267 -0
- truthound/profiler/generators/stats_rules.py +324 -0
- truthound/profiler/generators/suite_generator.py +857 -0
- truthound/profiler/i18n.py +1542 -0
- truthound/profiler/incremental.py +554 -0
- truthound/profiler/incremental_validation.py +1710 -0
- truthound/profiler/integration/__init__.py +73 -0
- truthound/profiler/integration/adapters.py +345 -0
- truthound/profiler/integration/context.py +371 -0
- truthound/profiler/integration/executor.py +527 -0
- truthound/profiler/integration/naming.py +75 -0
- truthound/profiler/integration/protocols.py +243 -0
- truthound/profiler/memory.py +1185 -0
- truthound/profiler/migration/__init__.py +60 -0
- truthound/profiler/migration/base.py +345 -0
- truthound/profiler/migration/manager.py +444 -0
- truthound/profiler/migration/v1_0_to_v1_1.py +484 -0
- truthound/profiler/ml/__init__.py +73 -0
- truthound/profiler/ml/base.py +244 -0
- truthound/profiler/ml/classifier.py +507 -0
- truthound/profiler/ml/feature_extraction.py +604 -0
- truthound/profiler/ml/pretrained.py +448 -0
- truthound/profiler/ml_inference.py +1276 -0
- truthound/profiler/native_patterns.py +815 -0
- truthound/profiler/observability.py +1184 -0
- truthound/profiler/process_timeout.py +1566 -0
- truthound/profiler/progress.py +568 -0
- truthound/profiler/progress_callbacks.py +1734 -0
- truthound/profiler/quality.py +1345 -0
- truthound/profiler/resilience.py +1180 -0
- truthound/profiler/sampled_matcher.py +794 -0
- truthound/profiler/sampling.py +1288 -0
- truthound/profiler/scheduling/__init__.py +82 -0
- truthound/profiler/scheduling/protocols.py +214 -0
- truthound/profiler/scheduling/scheduler.py +474 -0
- truthound/profiler/scheduling/storage.py +457 -0
- truthound/profiler/scheduling/triggers.py +449 -0
- truthound/profiler/schema.py +603 -0
- truthound/profiler/streaming.py +685 -0
- truthound/profiler/streaming_patterns.py +1354 -0
- truthound/profiler/suite_cli.py +625 -0
- truthound/profiler/suite_config.py +789 -0
- truthound/profiler/suite_export.py +1268 -0
- truthound/profiler/table_profiler.py +547 -0
- truthound/profiler/timeout.py +565 -0
- truthound/profiler/validation.py +1532 -0
- truthound/profiler/visualization/__init__.py +118 -0
- truthound/profiler/visualization/base.py +346 -0
- truthound/profiler/visualization/generator.py +1259 -0
- truthound/profiler/visualization/plotly_renderer.py +811 -0
- truthound/profiler/visualization/renderers.py +669 -0
- truthound/profiler/visualization/sections.py +540 -0
- truthound/profiler/visualization.py +2122 -0
- truthound/profiler/yaml_validation.py +1151 -0
- truthound/py.typed +0 -0
- truthound/ratelimit/__init__.py +248 -0
- truthound/ratelimit/algorithms.py +1108 -0
- truthound/ratelimit/core.py +573 -0
- truthound/ratelimit/integration.py +532 -0
- truthound/ratelimit/limiter.py +663 -0
- truthound/ratelimit/middleware.py +700 -0
- truthound/ratelimit/policy.py +792 -0
- truthound/ratelimit/storage.py +763 -0
- truthound/rbac/__init__.py +340 -0
- truthound/rbac/core.py +976 -0
- truthound/rbac/integration.py +760 -0
- truthound/rbac/manager.py +1052 -0
- truthound/rbac/middleware.py +842 -0
- truthound/rbac/policy.py +954 -0
- truthound/rbac/storage.py +878 -0
- truthound/realtime/__init__.py +141 -0
- truthound/realtime/adapters/__init__.py +43 -0
- truthound/realtime/adapters/base.py +533 -0
- truthound/realtime/adapters/kafka.py +487 -0
- truthound/realtime/adapters/kinesis.py +479 -0
- truthound/realtime/adapters/mock.py +243 -0
- truthound/realtime/base.py +553 -0
- truthound/realtime/factory.py +382 -0
- truthound/realtime/incremental.py +660 -0
- truthound/realtime/processing/__init__.py +67 -0
- truthound/realtime/processing/exactly_once.py +575 -0
- truthound/realtime/processing/state.py +547 -0
- truthound/realtime/processing/windows.py +647 -0
- truthound/realtime/protocols.py +569 -0
- truthound/realtime/streaming.py +605 -0
- truthound/realtime/testing/__init__.py +32 -0
- truthound/realtime/testing/containers.py +615 -0
- truthound/realtime/testing/fixtures.py +484 -0
- truthound/report.py +280 -0
- truthound/reporters/__init__.py +46 -0
- truthound/reporters/_protocols.py +30 -0
- truthound/reporters/base.py +324 -0
- truthound/reporters/ci/__init__.py +66 -0
- truthound/reporters/ci/azure.py +436 -0
- truthound/reporters/ci/base.py +509 -0
- truthound/reporters/ci/bitbucket.py +567 -0
- truthound/reporters/ci/circleci.py +547 -0
- truthound/reporters/ci/detection.py +364 -0
- truthound/reporters/ci/factory.py +182 -0
- truthound/reporters/ci/github.py +388 -0
- truthound/reporters/ci/gitlab.py +471 -0
- truthound/reporters/ci/jenkins.py +525 -0
- truthound/reporters/console_reporter.py +299 -0
- truthound/reporters/factory.py +211 -0
- truthound/reporters/html_reporter.py +524 -0
- truthound/reporters/json_reporter.py +256 -0
- truthound/reporters/markdown_reporter.py +280 -0
- truthound/reporters/sdk/__init__.py +174 -0
- truthound/reporters/sdk/builder.py +558 -0
- truthound/reporters/sdk/mixins.py +1150 -0
- truthound/reporters/sdk/schema.py +1493 -0
- truthound/reporters/sdk/templates.py +666 -0
- truthound/reporters/sdk/testing.py +968 -0
- truthound/scanners.py +170 -0
- truthound/scheduling/__init__.py +122 -0
- truthound/scheduling/cron.py +1136 -0
- truthound/scheduling/presets.py +212 -0
- truthound/schema.py +275 -0
- truthound/secrets/__init__.py +173 -0
- truthound/secrets/base.py +618 -0
- truthound/secrets/cloud.py +682 -0
- truthound/secrets/integration.py +507 -0
- truthound/secrets/manager.py +633 -0
- truthound/secrets/oidc/__init__.py +172 -0
- truthound/secrets/oidc/base.py +902 -0
- truthound/secrets/oidc/credential_provider.py +623 -0
- truthound/secrets/oidc/exchangers.py +1001 -0
- truthound/secrets/oidc/github/__init__.py +110 -0
- truthound/secrets/oidc/github/claims.py +718 -0
- truthound/secrets/oidc/github/enhanced_provider.py +693 -0
- truthound/secrets/oidc/github/trust_policy.py +742 -0
- truthound/secrets/oidc/github/verification.py +723 -0
- truthound/secrets/oidc/github/workflow.py +691 -0
- truthound/secrets/oidc/providers.py +825 -0
- truthound/secrets/providers.py +506 -0
- truthound/secrets/resolver.py +495 -0
- truthound/stores/__init__.py +177 -0
- truthound/stores/backends/__init__.py +18 -0
- truthound/stores/backends/_protocols.py +340 -0
- truthound/stores/backends/azure_blob.py +530 -0
- truthound/stores/backends/concurrent_filesystem.py +915 -0
- truthound/stores/backends/connection_pool.py +1365 -0
- truthound/stores/backends/database.py +743 -0
- truthound/stores/backends/filesystem.py +538 -0
- truthound/stores/backends/gcs.py +399 -0
- truthound/stores/backends/memory.py +354 -0
- truthound/stores/backends/s3.py +434 -0
- truthound/stores/backpressure/__init__.py +84 -0
- truthound/stores/backpressure/base.py +375 -0
- truthound/stores/backpressure/circuit_breaker.py +434 -0
- truthound/stores/backpressure/monitor.py +376 -0
- truthound/stores/backpressure/strategies.py +677 -0
- truthound/stores/base.py +551 -0
- truthound/stores/batching/__init__.py +65 -0
- truthound/stores/batching/base.py +305 -0
- truthound/stores/batching/buffer.py +370 -0
- truthound/stores/batching/store.py +248 -0
- truthound/stores/batching/writer.py +521 -0
- truthound/stores/caching/__init__.py +60 -0
- truthound/stores/caching/backends.py +684 -0
- truthound/stores/caching/base.py +356 -0
- truthound/stores/caching/store.py +305 -0
- truthound/stores/compression/__init__.py +193 -0
- truthound/stores/compression/adaptive.py +694 -0
- truthound/stores/compression/base.py +514 -0
- truthound/stores/compression/pipeline.py +868 -0
- truthound/stores/compression/providers.py +672 -0
- truthound/stores/compression/streaming.py +832 -0
- truthound/stores/concurrency/__init__.py +81 -0
- truthound/stores/concurrency/atomic.py +556 -0
- truthound/stores/concurrency/index.py +775 -0
- truthound/stores/concurrency/locks.py +576 -0
- truthound/stores/concurrency/manager.py +482 -0
- truthound/stores/encryption/__init__.py +297 -0
- truthound/stores/encryption/base.py +952 -0
- truthound/stores/encryption/keys.py +1191 -0
- truthound/stores/encryption/pipeline.py +903 -0
- truthound/stores/encryption/providers.py +953 -0
- truthound/stores/encryption/streaming.py +950 -0
- truthound/stores/expectations.py +227 -0
- truthound/stores/factory.py +246 -0
- truthound/stores/migration/__init__.py +75 -0
- truthound/stores/migration/base.py +480 -0
- truthound/stores/migration/manager.py +347 -0
- truthound/stores/migration/registry.py +382 -0
- truthound/stores/migration/store.py +559 -0
- truthound/stores/observability/__init__.py +106 -0
- truthound/stores/observability/audit.py +718 -0
- truthound/stores/observability/config.py +270 -0
- truthound/stores/observability/factory.py +208 -0
- truthound/stores/observability/metrics.py +636 -0
- truthound/stores/observability/protocols.py +410 -0
- truthound/stores/observability/store.py +570 -0
- truthound/stores/observability/tracing.py +784 -0
- truthound/stores/replication/__init__.py +76 -0
- truthound/stores/replication/base.py +260 -0
- truthound/stores/replication/monitor.py +269 -0
- truthound/stores/replication/store.py +439 -0
- truthound/stores/replication/syncer.py +391 -0
- truthound/stores/results.py +359 -0
- truthound/stores/retention/__init__.py +77 -0
- truthound/stores/retention/base.py +378 -0
- truthound/stores/retention/policies.py +621 -0
- truthound/stores/retention/scheduler.py +279 -0
- truthound/stores/retention/store.py +526 -0
- truthound/stores/streaming/__init__.py +138 -0
- truthound/stores/streaming/base.py +801 -0
- truthound/stores/streaming/database.py +984 -0
- truthound/stores/streaming/filesystem.py +719 -0
- truthound/stores/streaming/reader.py +629 -0
- truthound/stores/streaming/s3.py +843 -0
- truthound/stores/streaming/writer.py +790 -0
- truthound/stores/tiering/__init__.py +108 -0
- truthound/stores/tiering/base.py +462 -0
- truthound/stores/tiering/manager.py +249 -0
- truthound/stores/tiering/policies.py +692 -0
- truthound/stores/tiering/store.py +526 -0
- truthound/stores/versioning/__init__.py +56 -0
- truthound/stores/versioning/base.py +376 -0
- truthound/stores/versioning/store.py +660 -0
- truthound/stores/versioning/strategies.py +353 -0
- truthound/types.py +56 -0
- truthound/validators/__init__.py +774 -0
- truthound/validators/aggregate/__init__.py +27 -0
- truthound/validators/aggregate/central.py +116 -0
- truthound/validators/aggregate/extremes.py +116 -0
- truthound/validators/aggregate/spread.py +118 -0
- truthound/validators/aggregate/sum.py +64 -0
- truthound/validators/aggregate/type.py +78 -0
- truthound/validators/anomaly/__init__.py +93 -0
- truthound/validators/anomaly/base.py +431 -0
- truthound/validators/anomaly/ml_based.py +1190 -0
- truthound/validators/anomaly/multivariate.py +647 -0
- truthound/validators/anomaly/statistical.py +599 -0
- truthound/validators/base.py +1089 -0
- truthound/validators/business_rule/__init__.py +46 -0
- truthound/validators/business_rule/base.py +147 -0
- truthound/validators/business_rule/checksum.py +509 -0
- truthound/validators/business_rule/financial.py +526 -0
- truthound/validators/cache.py +733 -0
- truthound/validators/completeness/__init__.py +39 -0
- truthound/validators/completeness/conditional.py +73 -0
- truthound/validators/completeness/default.py +98 -0
- truthound/validators/completeness/empty.py +103 -0
- truthound/validators/completeness/nan.py +337 -0
- truthound/validators/completeness/null.py +152 -0
- truthound/validators/cross_table/__init__.py +17 -0
- truthound/validators/cross_table/aggregate.py +333 -0
- truthound/validators/cross_table/row_count.py +122 -0
- truthound/validators/datetime/__init__.py +29 -0
- truthound/validators/datetime/format.py +78 -0
- truthound/validators/datetime/freshness.py +269 -0
- truthound/validators/datetime/order.py +73 -0
- truthound/validators/datetime/parseable.py +185 -0
- truthound/validators/datetime/range.py +202 -0
- truthound/validators/datetime/timezone.py +69 -0
- truthound/validators/distribution/__init__.py +49 -0
- truthound/validators/distribution/distribution.py +128 -0
- truthound/validators/distribution/monotonic.py +119 -0
- truthound/validators/distribution/outlier.py +178 -0
- truthound/validators/distribution/quantile.py +80 -0
- truthound/validators/distribution/range.py +254 -0
- truthound/validators/distribution/set.py +125 -0
- truthound/validators/distribution/statistical.py +459 -0
- truthound/validators/drift/__init__.py +79 -0
- truthound/validators/drift/base.py +427 -0
- truthound/validators/drift/multi_feature.py +401 -0
- truthound/validators/drift/numeric.py +395 -0
- truthound/validators/drift/psi.py +446 -0
- truthound/validators/drift/statistical.py +510 -0
- truthound/validators/enterprise.py +1658 -0
- truthound/validators/geospatial/__init__.py +80 -0
- truthound/validators/geospatial/base.py +97 -0
- truthound/validators/geospatial/boundary.py +238 -0
- truthound/validators/geospatial/coordinate.py +351 -0
- truthound/validators/geospatial/distance.py +399 -0
- truthound/validators/geospatial/polygon.py +665 -0
- truthound/validators/i18n/__init__.py +308 -0
- truthound/validators/i18n/bidi.py +571 -0
- truthound/validators/i18n/catalogs.py +570 -0
- truthound/validators/i18n/dialects.py +763 -0
- truthound/validators/i18n/extended_catalogs.py +549 -0
- truthound/validators/i18n/formatting.py +1434 -0
- truthound/validators/i18n/loader.py +1020 -0
- truthound/validators/i18n/messages.py +521 -0
- truthound/validators/i18n/plural.py +683 -0
- truthound/validators/i18n/protocols.py +855 -0
- truthound/validators/i18n/tms.py +1162 -0
- truthound/validators/localization/__init__.py +53 -0
- truthound/validators/localization/base.py +122 -0
- truthound/validators/localization/chinese.py +362 -0
- truthound/validators/localization/japanese.py +275 -0
- truthound/validators/localization/korean.py +524 -0
- truthound/validators/memory/__init__.py +94 -0
- truthound/validators/memory/approximate_knn.py +506 -0
- truthound/validators/memory/base.py +547 -0
- truthound/validators/memory/sgd_online.py +719 -0
- truthound/validators/memory/streaming_ecdf.py +753 -0
- truthound/validators/ml_feature/__init__.py +54 -0
- truthound/validators/ml_feature/base.py +249 -0
- truthound/validators/ml_feature/correlation.py +299 -0
- truthound/validators/ml_feature/leakage.py +344 -0
- truthound/validators/ml_feature/null_impact.py +270 -0
- truthound/validators/ml_feature/scale.py +264 -0
- truthound/validators/multi_column/__init__.py +89 -0
- truthound/validators/multi_column/arithmetic.py +284 -0
- truthound/validators/multi_column/base.py +231 -0
- truthound/validators/multi_column/comparison.py +273 -0
- truthound/validators/multi_column/consistency.py +312 -0
- truthound/validators/multi_column/statistical.py +299 -0
- truthound/validators/optimization/__init__.py +164 -0
- truthound/validators/optimization/aggregation.py +563 -0
- truthound/validators/optimization/covariance.py +556 -0
- truthound/validators/optimization/geo.py +626 -0
- truthound/validators/optimization/graph.py +587 -0
- truthound/validators/optimization/orchestrator.py +970 -0
- truthound/validators/optimization/profiling.py +1312 -0
- truthound/validators/privacy/__init__.py +223 -0
- truthound/validators/privacy/base.py +635 -0
- truthound/validators/privacy/ccpa.py +670 -0
- truthound/validators/privacy/gdpr.py +728 -0
- truthound/validators/privacy/global_patterns.py +604 -0
- truthound/validators/privacy/plugins.py +867 -0
- truthound/validators/profiling/__init__.py +52 -0
- truthound/validators/profiling/base.py +175 -0
- truthound/validators/profiling/cardinality.py +312 -0
- truthound/validators/profiling/entropy.py +391 -0
- truthound/validators/profiling/frequency.py +455 -0
- truthound/validators/pushdown_support.py +660 -0
- truthound/validators/query/__init__.py +91 -0
- truthound/validators/query/aggregate.py +346 -0
- truthound/validators/query/base.py +246 -0
- truthound/validators/query/column.py +249 -0
- truthound/validators/query/expression.py +274 -0
- truthound/validators/query/result.py +323 -0
- truthound/validators/query/row_count.py +264 -0
- truthound/validators/referential/__init__.py +80 -0
- truthound/validators/referential/base.py +395 -0
- truthound/validators/referential/cascade.py +391 -0
- truthound/validators/referential/circular.py +563 -0
- truthound/validators/referential/foreign_key.py +624 -0
- truthound/validators/referential/orphan.py +485 -0
- truthound/validators/registry.py +112 -0
- truthound/validators/schema/__init__.py +41 -0
- truthound/validators/schema/column_count.py +142 -0
- truthound/validators/schema/column_exists.py +80 -0
- truthound/validators/schema/column_order.py +82 -0
- truthound/validators/schema/column_pair.py +85 -0
- truthound/validators/schema/column_pair_set.py +195 -0
- truthound/validators/schema/column_type.py +94 -0
- truthound/validators/schema/multi_column.py +53 -0
- truthound/validators/schema/multi_column_aggregate.py +175 -0
- truthound/validators/schema/referential.py +274 -0
- truthound/validators/schema/table_schema.py +91 -0
- truthound/validators/schema_validator.py +219 -0
- truthound/validators/sdk/__init__.py +250 -0
- truthound/validators/sdk/builder.py +680 -0
- truthound/validators/sdk/decorators.py +474 -0
- truthound/validators/sdk/enterprise/__init__.py +211 -0
- truthound/validators/sdk/enterprise/docs.py +725 -0
- truthound/validators/sdk/enterprise/fuzzing.py +659 -0
- truthound/validators/sdk/enterprise/licensing.py +709 -0
- truthound/validators/sdk/enterprise/manager.py +543 -0
- truthound/validators/sdk/enterprise/resources.py +628 -0
- truthound/validators/sdk/enterprise/sandbox.py +766 -0
- truthound/validators/sdk/enterprise/signing.py +603 -0
- truthound/validators/sdk/enterprise/templates.py +865 -0
- truthound/validators/sdk/enterprise/versioning.py +659 -0
- truthound/validators/sdk/templates.py +757 -0
- truthound/validators/sdk/testing.py +807 -0
- truthound/validators/security/__init__.py +181 -0
- truthound/validators/security/redos/__init__.py +182 -0
- truthound/validators/security/redos/core.py +861 -0
- truthound/validators/security/redos/cpu_monitor.py +593 -0
- truthound/validators/security/redos/cve_database.py +791 -0
- truthound/validators/security/redos/ml/__init__.py +155 -0
- truthound/validators/security/redos/ml/base.py +785 -0
- truthound/validators/security/redos/ml/datasets.py +618 -0
- truthound/validators/security/redos/ml/features.py +359 -0
- truthound/validators/security/redos/ml/models.py +1000 -0
- truthound/validators/security/redos/ml/predictor.py +507 -0
- truthound/validators/security/redos/ml/storage.py +632 -0
- truthound/validators/security/redos/ml/training.py +571 -0
- truthound/validators/security/redos/ml_analyzer.py +937 -0
- truthound/validators/security/redos/optimizer.py +674 -0
- truthound/validators/security/redos/profiler.py +682 -0
- truthound/validators/security/redos/re2_engine.py +709 -0
- truthound/validators/security/redos.py +886 -0
- truthound/validators/security/sql_security.py +1247 -0
- truthound/validators/streaming/__init__.py +126 -0
- truthound/validators/streaming/base.py +292 -0
- truthound/validators/streaming/completeness.py +210 -0
- truthound/validators/streaming/mixin.py +575 -0
- truthound/validators/streaming/range.py +308 -0
- truthound/validators/streaming/sources.py +846 -0
- truthound/validators/string/__init__.py +57 -0
- truthound/validators/string/casing.py +158 -0
- truthound/validators/string/charset.py +96 -0
- truthound/validators/string/format.py +501 -0
- truthound/validators/string/json.py +77 -0
- truthound/validators/string/json_schema.py +184 -0
- truthound/validators/string/length.py +104 -0
- truthound/validators/string/like_pattern.py +237 -0
- truthound/validators/string/regex.py +202 -0
- truthound/validators/string/regex_extended.py +435 -0
- truthound/validators/table/__init__.py +88 -0
- truthound/validators/table/base.py +78 -0
- truthound/validators/table/column_count.py +198 -0
- truthound/validators/table/freshness.py +362 -0
- truthound/validators/table/row_count.py +251 -0
- truthound/validators/table/schema.py +333 -0
- truthound/validators/table/size.py +285 -0
- truthound/validators/timeout/__init__.py +102 -0
- truthound/validators/timeout/advanced/__init__.py +247 -0
- truthound/validators/timeout/advanced/circuit_breaker.py +675 -0
- truthound/validators/timeout/advanced/prediction.py +773 -0
- truthound/validators/timeout/advanced/priority.py +618 -0
- truthound/validators/timeout/advanced/redis_backend.py +770 -0
- truthound/validators/timeout/advanced/retry.py +721 -0
- truthound/validators/timeout/advanced/sampling.py +788 -0
- truthound/validators/timeout/advanced/sla.py +661 -0
- truthound/validators/timeout/advanced/telemetry.py +804 -0
- truthound/validators/timeout/cascade.py +477 -0
- truthound/validators/timeout/deadline.py +657 -0
- truthound/validators/timeout/degradation.py +525 -0
- truthound/validators/timeout/distributed.py +597 -0
- truthound/validators/timeseries/__init__.py +89 -0
- truthound/validators/timeseries/base.py +326 -0
- truthound/validators/timeseries/completeness.py +617 -0
- truthound/validators/timeseries/gap.py +485 -0
- truthound/validators/timeseries/monotonic.py +310 -0
- truthound/validators/timeseries/seasonality.py +422 -0
- truthound/validators/timeseries/trend.py +510 -0
- truthound/validators/uniqueness/__init__.py +59 -0
- truthound/validators/uniqueness/approximate.py +475 -0
- truthound/validators/uniqueness/distinct_values.py +253 -0
- truthound/validators/uniqueness/duplicate.py +118 -0
- truthound/validators/uniqueness/primary_key.py +140 -0
- truthound/validators/uniqueness/unique.py +191 -0
- truthound/validators/uniqueness/within_record.py +599 -0
- truthound/validators/utils.py +756 -0
- truthound-1.0.8.dist-info/METADATA +474 -0
- truthound-1.0.8.dist-info/RECORD +877 -0
- truthound-1.0.8.dist-info/WHEEL +4 -0
- truthound-1.0.8.dist-info/entry_points.txt +2 -0
- truthound-1.0.8.dist-info/licenses/LICENSE +190 -0
|
@@ -0,0 +1,915 @@
|
|
|
1
|
+
"""Concurrent filesystem store with full concurrency control.
|
|
2
|
+
|
|
3
|
+
This module provides a thread-safe and process-safe filesystem store
|
|
4
|
+
implementation that builds on the concurrency primitives in the
|
|
5
|
+
concurrency submodule.
|
|
6
|
+
|
|
7
|
+
Features:
|
|
8
|
+
- Thread-safe operations via locking
|
|
9
|
+
- Process-safe operations via file locks
|
|
10
|
+
- Atomic writes with write-to-temp-then-rename
|
|
11
|
+
- Consistent index reads via snapshots
|
|
12
|
+
- Transaction support for batch operations
|
|
13
|
+
- Automatic recovery from failures
|
|
14
|
+
|
|
15
|
+
Example:
|
|
16
|
+
>>> from truthound.stores.backends.concurrent_filesystem import (
|
|
17
|
+
... ConcurrentFileSystemStore,
|
|
18
|
+
... ConcurrencyConfig,
|
|
19
|
+
... )
|
|
20
|
+
>>>
|
|
21
|
+
>>> # Create store with concurrency enabled
|
|
22
|
+
>>> store = ConcurrentFileSystemStore(
|
|
23
|
+
... base_path=".truthound/results",
|
|
24
|
+
... concurrency=ConcurrencyConfig(
|
|
25
|
+
... lock_strategy="auto",
|
|
26
|
+
... enable_wal=True,
|
|
27
|
+
... ),
|
|
28
|
+
... )
|
|
29
|
+
>>>
|
|
30
|
+
>>> # Use with transaction for batch operations
|
|
31
|
+
>>> with store.batch() as batch:
|
|
32
|
+
... batch.save(result1)
|
|
33
|
+
... batch.save(result2)
|
|
34
|
+
... batch.commit()
|
|
35
|
+
"""
|
|
36
|
+
|
|
37
|
+
from __future__ import annotations
|
|
38
|
+
|
|
39
|
+
import gzip
|
|
40
|
+
import json
|
|
41
|
+
import os
|
|
42
|
+
from dataclasses import dataclass, field
|
|
43
|
+
from datetime import datetime
|
|
44
|
+
from enum import Enum
|
|
45
|
+
from pathlib import Path
|
|
46
|
+
from typing import Any, Callable, Iterator, TypeVar
|
|
47
|
+
|
|
48
|
+
from truthound.stores.base import (
|
|
49
|
+
StoreConfig,
|
|
50
|
+
StoreNotFoundError,
|
|
51
|
+
StoreQuery,
|
|
52
|
+
StoreReadError,
|
|
53
|
+
StoreWriteError,
|
|
54
|
+
ValidationStore,
|
|
55
|
+
ExpectationStore,
|
|
56
|
+
)
|
|
57
|
+
from truthound.stores.results import ValidationResult
|
|
58
|
+
from truthound.stores.expectations import ExpectationSuite
|
|
59
|
+
from truthound.stores.concurrency.locks import (
|
|
60
|
+
LockMode,
|
|
61
|
+
LockStrategy,
|
|
62
|
+
get_default_lock_strategy,
|
|
63
|
+
NoOpLockStrategy,
|
|
64
|
+
FcntlLockStrategy,
|
|
65
|
+
FileLockStrategy,
|
|
66
|
+
PortalockerStrategy,
|
|
67
|
+
)
|
|
68
|
+
from truthound.stores.concurrency.manager import FileLockManager, LockStatistics
|
|
69
|
+
from truthound.stores.concurrency.atomic import (
|
|
70
|
+
AtomicFileWriter,
|
|
71
|
+
AtomicFileReader,
|
|
72
|
+
atomic_write,
|
|
73
|
+
atomic_read,
|
|
74
|
+
)
|
|
75
|
+
from truthound.stores.concurrency.index import ConcurrentIndex, IndexTransaction
|
|
76
|
+
|
|
77
|
+
|
|
78
|
+
T = TypeVar("T")
|
|
79
|
+
|
|
80
|
+
|
|
81
|
+
class LockStrategyType(str, Enum):
    """Identifiers for the lock strategies a store can be configured with.

    Members are also plain strings, so each one compares equal to its
    lowercase value (for example ``LockStrategyType.AUTO == "auto"``).
    """

    # Auto-detect the best strategy.
    AUTO = "auto"
    # POSIX fcntl locking (Unix only).
    FCNTL = "fcntl"
    # Locking through the filelock library.
    FILELOCK = "filelock"
    # Locking through the portalocker library.
    PORTALOCKER = "portalocker"
    # No locking (single-threaded use).
    NONE = "none"
|
|
89
|
+
|
|
90
|
+
|
|
91
|
+
@dataclass
class ConcurrencyConfig:
    """Settings that control locking and durability for a store.

    Every field has a default, so ``ConcurrencyConfig()`` is a usable
    configuration on its own.
    """

    # Lock backend to use: a LockStrategyType member or its string value.
    lock_strategy: LockStrategyType | str = LockStrategyType.AUTO
    # Whether write-ahead logging is used.
    enable_wal: bool = True
    # Default timeout for lock acquisition.
    # NOTE(review): unit is presumably seconds -- confirm in FileLockManager.
    lock_timeout: float = 30.0
    # Whether deadlocks should be detected.
    enable_deadlock_detection: bool = True
    # Whether lock statistics are collected.
    enable_statistics: bool = True
    # Whether a file is backed up before it is overwritten.
    create_backup: bool = False
|
|
110
|
+
|
|
111
|
+
|
|
112
|
+
@dataclass
class ConcurrentFileSystemConfig(StoreConfig):
    """StoreConfig extended with filesystem and concurrency options.

    ``namespace`` and ``prefix`` are read by :meth:`get_full_path` but are
    not declared here; presumably they are inherited from ``StoreConfig``.
    """

    # Base directory for storing files.
    base_path: str = ".truthound/store"
    # Extension appended to every stored file name.
    file_extension: str = ".json"
    # Whether missing directories are created.
    create_dirs: bool = True
    # Whether JSON is written with indentation.
    pretty_print: bool = True
    # Whether stored files are compressed.
    use_compression: bool = False
    # Concurrency control settings; a fresh ConcurrencyConfig per instance.
    concurrency: ConcurrencyConfig = field(default_factory=ConcurrencyConfig)

    def get_full_path(self) -> Path:
        """Return ``base_path`` extended by the namespace and prefix.

        Empty (falsy) namespace or prefix values are skipped.
        """
        full_path = Path(self.base_path)
        for segment in (self.namespace, self.prefix):
            if segment:
                full_path = full_path / segment
        return full_path
|
|
142
|
+
|
|
143
|
+
|
|
144
|
+
class BatchContext:
    """Buffer of save/delete requests that are applied together on commit.

    Requests are only recorded until :meth:`commit` is called. On commit the
    deletes run first, then the saves, all against one index transaction.
    A failing operation does not stop the remaining ones: the index
    transaction is still committed and the collected failures are raised
    afterwards as a single ``StoreWriteError``.

    Example:
        >>> with store.batch() as batch:
        ...     batch.save(result1)
        ...     batch.save(result2)
        ...     batch.delete("old-result")
        ...     batch.commit()
    """

    def __init__(
        self,
        store: "ConcurrentFileSystemStore",
        index_txn: IndexTransaction,
    ) -> None:
        """Create an empty batch.

        Args:
            store: Store whose ``_do_save`` / ``_do_delete`` perform the work.
            index_txn: Index transaction handed to every buffered operation.
        """
        self._store = store
        self._index_txn = index_txn
        self._pending_saves: list[ValidationResult] = []
        self._pending_deletes: list[str] = []
        # Set by both commit() and rollback(); a finished batch is closed.
        self._committed = False

    def _ensure_open(self, message: str) -> None:
        """Raise ``RuntimeError(message)`` if the batch is already finished."""
        if self._committed:
            raise RuntimeError(message)

    def save(self, item: ValidationResult) -> str:
        """Queue an item to be saved on commit.

        Args:
            item: Item to save.

        Returns:
            The item's run ID.

        Raises:
            RuntimeError: If the batch was already committed or rolled back.
        """
        self._ensure_open("Batch already committed")
        self._pending_saves.append(item)
        return item.run_id

    def delete(self, item_id: str) -> None:
        """Queue an item ID to be deleted on commit.

        Args:
            item_id: Item to delete.

        Raises:
            RuntimeError: If the batch was already committed or rolled back.
        """
        self._ensure_open("Batch already committed")
        self._pending_deletes.append(item_id)

    def commit(self) -> int:
        """Apply every queued operation and commit the index transaction.

        Returns:
            Number of operations that succeeded.

        Raises:
            RuntimeError: If the batch was already committed or rolled back.
            StoreWriteError: If any operation failed. This is raised after
                the index transaction has been committed.
        """
        self._ensure_open("Batch already committed")

        txn = self._index_txn
        failures = []
        applied = 0

        # Deletes run before saves.
        for doomed_id in self._pending_deletes:
            try:
                self._store._do_delete(doomed_id, txn)
            except Exception as exc:
                failures.append(f"Delete {doomed_id}: {exc}")
            else:
                applied += 1

        for result in self._pending_saves:
            try:
                self._store._do_save(result, txn)
            except Exception as exc:
                failures.append(f"Save {result.run_id}: {exc}")
            else:
                applied += 1

        # The index is committed even when some operations failed.
        txn.commit()
        self._committed = True

        if failures:
            raise StoreWriteError(f"Batch errors: {'; '.join(failures)}")
        return applied

    def rollback(self) -> None:
        """Discard every queued operation and roll back the index transaction.

        Raises:
            RuntimeError: If the batch was already committed or rolled back.
        """
        self._ensure_open("Cannot rollback committed batch")

        self._pending_saves.clear()
        self._pending_deletes.clear()
        self._index_txn.rollback()
        # Mark the batch finished so it cannot be reused.
        self._committed = True

    @property
    def pending_count(self) -> int:
        """Number of queued saves plus queued deletes."""
        return len(self._pending_deletes) + len(self._pending_saves)
|
|
256
|
+
|
|
257
|
+
|
|
258
|
+
class ConcurrentFileSystemStore(ValidationStore[ConcurrentFileSystemConfig]):
|
|
259
|
+
"""Thread-safe and process-safe filesystem store.
|
|
260
|
+
|
|
261
|
+
This store implementation provides full concurrency control for
|
|
262
|
+
multi-threaded and multi-process access to the filesystem.
|
|
263
|
+
|
|
264
|
+
Features:
|
|
265
|
+
- Pluggable lock strategies (fcntl, filelock, portalocker)
|
|
266
|
+
- Atomic file writes using temp-and-rename pattern
|
|
267
|
+
- Consistent index reads via MVCC-like snapshots
|
|
268
|
+
- Transaction support for batch operations
|
|
269
|
+
- Write-ahead logging for durability
|
|
270
|
+
- Automatic recovery from failures
|
|
271
|
+
- Lock statistics for debugging
|
|
272
|
+
|
|
273
|
+
Example:
|
|
274
|
+
>>> store = ConcurrentFileSystemStore(
|
|
275
|
+
... base_path=".truthound/results",
|
|
276
|
+
... concurrency=ConcurrencyConfig(lock_strategy="auto"),
|
|
277
|
+
... )
|
|
278
|
+
>>>
|
|
279
|
+
>>> # Simple operations
|
|
280
|
+
>>> run_id = store.save(result)
|
|
281
|
+
>>> retrieved = store.get(run_id)
|
|
282
|
+
>>>
|
|
283
|
+
>>> # Batch operations
|
|
284
|
+
>>> with store.batch() as batch:
|
|
285
|
+
... batch.save(result1)
|
|
286
|
+
... batch.save(result2)
|
|
287
|
+
... batch.commit()
|
|
288
|
+
"""
|
|
289
|
+
|
|
290
|
+
def __init__(
    self,
    base_path: str = ".truthound/store",
    namespace: str = "default",
    prefix: str = "validations",
    compression: bool = False,
    concurrency: ConcurrencyConfig | None = None,
    **kwargs: Any,
) -> None:
    """Initialize the concurrent filesystem store.

    Args:
        base_path: Base directory for storing files.
        namespace: Namespace for organizing data.
        prefix: Additional path prefix.
        compression: Whether to compress stored files.
        concurrency: Concurrency control configuration. A default
            ``ConcurrencyConfig`` is used when omitted.
        **kwargs: Additional configuration options. Only keys that name
            an init-able field of ``ConcurrentFileSystemConfig`` are
            forwarded; all other keys are ignored.
    """
    # Local import: the module-level dataclasses import lacks ``fields``.
    from dataclasses import fields

    concurrency = concurrency or ConcurrencyConfig()

    # Filter on real dataclass fields instead of hasattr(): hasattr() also
    # matches methods (e.g. ``get_full_path``), which the constructor then
    # rejects, and it misses fields that have no class-level default.
    accepted = {f.name for f in fields(ConcurrentFileSystemConfig) if f.init}
    extra_options = {
        key: value for key, value in kwargs.items() if key in accepted
    }

    config = ConcurrentFileSystemConfig(
        base_path=base_path,
        namespace=namespace,
        prefix=prefix,
        use_compression=compression,
        concurrency=concurrency,
        **extra_options,
    )
    super().__init__(config)

    # Concurrency primitives are created later, in _do_initialize().
    self._lock_strategy: LockStrategy | None = None
    self._lock_manager: FileLockManager | None = None
    self._index: ConcurrentIndex | None = None
|
|
324
|
+
|
|
325
|
+
@classmethod
def _default_config(cls) -> ConcurrentFileSystemConfig:
    """Return a ``ConcurrentFileSystemConfig`` built from its defaults."""
    default_config = ConcurrentFileSystemConfig()
    return default_config
|
|
329
|
+
|
|
330
|
+
def _create_lock_strategy(self) -> LockStrategy:
    """Build the lock strategy selected by ``concurrency.lock_strategy``.

    A string setting is first converted to ``LockStrategyType``, which
    raises ``ValueError`` for a string that names no member.

    Returns:
        A new strategy instance. ``AUTO`` and any unrecognised value use
        ``get_default_lock_strategy()``.
    """
    requested = self._config.concurrency.lock_strategy
    if isinstance(requested, str):
        requested = LockStrategyType(requested)

    # Checked in order; the first matching entry builds the strategy.
    factories = (
        (LockStrategyType.AUTO, get_default_lock_strategy),
        (LockStrategyType.FCNTL, FcntlLockStrategy),
        (LockStrategyType.FILELOCK, FileLockStrategy),
        (LockStrategyType.PORTALOCKER, PortalockerStrategy),
        (LockStrategyType.NONE, NoOpLockStrategy),
    )
    for kind, factory in factories:
        if requested == kind:
            return factory()

    # Fallback for values that match no member.
    return get_default_lock_strategy()
|
|
349
|
+
|
|
350
|
+
def _do_initialize(self) -> None:
    """Create the storage directory, lock manager and concurrent index.

    The directory is only created when ``create_dirs`` is set. The lock
    strategy, lock manager and index are stored on the instance, and the
    index is initialized last.
    """
    cfg = self._config
    storage_dir = cfg.get_full_path()
    if cfg.create_dirs:
        storage_dir.mkdir(parents=True, exist_ok=True)

    locking = cfg.concurrency

    # Lock strategy first, then the manager that wraps it.
    self._lock_strategy = self._create_lock_strategy()
    self._lock_manager = FileLockManager(
        strategy=self._lock_strategy,
        enable_deadlock_detection=locking.enable_deadlock_detection,
        enable_statistics=locking.enable_statistics,
        default_timeout=locking.lock_timeout,
    )

    # The index shares the lock manager with the store.
    self._index = ConcurrentIndex(
        base_path=storage_dir,
        lock_manager=self._lock_manager,
        wal_enabled=locking.enable_wal,
    )
    self._index.initialize()
|
|
373
|
+
|
|
374
|
+
def _get_file_path(self, item_id: str) -> Path:
|
|
375
|
+
"""Get the file path for an item."""
|
|
376
|
+
ext = self._config.file_extension
|
|
377
|
+
if self._config.use_compression:
|
|
378
|
+
ext += ".gz"
|
|
379
|
+
return self._config.get_full_path() / f"{item_id}{ext}"
|
|
380
|
+
|
|
381
|
+
def _serialize(self, data: dict[str, Any]) -> bytes:
|
|
382
|
+
"""Serialize data to bytes."""
|
|
383
|
+
indent = 2 if self._config.pretty_print else None
|
|
384
|
+
json_str = json.dumps(data, indent=indent, default=str)
|
|
385
|
+
content = json_str.encode("utf-8")
|
|
386
|
+
|
|
387
|
+
if self._config.use_compression:
|
|
388
|
+
content = gzip.compress(content)
|
|
389
|
+
|
|
390
|
+
return content
|
|
391
|
+
|
|
392
|
+
def _deserialize(self, content: bytes) -> dict[str, Any]:
|
|
393
|
+
"""Deserialize bytes to data."""
|
|
394
|
+
if self._config.use_compression:
|
|
395
|
+
content = gzip.decompress(content)
|
|
396
|
+
|
|
397
|
+
return json.loads(content.decode("utf-8"))
|
|
398
|
+
|
|
399
|
+
def _do_save(
    self,
    item: ValidationResult,
    index_txn: IndexTransaction | None = None,
) -> str:
    """Write one item to disk and record it in the index.

    Args:
        item: Item to save.
        index_txn: Index transaction that receives the index update. When
            omitted (or falsy) the store's own index is updated directly.

    Returns:
        The item's run ID.

    Raises:
        StoreWriteError: If the atomic write reports failure.
    """
    run_id = item.run_id
    destination = self._get_file_path(run_id)
    payload = self._serialize(item.to_dict())

    # The file is written before the index is touched.
    outcome = atomic_write(
        destination,
        payload,
        create_backup=self._config.concurrency.create_backup,
        lock_manager=self._lock_manager,
    )
    if not outcome.success:
        raise StoreWriteError(f"Failed to write {destination}: {outcome.error}")

    # Summary stored in the index next to the item ID.
    index_entry = {
        "data_asset": item.data_asset,
        "run_time": item.run_time.isoformat(),
        "status": item.status.value,
        "file": destination.name,
        "tags": item.tags,
    }

    recorder = index_txn if index_txn else self._index
    recorder.upsert(run_id, index_entry)
    return run_id
|
|
445
|
+
|
|
446
|
+
def _do_delete(
|
|
447
|
+
self,
|
|
448
|
+
item_id: str,
|
|
449
|
+
index_txn: IndexTransaction | None = None,
|
|
450
|
+
) -> bool:
|
|
451
|
+
"""Internal delete implementation.
|
|
452
|
+
|
|
453
|
+
Args:
|
|
454
|
+
item_id: Item to delete.
|
|
455
|
+
index_txn: Optional index transaction to use.
|
|
456
|
+
|
|
457
|
+
Returns:
|
|
458
|
+
True if item existed.
|
|
459
|
+
"""
|
|
460
|
+
file_path = self._get_file_path(item_id)
|
|
461
|
+
|
|
462
|
+
# Check existence
|
|
463
|
+
exists = file_path.exists()
|
|
464
|
+
|
|
465
|
+
# Delete file with lock
|
|
466
|
+
if exists:
|
|
467
|
+
with self._lock_manager.write_lock(file_path):
|
|
468
|
+
try:
|
|
469
|
+
file_path.unlink(missing_ok=True)
|
|
470
|
+
except OSError as e:
|
|
471
|
+
raise StoreWriteError(f"Failed to delete {file_path}: {e}")
|
|
472
|
+
|
|
473
|
+
# Update index
|
|
474
|
+
if index_txn:
|
|
475
|
+
index_txn.remove(item_id)
|
|
476
|
+
else:
|
|
477
|
+
self._index.remove(item_id)
|
|
478
|
+
|
|
479
|
+
return exists
|
|
480
|
+
|
|
481
|
+
def save(self, item: ValidationResult) -> str:
|
|
482
|
+
"""Save a validation result to the filesystem.
|
|
483
|
+
|
|
484
|
+
Thread-safe and process-safe.
|
|
485
|
+
|
|
486
|
+
Args:
|
|
487
|
+
item: The validation result to save.
|
|
488
|
+
|
|
489
|
+
Returns:
|
|
490
|
+
The run ID of the saved result.
|
|
491
|
+
|
|
492
|
+
Raises:
|
|
493
|
+
StoreWriteError: If saving fails.
|
|
494
|
+
"""
|
|
495
|
+
self.initialize()
|
|
496
|
+
return self._do_save(item)
|
|
497
|
+
|
|
498
|
+
def get(self, item_id: str) -> ValidationResult:
    """Retrieve a validation result by run ID.

    Args:
        item_id: The run ID of the result to retrieve.

    Returns:
        The validation result.

    Raises:
        StoreNotFoundError: If the result's file doesn't exist.
        StoreReadError: If the file cannot be read, decoded or parsed.
            The original exception is chained as the cause.
    """
    self.initialize()

    file_path = self._get_file_path(item_id)

    if not file_path.exists():
        raise StoreNotFoundError("ValidationResult", item_id)

    try:
        content = atomic_read(file_path, lock_manager=self._lock_manager)
        data = self._deserialize(content)
        return ValidationResult.from_dict(data)

    except (ValueError, KeyError, EOFError) as e:
        # ValueError covers json.JSONDecodeError and UnicodeDecodeError;
        # EOFError is what gzip raises for a truncated payload. Previously
        # only JSONDecodeError/KeyError were translated and the others
        # escaped the documented StoreReadError contract.
        raise StoreReadError(f"Failed to parse {file_path}: {e}") from e
    except OSError as e:
        # Includes gzip.BadGzipFile, which is an OSError subclass.
        raise StoreReadError(f"Failed to read {file_path}: {e}") from e
|
|
529
|
+
|
|
530
|
+
def exists(self, item_id: str) -> bool:
|
|
531
|
+
"""Check if a validation result exists.
|
|
532
|
+
|
|
533
|
+
Thread-safe using snapshot isolation.
|
|
534
|
+
|
|
535
|
+
Args:
|
|
536
|
+
item_id: The run ID to check.
|
|
537
|
+
|
|
538
|
+
Returns:
|
|
539
|
+
True if the result exists.
|
|
540
|
+
"""
|
|
541
|
+
self.initialize()
|
|
542
|
+
|
|
543
|
+
snapshot = self._index.snapshot()
|
|
544
|
+
return snapshot.contains(item_id) or self._get_file_path(item_id).exists()
|
|
545
|
+
|
|
546
|
+
def delete(self, item_id: str) -> bool:
    """Delete a validation result.

    Initializes the store on demand and delegates the removal to
    ``_do_delete``.

    Args:
        item_id: The run ID of the result to delete.

    Returns:
        Whatever ``_do_delete`` reports: True if the result was deleted,
        False if it didn't exist.

    Raises:
        StoreWriteError: If deletion fails.
    """
    self.initialize()
    was_deleted = self._do_delete(item_id)
    return was_deleted
|
|
562
|
+
|
|
563
|
+
def list_ids(self, query: StoreQuery | None = None) -> list[str]:
    """List validation result IDs matching the query.

    Works on a snapshot of the index. Without a query the snapshot's IDs
    are returned unchanged. With a query, entries whose metadata satisfies
    ``query.matches`` are ordered by run time (``query.ascending`` picks
    the direction) and then sliced by ``query.offset`` and ``query.limit``.

    Args:
        query: Optional query to filter results.

    Returns:
        List of matching run IDs.
    """
    self.initialize()

    snapshot = self._index.snapshot()

    if not query:
        return snapshot.list_ids()

    # Collect (id, sort key) pairs for every entry the query accepts.
    matching_ids: list[tuple[str, datetime]] = []

    for entry in snapshot.entries.values():
        meta = entry.metadata
        if not query.matches(meta):
            continue

        run_time = None
        run_time_str = meta.get("run_time")
        if run_time_str:
            try:
                run_time = datetime.fromisoformat(run_time_str)
            except (TypeError, ValueError):
                # A single malformed run_time must not abort the whole
                # listing; fall through to the creation time instead.
                run_time = None
        if run_time is None:
            run_time = entry.created_at

        matching_ids.append((entry.item_id, run_time))

    # Sort by run time; descending unless the query asks for ascending.
    matching_ids.sort(key=lambda pair: pair[1], reverse=not query.ascending)

    # Apply offset and limit
    ids = [item_id for item_id, _ in matching_ids]

    if query.offset:
        ids = ids[query.offset:]
    if query.limit:
        ids = ids[:query.limit]

    return ids
|
|
607
|
+
|
|
608
|
+
def query(self, query: StoreQuery) -> list[ValidationResult]:
    """Query validation results.

    Resolves the matching run IDs with ``list_ids`` and loads each one with
    ``get``. IDs whose load raises ``StoreNotFoundError`` or
    ``StoreReadError`` are left out of the result.

    Args:
        query: Query parameters for filtering.

    Returns:
        List of matching validation results.
    """
    loaded: list[ValidationResult] = []

    for run_id in self.list_ids(query):
        try:
            item = self.get(run_id)
        except (StoreNotFoundError, StoreReadError):
            # Skip corrupted or deleted entries
            continue
        loaded.append(item)

    return loaded
|
|
631
|
+
|
|
632
|
+
def batch(self) -> BatchContext:
    """Start a batch operation context.

    Initializes the store on demand, opens a transaction on the index and
    wraps both in a ``BatchContext``.

    Returns:
        BatchContext for accumulating operations.

    Example:
        >>> with store.batch() as batch:
        ...     batch.save(result1)
        ...     batch.save(result2)
        ...     batch.commit()
    """
    self.initialize()

    # The index transaction is handed over as-is; committing or rolling it
    # back is the caller's responsibility.
    return BatchContext(self, self._index.begin_transaction())
|
|
649
|
+
|
|
650
|
+
def rebuild_index(self) -> int:
    """Rebuild the index from stored files.

    Passes the pattern ``*<file_extension>*`` and a metadata extractor to
    ``self._index.rebuild_from_files``. The extractor returns None, so the
    file is skipped, when a file cannot be read, decompressed, decoded or
    parsed, when its JSON is not an object, or when it has no ``run_id``.

    Returns:
        The value returned by ``rebuild_from_files`` (number of items
        indexed).
    """
    import zlib  # gzip.decompress surfaces zlib.error on corrupt deflate data

    self.initialize()

    def extract_metadata(file_path: Path) -> tuple[str, dict[str, Any]] | None:
        """Return ``(run_id, index metadata)`` for one file, or None to skip it."""
        try:
            with open(file_path, "rb") as f:
                content = f.read()

            if self._config.use_compression:
                content = gzip.decompress(content)

            data = json.loads(content.decode("utf-8"))
        except (OSError, ValueError, EOFError, zlib.error):
            # OSError: unreadable file or gzip.BadGzipFile.
            # ValueError: json.JSONDecodeError or UnicodeDecodeError.
            # EOFError / zlib.error: truncated or corrupt gzip payload.
            # One bad file must not abort the whole rebuild.
            return None

        # Valid JSON that is not an object cannot carry a run_id.
        if not isinstance(data, dict):
            return None

        item_id = data.get("run_id")
        if not item_id:
            return None

        return item_id, {
            "data_asset": data.get("data_asset", "unknown"),
            "run_time": data.get("run_time"),
            "status": data.get("status"),
            "file": file_path.name,
            "tags": data.get("tags", {}),
        }

    pattern = f"*{self._config.file_extension}*"
    return self._index.rebuild_from_files(pattern, extract_metadata)
|
|
685
|
+
|
|
686
|
+
@property
def lock_statistics(self) -> LockStatistics | None:
    """Get lock statistics for debugging.

    Returns:
        The lock manager's ``statistics`` when a lock manager is set,
        None otherwise.
    """
    manager = self._lock_manager
    return manager.statistics if manager else None
|
|
696
|
+
|
|
697
|
+
def close(self) -> None:
    """Close the store and release resources.

    Asks the lock manager, when one is set, to release all of its locks.
    """
    manager = self._lock_manager
    if not manager:
        # No lock manager set, so there is nothing to release.
        return
    manager.release_all()
|
|
701
|
+
|
|
702
|
+
|
|
703
|
+
class ConcurrentFileSystemExpectationStore(ExpectationStore[ConcurrentFileSystemConfig]):
|
|
704
|
+
"""Thread-safe filesystem expectation store.
|
|
705
|
+
|
|
706
|
+
Similar to ConcurrentFileSystemStore but for expectation suites.
|
|
707
|
+
"""
|
|
708
|
+
|
|
709
|
+
def __init__(
    self,
    base_path: str = ".truthound/store",
    namespace: str = "default",
    prefix: str = "expectations",
    concurrency: ConcurrencyConfig | None = None,
    **kwargs: Any,
) -> None:
    """Initialize the concurrent expectation store.

    Builds a ``ConcurrentFileSystemConfig`` from the arguments and passes
    it to the base class. The lock manager and index start out as None.

    Args:
        base_path: Base directory for storing files.
        namespace: Namespace for organizing data.
        prefix: Additional path prefix.
        concurrency: Concurrency control configuration. A default
            ``ConcurrencyConfig`` is used when omitted.
        **kwargs: Additional configuration options. Only keys that are
            attributes of ``ConcurrentFileSystemConfig`` are forwarded.
    """
    if not concurrency:
        concurrency = ConcurrencyConfig()

    # Keep only the extra options the config class exposes as attributes;
    # anything else in kwargs is dropped.
    extra_options = {}
    for key, value in kwargs.items():
        if hasattr(ConcurrentFileSystemConfig, key):
            extra_options[key] = value

    super().__init__(
        ConcurrentFileSystemConfig(
            base_path=base_path,
            namespace=namespace,
            prefix=prefix,
            concurrency=concurrency,
            **extra_options,
        )
    )

    self._lock_manager: FileLockManager | None = None
    self._index: ConcurrentIndex | None = None
|
|
739
|
+
|
|
740
|
+
@classmethod
def _default_config(cls) -> ConcurrentFileSystemConfig:
    """Create the default configuration.

    Returns:
        A ``ConcurrentFileSystemConfig`` with ``prefix="expectations"``.
    """
    default_config = ConcurrentFileSystemConfig(prefix="expectations")
    return default_config
|
|
744
|
+
|
|
745
|
+
def _do_initialize(self) -> None:
    """Initialize the store with concurrency primitives.

    Creates the storage directory when ``create_dirs`` is set, then builds
    the file lock manager and the concurrent index that uses it, and
    initializes the index.
    """
    config = self._config
    store_dir = config.get_full_path()

    if config.create_dirs:
        store_dir.mkdir(parents=True, exist_ok=True)

    concurrency = config.concurrency

    # Lock manager configured from the concurrency settings.
    lock_manager = FileLockManager(
        strategy=get_default_lock_strategy(),
        enable_deadlock_detection=concurrency.enable_deadlock_detection,
        enable_statistics=concurrency.enable_statistics,
        default_timeout=concurrency.lock_timeout,
    )
    self._lock_manager = lock_manager

    # Index stored in its own file inside the store directory.
    index = ConcurrentIndex(
        base_path=store_dir,
        index_filename="_expectations_index.json",
        lock_manager=lock_manager,
        wal_enabled=concurrency.enable_wal,
    )
    self._index = index
    index.initialize()
|
|
769
|
+
|
|
770
|
+
def _get_file_path(self, suite_name: str) -> Path:
    """Get the file path for a suite.

    Forward and backward slashes in the name are replaced with underscores,
    the configured file extension is appended, and the result is joined
    onto ``self._config.get_full_path()``.
    """
    config = self._config
    # Normalise both separator styles to "/" and then swap each for "_".
    flattened = "_".join(suite_name.replace("\\", "/").split("/"))
    return config.get_full_path() / f"{flattened}{config.file_extension}"
|
|
774
|
+
|
|
775
|
+
def save(self, item: ExpectationSuite) -> str:
    """Save an expectation suite.

    Serializes the suite to JSON, writes it with ``atomic_write`` using
    this store's lock manager, then records the suite in the index.

    Args:
        item: The suite to save.

    Returns:
        The suite name.

    Raises:
        StoreWriteError: If the write does not report success.
    """
    self.initialize()

    target = self._get_file_path(item.name)
    json_indent = 2 if self._config.pretty_print else None
    serialized = json.dumps(item.to_dict(), indent=json_indent, default=str)

    outcome = atomic_write(
        target,
        serialized,
        create_backup=self._config.concurrency.create_backup,
        lock_manager=self._lock_manager,
    )
    if not outcome.success:
        raise StoreWriteError(f"Failed to write {target}: {outcome.error}")

    # Record the suite in the index only after the file write succeeded.
    index_metadata = {
        "data_asset": item.data_asset,
        "created_at": item.created_at.isoformat() if item.created_at else None,
        "file": target.name,
    }
    self._index.upsert(item.name, index_metadata)

    return item.name
|
|
813
|
+
|
|
814
|
+
def get(self, item_id: str) -> ExpectationSuite:
    """Retrieve an expectation suite by name.

    The suite file is located with ``_get_file_path``, read through
    ``atomic_read`` using this store's lock manager, decoded as UTF-8 JSON
    and turned into an ``ExpectationSuite``.

    Args:
        item_id: The suite name.

    Returns:
        The expectation suite.

    Raises:
        StoreNotFoundError: If the suite doesn't exist.
        StoreReadError: If the file cannot be read, is not valid UTF-8 or
            JSON, or lacks a required key. The underlying exception is
            attached as ``__cause__``.
    """
    self.initialize()

    file_path = self._get_file_path(item_id)

    if not file_path.exists():
        raise StoreNotFoundError("ExpectationSuite", item_id)

    try:
        content = atomic_read(file_path, lock_manager=self._lock_manager)
        data = json.loads(content.decode("utf-8"))
        return ExpectationSuite.from_dict(data)
    except (json.JSONDecodeError, UnicodeDecodeError, KeyError) as e:
        # UnicodeDecodeError is included so a file with invalid UTF-8 is
        # reported as StoreReadError like any other unparsable file.
        raise StoreReadError(f"Failed to parse {file_path}: {e}") from e
    except OSError as e:
        raise StoreReadError(f"Failed to read {file_path}: {e}") from e
|
|
844
|
+
|
|
845
|
+
def exists(self, item_id: str) -> bool:
    """Check if a suite exists.

    A suite exists when the index contains its name or, failing that, when
    its file is present on disk.
    """
    self.initialize()
    indexed = self._index.contains(item_id)
    # The filesystem is only consulted when the index does not know the name.
    return indexed or self._get_file_path(item_id).exists()
|
|
849
|
+
|
|
850
|
+
def delete(self, item_id: str) -> bool:
    """Delete an expectation suite.

    If the suite file exists it is unlinked while holding the lock
    manager's write lock for that path. The index entry is removed whether
    or not the file was there.

    Args:
        item_id: The suite name.

    Returns:
        True if the suite file existed when checked, False otherwise.

    Raises:
        StoreWriteError: If unlinking the file fails. The underlying
            ``OSError`` is attached as ``__cause__``.
    """
    self.initialize()

    file_path = self._get_file_path(item_id)
    existed = file_path.exists()

    if existed:
        with self._lock_manager.write_lock(file_path):
            try:
                # missing_ok tolerates the file vanishing between the
                # existence check above and the unlink.
                file_path.unlink(missing_ok=True)
            except OSError as e:
                raise StoreWriteError(f"Failed to delete {file_path}: {e}") from e

    self._index.remove(item_id)
    return existed
|
|
875
|
+
|
|
876
|
+
def list_ids(self, query: StoreQuery | None = None) -> list[str]:
    """List suite names in sorted order.

    Reads a snapshot of the index. When the query carries a ``data_asset``
    only suites indexed with that data asset are returned; otherwise every
    name in the snapshot is returned.
    """
    self.initialize()

    snapshot = self._index.snapshot()

    wanted_asset = query.data_asset if query else None
    if not wanted_asset:
        return sorted(snapshot.list_ids())

    # Filter by data_asset
    names = [
        entry.item_id
        for entry in snapshot.entries.values()
        if entry.metadata.get("data_asset") == wanted_asset
    ]
    names.sort()
    return names
|
|
891
|
+
|
|
892
|
+
def query(self, query: StoreQuery) -> list[ExpectationSuite]:
    """Query expectation suites.

    Resolves the matching suite names with ``list_ids``, loads each one
    with ``get`` (names whose load raises ``StoreNotFoundError`` or
    ``StoreReadError`` are skipped) and then applies ``query.offset`` and
    ``query.limit`` to the loaded suites.

    Args:
        query: Query parameters. ``offset`` and ``limit`` may be 0 or None
            to mean "not set".

    Returns:
        List of matching expectation suites.
    """
    suites: list[ExpectationSuite] = []

    for name in self.list_ids(query):
        try:
            suites.append(self.get(name))
        except (StoreNotFoundError, StoreReadError):
            # Deleted or unreadable suites are left out of the result.
            continue

    # Treat a missing offset as 0 so that ``offset + limit`` cannot fail
    # with a TypeError when offset is None.
    start = query.offset or 0

    if query.limit:
        suites = suites[start:start + query.limit]
    elif start:
        suites = suites[start:]

    return suites
|
|
911
|
+
|
|
912
|
+
def close(self) -> None:
    """Close the store and release resources.

    When a lock manager has been created, all of its locks are released;
    otherwise this does nothing.
    """
    lock_manager = self._lock_manager
    if not lock_manager:
        return
    lock_manager.release_all()
|