truthound 1.0.8__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- truthound/__init__.py +162 -0
- truthound/adapters.py +100 -0
- truthound/api.py +365 -0
- truthound/audit/__init__.py +248 -0
- truthound/audit/core.py +967 -0
- truthound/audit/filters.py +620 -0
- truthound/audit/formatters.py +707 -0
- truthound/audit/logger.py +902 -0
- truthound/audit/middleware.py +571 -0
- truthound/audit/storage.py +1083 -0
- truthound/benchmark/__init__.py +123 -0
- truthound/benchmark/base.py +757 -0
- truthound/benchmark/comparison.py +635 -0
- truthound/benchmark/generators.py +706 -0
- truthound/benchmark/reporters.py +718 -0
- truthound/benchmark/runner.py +635 -0
- truthound/benchmark/scenarios.py +712 -0
- truthound/cache.py +252 -0
- truthound/checkpoint/__init__.py +136 -0
- truthound/checkpoint/actions/__init__.py +164 -0
- truthound/checkpoint/actions/base.py +324 -0
- truthound/checkpoint/actions/custom.py +234 -0
- truthound/checkpoint/actions/discord_notify.py +290 -0
- truthound/checkpoint/actions/email_notify.py +405 -0
- truthound/checkpoint/actions/github_action.py +406 -0
- truthound/checkpoint/actions/opsgenie.py +1499 -0
- truthound/checkpoint/actions/pagerduty.py +226 -0
- truthound/checkpoint/actions/slack_notify.py +233 -0
- truthound/checkpoint/actions/store_result.py +249 -0
- truthound/checkpoint/actions/teams_notify.py +1570 -0
- truthound/checkpoint/actions/telegram_notify.py +419 -0
- truthound/checkpoint/actions/update_docs.py +552 -0
- truthound/checkpoint/actions/webhook.py +293 -0
- truthound/checkpoint/analytics/__init__.py +147 -0
- truthound/checkpoint/analytics/aggregations/__init__.py +23 -0
- truthound/checkpoint/analytics/aggregations/rollup.py +481 -0
- truthound/checkpoint/analytics/aggregations/time_bucket.py +306 -0
- truthound/checkpoint/analytics/analyzers/__init__.py +17 -0
- truthound/checkpoint/analytics/analyzers/anomaly.py +386 -0
- truthound/checkpoint/analytics/analyzers/base.py +270 -0
- truthound/checkpoint/analytics/analyzers/forecast.py +421 -0
- truthound/checkpoint/analytics/analyzers/trend.py +314 -0
- truthound/checkpoint/analytics/models.py +292 -0
- truthound/checkpoint/analytics/protocols.py +549 -0
- truthound/checkpoint/analytics/service.py +718 -0
- truthound/checkpoint/analytics/stores/__init__.py +16 -0
- truthound/checkpoint/analytics/stores/base.py +306 -0
- truthound/checkpoint/analytics/stores/memory_store.py +353 -0
- truthound/checkpoint/analytics/stores/sqlite_store.py +557 -0
- truthound/checkpoint/analytics/stores/timescale_store.py +501 -0
- truthound/checkpoint/async_actions.py +794 -0
- truthound/checkpoint/async_base.py +708 -0
- truthound/checkpoint/async_checkpoint.py +617 -0
- truthound/checkpoint/async_runner.py +639 -0
- truthound/checkpoint/checkpoint.py +527 -0
- truthound/checkpoint/ci/__init__.py +61 -0
- truthound/checkpoint/ci/detector.py +355 -0
- truthound/checkpoint/ci/reporter.py +436 -0
- truthound/checkpoint/ci/templates.py +454 -0
- truthound/checkpoint/circuitbreaker/__init__.py +133 -0
- truthound/checkpoint/circuitbreaker/breaker.py +542 -0
- truthound/checkpoint/circuitbreaker/core.py +252 -0
- truthound/checkpoint/circuitbreaker/detection.py +459 -0
- truthound/checkpoint/circuitbreaker/middleware.py +389 -0
- truthound/checkpoint/circuitbreaker/registry.py +357 -0
- truthound/checkpoint/distributed/__init__.py +139 -0
- truthound/checkpoint/distributed/backends/__init__.py +35 -0
- truthound/checkpoint/distributed/backends/celery_backend.py +503 -0
- truthound/checkpoint/distributed/backends/kubernetes_backend.py +696 -0
- truthound/checkpoint/distributed/backends/local_backend.py +397 -0
- truthound/checkpoint/distributed/backends/ray_backend.py +625 -0
- truthound/checkpoint/distributed/base.py +774 -0
- truthound/checkpoint/distributed/orchestrator.py +765 -0
- truthound/checkpoint/distributed/protocols.py +842 -0
- truthound/checkpoint/distributed/registry.py +449 -0
- truthound/checkpoint/idempotency/__init__.py +120 -0
- truthound/checkpoint/idempotency/core.py +295 -0
- truthound/checkpoint/idempotency/fingerprint.py +454 -0
- truthound/checkpoint/idempotency/locking.py +604 -0
- truthound/checkpoint/idempotency/service.py +592 -0
- truthound/checkpoint/idempotency/stores.py +653 -0
- truthound/checkpoint/monitoring/__init__.py +134 -0
- truthound/checkpoint/monitoring/aggregators/__init__.py +15 -0
- truthound/checkpoint/monitoring/aggregators/base.py +372 -0
- truthound/checkpoint/monitoring/aggregators/realtime.py +300 -0
- truthound/checkpoint/monitoring/aggregators/window.py +493 -0
- truthound/checkpoint/monitoring/collectors/__init__.py +17 -0
- truthound/checkpoint/monitoring/collectors/base.py +257 -0
- truthound/checkpoint/monitoring/collectors/memory_collector.py +617 -0
- truthound/checkpoint/monitoring/collectors/prometheus_collector.py +451 -0
- truthound/checkpoint/monitoring/collectors/redis_collector.py +518 -0
- truthound/checkpoint/monitoring/events.py +410 -0
- truthound/checkpoint/monitoring/protocols.py +636 -0
- truthound/checkpoint/monitoring/service.py +578 -0
- truthound/checkpoint/monitoring/views/__init__.py +17 -0
- truthound/checkpoint/monitoring/views/base.py +172 -0
- truthound/checkpoint/monitoring/views/queue_view.py +220 -0
- truthound/checkpoint/monitoring/views/task_view.py +240 -0
- truthound/checkpoint/monitoring/views/worker_view.py +263 -0
- truthound/checkpoint/registry.py +337 -0
- truthound/checkpoint/runner.py +356 -0
- truthound/checkpoint/transaction/__init__.py +133 -0
- truthound/checkpoint/transaction/base.py +389 -0
- truthound/checkpoint/transaction/compensatable.py +537 -0
- truthound/checkpoint/transaction/coordinator.py +576 -0
- truthound/checkpoint/transaction/executor.py +622 -0
- truthound/checkpoint/transaction/idempotency.py +534 -0
- truthound/checkpoint/transaction/saga/__init__.py +143 -0
- truthound/checkpoint/transaction/saga/builder.py +584 -0
- truthound/checkpoint/transaction/saga/definition.py +515 -0
- truthound/checkpoint/transaction/saga/event_store.py +542 -0
- truthound/checkpoint/transaction/saga/patterns.py +833 -0
- truthound/checkpoint/transaction/saga/runner.py +718 -0
- truthound/checkpoint/transaction/saga/state_machine.py +793 -0
- truthound/checkpoint/transaction/saga/strategies.py +780 -0
- truthound/checkpoint/transaction/saga/testing.py +886 -0
- truthound/checkpoint/triggers/__init__.py +58 -0
- truthound/checkpoint/triggers/base.py +237 -0
- truthound/checkpoint/triggers/event.py +385 -0
- truthound/checkpoint/triggers/schedule.py +355 -0
- truthound/cli.py +2358 -0
- truthound/cli_modules/__init__.py +124 -0
- truthound/cli_modules/advanced/__init__.py +45 -0
- truthound/cli_modules/advanced/benchmark.py +343 -0
- truthound/cli_modules/advanced/docs.py +225 -0
- truthound/cli_modules/advanced/lineage.py +209 -0
- truthound/cli_modules/advanced/ml.py +320 -0
- truthound/cli_modules/advanced/realtime.py +196 -0
- truthound/cli_modules/checkpoint/__init__.py +46 -0
- truthound/cli_modules/checkpoint/init.py +114 -0
- truthound/cli_modules/checkpoint/list.py +71 -0
- truthound/cli_modules/checkpoint/run.py +159 -0
- truthound/cli_modules/checkpoint/validate.py +67 -0
- truthound/cli_modules/common/__init__.py +71 -0
- truthound/cli_modules/common/errors.py +414 -0
- truthound/cli_modules/common/options.py +419 -0
- truthound/cli_modules/common/output.py +507 -0
- truthound/cli_modules/common/protocol.py +552 -0
- truthound/cli_modules/core/__init__.py +48 -0
- truthound/cli_modules/core/check.py +123 -0
- truthound/cli_modules/core/compare.py +104 -0
- truthound/cli_modules/core/learn.py +57 -0
- truthound/cli_modules/core/mask.py +77 -0
- truthound/cli_modules/core/profile.py +65 -0
- truthound/cli_modules/core/scan.py +61 -0
- truthound/cli_modules/profiler/__init__.py +51 -0
- truthound/cli_modules/profiler/auto_profile.py +175 -0
- truthound/cli_modules/profiler/metadata.py +107 -0
- truthound/cli_modules/profiler/suite.py +283 -0
- truthound/cli_modules/registry.py +431 -0
- truthound/cli_modules/scaffolding/__init__.py +89 -0
- truthound/cli_modules/scaffolding/base.py +631 -0
- truthound/cli_modules/scaffolding/commands.py +545 -0
- truthound/cli_modules/scaffolding/plugins.py +1072 -0
- truthound/cli_modules/scaffolding/reporters.py +594 -0
- truthound/cli_modules/scaffolding/validators.py +1127 -0
- truthound/common/__init__.py +18 -0
- truthound/common/resilience/__init__.py +130 -0
- truthound/common/resilience/bulkhead.py +266 -0
- truthound/common/resilience/circuit_breaker.py +516 -0
- truthound/common/resilience/composite.py +332 -0
- truthound/common/resilience/config.py +292 -0
- truthound/common/resilience/protocols.py +217 -0
- truthound/common/resilience/rate_limiter.py +404 -0
- truthound/common/resilience/retry.py +341 -0
- truthound/datadocs/__init__.py +260 -0
- truthound/datadocs/base.py +571 -0
- truthound/datadocs/builder.py +761 -0
- truthound/datadocs/charts.py +764 -0
- truthound/datadocs/dashboard/__init__.py +63 -0
- truthound/datadocs/dashboard/app.py +576 -0
- truthound/datadocs/dashboard/components.py +584 -0
- truthound/datadocs/dashboard/state.py +240 -0
- truthound/datadocs/engine/__init__.py +46 -0
- truthound/datadocs/engine/context.py +376 -0
- truthound/datadocs/engine/pipeline.py +618 -0
- truthound/datadocs/engine/registry.py +469 -0
- truthound/datadocs/exporters/__init__.py +49 -0
- truthound/datadocs/exporters/base.py +198 -0
- truthound/datadocs/exporters/html.py +178 -0
- truthound/datadocs/exporters/json_exporter.py +253 -0
- truthound/datadocs/exporters/markdown.py +284 -0
- truthound/datadocs/exporters/pdf.py +392 -0
- truthound/datadocs/i18n/__init__.py +86 -0
- truthound/datadocs/i18n/catalog.py +960 -0
- truthound/datadocs/i18n/formatting.py +505 -0
- truthound/datadocs/i18n/loader.py +256 -0
- truthound/datadocs/i18n/plurals.py +378 -0
- truthound/datadocs/renderers/__init__.py +42 -0
- truthound/datadocs/renderers/base.py +401 -0
- truthound/datadocs/renderers/custom.py +342 -0
- truthound/datadocs/renderers/jinja.py +697 -0
- truthound/datadocs/sections.py +736 -0
- truthound/datadocs/styles.py +931 -0
- truthound/datadocs/themes/__init__.py +101 -0
- truthound/datadocs/themes/base.py +336 -0
- truthound/datadocs/themes/default.py +417 -0
- truthound/datadocs/themes/enterprise.py +419 -0
- truthound/datadocs/themes/loader.py +336 -0
- truthound/datadocs/themes.py +301 -0
- truthound/datadocs/transformers/__init__.py +57 -0
- truthound/datadocs/transformers/base.py +268 -0
- truthound/datadocs/transformers/enrichers.py +544 -0
- truthound/datadocs/transformers/filters.py +447 -0
- truthound/datadocs/transformers/i18n.py +468 -0
- truthound/datadocs/versioning/__init__.py +62 -0
- truthound/datadocs/versioning/diff.py +639 -0
- truthound/datadocs/versioning/storage.py +497 -0
- truthound/datadocs/versioning/version.py +358 -0
- truthound/datasources/__init__.py +223 -0
- truthound/datasources/_async_protocols.py +222 -0
- truthound/datasources/_protocols.py +159 -0
- truthound/datasources/adapters.py +428 -0
- truthound/datasources/async_base.py +599 -0
- truthound/datasources/async_factory.py +511 -0
- truthound/datasources/base.py +516 -0
- truthound/datasources/factory.py +433 -0
- truthound/datasources/nosql/__init__.py +47 -0
- truthound/datasources/nosql/base.py +487 -0
- truthound/datasources/nosql/elasticsearch.py +801 -0
- truthound/datasources/nosql/mongodb.py +636 -0
- truthound/datasources/pandas_optimized.py +582 -0
- truthound/datasources/pandas_source.py +216 -0
- truthound/datasources/polars_source.py +395 -0
- truthound/datasources/spark_source.py +479 -0
- truthound/datasources/sql/__init__.py +154 -0
- truthound/datasources/sql/base.py +710 -0
- truthound/datasources/sql/bigquery.py +410 -0
- truthound/datasources/sql/cloud_base.py +199 -0
- truthound/datasources/sql/databricks.py +471 -0
- truthound/datasources/sql/mysql.py +316 -0
- truthound/datasources/sql/oracle.py +427 -0
- truthound/datasources/sql/postgresql.py +321 -0
- truthound/datasources/sql/redshift.py +479 -0
- truthound/datasources/sql/snowflake.py +439 -0
- truthound/datasources/sql/sqlite.py +286 -0
- truthound/datasources/sql/sqlserver.py +437 -0
- truthound/datasources/streaming/__init__.py +47 -0
- truthound/datasources/streaming/base.py +350 -0
- truthound/datasources/streaming/kafka.py +670 -0
- truthound/decorators.py +98 -0
- truthound/docs/__init__.py +69 -0
- truthound/docs/extractor.py +971 -0
- truthound/docs/generator.py +601 -0
- truthound/docs/parser.py +1037 -0
- truthound/docs/renderer.py +999 -0
- truthound/drift/__init__.py +22 -0
- truthound/drift/compare.py +189 -0
- truthound/drift/detectors.py +464 -0
- truthound/drift/report.py +160 -0
- truthound/execution/__init__.py +65 -0
- truthound/execution/_protocols.py +324 -0
- truthound/execution/base.py +576 -0
- truthound/execution/distributed/__init__.py +179 -0
- truthound/execution/distributed/aggregations.py +731 -0
- truthound/execution/distributed/arrow_bridge.py +817 -0
- truthound/execution/distributed/base.py +550 -0
- truthound/execution/distributed/dask_engine.py +976 -0
- truthound/execution/distributed/mixins.py +766 -0
- truthound/execution/distributed/protocols.py +756 -0
- truthound/execution/distributed/ray_engine.py +1127 -0
- truthound/execution/distributed/registry.py +446 -0
- truthound/execution/distributed/spark_engine.py +1011 -0
- truthound/execution/distributed/validator_adapter.py +682 -0
- truthound/execution/pandas_engine.py +401 -0
- truthound/execution/polars_engine.py +497 -0
- truthound/execution/pushdown/__init__.py +230 -0
- truthound/execution/pushdown/ast.py +1550 -0
- truthound/execution/pushdown/builder.py +1550 -0
- truthound/execution/pushdown/dialects.py +1072 -0
- truthound/execution/pushdown/executor.py +829 -0
- truthound/execution/pushdown/optimizer.py +1041 -0
- truthound/execution/sql_engine.py +518 -0
- truthound/infrastructure/__init__.py +189 -0
- truthound/infrastructure/audit.py +1515 -0
- truthound/infrastructure/config.py +1133 -0
- truthound/infrastructure/encryption.py +1132 -0
- truthound/infrastructure/logging.py +1503 -0
- truthound/infrastructure/metrics.py +1220 -0
- truthound/lineage/__init__.py +89 -0
- truthound/lineage/base.py +746 -0
- truthound/lineage/impact_analysis.py +474 -0
- truthound/lineage/integrations/__init__.py +22 -0
- truthound/lineage/integrations/openlineage.py +548 -0
- truthound/lineage/tracker.py +512 -0
- truthound/lineage/visualization/__init__.py +33 -0
- truthound/lineage/visualization/protocols.py +145 -0
- truthound/lineage/visualization/renderers/__init__.py +20 -0
- truthound/lineage/visualization/renderers/cytoscape.py +329 -0
- truthound/lineage/visualization/renderers/d3.py +331 -0
- truthound/lineage/visualization/renderers/graphviz.py +276 -0
- truthound/lineage/visualization/renderers/mermaid.py +308 -0
- truthound/maskers.py +113 -0
- truthound/ml/__init__.py +124 -0
- truthound/ml/anomaly_models/__init__.py +31 -0
- truthound/ml/anomaly_models/ensemble.py +362 -0
- truthound/ml/anomaly_models/isolation_forest.py +444 -0
- truthound/ml/anomaly_models/statistical.py +392 -0
- truthound/ml/base.py +1178 -0
- truthound/ml/drift_detection/__init__.py +26 -0
- truthound/ml/drift_detection/concept.py +381 -0
- truthound/ml/drift_detection/distribution.py +361 -0
- truthound/ml/drift_detection/feature.py +442 -0
- truthound/ml/drift_detection/multivariate.py +495 -0
- truthound/ml/monitoring/__init__.py +88 -0
- truthound/ml/monitoring/alerting/__init__.py +33 -0
- truthound/ml/monitoring/alerting/handlers.py +427 -0
- truthound/ml/monitoring/alerting/rules.py +508 -0
- truthound/ml/monitoring/collectors/__init__.py +19 -0
- truthound/ml/monitoring/collectors/composite.py +105 -0
- truthound/ml/monitoring/collectors/drift.py +324 -0
- truthound/ml/monitoring/collectors/performance.py +179 -0
- truthound/ml/monitoring/collectors/quality.py +369 -0
- truthound/ml/monitoring/monitor.py +536 -0
- truthound/ml/monitoring/protocols.py +451 -0
- truthound/ml/monitoring/stores/__init__.py +15 -0
- truthound/ml/monitoring/stores/memory.py +201 -0
- truthound/ml/monitoring/stores/prometheus.py +296 -0
- truthound/ml/rule_learning/__init__.py +25 -0
- truthound/ml/rule_learning/constraint_miner.py +443 -0
- truthound/ml/rule_learning/pattern_learner.py +499 -0
- truthound/ml/rule_learning/profile_learner.py +462 -0
- truthound/multitenancy/__init__.py +326 -0
- truthound/multitenancy/core.py +852 -0
- truthound/multitenancy/integration.py +597 -0
- truthound/multitenancy/isolation.py +630 -0
- truthound/multitenancy/manager.py +770 -0
- truthound/multitenancy/middleware.py +765 -0
- truthound/multitenancy/quota.py +537 -0
- truthound/multitenancy/resolvers.py +603 -0
- truthound/multitenancy/storage.py +703 -0
- truthound/observability/__init__.py +307 -0
- truthound/observability/context.py +531 -0
- truthound/observability/instrumentation.py +611 -0
- truthound/observability/logging.py +887 -0
- truthound/observability/metrics.py +1157 -0
- truthound/observability/tracing/__init__.py +178 -0
- truthound/observability/tracing/baggage.py +310 -0
- truthound/observability/tracing/config.py +426 -0
- truthound/observability/tracing/exporter.py +787 -0
- truthound/observability/tracing/integration.py +1018 -0
- truthound/observability/tracing/otel/__init__.py +146 -0
- truthound/observability/tracing/otel/adapter.py +982 -0
- truthound/observability/tracing/otel/bridge.py +1177 -0
- truthound/observability/tracing/otel/compat.py +681 -0
- truthound/observability/tracing/otel/config.py +691 -0
- truthound/observability/tracing/otel/detection.py +327 -0
- truthound/observability/tracing/otel/protocols.py +426 -0
- truthound/observability/tracing/processor.py +561 -0
- truthound/observability/tracing/propagator.py +757 -0
- truthound/observability/tracing/provider.py +569 -0
- truthound/observability/tracing/resource.py +515 -0
- truthound/observability/tracing/sampler.py +487 -0
- truthound/observability/tracing/span.py +676 -0
- truthound/plugins/__init__.py +198 -0
- truthound/plugins/base.py +599 -0
- truthound/plugins/cli.py +680 -0
- truthound/plugins/dependencies/__init__.py +42 -0
- truthound/plugins/dependencies/graph.py +422 -0
- truthound/plugins/dependencies/resolver.py +417 -0
- truthound/plugins/discovery.py +379 -0
- truthound/plugins/docs/__init__.py +46 -0
- truthound/plugins/docs/extractor.py +444 -0
- truthound/plugins/docs/renderer.py +499 -0
- truthound/plugins/enterprise_manager.py +877 -0
- truthound/plugins/examples/__init__.py +19 -0
- truthound/plugins/examples/custom_validators.py +317 -0
- truthound/plugins/examples/slack_notifier.py +312 -0
- truthound/plugins/examples/xml_reporter.py +254 -0
- truthound/plugins/hooks.py +558 -0
- truthound/plugins/lifecycle/__init__.py +43 -0
- truthound/plugins/lifecycle/hot_reload.py +402 -0
- truthound/plugins/lifecycle/manager.py +371 -0
- truthound/plugins/manager.py +736 -0
- truthound/plugins/registry.py +338 -0
- truthound/plugins/security/__init__.py +93 -0
- truthound/plugins/security/exceptions.py +332 -0
- truthound/plugins/security/policies.py +348 -0
- truthound/plugins/security/protocols.py +643 -0
- truthound/plugins/security/sandbox/__init__.py +45 -0
- truthound/plugins/security/sandbox/context.py +158 -0
- truthound/plugins/security/sandbox/engines/__init__.py +19 -0
- truthound/plugins/security/sandbox/engines/container.py +379 -0
- truthound/plugins/security/sandbox/engines/noop.py +144 -0
- truthound/plugins/security/sandbox/engines/process.py +336 -0
- truthound/plugins/security/sandbox/factory.py +211 -0
- truthound/plugins/security/signing/__init__.py +57 -0
- truthound/plugins/security/signing/service.py +330 -0
- truthound/plugins/security/signing/trust_store.py +368 -0
- truthound/plugins/security/signing/verifier.py +459 -0
- truthound/plugins/versioning/__init__.py +41 -0
- truthound/plugins/versioning/constraints.py +297 -0
- truthound/plugins/versioning/resolver.py +329 -0
- truthound/profiler/__init__.py +1729 -0
- truthound/profiler/_lazy.py +452 -0
- truthound/profiler/ab_testing/__init__.py +80 -0
- truthound/profiler/ab_testing/analysis.py +449 -0
- truthound/profiler/ab_testing/base.py +257 -0
- truthound/profiler/ab_testing/experiment.py +395 -0
- truthound/profiler/ab_testing/tracking.py +368 -0
- truthound/profiler/auto_threshold.py +1170 -0
- truthound/profiler/base.py +579 -0
- truthound/profiler/cache_patterns.py +911 -0
- truthound/profiler/caching.py +1303 -0
- truthound/profiler/column_profiler.py +712 -0
- truthound/profiler/comparison.py +1007 -0
- truthound/profiler/custom_patterns.py +1170 -0
- truthound/profiler/dashboard/__init__.py +50 -0
- truthound/profiler/dashboard/app.py +476 -0
- truthound/profiler/dashboard/components.py +457 -0
- truthound/profiler/dashboard/config.py +72 -0
- truthound/profiler/distributed/__init__.py +83 -0
- truthound/profiler/distributed/base.py +281 -0
- truthound/profiler/distributed/dask_backend.py +498 -0
- truthound/profiler/distributed/local_backend.py +293 -0
- truthound/profiler/distributed/profiler.py +304 -0
- truthound/profiler/distributed/ray_backend.py +374 -0
- truthound/profiler/distributed/spark_backend.py +375 -0
- truthound/profiler/distributed.py +1366 -0
- truthound/profiler/enterprise_sampling.py +1065 -0
- truthound/profiler/errors.py +488 -0
- truthound/profiler/evolution/__init__.py +91 -0
- truthound/profiler/evolution/alerts.py +426 -0
- truthound/profiler/evolution/changes.py +206 -0
- truthound/profiler/evolution/compatibility.py +365 -0
- truthound/profiler/evolution/detector.py +372 -0
- truthound/profiler/evolution/protocols.py +121 -0
- truthound/profiler/generators/__init__.py +48 -0
- truthound/profiler/generators/base.py +384 -0
- truthound/profiler/generators/ml_rules.py +375 -0
- truthound/profiler/generators/pattern_rules.py +384 -0
- truthound/profiler/generators/schema_rules.py +267 -0
- truthound/profiler/generators/stats_rules.py +324 -0
- truthound/profiler/generators/suite_generator.py +857 -0
- truthound/profiler/i18n.py +1542 -0
- truthound/profiler/incremental.py +554 -0
- truthound/profiler/incremental_validation.py +1710 -0
- truthound/profiler/integration/__init__.py +73 -0
- truthound/profiler/integration/adapters.py +345 -0
- truthound/profiler/integration/context.py +371 -0
- truthound/profiler/integration/executor.py +527 -0
- truthound/profiler/integration/naming.py +75 -0
- truthound/profiler/integration/protocols.py +243 -0
- truthound/profiler/memory.py +1185 -0
- truthound/profiler/migration/__init__.py +60 -0
- truthound/profiler/migration/base.py +345 -0
- truthound/profiler/migration/manager.py +444 -0
- truthound/profiler/migration/v1_0_to_v1_1.py +484 -0
- truthound/profiler/ml/__init__.py +73 -0
- truthound/profiler/ml/base.py +244 -0
- truthound/profiler/ml/classifier.py +507 -0
- truthound/profiler/ml/feature_extraction.py +604 -0
- truthound/profiler/ml/pretrained.py +448 -0
- truthound/profiler/ml_inference.py +1276 -0
- truthound/profiler/native_patterns.py +815 -0
- truthound/profiler/observability.py +1184 -0
- truthound/profiler/process_timeout.py +1566 -0
- truthound/profiler/progress.py +568 -0
- truthound/profiler/progress_callbacks.py +1734 -0
- truthound/profiler/quality.py +1345 -0
- truthound/profiler/resilience.py +1180 -0
- truthound/profiler/sampled_matcher.py +794 -0
- truthound/profiler/sampling.py +1288 -0
- truthound/profiler/scheduling/__init__.py +82 -0
- truthound/profiler/scheduling/protocols.py +214 -0
- truthound/profiler/scheduling/scheduler.py +474 -0
- truthound/profiler/scheduling/storage.py +457 -0
- truthound/profiler/scheduling/triggers.py +449 -0
- truthound/profiler/schema.py +603 -0
- truthound/profiler/streaming.py +685 -0
- truthound/profiler/streaming_patterns.py +1354 -0
- truthound/profiler/suite_cli.py +625 -0
- truthound/profiler/suite_config.py +789 -0
- truthound/profiler/suite_export.py +1268 -0
- truthound/profiler/table_profiler.py +547 -0
- truthound/profiler/timeout.py +565 -0
- truthound/profiler/validation.py +1532 -0
- truthound/profiler/visualization/__init__.py +118 -0
- truthound/profiler/visualization/base.py +346 -0
- truthound/profiler/visualization/generator.py +1259 -0
- truthound/profiler/visualization/plotly_renderer.py +811 -0
- truthound/profiler/visualization/renderers.py +669 -0
- truthound/profiler/visualization/sections.py +540 -0
- truthound/profiler/visualization.py +2122 -0
- truthound/profiler/yaml_validation.py +1151 -0
- truthound/py.typed +0 -0
- truthound/ratelimit/__init__.py +248 -0
- truthound/ratelimit/algorithms.py +1108 -0
- truthound/ratelimit/core.py +573 -0
- truthound/ratelimit/integration.py +532 -0
- truthound/ratelimit/limiter.py +663 -0
- truthound/ratelimit/middleware.py +700 -0
- truthound/ratelimit/policy.py +792 -0
- truthound/ratelimit/storage.py +763 -0
- truthound/rbac/__init__.py +340 -0
- truthound/rbac/core.py +976 -0
- truthound/rbac/integration.py +760 -0
- truthound/rbac/manager.py +1052 -0
- truthound/rbac/middleware.py +842 -0
- truthound/rbac/policy.py +954 -0
- truthound/rbac/storage.py +878 -0
- truthound/realtime/__init__.py +141 -0
- truthound/realtime/adapters/__init__.py +43 -0
- truthound/realtime/adapters/base.py +533 -0
- truthound/realtime/adapters/kafka.py +487 -0
- truthound/realtime/adapters/kinesis.py +479 -0
- truthound/realtime/adapters/mock.py +243 -0
- truthound/realtime/base.py +553 -0
- truthound/realtime/factory.py +382 -0
- truthound/realtime/incremental.py +660 -0
- truthound/realtime/processing/__init__.py +67 -0
- truthound/realtime/processing/exactly_once.py +575 -0
- truthound/realtime/processing/state.py +547 -0
- truthound/realtime/processing/windows.py +647 -0
- truthound/realtime/protocols.py +569 -0
- truthound/realtime/streaming.py +605 -0
- truthound/realtime/testing/__init__.py +32 -0
- truthound/realtime/testing/containers.py +615 -0
- truthound/realtime/testing/fixtures.py +484 -0
- truthound/report.py +280 -0
- truthound/reporters/__init__.py +46 -0
- truthound/reporters/_protocols.py +30 -0
- truthound/reporters/base.py +324 -0
- truthound/reporters/ci/__init__.py +66 -0
- truthound/reporters/ci/azure.py +436 -0
- truthound/reporters/ci/base.py +509 -0
- truthound/reporters/ci/bitbucket.py +567 -0
- truthound/reporters/ci/circleci.py +547 -0
- truthound/reporters/ci/detection.py +364 -0
- truthound/reporters/ci/factory.py +182 -0
- truthound/reporters/ci/github.py +388 -0
- truthound/reporters/ci/gitlab.py +471 -0
- truthound/reporters/ci/jenkins.py +525 -0
- truthound/reporters/console_reporter.py +299 -0
- truthound/reporters/factory.py +211 -0
- truthound/reporters/html_reporter.py +524 -0
- truthound/reporters/json_reporter.py +256 -0
- truthound/reporters/markdown_reporter.py +280 -0
- truthound/reporters/sdk/__init__.py +174 -0
- truthound/reporters/sdk/builder.py +558 -0
- truthound/reporters/sdk/mixins.py +1150 -0
- truthound/reporters/sdk/schema.py +1493 -0
- truthound/reporters/sdk/templates.py +666 -0
- truthound/reporters/sdk/testing.py +968 -0
- truthound/scanners.py +170 -0
- truthound/scheduling/__init__.py +122 -0
- truthound/scheduling/cron.py +1136 -0
- truthound/scheduling/presets.py +212 -0
- truthound/schema.py +275 -0
- truthound/secrets/__init__.py +173 -0
- truthound/secrets/base.py +618 -0
- truthound/secrets/cloud.py +682 -0
- truthound/secrets/integration.py +507 -0
- truthound/secrets/manager.py +633 -0
- truthound/secrets/oidc/__init__.py +172 -0
- truthound/secrets/oidc/base.py +902 -0
- truthound/secrets/oidc/credential_provider.py +623 -0
- truthound/secrets/oidc/exchangers.py +1001 -0
- truthound/secrets/oidc/github/__init__.py +110 -0
- truthound/secrets/oidc/github/claims.py +718 -0
- truthound/secrets/oidc/github/enhanced_provider.py +693 -0
- truthound/secrets/oidc/github/trust_policy.py +742 -0
- truthound/secrets/oidc/github/verification.py +723 -0
- truthound/secrets/oidc/github/workflow.py +691 -0
- truthound/secrets/oidc/providers.py +825 -0
- truthound/secrets/providers.py +506 -0
- truthound/secrets/resolver.py +495 -0
- truthound/stores/__init__.py +177 -0
- truthound/stores/backends/__init__.py +18 -0
- truthound/stores/backends/_protocols.py +340 -0
- truthound/stores/backends/azure_blob.py +530 -0
- truthound/stores/backends/concurrent_filesystem.py +915 -0
- truthound/stores/backends/connection_pool.py +1365 -0
- truthound/stores/backends/database.py +743 -0
- truthound/stores/backends/filesystem.py +538 -0
- truthound/stores/backends/gcs.py +399 -0
- truthound/stores/backends/memory.py +354 -0
- truthound/stores/backends/s3.py +434 -0
- truthound/stores/backpressure/__init__.py +84 -0
- truthound/stores/backpressure/base.py +375 -0
- truthound/stores/backpressure/circuit_breaker.py +434 -0
- truthound/stores/backpressure/monitor.py +376 -0
- truthound/stores/backpressure/strategies.py +677 -0
- truthound/stores/base.py +551 -0
- truthound/stores/batching/__init__.py +65 -0
- truthound/stores/batching/base.py +305 -0
- truthound/stores/batching/buffer.py +370 -0
- truthound/stores/batching/store.py +248 -0
- truthound/stores/batching/writer.py +521 -0
- truthound/stores/caching/__init__.py +60 -0
- truthound/stores/caching/backends.py +684 -0
- truthound/stores/caching/base.py +356 -0
- truthound/stores/caching/store.py +305 -0
- truthound/stores/compression/__init__.py +193 -0
- truthound/stores/compression/adaptive.py +694 -0
- truthound/stores/compression/base.py +514 -0
- truthound/stores/compression/pipeline.py +868 -0
- truthound/stores/compression/providers.py +672 -0
- truthound/stores/compression/streaming.py +832 -0
- truthound/stores/concurrency/__init__.py +81 -0
- truthound/stores/concurrency/atomic.py +556 -0
- truthound/stores/concurrency/index.py +775 -0
- truthound/stores/concurrency/locks.py +576 -0
- truthound/stores/concurrency/manager.py +482 -0
- truthound/stores/encryption/__init__.py +297 -0
- truthound/stores/encryption/base.py +952 -0
- truthound/stores/encryption/keys.py +1191 -0
- truthound/stores/encryption/pipeline.py +903 -0
- truthound/stores/encryption/providers.py +953 -0
- truthound/stores/encryption/streaming.py +950 -0
- truthound/stores/expectations.py +227 -0
- truthound/stores/factory.py +246 -0
- truthound/stores/migration/__init__.py +75 -0
- truthound/stores/migration/base.py +480 -0
- truthound/stores/migration/manager.py +347 -0
- truthound/stores/migration/registry.py +382 -0
- truthound/stores/migration/store.py +559 -0
- truthound/stores/observability/__init__.py +106 -0
- truthound/stores/observability/audit.py +718 -0
- truthound/stores/observability/config.py +270 -0
- truthound/stores/observability/factory.py +208 -0
- truthound/stores/observability/metrics.py +636 -0
- truthound/stores/observability/protocols.py +410 -0
- truthound/stores/observability/store.py +570 -0
- truthound/stores/observability/tracing.py +784 -0
- truthound/stores/replication/__init__.py +76 -0
- truthound/stores/replication/base.py +260 -0
- truthound/stores/replication/monitor.py +269 -0
- truthound/stores/replication/store.py +439 -0
- truthound/stores/replication/syncer.py +391 -0
- truthound/stores/results.py +359 -0
- truthound/stores/retention/__init__.py +77 -0
- truthound/stores/retention/base.py +378 -0
- truthound/stores/retention/policies.py +621 -0
- truthound/stores/retention/scheduler.py +279 -0
- truthound/stores/retention/store.py +526 -0
- truthound/stores/streaming/__init__.py +138 -0
- truthound/stores/streaming/base.py +801 -0
- truthound/stores/streaming/database.py +984 -0
- truthound/stores/streaming/filesystem.py +719 -0
- truthound/stores/streaming/reader.py +629 -0
- truthound/stores/streaming/s3.py +843 -0
- truthound/stores/streaming/writer.py +790 -0
- truthound/stores/tiering/__init__.py +108 -0
- truthound/stores/tiering/base.py +462 -0
- truthound/stores/tiering/manager.py +249 -0
- truthound/stores/tiering/policies.py +692 -0
- truthound/stores/tiering/store.py +526 -0
- truthound/stores/versioning/__init__.py +56 -0
- truthound/stores/versioning/base.py +376 -0
- truthound/stores/versioning/store.py +660 -0
- truthound/stores/versioning/strategies.py +353 -0
- truthound/types.py +56 -0
- truthound/validators/__init__.py +774 -0
- truthound/validators/aggregate/__init__.py +27 -0
- truthound/validators/aggregate/central.py +116 -0
- truthound/validators/aggregate/extremes.py +116 -0
- truthound/validators/aggregate/spread.py +118 -0
- truthound/validators/aggregate/sum.py +64 -0
- truthound/validators/aggregate/type.py +78 -0
- truthound/validators/anomaly/__init__.py +93 -0
- truthound/validators/anomaly/base.py +431 -0
- truthound/validators/anomaly/ml_based.py +1190 -0
- truthound/validators/anomaly/multivariate.py +647 -0
- truthound/validators/anomaly/statistical.py +599 -0
- truthound/validators/base.py +1089 -0
- truthound/validators/business_rule/__init__.py +46 -0
- truthound/validators/business_rule/base.py +147 -0
- truthound/validators/business_rule/checksum.py +509 -0
- truthound/validators/business_rule/financial.py +526 -0
- truthound/validators/cache.py +733 -0
- truthound/validators/completeness/__init__.py +39 -0
- truthound/validators/completeness/conditional.py +73 -0
- truthound/validators/completeness/default.py +98 -0
- truthound/validators/completeness/empty.py +103 -0
- truthound/validators/completeness/nan.py +337 -0
- truthound/validators/completeness/null.py +152 -0
- truthound/validators/cross_table/__init__.py +17 -0
- truthound/validators/cross_table/aggregate.py +333 -0
- truthound/validators/cross_table/row_count.py +122 -0
- truthound/validators/datetime/__init__.py +29 -0
- truthound/validators/datetime/format.py +78 -0
- truthound/validators/datetime/freshness.py +269 -0
- truthound/validators/datetime/order.py +73 -0
- truthound/validators/datetime/parseable.py +185 -0
- truthound/validators/datetime/range.py +202 -0
- truthound/validators/datetime/timezone.py +69 -0
- truthound/validators/distribution/__init__.py +49 -0
- truthound/validators/distribution/distribution.py +128 -0
- truthound/validators/distribution/monotonic.py +119 -0
- truthound/validators/distribution/outlier.py +178 -0
- truthound/validators/distribution/quantile.py +80 -0
- truthound/validators/distribution/range.py +254 -0
- truthound/validators/distribution/set.py +125 -0
- truthound/validators/distribution/statistical.py +459 -0
- truthound/validators/drift/__init__.py +79 -0
- truthound/validators/drift/base.py +427 -0
- truthound/validators/drift/multi_feature.py +401 -0
- truthound/validators/drift/numeric.py +395 -0
- truthound/validators/drift/psi.py +446 -0
- truthound/validators/drift/statistical.py +510 -0
- truthound/validators/enterprise.py +1658 -0
- truthound/validators/geospatial/__init__.py +80 -0
- truthound/validators/geospatial/base.py +97 -0
- truthound/validators/geospatial/boundary.py +238 -0
- truthound/validators/geospatial/coordinate.py +351 -0
- truthound/validators/geospatial/distance.py +399 -0
- truthound/validators/geospatial/polygon.py +665 -0
- truthound/validators/i18n/__init__.py +308 -0
- truthound/validators/i18n/bidi.py +571 -0
- truthound/validators/i18n/catalogs.py +570 -0
- truthound/validators/i18n/dialects.py +763 -0
- truthound/validators/i18n/extended_catalogs.py +549 -0
- truthound/validators/i18n/formatting.py +1434 -0
- truthound/validators/i18n/loader.py +1020 -0
- truthound/validators/i18n/messages.py +521 -0
- truthound/validators/i18n/plural.py +683 -0
- truthound/validators/i18n/protocols.py +855 -0
- truthound/validators/i18n/tms.py +1162 -0
- truthound/validators/localization/__init__.py +53 -0
- truthound/validators/localization/base.py +122 -0
- truthound/validators/localization/chinese.py +362 -0
- truthound/validators/localization/japanese.py +275 -0
- truthound/validators/localization/korean.py +524 -0
- truthound/validators/memory/__init__.py +94 -0
- truthound/validators/memory/approximate_knn.py +506 -0
- truthound/validators/memory/base.py +547 -0
- truthound/validators/memory/sgd_online.py +719 -0
- truthound/validators/memory/streaming_ecdf.py +753 -0
- truthound/validators/ml_feature/__init__.py +54 -0
- truthound/validators/ml_feature/base.py +249 -0
- truthound/validators/ml_feature/correlation.py +299 -0
- truthound/validators/ml_feature/leakage.py +344 -0
- truthound/validators/ml_feature/null_impact.py +270 -0
- truthound/validators/ml_feature/scale.py +264 -0
- truthound/validators/multi_column/__init__.py +89 -0
- truthound/validators/multi_column/arithmetic.py +284 -0
- truthound/validators/multi_column/base.py +231 -0
- truthound/validators/multi_column/comparison.py +273 -0
- truthound/validators/multi_column/consistency.py +312 -0
- truthound/validators/multi_column/statistical.py +299 -0
- truthound/validators/optimization/__init__.py +164 -0
- truthound/validators/optimization/aggregation.py +563 -0
- truthound/validators/optimization/covariance.py +556 -0
- truthound/validators/optimization/geo.py +626 -0
- truthound/validators/optimization/graph.py +587 -0
- truthound/validators/optimization/orchestrator.py +970 -0
- truthound/validators/optimization/profiling.py +1312 -0
- truthound/validators/privacy/__init__.py +223 -0
- truthound/validators/privacy/base.py +635 -0
- truthound/validators/privacy/ccpa.py +670 -0
- truthound/validators/privacy/gdpr.py +728 -0
- truthound/validators/privacy/global_patterns.py +604 -0
- truthound/validators/privacy/plugins.py +867 -0
- truthound/validators/profiling/__init__.py +52 -0
- truthound/validators/profiling/base.py +175 -0
- truthound/validators/profiling/cardinality.py +312 -0
- truthound/validators/profiling/entropy.py +391 -0
- truthound/validators/profiling/frequency.py +455 -0
- truthound/validators/pushdown_support.py +660 -0
- truthound/validators/query/__init__.py +91 -0
- truthound/validators/query/aggregate.py +346 -0
- truthound/validators/query/base.py +246 -0
- truthound/validators/query/column.py +249 -0
- truthound/validators/query/expression.py +274 -0
- truthound/validators/query/result.py +323 -0
- truthound/validators/query/row_count.py +264 -0
- truthound/validators/referential/__init__.py +80 -0
- truthound/validators/referential/base.py +395 -0
- truthound/validators/referential/cascade.py +391 -0
- truthound/validators/referential/circular.py +563 -0
- truthound/validators/referential/foreign_key.py +624 -0
- truthound/validators/referential/orphan.py +485 -0
- truthound/validators/registry.py +112 -0
- truthound/validators/schema/__init__.py +41 -0
- truthound/validators/schema/column_count.py +142 -0
- truthound/validators/schema/column_exists.py +80 -0
- truthound/validators/schema/column_order.py +82 -0
- truthound/validators/schema/column_pair.py +85 -0
- truthound/validators/schema/column_pair_set.py +195 -0
- truthound/validators/schema/column_type.py +94 -0
- truthound/validators/schema/multi_column.py +53 -0
- truthound/validators/schema/multi_column_aggregate.py +175 -0
- truthound/validators/schema/referential.py +274 -0
- truthound/validators/schema/table_schema.py +91 -0
- truthound/validators/schema_validator.py +219 -0
- truthound/validators/sdk/__init__.py +250 -0
- truthound/validators/sdk/builder.py +680 -0
- truthound/validators/sdk/decorators.py +474 -0
- truthound/validators/sdk/enterprise/__init__.py +211 -0
- truthound/validators/sdk/enterprise/docs.py +725 -0
- truthound/validators/sdk/enterprise/fuzzing.py +659 -0
- truthound/validators/sdk/enterprise/licensing.py +709 -0
- truthound/validators/sdk/enterprise/manager.py +543 -0
- truthound/validators/sdk/enterprise/resources.py +628 -0
- truthound/validators/sdk/enterprise/sandbox.py +766 -0
- truthound/validators/sdk/enterprise/signing.py +603 -0
- truthound/validators/sdk/enterprise/templates.py +865 -0
- truthound/validators/sdk/enterprise/versioning.py +659 -0
- truthound/validators/sdk/templates.py +757 -0
- truthound/validators/sdk/testing.py +807 -0
- truthound/validators/security/__init__.py +181 -0
- truthound/validators/security/redos/__init__.py +182 -0
- truthound/validators/security/redos/core.py +861 -0
- truthound/validators/security/redos/cpu_monitor.py +593 -0
- truthound/validators/security/redos/cve_database.py +791 -0
- truthound/validators/security/redos/ml/__init__.py +155 -0
- truthound/validators/security/redos/ml/base.py +785 -0
- truthound/validators/security/redos/ml/datasets.py +618 -0
- truthound/validators/security/redos/ml/features.py +359 -0
- truthound/validators/security/redos/ml/models.py +1000 -0
- truthound/validators/security/redos/ml/predictor.py +507 -0
- truthound/validators/security/redos/ml/storage.py +632 -0
- truthound/validators/security/redos/ml/training.py +571 -0
- truthound/validators/security/redos/ml_analyzer.py +937 -0
- truthound/validators/security/redos/optimizer.py +674 -0
- truthound/validators/security/redos/profiler.py +682 -0
- truthound/validators/security/redos/re2_engine.py +709 -0
- truthound/validators/security/redos.py +886 -0
- truthound/validators/security/sql_security.py +1247 -0
- truthound/validators/streaming/__init__.py +126 -0
- truthound/validators/streaming/base.py +292 -0
- truthound/validators/streaming/completeness.py +210 -0
- truthound/validators/streaming/mixin.py +575 -0
- truthound/validators/streaming/range.py +308 -0
- truthound/validators/streaming/sources.py +846 -0
- truthound/validators/string/__init__.py +57 -0
- truthound/validators/string/casing.py +158 -0
- truthound/validators/string/charset.py +96 -0
- truthound/validators/string/format.py +501 -0
- truthound/validators/string/json.py +77 -0
- truthound/validators/string/json_schema.py +184 -0
- truthound/validators/string/length.py +104 -0
- truthound/validators/string/like_pattern.py +237 -0
- truthound/validators/string/regex.py +202 -0
- truthound/validators/string/regex_extended.py +435 -0
- truthound/validators/table/__init__.py +88 -0
- truthound/validators/table/base.py +78 -0
- truthound/validators/table/column_count.py +198 -0
- truthound/validators/table/freshness.py +362 -0
- truthound/validators/table/row_count.py +251 -0
- truthound/validators/table/schema.py +333 -0
- truthound/validators/table/size.py +285 -0
- truthound/validators/timeout/__init__.py +102 -0
- truthound/validators/timeout/advanced/__init__.py +247 -0
- truthound/validators/timeout/advanced/circuit_breaker.py +675 -0
- truthound/validators/timeout/advanced/prediction.py +773 -0
- truthound/validators/timeout/advanced/priority.py +618 -0
- truthound/validators/timeout/advanced/redis_backend.py +770 -0
- truthound/validators/timeout/advanced/retry.py +721 -0
- truthound/validators/timeout/advanced/sampling.py +788 -0
- truthound/validators/timeout/advanced/sla.py +661 -0
- truthound/validators/timeout/advanced/telemetry.py +804 -0
- truthound/validators/timeout/cascade.py +477 -0
- truthound/validators/timeout/deadline.py +657 -0
- truthound/validators/timeout/degradation.py +525 -0
- truthound/validators/timeout/distributed.py +597 -0
- truthound/validators/timeseries/__init__.py +89 -0
- truthound/validators/timeseries/base.py +326 -0
- truthound/validators/timeseries/completeness.py +617 -0
- truthound/validators/timeseries/gap.py +485 -0
- truthound/validators/timeseries/monotonic.py +310 -0
- truthound/validators/timeseries/seasonality.py +422 -0
- truthound/validators/timeseries/trend.py +510 -0
- truthound/validators/uniqueness/__init__.py +59 -0
- truthound/validators/uniqueness/approximate.py +475 -0
- truthound/validators/uniqueness/distinct_values.py +253 -0
- truthound/validators/uniqueness/duplicate.py +118 -0
- truthound/validators/uniqueness/primary_key.py +140 -0
- truthound/validators/uniqueness/unique.py +191 -0
- truthound/validators/uniqueness/within_record.py +599 -0
- truthound/validators/utils.py +756 -0
- truthound-1.0.8.dist-info/METADATA +474 -0
- truthound-1.0.8.dist-info/RECORD +877 -0
- truthound-1.0.8.dist-info/WHEEL +4 -0
- truthound-1.0.8.dist-info/entry_points.txt +2 -0
- truthound-1.0.8.dist-info/licenses/LICENSE +190 -0
|
@@ -0,0 +1,801 @@
|
|
|
1
|
+
"""Base classes and protocols for streaming storage.
|
|
2
|
+
|
|
3
|
+
This module defines the abstract interfaces and protocols that all streaming
|
|
4
|
+
store implementations must follow. Streaming stores enable handling of
|
|
5
|
+
validation results that exceed available memory.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
from __future__ import annotations
|
|
9
|
+
|
|
10
|
+
import asyncio
|
|
11
|
+
from abc import ABC, abstractmethod
|
|
12
|
+
from dataclasses import dataclass, field
|
|
13
|
+
from datetime import datetime
|
|
14
|
+
from enum import Enum
|
|
15
|
+
from typing import (
|
|
16
|
+
TYPE_CHECKING,
|
|
17
|
+
Any,
|
|
18
|
+
AsyncIterator,
|
|
19
|
+
Generic,
|
|
20
|
+
Iterator,
|
|
21
|
+
Protocol,
|
|
22
|
+
TypeVar,
|
|
23
|
+
runtime_checkable,
|
|
24
|
+
)
|
|
25
|
+
|
|
26
|
+
if TYPE_CHECKING:
|
|
27
|
+
from truthound.stores.results import ValidationResult, ValidatorResult
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
# =============================================================================
|
|
31
|
+
# Enums
|
|
32
|
+
# =============================================================================
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
class StreamingFormat(str, Enum):
    """Serialization formats a streaming store can use.

    Members also subclass ``str``, so each member compares equal to its raw
    string value.
    """

    # JSON Lines: one JSON object per line.
    JSONL = "jsonl"
    # Newline Delimited JSON: the same layout as JSONL under another name.
    NDJSON = "ndjson"
    # Comma-separated values with a header row.
    CSV = "csv"
    # Columnar format intended for analytics.
    PARQUET = "parquet"
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
class CompressionType(str, Enum):
    """Compression algorithms selectable for streamed output.

    Members also subclass ``str``, so each member compares equal to its raw
    string value. ``NONE`` is the explicit "no compression" choice.
    """

    NONE = "none"
    GZIP = "gzip"
    ZSTD = "zstd"
    LZ4 = "lz4"
    SNAPPY = "snappy"
|
|
52
|
+
|
|
53
|
+
|
|
54
|
+
class StreamStatus(str, Enum):
    """Lifecycle states of a streaming operation.

    Members also subclass ``str``, so each member compares equal to its raw
    string value; ``StreamStatus(value)`` maps a stored string back to the
    member.
    """

    PENDING = "pending"
    ACTIVE = "active"
    PAUSED = "paused"
    COMPLETED = "completed"
    FAILED = "failed"
    ABORTED = "aborted"
|
|
63
|
+
|
|
64
|
+
|
|
65
|
+
# =============================================================================
|
|
66
|
+
# Configuration
|
|
67
|
+
# =============================================================================
|
|
68
|
+
|
|
69
|
+
|
|
70
|
+
@dataclass
class StreamingConfig:
    """Tunable settings for streaming storage operations.

    Attributes:
        format: Output format (jsonl, csv, parquet).
        compression: Compression algorithm to use.
        chunk_size: Number of records per chunk/file.
        buffer_size: In-memory buffer size before flush.
        max_memory_mb: Maximum memory usage in MB.
        flush_interval_seconds: Auto-flush interval.
        enable_checkpoints: Enable periodic checkpoints for recovery.
        checkpoint_interval: Records between checkpoints.
        enable_metrics: Collect streaming metrics.
        max_retries: Maximum retry attempts on failure.
        retry_delay_seconds: Base delay between retries.
    """

    format: StreamingFormat = StreamingFormat.JSONL
    compression: CompressionType = CompressionType.NONE
    chunk_size: int = 10000
    buffer_size: int = 1000
    max_memory_mb: int = 512
    flush_interval_seconds: float = 30.0
    enable_checkpoints: bool = True
    checkpoint_interval: int = 10000
    enable_metrics: bool = True
    max_retries: int = 3
    retry_delay_seconds: float = 1.0

    def validate(self) -> None:
        """Check the sizing and interval fields, raising on the first bad one.

        ``max_retries`` and ``retry_delay_seconds`` are not checked here.

        Raises:
            ValueError: If ``chunk_size``, ``buffer_size``, ``max_memory_mb``
                or ``checkpoint_interval`` is zero or negative, or if
                ``flush_interval_seconds`` is negative.
        """
        # Checked in declaration order so the first offending field wins.
        strictly_positive = (
            ("chunk_size", "chunk_size must be positive"),
            ("buffer_size", "buffer_size must be positive"),
            ("max_memory_mb", "max_memory_mb must be positive"),
        )
        for attr_name, message in strictly_positive:
            if getattr(self, attr_name) <= 0:
                raise ValueError(message)

        # Zero is allowed for the flush interval; only negatives are rejected.
        if self.flush_interval_seconds < 0:
            raise ValueError("flush_interval_seconds must be non-negative")

        if self.checkpoint_interval <= 0:
            raise ValueError("checkpoint_interval must be positive")
|
|
112
|
+
|
|
113
|
+
|
|
114
|
+
# =============================================================================
|
|
115
|
+
# Metrics and Monitoring
|
|
116
|
+
# =============================================================================
|
|
117
|
+
|
|
118
|
+
|
|
119
|
+
@dataclass
class StreamingMetrics:
    """Counters and timings collected during streaming operations.

    Attributes:
        records_written: Total records written.
        records_read: Total records read.
        bytes_written: Total bytes written (after compression).
        bytes_read: Total bytes read.
        chunks_written: Number of chunks/files written.
        chunks_read: Number of chunks/files read.
        flush_count: Number of buffer flushes.
        retry_count: Number of retry attempts.
        errors: List of errors encountered.
        start_time: When streaming started. An ISO-8601 string is also
            accepted at construction time and converted to ``datetime``.
        end_time: When streaming ended. An ISO-8601 string is also accepted
            at construction time and converted to ``datetime``.
        peak_memory_mb: Peak memory usage in MB.
        average_throughput: Records per second.
    """

    records_written: int = 0
    records_read: int = 0
    bytes_written: int = 0
    bytes_read: int = 0
    chunks_written: int = 0
    chunks_read: int = 0
    flush_count: int = 0
    retry_count: int = 0
    errors: list[str] = field(default_factory=list)
    start_time: datetime | None = None
    end_time: datetime | None = None
    peak_memory_mb: float = 0.0
    average_throughput: float = 0.0

    def __post_init__(self) -> None:
        """Normalize timestamp fields that arrive as ISO-8601 strings.

        ``to_dict`` serializes ``start_time`` and ``end_time`` with
        ``isoformat()``. Rebuilding an instance with
        ``StreamingMetrics(**payload)`` would otherwise leave them as plain
        strings, which breaks the subtraction in ``finish`` and the
        ``isoformat()`` calls in ``to_dict``.
        """
        if isinstance(self.start_time, str):
            self.start_time = datetime.fromisoformat(self.start_time)
        if isinstance(self.end_time, str):
            self.end_time = datetime.fromisoformat(self.end_time)

    def record_write(self, count: int = 1, bytes_count: int = 0) -> None:
        """Add ``count`` records and ``bytes_count`` bytes to the write totals."""
        self.records_written += count
        self.bytes_written += bytes_count

    def record_read(self, count: int = 1, bytes_count: int = 0) -> None:
        """Add ``count`` records and ``bytes_count`` bytes to the read totals."""
        self.records_read += count
        self.bytes_read += bytes_count

    def record_chunk(self, is_write: bool = True) -> None:
        """Count one chunk as written (default) or as read."""
        if is_write:
            self.chunks_written += 1
        else:
            self.chunks_read += 1

    def record_error(self, error: str) -> None:
        """Append an error message to ``errors``."""
        self.errors.append(error)

    def start(self) -> None:
        """Set ``start_time`` to the current local time."""
        self.start_time = datetime.now()

    def finish(self) -> None:
        """Set ``end_time`` to now and recompute ``average_throughput``.

        Throughput is (records written + records read) divided by the elapsed
        seconds. It is left unchanged when ``start_time`` was never set or the
        elapsed time is not positive.
        """
        self.end_time = datetime.now()
        if self.start_time and self.end_time:
            duration = (self.end_time - self.start_time).total_seconds()
            if duration > 0:
                total_records = self.records_written + self.records_read
                self.average_throughput = total_records / duration

    def to_dict(self) -> dict[str, Any]:
        """Return the metrics as a plain dictionary for serialization.

        Timestamps are emitted as ISO-8601 strings, or ``None`` when unset.
        """
        return {
            "records_written": self.records_written,
            "records_read": self.records_read,
            "bytes_written": self.bytes_written,
            "bytes_read": self.bytes_read,
            "chunks_written": self.chunks_written,
            "chunks_read": self.chunks_read,
            "flush_count": self.flush_count,
            "retry_count": self.retry_count,
            # Copy so that editing the snapshot cannot alter the live metrics.
            "errors": list(self.errors),
            "start_time": self.start_time.isoformat() if self.start_time else None,
            "end_time": self.end_time.isoformat() if self.end_time else None,
            "peak_memory_mb": self.peak_memory_mb,
            "average_throughput": self.average_throughput,
        }
|
|
204
|
+
|
|
205
|
+
|
|
206
|
+
# =============================================================================
|
|
207
|
+
# Chunk Management
|
|
208
|
+
# =============================================================================
|
|
209
|
+
|
|
210
|
+
|
|
211
|
+
@dataclass
class ChunkInfo:
    """Descriptor of one stored chunk.

    Attributes:
        chunk_id: Unique identifier for the chunk.
        chunk_index: Sequential index of the chunk.
        record_count: Number of records in the chunk.
        byte_size: Size of the chunk in bytes.
        start_offset: Starting record offset.
        end_offset: Ending record offset.
        checksum: Optional checksum for integrity.
        created_at: When the chunk was created.
        path: Storage path/key for the chunk.
    """

    chunk_id: str
    chunk_index: int
    record_count: int
    byte_size: int
    start_offset: int
    end_offset: int
    checksum: str | None = None
    created_at: datetime = field(default_factory=datetime.now)
    path: str = ""

    def to_dict(self) -> dict[str, Any]:
        """Serialize the chunk descriptor to a plain dictionary.

        ``created_at`` is written as an ISO-8601 string; every other field is
        copied through unchanged.
        """
        created_iso = self.created_at.isoformat()
        payload: dict[str, Any] = {
            "chunk_id": self.chunk_id,
            "chunk_index": self.chunk_index,
            "record_count": self.record_count,
            "byte_size": self.byte_size,
            "start_offset": self.start_offset,
            "end_offset": self.end_offset,
            "checksum": self.checksum,
            "created_at": created_iso,
            "path": self.path,
        }
        return payload

    @classmethod
    def from_dict(cls, data: dict[str, Any]) -> "ChunkInfo":
        """Rebuild a chunk descriptor from the output of ``to_dict``.

        ``checksum`` and ``path`` may be absent (defaulting to ``None`` and
        ``""``); all other keys, including ``created_at``, are required.

        Raises:
            KeyError: If a required key is missing from ``data``.
        """
        created = datetime.fromisoformat(data["created_at"])
        optional = {
            "checksum": data.get("checksum"),
            "path": data.get("path", ""),
        }
        return cls(
            chunk_id=data["chunk_id"],
            chunk_index=data["chunk_index"],
            record_count=data["record_count"],
            byte_size=data["byte_size"],
            start_offset=data["start_offset"],
            end_offset=data["end_offset"],
            created_at=created,
            **optional,
        )
|
|
265
|
+
|
|
266
|
+
|
|
267
|
+
@dataclass
class StreamSession:
    """Session information for a streaming operation.

    Attributes:
        session_id: Unique identifier for the session.
        run_id: Associated validation run ID.
        data_asset: Name of the data asset being validated.
        status: Current status of the stream.
        config: Streaming configuration.
        metrics: Collected metrics.
        chunks: List of written chunks.
        metadata: Additional session metadata.
        started_at: When the session started.
        updated_at: Last update time.
        checkpoint_offset: Last checkpointed offset.
    """

    session_id: str
    run_id: str
    data_asset: str
    status: StreamStatus = StreamStatus.PENDING
    config: StreamingConfig = field(default_factory=StreamingConfig)
    metrics: StreamingMetrics = field(default_factory=StreamingMetrics)
    chunks: list[ChunkInfo] = field(default_factory=list)
    metadata: dict[str, Any] = field(default_factory=dict)
    started_at: datetime = field(default_factory=datetime.now)
    updated_at: datetime = field(default_factory=datetime.now)
    checkpoint_offset: int = 0

    def to_dict(self) -> dict[str, Any]:
        """Serialize the session to a plain dictionary.

        ``status`` is written as its string value and timestamps as ISO-8601
        strings. ``config`` is not included in the output.
        """
        return {
            "session_id": self.session_id,
            "run_id": self.run_id,
            "data_asset": self.data_asset,
            "status": self.status.value,
            "metrics": self.metrics.to_dict(),
            "chunks": [c.to_dict() for c in self.chunks],
            "metadata": self.metadata,
            "started_at": self.started_at.isoformat(),
            "updated_at": self.updated_at.isoformat(),
            "checkpoint_offset": self.checkpoint_offset,
        }

    @classmethod
    def from_dict(cls, data: dict[str, Any]) -> "StreamSession":
        """Rebuild a session from the output of ``to_dict``.

        Because ``to_dict`` does not serialize ``config``, the restored
        session always carries a default ``StreamingConfig``.

        Args:
            data: Mapping with at least ``session_id``, ``run_id``,
                ``data_asset``, ``started_at`` and ``updated_at``.

        Returns:
            The reconstructed session.

        Raises:
            KeyError: If a required key is missing from ``data``.
            ValueError: If ``status`` is not a valid ``StreamStatus`` value.
        """
        # StreamingMetrics.to_dict() emits start_time/end_time as ISO strings.
        # Parse them back before construction so the restored metrics hold
        # datetime objects, which finish() and to_dict() both require.
        # Work on a copy to leave the caller's dict untouched.
        metrics_payload = dict(data.get("metrics", {}))
        for key in ("start_time", "end_time"):
            raw = metrics_payload.get(key)
            if isinstance(raw, str):
                metrics_payload[key] = datetime.fromisoformat(raw)

        return cls(
            session_id=data["session_id"],
            run_id=data["run_id"],
            data_asset=data["data_asset"],
            status=StreamStatus(data.get("status", "pending")),
            metrics=StreamingMetrics(**metrics_payload),
            chunks=[ChunkInfo.from_dict(c) for c in data.get("chunks", [])],
            metadata=data.get("metadata", {}),
            started_at=datetime.fromisoformat(data["started_at"]),
            updated_at=datetime.fromisoformat(data["updated_at"]),
            checkpoint_offset=data.get("checkpoint_offset", 0),
        )
|
|
327
|
+
|
|
328
|
+
|
|
329
|
+
# =============================================================================
|
|
330
|
+
# Protocols
|
|
331
|
+
# =============================================================================
|
|
332
|
+
|
|
333
|
+
|
|
334
|
+
@runtime_checkable
class StreamingWriter(Protocol):
    """Structural interface for synchronous streaming writers.

    Being ``runtime_checkable``, the protocol works with ``isinstance``, but
    that check only verifies the four methods exist. It does not check their
    signatures or whether they are coroutines.
    """

    def write(self, record: dict[str, Any]) -> None:
        """Write one record."""
        ...

    def write_batch(self, records: list[dict[str, Any]]) -> None:
        """Write several records in one call."""
        ...

    def flush(self) -> None:
        """Push any buffered records out to storage."""
        ...

    def close(self) -> None:
        """Finalize and close the writer."""
        ...
|
|
353
|
+
|
|
354
|
+
|
|
355
|
+
@runtime_checkable
class StreamingReader(Protocol):
    """Structural interface for synchronous streaming readers.

    Being ``runtime_checkable``, the protocol works with ``isinstance``, but
    that check only verifies the members exist, not their signatures.
    """

    def read(self) -> dict[str, Any] | None:
        """Return one record; the signature also allows ``None``."""
        ...

    def read_batch(self, size: int) -> list[dict[str, Any]]:
        """Return a list of records for the requested batch ``size``."""
        ...

    def __iter__(self) -> Iterator[dict[str, Any]]:
        """Return an iterator over the records."""
        ...

    def close(self) -> None:
        """Close the reader."""
        ...
|
|
374
|
+
|
|
375
|
+
|
|
376
|
+
@runtime_checkable
class AsyncStreamingWriter(Protocol):
    """Structural interface for asynchronous streaming writers.

    It declares the same method names as ``StreamingWriter``, each as a
    coroutine. A runtime ``isinstance`` check only looks for the method
    names, so it cannot tell a synchronous writer from an asynchronous one.
    """

    async def write(self, record: dict[str, Any]) -> None:
        """Write one record asynchronously."""
        ...

    async def write_batch(self, records: list[dict[str, Any]]) -> None:
        """Write several records asynchronously in one call."""
        ...

    async def flush(self) -> None:
        """Push any buffered records out to storage."""
        ...

    async def close(self) -> None:
        """Finalize and close the writer."""
        ...
|
|
395
|
+
|
|
396
|
+
|
|
397
|
+
@runtime_checkable
class AsyncStreamingReader(Protocol):
    """Structural interface for asynchronous streaming readers.

    Being ``runtime_checkable``, the protocol works with ``isinstance``, but
    that check only verifies the members exist. It does not check their
    signatures or whether they are coroutines.
    """

    async def read(self) -> dict[str, Any] | None:
        """Return one record asynchronously; the signature also allows ``None``."""
        ...

    async def read_batch(self, size: int) -> list[dict[str, Any]]:
        """Return a list of records for the requested batch ``size``."""
        ...

    def __aiter__(self) -> AsyncIterator[dict[str, Any]]:
        """Return an async iterator over the records."""
        ...

    async def close(self) -> None:
        """Close the reader."""
        ...
|
|
416
|
+
|
|
417
|
+
|
|
418
|
+
# =============================================================================
|
|
419
|
+
# Abstract Base Classes
|
|
420
|
+
# =============================================================================
|
|
421
|
+
|
|
422
|
+
|
|
423
|
+
# Unconstrained type parameter: the type of objects a streaming store holds.
T = TypeVar("T")
|
|
424
|
+
# Configuration type parameter; bound to StreamingConfig, so only that class
# or a subclass of it is accepted.
ConfigT = TypeVar("ConfigT", bound=StreamingConfig)
|
|
425
|
+
|
|
426
|
+
|
|
427
|
+
class StreamingStore(ABC, Generic[T, ConfigT]):
|
|
428
|
+
"""Abstract base class for streaming stores.
|
|
429
|
+
|
|
430
|
+
Streaming stores handle large-scale data that cannot fit in memory.
|
|
431
|
+
They support incremental writing and reading through chunked operations.
|
|
432
|
+
|
|
433
|
+
Type Parameters:
|
|
434
|
+
T: The type of objects being stored.
|
|
435
|
+
ConfigT: The configuration type for this store.
|
|
436
|
+
"""
|
|
437
|
+
|
|
438
|
+
def __init__(self, config: ConfigT | None = None) -> None:
|
|
439
|
+
"""Initialize the streaming store.
|
|
440
|
+
|
|
441
|
+
Args:
|
|
442
|
+
config: Streaming configuration.
|
|
443
|
+
"""
|
|
444
|
+
self._config = config or self._default_config()
|
|
445
|
+
self._config.validate()
|
|
446
|
+
self._initialized = False
|
|
447
|
+
self._active_sessions: dict[str, StreamSession] = {}
|
|
448
|
+
|
|
449
|
+
@classmethod
|
|
450
|
+
@abstractmethod
|
|
451
|
+
def _default_config(cls) -> ConfigT:
|
|
452
|
+
"""Create default configuration."""
|
|
453
|
+
pass
|
|
454
|
+
|
|
455
|
+
@property
|
|
456
|
+
def config(self) -> ConfigT:
|
|
457
|
+
"""Get the store configuration."""
|
|
458
|
+
return self._config
|
|
459
|
+
|
|
460
|
+
# -------------------------------------------------------------------------
|
|
461
|
+
# Lifecycle
|
|
462
|
+
# -------------------------------------------------------------------------
|
|
463
|
+
|
|
464
|
+
def initialize(self) -> None:
|
|
465
|
+
"""Initialize the store."""
|
|
466
|
+
if not self._initialized:
|
|
467
|
+
self._do_initialize()
|
|
468
|
+
self._initialized = True
|
|
469
|
+
|
|
470
|
+
@abstractmethod
|
|
471
|
+
def _do_initialize(self) -> None:
|
|
472
|
+
"""Perform actual initialization."""
|
|
473
|
+
pass
|
|
474
|
+
|
|
475
|
+
def close(self) -> None:
|
|
476
|
+
"""Close the store and all active sessions."""
|
|
477
|
+
for session in list(self._active_sessions.values()):
|
|
478
|
+
self._close_session(session)
|
|
479
|
+
self._active_sessions.clear()
|
|
480
|
+
|
|
481
|
+
def __enter__(self) -> "StreamingStore[T, ConfigT]":
|
|
482
|
+
"""Context manager entry."""
|
|
483
|
+
self.initialize()
|
|
484
|
+
return self
|
|
485
|
+
|
|
486
|
+
def __exit__(self, exc_type: Any, exc_val: Any, exc_tb: Any) -> None:
|
|
487
|
+
"""Context manager exit."""
|
|
488
|
+
self.close()
|
|
489
|
+
|
|
490
|
+
# -------------------------------------------------------------------------
|
|
491
|
+
# Session Management
|
|
492
|
+
# -------------------------------------------------------------------------
|
|
493
|
+
|
|
494
|
+
@abstractmethod
|
|
495
|
+
def create_session(
|
|
496
|
+
self,
|
|
497
|
+
run_id: str,
|
|
498
|
+
data_asset: str,
|
|
499
|
+
metadata: dict[str, Any] | None = None,
|
|
500
|
+
) -> StreamSession:
|
|
501
|
+
"""Create a new streaming session.
|
|
502
|
+
|
|
503
|
+
Args:
|
|
504
|
+
run_id: Validation run identifier.
|
|
505
|
+
data_asset: Name of the data asset.
|
|
506
|
+
metadata: Optional session metadata.
|
|
507
|
+
|
|
508
|
+
Returns:
|
|
509
|
+
A new streaming session.
|
|
510
|
+
"""
|
|
511
|
+
pass
|
|
512
|
+
|
|
513
|
+
@abstractmethod
def get_session(self, session_id: str) -> StreamSession | None:
    """Look up an existing session by its identifier.

    Args:
        session_id: Identifier of the session to fetch.

    Returns:
        The matching session, or ``None`` when no such session is found.
    """
|
|
524
|
+
|
|
525
|
+
@abstractmethod
def resume_session(self, session_id: str) -> StreamSession:
    """Pick an interrupted session back up.

    Args:
        session_id: Identifier of the session to resume.

    Returns:
        The resumed session.

    Raises:
        ValueError: If the session cannot be resumed.
    """
|
|
539
|
+
|
|
540
|
+
@abstractmethod
def _close_session(self, session: StreamSession) -> None:
    """Finalize and close a single session.

    Called by ``close()`` for each session still registered as active.

    Args:
        session: The session to close.
    """
|
|
544
|
+
|
|
545
|
+
# -------------------------------------------------------------------------
|
|
546
|
+
# Streaming Write Operations
|
|
547
|
+
# -------------------------------------------------------------------------
|
|
548
|
+
|
|
549
|
+
@abstractmethod
def create_writer(self, session: StreamSession) -> StreamingWriter:
    """Build a synchronous writer bound to a session.

    Args:
        session: The streaming session to write into.

    Returns:
        A streaming writer instance.
    """
|
|
560
|
+
|
|
561
|
+
@abstractmethod
async def create_async_writer(
    self, session: StreamSession
) -> AsyncStreamingWriter:
    """Build an asynchronous writer bound to a session.

    Args:
        session: The streaming session to write into.

    Returns:
        An async streaming writer instance.
    """
|
|
574
|
+
|
|
575
|
+
# -------------------------------------------------------------------------
|
|
576
|
+
# Streaming Read Operations
|
|
577
|
+
# -------------------------------------------------------------------------
|
|
578
|
+
|
|
579
|
+
@abstractmethod
def create_reader(self, run_id: str) -> StreamingReader:
    """Build a synchronous reader over the results of a run.

    Args:
        run_id: ID of the run whose results are to be read.

    Returns:
        A streaming reader instance.
    """
|
|
590
|
+
|
|
591
|
+
@abstractmethod
async def create_async_reader(self, run_id: str) -> AsyncStreamingReader:
    """Build an asynchronous reader over the results of a run.

    Args:
        run_id: ID of the run whose results are to be read.

    Returns:
        An async streaming reader instance.
    """
|
|
602
|
+
|
|
603
|
+
@abstractmethod
def iter_results(
    self,
    run_id: str,
    batch_size: int = 1000,
) -> Iterator[T]:
    """Walk through the stored results of a run one record at a time.

    Args:
        run_id: ID of the run to iterate over.
        batch_size: Number of records per batch.

    Yields:
        Individual result records.
    """
|
|
619
|
+
|
|
620
|
+
@abstractmethod
async def aiter_results(
    self,
    run_id: str,
    batch_size: int = 1000,
) -> AsyncIterator[T]:
    """Asynchronously walk through the stored results of a run.

    Args:
        run_id: ID of the run to iterate over.
        batch_size: Number of records per batch.

    Yields:
        Individual result records.
    """
    # NOTE(review): this stub is an ``async def`` with no ``yield``, so
    # calling it produces a coroutine rather than an async generator.
    # Implementations presumably override it with an async generator;
    # confirm that callers use ``async for`` on the override only.
|
|
636
|
+
|
|
637
|
+
# -------------------------------------------------------------------------
|
|
638
|
+
# Chunk Management
|
|
639
|
+
# -------------------------------------------------------------------------
|
|
640
|
+
|
|
641
|
+
@abstractmethod
def list_chunks(self, run_id: str) -> list[ChunkInfo]:
    """Enumerate every chunk that belongs to a run.

    Args:
        run_id: The run ID.

    Returns:
        A list with the information for each chunk.
    """
|
|
652
|
+
|
|
653
|
+
@abstractmethod
def get_chunk(self, chunk_info: ChunkInfo) -> list[T]:
    """Load the records held in one chunk.

    Args:
        chunk_info: Descriptor of the chunk to retrieve.

    Returns:
        The records contained in that chunk.
    """
|
|
664
|
+
|
|
665
|
+
@abstractmethod
def delete_chunks(self, run_id: str) -> int:
    """Remove every chunk that belongs to a run.

    Args:
        run_id: The run ID.

    Returns:
        How many chunks were deleted.
    """
|
|
676
|
+
|
|
677
|
+
|
|
678
|
+
class StreamingValidationStore(StreamingStore["ValidatorResult", ConfigT], Generic[ConfigT]):
|
|
679
|
+
"""Streaming store specialized for validation results.
|
|
680
|
+
|
|
681
|
+
Provides additional methods specific to validation result streaming,
|
|
682
|
+
including aggregation and statistics computation.
|
|
683
|
+
"""
|
|
684
|
+
|
|
685
|
+
@abstractmethod
def stream_write_result(
    self,
    session: StreamSession,
    result: "ValidatorResult",
) -> None:
    """Append one validator result to the stream.

    Args:
        session: The streaming session to write into.
        result: The validator result to write.
    """
|
|
698
|
+
|
|
699
|
+
@abstractmethod
def stream_write_batch(
    self,
    session: StreamSession,
    results: list["ValidatorResult"],
) -> None:
    """Append several validator results to the stream in one call.

    Args:
        session: The streaming session to write into.
        results: The validator results to write.
    """
|
|
712
|
+
|
|
713
|
+
@abstractmethod
def finalize_result(
    self,
    session: StreamSession,
    additional_metadata: dict[str, Any] | None = None,
) -> "ValidationResult":
    """Finish a streaming session and produce its ValidationResult.

    All results streamed during the session are aggregated into a single
    ValidationResult that carries the computed statistics.

    Args:
        session: The streaming session to finalize.
        additional_metadata: Extra metadata to attach, if any.

    Returns:
        The complete ValidationResult.
    """
|
|
732
|
+
|
|
733
|
+
@abstractmethod
def get_streaming_stats(self, run_id: str) -> dict[str, Any]:
    """Report statistics for a streaming run.

    Args:
        run_id: The run ID.

    Returns:
        A dictionary of statistics, including record counts, errors and
        timing.
    """
|
|
744
|
+
|
|
745
|
+
def iter_failed_results(
    self,
    run_id: str,
    batch_size: int = 1000,
) -> Iterator["ValidatorResult"]:
    """Yield only those results of a run that did not succeed.

    Args:
        run_id: The run ID.
        batch_size: Number of records per batch, forwarded to
            ``iter_results()``.

    Yields:
        Each result whose ``success`` attribute is falsy.
    """
    every_result = self.iter_results(run_id, batch_size)
    yield from (item for item in every_result if not item.success)
|
|
762
|
+
|
|
763
|
+
def iter_results_by_column(
    self,
    run_id: str,
    column: str,
    batch_size: int = 1000,
) -> Iterator["ValidatorResult"]:
    """Yield the results of a run that belong to one column.

    Args:
        run_id: The run ID.
        column: Column name to filter by.
        batch_size: Number of records per batch, forwarded to
            ``iter_results()``.

    Yields:
        Each result whose ``column`` attribute equals ``column``.
    """
    every_result = self.iter_results(run_id, batch_size)
    yield from (item for item in every_result if item.column == column)
|
|
782
|
+
|
|
783
|
+
def iter_results_by_severity(
    self,
    run_id: str,
    severity: str,
    batch_size: int = 1000,
) -> Iterator["ValidatorResult"]:
    """Yield the results of a run that carry a given severity.

    Args:
        run_id: The run ID.
        severity: Severity level to filter by.
        batch_size: Number of records per batch, forwarded to
            ``iter_results()``.

    Yields:
        Each result whose ``severity`` attribute equals ``severity``.
    """
    every_result = self.iter_results(run_id, batch_size)
    yield from (item for item in every_result if item.severity == severity)
|