truthound 1.0.8__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- truthound/__init__.py +162 -0
- truthound/adapters.py +100 -0
- truthound/api.py +365 -0
- truthound/audit/__init__.py +248 -0
- truthound/audit/core.py +967 -0
- truthound/audit/filters.py +620 -0
- truthound/audit/formatters.py +707 -0
- truthound/audit/logger.py +902 -0
- truthound/audit/middleware.py +571 -0
- truthound/audit/storage.py +1083 -0
- truthound/benchmark/__init__.py +123 -0
- truthound/benchmark/base.py +757 -0
- truthound/benchmark/comparison.py +635 -0
- truthound/benchmark/generators.py +706 -0
- truthound/benchmark/reporters.py +718 -0
- truthound/benchmark/runner.py +635 -0
- truthound/benchmark/scenarios.py +712 -0
- truthound/cache.py +252 -0
- truthound/checkpoint/__init__.py +136 -0
- truthound/checkpoint/actions/__init__.py +164 -0
- truthound/checkpoint/actions/base.py +324 -0
- truthound/checkpoint/actions/custom.py +234 -0
- truthound/checkpoint/actions/discord_notify.py +290 -0
- truthound/checkpoint/actions/email_notify.py +405 -0
- truthound/checkpoint/actions/github_action.py +406 -0
- truthound/checkpoint/actions/opsgenie.py +1499 -0
- truthound/checkpoint/actions/pagerduty.py +226 -0
- truthound/checkpoint/actions/slack_notify.py +233 -0
- truthound/checkpoint/actions/store_result.py +249 -0
- truthound/checkpoint/actions/teams_notify.py +1570 -0
- truthound/checkpoint/actions/telegram_notify.py +419 -0
- truthound/checkpoint/actions/update_docs.py +552 -0
- truthound/checkpoint/actions/webhook.py +293 -0
- truthound/checkpoint/analytics/__init__.py +147 -0
- truthound/checkpoint/analytics/aggregations/__init__.py +23 -0
- truthound/checkpoint/analytics/aggregations/rollup.py +481 -0
- truthound/checkpoint/analytics/aggregations/time_bucket.py +306 -0
- truthound/checkpoint/analytics/analyzers/__init__.py +17 -0
- truthound/checkpoint/analytics/analyzers/anomaly.py +386 -0
- truthound/checkpoint/analytics/analyzers/base.py +270 -0
- truthound/checkpoint/analytics/analyzers/forecast.py +421 -0
- truthound/checkpoint/analytics/analyzers/trend.py +314 -0
- truthound/checkpoint/analytics/models.py +292 -0
- truthound/checkpoint/analytics/protocols.py +549 -0
- truthound/checkpoint/analytics/service.py +718 -0
- truthound/checkpoint/analytics/stores/__init__.py +16 -0
- truthound/checkpoint/analytics/stores/base.py +306 -0
- truthound/checkpoint/analytics/stores/memory_store.py +353 -0
- truthound/checkpoint/analytics/stores/sqlite_store.py +557 -0
- truthound/checkpoint/analytics/stores/timescale_store.py +501 -0
- truthound/checkpoint/async_actions.py +794 -0
- truthound/checkpoint/async_base.py +708 -0
- truthound/checkpoint/async_checkpoint.py +617 -0
- truthound/checkpoint/async_runner.py +639 -0
- truthound/checkpoint/checkpoint.py +527 -0
- truthound/checkpoint/ci/__init__.py +61 -0
- truthound/checkpoint/ci/detector.py +355 -0
- truthound/checkpoint/ci/reporter.py +436 -0
- truthound/checkpoint/ci/templates.py +454 -0
- truthound/checkpoint/circuitbreaker/__init__.py +133 -0
- truthound/checkpoint/circuitbreaker/breaker.py +542 -0
- truthound/checkpoint/circuitbreaker/core.py +252 -0
- truthound/checkpoint/circuitbreaker/detection.py +459 -0
- truthound/checkpoint/circuitbreaker/middleware.py +389 -0
- truthound/checkpoint/circuitbreaker/registry.py +357 -0
- truthound/checkpoint/distributed/__init__.py +139 -0
- truthound/checkpoint/distributed/backends/__init__.py +35 -0
- truthound/checkpoint/distributed/backends/celery_backend.py +503 -0
- truthound/checkpoint/distributed/backends/kubernetes_backend.py +696 -0
- truthound/checkpoint/distributed/backends/local_backend.py +397 -0
- truthound/checkpoint/distributed/backends/ray_backend.py +625 -0
- truthound/checkpoint/distributed/base.py +774 -0
- truthound/checkpoint/distributed/orchestrator.py +765 -0
- truthound/checkpoint/distributed/protocols.py +842 -0
- truthound/checkpoint/distributed/registry.py +449 -0
- truthound/checkpoint/idempotency/__init__.py +120 -0
- truthound/checkpoint/idempotency/core.py +295 -0
- truthound/checkpoint/idempotency/fingerprint.py +454 -0
- truthound/checkpoint/idempotency/locking.py +604 -0
- truthound/checkpoint/idempotency/service.py +592 -0
- truthound/checkpoint/idempotency/stores.py +653 -0
- truthound/checkpoint/monitoring/__init__.py +134 -0
- truthound/checkpoint/monitoring/aggregators/__init__.py +15 -0
- truthound/checkpoint/monitoring/aggregators/base.py +372 -0
- truthound/checkpoint/monitoring/aggregators/realtime.py +300 -0
- truthound/checkpoint/monitoring/aggregators/window.py +493 -0
- truthound/checkpoint/monitoring/collectors/__init__.py +17 -0
- truthound/checkpoint/monitoring/collectors/base.py +257 -0
- truthound/checkpoint/monitoring/collectors/memory_collector.py +617 -0
- truthound/checkpoint/monitoring/collectors/prometheus_collector.py +451 -0
- truthound/checkpoint/monitoring/collectors/redis_collector.py +518 -0
- truthound/checkpoint/monitoring/events.py +410 -0
- truthound/checkpoint/monitoring/protocols.py +636 -0
- truthound/checkpoint/monitoring/service.py +578 -0
- truthound/checkpoint/monitoring/views/__init__.py +17 -0
- truthound/checkpoint/monitoring/views/base.py +172 -0
- truthound/checkpoint/monitoring/views/queue_view.py +220 -0
- truthound/checkpoint/monitoring/views/task_view.py +240 -0
- truthound/checkpoint/monitoring/views/worker_view.py +263 -0
- truthound/checkpoint/registry.py +337 -0
- truthound/checkpoint/runner.py +356 -0
- truthound/checkpoint/transaction/__init__.py +133 -0
- truthound/checkpoint/transaction/base.py +389 -0
- truthound/checkpoint/transaction/compensatable.py +537 -0
- truthound/checkpoint/transaction/coordinator.py +576 -0
- truthound/checkpoint/transaction/executor.py +622 -0
- truthound/checkpoint/transaction/idempotency.py +534 -0
- truthound/checkpoint/transaction/saga/__init__.py +143 -0
- truthound/checkpoint/transaction/saga/builder.py +584 -0
- truthound/checkpoint/transaction/saga/definition.py +515 -0
- truthound/checkpoint/transaction/saga/event_store.py +542 -0
- truthound/checkpoint/transaction/saga/patterns.py +833 -0
- truthound/checkpoint/transaction/saga/runner.py +718 -0
- truthound/checkpoint/transaction/saga/state_machine.py +793 -0
- truthound/checkpoint/transaction/saga/strategies.py +780 -0
- truthound/checkpoint/transaction/saga/testing.py +886 -0
- truthound/checkpoint/triggers/__init__.py +58 -0
- truthound/checkpoint/triggers/base.py +237 -0
- truthound/checkpoint/triggers/event.py +385 -0
- truthound/checkpoint/triggers/schedule.py +355 -0
- truthound/cli.py +2358 -0
- truthound/cli_modules/__init__.py +124 -0
- truthound/cli_modules/advanced/__init__.py +45 -0
- truthound/cli_modules/advanced/benchmark.py +343 -0
- truthound/cli_modules/advanced/docs.py +225 -0
- truthound/cli_modules/advanced/lineage.py +209 -0
- truthound/cli_modules/advanced/ml.py +320 -0
- truthound/cli_modules/advanced/realtime.py +196 -0
- truthound/cli_modules/checkpoint/__init__.py +46 -0
- truthound/cli_modules/checkpoint/init.py +114 -0
- truthound/cli_modules/checkpoint/list.py +71 -0
- truthound/cli_modules/checkpoint/run.py +159 -0
- truthound/cli_modules/checkpoint/validate.py +67 -0
- truthound/cli_modules/common/__init__.py +71 -0
- truthound/cli_modules/common/errors.py +414 -0
- truthound/cli_modules/common/options.py +419 -0
- truthound/cli_modules/common/output.py +507 -0
- truthound/cli_modules/common/protocol.py +552 -0
- truthound/cli_modules/core/__init__.py +48 -0
- truthound/cli_modules/core/check.py +123 -0
- truthound/cli_modules/core/compare.py +104 -0
- truthound/cli_modules/core/learn.py +57 -0
- truthound/cli_modules/core/mask.py +77 -0
- truthound/cli_modules/core/profile.py +65 -0
- truthound/cli_modules/core/scan.py +61 -0
- truthound/cli_modules/profiler/__init__.py +51 -0
- truthound/cli_modules/profiler/auto_profile.py +175 -0
- truthound/cli_modules/profiler/metadata.py +107 -0
- truthound/cli_modules/profiler/suite.py +283 -0
- truthound/cli_modules/registry.py +431 -0
- truthound/cli_modules/scaffolding/__init__.py +89 -0
- truthound/cli_modules/scaffolding/base.py +631 -0
- truthound/cli_modules/scaffolding/commands.py +545 -0
- truthound/cli_modules/scaffolding/plugins.py +1072 -0
- truthound/cli_modules/scaffolding/reporters.py +594 -0
- truthound/cli_modules/scaffolding/validators.py +1127 -0
- truthound/common/__init__.py +18 -0
- truthound/common/resilience/__init__.py +130 -0
- truthound/common/resilience/bulkhead.py +266 -0
- truthound/common/resilience/circuit_breaker.py +516 -0
- truthound/common/resilience/composite.py +332 -0
- truthound/common/resilience/config.py +292 -0
- truthound/common/resilience/protocols.py +217 -0
- truthound/common/resilience/rate_limiter.py +404 -0
- truthound/common/resilience/retry.py +341 -0
- truthound/datadocs/__init__.py +260 -0
- truthound/datadocs/base.py +571 -0
- truthound/datadocs/builder.py +761 -0
- truthound/datadocs/charts.py +764 -0
- truthound/datadocs/dashboard/__init__.py +63 -0
- truthound/datadocs/dashboard/app.py +576 -0
- truthound/datadocs/dashboard/components.py +584 -0
- truthound/datadocs/dashboard/state.py +240 -0
- truthound/datadocs/engine/__init__.py +46 -0
- truthound/datadocs/engine/context.py +376 -0
- truthound/datadocs/engine/pipeline.py +618 -0
- truthound/datadocs/engine/registry.py +469 -0
- truthound/datadocs/exporters/__init__.py +49 -0
- truthound/datadocs/exporters/base.py +198 -0
- truthound/datadocs/exporters/html.py +178 -0
- truthound/datadocs/exporters/json_exporter.py +253 -0
- truthound/datadocs/exporters/markdown.py +284 -0
- truthound/datadocs/exporters/pdf.py +392 -0
- truthound/datadocs/i18n/__init__.py +86 -0
- truthound/datadocs/i18n/catalog.py +960 -0
- truthound/datadocs/i18n/formatting.py +505 -0
- truthound/datadocs/i18n/loader.py +256 -0
- truthound/datadocs/i18n/plurals.py +378 -0
- truthound/datadocs/renderers/__init__.py +42 -0
- truthound/datadocs/renderers/base.py +401 -0
- truthound/datadocs/renderers/custom.py +342 -0
- truthound/datadocs/renderers/jinja.py +697 -0
- truthound/datadocs/sections.py +736 -0
- truthound/datadocs/styles.py +931 -0
- truthound/datadocs/themes/__init__.py +101 -0
- truthound/datadocs/themes/base.py +336 -0
- truthound/datadocs/themes/default.py +417 -0
- truthound/datadocs/themes/enterprise.py +419 -0
- truthound/datadocs/themes/loader.py +336 -0
- truthound/datadocs/themes.py +301 -0
- truthound/datadocs/transformers/__init__.py +57 -0
- truthound/datadocs/transformers/base.py +268 -0
- truthound/datadocs/transformers/enrichers.py +544 -0
- truthound/datadocs/transformers/filters.py +447 -0
- truthound/datadocs/transformers/i18n.py +468 -0
- truthound/datadocs/versioning/__init__.py +62 -0
- truthound/datadocs/versioning/diff.py +639 -0
- truthound/datadocs/versioning/storage.py +497 -0
- truthound/datadocs/versioning/version.py +358 -0
- truthound/datasources/__init__.py +223 -0
- truthound/datasources/_async_protocols.py +222 -0
- truthound/datasources/_protocols.py +159 -0
- truthound/datasources/adapters.py +428 -0
- truthound/datasources/async_base.py +599 -0
- truthound/datasources/async_factory.py +511 -0
- truthound/datasources/base.py +516 -0
- truthound/datasources/factory.py +433 -0
- truthound/datasources/nosql/__init__.py +47 -0
- truthound/datasources/nosql/base.py +487 -0
- truthound/datasources/nosql/elasticsearch.py +801 -0
- truthound/datasources/nosql/mongodb.py +636 -0
- truthound/datasources/pandas_optimized.py +582 -0
- truthound/datasources/pandas_source.py +216 -0
- truthound/datasources/polars_source.py +395 -0
- truthound/datasources/spark_source.py +479 -0
- truthound/datasources/sql/__init__.py +154 -0
- truthound/datasources/sql/base.py +710 -0
- truthound/datasources/sql/bigquery.py +410 -0
- truthound/datasources/sql/cloud_base.py +199 -0
- truthound/datasources/sql/databricks.py +471 -0
- truthound/datasources/sql/mysql.py +316 -0
- truthound/datasources/sql/oracle.py +427 -0
- truthound/datasources/sql/postgresql.py +321 -0
- truthound/datasources/sql/redshift.py +479 -0
- truthound/datasources/sql/snowflake.py +439 -0
- truthound/datasources/sql/sqlite.py +286 -0
- truthound/datasources/sql/sqlserver.py +437 -0
- truthound/datasources/streaming/__init__.py +47 -0
- truthound/datasources/streaming/base.py +350 -0
- truthound/datasources/streaming/kafka.py +670 -0
- truthound/decorators.py +98 -0
- truthound/docs/__init__.py +69 -0
- truthound/docs/extractor.py +971 -0
- truthound/docs/generator.py +601 -0
- truthound/docs/parser.py +1037 -0
- truthound/docs/renderer.py +999 -0
- truthound/drift/__init__.py +22 -0
- truthound/drift/compare.py +189 -0
- truthound/drift/detectors.py +464 -0
- truthound/drift/report.py +160 -0
- truthound/execution/__init__.py +65 -0
- truthound/execution/_protocols.py +324 -0
- truthound/execution/base.py +576 -0
- truthound/execution/distributed/__init__.py +179 -0
- truthound/execution/distributed/aggregations.py +731 -0
- truthound/execution/distributed/arrow_bridge.py +817 -0
- truthound/execution/distributed/base.py +550 -0
- truthound/execution/distributed/dask_engine.py +976 -0
- truthound/execution/distributed/mixins.py +766 -0
- truthound/execution/distributed/protocols.py +756 -0
- truthound/execution/distributed/ray_engine.py +1127 -0
- truthound/execution/distributed/registry.py +446 -0
- truthound/execution/distributed/spark_engine.py +1011 -0
- truthound/execution/distributed/validator_adapter.py +682 -0
- truthound/execution/pandas_engine.py +401 -0
- truthound/execution/polars_engine.py +497 -0
- truthound/execution/pushdown/__init__.py +230 -0
- truthound/execution/pushdown/ast.py +1550 -0
- truthound/execution/pushdown/builder.py +1550 -0
- truthound/execution/pushdown/dialects.py +1072 -0
- truthound/execution/pushdown/executor.py +829 -0
- truthound/execution/pushdown/optimizer.py +1041 -0
- truthound/execution/sql_engine.py +518 -0
- truthound/infrastructure/__init__.py +189 -0
- truthound/infrastructure/audit.py +1515 -0
- truthound/infrastructure/config.py +1133 -0
- truthound/infrastructure/encryption.py +1132 -0
- truthound/infrastructure/logging.py +1503 -0
- truthound/infrastructure/metrics.py +1220 -0
- truthound/lineage/__init__.py +89 -0
- truthound/lineage/base.py +746 -0
- truthound/lineage/impact_analysis.py +474 -0
- truthound/lineage/integrations/__init__.py +22 -0
- truthound/lineage/integrations/openlineage.py +548 -0
- truthound/lineage/tracker.py +512 -0
- truthound/lineage/visualization/__init__.py +33 -0
- truthound/lineage/visualization/protocols.py +145 -0
- truthound/lineage/visualization/renderers/__init__.py +20 -0
- truthound/lineage/visualization/renderers/cytoscape.py +329 -0
- truthound/lineage/visualization/renderers/d3.py +331 -0
- truthound/lineage/visualization/renderers/graphviz.py +276 -0
- truthound/lineage/visualization/renderers/mermaid.py +308 -0
- truthound/maskers.py +113 -0
- truthound/ml/__init__.py +124 -0
- truthound/ml/anomaly_models/__init__.py +31 -0
- truthound/ml/anomaly_models/ensemble.py +362 -0
- truthound/ml/anomaly_models/isolation_forest.py +444 -0
- truthound/ml/anomaly_models/statistical.py +392 -0
- truthound/ml/base.py +1178 -0
- truthound/ml/drift_detection/__init__.py +26 -0
- truthound/ml/drift_detection/concept.py +381 -0
- truthound/ml/drift_detection/distribution.py +361 -0
- truthound/ml/drift_detection/feature.py +442 -0
- truthound/ml/drift_detection/multivariate.py +495 -0
- truthound/ml/monitoring/__init__.py +88 -0
- truthound/ml/monitoring/alerting/__init__.py +33 -0
- truthound/ml/monitoring/alerting/handlers.py +427 -0
- truthound/ml/monitoring/alerting/rules.py +508 -0
- truthound/ml/monitoring/collectors/__init__.py +19 -0
- truthound/ml/monitoring/collectors/composite.py +105 -0
- truthound/ml/monitoring/collectors/drift.py +324 -0
- truthound/ml/monitoring/collectors/performance.py +179 -0
- truthound/ml/monitoring/collectors/quality.py +369 -0
- truthound/ml/monitoring/monitor.py +536 -0
- truthound/ml/monitoring/protocols.py +451 -0
- truthound/ml/monitoring/stores/__init__.py +15 -0
- truthound/ml/monitoring/stores/memory.py +201 -0
- truthound/ml/monitoring/stores/prometheus.py +296 -0
- truthound/ml/rule_learning/__init__.py +25 -0
- truthound/ml/rule_learning/constraint_miner.py +443 -0
- truthound/ml/rule_learning/pattern_learner.py +499 -0
- truthound/ml/rule_learning/profile_learner.py +462 -0
- truthound/multitenancy/__init__.py +326 -0
- truthound/multitenancy/core.py +852 -0
- truthound/multitenancy/integration.py +597 -0
- truthound/multitenancy/isolation.py +630 -0
- truthound/multitenancy/manager.py +770 -0
- truthound/multitenancy/middleware.py +765 -0
- truthound/multitenancy/quota.py +537 -0
- truthound/multitenancy/resolvers.py +603 -0
- truthound/multitenancy/storage.py +703 -0
- truthound/observability/__init__.py +307 -0
- truthound/observability/context.py +531 -0
- truthound/observability/instrumentation.py +611 -0
- truthound/observability/logging.py +887 -0
- truthound/observability/metrics.py +1157 -0
- truthound/observability/tracing/__init__.py +178 -0
- truthound/observability/tracing/baggage.py +310 -0
- truthound/observability/tracing/config.py +426 -0
- truthound/observability/tracing/exporter.py +787 -0
- truthound/observability/tracing/integration.py +1018 -0
- truthound/observability/tracing/otel/__init__.py +146 -0
- truthound/observability/tracing/otel/adapter.py +982 -0
- truthound/observability/tracing/otel/bridge.py +1177 -0
- truthound/observability/tracing/otel/compat.py +681 -0
- truthound/observability/tracing/otel/config.py +691 -0
- truthound/observability/tracing/otel/detection.py +327 -0
- truthound/observability/tracing/otel/protocols.py +426 -0
- truthound/observability/tracing/processor.py +561 -0
- truthound/observability/tracing/propagator.py +757 -0
- truthound/observability/tracing/provider.py +569 -0
- truthound/observability/tracing/resource.py +515 -0
- truthound/observability/tracing/sampler.py +487 -0
- truthound/observability/tracing/span.py +676 -0
- truthound/plugins/__init__.py +198 -0
- truthound/plugins/base.py +599 -0
- truthound/plugins/cli.py +680 -0
- truthound/plugins/dependencies/__init__.py +42 -0
- truthound/plugins/dependencies/graph.py +422 -0
- truthound/plugins/dependencies/resolver.py +417 -0
- truthound/plugins/discovery.py +379 -0
- truthound/plugins/docs/__init__.py +46 -0
- truthound/plugins/docs/extractor.py +444 -0
- truthound/plugins/docs/renderer.py +499 -0
- truthound/plugins/enterprise_manager.py +877 -0
- truthound/plugins/examples/__init__.py +19 -0
- truthound/plugins/examples/custom_validators.py +317 -0
- truthound/plugins/examples/slack_notifier.py +312 -0
- truthound/plugins/examples/xml_reporter.py +254 -0
- truthound/plugins/hooks.py +558 -0
- truthound/plugins/lifecycle/__init__.py +43 -0
- truthound/plugins/lifecycle/hot_reload.py +402 -0
- truthound/plugins/lifecycle/manager.py +371 -0
- truthound/plugins/manager.py +736 -0
- truthound/plugins/registry.py +338 -0
- truthound/plugins/security/__init__.py +93 -0
- truthound/plugins/security/exceptions.py +332 -0
- truthound/plugins/security/policies.py +348 -0
- truthound/plugins/security/protocols.py +643 -0
- truthound/plugins/security/sandbox/__init__.py +45 -0
- truthound/plugins/security/sandbox/context.py +158 -0
- truthound/plugins/security/sandbox/engines/__init__.py +19 -0
- truthound/plugins/security/sandbox/engines/container.py +379 -0
- truthound/plugins/security/sandbox/engines/noop.py +144 -0
- truthound/plugins/security/sandbox/engines/process.py +336 -0
- truthound/plugins/security/sandbox/factory.py +211 -0
- truthound/plugins/security/signing/__init__.py +57 -0
- truthound/plugins/security/signing/service.py +330 -0
- truthound/plugins/security/signing/trust_store.py +368 -0
- truthound/plugins/security/signing/verifier.py +459 -0
- truthound/plugins/versioning/__init__.py +41 -0
- truthound/plugins/versioning/constraints.py +297 -0
- truthound/plugins/versioning/resolver.py +329 -0
- truthound/profiler/__init__.py +1729 -0
- truthound/profiler/_lazy.py +452 -0
- truthound/profiler/ab_testing/__init__.py +80 -0
- truthound/profiler/ab_testing/analysis.py +449 -0
- truthound/profiler/ab_testing/base.py +257 -0
- truthound/profiler/ab_testing/experiment.py +395 -0
- truthound/profiler/ab_testing/tracking.py +368 -0
- truthound/profiler/auto_threshold.py +1170 -0
- truthound/profiler/base.py +579 -0
- truthound/profiler/cache_patterns.py +911 -0
- truthound/profiler/caching.py +1303 -0
- truthound/profiler/column_profiler.py +712 -0
- truthound/profiler/comparison.py +1007 -0
- truthound/profiler/custom_patterns.py +1170 -0
- truthound/profiler/dashboard/__init__.py +50 -0
- truthound/profiler/dashboard/app.py +476 -0
- truthound/profiler/dashboard/components.py +457 -0
- truthound/profiler/dashboard/config.py +72 -0
- truthound/profiler/distributed/__init__.py +83 -0
- truthound/profiler/distributed/base.py +281 -0
- truthound/profiler/distributed/dask_backend.py +498 -0
- truthound/profiler/distributed/local_backend.py +293 -0
- truthound/profiler/distributed/profiler.py +304 -0
- truthound/profiler/distributed/ray_backend.py +374 -0
- truthound/profiler/distributed/spark_backend.py +375 -0
- truthound/profiler/distributed.py +1366 -0
- truthound/profiler/enterprise_sampling.py +1065 -0
- truthound/profiler/errors.py +488 -0
- truthound/profiler/evolution/__init__.py +91 -0
- truthound/profiler/evolution/alerts.py +426 -0
- truthound/profiler/evolution/changes.py +206 -0
- truthound/profiler/evolution/compatibility.py +365 -0
- truthound/profiler/evolution/detector.py +372 -0
- truthound/profiler/evolution/protocols.py +121 -0
- truthound/profiler/generators/__init__.py +48 -0
- truthound/profiler/generators/base.py +384 -0
- truthound/profiler/generators/ml_rules.py +375 -0
- truthound/profiler/generators/pattern_rules.py +384 -0
- truthound/profiler/generators/schema_rules.py +267 -0
- truthound/profiler/generators/stats_rules.py +324 -0
- truthound/profiler/generators/suite_generator.py +857 -0
- truthound/profiler/i18n.py +1542 -0
- truthound/profiler/incremental.py +554 -0
- truthound/profiler/incremental_validation.py +1710 -0
- truthound/profiler/integration/__init__.py +73 -0
- truthound/profiler/integration/adapters.py +345 -0
- truthound/profiler/integration/context.py +371 -0
- truthound/profiler/integration/executor.py +527 -0
- truthound/profiler/integration/naming.py +75 -0
- truthound/profiler/integration/protocols.py +243 -0
- truthound/profiler/memory.py +1185 -0
- truthound/profiler/migration/__init__.py +60 -0
- truthound/profiler/migration/base.py +345 -0
- truthound/profiler/migration/manager.py +444 -0
- truthound/profiler/migration/v1_0_to_v1_1.py +484 -0
- truthound/profiler/ml/__init__.py +73 -0
- truthound/profiler/ml/base.py +244 -0
- truthound/profiler/ml/classifier.py +507 -0
- truthound/profiler/ml/feature_extraction.py +604 -0
- truthound/profiler/ml/pretrained.py +448 -0
- truthound/profiler/ml_inference.py +1276 -0
- truthound/profiler/native_patterns.py +815 -0
- truthound/profiler/observability.py +1184 -0
- truthound/profiler/process_timeout.py +1566 -0
- truthound/profiler/progress.py +568 -0
- truthound/profiler/progress_callbacks.py +1734 -0
- truthound/profiler/quality.py +1345 -0
- truthound/profiler/resilience.py +1180 -0
- truthound/profiler/sampled_matcher.py +794 -0
- truthound/profiler/sampling.py +1288 -0
- truthound/profiler/scheduling/__init__.py +82 -0
- truthound/profiler/scheduling/protocols.py +214 -0
- truthound/profiler/scheduling/scheduler.py +474 -0
- truthound/profiler/scheduling/storage.py +457 -0
- truthound/profiler/scheduling/triggers.py +449 -0
- truthound/profiler/schema.py +603 -0
- truthound/profiler/streaming.py +685 -0
- truthound/profiler/streaming_patterns.py +1354 -0
- truthound/profiler/suite_cli.py +625 -0
- truthound/profiler/suite_config.py +789 -0
- truthound/profiler/suite_export.py +1268 -0
- truthound/profiler/table_profiler.py +547 -0
- truthound/profiler/timeout.py +565 -0
- truthound/profiler/validation.py +1532 -0
- truthound/profiler/visualization/__init__.py +118 -0
- truthound/profiler/visualization/base.py +346 -0
- truthound/profiler/visualization/generator.py +1259 -0
- truthound/profiler/visualization/plotly_renderer.py +811 -0
- truthound/profiler/visualization/renderers.py +669 -0
- truthound/profiler/visualization/sections.py +540 -0
- truthound/profiler/visualization.py +2122 -0
- truthound/profiler/yaml_validation.py +1151 -0
- truthound/py.typed +0 -0
- truthound/ratelimit/__init__.py +248 -0
- truthound/ratelimit/algorithms.py +1108 -0
- truthound/ratelimit/core.py +573 -0
- truthound/ratelimit/integration.py +532 -0
- truthound/ratelimit/limiter.py +663 -0
- truthound/ratelimit/middleware.py +700 -0
- truthound/ratelimit/policy.py +792 -0
- truthound/ratelimit/storage.py +763 -0
- truthound/rbac/__init__.py +340 -0
- truthound/rbac/core.py +976 -0
- truthound/rbac/integration.py +760 -0
- truthound/rbac/manager.py +1052 -0
- truthound/rbac/middleware.py +842 -0
- truthound/rbac/policy.py +954 -0
- truthound/rbac/storage.py +878 -0
- truthound/realtime/__init__.py +141 -0
- truthound/realtime/adapters/__init__.py +43 -0
- truthound/realtime/adapters/base.py +533 -0
- truthound/realtime/adapters/kafka.py +487 -0
- truthound/realtime/adapters/kinesis.py +479 -0
- truthound/realtime/adapters/mock.py +243 -0
- truthound/realtime/base.py +553 -0
- truthound/realtime/factory.py +382 -0
- truthound/realtime/incremental.py +660 -0
- truthound/realtime/processing/__init__.py +67 -0
- truthound/realtime/processing/exactly_once.py +575 -0
- truthound/realtime/processing/state.py +547 -0
- truthound/realtime/processing/windows.py +647 -0
- truthound/realtime/protocols.py +569 -0
- truthound/realtime/streaming.py +605 -0
- truthound/realtime/testing/__init__.py +32 -0
- truthound/realtime/testing/containers.py +615 -0
- truthound/realtime/testing/fixtures.py +484 -0
- truthound/report.py +280 -0
- truthound/reporters/__init__.py +46 -0
- truthound/reporters/_protocols.py +30 -0
- truthound/reporters/base.py +324 -0
- truthound/reporters/ci/__init__.py +66 -0
- truthound/reporters/ci/azure.py +436 -0
- truthound/reporters/ci/base.py +509 -0
- truthound/reporters/ci/bitbucket.py +567 -0
- truthound/reporters/ci/circleci.py +547 -0
- truthound/reporters/ci/detection.py +364 -0
- truthound/reporters/ci/factory.py +182 -0
- truthound/reporters/ci/github.py +388 -0
- truthound/reporters/ci/gitlab.py +471 -0
- truthound/reporters/ci/jenkins.py +525 -0
- truthound/reporters/console_reporter.py +299 -0
- truthound/reporters/factory.py +211 -0
- truthound/reporters/html_reporter.py +524 -0
- truthound/reporters/json_reporter.py +256 -0
- truthound/reporters/markdown_reporter.py +280 -0
- truthound/reporters/sdk/__init__.py +174 -0
- truthound/reporters/sdk/builder.py +558 -0
- truthound/reporters/sdk/mixins.py +1150 -0
- truthound/reporters/sdk/schema.py +1493 -0
- truthound/reporters/sdk/templates.py +666 -0
- truthound/reporters/sdk/testing.py +968 -0
- truthound/scanners.py +170 -0
- truthound/scheduling/__init__.py +122 -0
- truthound/scheduling/cron.py +1136 -0
- truthound/scheduling/presets.py +212 -0
- truthound/schema.py +275 -0
- truthound/secrets/__init__.py +173 -0
- truthound/secrets/base.py +618 -0
- truthound/secrets/cloud.py +682 -0
- truthound/secrets/integration.py +507 -0
- truthound/secrets/manager.py +633 -0
- truthound/secrets/oidc/__init__.py +172 -0
- truthound/secrets/oidc/base.py +902 -0
- truthound/secrets/oidc/credential_provider.py +623 -0
- truthound/secrets/oidc/exchangers.py +1001 -0
- truthound/secrets/oidc/github/__init__.py +110 -0
- truthound/secrets/oidc/github/claims.py +718 -0
- truthound/secrets/oidc/github/enhanced_provider.py +693 -0
- truthound/secrets/oidc/github/trust_policy.py +742 -0
- truthound/secrets/oidc/github/verification.py +723 -0
- truthound/secrets/oidc/github/workflow.py +691 -0
- truthound/secrets/oidc/providers.py +825 -0
- truthound/secrets/providers.py +506 -0
- truthound/secrets/resolver.py +495 -0
- truthound/stores/__init__.py +177 -0
- truthound/stores/backends/__init__.py +18 -0
- truthound/stores/backends/_protocols.py +340 -0
- truthound/stores/backends/azure_blob.py +530 -0
- truthound/stores/backends/concurrent_filesystem.py +915 -0
- truthound/stores/backends/connection_pool.py +1365 -0
- truthound/stores/backends/database.py +743 -0
- truthound/stores/backends/filesystem.py +538 -0
- truthound/stores/backends/gcs.py +399 -0
- truthound/stores/backends/memory.py +354 -0
- truthound/stores/backends/s3.py +434 -0
- truthound/stores/backpressure/__init__.py +84 -0
- truthound/stores/backpressure/base.py +375 -0
- truthound/stores/backpressure/circuit_breaker.py +434 -0
- truthound/stores/backpressure/monitor.py +376 -0
- truthound/stores/backpressure/strategies.py +677 -0
- truthound/stores/base.py +551 -0
- truthound/stores/batching/__init__.py +65 -0
- truthound/stores/batching/base.py +305 -0
- truthound/stores/batching/buffer.py +370 -0
- truthound/stores/batching/store.py +248 -0
- truthound/stores/batching/writer.py +521 -0
- truthound/stores/caching/__init__.py +60 -0
- truthound/stores/caching/backends.py +684 -0
- truthound/stores/caching/base.py +356 -0
- truthound/stores/caching/store.py +305 -0
- truthound/stores/compression/__init__.py +193 -0
- truthound/stores/compression/adaptive.py +694 -0
- truthound/stores/compression/base.py +514 -0
- truthound/stores/compression/pipeline.py +868 -0
- truthound/stores/compression/providers.py +672 -0
- truthound/stores/compression/streaming.py +832 -0
- truthound/stores/concurrency/__init__.py +81 -0
- truthound/stores/concurrency/atomic.py +556 -0
- truthound/stores/concurrency/index.py +775 -0
- truthound/stores/concurrency/locks.py +576 -0
- truthound/stores/concurrency/manager.py +482 -0
- truthound/stores/encryption/__init__.py +297 -0
- truthound/stores/encryption/base.py +952 -0
- truthound/stores/encryption/keys.py +1191 -0
- truthound/stores/encryption/pipeline.py +903 -0
- truthound/stores/encryption/providers.py +953 -0
- truthound/stores/encryption/streaming.py +950 -0
- truthound/stores/expectations.py +227 -0
- truthound/stores/factory.py +246 -0
- truthound/stores/migration/__init__.py +75 -0
- truthound/stores/migration/base.py +480 -0
- truthound/stores/migration/manager.py +347 -0
- truthound/stores/migration/registry.py +382 -0
- truthound/stores/migration/store.py +559 -0
- truthound/stores/observability/__init__.py +106 -0
- truthound/stores/observability/audit.py +718 -0
- truthound/stores/observability/config.py +270 -0
- truthound/stores/observability/factory.py +208 -0
- truthound/stores/observability/metrics.py +636 -0
- truthound/stores/observability/protocols.py +410 -0
- truthound/stores/observability/store.py +570 -0
- truthound/stores/observability/tracing.py +784 -0
- truthound/stores/replication/__init__.py +76 -0
- truthound/stores/replication/base.py +260 -0
- truthound/stores/replication/monitor.py +269 -0
- truthound/stores/replication/store.py +439 -0
- truthound/stores/replication/syncer.py +391 -0
- truthound/stores/results.py +359 -0
- truthound/stores/retention/__init__.py +77 -0
- truthound/stores/retention/base.py +378 -0
- truthound/stores/retention/policies.py +621 -0
- truthound/stores/retention/scheduler.py +279 -0
- truthound/stores/retention/store.py +526 -0
- truthound/stores/streaming/__init__.py +138 -0
- truthound/stores/streaming/base.py +801 -0
- truthound/stores/streaming/database.py +984 -0
- truthound/stores/streaming/filesystem.py +719 -0
- truthound/stores/streaming/reader.py +629 -0
- truthound/stores/streaming/s3.py +843 -0
- truthound/stores/streaming/writer.py +790 -0
- truthound/stores/tiering/__init__.py +108 -0
- truthound/stores/tiering/base.py +462 -0
- truthound/stores/tiering/manager.py +249 -0
- truthound/stores/tiering/policies.py +692 -0
- truthound/stores/tiering/store.py +526 -0
- truthound/stores/versioning/__init__.py +56 -0
- truthound/stores/versioning/base.py +376 -0
- truthound/stores/versioning/store.py +660 -0
- truthound/stores/versioning/strategies.py +353 -0
- truthound/types.py +56 -0
- truthound/validators/__init__.py +774 -0
- truthound/validators/aggregate/__init__.py +27 -0
- truthound/validators/aggregate/central.py +116 -0
- truthound/validators/aggregate/extremes.py +116 -0
- truthound/validators/aggregate/spread.py +118 -0
- truthound/validators/aggregate/sum.py +64 -0
- truthound/validators/aggregate/type.py +78 -0
- truthound/validators/anomaly/__init__.py +93 -0
- truthound/validators/anomaly/base.py +431 -0
- truthound/validators/anomaly/ml_based.py +1190 -0
- truthound/validators/anomaly/multivariate.py +647 -0
- truthound/validators/anomaly/statistical.py +599 -0
- truthound/validators/base.py +1089 -0
- truthound/validators/business_rule/__init__.py +46 -0
- truthound/validators/business_rule/base.py +147 -0
- truthound/validators/business_rule/checksum.py +509 -0
- truthound/validators/business_rule/financial.py +526 -0
- truthound/validators/cache.py +733 -0
- truthound/validators/completeness/__init__.py +39 -0
- truthound/validators/completeness/conditional.py +73 -0
- truthound/validators/completeness/default.py +98 -0
- truthound/validators/completeness/empty.py +103 -0
- truthound/validators/completeness/nan.py +337 -0
- truthound/validators/completeness/null.py +152 -0
- truthound/validators/cross_table/__init__.py +17 -0
- truthound/validators/cross_table/aggregate.py +333 -0
- truthound/validators/cross_table/row_count.py +122 -0
- truthound/validators/datetime/__init__.py +29 -0
- truthound/validators/datetime/format.py +78 -0
- truthound/validators/datetime/freshness.py +269 -0
- truthound/validators/datetime/order.py +73 -0
- truthound/validators/datetime/parseable.py +185 -0
- truthound/validators/datetime/range.py +202 -0
- truthound/validators/datetime/timezone.py +69 -0
- truthound/validators/distribution/__init__.py +49 -0
- truthound/validators/distribution/distribution.py +128 -0
- truthound/validators/distribution/monotonic.py +119 -0
- truthound/validators/distribution/outlier.py +178 -0
- truthound/validators/distribution/quantile.py +80 -0
- truthound/validators/distribution/range.py +254 -0
- truthound/validators/distribution/set.py +125 -0
- truthound/validators/distribution/statistical.py +459 -0
- truthound/validators/drift/__init__.py +79 -0
- truthound/validators/drift/base.py +427 -0
- truthound/validators/drift/multi_feature.py +401 -0
- truthound/validators/drift/numeric.py +395 -0
- truthound/validators/drift/psi.py +446 -0
- truthound/validators/drift/statistical.py +510 -0
- truthound/validators/enterprise.py +1658 -0
- truthound/validators/geospatial/__init__.py +80 -0
- truthound/validators/geospatial/base.py +97 -0
- truthound/validators/geospatial/boundary.py +238 -0
- truthound/validators/geospatial/coordinate.py +351 -0
- truthound/validators/geospatial/distance.py +399 -0
- truthound/validators/geospatial/polygon.py +665 -0
- truthound/validators/i18n/__init__.py +308 -0
- truthound/validators/i18n/bidi.py +571 -0
- truthound/validators/i18n/catalogs.py +570 -0
- truthound/validators/i18n/dialects.py +763 -0
- truthound/validators/i18n/extended_catalogs.py +549 -0
- truthound/validators/i18n/formatting.py +1434 -0
- truthound/validators/i18n/loader.py +1020 -0
- truthound/validators/i18n/messages.py +521 -0
- truthound/validators/i18n/plural.py +683 -0
- truthound/validators/i18n/protocols.py +855 -0
- truthound/validators/i18n/tms.py +1162 -0
- truthound/validators/localization/__init__.py +53 -0
- truthound/validators/localization/base.py +122 -0
- truthound/validators/localization/chinese.py +362 -0
- truthound/validators/localization/japanese.py +275 -0
- truthound/validators/localization/korean.py +524 -0
- truthound/validators/memory/__init__.py +94 -0
- truthound/validators/memory/approximate_knn.py +506 -0
- truthound/validators/memory/base.py +547 -0
- truthound/validators/memory/sgd_online.py +719 -0
- truthound/validators/memory/streaming_ecdf.py +753 -0
- truthound/validators/ml_feature/__init__.py +54 -0
- truthound/validators/ml_feature/base.py +249 -0
- truthound/validators/ml_feature/correlation.py +299 -0
- truthound/validators/ml_feature/leakage.py +344 -0
- truthound/validators/ml_feature/null_impact.py +270 -0
- truthound/validators/ml_feature/scale.py +264 -0
- truthound/validators/multi_column/__init__.py +89 -0
- truthound/validators/multi_column/arithmetic.py +284 -0
- truthound/validators/multi_column/base.py +231 -0
- truthound/validators/multi_column/comparison.py +273 -0
- truthound/validators/multi_column/consistency.py +312 -0
- truthound/validators/multi_column/statistical.py +299 -0
- truthound/validators/optimization/__init__.py +164 -0
- truthound/validators/optimization/aggregation.py +563 -0
- truthound/validators/optimization/covariance.py +556 -0
- truthound/validators/optimization/geo.py +626 -0
- truthound/validators/optimization/graph.py +587 -0
- truthound/validators/optimization/orchestrator.py +970 -0
- truthound/validators/optimization/profiling.py +1312 -0
- truthound/validators/privacy/__init__.py +223 -0
- truthound/validators/privacy/base.py +635 -0
- truthound/validators/privacy/ccpa.py +670 -0
- truthound/validators/privacy/gdpr.py +728 -0
- truthound/validators/privacy/global_patterns.py +604 -0
- truthound/validators/privacy/plugins.py +867 -0
- truthound/validators/profiling/__init__.py +52 -0
- truthound/validators/profiling/base.py +175 -0
- truthound/validators/profiling/cardinality.py +312 -0
- truthound/validators/profiling/entropy.py +391 -0
- truthound/validators/profiling/frequency.py +455 -0
- truthound/validators/pushdown_support.py +660 -0
- truthound/validators/query/__init__.py +91 -0
- truthound/validators/query/aggregate.py +346 -0
- truthound/validators/query/base.py +246 -0
- truthound/validators/query/column.py +249 -0
- truthound/validators/query/expression.py +274 -0
- truthound/validators/query/result.py +323 -0
- truthound/validators/query/row_count.py +264 -0
- truthound/validators/referential/__init__.py +80 -0
- truthound/validators/referential/base.py +395 -0
- truthound/validators/referential/cascade.py +391 -0
- truthound/validators/referential/circular.py +563 -0
- truthound/validators/referential/foreign_key.py +624 -0
- truthound/validators/referential/orphan.py +485 -0
- truthound/validators/registry.py +112 -0
- truthound/validators/schema/__init__.py +41 -0
- truthound/validators/schema/column_count.py +142 -0
- truthound/validators/schema/column_exists.py +80 -0
- truthound/validators/schema/column_order.py +82 -0
- truthound/validators/schema/column_pair.py +85 -0
- truthound/validators/schema/column_pair_set.py +195 -0
- truthound/validators/schema/column_type.py +94 -0
- truthound/validators/schema/multi_column.py +53 -0
- truthound/validators/schema/multi_column_aggregate.py +175 -0
- truthound/validators/schema/referential.py +274 -0
- truthound/validators/schema/table_schema.py +91 -0
- truthound/validators/schema_validator.py +219 -0
- truthound/validators/sdk/__init__.py +250 -0
- truthound/validators/sdk/builder.py +680 -0
- truthound/validators/sdk/decorators.py +474 -0
- truthound/validators/sdk/enterprise/__init__.py +211 -0
- truthound/validators/sdk/enterprise/docs.py +725 -0
- truthound/validators/sdk/enterprise/fuzzing.py +659 -0
- truthound/validators/sdk/enterprise/licensing.py +709 -0
- truthound/validators/sdk/enterprise/manager.py +543 -0
- truthound/validators/sdk/enterprise/resources.py +628 -0
- truthound/validators/sdk/enterprise/sandbox.py +766 -0
- truthound/validators/sdk/enterprise/signing.py +603 -0
- truthound/validators/sdk/enterprise/templates.py +865 -0
- truthound/validators/sdk/enterprise/versioning.py +659 -0
- truthound/validators/sdk/templates.py +757 -0
- truthound/validators/sdk/testing.py +807 -0
- truthound/validators/security/__init__.py +181 -0
- truthound/validators/security/redos/__init__.py +182 -0
- truthound/validators/security/redos/core.py +861 -0
- truthound/validators/security/redos/cpu_monitor.py +593 -0
- truthound/validators/security/redos/cve_database.py +791 -0
- truthound/validators/security/redos/ml/__init__.py +155 -0
- truthound/validators/security/redos/ml/base.py +785 -0
- truthound/validators/security/redos/ml/datasets.py +618 -0
- truthound/validators/security/redos/ml/features.py +359 -0
- truthound/validators/security/redos/ml/models.py +1000 -0
- truthound/validators/security/redos/ml/predictor.py +507 -0
- truthound/validators/security/redos/ml/storage.py +632 -0
- truthound/validators/security/redos/ml/training.py +571 -0
- truthound/validators/security/redos/ml_analyzer.py +937 -0
- truthound/validators/security/redos/optimizer.py +674 -0
- truthound/validators/security/redos/profiler.py +682 -0
- truthound/validators/security/redos/re2_engine.py +709 -0
- truthound/validators/security/redos.py +886 -0
- truthound/validators/security/sql_security.py +1247 -0
- truthound/validators/streaming/__init__.py +126 -0
- truthound/validators/streaming/base.py +292 -0
- truthound/validators/streaming/completeness.py +210 -0
- truthound/validators/streaming/mixin.py +575 -0
- truthound/validators/streaming/range.py +308 -0
- truthound/validators/streaming/sources.py +846 -0
- truthound/validators/string/__init__.py +57 -0
- truthound/validators/string/casing.py +158 -0
- truthound/validators/string/charset.py +96 -0
- truthound/validators/string/format.py +501 -0
- truthound/validators/string/json.py +77 -0
- truthound/validators/string/json_schema.py +184 -0
- truthound/validators/string/length.py +104 -0
- truthound/validators/string/like_pattern.py +237 -0
- truthound/validators/string/regex.py +202 -0
- truthound/validators/string/regex_extended.py +435 -0
- truthound/validators/table/__init__.py +88 -0
- truthound/validators/table/base.py +78 -0
- truthound/validators/table/column_count.py +198 -0
- truthound/validators/table/freshness.py +362 -0
- truthound/validators/table/row_count.py +251 -0
- truthound/validators/table/schema.py +333 -0
- truthound/validators/table/size.py +285 -0
- truthound/validators/timeout/__init__.py +102 -0
- truthound/validators/timeout/advanced/__init__.py +247 -0
- truthound/validators/timeout/advanced/circuit_breaker.py +675 -0
- truthound/validators/timeout/advanced/prediction.py +773 -0
- truthound/validators/timeout/advanced/priority.py +618 -0
- truthound/validators/timeout/advanced/redis_backend.py +770 -0
- truthound/validators/timeout/advanced/retry.py +721 -0
- truthound/validators/timeout/advanced/sampling.py +788 -0
- truthound/validators/timeout/advanced/sla.py +661 -0
- truthound/validators/timeout/advanced/telemetry.py +804 -0
- truthound/validators/timeout/cascade.py +477 -0
- truthound/validators/timeout/deadline.py +657 -0
- truthound/validators/timeout/degradation.py +525 -0
- truthound/validators/timeout/distributed.py +597 -0
- truthound/validators/timeseries/__init__.py +89 -0
- truthound/validators/timeseries/base.py +326 -0
- truthound/validators/timeseries/completeness.py +617 -0
- truthound/validators/timeseries/gap.py +485 -0
- truthound/validators/timeseries/monotonic.py +310 -0
- truthound/validators/timeseries/seasonality.py +422 -0
- truthound/validators/timeseries/trend.py +510 -0
- truthound/validators/uniqueness/__init__.py +59 -0
- truthound/validators/uniqueness/approximate.py +475 -0
- truthound/validators/uniqueness/distinct_values.py +253 -0
- truthound/validators/uniqueness/duplicate.py +118 -0
- truthound/validators/uniqueness/primary_key.py +140 -0
- truthound/validators/uniqueness/unique.py +191 -0
- truthound/validators/uniqueness/within_record.py +599 -0
- truthound/validators/utils.py +756 -0
- truthound-1.0.8.dist-info/METADATA +474 -0
- truthound-1.0.8.dist-info/RECORD +877 -0
- truthound-1.0.8.dist-info/WHEEL +4 -0
- truthound-1.0.8.dist-info/entry_points.txt +2 -0
- truthound-1.0.8.dist-info/licenses/LICENSE +190 -0
|
@@ -0,0 +1,790 @@
|
|
|
1
|
+
"""Streaming writers for incremental result storage.
|
|
2
|
+
|
|
3
|
+
This module provides writers that can incrementally write validation results
|
|
4
|
+
to storage without holding all results in memory.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from __future__ import annotations
|
|
8
|
+
|
|
9
|
+
import asyncio
|
|
10
|
+
import gzip
|
|
11
|
+
import hashlib
|
|
12
|
+
import json
|
|
13
|
+
import threading
|
|
14
|
+
import time
|
|
15
|
+
from abc import ABC, abstractmethod
|
|
16
|
+
from contextlib import contextmanager
|
|
17
|
+
from dataclasses import dataclass, field
|
|
18
|
+
from datetime import datetime
|
|
19
|
+
from io import BytesIO, StringIO
|
|
20
|
+
from pathlib import Path
|
|
21
|
+
from typing import TYPE_CHECKING, Any, BinaryIO, Callable, Iterator, TextIO
|
|
22
|
+
from uuid import uuid4
|
|
23
|
+
|
|
24
|
+
from truthound.stores.streaming.base import (
|
|
25
|
+
ChunkInfo,
|
|
26
|
+
CompressionType,
|
|
27
|
+
StreamingConfig,
|
|
28
|
+
StreamingFormat,
|
|
29
|
+
StreamingMetrics,
|
|
30
|
+
StreamSession,
|
|
31
|
+
StreamStatus,
|
|
32
|
+
)
|
|
33
|
+
|
|
34
|
+
if TYPE_CHECKING:
|
|
35
|
+
from truthound.stores.results import ValidatorResult
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
# =============================================================================
|
|
39
|
+
# Exceptions
|
|
40
|
+
# =============================================================================
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
class StreamWriteError(Exception):
    """Raised when a streaming write operation cannot be carried out."""
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
class StreamBufferOverflowError(StreamWriteError):
    """Signals that a write buffer grew past its maximum size."""
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
class StreamFlushError(StreamWriteError):
    """Signals that buffered data could not be flushed to storage."""
|
|
59
|
+
|
|
60
|
+
|
|
61
|
+
# =============================================================================
|
|
62
|
+
# Serializers
|
|
63
|
+
# =============================================================================
|
|
64
|
+
|
|
65
|
+
|
|
66
|
+
class RecordSerializer(ABC):
    """Interface for encoding record dictionaries as bytes."""

    @abstractmethod
    def serialize(self, record: dict[str, Any]) -> bytes:
        """Encode one record and return the resulting bytes."""
        ...

    @abstractmethod
    def serialize_batch(self, records: list[dict[str, Any]]) -> bytes:
        """Encode several records and return them as one bytes payload."""
        ...

    @abstractmethod
    def get_content_type(self) -> str:
        """Return the content type string describing the encoded output."""
        ...
|
|
83
|
+
|
|
84
|
+
|
|
85
|
+
class JSONLSerializer(RecordSerializer):
    """JSON Lines serializer.

    Every record becomes one JSON document followed by a newline. Values the
    ``json`` module cannot encode natively are converted with ``str``
    (``default=str``).
    """

    def serialize(self, record: dict[str, Any]) -> bytes:
        """Serialize a single record to one UTF-8 encoded JSONL line.

        Args:
            record: The record to encode.

        Returns:
            The JSON text of ``record`` terminated by a newline.
        """
        return (json.dumps(record, default=str) + "\n").encode("utf-8")

    def serialize_batch(self, records: list[dict[str, Any]]) -> bytes:
        """Serialize a batch of records to UTF-8 encoded JSONL.

        Args:
            records: The records to encode, in output order.

        Returns:
            One newline-terminated JSON line per record, or ``b""`` when
            ``records`` is empty.
        """
        # An empty batch must not produce a lone "\n": that would be a blank
        # line, which is not a JSON document.
        if not records:
            return b""
        lines = [json.dumps(r, default=str) for r in records]
        return ("\n".join(lines) + "\n").encode("utf-8")

    def get_content_type(self) -> str:
        """Return the content type used for newline-delimited JSON."""
        return "application/x-ndjson"
|
|
99
|
+
|
|
100
|
+
|
|
101
|
+
class CSVSerializer(RecordSerializer):
    """CSV serializer.

    The column list is taken from the constructor or, when omitted, from the
    keys of the first record seen. The header row is emitted once per
    serializer instance; record keys outside the column list are ignored.
    """

    def __init__(self, columns: list[str] | None = None):
        self.columns = columns
        self._header_written = False

    def _render(self, rows: list[dict[str, Any]]) -> bytes:
        """Write ``rows`` (non-empty) as CSV text and return it UTF-8 encoded."""
        import csv
        from io import StringIO

        sink = StringIO()
        if self.columns is None:
            # Lock in the column order from the first record encountered.
            self.columns = list(rows[0].keys())

        csv_writer = csv.DictWriter(
            sink, fieldnames=self.columns, extrasaction="ignore"
        )

        if not self._header_written:
            csv_writer.writeheader()
            self._header_written = True

        for row in rows:
            csv_writer.writerow(row)

        return sink.getvalue().encode("utf-8")

    def serialize(self, record: dict[str, Any]) -> bytes:
        """Encode one record as a CSV row (preceded by the header if not yet written)."""
        return self._render([record])

    def serialize_batch(self, records: list[dict[str, Any]]) -> bytes:
        """Encode records as CSV rows; an empty batch yields ``b""``."""
        if not records:
            return b""
        return self._render(records)

    def get_content_type(self) -> str:
        """Return the CSV content type."""
        return "text/csv"
|
|
151
|
+
|
|
152
|
+
|
|
153
|
+
def get_serializer(format: StreamingFormat, **kwargs: Any) -> RecordSerializer:
    """Build the serializer matching ``format``.

    JSONL and NDJSON both map to ``JSONLSerializer``; CSV maps to
    ``CSVSerializer`` and receives the optional ``columns`` keyword argument.

    Raises:
        ValueError: If ``format`` is none of the supported formats.
    """
    is_json_lines = format == StreamingFormat.JSONL or format == StreamingFormat.NDJSON
    if is_json_lines:
        return JSONLSerializer()

    if format == StreamingFormat.CSV:
        column_names = kwargs.get("columns")
        return CSVSerializer(columns=column_names)

    raise ValueError(f"Unsupported format: {format}")
|
|
161
|
+
|
|
162
|
+
|
|
163
|
+
# =============================================================================
|
|
164
|
+
# Compressors
|
|
165
|
+
# =============================================================================
|
|
166
|
+
|
|
167
|
+
|
|
168
|
+
class Compressor(ABC):
    """Interface for byte-level compressors."""

    @abstractmethod
    def compress(self, data: bytes) -> bytes:
        """Return the compressed form of ``data``."""
        ...

    @abstractmethod
    def get_extension(self) -> str:
        """Return the file extension used for compressed files."""
        ...
|
|
180
|
+
|
|
181
|
+
|
|
182
|
+
class NoCompressor(Compressor):
    """Pass-through compressor that leaves data untouched."""

    def compress(self, data: bytes) -> bytes:
        """Return ``data`` unchanged."""
        return data

    def get_extension(self) -> str:
        """Return an empty extension, since nothing is compressed."""
        return ""
|
|
190
|
+
|
|
191
|
+
|
|
192
|
+
class GzipCompressor(Compressor):
    """Compressor backed by the standard-library ``gzip`` module."""

    def __init__(self, level: int = 6):
        # Compression level handed to gzip.compress on every call.
        self.level = level

    def compress(self, data: bytes) -> bytes:
        """Return ``data`` gzip-compressed at the configured level."""
        chosen_level = self.level
        return gzip.compress(data, compresslevel=chosen_level)

    def get_extension(self) -> str:
        """Return the gzip file extension."""
        return ".gz"
|
|
203
|
+
|
|
204
|
+
|
|
205
|
+
class ZstdCompressor(Compressor):
    """Compressor backed by the optional ``zstandard`` package."""

    def __init__(self, level: int = 3):
        self.level = level
        # Created on first use so the optional dependency is imported lazily.
        self._compressor = None

    def _get_compressor(self) -> Any:
        """Return the cached zstandard compressor, building it on first call.

        Raises:
            ImportError: If the ``zstandard`` package is not installed.
        """
        if self._compressor is not None:
            return self._compressor

        try:
            import zstandard as zstd

            self._compressor = zstd.ZstdCompressor(level=self.level)
        except ImportError:
            raise ImportError("zstandard library required for zstd compression")
        return self._compressor

    def compress(self, data: bytes) -> bytes:
        """Return ``data`` compressed with Zstandard."""
        return self._get_compressor().compress(data)

    def get_extension(self) -> str:
        """Return the Zstandard file extension."""
        return ".zst"
|
|
228
|
+
|
|
229
|
+
|
|
230
|
+
class LZ4Compressor(Compressor):
    """Compressor backed by the optional ``lz4`` package (frame format)."""

    def compress(self, data: bytes) -> bytes:
        """Return ``data`` compressed as an LZ4 frame.

        Raises:
            ImportError: If the ``lz4`` package is not installed.
        """
        try:
            import lz4.frame

            packed = lz4.frame.compress(data)
            return packed
        except ImportError:
            raise ImportError("lz4 library required for lz4 compression")

    def get_extension(self) -> str:
        """Return the LZ4 file extension."""
        return ".lz4"
|
|
243
|
+
|
|
244
|
+
|
|
245
|
+
def get_compressor(compression: CompressionType, **kwargs: Any) -> Compressor:
    """Build the compressor matching ``compression``.

    The optional ``level`` keyword argument is forwarded to the gzip
    (default 6) and zstd (default 3) compressors.

    Raises:
        ValueError: If ``compression`` is not a supported type.
    """
    if compression == CompressionType.NONE:
        return NoCompressor()

    if compression == CompressionType.GZIP:
        gzip_level = kwargs.get("level", 6)
        return GzipCompressor(level=gzip_level)

    if compression == CompressionType.ZSTD:
        zstd_level = kwargs.get("level", 3)
        return ZstdCompressor(level=zstd_level)

    if compression == CompressionType.LZ4:
        return LZ4Compressor()

    raise ValueError(f"Unsupported compression: {compression}")
|
|
257
|
+
|
|
258
|
+
|
|
259
|
+
# =============================================================================
|
|
260
|
+
# Buffer Management
|
|
261
|
+
# =============================================================================
|
|
262
|
+
|
|
263
|
+
|
|
264
|
+
@dataclass
class WriteBuffer:
    """Accumulates records in memory until a flush threshold is reached.

    Attributes:
        max_records: Record count at which a flush is requested.
        max_bytes: Tracked byte size at which a flush is requested.
        records: Records currently held.
        byte_size: Sum of the UTF-8 JSON sizes of the held records.
    """

    max_records: int = 1000
    max_bytes: int = 10 * 1024 * 1024  # 10MB
    records: list[dict[str, Any]] = field(default_factory=list)
    byte_size: int = 0

    def _track(self, record: dict[str, Any]) -> None:
        """Store ``record`` and add its JSON-encoded size to ``byte_size``."""
        # Size is measured first so a record that cannot be encoded is not stored.
        encoded_len = len(json.dumps(record, default=str).encode("utf-8"))
        self.records.append(record)
        self.byte_size += encoded_len

    def add(self, record: dict[str, Any]) -> bool:
        """Buffer one record.

        Returns:
            True if buffer should be flushed.
        """
        self._track(record)
        return self.should_flush()

    def add_batch(self, records: list[dict[str, Any]]) -> bool:
        """Buffer several records, in order.

        Returns:
            True if buffer should be flushed.
        """
        for item in records:
            self._track(item)
        return self.should_flush()

    def should_flush(self) -> bool:
        """Report whether the record-count or byte-size limit has been reached."""
        if len(self.records) >= self.max_records:
            return True
        return self.byte_size >= self.max_bytes

    def clear(self) -> list[dict[str, Any]]:
        """Empty the buffer and hand back the records it held."""
        drained, self.records = self.records, []
        self.byte_size = 0
        return drained

    def is_empty(self) -> bool:
        """Report whether no records are buffered."""
        return not self.records
|
|
320
|
+
|
|
321
|
+
|
|
322
|
+
# =============================================================================
|
|
323
|
+
# Base Writer
|
|
324
|
+
# =============================================================================
|
|
325
|
+
|
|
326
|
+
|
|
327
|
+
class BaseStreamWriter(ABC):
|
|
328
|
+
"""Base class for streaming writers.
|
|
329
|
+
|
|
330
|
+
Handles buffering, serialization, and compression.
|
|
331
|
+
"""
|
|
332
|
+
|
|
333
|
+
def __init__(
|
|
334
|
+
self,
|
|
335
|
+
session: StreamSession,
|
|
336
|
+
config: StreamingConfig,
|
|
337
|
+
serializer: RecordSerializer | None = None,
|
|
338
|
+
compressor: Compressor | None = None,
|
|
339
|
+
):
|
|
340
|
+
"""Initialize the writer.
|
|
341
|
+
|
|
342
|
+
Args:
|
|
343
|
+
session: The streaming session.
|
|
344
|
+
config: Streaming configuration.
|
|
345
|
+
serializer: Record serializer (auto-selected if None).
|
|
346
|
+
compressor: Data compressor (auto-selected if None).
|
|
347
|
+
"""
|
|
348
|
+
self.session = session
|
|
349
|
+
self.config = config
|
|
350
|
+
self.serializer = serializer or get_serializer(config.format)
|
|
351
|
+
self.compressor = compressor or get_compressor(config.compression)
|
|
352
|
+
|
|
353
|
+
self.buffer = WriteBuffer(
|
|
354
|
+
max_records=config.buffer_size,
|
|
355
|
+
max_bytes=config.max_memory_mb * 1024 * 1024 // 4, # 25% of max memory
|
|
356
|
+
)
|
|
357
|
+
|
|
358
|
+
self.metrics = session.metrics
|
|
359
|
+
self._chunk_index = len(session.chunks)
|
|
360
|
+
self._record_offset = sum(c.record_count for c in session.chunks)
|
|
361
|
+
self._closed = False
|
|
362
|
+
self._lock = threading.RLock()
|
|
363
|
+
|
|
364
|
+
# Auto-flush timer
|
|
365
|
+
self._last_flush_time = time.time()
|
|
366
|
+
self._flush_timer: threading.Timer | None = None
|
|
367
|
+
|
|
368
|
+
if config.flush_interval_seconds > 0:
|
|
369
|
+
self._start_flush_timer()
|
|
370
|
+
|
|
371
|
+
def _start_flush_timer(self) -> None:
|
|
372
|
+
"""Start the auto-flush timer."""
|
|
373
|
+
if self._flush_timer is not None:
|
|
374
|
+
self._flush_timer.cancel()
|
|
375
|
+
|
|
376
|
+
self._flush_timer = threading.Timer(
|
|
377
|
+
self.config.flush_interval_seconds,
|
|
378
|
+
self._auto_flush,
|
|
379
|
+
)
|
|
380
|
+
self._flush_timer.daemon = True
|
|
381
|
+
self._flush_timer.start()
|
|
382
|
+
|
|
383
|
+
def _auto_flush(self) -> None:
|
|
384
|
+
"""Auto-flush callback."""
|
|
385
|
+
if not self._closed and not self.buffer.is_empty():
|
|
386
|
+
try:
|
|
387
|
+
self.flush()
|
|
388
|
+
except Exception:
|
|
389
|
+
pass # Ignore auto-flush errors
|
|
390
|
+
if not self._closed:
|
|
391
|
+
self._start_flush_timer()
|
|
392
|
+
|
|
393
|
+
def write(self, record: dict[str, Any]) -> None:
|
|
394
|
+
"""Write a single record.
|
|
395
|
+
|
|
396
|
+
Args:
|
|
397
|
+
record: The record to write.
|
|
398
|
+
"""
|
|
399
|
+
if self._closed:
|
|
400
|
+
raise StreamWriteError("Writer is closed")
|
|
401
|
+
|
|
402
|
+
with self._lock:
|
|
403
|
+
if self.buffer.add(record):
|
|
404
|
+
self.flush()
|
|
405
|
+
|
|
406
|
+
def write_result(self, result: "ValidatorResult") -> None:
|
|
407
|
+
"""Write a ValidatorResult.
|
|
408
|
+
|
|
409
|
+
Args:
|
|
410
|
+
result: The validator result to write.
|
|
411
|
+
"""
|
|
412
|
+
self.write(result.to_dict())
|
|
413
|
+
|
|
414
|
+
def write_batch(self, records: list[dict[str, Any]]) -> None:
|
|
415
|
+
"""Write a batch of records.
|
|
416
|
+
|
|
417
|
+
Args:
|
|
418
|
+
records: The records to write.
|
|
419
|
+
"""
|
|
420
|
+
if self._closed:
|
|
421
|
+
raise StreamWriteError("Writer is closed")
|
|
422
|
+
|
|
423
|
+
with self._lock:
|
|
424
|
+
if self.buffer.add_batch(records):
|
|
425
|
+
self.flush()
|
|
426
|
+
|
|
427
|
+
def write_results(self, results: list["ValidatorResult"]) -> None:
|
|
428
|
+
"""Write a batch of ValidatorResults.
|
|
429
|
+
|
|
430
|
+
Args:
|
|
431
|
+
results: The validator results to write.
|
|
432
|
+
"""
|
|
433
|
+
self.write_batch([r.to_dict() for r in results])
|
|
434
|
+
|
|
435
|
+
def flush(self) -> ChunkInfo:
|
|
436
|
+
"""Flush buffered records to storage.
|
|
437
|
+
|
|
438
|
+
Returns:
|
|
439
|
+
Information about the written chunk.
|
|
440
|
+
"""
|
|
441
|
+
if self._closed:
|
|
442
|
+
raise StreamWriteError("Writer is closed")
|
|
443
|
+
|
|
444
|
+
with self._lock:
|
|
445
|
+
records = self.buffer.clear()
|
|
446
|
+
if not records:
|
|
447
|
+
return ChunkInfo(
|
|
448
|
+
chunk_id="",
|
|
449
|
+
chunk_index=-1,
|
|
450
|
+
record_count=0,
|
|
451
|
+
byte_size=0,
|
|
452
|
+
start_offset=self._record_offset,
|
|
453
|
+
end_offset=self._record_offset,
|
|
454
|
+
)
|
|
455
|
+
|
|
456
|
+
# Serialize
|
|
457
|
+
data = self.serializer.serialize_batch(records)
|
|
458
|
+
|
|
459
|
+
# Compute checksum before compression
|
|
460
|
+
checksum = hashlib.md5(data).hexdigest()
|
|
461
|
+
|
|
462
|
+
# Compress
|
|
463
|
+
compressed_data = self.compressor.compress(data)
|
|
464
|
+
|
|
465
|
+
# Create chunk info
|
|
466
|
+
chunk_id = f"{self.session.run_id}_chunk_{self._chunk_index:06d}"
|
|
467
|
+
chunk_info = ChunkInfo(
|
|
468
|
+
chunk_id=chunk_id,
|
|
469
|
+
chunk_index=self._chunk_index,
|
|
470
|
+
record_count=len(records),
|
|
471
|
+
byte_size=len(compressed_data),
|
|
472
|
+
start_offset=self._record_offset,
|
|
473
|
+
end_offset=self._record_offset + len(records),
|
|
474
|
+
checksum=checksum,
|
|
475
|
+
)
|
|
476
|
+
|
|
477
|
+
# Write to storage
|
|
478
|
+
try:
|
|
479
|
+
self._write_chunk(chunk_info, compressed_data)
|
|
480
|
+
except Exception as e:
|
|
481
|
+
# Retry logic
|
|
482
|
+
for attempt in range(self.config.max_retries):
|
|
483
|
+
try:
|
|
484
|
+
time.sleep(self.config.retry_delay_seconds * (2**attempt))
|
|
485
|
+
self._write_chunk(chunk_info, compressed_data)
|
|
486
|
+
self.metrics.retry_count += 1
|
|
487
|
+
break
|
|
488
|
+
except Exception:
|
|
489
|
+
if attempt == self.config.max_retries - 1:
|
|
490
|
+
self.metrics.record_error(str(e))
|
|
491
|
+
raise StreamFlushError(f"Failed to write chunk: {e}")
|
|
492
|
+
|
|
493
|
+
# Update state
|
|
494
|
+
self.session.chunks.append(chunk_info)
|
|
495
|
+
self._chunk_index += 1
|
|
496
|
+
self._record_offset += len(records)
|
|
497
|
+
self._last_flush_time = time.time()
|
|
498
|
+
|
|
499
|
+
# Update metrics
|
|
500
|
+
self.metrics.record_write(len(records), len(compressed_data))
|
|
501
|
+
self.metrics.record_chunk(is_write=True)
|
|
502
|
+
self.metrics.flush_count += 1
|
|
503
|
+
|
|
504
|
+
# Checkpoint if needed
|
|
505
|
+
if (
|
|
506
|
+
self.config.enable_checkpoints
|
|
507
|
+
and self._record_offset - self.session.checkpoint_offset
|
|
508
|
+
>= self.config.checkpoint_interval
|
|
509
|
+
):
|
|
510
|
+
self._write_checkpoint()
|
|
511
|
+
|
|
512
|
+
return chunk_info
|
|
513
|
+
|
|
514
|
+
@abstractmethod
|
|
515
|
+
def _write_chunk(self, chunk_info: ChunkInfo, data: bytes) -> None:
|
|
516
|
+
"""Write a chunk to storage.
|
|
517
|
+
|
|
518
|
+
Args:
|
|
519
|
+
chunk_info: Chunk metadata.
|
|
520
|
+
data: Compressed chunk data.
|
|
521
|
+
"""
|
|
522
|
+
pass
|
|
523
|
+
|
|
524
|
+
def _write_checkpoint(self) -> None:
|
|
525
|
+
"""Write a checkpoint for recovery."""
|
|
526
|
+
self.session.checkpoint_offset = self._record_offset
|
|
527
|
+
self.session.updated_at = datetime.now()
|
|
528
|
+
self._write_session_state()
|
|
529
|
+
|
|
530
|
+
@abstractmethod
|
|
531
|
+
def _write_session_state(self) -> None:
|
|
532
|
+
"""Write session state for recovery."""
|
|
533
|
+
pass
|
|
534
|
+
|
|
535
|
+
def close(self) -> None:
|
|
536
|
+
"""Close the writer and finalize."""
|
|
537
|
+
if self._closed:
|
|
538
|
+
return
|
|
539
|
+
|
|
540
|
+
with self._lock:
|
|
541
|
+
# Stop flush timer
|
|
542
|
+
if self._flush_timer is not None:
|
|
543
|
+
self._flush_timer.cancel()
|
|
544
|
+
self._flush_timer = None
|
|
545
|
+
|
|
546
|
+
# Flush remaining records
|
|
547
|
+
if not self.buffer.is_empty():
|
|
548
|
+
self.flush()
|
|
549
|
+
|
|
550
|
+
# Update session
|
|
551
|
+
self.session.status = StreamStatus.COMPLETED
|
|
552
|
+
self.session.updated_at = datetime.now()
|
|
553
|
+
self.metrics.finish()
|
|
554
|
+
|
|
555
|
+
# Write final state
|
|
556
|
+
self._write_session_state()
|
|
557
|
+
self._finalize()
|
|
558
|
+
|
|
559
|
+
self._closed = True
|
|
560
|
+
|
|
561
|
+
@abstractmethod
def _finalize(self) -> None:
    """Finish the stream once everything has been written.

    Abstract hook invoked by ``close`` after the final session state has
    been written (e.g., to create a manifest). The base implementation
    does nothing.
    """
    ...
|
|
565
|
+
|
|
566
|
+
def __enter__(self) -> "BaseStreamWriter":
    """Enter the ``with`` block.

    Starts the metrics and marks the session as ``ACTIVE``.

    Returns:
        This writer, so it can be bound with ``as``.
    """
    # Start the metrics before the session is flagged as active.
    self.metrics.start()
    self.session.status = StreamStatus.ACTIVE
    return self
|
|
571
|
+
|
|
572
|
+
def __exit__(self, exc_type: Any, exc_val: Any, exc_tb: Any) -> None:
    """Leave the ``with`` block and close the writer.

    When the block raised, the session is marked ``FAILED`` and the
    exception text is recorded in the metrics before closing. Returns
    ``None``, so the exception is not suppressed.

    Args:
        exc_type: Exception class raised in the block, or ``None``.
        exc_val: Exception instance raised in the block, or ``None``.
        exc_tb: Traceback of the exception (unused).
    """
    raised = exc_type is not None
    if raised:
        # Record the failure on both the session and the metrics.
        self.session.status = StreamStatus.FAILED
        self.metrics.record_error(str(exc_val))

    self.close()
|
|
578
|
+
|
|
579
|
+
|
|
580
|
+
# =============================================================================
|
|
581
|
+
# Concrete Writers
|
|
582
|
+
# =============================================================================
|
|
583
|
+
|
|
584
|
+
|
|
585
|
+
class StreamingResultWriter(BaseStreamWriter):
    """Filesystem-based streaming writer.

    Each chunk is written to its own file under ``<base_path>/<run_id>/``
    and the session state is kept in ``_manifest.json`` in the same
    directory. Both kinds of file are written to a temporary sibling and
    then moved over the final name, so an interrupted write leaves the
    previous file (if any) untouched rather than a truncated one.
    """

    def __init__(
        self,
        session: StreamSession,
        config: StreamingConfig,
        base_path: Path | str,
        serializer: RecordSerializer | None = None,
        compressor: Compressor | None = None,
    ):
        """Initialize the filesystem writer.

        Creates ``base_path`` and the per-run directory if they are missing.

        Args:
            session: The streaming session.
            config: Streaming configuration.
            base_path: Base directory for writing chunks.
            serializer: Record serializer.
            compressor: Data compressor.
        """
        super().__init__(session, config, serializer, compressor)
        self.base_path = Path(base_path)
        self.base_path.mkdir(parents=True, exist_ok=True)

        # Create run directory
        self.run_path = self.base_path / session.run_id
        self.run_path.mkdir(parents=True, exist_ok=True)

    def _get_chunk_path(self, chunk_info: ChunkInfo) -> Path:
        """Get the file path for a chunk.

        The name is the chunk id followed by the extension for the
        configured format (``.jsonl`` for unknown formats) and the
        compressor's own extension.
        """
        ext = {
            StreamingFormat.JSONL: ".jsonl",
            StreamingFormat.NDJSON: ".ndjson",
            StreamingFormat.CSV: ".csv",
            StreamingFormat.PARQUET: ".parquet",
        }.get(self.config.format, ".jsonl")

        ext += self.compressor.get_extension()
        return self.run_path / f"{chunk_info.chunk_id}{ext}"

    @staticmethod
    def _atomic_write(target: Path, data: bytes) -> None:
        """Write ``data`` to ``target`` via a temporary sibling file.

        The temporary file is removed again if writing or moving fails, and
        the original exception is re-raised.
        """
        temp_path = target.with_name(target.name + ".tmp")
        try:
            with open(temp_path, "wb") as f:
                f.write(data)
            # replace() overwrites an existing target on every platform,
            # whereas rename() fails on Windows when the target exists.
            temp_path.replace(target)
        except Exception:
            if temp_path.exists():
                temp_path.unlink()
            raise

    def _write_chunk(self, chunk_info: ChunkInfo, data: bytes) -> None:
        """Write a chunk to the filesystem and record its path on ``chunk_info``."""
        chunk_path = self._get_chunk_path(chunk_info)
        chunk_info.path = str(chunk_path)
        self._atomic_write(chunk_path, data)

    def _write_session_state(self) -> None:
        """Write session state to ``_manifest.json`` in the run directory.

        The manifest is replaced as a whole so that a crash while
        checkpointing cannot leave a half-written manifest behind.
        """
        manifest_path = self.run_path / "_manifest.json"
        payload = json.dumps(self.session.to_dict(), indent=2, default=str)
        self._atomic_write(manifest_path, payload.encode("utf-8"))

    def _finalize(self) -> None:
        """Create final manifest."""
        # Already handled in _write_session_state
        pass
|
|
654
|
+
|
|
655
|
+
|
|
656
|
+
class BufferedStreamWriter(BaseStreamWriter):
    """In-memory buffered writer for testing or small results.

    Every chunk handed to ``_write_chunk`` is appended to ``output``, which
    is a ``BytesIO`` unless the caller supplies a stream. No session state
    is persisted and nothing extra happens on finalize.
    """

    def __init__(
        self,
        session: StreamSession,
        config: StreamingConfig,
        output: BinaryIO | None = None,
    ):
        """Initialize the buffered writer.

        Args:
            session: The streaming session.
            config: Streaming configuration.
            output: Output stream (BytesIO created if None).
        """
        super().__init__(session, config)
        # Test for None explicitly: a caller-supplied stream that happens to
        # be falsy must not be silently swapped for a fresh BytesIO.
        self.output: BinaryIO = output if output is not None else BytesIO()
        # Not used by any method of this class.
        self._all_records: list[dict[str, Any]] = []

    def _write_chunk(self, chunk_info: ChunkInfo, data: bytes) -> None:
        """Append the chunk data to the output stream."""
        self.output.write(data)

    def _write_session_state(self) -> None:
        """No-op for buffered writer."""
        pass

    def _finalize(self) -> None:
        """No-op for buffered writer."""
        pass

    def get_output(self) -> bytes:
        """Get the accumulated output.

        Returns:
            Everything written so far when the output is a ``BytesIO``;
            ``b""`` for any other kind of stream.
        """
        if isinstance(self.output, BytesIO):
            return self.output.getvalue()
        return b""
|
|
696
|
+
|
|
697
|
+
|
|
698
|
+
class AsyncStreamWriter:
    """Async wrapper for streaming writers.

    Provides an async interface for any BaseStreamWriter by running each
    call of the wrapped synchronous writer in the running event loop's
    default executor.
    """

    def __init__(self, writer: BaseStreamWriter):
        """Initialize the async writer.

        No event loop is captured here, so the wrapper can be created before
        a loop is running (for example before ``asyncio.run``).

        Args:
            writer: The underlying synchronous writer.
        """
        self._writer = writer

    @property
    def _loop(self) -> asyncio.AbstractEventLoop:
        """Event loop running the current coroutine.

        Resolved on every access: ``asyncio.get_event_loop()`` is deprecated
        outside a running loop, and a loop captured at construction time may
        not be the one the caller later awaits on.
        """
        return asyncio.get_running_loop()

    async def write(self, record: dict[str, Any]) -> None:
        """Write a single record asynchronously."""
        await self._loop.run_in_executor(None, self._writer.write, record)

    async def write_result(self, result: "ValidatorResult") -> None:
        """Write a ValidatorResult asynchronously."""
        await self._loop.run_in_executor(None, self._writer.write_result, result)

    async def write_batch(self, records: list[dict[str, Any]]) -> None:
        """Write a batch of records asynchronously."""
        await self._loop.run_in_executor(None, self._writer.write_batch, records)

    async def write_results(self, results: list["ValidatorResult"]) -> None:
        """Write a batch of ValidatorResults asynchronously."""
        await self._loop.run_in_executor(None, self._writer.write_results, results)

    async def flush(self) -> ChunkInfo:
        """Flush buffered records asynchronously and return the writer's result."""
        return await self._loop.run_in_executor(None, self._writer.flush)

    async def close(self) -> None:
        """Close the writer asynchronously."""
        await self._loop.run_in_executor(None, self._writer.close)

    async def __aenter__(self) -> "AsyncStreamWriter":
        """Async context manager entry: start metrics and mark the session active."""
        self._writer.metrics.start()
        self._writer.session.status = StreamStatus.ACTIVE
        return self

    async def __aexit__(self, exc_type: Any, exc_val: Any, exc_tb: Any) -> None:
        """Async context manager exit.

        Marks the session ``FAILED`` and records the error when the block
        raised, then closes the underlying writer.
        """
        if exc_type is not None:
            self._writer.session.status = StreamStatus.FAILED
            self._writer.metrics.record_error(str(exc_val))
        await self.close()

    @property
    def session(self) -> StreamSession:
        """Get the streaming session."""
        return self._writer.session

    @property
    def metrics(self) -> StreamingMetrics:
        """Get the streaming metrics."""
        return self._writer.metrics
|
|
759
|
+
|
|
760
|
+
|
|
761
|
+
# =============================================================================
|
|
762
|
+
# Factory Functions
|
|
763
|
+
# =============================================================================
|
|
764
|
+
|
|
765
|
+
|
|
766
|
+
def create_stream_writer(
    session: StreamSession,
    config: StreamingConfig,
    backend: str = "filesystem",
    **kwargs: Any,
) -> BaseStreamWriter:
    """Create a streaming writer for the specified backend.

    Args:
        session: The streaming session.
        config: Streaming configuration.
        backend: Storage backend. Supported values are ``"filesystem"`` and
            ``"memory"``.
        **kwargs: Backend-specific options.
            ``"filesystem"``: ``base_path`` (defaults to
            ``".truthound/streaming"``), ``serializer``, ``compressor``.
            ``"memory"``: ``output``.

    Returns:
        A streaming writer instance.

    Raises:
        ValueError: If ``backend`` is not one of the supported values.
    """
    if backend == "filesystem":
        # Forward the optional serializer/compressor instead of dropping
        # them; None leaves the choice to the writer, as before.
        return StreamingResultWriter(
            session,
            config,
            kwargs.get("base_path", ".truthound/streaming"),
            serializer=kwargs.get("serializer"),
            compressor=kwargs.get("compressor"),
        )
    if backend == "memory":
        return BufferedStreamWriter(session, config, kwargs.get("output"))
    raise ValueError(f"Unsupported backend: {backend}")
|