truthound 1.0.8__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- truthound/__init__.py +162 -0
- truthound/adapters.py +100 -0
- truthound/api.py +365 -0
- truthound/audit/__init__.py +248 -0
- truthound/audit/core.py +967 -0
- truthound/audit/filters.py +620 -0
- truthound/audit/formatters.py +707 -0
- truthound/audit/logger.py +902 -0
- truthound/audit/middleware.py +571 -0
- truthound/audit/storage.py +1083 -0
- truthound/benchmark/__init__.py +123 -0
- truthound/benchmark/base.py +757 -0
- truthound/benchmark/comparison.py +635 -0
- truthound/benchmark/generators.py +706 -0
- truthound/benchmark/reporters.py +718 -0
- truthound/benchmark/runner.py +635 -0
- truthound/benchmark/scenarios.py +712 -0
- truthound/cache.py +252 -0
- truthound/checkpoint/__init__.py +136 -0
- truthound/checkpoint/actions/__init__.py +164 -0
- truthound/checkpoint/actions/base.py +324 -0
- truthound/checkpoint/actions/custom.py +234 -0
- truthound/checkpoint/actions/discord_notify.py +290 -0
- truthound/checkpoint/actions/email_notify.py +405 -0
- truthound/checkpoint/actions/github_action.py +406 -0
- truthound/checkpoint/actions/opsgenie.py +1499 -0
- truthound/checkpoint/actions/pagerduty.py +226 -0
- truthound/checkpoint/actions/slack_notify.py +233 -0
- truthound/checkpoint/actions/store_result.py +249 -0
- truthound/checkpoint/actions/teams_notify.py +1570 -0
- truthound/checkpoint/actions/telegram_notify.py +419 -0
- truthound/checkpoint/actions/update_docs.py +552 -0
- truthound/checkpoint/actions/webhook.py +293 -0
- truthound/checkpoint/analytics/__init__.py +147 -0
- truthound/checkpoint/analytics/aggregations/__init__.py +23 -0
- truthound/checkpoint/analytics/aggregations/rollup.py +481 -0
- truthound/checkpoint/analytics/aggregations/time_bucket.py +306 -0
- truthound/checkpoint/analytics/analyzers/__init__.py +17 -0
- truthound/checkpoint/analytics/analyzers/anomaly.py +386 -0
- truthound/checkpoint/analytics/analyzers/base.py +270 -0
- truthound/checkpoint/analytics/analyzers/forecast.py +421 -0
- truthound/checkpoint/analytics/analyzers/trend.py +314 -0
- truthound/checkpoint/analytics/models.py +292 -0
- truthound/checkpoint/analytics/protocols.py +549 -0
- truthound/checkpoint/analytics/service.py +718 -0
- truthound/checkpoint/analytics/stores/__init__.py +16 -0
- truthound/checkpoint/analytics/stores/base.py +306 -0
- truthound/checkpoint/analytics/stores/memory_store.py +353 -0
- truthound/checkpoint/analytics/stores/sqlite_store.py +557 -0
- truthound/checkpoint/analytics/stores/timescale_store.py +501 -0
- truthound/checkpoint/async_actions.py +794 -0
- truthound/checkpoint/async_base.py +708 -0
- truthound/checkpoint/async_checkpoint.py +617 -0
- truthound/checkpoint/async_runner.py +639 -0
- truthound/checkpoint/checkpoint.py +527 -0
- truthound/checkpoint/ci/__init__.py +61 -0
- truthound/checkpoint/ci/detector.py +355 -0
- truthound/checkpoint/ci/reporter.py +436 -0
- truthound/checkpoint/ci/templates.py +454 -0
- truthound/checkpoint/circuitbreaker/__init__.py +133 -0
- truthound/checkpoint/circuitbreaker/breaker.py +542 -0
- truthound/checkpoint/circuitbreaker/core.py +252 -0
- truthound/checkpoint/circuitbreaker/detection.py +459 -0
- truthound/checkpoint/circuitbreaker/middleware.py +389 -0
- truthound/checkpoint/circuitbreaker/registry.py +357 -0
- truthound/checkpoint/distributed/__init__.py +139 -0
- truthound/checkpoint/distributed/backends/__init__.py +35 -0
- truthound/checkpoint/distributed/backends/celery_backend.py +503 -0
- truthound/checkpoint/distributed/backends/kubernetes_backend.py +696 -0
- truthound/checkpoint/distributed/backends/local_backend.py +397 -0
- truthound/checkpoint/distributed/backends/ray_backend.py +625 -0
- truthound/checkpoint/distributed/base.py +774 -0
- truthound/checkpoint/distributed/orchestrator.py +765 -0
- truthound/checkpoint/distributed/protocols.py +842 -0
- truthound/checkpoint/distributed/registry.py +449 -0
- truthound/checkpoint/idempotency/__init__.py +120 -0
- truthound/checkpoint/idempotency/core.py +295 -0
- truthound/checkpoint/idempotency/fingerprint.py +454 -0
- truthound/checkpoint/idempotency/locking.py +604 -0
- truthound/checkpoint/idempotency/service.py +592 -0
- truthound/checkpoint/idempotency/stores.py +653 -0
- truthound/checkpoint/monitoring/__init__.py +134 -0
- truthound/checkpoint/monitoring/aggregators/__init__.py +15 -0
- truthound/checkpoint/monitoring/aggregators/base.py +372 -0
- truthound/checkpoint/monitoring/aggregators/realtime.py +300 -0
- truthound/checkpoint/monitoring/aggregators/window.py +493 -0
- truthound/checkpoint/monitoring/collectors/__init__.py +17 -0
- truthound/checkpoint/monitoring/collectors/base.py +257 -0
- truthound/checkpoint/monitoring/collectors/memory_collector.py +617 -0
- truthound/checkpoint/monitoring/collectors/prometheus_collector.py +451 -0
- truthound/checkpoint/monitoring/collectors/redis_collector.py +518 -0
- truthound/checkpoint/monitoring/events.py +410 -0
- truthound/checkpoint/monitoring/protocols.py +636 -0
- truthound/checkpoint/monitoring/service.py +578 -0
- truthound/checkpoint/monitoring/views/__init__.py +17 -0
- truthound/checkpoint/monitoring/views/base.py +172 -0
- truthound/checkpoint/monitoring/views/queue_view.py +220 -0
- truthound/checkpoint/monitoring/views/task_view.py +240 -0
- truthound/checkpoint/monitoring/views/worker_view.py +263 -0
- truthound/checkpoint/registry.py +337 -0
- truthound/checkpoint/runner.py +356 -0
- truthound/checkpoint/transaction/__init__.py +133 -0
- truthound/checkpoint/transaction/base.py +389 -0
- truthound/checkpoint/transaction/compensatable.py +537 -0
- truthound/checkpoint/transaction/coordinator.py +576 -0
- truthound/checkpoint/transaction/executor.py +622 -0
- truthound/checkpoint/transaction/idempotency.py +534 -0
- truthound/checkpoint/transaction/saga/__init__.py +143 -0
- truthound/checkpoint/transaction/saga/builder.py +584 -0
- truthound/checkpoint/transaction/saga/definition.py +515 -0
- truthound/checkpoint/transaction/saga/event_store.py +542 -0
- truthound/checkpoint/transaction/saga/patterns.py +833 -0
- truthound/checkpoint/transaction/saga/runner.py +718 -0
- truthound/checkpoint/transaction/saga/state_machine.py +793 -0
- truthound/checkpoint/transaction/saga/strategies.py +780 -0
- truthound/checkpoint/transaction/saga/testing.py +886 -0
- truthound/checkpoint/triggers/__init__.py +58 -0
- truthound/checkpoint/triggers/base.py +237 -0
- truthound/checkpoint/triggers/event.py +385 -0
- truthound/checkpoint/triggers/schedule.py +355 -0
- truthound/cli.py +2358 -0
- truthound/cli_modules/__init__.py +124 -0
- truthound/cli_modules/advanced/__init__.py +45 -0
- truthound/cli_modules/advanced/benchmark.py +343 -0
- truthound/cli_modules/advanced/docs.py +225 -0
- truthound/cli_modules/advanced/lineage.py +209 -0
- truthound/cli_modules/advanced/ml.py +320 -0
- truthound/cli_modules/advanced/realtime.py +196 -0
- truthound/cli_modules/checkpoint/__init__.py +46 -0
- truthound/cli_modules/checkpoint/init.py +114 -0
- truthound/cli_modules/checkpoint/list.py +71 -0
- truthound/cli_modules/checkpoint/run.py +159 -0
- truthound/cli_modules/checkpoint/validate.py +67 -0
- truthound/cli_modules/common/__init__.py +71 -0
- truthound/cli_modules/common/errors.py +414 -0
- truthound/cli_modules/common/options.py +419 -0
- truthound/cli_modules/common/output.py +507 -0
- truthound/cli_modules/common/protocol.py +552 -0
- truthound/cli_modules/core/__init__.py +48 -0
- truthound/cli_modules/core/check.py +123 -0
- truthound/cli_modules/core/compare.py +104 -0
- truthound/cli_modules/core/learn.py +57 -0
- truthound/cli_modules/core/mask.py +77 -0
- truthound/cli_modules/core/profile.py +65 -0
- truthound/cli_modules/core/scan.py +61 -0
- truthound/cli_modules/profiler/__init__.py +51 -0
- truthound/cli_modules/profiler/auto_profile.py +175 -0
- truthound/cli_modules/profiler/metadata.py +107 -0
- truthound/cli_modules/profiler/suite.py +283 -0
- truthound/cli_modules/registry.py +431 -0
- truthound/cli_modules/scaffolding/__init__.py +89 -0
- truthound/cli_modules/scaffolding/base.py +631 -0
- truthound/cli_modules/scaffolding/commands.py +545 -0
- truthound/cli_modules/scaffolding/plugins.py +1072 -0
- truthound/cli_modules/scaffolding/reporters.py +594 -0
- truthound/cli_modules/scaffolding/validators.py +1127 -0
- truthound/common/__init__.py +18 -0
- truthound/common/resilience/__init__.py +130 -0
- truthound/common/resilience/bulkhead.py +266 -0
- truthound/common/resilience/circuit_breaker.py +516 -0
- truthound/common/resilience/composite.py +332 -0
- truthound/common/resilience/config.py +292 -0
- truthound/common/resilience/protocols.py +217 -0
- truthound/common/resilience/rate_limiter.py +404 -0
- truthound/common/resilience/retry.py +341 -0
- truthound/datadocs/__init__.py +260 -0
- truthound/datadocs/base.py +571 -0
- truthound/datadocs/builder.py +761 -0
- truthound/datadocs/charts.py +764 -0
- truthound/datadocs/dashboard/__init__.py +63 -0
- truthound/datadocs/dashboard/app.py +576 -0
- truthound/datadocs/dashboard/components.py +584 -0
- truthound/datadocs/dashboard/state.py +240 -0
- truthound/datadocs/engine/__init__.py +46 -0
- truthound/datadocs/engine/context.py +376 -0
- truthound/datadocs/engine/pipeline.py +618 -0
- truthound/datadocs/engine/registry.py +469 -0
- truthound/datadocs/exporters/__init__.py +49 -0
- truthound/datadocs/exporters/base.py +198 -0
- truthound/datadocs/exporters/html.py +178 -0
- truthound/datadocs/exporters/json_exporter.py +253 -0
- truthound/datadocs/exporters/markdown.py +284 -0
- truthound/datadocs/exporters/pdf.py +392 -0
- truthound/datadocs/i18n/__init__.py +86 -0
- truthound/datadocs/i18n/catalog.py +960 -0
- truthound/datadocs/i18n/formatting.py +505 -0
- truthound/datadocs/i18n/loader.py +256 -0
- truthound/datadocs/i18n/plurals.py +378 -0
- truthound/datadocs/renderers/__init__.py +42 -0
- truthound/datadocs/renderers/base.py +401 -0
- truthound/datadocs/renderers/custom.py +342 -0
- truthound/datadocs/renderers/jinja.py +697 -0
- truthound/datadocs/sections.py +736 -0
- truthound/datadocs/styles.py +931 -0
- truthound/datadocs/themes/__init__.py +101 -0
- truthound/datadocs/themes/base.py +336 -0
- truthound/datadocs/themes/default.py +417 -0
- truthound/datadocs/themes/enterprise.py +419 -0
- truthound/datadocs/themes/loader.py +336 -0
- truthound/datadocs/themes.py +301 -0
- truthound/datadocs/transformers/__init__.py +57 -0
- truthound/datadocs/transformers/base.py +268 -0
- truthound/datadocs/transformers/enrichers.py +544 -0
- truthound/datadocs/transformers/filters.py +447 -0
- truthound/datadocs/transformers/i18n.py +468 -0
- truthound/datadocs/versioning/__init__.py +62 -0
- truthound/datadocs/versioning/diff.py +639 -0
- truthound/datadocs/versioning/storage.py +497 -0
- truthound/datadocs/versioning/version.py +358 -0
- truthound/datasources/__init__.py +223 -0
- truthound/datasources/_async_protocols.py +222 -0
- truthound/datasources/_protocols.py +159 -0
- truthound/datasources/adapters.py +428 -0
- truthound/datasources/async_base.py +599 -0
- truthound/datasources/async_factory.py +511 -0
- truthound/datasources/base.py +516 -0
- truthound/datasources/factory.py +433 -0
- truthound/datasources/nosql/__init__.py +47 -0
- truthound/datasources/nosql/base.py +487 -0
- truthound/datasources/nosql/elasticsearch.py +801 -0
- truthound/datasources/nosql/mongodb.py +636 -0
- truthound/datasources/pandas_optimized.py +582 -0
- truthound/datasources/pandas_source.py +216 -0
- truthound/datasources/polars_source.py +395 -0
- truthound/datasources/spark_source.py +479 -0
- truthound/datasources/sql/__init__.py +154 -0
- truthound/datasources/sql/base.py +710 -0
- truthound/datasources/sql/bigquery.py +410 -0
- truthound/datasources/sql/cloud_base.py +199 -0
- truthound/datasources/sql/databricks.py +471 -0
- truthound/datasources/sql/mysql.py +316 -0
- truthound/datasources/sql/oracle.py +427 -0
- truthound/datasources/sql/postgresql.py +321 -0
- truthound/datasources/sql/redshift.py +479 -0
- truthound/datasources/sql/snowflake.py +439 -0
- truthound/datasources/sql/sqlite.py +286 -0
- truthound/datasources/sql/sqlserver.py +437 -0
- truthound/datasources/streaming/__init__.py +47 -0
- truthound/datasources/streaming/base.py +350 -0
- truthound/datasources/streaming/kafka.py +670 -0
- truthound/decorators.py +98 -0
- truthound/docs/__init__.py +69 -0
- truthound/docs/extractor.py +971 -0
- truthound/docs/generator.py +601 -0
- truthound/docs/parser.py +1037 -0
- truthound/docs/renderer.py +999 -0
- truthound/drift/__init__.py +22 -0
- truthound/drift/compare.py +189 -0
- truthound/drift/detectors.py +464 -0
- truthound/drift/report.py +160 -0
- truthound/execution/__init__.py +65 -0
- truthound/execution/_protocols.py +324 -0
- truthound/execution/base.py +576 -0
- truthound/execution/distributed/__init__.py +179 -0
- truthound/execution/distributed/aggregations.py +731 -0
- truthound/execution/distributed/arrow_bridge.py +817 -0
- truthound/execution/distributed/base.py +550 -0
- truthound/execution/distributed/dask_engine.py +976 -0
- truthound/execution/distributed/mixins.py +766 -0
- truthound/execution/distributed/protocols.py +756 -0
- truthound/execution/distributed/ray_engine.py +1127 -0
- truthound/execution/distributed/registry.py +446 -0
- truthound/execution/distributed/spark_engine.py +1011 -0
- truthound/execution/distributed/validator_adapter.py +682 -0
- truthound/execution/pandas_engine.py +401 -0
- truthound/execution/polars_engine.py +497 -0
- truthound/execution/pushdown/__init__.py +230 -0
- truthound/execution/pushdown/ast.py +1550 -0
- truthound/execution/pushdown/builder.py +1550 -0
- truthound/execution/pushdown/dialects.py +1072 -0
- truthound/execution/pushdown/executor.py +829 -0
- truthound/execution/pushdown/optimizer.py +1041 -0
- truthound/execution/sql_engine.py +518 -0
- truthound/infrastructure/__init__.py +189 -0
- truthound/infrastructure/audit.py +1515 -0
- truthound/infrastructure/config.py +1133 -0
- truthound/infrastructure/encryption.py +1132 -0
- truthound/infrastructure/logging.py +1503 -0
- truthound/infrastructure/metrics.py +1220 -0
- truthound/lineage/__init__.py +89 -0
- truthound/lineage/base.py +746 -0
- truthound/lineage/impact_analysis.py +474 -0
- truthound/lineage/integrations/__init__.py +22 -0
- truthound/lineage/integrations/openlineage.py +548 -0
- truthound/lineage/tracker.py +512 -0
- truthound/lineage/visualization/__init__.py +33 -0
- truthound/lineage/visualization/protocols.py +145 -0
- truthound/lineage/visualization/renderers/__init__.py +20 -0
- truthound/lineage/visualization/renderers/cytoscape.py +329 -0
- truthound/lineage/visualization/renderers/d3.py +331 -0
- truthound/lineage/visualization/renderers/graphviz.py +276 -0
- truthound/lineage/visualization/renderers/mermaid.py +308 -0
- truthound/maskers.py +113 -0
- truthound/ml/__init__.py +124 -0
- truthound/ml/anomaly_models/__init__.py +31 -0
- truthound/ml/anomaly_models/ensemble.py +362 -0
- truthound/ml/anomaly_models/isolation_forest.py +444 -0
- truthound/ml/anomaly_models/statistical.py +392 -0
- truthound/ml/base.py +1178 -0
- truthound/ml/drift_detection/__init__.py +26 -0
- truthound/ml/drift_detection/concept.py +381 -0
- truthound/ml/drift_detection/distribution.py +361 -0
- truthound/ml/drift_detection/feature.py +442 -0
- truthound/ml/drift_detection/multivariate.py +495 -0
- truthound/ml/monitoring/__init__.py +88 -0
- truthound/ml/monitoring/alerting/__init__.py +33 -0
- truthound/ml/monitoring/alerting/handlers.py +427 -0
- truthound/ml/monitoring/alerting/rules.py +508 -0
- truthound/ml/monitoring/collectors/__init__.py +19 -0
- truthound/ml/monitoring/collectors/composite.py +105 -0
- truthound/ml/monitoring/collectors/drift.py +324 -0
- truthound/ml/monitoring/collectors/performance.py +179 -0
- truthound/ml/monitoring/collectors/quality.py +369 -0
- truthound/ml/monitoring/monitor.py +536 -0
- truthound/ml/monitoring/protocols.py +451 -0
- truthound/ml/monitoring/stores/__init__.py +15 -0
- truthound/ml/monitoring/stores/memory.py +201 -0
- truthound/ml/monitoring/stores/prometheus.py +296 -0
- truthound/ml/rule_learning/__init__.py +25 -0
- truthound/ml/rule_learning/constraint_miner.py +443 -0
- truthound/ml/rule_learning/pattern_learner.py +499 -0
- truthound/ml/rule_learning/profile_learner.py +462 -0
- truthound/multitenancy/__init__.py +326 -0
- truthound/multitenancy/core.py +852 -0
- truthound/multitenancy/integration.py +597 -0
- truthound/multitenancy/isolation.py +630 -0
- truthound/multitenancy/manager.py +770 -0
- truthound/multitenancy/middleware.py +765 -0
- truthound/multitenancy/quota.py +537 -0
- truthound/multitenancy/resolvers.py +603 -0
- truthound/multitenancy/storage.py +703 -0
- truthound/observability/__init__.py +307 -0
- truthound/observability/context.py +531 -0
- truthound/observability/instrumentation.py +611 -0
- truthound/observability/logging.py +887 -0
- truthound/observability/metrics.py +1157 -0
- truthound/observability/tracing/__init__.py +178 -0
- truthound/observability/tracing/baggage.py +310 -0
- truthound/observability/tracing/config.py +426 -0
- truthound/observability/tracing/exporter.py +787 -0
- truthound/observability/tracing/integration.py +1018 -0
- truthound/observability/tracing/otel/__init__.py +146 -0
- truthound/observability/tracing/otel/adapter.py +982 -0
- truthound/observability/tracing/otel/bridge.py +1177 -0
- truthound/observability/tracing/otel/compat.py +681 -0
- truthound/observability/tracing/otel/config.py +691 -0
- truthound/observability/tracing/otel/detection.py +327 -0
- truthound/observability/tracing/otel/protocols.py +426 -0
- truthound/observability/tracing/processor.py +561 -0
- truthound/observability/tracing/propagator.py +757 -0
- truthound/observability/tracing/provider.py +569 -0
- truthound/observability/tracing/resource.py +515 -0
- truthound/observability/tracing/sampler.py +487 -0
- truthound/observability/tracing/span.py +676 -0
- truthound/plugins/__init__.py +198 -0
- truthound/plugins/base.py +599 -0
- truthound/plugins/cli.py +680 -0
- truthound/plugins/dependencies/__init__.py +42 -0
- truthound/plugins/dependencies/graph.py +422 -0
- truthound/plugins/dependencies/resolver.py +417 -0
- truthound/plugins/discovery.py +379 -0
- truthound/plugins/docs/__init__.py +46 -0
- truthound/plugins/docs/extractor.py +444 -0
- truthound/plugins/docs/renderer.py +499 -0
- truthound/plugins/enterprise_manager.py +877 -0
- truthound/plugins/examples/__init__.py +19 -0
- truthound/plugins/examples/custom_validators.py +317 -0
- truthound/plugins/examples/slack_notifier.py +312 -0
- truthound/plugins/examples/xml_reporter.py +254 -0
- truthound/plugins/hooks.py +558 -0
- truthound/plugins/lifecycle/__init__.py +43 -0
- truthound/plugins/lifecycle/hot_reload.py +402 -0
- truthound/plugins/lifecycle/manager.py +371 -0
- truthound/plugins/manager.py +736 -0
- truthound/plugins/registry.py +338 -0
- truthound/plugins/security/__init__.py +93 -0
- truthound/plugins/security/exceptions.py +332 -0
- truthound/plugins/security/policies.py +348 -0
- truthound/plugins/security/protocols.py +643 -0
- truthound/plugins/security/sandbox/__init__.py +45 -0
- truthound/plugins/security/sandbox/context.py +158 -0
- truthound/plugins/security/sandbox/engines/__init__.py +19 -0
- truthound/plugins/security/sandbox/engines/container.py +379 -0
- truthound/plugins/security/sandbox/engines/noop.py +144 -0
- truthound/plugins/security/sandbox/engines/process.py +336 -0
- truthound/plugins/security/sandbox/factory.py +211 -0
- truthound/plugins/security/signing/__init__.py +57 -0
- truthound/plugins/security/signing/service.py +330 -0
- truthound/plugins/security/signing/trust_store.py +368 -0
- truthound/plugins/security/signing/verifier.py +459 -0
- truthound/plugins/versioning/__init__.py +41 -0
- truthound/plugins/versioning/constraints.py +297 -0
- truthound/plugins/versioning/resolver.py +329 -0
- truthound/profiler/__init__.py +1729 -0
- truthound/profiler/_lazy.py +452 -0
- truthound/profiler/ab_testing/__init__.py +80 -0
- truthound/profiler/ab_testing/analysis.py +449 -0
- truthound/profiler/ab_testing/base.py +257 -0
- truthound/profiler/ab_testing/experiment.py +395 -0
- truthound/profiler/ab_testing/tracking.py +368 -0
- truthound/profiler/auto_threshold.py +1170 -0
- truthound/profiler/base.py +579 -0
- truthound/profiler/cache_patterns.py +911 -0
- truthound/profiler/caching.py +1303 -0
- truthound/profiler/column_profiler.py +712 -0
- truthound/profiler/comparison.py +1007 -0
- truthound/profiler/custom_patterns.py +1170 -0
- truthound/profiler/dashboard/__init__.py +50 -0
- truthound/profiler/dashboard/app.py +476 -0
- truthound/profiler/dashboard/components.py +457 -0
- truthound/profiler/dashboard/config.py +72 -0
- truthound/profiler/distributed/__init__.py +83 -0
- truthound/profiler/distributed/base.py +281 -0
- truthound/profiler/distributed/dask_backend.py +498 -0
- truthound/profiler/distributed/local_backend.py +293 -0
- truthound/profiler/distributed/profiler.py +304 -0
- truthound/profiler/distributed/ray_backend.py +374 -0
- truthound/profiler/distributed/spark_backend.py +375 -0
- truthound/profiler/distributed.py +1366 -0
- truthound/profiler/enterprise_sampling.py +1065 -0
- truthound/profiler/errors.py +488 -0
- truthound/profiler/evolution/__init__.py +91 -0
- truthound/profiler/evolution/alerts.py +426 -0
- truthound/profiler/evolution/changes.py +206 -0
- truthound/profiler/evolution/compatibility.py +365 -0
- truthound/profiler/evolution/detector.py +372 -0
- truthound/profiler/evolution/protocols.py +121 -0
- truthound/profiler/generators/__init__.py +48 -0
- truthound/profiler/generators/base.py +384 -0
- truthound/profiler/generators/ml_rules.py +375 -0
- truthound/profiler/generators/pattern_rules.py +384 -0
- truthound/profiler/generators/schema_rules.py +267 -0
- truthound/profiler/generators/stats_rules.py +324 -0
- truthound/profiler/generators/suite_generator.py +857 -0
- truthound/profiler/i18n.py +1542 -0
- truthound/profiler/incremental.py +554 -0
- truthound/profiler/incremental_validation.py +1710 -0
- truthound/profiler/integration/__init__.py +73 -0
- truthound/profiler/integration/adapters.py +345 -0
- truthound/profiler/integration/context.py +371 -0
- truthound/profiler/integration/executor.py +527 -0
- truthound/profiler/integration/naming.py +75 -0
- truthound/profiler/integration/protocols.py +243 -0
- truthound/profiler/memory.py +1185 -0
- truthound/profiler/migration/__init__.py +60 -0
- truthound/profiler/migration/base.py +345 -0
- truthound/profiler/migration/manager.py +444 -0
- truthound/profiler/migration/v1_0_to_v1_1.py +484 -0
- truthound/profiler/ml/__init__.py +73 -0
- truthound/profiler/ml/base.py +244 -0
- truthound/profiler/ml/classifier.py +507 -0
- truthound/profiler/ml/feature_extraction.py +604 -0
- truthound/profiler/ml/pretrained.py +448 -0
- truthound/profiler/ml_inference.py +1276 -0
- truthound/profiler/native_patterns.py +815 -0
- truthound/profiler/observability.py +1184 -0
- truthound/profiler/process_timeout.py +1566 -0
- truthound/profiler/progress.py +568 -0
- truthound/profiler/progress_callbacks.py +1734 -0
- truthound/profiler/quality.py +1345 -0
- truthound/profiler/resilience.py +1180 -0
- truthound/profiler/sampled_matcher.py +794 -0
- truthound/profiler/sampling.py +1288 -0
- truthound/profiler/scheduling/__init__.py +82 -0
- truthound/profiler/scheduling/protocols.py +214 -0
- truthound/profiler/scheduling/scheduler.py +474 -0
- truthound/profiler/scheduling/storage.py +457 -0
- truthound/profiler/scheduling/triggers.py +449 -0
- truthound/profiler/schema.py +603 -0
- truthound/profiler/streaming.py +685 -0
- truthound/profiler/streaming_patterns.py +1354 -0
- truthound/profiler/suite_cli.py +625 -0
- truthound/profiler/suite_config.py +789 -0
- truthound/profiler/suite_export.py +1268 -0
- truthound/profiler/table_profiler.py +547 -0
- truthound/profiler/timeout.py +565 -0
- truthound/profiler/validation.py +1532 -0
- truthound/profiler/visualization/__init__.py +118 -0
- truthound/profiler/visualization/base.py +346 -0
- truthound/profiler/visualization/generator.py +1259 -0
- truthound/profiler/visualization/plotly_renderer.py +811 -0
- truthound/profiler/visualization/renderers.py +669 -0
- truthound/profiler/visualization/sections.py +540 -0
- truthound/profiler/visualization.py +2122 -0
- truthound/profiler/yaml_validation.py +1151 -0
- truthound/py.typed +0 -0
- truthound/ratelimit/__init__.py +248 -0
- truthound/ratelimit/algorithms.py +1108 -0
- truthound/ratelimit/core.py +573 -0
- truthound/ratelimit/integration.py +532 -0
- truthound/ratelimit/limiter.py +663 -0
- truthound/ratelimit/middleware.py +700 -0
- truthound/ratelimit/policy.py +792 -0
- truthound/ratelimit/storage.py +763 -0
- truthound/rbac/__init__.py +340 -0
- truthound/rbac/core.py +976 -0
- truthound/rbac/integration.py +760 -0
- truthound/rbac/manager.py +1052 -0
- truthound/rbac/middleware.py +842 -0
- truthound/rbac/policy.py +954 -0
- truthound/rbac/storage.py +878 -0
- truthound/realtime/__init__.py +141 -0
- truthound/realtime/adapters/__init__.py +43 -0
- truthound/realtime/adapters/base.py +533 -0
- truthound/realtime/adapters/kafka.py +487 -0
- truthound/realtime/adapters/kinesis.py +479 -0
- truthound/realtime/adapters/mock.py +243 -0
- truthound/realtime/base.py +553 -0
- truthound/realtime/factory.py +382 -0
- truthound/realtime/incremental.py +660 -0
- truthound/realtime/processing/__init__.py +67 -0
- truthound/realtime/processing/exactly_once.py +575 -0
- truthound/realtime/processing/state.py +547 -0
- truthound/realtime/processing/windows.py +647 -0
- truthound/realtime/protocols.py +569 -0
- truthound/realtime/streaming.py +605 -0
- truthound/realtime/testing/__init__.py +32 -0
- truthound/realtime/testing/containers.py +615 -0
- truthound/realtime/testing/fixtures.py +484 -0
- truthound/report.py +280 -0
- truthound/reporters/__init__.py +46 -0
- truthound/reporters/_protocols.py +30 -0
- truthound/reporters/base.py +324 -0
- truthound/reporters/ci/__init__.py +66 -0
- truthound/reporters/ci/azure.py +436 -0
- truthound/reporters/ci/base.py +509 -0
- truthound/reporters/ci/bitbucket.py +567 -0
- truthound/reporters/ci/circleci.py +547 -0
- truthound/reporters/ci/detection.py +364 -0
- truthound/reporters/ci/factory.py +182 -0
- truthound/reporters/ci/github.py +388 -0
- truthound/reporters/ci/gitlab.py +471 -0
- truthound/reporters/ci/jenkins.py +525 -0
- truthound/reporters/console_reporter.py +299 -0
- truthound/reporters/factory.py +211 -0
- truthound/reporters/html_reporter.py +524 -0
- truthound/reporters/json_reporter.py +256 -0
- truthound/reporters/markdown_reporter.py +280 -0
- truthound/reporters/sdk/__init__.py +174 -0
- truthound/reporters/sdk/builder.py +558 -0
- truthound/reporters/sdk/mixins.py +1150 -0
- truthound/reporters/sdk/schema.py +1493 -0
- truthound/reporters/sdk/templates.py +666 -0
- truthound/reporters/sdk/testing.py +968 -0
- truthound/scanners.py +170 -0
- truthound/scheduling/__init__.py +122 -0
- truthound/scheduling/cron.py +1136 -0
- truthound/scheduling/presets.py +212 -0
- truthound/schema.py +275 -0
- truthound/secrets/__init__.py +173 -0
- truthound/secrets/base.py +618 -0
- truthound/secrets/cloud.py +682 -0
- truthound/secrets/integration.py +507 -0
- truthound/secrets/manager.py +633 -0
- truthound/secrets/oidc/__init__.py +172 -0
- truthound/secrets/oidc/base.py +902 -0
- truthound/secrets/oidc/credential_provider.py +623 -0
- truthound/secrets/oidc/exchangers.py +1001 -0
- truthound/secrets/oidc/github/__init__.py +110 -0
- truthound/secrets/oidc/github/claims.py +718 -0
- truthound/secrets/oidc/github/enhanced_provider.py +693 -0
- truthound/secrets/oidc/github/trust_policy.py +742 -0
- truthound/secrets/oidc/github/verification.py +723 -0
- truthound/secrets/oidc/github/workflow.py +691 -0
- truthound/secrets/oidc/providers.py +825 -0
- truthound/secrets/providers.py +506 -0
- truthound/secrets/resolver.py +495 -0
- truthound/stores/__init__.py +177 -0
- truthound/stores/backends/__init__.py +18 -0
- truthound/stores/backends/_protocols.py +340 -0
- truthound/stores/backends/azure_blob.py +530 -0
- truthound/stores/backends/concurrent_filesystem.py +915 -0
- truthound/stores/backends/connection_pool.py +1365 -0
- truthound/stores/backends/database.py +743 -0
- truthound/stores/backends/filesystem.py +538 -0
- truthound/stores/backends/gcs.py +399 -0
- truthound/stores/backends/memory.py +354 -0
- truthound/stores/backends/s3.py +434 -0
- truthound/stores/backpressure/__init__.py +84 -0
- truthound/stores/backpressure/base.py +375 -0
- truthound/stores/backpressure/circuit_breaker.py +434 -0
- truthound/stores/backpressure/monitor.py +376 -0
- truthound/stores/backpressure/strategies.py +677 -0
- truthound/stores/base.py +551 -0
- truthound/stores/batching/__init__.py +65 -0
- truthound/stores/batching/base.py +305 -0
- truthound/stores/batching/buffer.py +370 -0
- truthound/stores/batching/store.py +248 -0
- truthound/stores/batching/writer.py +521 -0
- truthound/stores/caching/__init__.py +60 -0
- truthound/stores/caching/backends.py +684 -0
- truthound/stores/caching/base.py +356 -0
- truthound/stores/caching/store.py +305 -0
- truthound/stores/compression/__init__.py +193 -0
- truthound/stores/compression/adaptive.py +694 -0
- truthound/stores/compression/base.py +514 -0
- truthound/stores/compression/pipeline.py +868 -0
- truthound/stores/compression/providers.py +672 -0
- truthound/stores/compression/streaming.py +832 -0
- truthound/stores/concurrency/__init__.py +81 -0
- truthound/stores/concurrency/atomic.py +556 -0
- truthound/stores/concurrency/index.py +775 -0
- truthound/stores/concurrency/locks.py +576 -0
- truthound/stores/concurrency/manager.py +482 -0
- truthound/stores/encryption/__init__.py +297 -0
- truthound/stores/encryption/base.py +952 -0
- truthound/stores/encryption/keys.py +1191 -0
- truthound/stores/encryption/pipeline.py +903 -0
- truthound/stores/encryption/providers.py +953 -0
- truthound/stores/encryption/streaming.py +950 -0
- truthound/stores/expectations.py +227 -0
- truthound/stores/factory.py +246 -0
- truthound/stores/migration/__init__.py +75 -0
- truthound/stores/migration/base.py +480 -0
- truthound/stores/migration/manager.py +347 -0
- truthound/stores/migration/registry.py +382 -0
- truthound/stores/migration/store.py +559 -0
- truthound/stores/observability/__init__.py +106 -0
- truthound/stores/observability/audit.py +718 -0
- truthound/stores/observability/config.py +270 -0
- truthound/stores/observability/factory.py +208 -0
- truthound/stores/observability/metrics.py +636 -0
- truthound/stores/observability/protocols.py +410 -0
- truthound/stores/observability/store.py +570 -0
- truthound/stores/observability/tracing.py +784 -0
- truthound/stores/replication/__init__.py +76 -0
- truthound/stores/replication/base.py +260 -0
- truthound/stores/replication/monitor.py +269 -0
- truthound/stores/replication/store.py +439 -0
- truthound/stores/replication/syncer.py +391 -0
- truthound/stores/results.py +359 -0
- truthound/stores/retention/__init__.py +77 -0
- truthound/stores/retention/base.py +378 -0
- truthound/stores/retention/policies.py +621 -0
- truthound/stores/retention/scheduler.py +279 -0
- truthound/stores/retention/store.py +526 -0
- truthound/stores/streaming/__init__.py +138 -0
- truthound/stores/streaming/base.py +801 -0
- truthound/stores/streaming/database.py +984 -0
- truthound/stores/streaming/filesystem.py +719 -0
- truthound/stores/streaming/reader.py +629 -0
- truthound/stores/streaming/s3.py +843 -0
- truthound/stores/streaming/writer.py +790 -0
- truthound/stores/tiering/__init__.py +108 -0
- truthound/stores/tiering/base.py +462 -0
- truthound/stores/tiering/manager.py +249 -0
- truthound/stores/tiering/policies.py +692 -0
- truthound/stores/tiering/store.py +526 -0
- truthound/stores/versioning/__init__.py +56 -0
- truthound/stores/versioning/base.py +376 -0
- truthound/stores/versioning/store.py +660 -0
- truthound/stores/versioning/strategies.py +353 -0
- truthound/types.py +56 -0
- truthound/validators/__init__.py +774 -0
- truthound/validators/aggregate/__init__.py +27 -0
- truthound/validators/aggregate/central.py +116 -0
- truthound/validators/aggregate/extremes.py +116 -0
- truthound/validators/aggregate/spread.py +118 -0
- truthound/validators/aggregate/sum.py +64 -0
- truthound/validators/aggregate/type.py +78 -0
- truthound/validators/anomaly/__init__.py +93 -0
- truthound/validators/anomaly/base.py +431 -0
- truthound/validators/anomaly/ml_based.py +1190 -0
- truthound/validators/anomaly/multivariate.py +647 -0
- truthound/validators/anomaly/statistical.py +599 -0
- truthound/validators/base.py +1089 -0
- truthound/validators/business_rule/__init__.py +46 -0
- truthound/validators/business_rule/base.py +147 -0
- truthound/validators/business_rule/checksum.py +509 -0
- truthound/validators/business_rule/financial.py +526 -0
- truthound/validators/cache.py +733 -0
- truthound/validators/completeness/__init__.py +39 -0
- truthound/validators/completeness/conditional.py +73 -0
- truthound/validators/completeness/default.py +98 -0
- truthound/validators/completeness/empty.py +103 -0
- truthound/validators/completeness/nan.py +337 -0
- truthound/validators/completeness/null.py +152 -0
- truthound/validators/cross_table/__init__.py +17 -0
- truthound/validators/cross_table/aggregate.py +333 -0
- truthound/validators/cross_table/row_count.py +122 -0
- truthound/validators/datetime/__init__.py +29 -0
- truthound/validators/datetime/format.py +78 -0
- truthound/validators/datetime/freshness.py +269 -0
- truthound/validators/datetime/order.py +73 -0
- truthound/validators/datetime/parseable.py +185 -0
- truthound/validators/datetime/range.py +202 -0
- truthound/validators/datetime/timezone.py +69 -0
- truthound/validators/distribution/__init__.py +49 -0
- truthound/validators/distribution/distribution.py +128 -0
- truthound/validators/distribution/monotonic.py +119 -0
- truthound/validators/distribution/outlier.py +178 -0
- truthound/validators/distribution/quantile.py +80 -0
- truthound/validators/distribution/range.py +254 -0
- truthound/validators/distribution/set.py +125 -0
- truthound/validators/distribution/statistical.py +459 -0
- truthound/validators/drift/__init__.py +79 -0
- truthound/validators/drift/base.py +427 -0
- truthound/validators/drift/multi_feature.py +401 -0
- truthound/validators/drift/numeric.py +395 -0
- truthound/validators/drift/psi.py +446 -0
- truthound/validators/drift/statistical.py +510 -0
- truthound/validators/enterprise.py +1658 -0
- truthound/validators/geospatial/__init__.py +80 -0
- truthound/validators/geospatial/base.py +97 -0
- truthound/validators/geospatial/boundary.py +238 -0
- truthound/validators/geospatial/coordinate.py +351 -0
- truthound/validators/geospatial/distance.py +399 -0
- truthound/validators/geospatial/polygon.py +665 -0
- truthound/validators/i18n/__init__.py +308 -0
- truthound/validators/i18n/bidi.py +571 -0
- truthound/validators/i18n/catalogs.py +570 -0
- truthound/validators/i18n/dialects.py +763 -0
- truthound/validators/i18n/extended_catalogs.py +549 -0
- truthound/validators/i18n/formatting.py +1434 -0
- truthound/validators/i18n/loader.py +1020 -0
- truthound/validators/i18n/messages.py +521 -0
- truthound/validators/i18n/plural.py +683 -0
- truthound/validators/i18n/protocols.py +855 -0
- truthound/validators/i18n/tms.py +1162 -0
- truthound/validators/localization/__init__.py +53 -0
- truthound/validators/localization/base.py +122 -0
- truthound/validators/localization/chinese.py +362 -0
- truthound/validators/localization/japanese.py +275 -0
- truthound/validators/localization/korean.py +524 -0
- truthound/validators/memory/__init__.py +94 -0
- truthound/validators/memory/approximate_knn.py +506 -0
- truthound/validators/memory/base.py +547 -0
- truthound/validators/memory/sgd_online.py +719 -0
- truthound/validators/memory/streaming_ecdf.py +753 -0
- truthound/validators/ml_feature/__init__.py +54 -0
- truthound/validators/ml_feature/base.py +249 -0
- truthound/validators/ml_feature/correlation.py +299 -0
- truthound/validators/ml_feature/leakage.py +344 -0
- truthound/validators/ml_feature/null_impact.py +270 -0
- truthound/validators/ml_feature/scale.py +264 -0
- truthound/validators/multi_column/__init__.py +89 -0
- truthound/validators/multi_column/arithmetic.py +284 -0
- truthound/validators/multi_column/base.py +231 -0
- truthound/validators/multi_column/comparison.py +273 -0
- truthound/validators/multi_column/consistency.py +312 -0
- truthound/validators/multi_column/statistical.py +299 -0
- truthound/validators/optimization/__init__.py +164 -0
- truthound/validators/optimization/aggregation.py +563 -0
- truthound/validators/optimization/covariance.py +556 -0
- truthound/validators/optimization/geo.py +626 -0
- truthound/validators/optimization/graph.py +587 -0
- truthound/validators/optimization/orchestrator.py +970 -0
- truthound/validators/optimization/profiling.py +1312 -0
- truthound/validators/privacy/__init__.py +223 -0
- truthound/validators/privacy/base.py +635 -0
- truthound/validators/privacy/ccpa.py +670 -0
- truthound/validators/privacy/gdpr.py +728 -0
- truthound/validators/privacy/global_patterns.py +604 -0
- truthound/validators/privacy/plugins.py +867 -0
- truthound/validators/profiling/__init__.py +52 -0
- truthound/validators/profiling/base.py +175 -0
- truthound/validators/profiling/cardinality.py +312 -0
- truthound/validators/profiling/entropy.py +391 -0
- truthound/validators/profiling/frequency.py +455 -0
- truthound/validators/pushdown_support.py +660 -0
- truthound/validators/query/__init__.py +91 -0
- truthound/validators/query/aggregate.py +346 -0
- truthound/validators/query/base.py +246 -0
- truthound/validators/query/column.py +249 -0
- truthound/validators/query/expression.py +274 -0
- truthound/validators/query/result.py +323 -0
- truthound/validators/query/row_count.py +264 -0
- truthound/validators/referential/__init__.py +80 -0
- truthound/validators/referential/base.py +395 -0
- truthound/validators/referential/cascade.py +391 -0
- truthound/validators/referential/circular.py +563 -0
- truthound/validators/referential/foreign_key.py +624 -0
- truthound/validators/referential/orphan.py +485 -0
- truthound/validators/registry.py +112 -0
- truthound/validators/schema/__init__.py +41 -0
- truthound/validators/schema/column_count.py +142 -0
- truthound/validators/schema/column_exists.py +80 -0
- truthound/validators/schema/column_order.py +82 -0
- truthound/validators/schema/column_pair.py +85 -0
- truthound/validators/schema/column_pair_set.py +195 -0
- truthound/validators/schema/column_type.py +94 -0
- truthound/validators/schema/multi_column.py +53 -0
- truthound/validators/schema/multi_column_aggregate.py +175 -0
- truthound/validators/schema/referential.py +274 -0
- truthound/validators/schema/table_schema.py +91 -0
- truthound/validators/schema_validator.py +219 -0
- truthound/validators/sdk/__init__.py +250 -0
- truthound/validators/sdk/builder.py +680 -0
- truthound/validators/sdk/decorators.py +474 -0
- truthound/validators/sdk/enterprise/__init__.py +211 -0
- truthound/validators/sdk/enterprise/docs.py +725 -0
- truthound/validators/sdk/enterprise/fuzzing.py +659 -0
- truthound/validators/sdk/enterprise/licensing.py +709 -0
- truthound/validators/sdk/enterprise/manager.py +543 -0
- truthound/validators/sdk/enterprise/resources.py +628 -0
- truthound/validators/sdk/enterprise/sandbox.py +766 -0
- truthound/validators/sdk/enterprise/signing.py +603 -0
- truthound/validators/sdk/enterprise/templates.py +865 -0
- truthound/validators/sdk/enterprise/versioning.py +659 -0
- truthound/validators/sdk/templates.py +757 -0
- truthound/validators/sdk/testing.py +807 -0
- truthound/validators/security/__init__.py +181 -0
- truthound/validators/security/redos/__init__.py +182 -0
- truthound/validators/security/redos/core.py +861 -0
- truthound/validators/security/redos/cpu_monitor.py +593 -0
- truthound/validators/security/redos/cve_database.py +791 -0
- truthound/validators/security/redos/ml/__init__.py +155 -0
- truthound/validators/security/redos/ml/base.py +785 -0
- truthound/validators/security/redos/ml/datasets.py +618 -0
- truthound/validators/security/redos/ml/features.py +359 -0
- truthound/validators/security/redos/ml/models.py +1000 -0
- truthound/validators/security/redos/ml/predictor.py +507 -0
- truthound/validators/security/redos/ml/storage.py +632 -0
- truthound/validators/security/redos/ml/training.py +571 -0
- truthound/validators/security/redos/ml_analyzer.py +937 -0
- truthound/validators/security/redos/optimizer.py +674 -0
- truthound/validators/security/redos/profiler.py +682 -0
- truthound/validators/security/redos/re2_engine.py +709 -0
- truthound/validators/security/redos.py +886 -0
- truthound/validators/security/sql_security.py +1247 -0
- truthound/validators/streaming/__init__.py +126 -0
- truthound/validators/streaming/base.py +292 -0
- truthound/validators/streaming/completeness.py +210 -0
- truthound/validators/streaming/mixin.py +575 -0
- truthound/validators/streaming/range.py +308 -0
- truthound/validators/streaming/sources.py +846 -0
- truthound/validators/string/__init__.py +57 -0
- truthound/validators/string/casing.py +158 -0
- truthound/validators/string/charset.py +96 -0
- truthound/validators/string/format.py +501 -0
- truthound/validators/string/json.py +77 -0
- truthound/validators/string/json_schema.py +184 -0
- truthound/validators/string/length.py +104 -0
- truthound/validators/string/like_pattern.py +237 -0
- truthound/validators/string/regex.py +202 -0
- truthound/validators/string/regex_extended.py +435 -0
- truthound/validators/table/__init__.py +88 -0
- truthound/validators/table/base.py +78 -0
- truthound/validators/table/column_count.py +198 -0
- truthound/validators/table/freshness.py +362 -0
- truthound/validators/table/row_count.py +251 -0
- truthound/validators/table/schema.py +333 -0
- truthound/validators/table/size.py +285 -0
- truthound/validators/timeout/__init__.py +102 -0
- truthound/validators/timeout/advanced/__init__.py +247 -0
- truthound/validators/timeout/advanced/circuit_breaker.py +675 -0
- truthound/validators/timeout/advanced/prediction.py +773 -0
- truthound/validators/timeout/advanced/priority.py +618 -0
- truthound/validators/timeout/advanced/redis_backend.py +770 -0
- truthound/validators/timeout/advanced/retry.py +721 -0
- truthound/validators/timeout/advanced/sampling.py +788 -0
- truthound/validators/timeout/advanced/sla.py +661 -0
- truthound/validators/timeout/advanced/telemetry.py +804 -0
- truthound/validators/timeout/cascade.py +477 -0
- truthound/validators/timeout/deadline.py +657 -0
- truthound/validators/timeout/degradation.py +525 -0
- truthound/validators/timeout/distributed.py +597 -0
- truthound/validators/timeseries/__init__.py +89 -0
- truthound/validators/timeseries/base.py +326 -0
- truthound/validators/timeseries/completeness.py +617 -0
- truthound/validators/timeseries/gap.py +485 -0
- truthound/validators/timeseries/monotonic.py +310 -0
- truthound/validators/timeseries/seasonality.py +422 -0
- truthound/validators/timeseries/trend.py +510 -0
- truthound/validators/uniqueness/__init__.py +59 -0
- truthound/validators/uniqueness/approximate.py +475 -0
- truthound/validators/uniqueness/distinct_values.py +253 -0
- truthound/validators/uniqueness/duplicate.py +118 -0
- truthound/validators/uniqueness/primary_key.py +140 -0
- truthound/validators/uniqueness/unique.py +191 -0
- truthound/validators/uniqueness/within_record.py +599 -0
- truthound/validators/utils.py +756 -0
- truthound-1.0.8.dist-info/METADATA +474 -0
- truthound-1.0.8.dist-info/RECORD +877 -0
- truthound-1.0.8.dist-info/WHEEL +4 -0
- truthound-1.0.8.dist-info/entry_points.txt +2 -0
- truthound-1.0.8.dist-info/licenses/LICENSE +190 -0
|
@@ -0,0 +1,710 @@
|
|
|
1
|
+
"""Base classes for SQL data sources.
|
|
2
|
+
|
|
3
|
+
This module provides the abstract base class for SQL-based data sources,
|
|
4
|
+
with connection pooling and common SQL operations.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from __future__ import annotations
|
|
8
|
+
|
|
9
|
+
from abc import abstractmethod
|
|
10
|
+
from contextlib import contextmanager
|
|
11
|
+
from dataclasses import dataclass, field
|
|
12
|
+
from queue import Queue, Empty
|
|
13
|
+
from threading import Lock
|
|
14
|
+
from typing import TYPE_CHECKING, Any, Iterator
|
|
15
|
+
|
|
16
|
+
from truthound.datasources._protocols import (
|
|
17
|
+
ColumnType,
|
|
18
|
+
DataSourceCapability,
|
|
19
|
+
)
|
|
20
|
+
from truthound.datasources.base import (
|
|
21
|
+
BaseDataSource,
|
|
22
|
+
DataSourceConfig,
|
|
23
|
+
DataSourceConnectionError,
|
|
24
|
+
DataSourceError,
|
|
25
|
+
sql_type_to_column_type,
|
|
26
|
+
)
|
|
27
|
+
|
|
28
|
+
if TYPE_CHECKING:
|
|
29
|
+
import polars as pl
|
|
30
|
+
from truthound.execution.base import BaseExecutionEngine
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
# =============================================================================
|
|
34
|
+
# Configuration
|
|
35
|
+
# =============================================================================
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
@dataclass
class SQLDataSourceConfig(DataSourceConfig):
    """Settings for SQL-backed data sources, layered on ``DataSourceConfig``.

    Attributes:
        pool_size: Number of connections in the pool.
        pool_timeout: Timeout for acquiring a connection from the pool.
        query_timeout: Timeout for query execution.
        fetch_size: Number of rows to fetch at a time.
        use_server_side_cursor: Use a server-side cursor for large results.
        schema_name: Database schema name (for PostgreSQL, etc.).
    """

    # --- connection pooling ---
    pool_size: int = 5
    pool_timeout: float = 30.0

    # --- query execution ---
    query_timeout: float = 300.0
    fetch_size: int = 10000
    use_server_side_cursor: bool = False

    # --- namespace; None means no schema qualifier is applied ---
    schema_name: str | None = None
|
|
57
|
+
|
|
58
|
+
|
|
59
|
+
# =============================================================================
|
|
60
|
+
# Connection Pool
|
|
61
|
+
# =============================================================================
|
|
62
|
+
|
|
63
|
+
|
|
64
|
+
class SQLConnectionPool:
    """Thread-safe connection pool for SQL databases.

    Connections are created lazily through the supplied factory, at most
    ``size`` of them, and are reused across operations. A connection that is
    released after the pool has been closed is closed instead of being put
    back, so closing the pool never leaks checked-out connections.

    Example:
        >>> pool = SQLConnectionPool(create_connection, size=5)
        >>> with pool.acquire() as conn:
        ...     cursor = conn.cursor()
        ...     cursor.execute("SELECT * FROM users")
    """

    def __init__(
        self,
        connection_factory: callable,
        size: int = 5,
        timeout: float = 30.0,
    ) -> None:
        """Initialize connection pool.

        Args:
            connection_factory: Callable that creates a new connection.
            size: Maximum number of connections.
            timeout: Timeout (seconds) for acquiring a connection.
        """
        self._factory = connection_factory
        self._size = size
        self._timeout = timeout
        self._pool: Queue = Queue(maxsize=size)
        self._lock = Lock()
        self._created = 0
        self._closed = False

    def _create_connection(self) -> Any:
        """Create a new connection if the pool has not reached its size limit.

        Returns:
            A fresh connection, or ``None`` when ``size`` connections have
            already been created.
        """
        with self._lock:
            if self._created >= self._size:
                return None
            conn = self._factory()
            # Count only after the factory succeeded so a failed attempt
            # does not consume a pool slot.
            self._created += 1
            return conn

    @staticmethod
    def _close_quietly(conn: Any) -> None:
        """Close ``conn``, ignoring any error raised by the driver."""
        try:
            conn.close()
        except Exception:
            # Best-effort cleanup: the connection is being discarded anyway.
            pass

    def _release(self, conn: Any) -> None:
        """Give a connection back to the pool, or close it if that is impossible."""
        if conn is None:
            return
        if self._closed:
            # The pool was closed while this connection was checked out;
            # close() already drained the queue, so close it here.
            self._close_quietly(conn)
            return
        try:
            self._pool.put_nowait(conn)
        except Exception:
            # Pool is full, close the connection
            self._close_quietly(conn)

    @contextmanager
    def acquire(self) -> Iterator[Any]:
        """Acquire a connection from the pool.

        An idle connection is reused when available; otherwise a new one is
        created while under the size limit; otherwise the call waits up to
        the configured timeout for a connection to be released.

        Yields:
            Database connection.

        Raises:
            DataSourceConnectionError: If the pool is closed or no connection
                becomes available within the timeout.
        """
        if self._closed:
            raise DataSourceConnectionError("pool", "Connection pool is closed")

        # Try to get from pool first
        try:
            conn = self._pool.get_nowait()
        except Empty:
            # Try to create new connection
            conn = self._create_connection()

        if conn is None:
            # Pool is full, wait for available connection
            try:
                conn = self._pool.get(timeout=self._timeout)
            except Empty:
                raise DataSourceConnectionError(
                    "pool",
                    f"Timeout waiting for connection after {self._timeout}s"
                ) from None

        try:
            yield conn
        finally:
            self._release(conn)

    def close(self) -> None:
        """Close all idle connections and mark the pool as closed.

        Connections that are currently checked out are closed when they are
        released.
        """
        self._closed = True
        while True:
            try:
                conn = self._pool.get_nowait()
            except Empty:
                break
            self._close_quietly(conn)

    @property
    def size(self) -> int:
        """Get pool size."""
        return self._size

    @property
    def available(self) -> int:
        """Get number of available connections."""
        return self._pool.qsize()
|
|
175
|
+
|
|
176
|
+
|
|
177
|
+
# =============================================================================
|
|
178
|
+
# Abstract Base SQL Data Source
|
|
179
|
+
# =============================================================================
|
|
180
|
+
|
|
181
|
+
|
|
182
|
+
class BaseSQLDataSource(BaseDataSource[SQLDataSourceConfig]):
|
|
183
|
+
"""Abstract base class for SQL-based data sources.
|
|
184
|
+
|
|
185
|
+
This class provides common functionality for all SQL databases,
|
|
186
|
+
including connection pooling, schema introspection, and query execution.
|
|
187
|
+
|
|
188
|
+
Supports two modes of operation:
|
|
189
|
+
- **Table mode**: Validate an existing table or view
|
|
190
|
+
- **Query mode**: Validate results from a custom SQL query
|
|
191
|
+
|
|
192
|
+
Subclasses must implement:
|
|
193
|
+
- _create_connection(): Create a database connection
|
|
194
|
+
- _get_table_schema_query(): Get SQL that returns column names and types
|
|
195
|
+
- _get_row_count_query(): Get SQL for counting rows
|
|
196
|
+
|
|
197
|
+
Example:
|
|
198
|
+
>>> # Table mode
|
|
199
|
+
>>> source = SQLiteDataSource(database="db.sqlite", table="users")
|
|
200
|
+
>>>
|
|
201
|
+
>>> # Query mode
|
|
202
|
+
>>> source = SQLiteDataSource(
|
|
203
|
+
... database="db.sqlite",
|
|
204
|
+
... query="SELECT id, name FROM users WHERE active = 1"
|
|
205
|
+
... )
|
|
206
|
+
"""
|
|
207
|
+
|
|
208
|
+
source_type = "sql"
|
|
209
|
+
|
|
210
|
+
def __init__(
    self,
    table: str | None = None,
    query: str | None = None,
    config: SQLDataSourceConfig | None = None,
) -> None:
    """Set up a SQL data source in either table mode or query mode.

    Args:
        table: Name of the table or view to validate. Cannot be combined
            with ``query``.
        query: Custom SQL whose result set is validated. Cannot be combined
            with ``table``.
        config: Optional configuration, forwarded to the base class.

    Raises:
        ValueError: If neither or both of ``table`` and ``query`` are given.
    """
    super().__init__(config)

    has_table = table is not None
    has_query = query is not None

    # Exactly one of the two sources must be specified.
    if not has_table and not has_query:
        raise ValueError("Either 'table' or 'query' must be provided")
    if has_table and has_query:
        raise ValueError("'table' and 'query' are mutually exclusive; provide only one")

    self._table = table
    self._query = query
    self._is_query_mode = has_query
    # Pool and database schema are populated lazily on first use.
    self._pool: SQLConnectionPool | None = None
    self._db_schema: list[tuple[str, str]] | None = None
|
|
239
|
+
|
|
240
|
+
@classmethod
def _default_config(cls) -> SQLDataSourceConfig:
    """Return a ``SQLDataSourceConfig`` built with all default field values."""
    defaults = SQLDataSourceConfig()
    return defaults
|
|
243
|
+
|
|
244
|
+
@property
def table_name(self) -> str | None:
    """Name of the table or view being validated.

    Returns:
        The table name given at construction, or ``None`` in query mode.
    """
    return self._table
|
|
248
|
+
|
|
249
|
+
@property
def query_sql(self) -> str | None:
    """Custom SQL query being validated.

    Returns:
        The query text given at construction, or ``None`` in table mode.
    """
    return self._query
|
|
253
|
+
|
|
254
|
+
@property
def is_query_mode(self) -> bool:
    """Whether this source validates a custom query rather than a table."""
    return self._is_query_mode
|
|
258
|
+
|
|
259
|
+
@property
def name(self) -> str:
    """Human-readable name of this data source.

    Returns:
        The configured name when one is set; otherwise
        ``"<source_type>:<table>"`` in table mode, or
        ``"<source_type>:query(<preview>)"`` in query mode, where the preview
        is the query cut to 50 characters plus ``"..."`` when it is longer.
    """
    configured = self._config.name
    if configured:
        return configured

    if not self._is_query_mode:
        return f"{self.source_type}:{self._table}"

    # Keep the display name short for long queries.
    sql = self._query
    if len(sql) > 50:
        preview = sql[:50] + "..."
    else:
        preview = sql
    return f"{self.source_type}:query({preview})"
|
|
269
|
+
|
|
270
|
+
@property
def full_table_name(self) -> str:
    """Expression usable in a ``FROM`` clause for this source.

    Returns:
        In query mode, the custom query wrapped in parentheses and aliased as
        ``_query_result``. In table mode, ``schema.table`` when a schema name
        is configured, otherwise the bare table name.
    """
    if self._is_query_mode:
        # A subquery needs an alias to be valid in a FROM clause.
        return f"({self._query}) AS _query_result"

    schema = self._config.schema_name
    return f"{schema}.{self._table}" if schema else self._table
|
|
282
|
+
|
|
283
|
+
# -------------------------------------------------------------------------
|
|
284
|
+
# Abstract Methods
|
|
285
|
+
# -------------------------------------------------------------------------
|
|
286
|
+
|
|
287
|
+
@abstractmethod
|
|
288
|
+
def _create_connection(self) -> Any:
|
|
289
|
+
"""Create a new database connection.
|
|
290
|
+
|
|
291
|
+
Returns:
|
|
292
|
+
Database connection object.
|
|
293
|
+
"""
|
|
294
|
+
pass
|
|
295
|
+
|
|
296
|
+
@abstractmethod
|
|
297
|
+
def _get_table_schema_query(self) -> str:
|
|
298
|
+
"""Get SQL query to retrieve table schema.
|
|
299
|
+
|
|
300
|
+
Returns:
|
|
301
|
+
SQL query that returns (column_name, data_type) rows.
|
|
302
|
+
"""
|
|
303
|
+
pass
|
|
304
|
+
|
|
305
|
+
@abstractmethod
|
|
306
|
+
def _get_row_count_query(self) -> str:
|
|
307
|
+
"""Get SQL query to count rows.
|
|
308
|
+
|
|
309
|
+
Returns:
|
|
310
|
+
SQL query that returns a single count value.
|
|
311
|
+
"""
|
|
312
|
+
pass
|
|
313
|
+
|
|
314
|
+
@abstractmethod
|
|
315
|
+
def _quote_identifier(self, identifier: str) -> str:
|
|
316
|
+
"""Quote a SQL identifier (table/column name).
|
|
317
|
+
|
|
318
|
+
Args:
|
|
319
|
+
identifier: The identifier to quote.
|
|
320
|
+
|
|
321
|
+
Returns:
|
|
322
|
+
Quoted identifier.
|
|
323
|
+
"""
|
|
324
|
+
pass
|
|
325
|
+
|
|
326
|
+
# -------------------------------------------------------------------------
|
|
327
|
+
# Connection Management
|
|
328
|
+
# -------------------------------------------------------------------------
|
|
329
|
+
|
|
330
|
+
def _connect(self) -> None:
|
|
331
|
+
"""Initialize connection pool."""
|
|
332
|
+
if self._pool is None:
|
|
333
|
+
self._pool = SQLConnectionPool(
|
|
334
|
+
connection_factory=self._create_connection,
|
|
335
|
+
size=self._config.pool_size,
|
|
336
|
+
timeout=self._config.pool_timeout,
|
|
337
|
+
)
|
|
338
|
+
self._is_connected = True
|
|
339
|
+
|
|
340
|
+
def _disconnect(self) -> None:
|
|
341
|
+
"""Close connection pool."""
|
|
342
|
+
if self._pool is not None:
|
|
343
|
+
self._pool.close()
|
|
344
|
+
self._pool = None
|
|
345
|
+
self._is_connected = False
|
|
346
|
+
|
|
347
|
+
@contextmanager
|
|
348
|
+
def _get_connection(self) -> Iterator[Any]:
|
|
349
|
+
"""Get a connection from the pool."""
|
|
350
|
+
if self._pool is None:
|
|
351
|
+
self._connect()
|
|
352
|
+
|
|
353
|
+
with self._pool.acquire() as conn:
|
|
354
|
+
yield conn
|
|
355
|
+
|
|
356
|
+
# -------------------------------------------------------------------------
|
|
357
|
+
# Schema Operations
|
|
358
|
+
# -------------------------------------------------------------------------
|
|
359
|
+
|
|
360
|
+
def _fetch_schema(self) -> list[tuple[str, str]]:
|
|
361
|
+
"""Fetch schema from database.
|
|
362
|
+
|
|
363
|
+
In table mode, uses the table schema query.
|
|
364
|
+
In query mode, infers schema from query result metadata.
|
|
365
|
+
"""
|
|
366
|
+
if self._is_query_mode:
|
|
367
|
+
return self._fetch_schema_from_query()
|
|
368
|
+
|
|
369
|
+
with self._get_connection() as conn:
|
|
370
|
+
cursor = conn.cursor()
|
|
371
|
+
cursor.execute(self._get_table_schema_query())
|
|
372
|
+
result = cursor.fetchall()
|
|
373
|
+
cursor.close()
|
|
374
|
+
return result
|
|
375
|
+
|
|
376
|
+
def _fetch_schema_from_query(self) -> list[tuple[str, str]]:
|
|
377
|
+
"""Infer schema from query result metadata.
|
|
378
|
+
|
|
379
|
+
Executes the query with LIMIT 0 to get column info without data.
|
|
380
|
+
"""
|
|
381
|
+
# Use LIMIT 0 to get metadata without fetching actual data
|
|
382
|
+
# Wrap in subquery to ensure LIMIT works with any query
|
|
383
|
+
schema_query = f"SELECT * FROM ({self._query}) AS _schema_check LIMIT 0"
|
|
384
|
+
|
|
385
|
+
with self._get_connection() as conn:
|
|
386
|
+
cursor = conn.cursor()
|
|
387
|
+
try:
|
|
388
|
+
cursor.execute(schema_query)
|
|
389
|
+
except Exception:
|
|
390
|
+
# Fallback: try direct query (some DBs don't support LIMIT 0 well)
|
|
391
|
+
cursor.execute(self._query)
|
|
392
|
+
|
|
393
|
+
# Get column names and types from cursor description
|
|
394
|
+
if cursor.description is None:
|
|
395
|
+
cursor.close()
|
|
396
|
+
return []
|
|
397
|
+
|
|
398
|
+
result = []
|
|
399
|
+
for desc in cursor.description:
|
|
400
|
+
col_name = desc[0]
|
|
401
|
+
# Type info varies by database driver
|
|
402
|
+
# desc[1] is type_code in DB-API 2.0
|
|
403
|
+
col_type = self._get_type_name_from_description(desc)
|
|
404
|
+
result.append((col_name, col_type))
|
|
405
|
+
|
|
406
|
+
cursor.close()
|
|
407
|
+
return result
|
|
408
|
+
|
|
409
|
+
def _get_type_name_from_description(self, desc: tuple) -> str:
|
|
410
|
+
"""Convert cursor description to type name.
|
|
411
|
+
|
|
412
|
+
Override in subclasses for database-specific type mapping.
|
|
413
|
+
|
|
414
|
+
Args:
|
|
415
|
+
desc: Cursor description tuple (name, type_code, ...).
|
|
416
|
+
|
|
417
|
+
Returns:
|
|
418
|
+
SQL type name string.
|
|
419
|
+
"""
|
|
420
|
+
# Default implementation - return type_code as string
|
|
421
|
+
# Subclasses should override for proper type mapping
|
|
422
|
+
type_code = desc[1] if len(desc) > 1 else None
|
|
423
|
+
if type_code is None:
|
|
424
|
+
return "UNKNOWN"
|
|
425
|
+
return str(type_code)
|
|
426
|
+
|
|
427
|
+
@property
def schema(self) -> dict[str, ColumnType]:
    """Column name to ``ColumnType`` mapping, computed once and cached.

    The native schema is fetched from the database if it has not been
    loaded yet, and each SQL type is converted with
    ``sql_type_to_column_type``.
    """
    if self._cached_schema is not None:
        return self._cached_schema

    if self._db_schema is None:
        self._db_schema = self._fetch_schema()

    converted = {}
    for column, sql_type in self._db_schema:
        converted[column] = sql_type_to_column_type(sql_type)
    self._cached_schema = converted
    return self._cached_schema
|
|
439
|
+
|
|
440
|
+
@property
def sql_schema(self) -> dict[str, str]:
    """Native SQL schema as a column name to SQL type mapping.

    The database schema is fetched on first access and kept on the
    instance; a new dict is built on every call.
    """
    if self._db_schema is None:
        self._db_schema = self._fetch_schema()

    native = {}
    for column, sql_type in self._db_schema:
        native[column] = sql_type
    return native
|
|
446
|
+
|
|
447
|
+
# -------------------------------------------------------------------------
|
|
448
|
+
# Row Count
|
|
449
|
+
# -------------------------------------------------------------------------
|
|
450
|
+
|
|
451
|
+
@property
def row_count(self) -> int | None:
    """Get row count from database.

    The count query for the current mode is executed on first access and
    the result is cached on the instance.

    Returns:
        The first column of the first result row, or 0 when the query
        returns no row.
    """
    if self._cached_row_count is None:
        with self._get_connection() as conn:
            cursor = conn.cursor()
            try:
                cursor.execute(self._get_effective_row_count_query())
                row = cursor.fetchone()
            finally:
                # Close the cursor even if the count query fails.
                cursor.close()
        self._cached_row_count = row[0] if row else 0
    return self._cached_row_count
|
|
463
|
+
|
|
464
|
+
def _get_effective_row_count_query(self) -> str:
|
|
465
|
+
"""Get the row count query for current mode.
|
|
466
|
+
|
|
467
|
+
In query mode, wraps the custom query to count its results.
|
|
468
|
+
"""
|
|
469
|
+
if self._is_query_mode:
|
|
470
|
+
return f"SELECT COUNT(*) FROM ({self._query}) AS _count_query"
|
|
471
|
+
return self._get_row_count_query()
|
|
472
|
+
|
|
473
|
+
# -------------------------------------------------------------------------
|
|
474
|
+
# Query Execution
|
|
475
|
+
# -------------------------------------------------------------------------
|
|
476
|
+
|
|
477
|
+
def execute_query(
    self,
    query: str,
    params: tuple | dict | None = None,
) -> list[dict[str, Any]]:
    """Execute a SQL query and return results.

    Args:
        query: SQL query to execute.
        params: Optional query parameters; passed to the cursor only when
            non-empty.

    Returns:
        List of dictionaries with column names as keys. Empty when the
        statement produces no result set (the cursor has no description).
    """
    with self._get_connection() as conn:
        cursor = conn.cursor()
        try:
            if params:
                cursor.execute(query, params)
            else:
                cursor.execute(query)

            description = cursor.description
            if description is None:
                # Statements without a result set (DDL/DML) expose no
                # column metadata; there is nothing to fetch.
                return []

            columns = [desc[0] for desc in description]
            rows = cursor.fetchall()
        finally:
            # Close the cursor on success and on failure alike.
            cursor.close()

    return [dict(zip(columns, row)) for row in rows]
|
|
503
|
+
|
|
504
|
+
def execute_scalar(
    self,
    query: str,
    params: tuple | dict | None = None,
) -> Any:
    """Execute a query and return a single value.

    Args:
        query: SQL query to execute.
        params: Optional query parameters; passed to the cursor only when
            non-empty.

    Returns:
        Single value from the first row/column, or ``None`` when the query
        returns no row.
    """
    with self._get_connection() as conn:
        cursor = conn.cursor()
        try:
            if params:
                cursor.execute(query, params)
            else:
                cursor.execute(query)
            row = cursor.fetchone()
        finally:
            # Close the cursor on success and on failure alike.
            cursor.close()

    return row[0] if row else None
|
|
527
|
+
|
|
528
|
+
# -------------------------------------------------------------------------
|
|
529
|
+
# Data Source Interface
|
|
530
|
+
# -------------------------------------------------------------------------
|
|
531
|
+
|
|
532
|
+
@property
def capabilities(self) -> set[DataSourceCapability]:
    """Report the capability flags advertised by this data source."""
    supported = {
        DataSourceCapability.ROW_COUNT,
        DataSourceCapability.SAMPLING,
        DataSourceCapability.SCHEMA_INFERENCE,
        DataSourceCapability.SQL_PUSHDOWN,
    }
    return supported
|
|
541
|
+
|
|
542
|
+
def get_execution_engine(self) -> "BaseExecutionEngine":
    """Build the SQL execution engine bound to this data source."""
    # The engine class is imported at call time rather than at module load.
    from truthound.execution.sql_engine import SQLExecutionEngine

    engine = SQLExecutionEngine(self)
    return engine
|
|
546
|
+
|
|
547
|
+
def sample(
    self,
    n: int = 1000,
    seed: int | None = None,
) -> "BaseSQLDataSource":
    """Return a row-limited wrapper around this data source.

    Note: Most SQL databases don't support seeded sampling,
    so the seed parameter may be ignored.

    Args:
        n: Maximum number of rows the sampled view should expose.
        seed: Random seed, passed through to the wrapper.

    Returns:
        A ``SampledSQLDataSource`` wrapping this instance.
    """
    sampled_view = SampledSQLDataSource(self, n, seed)
    return sampled_view
|
|
559
|
+
|
|
560
|
+
def to_polars_lazyframe(self) -> "pl.LazyFrame":
    """Convert to Polars LazyFrame by fetching all data.

    Warning: This loads all data into memory.

    In query mode, executes the custom query directly.
    In table mode, fetches all rows from the table.

    Returns:
        A LazyFrame built from the fetched rows. If the statement yields
        no result set, the frame has no columns.
    """
    import polars as pl

    # Check size limits before pulling the whole result set.
    self.check_size_limits()

    if self._is_query_mode:
        # In query mode, execute the custom query directly
        query = self._query
    else:
        query = f"SELECT * FROM {self.full_table_name}"

    with self._get_connection() as conn:
        cursor = conn.cursor()
        try:
            cursor.execute(query)
            # ``description`` is None when the statement returns no result
            # set; treat that as "no columns, no rows" instead of crashing.
            description = cursor.description
            rows = cursor.fetchall() if description is not None else []
        finally:
            # Release the cursor even when execute/fetch raises.
            cursor.close()

    columns = [desc[0] for desc in description or ()]

    # Convert row tuples into the column-oriented mapping Polars expects.
    data = {col: [row[i] for row in rows] for i, col in enumerate(columns)}
    return pl.DataFrame(data).lazy()
|
|
590
|
+
|
|
591
|
+
def validate_connection(self) -> bool:
    """Validate database connection.

    Opens a connection and runs ``SELECT 1`` as a probe.

    Returns:
        True when the probe query succeeds, False when any exception is
        raised while connecting or querying.
    """
    try:
        with self._get_connection() as conn:
            cursor = conn.cursor()
            try:
                cursor.execute("SELECT 1")
                cursor.fetchone()
            finally:
                # Release the cursor even when the probe query fails.
                cursor.close()
        return True
    except Exception:
        # Deliberate best-effort check: any failure means "not usable".
        return False
|
|
602
|
+
|
|
603
|
+
# -------------------------------------------------------------------------
|
|
604
|
+
# SQL Query Builders
|
|
605
|
+
# -------------------------------------------------------------------------
|
|
606
|
+
|
|
607
|
+
def build_count_query(self, condition: str | None = None) -> str:
|
|
608
|
+
"""Build a COUNT query.
|
|
609
|
+
|
|
610
|
+
Args:
|
|
611
|
+
condition: Optional WHERE condition.
|
|
612
|
+
|
|
613
|
+
Returns:
|
|
614
|
+
SQL query string.
|
|
615
|
+
"""
|
|
616
|
+
query = f"SELECT COUNT(*) FROM {self.full_table_name}"
|
|
617
|
+
if condition:
|
|
618
|
+
query += f" WHERE {condition}"
|
|
619
|
+
return query
|
|
620
|
+
|
|
621
|
+
def build_distinct_count_query(self, column: str) -> str:
    """Compose a query counting the distinct values of ``column``.

    The column name is passed through ``_quote_identifier`` before being
    placed into the statement.
    """
    quoted = self._quote_identifier(column)
    table = self.full_table_name
    return f"SELECT COUNT(DISTINCT {quoted}) FROM {table}"
|
|
625
|
+
|
|
626
|
+
def build_null_count_query(self, column: str) -> str:
    """Compose a query counting the rows where ``column`` is NULL.

    The column name is passed through ``_quote_identifier`` before being
    placed into the statement.
    """
    quoted = self._quote_identifier(column)
    table = self.full_table_name
    return f"SELECT COUNT(*) FROM {table} WHERE {quoted} IS NULL"
|
|
630
|
+
|
|
631
|
+
def build_stats_query(self, column: str) -> str:
    """Build a statistics query for a numeric column.

    The generated statement selects six aggregates over the source table,
    aliased ``count``, ``null_count``, ``mean``, ``min``, ``max`` and
    ``sum``.

    Args:
        column: Column name; quoted via ``_quote_identifier`` before use.

    Returns:
        SQL query string.
    """
    col = self._quote_identifier(column)
    # null_count is derived as total rows minus non-NULL values of the column.
    return f"""
SELECT
COUNT({col}) as count,
COUNT(*) - COUNT({col}) as null_count,
AVG({col}) as mean,
MIN({col}) as min,
MAX({col}) as max,
SUM({col}) as sum
FROM {self.full_table_name}
"""
|
|
644
|
+
|
|
645
|
+
|
|
646
|
+
# =============================================================================
|
|
647
|
+
# Sampled SQL Data Source
|
|
648
|
+
# =============================================================================
|
|
649
|
+
|
|
650
|
+
|
|
651
|
+
class SampledSQLDataSource(BaseSQLDataSource):
    """Row-limited view over another SQL data source.

    Connection creation, schema lookup and identifier quoting are delegated
    to the wrapped parent; table access goes through a ``LIMIT`` subquery.
    """

    source_type = "sql_sampled"

    def __init__(
        self,
        parent: BaseSQLDataSource,
        sample_size: int,
        seed: int | None = None,
    ) -> None:
        """Set up the sampled view.

        Args:
            parent: SQL data source being wrapped; its table name and
                config are reused for this instance.
            sample_size: Upper bound on the number of rows exposed.
            seed: Random seed (may be ignored by database).
        """
        super().__init__(table=parent.table_name, config=parent.config)
        self._parent = parent
        self._sample_size = sample_size
        self._seed = seed

    def _create_connection(self) -> Any:
        """Delegate connection creation to the parent."""
        return self._parent._create_connection()

    def _get_table_schema_query(self) -> str:
        """Delegate the schema query to the parent."""
        return self._parent._get_table_schema_query()

    def _get_row_count_query(self) -> str:
        """Build a query yielding the smaller of sample size and parent count."""
        limit = self._sample_size
        parent_count_sql = self._parent._get_row_count_query()
        return f"SELECT LEAST({limit}, ({parent_count_sql}))"

    def _quote_identifier(self, identifier: str) -> str:
        """Delegate identifier quoting to the parent."""
        return self._parent._quote_identifier(identifier)

    @property
    def full_table_name(self) -> str:
        """Table expression: the parent table wrapped in a LIMIT subquery."""
        inner = self._parent.full_table_name
        limit = self._sample_size
        return f"(SELECT * FROM {inner} LIMIT {limit}) AS sampled"

    @property
    def row_count(self) -> int | None:
        """Row count of the sample, capped at the sample size."""
        total = self._parent.row_count
        limit = self._sample_size
        # Without a known parent count, report the sample size itself.
        return limit if total is None else min(total, limit)

    def sample(self, n: int = 1000, seed: int | None = None) -> "SampledSQLDataSource":
        """Create a sample no larger than the current one.

        The new view wraps the original parent directly rather than
        nesting inside this sampled view.
        """
        capped = min(n, self._sample_size)
        return SampledSQLDataSource(self._parent, capped, seed)
|