truthound 1.0.8__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- truthound/__init__.py +162 -0
- truthound/adapters.py +100 -0
- truthound/api.py +365 -0
- truthound/audit/__init__.py +248 -0
- truthound/audit/core.py +967 -0
- truthound/audit/filters.py +620 -0
- truthound/audit/formatters.py +707 -0
- truthound/audit/logger.py +902 -0
- truthound/audit/middleware.py +571 -0
- truthound/audit/storage.py +1083 -0
- truthound/benchmark/__init__.py +123 -0
- truthound/benchmark/base.py +757 -0
- truthound/benchmark/comparison.py +635 -0
- truthound/benchmark/generators.py +706 -0
- truthound/benchmark/reporters.py +718 -0
- truthound/benchmark/runner.py +635 -0
- truthound/benchmark/scenarios.py +712 -0
- truthound/cache.py +252 -0
- truthound/checkpoint/__init__.py +136 -0
- truthound/checkpoint/actions/__init__.py +164 -0
- truthound/checkpoint/actions/base.py +324 -0
- truthound/checkpoint/actions/custom.py +234 -0
- truthound/checkpoint/actions/discord_notify.py +290 -0
- truthound/checkpoint/actions/email_notify.py +405 -0
- truthound/checkpoint/actions/github_action.py +406 -0
- truthound/checkpoint/actions/opsgenie.py +1499 -0
- truthound/checkpoint/actions/pagerduty.py +226 -0
- truthound/checkpoint/actions/slack_notify.py +233 -0
- truthound/checkpoint/actions/store_result.py +249 -0
- truthound/checkpoint/actions/teams_notify.py +1570 -0
- truthound/checkpoint/actions/telegram_notify.py +419 -0
- truthound/checkpoint/actions/update_docs.py +552 -0
- truthound/checkpoint/actions/webhook.py +293 -0
- truthound/checkpoint/analytics/__init__.py +147 -0
- truthound/checkpoint/analytics/aggregations/__init__.py +23 -0
- truthound/checkpoint/analytics/aggregations/rollup.py +481 -0
- truthound/checkpoint/analytics/aggregations/time_bucket.py +306 -0
- truthound/checkpoint/analytics/analyzers/__init__.py +17 -0
- truthound/checkpoint/analytics/analyzers/anomaly.py +386 -0
- truthound/checkpoint/analytics/analyzers/base.py +270 -0
- truthound/checkpoint/analytics/analyzers/forecast.py +421 -0
- truthound/checkpoint/analytics/analyzers/trend.py +314 -0
- truthound/checkpoint/analytics/models.py +292 -0
- truthound/checkpoint/analytics/protocols.py +549 -0
- truthound/checkpoint/analytics/service.py +718 -0
- truthound/checkpoint/analytics/stores/__init__.py +16 -0
- truthound/checkpoint/analytics/stores/base.py +306 -0
- truthound/checkpoint/analytics/stores/memory_store.py +353 -0
- truthound/checkpoint/analytics/stores/sqlite_store.py +557 -0
- truthound/checkpoint/analytics/stores/timescale_store.py +501 -0
- truthound/checkpoint/async_actions.py +794 -0
- truthound/checkpoint/async_base.py +708 -0
- truthound/checkpoint/async_checkpoint.py +617 -0
- truthound/checkpoint/async_runner.py +639 -0
- truthound/checkpoint/checkpoint.py +527 -0
- truthound/checkpoint/ci/__init__.py +61 -0
- truthound/checkpoint/ci/detector.py +355 -0
- truthound/checkpoint/ci/reporter.py +436 -0
- truthound/checkpoint/ci/templates.py +454 -0
- truthound/checkpoint/circuitbreaker/__init__.py +133 -0
- truthound/checkpoint/circuitbreaker/breaker.py +542 -0
- truthound/checkpoint/circuitbreaker/core.py +252 -0
- truthound/checkpoint/circuitbreaker/detection.py +459 -0
- truthound/checkpoint/circuitbreaker/middleware.py +389 -0
- truthound/checkpoint/circuitbreaker/registry.py +357 -0
- truthound/checkpoint/distributed/__init__.py +139 -0
- truthound/checkpoint/distributed/backends/__init__.py +35 -0
- truthound/checkpoint/distributed/backends/celery_backend.py +503 -0
- truthound/checkpoint/distributed/backends/kubernetes_backend.py +696 -0
- truthound/checkpoint/distributed/backends/local_backend.py +397 -0
- truthound/checkpoint/distributed/backends/ray_backend.py +625 -0
- truthound/checkpoint/distributed/base.py +774 -0
- truthound/checkpoint/distributed/orchestrator.py +765 -0
- truthound/checkpoint/distributed/protocols.py +842 -0
- truthound/checkpoint/distributed/registry.py +449 -0
- truthound/checkpoint/idempotency/__init__.py +120 -0
- truthound/checkpoint/idempotency/core.py +295 -0
- truthound/checkpoint/idempotency/fingerprint.py +454 -0
- truthound/checkpoint/idempotency/locking.py +604 -0
- truthound/checkpoint/idempotency/service.py +592 -0
- truthound/checkpoint/idempotency/stores.py +653 -0
- truthound/checkpoint/monitoring/__init__.py +134 -0
- truthound/checkpoint/monitoring/aggregators/__init__.py +15 -0
- truthound/checkpoint/monitoring/aggregators/base.py +372 -0
- truthound/checkpoint/monitoring/aggregators/realtime.py +300 -0
- truthound/checkpoint/monitoring/aggregators/window.py +493 -0
- truthound/checkpoint/monitoring/collectors/__init__.py +17 -0
- truthound/checkpoint/monitoring/collectors/base.py +257 -0
- truthound/checkpoint/monitoring/collectors/memory_collector.py +617 -0
- truthound/checkpoint/monitoring/collectors/prometheus_collector.py +451 -0
- truthound/checkpoint/monitoring/collectors/redis_collector.py +518 -0
- truthound/checkpoint/monitoring/events.py +410 -0
- truthound/checkpoint/monitoring/protocols.py +636 -0
- truthound/checkpoint/monitoring/service.py +578 -0
- truthound/checkpoint/monitoring/views/__init__.py +17 -0
- truthound/checkpoint/monitoring/views/base.py +172 -0
- truthound/checkpoint/monitoring/views/queue_view.py +220 -0
- truthound/checkpoint/monitoring/views/task_view.py +240 -0
- truthound/checkpoint/monitoring/views/worker_view.py +263 -0
- truthound/checkpoint/registry.py +337 -0
- truthound/checkpoint/runner.py +356 -0
- truthound/checkpoint/transaction/__init__.py +133 -0
- truthound/checkpoint/transaction/base.py +389 -0
- truthound/checkpoint/transaction/compensatable.py +537 -0
- truthound/checkpoint/transaction/coordinator.py +576 -0
- truthound/checkpoint/transaction/executor.py +622 -0
- truthound/checkpoint/transaction/idempotency.py +534 -0
- truthound/checkpoint/transaction/saga/__init__.py +143 -0
- truthound/checkpoint/transaction/saga/builder.py +584 -0
- truthound/checkpoint/transaction/saga/definition.py +515 -0
- truthound/checkpoint/transaction/saga/event_store.py +542 -0
- truthound/checkpoint/transaction/saga/patterns.py +833 -0
- truthound/checkpoint/transaction/saga/runner.py +718 -0
- truthound/checkpoint/transaction/saga/state_machine.py +793 -0
- truthound/checkpoint/transaction/saga/strategies.py +780 -0
- truthound/checkpoint/transaction/saga/testing.py +886 -0
- truthound/checkpoint/triggers/__init__.py +58 -0
- truthound/checkpoint/triggers/base.py +237 -0
- truthound/checkpoint/triggers/event.py +385 -0
- truthound/checkpoint/triggers/schedule.py +355 -0
- truthound/cli.py +2358 -0
- truthound/cli_modules/__init__.py +124 -0
- truthound/cli_modules/advanced/__init__.py +45 -0
- truthound/cli_modules/advanced/benchmark.py +343 -0
- truthound/cli_modules/advanced/docs.py +225 -0
- truthound/cli_modules/advanced/lineage.py +209 -0
- truthound/cli_modules/advanced/ml.py +320 -0
- truthound/cli_modules/advanced/realtime.py +196 -0
- truthound/cli_modules/checkpoint/__init__.py +46 -0
- truthound/cli_modules/checkpoint/init.py +114 -0
- truthound/cli_modules/checkpoint/list.py +71 -0
- truthound/cli_modules/checkpoint/run.py +159 -0
- truthound/cli_modules/checkpoint/validate.py +67 -0
- truthound/cli_modules/common/__init__.py +71 -0
- truthound/cli_modules/common/errors.py +414 -0
- truthound/cli_modules/common/options.py +419 -0
- truthound/cli_modules/common/output.py +507 -0
- truthound/cli_modules/common/protocol.py +552 -0
- truthound/cli_modules/core/__init__.py +48 -0
- truthound/cli_modules/core/check.py +123 -0
- truthound/cli_modules/core/compare.py +104 -0
- truthound/cli_modules/core/learn.py +57 -0
- truthound/cli_modules/core/mask.py +77 -0
- truthound/cli_modules/core/profile.py +65 -0
- truthound/cli_modules/core/scan.py +61 -0
- truthound/cli_modules/profiler/__init__.py +51 -0
- truthound/cli_modules/profiler/auto_profile.py +175 -0
- truthound/cli_modules/profiler/metadata.py +107 -0
- truthound/cli_modules/profiler/suite.py +283 -0
- truthound/cli_modules/registry.py +431 -0
- truthound/cli_modules/scaffolding/__init__.py +89 -0
- truthound/cli_modules/scaffolding/base.py +631 -0
- truthound/cli_modules/scaffolding/commands.py +545 -0
- truthound/cli_modules/scaffolding/plugins.py +1072 -0
- truthound/cli_modules/scaffolding/reporters.py +594 -0
- truthound/cli_modules/scaffolding/validators.py +1127 -0
- truthound/common/__init__.py +18 -0
- truthound/common/resilience/__init__.py +130 -0
- truthound/common/resilience/bulkhead.py +266 -0
- truthound/common/resilience/circuit_breaker.py +516 -0
- truthound/common/resilience/composite.py +332 -0
- truthound/common/resilience/config.py +292 -0
- truthound/common/resilience/protocols.py +217 -0
- truthound/common/resilience/rate_limiter.py +404 -0
- truthound/common/resilience/retry.py +341 -0
- truthound/datadocs/__init__.py +260 -0
- truthound/datadocs/base.py +571 -0
- truthound/datadocs/builder.py +761 -0
- truthound/datadocs/charts.py +764 -0
- truthound/datadocs/dashboard/__init__.py +63 -0
- truthound/datadocs/dashboard/app.py +576 -0
- truthound/datadocs/dashboard/components.py +584 -0
- truthound/datadocs/dashboard/state.py +240 -0
- truthound/datadocs/engine/__init__.py +46 -0
- truthound/datadocs/engine/context.py +376 -0
- truthound/datadocs/engine/pipeline.py +618 -0
- truthound/datadocs/engine/registry.py +469 -0
- truthound/datadocs/exporters/__init__.py +49 -0
- truthound/datadocs/exporters/base.py +198 -0
- truthound/datadocs/exporters/html.py +178 -0
- truthound/datadocs/exporters/json_exporter.py +253 -0
- truthound/datadocs/exporters/markdown.py +284 -0
- truthound/datadocs/exporters/pdf.py +392 -0
- truthound/datadocs/i18n/__init__.py +86 -0
- truthound/datadocs/i18n/catalog.py +960 -0
- truthound/datadocs/i18n/formatting.py +505 -0
- truthound/datadocs/i18n/loader.py +256 -0
- truthound/datadocs/i18n/plurals.py +378 -0
- truthound/datadocs/renderers/__init__.py +42 -0
- truthound/datadocs/renderers/base.py +401 -0
- truthound/datadocs/renderers/custom.py +342 -0
- truthound/datadocs/renderers/jinja.py +697 -0
- truthound/datadocs/sections.py +736 -0
- truthound/datadocs/styles.py +931 -0
- truthound/datadocs/themes/__init__.py +101 -0
- truthound/datadocs/themes/base.py +336 -0
- truthound/datadocs/themes/default.py +417 -0
- truthound/datadocs/themes/enterprise.py +419 -0
- truthound/datadocs/themes/loader.py +336 -0
- truthound/datadocs/themes.py +301 -0
- truthound/datadocs/transformers/__init__.py +57 -0
- truthound/datadocs/transformers/base.py +268 -0
- truthound/datadocs/transformers/enrichers.py +544 -0
- truthound/datadocs/transformers/filters.py +447 -0
- truthound/datadocs/transformers/i18n.py +468 -0
- truthound/datadocs/versioning/__init__.py +62 -0
- truthound/datadocs/versioning/diff.py +639 -0
- truthound/datadocs/versioning/storage.py +497 -0
- truthound/datadocs/versioning/version.py +358 -0
- truthound/datasources/__init__.py +223 -0
- truthound/datasources/_async_protocols.py +222 -0
- truthound/datasources/_protocols.py +159 -0
- truthound/datasources/adapters.py +428 -0
- truthound/datasources/async_base.py +599 -0
- truthound/datasources/async_factory.py +511 -0
- truthound/datasources/base.py +516 -0
- truthound/datasources/factory.py +433 -0
- truthound/datasources/nosql/__init__.py +47 -0
- truthound/datasources/nosql/base.py +487 -0
- truthound/datasources/nosql/elasticsearch.py +801 -0
- truthound/datasources/nosql/mongodb.py +636 -0
- truthound/datasources/pandas_optimized.py +582 -0
- truthound/datasources/pandas_source.py +216 -0
- truthound/datasources/polars_source.py +395 -0
- truthound/datasources/spark_source.py +479 -0
- truthound/datasources/sql/__init__.py +154 -0
- truthound/datasources/sql/base.py +710 -0
- truthound/datasources/sql/bigquery.py +410 -0
- truthound/datasources/sql/cloud_base.py +199 -0
- truthound/datasources/sql/databricks.py +471 -0
- truthound/datasources/sql/mysql.py +316 -0
- truthound/datasources/sql/oracle.py +427 -0
- truthound/datasources/sql/postgresql.py +321 -0
- truthound/datasources/sql/redshift.py +479 -0
- truthound/datasources/sql/snowflake.py +439 -0
- truthound/datasources/sql/sqlite.py +286 -0
- truthound/datasources/sql/sqlserver.py +437 -0
- truthound/datasources/streaming/__init__.py +47 -0
- truthound/datasources/streaming/base.py +350 -0
- truthound/datasources/streaming/kafka.py +670 -0
- truthound/decorators.py +98 -0
- truthound/docs/__init__.py +69 -0
- truthound/docs/extractor.py +971 -0
- truthound/docs/generator.py +601 -0
- truthound/docs/parser.py +1037 -0
- truthound/docs/renderer.py +999 -0
- truthound/drift/__init__.py +22 -0
- truthound/drift/compare.py +189 -0
- truthound/drift/detectors.py +464 -0
- truthound/drift/report.py +160 -0
- truthound/execution/__init__.py +65 -0
- truthound/execution/_protocols.py +324 -0
- truthound/execution/base.py +576 -0
- truthound/execution/distributed/__init__.py +179 -0
- truthound/execution/distributed/aggregations.py +731 -0
- truthound/execution/distributed/arrow_bridge.py +817 -0
- truthound/execution/distributed/base.py +550 -0
- truthound/execution/distributed/dask_engine.py +976 -0
- truthound/execution/distributed/mixins.py +766 -0
- truthound/execution/distributed/protocols.py +756 -0
- truthound/execution/distributed/ray_engine.py +1127 -0
- truthound/execution/distributed/registry.py +446 -0
- truthound/execution/distributed/spark_engine.py +1011 -0
- truthound/execution/distributed/validator_adapter.py +682 -0
- truthound/execution/pandas_engine.py +401 -0
- truthound/execution/polars_engine.py +497 -0
- truthound/execution/pushdown/__init__.py +230 -0
- truthound/execution/pushdown/ast.py +1550 -0
- truthound/execution/pushdown/builder.py +1550 -0
- truthound/execution/pushdown/dialects.py +1072 -0
- truthound/execution/pushdown/executor.py +829 -0
- truthound/execution/pushdown/optimizer.py +1041 -0
- truthound/execution/sql_engine.py +518 -0
- truthound/infrastructure/__init__.py +189 -0
- truthound/infrastructure/audit.py +1515 -0
- truthound/infrastructure/config.py +1133 -0
- truthound/infrastructure/encryption.py +1132 -0
- truthound/infrastructure/logging.py +1503 -0
- truthound/infrastructure/metrics.py +1220 -0
- truthound/lineage/__init__.py +89 -0
- truthound/lineage/base.py +746 -0
- truthound/lineage/impact_analysis.py +474 -0
- truthound/lineage/integrations/__init__.py +22 -0
- truthound/lineage/integrations/openlineage.py +548 -0
- truthound/lineage/tracker.py +512 -0
- truthound/lineage/visualization/__init__.py +33 -0
- truthound/lineage/visualization/protocols.py +145 -0
- truthound/lineage/visualization/renderers/__init__.py +20 -0
- truthound/lineage/visualization/renderers/cytoscape.py +329 -0
- truthound/lineage/visualization/renderers/d3.py +331 -0
- truthound/lineage/visualization/renderers/graphviz.py +276 -0
- truthound/lineage/visualization/renderers/mermaid.py +308 -0
- truthound/maskers.py +113 -0
- truthound/ml/__init__.py +124 -0
- truthound/ml/anomaly_models/__init__.py +31 -0
- truthound/ml/anomaly_models/ensemble.py +362 -0
- truthound/ml/anomaly_models/isolation_forest.py +444 -0
- truthound/ml/anomaly_models/statistical.py +392 -0
- truthound/ml/base.py +1178 -0
- truthound/ml/drift_detection/__init__.py +26 -0
- truthound/ml/drift_detection/concept.py +381 -0
- truthound/ml/drift_detection/distribution.py +361 -0
- truthound/ml/drift_detection/feature.py +442 -0
- truthound/ml/drift_detection/multivariate.py +495 -0
- truthound/ml/monitoring/__init__.py +88 -0
- truthound/ml/monitoring/alerting/__init__.py +33 -0
- truthound/ml/monitoring/alerting/handlers.py +427 -0
- truthound/ml/monitoring/alerting/rules.py +508 -0
- truthound/ml/monitoring/collectors/__init__.py +19 -0
- truthound/ml/monitoring/collectors/composite.py +105 -0
- truthound/ml/monitoring/collectors/drift.py +324 -0
- truthound/ml/monitoring/collectors/performance.py +179 -0
- truthound/ml/monitoring/collectors/quality.py +369 -0
- truthound/ml/monitoring/monitor.py +536 -0
- truthound/ml/monitoring/protocols.py +451 -0
- truthound/ml/monitoring/stores/__init__.py +15 -0
- truthound/ml/monitoring/stores/memory.py +201 -0
- truthound/ml/monitoring/stores/prometheus.py +296 -0
- truthound/ml/rule_learning/__init__.py +25 -0
- truthound/ml/rule_learning/constraint_miner.py +443 -0
- truthound/ml/rule_learning/pattern_learner.py +499 -0
- truthound/ml/rule_learning/profile_learner.py +462 -0
- truthound/multitenancy/__init__.py +326 -0
- truthound/multitenancy/core.py +852 -0
- truthound/multitenancy/integration.py +597 -0
- truthound/multitenancy/isolation.py +630 -0
- truthound/multitenancy/manager.py +770 -0
- truthound/multitenancy/middleware.py +765 -0
- truthound/multitenancy/quota.py +537 -0
- truthound/multitenancy/resolvers.py +603 -0
- truthound/multitenancy/storage.py +703 -0
- truthound/observability/__init__.py +307 -0
- truthound/observability/context.py +531 -0
- truthound/observability/instrumentation.py +611 -0
- truthound/observability/logging.py +887 -0
- truthound/observability/metrics.py +1157 -0
- truthound/observability/tracing/__init__.py +178 -0
- truthound/observability/tracing/baggage.py +310 -0
- truthound/observability/tracing/config.py +426 -0
- truthound/observability/tracing/exporter.py +787 -0
- truthound/observability/tracing/integration.py +1018 -0
- truthound/observability/tracing/otel/__init__.py +146 -0
- truthound/observability/tracing/otel/adapter.py +982 -0
- truthound/observability/tracing/otel/bridge.py +1177 -0
- truthound/observability/tracing/otel/compat.py +681 -0
- truthound/observability/tracing/otel/config.py +691 -0
- truthound/observability/tracing/otel/detection.py +327 -0
- truthound/observability/tracing/otel/protocols.py +426 -0
- truthound/observability/tracing/processor.py +561 -0
- truthound/observability/tracing/propagator.py +757 -0
- truthound/observability/tracing/provider.py +569 -0
- truthound/observability/tracing/resource.py +515 -0
- truthound/observability/tracing/sampler.py +487 -0
- truthound/observability/tracing/span.py +676 -0
- truthound/plugins/__init__.py +198 -0
- truthound/plugins/base.py +599 -0
- truthound/plugins/cli.py +680 -0
- truthound/plugins/dependencies/__init__.py +42 -0
- truthound/plugins/dependencies/graph.py +422 -0
- truthound/plugins/dependencies/resolver.py +417 -0
- truthound/plugins/discovery.py +379 -0
- truthound/plugins/docs/__init__.py +46 -0
- truthound/plugins/docs/extractor.py +444 -0
- truthound/plugins/docs/renderer.py +499 -0
- truthound/plugins/enterprise_manager.py +877 -0
- truthound/plugins/examples/__init__.py +19 -0
- truthound/plugins/examples/custom_validators.py +317 -0
- truthound/plugins/examples/slack_notifier.py +312 -0
- truthound/plugins/examples/xml_reporter.py +254 -0
- truthound/plugins/hooks.py +558 -0
- truthound/plugins/lifecycle/__init__.py +43 -0
- truthound/plugins/lifecycle/hot_reload.py +402 -0
- truthound/plugins/lifecycle/manager.py +371 -0
- truthound/plugins/manager.py +736 -0
- truthound/plugins/registry.py +338 -0
- truthound/plugins/security/__init__.py +93 -0
- truthound/plugins/security/exceptions.py +332 -0
- truthound/plugins/security/policies.py +348 -0
- truthound/plugins/security/protocols.py +643 -0
- truthound/plugins/security/sandbox/__init__.py +45 -0
- truthound/plugins/security/sandbox/context.py +158 -0
- truthound/plugins/security/sandbox/engines/__init__.py +19 -0
- truthound/plugins/security/sandbox/engines/container.py +379 -0
- truthound/plugins/security/sandbox/engines/noop.py +144 -0
- truthound/plugins/security/sandbox/engines/process.py +336 -0
- truthound/plugins/security/sandbox/factory.py +211 -0
- truthound/plugins/security/signing/__init__.py +57 -0
- truthound/plugins/security/signing/service.py +330 -0
- truthound/plugins/security/signing/trust_store.py +368 -0
- truthound/plugins/security/signing/verifier.py +459 -0
- truthound/plugins/versioning/__init__.py +41 -0
- truthound/plugins/versioning/constraints.py +297 -0
- truthound/plugins/versioning/resolver.py +329 -0
- truthound/profiler/__init__.py +1729 -0
- truthound/profiler/_lazy.py +452 -0
- truthound/profiler/ab_testing/__init__.py +80 -0
- truthound/profiler/ab_testing/analysis.py +449 -0
- truthound/profiler/ab_testing/base.py +257 -0
- truthound/profiler/ab_testing/experiment.py +395 -0
- truthound/profiler/ab_testing/tracking.py +368 -0
- truthound/profiler/auto_threshold.py +1170 -0
- truthound/profiler/base.py +579 -0
- truthound/profiler/cache_patterns.py +911 -0
- truthound/profiler/caching.py +1303 -0
- truthound/profiler/column_profiler.py +712 -0
- truthound/profiler/comparison.py +1007 -0
- truthound/profiler/custom_patterns.py +1170 -0
- truthound/profiler/dashboard/__init__.py +50 -0
- truthound/profiler/dashboard/app.py +476 -0
- truthound/profiler/dashboard/components.py +457 -0
- truthound/profiler/dashboard/config.py +72 -0
- truthound/profiler/distributed/__init__.py +83 -0
- truthound/profiler/distributed/base.py +281 -0
- truthound/profiler/distributed/dask_backend.py +498 -0
- truthound/profiler/distributed/local_backend.py +293 -0
- truthound/profiler/distributed/profiler.py +304 -0
- truthound/profiler/distributed/ray_backend.py +374 -0
- truthound/profiler/distributed/spark_backend.py +375 -0
- truthound/profiler/distributed.py +1366 -0
- truthound/profiler/enterprise_sampling.py +1065 -0
- truthound/profiler/errors.py +488 -0
- truthound/profiler/evolution/__init__.py +91 -0
- truthound/profiler/evolution/alerts.py +426 -0
- truthound/profiler/evolution/changes.py +206 -0
- truthound/profiler/evolution/compatibility.py +365 -0
- truthound/profiler/evolution/detector.py +372 -0
- truthound/profiler/evolution/protocols.py +121 -0
- truthound/profiler/generators/__init__.py +48 -0
- truthound/profiler/generators/base.py +384 -0
- truthound/profiler/generators/ml_rules.py +375 -0
- truthound/profiler/generators/pattern_rules.py +384 -0
- truthound/profiler/generators/schema_rules.py +267 -0
- truthound/profiler/generators/stats_rules.py +324 -0
- truthound/profiler/generators/suite_generator.py +857 -0
- truthound/profiler/i18n.py +1542 -0
- truthound/profiler/incremental.py +554 -0
- truthound/profiler/incremental_validation.py +1710 -0
- truthound/profiler/integration/__init__.py +73 -0
- truthound/profiler/integration/adapters.py +345 -0
- truthound/profiler/integration/context.py +371 -0
- truthound/profiler/integration/executor.py +527 -0
- truthound/profiler/integration/naming.py +75 -0
- truthound/profiler/integration/protocols.py +243 -0
- truthound/profiler/memory.py +1185 -0
- truthound/profiler/migration/__init__.py +60 -0
- truthound/profiler/migration/base.py +345 -0
- truthound/profiler/migration/manager.py +444 -0
- truthound/profiler/migration/v1_0_to_v1_1.py +484 -0
- truthound/profiler/ml/__init__.py +73 -0
- truthound/profiler/ml/base.py +244 -0
- truthound/profiler/ml/classifier.py +507 -0
- truthound/profiler/ml/feature_extraction.py +604 -0
- truthound/profiler/ml/pretrained.py +448 -0
- truthound/profiler/ml_inference.py +1276 -0
- truthound/profiler/native_patterns.py +815 -0
- truthound/profiler/observability.py +1184 -0
- truthound/profiler/process_timeout.py +1566 -0
- truthound/profiler/progress.py +568 -0
- truthound/profiler/progress_callbacks.py +1734 -0
- truthound/profiler/quality.py +1345 -0
- truthound/profiler/resilience.py +1180 -0
- truthound/profiler/sampled_matcher.py +794 -0
- truthound/profiler/sampling.py +1288 -0
- truthound/profiler/scheduling/__init__.py +82 -0
- truthound/profiler/scheduling/protocols.py +214 -0
- truthound/profiler/scheduling/scheduler.py +474 -0
- truthound/profiler/scheduling/storage.py +457 -0
- truthound/profiler/scheduling/triggers.py +449 -0
- truthound/profiler/schema.py +603 -0
- truthound/profiler/streaming.py +685 -0
- truthound/profiler/streaming_patterns.py +1354 -0
- truthound/profiler/suite_cli.py +625 -0
- truthound/profiler/suite_config.py +789 -0
- truthound/profiler/suite_export.py +1268 -0
- truthound/profiler/table_profiler.py +547 -0
- truthound/profiler/timeout.py +565 -0
- truthound/profiler/validation.py +1532 -0
- truthound/profiler/visualization/__init__.py +118 -0
- truthound/profiler/visualization/base.py +346 -0
- truthound/profiler/visualization/generator.py +1259 -0
- truthound/profiler/visualization/plotly_renderer.py +811 -0
- truthound/profiler/visualization/renderers.py +669 -0
- truthound/profiler/visualization/sections.py +540 -0
- truthound/profiler/visualization.py +2122 -0
- truthound/profiler/yaml_validation.py +1151 -0
- truthound/py.typed +0 -0
- truthound/ratelimit/__init__.py +248 -0
- truthound/ratelimit/algorithms.py +1108 -0
- truthound/ratelimit/core.py +573 -0
- truthound/ratelimit/integration.py +532 -0
- truthound/ratelimit/limiter.py +663 -0
- truthound/ratelimit/middleware.py +700 -0
- truthound/ratelimit/policy.py +792 -0
- truthound/ratelimit/storage.py +763 -0
- truthound/rbac/__init__.py +340 -0
- truthound/rbac/core.py +976 -0
- truthound/rbac/integration.py +760 -0
- truthound/rbac/manager.py +1052 -0
- truthound/rbac/middleware.py +842 -0
- truthound/rbac/policy.py +954 -0
- truthound/rbac/storage.py +878 -0
- truthound/realtime/__init__.py +141 -0
- truthound/realtime/adapters/__init__.py +43 -0
- truthound/realtime/adapters/base.py +533 -0
- truthound/realtime/adapters/kafka.py +487 -0
- truthound/realtime/adapters/kinesis.py +479 -0
- truthound/realtime/adapters/mock.py +243 -0
- truthound/realtime/base.py +553 -0
- truthound/realtime/factory.py +382 -0
- truthound/realtime/incremental.py +660 -0
- truthound/realtime/processing/__init__.py +67 -0
- truthound/realtime/processing/exactly_once.py +575 -0
- truthound/realtime/processing/state.py +547 -0
- truthound/realtime/processing/windows.py +647 -0
- truthound/realtime/protocols.py +569 -0
- truthound/realtime/streaming.py +605 -0
- truthound/realtime/testing/__init__.py +32 -0
- truthound/realtime/testing/containers.py +615 -0
- truthound/realtime/testing/fixtures.py +484 -0
- truthound/report.py +280 -0
- truthound/reporters/__init__.py +46 -0
- truthound/reporters/_protocols.py +30 -0
- truthound/reporters/base.py +324 -0
- truthound/reporters/ci/__init__.py +66 -0
- truthound/reporters/ci/azure.py +436 -0
- truthound/reporters/ci/base.py +509 -0
- truthound/reporters/ci/bitbucket.py +567 -0
- truthound/reporters/ci/circleci.py +547 -0
- truthound/reporters/ci/detection.py +364 -0
- truthound/reporters/ci/factory.py +182 -0
- truthound/reporters/ci/github.py +388 -0
- truthound/reporters/ci/gitlab.py +471 -0
- truthound/reporters/ci/jenkins.py +525 -0
- truthound/reporters/console_reporter.py +299 -0
- truthound/reporters/factory.py +211 -0
- truthound/reporters/html_reporter.py +524 -0
- truthound/reporters/json_reporter.py +256 -0
- truthound/reporters/markdown_reporter.py +280 -0
- truthound/reporters/sdk/__init__.py +174 -0
- truthound/reporters/sdk/builder.py +558 -0
- truthound/reporters/sdk/mixins.py +1150 -0
- truthound/reporters/sdk/schema.py +1493 -0
- truthound/reporters/sdk/templates.py +666 -0
- truthound/reporters/sdk/testing.py +968 -0
- truthound/scanners.py +170 -0
- truthound/scheduling/__init__.py +122 -0
- truthound/scheduling/cron.py +1136 -0
- truthound/scheduling/presets.py +212 -0
- truthound/schema.py +275 -0
- truthound/secrets/__init__.py +173 -0
- truthound/secrets/base.py +618 -0
- truthound/secrets/cloud.py +682 -0
- truthound/secrets/integration.py +507 -0
- truthound/secrets/manager.py +633 -0
- truthound/secrets/oidc/__init__.py +172 -0
- truthound/secrets/oidc/base.py +902 -0
- truthound/secrets/oidc/credential_provider.py +623 -0
- truthound/secrets/oidc/exchangers.py +1001 -0
- truthound/secrets/oidc/github/__init__.py +110 -0
- truthound/secrets/oidc/github/claims.py +718 -0
- truthound/secrets/oidc/github/enhanced_provider.py +693 -0
- truthound/secrets/oidc/github/trust_policy.py +742 -0
- truthound/secrets/oidc/github/verification.py +723 -0
- truthound/secrets/oidc/github/workflow.py +691 -0
- truthound/secrets/oidc/providers.py +825 -0
- truthound/secrets/providers.py +506 -0
- truthound/secrets/resolver.py +495 -0
- truthound/stores/__init__.py +177 -0
- truthound/stores/backends/__init__.py +18 -0
- truthound/stores/backends/_protocols.py +340 -0
- truthound/stores/backends/azure_blob.py +530 -0
- truthound/stores/backends/concurrent_filesystem.py +915 -0
- truthound/stores/backends/connection_pool.py +1365 -0
- truthound/stores/backends/database.py +743 -0
- truthound/stores/backends/filesystem.py +538 -0
- truthound/stores/backends/gcs.py +399 -0
- truthound/stores/backends/memory.py +354 -0
- truthound/stores/backends/s3.py +434 -0
- truthound/stores/backpressure/__init__.py +84 -0
- truthound/stores/backpressure/base.py +375 -0
- truthound/stores/backpressure/circuit_breaker.py +434 -0
- truthound/stores/backpressure/monitor.py +376 -0
- truthound/stores/backpressure/strategies.py +677 -0
- truthound/stores/base.py +551 -0
- truthound/stores/batching/__init__.py +65 -0
- truthound/stores/batching/base.py +305 -0
- truthound/stores/batching/buffer.py +370 -0
- truthound/stores/batching/store.py +248 -0
- truthound/stores/batching/writer.py +521 -0
- truthound/stores/caching/__init__.py +60 -0
- truthound/stores/caching/backends.py +684 -0
- truthound/stores/caching/base.py +356 -0
- truthound/stores/caching/store.py +305 -0
- truthound/stores/compression/__init__.py +193 -0
- truthound/stores/compression/adaptive.py +694 -0
- truthound/stores/compression/base.py +514 -0
- truthound/stores/compression/pipeline.py +868 -0
- truthound/stores/compression/providers.py +672 -0
- truthound/stores/compression/streaming.py +832 -0
- truthound/stores/concurrency/__init__.py +81 -0
- truthound/stores/concurrency/atomic.py +556 -0
- truthound/stores/concurrency/index.py +775 -0
- truthound/stores/concurrency/locks.py +576 -0
- truthound/stores/concurrency/manager.py +482 -0
- truthound/stores/encryption/__init__.py +297 -0
- truthound/stores/encryption/base.py +952 -0
- truthound/stores/encryption/keys.py +1191 -0
- truthound/stores/encryption/pipeline.py +903 -0
- truthound/stores/encryption/providers.py +953 -0
- truthound/stores/encryption/streaming.py +950 -0
- truthound/stores/expectations.py +227 -0
- truthound/stores/factory.py +246 -0
- truthound/stores/migration/__init__.py +75 -0
- truthound/stores/migration/base.py +480 -0
- truthound/stores/migration/manager.py +347 -0
- truthound/stores/migration/registry.py +382 -0
- truthound/stores/migration/store.py +559 -0
- truthound/stores/observability/__init__.py +106 -0
- truthound/stores/observability/audit.py +718 -0
- truthound/stores/observability/config.py +270 -0
- truthound/stores/observability/factory.py +208 -0
- truthound/stores/observability/metrics.py +636 -0
- truthound/stores/observability/protocols.py +410 -0
- truthound/stores/observability/store.py +570 -0
- truthound/stores/observability/tracing.py +784 -0
- truthound/stores/replication/__init__.py +76 -0
- truthound/stores/replication/base.py +260 -0
- truthound/stores/replication/monitor.py +269 -0
- truthound/stores/replication/store.py +439 -0
- truthound/stores/replication/syncer.py +391 -0
- truthound/stores/results.py +359 -0
- truthound/stores/retention/__init__.py +77 -0
- truthound/stores/retention/base.py +378 -0
- truthound/stores/retention/policies.py +621 -0
- truthound/stores/retention/scheduler.py +279 -0
- truthound/stores/retention/store.py +526 -0
- truthound/stores/streaming/__init__.py +138 -0
- truthound/stores/streaming/base.py +801 -0
- truthound/stores/streaming/database.py +984 -0
- truthound/stores/streaming/filesystem.py +719 -0
- truthound/stores/streaming/reader.py +629 -0
- truthound/stores/streaming/s3.py +843 -0
- truthound/stores/streaming/writer.py +790 -0
- truthound/stores/tiering/__init__.py +108 -0
- truthound/stores/tiering/base.py +462 -0
- truthound/stores/tiering/manager.py +249 -0
- truthound/stores/tiering/policies.py +692 -0
- truthound/stores/tiering/store.py +526 -0
- truthound/stores/versioning/__init__.py +56 -0
- truthound/stores/versioning/base.py +376 -0
- truthound/stores/versioning/store.py +660 -0
- truthound/stores/versioning/strategies.py +353 -0
- truthound/types.py +56 -0
- truthound/validators/__init__.py +774 -0
- truthound/validators/aggregate/__init__.py +27 -0
- truthound/validators/aggregate/central.py +116 -0
- truthound/validators/aggregate/extremes.py +116 -0
- truthound/validators/aggregate/spread.py +118 -0
- truthound/validators/aggregate/sum.py +64 -0
- truthound/validators/aggregate/type.py +78 -0
- truthound/validators/anomaly/__init__.py +93 -0
- truthound/validators/anomaly/base.py +431 -0
- truthound/validators/anomaly/ml_based.py +1190 -0
- truthound/validators/anomaly/multivariate.py +647 -0
- truthound/validators/anomaly/statistical.py +599 -0
- truthound/validators/base.py +1089 -0
- truthound/validators/business_rule/__init__.py +46 -0
- truthound/validators/business_rule/base.py +147 -0
- truthound/validators/business_rule/checksum.py +509 -0
- truthound/validators/business_rule/financial.py +526 -0
- truthound/validators/cache.py +733 -0
- truthound/validators/completeness/__init__.py +39 -0
- truthound/validators/completeness/conditional.py +73 -0
- truthound/validators/completeness/default.py +98 -0
- truthound/validators/completeness/empty.py +103 -0
- truthound/validators/completeness/nan.py +337 -0
- truthound/validators/completeness/null.py +152 -0
- truthound/validators/cross_table/__init__.py +17 -0
- truthound/validators/cross_table/aggregate.py +333 -0
- truthound/validators/cross_table/row_count.py +122 -0
- truthound/validators/datetime/__init__.py +29 -0
- truthound/validators/datetime/format.py +78 -0
- truthound/validators/datetime/freshness.py +269 -0
- truthound/validators/datetime/order.py +73 -0
- truthound/validators/datetime/parseable.py +185 -0
- truthound/validators/datetime/range.py +202 -0
- truthound/validators/datetime/timezone.py +69 -0
- truthound/validators/distribution/__init__.py +49 -0
- truthound/validators/distribution/distribution.py +128 -0
- truthound/validators/distribution/monotonic.py +119 -0
- truthound/validators/distribution/outlier.py +178 -0
- truthound/validators/distribution/quantile.py +80 -0
- truthound/validators/distribution/range.py +254 -0
- truthound/validators/distribution/set.py +125 -0
- truthound/validators/distribution/statistical.py +459 -0
- truthound/validators/drift/__init__.py +79 -0
- truthound/validators/drift/base.py +427 -0
- truthound/validators/drift/multi_feature.py +401 -0
- truthound/validators/drift/numeric.py +395 -0
- truthound/validators/drift/psi.py +446 -0
- truthound/validators/drift/statistical.py +510 -0
- truthound/validators/enterprise.py +1658 -0
- truthound/validators/geospatial/__init__.py +80 -0
- truthound/validators/geospatial/base.py +97 -0
- truthound/validators/geospatial/boundary.py +238 -0
- truthound/validators/geospatial/coordinate.py +351 -0
- truthound/validators/geospatial/distance.py +399 -0
- truthound/validators/geospatial/polygon.py +665 -0
- truthound/validators/i18n/__init__.py +308 -0
- truthound/validators/i18n/bidi.py +571 -0
- truthound/validators/i18n/catalogs.py +570 -0
- truthound/validators/i18n/dialects.py +763 -0
- truthound/validators/i18n/extended_catalogs.py +549 -0
- truthound/validators/i18n/formatting.py +1434 -0
- truthound/validators/i18n/loader.py +1020 -0
- truthound/validators/i18n/messages.py +521 -0
- truthound/validators/i18n/plural.py +683 -0
- truthound/validators/i18n/protocols.py +855 -0
- truthound/validators/i18n/tms.py +1162 -0
- truthound/validators/localization/__init__.py +53 -0
- truthound/validators/localization/base.py +122 -0
- truthound/validators/localization/chinese.py +362 -0
- truthound/validators/localization/japanese.py +275 -0
- truthound/validators/localization/korean.py +524 -0
- truthound/validators/memory/__init__.py +94 -0
- truthound/validators/memory/approximate_knn.py +506 -0
- truthound/validators/memory/base.py +547 -0
- truthound/validators/memory/sgd_online.py +719 -0
- truthound/validators/memory/streaming_ecdf.py +753 -0
- truthound/validators/ml_feature/__init__.py +54 -0
- truthound/validators/ml_feature/base.py +249 -0
- truthound/validators/ml_feature/correlation.py +299 -0
- truthound/validators/ml_feature/leakage.py +344 -0
- truthound/validators/ml_feature/null_impact.py +270 -0
- truthound/validators/ml_feature/scale.py +264 -0
- truthound/validators/multi_column/__init__.py +89 -0
- truthound/validators/multi_column/arithmetic.py +284 -0
- truthound/validators/multi_column/base.py +231 -0
- truthound/validators/multi_column/comparison.py +273 -0
- truthound/validators/multi_column/consistency.py +312 -0
- truthound/validators/multi_column/statistical.py +299 -0
- truthound/validators/optimization/__init__.py +164 -0
- truthound/validators/optimization/aggregation.py +563 -0
- truthound/validators/optimization/covariance.py +556 -0
- truthound/validators/optimization/geo.py +626 -0
- truthound/validators/optimization/graph.py +587 -0
- truthound/validators/optimization/orchestrator.py +970 -0
- truthound/validators/optimization/profiling.py +1312 -0
- truthound/validators/privacy/__init__.py +223 -0
- truthound/validators/privacy/base.py +635 -0
- truthound/validators/privacy/ccpa.py +670 -0
- truthound/validators/privacy/gdpr.py +728 -0
- truthound/validators/privacy/global_patterns.py +604 -0
- truthound/validators/privacy/plugins.py +867 -0
- truthound/validators/profiling/__init__.py +52 -0
- truthound/validators/profiling/base.py +175 -0
- truthound/validators/profiling/cardinality.py +312 -0
- truthound/validators/profiling/entropy.py +391 -0
- truthound/validators/profiling/frequency.py +455 -0
- truthound/validators/pushdown_support.py +660 -0
- truthound/validators/query/__init__.py +91 -0
- truthound/validators/query/aggregate.py +346 -0
- truthound/validators/query/base.py +246 -0
- truthound/validators/query/column.py +249 -0
- truthound/validators/query/expression.py +274 -0
- truthound/validators/query/result.py +323 -0
- truthound/validators/query/row_count.py +264 -0
- truthound/validators/referential/__init__.py +80 -0
- truthound/validators/referential/base.py +395 -0
- truthound/validators/referential/cascade.py +391 -0
- truthound/validators/referential/circular.py +563 -0
- truthound/validators/referential/foreign_key.py +624 -0
- truthound/validators/referential/orphan.py +485 -0
- truthound/validators/registry.py +112 -0
- truthound/validators/schema/__init__.py +41 -0
- truthound/validators/schema/column_count.py +142 -0
- truthound/validators/schema/column_exists.py +80 -0
- truthound/validators/schema/column_order.py +82 -0
- truthound/validators/schema/column_pair.py +85 -0
- truthound/validators/schema/column_pair_set.py +195 -0
- truthound/validators/schema/column_type.py +94 -0
- truthound/validators/schema/multi_column.py +53 -0
- truthound/validators/schema/multi_column_aggregate.py +175 -0
- truthound/validators/schema/referential.py +274 -0
- truthound/validators/schema/table_schema.py +91 -0
- truthound/validators/schema_validator.py +219 -0
- truthound/validators/sdk/__init__.py +250 -0
- truthound/validators/sdk/builder.py +680 -0
- truthound/validators/sdk/decorators.py +474 -0
- truthound/validators/sdk/enterprise/__init__.py +211 -0
- truthound/validators/sdk/enterprise/docs.py +725 -0
- truthound/validators/sdk/enterprise/fuzzing.py +659 -0
- truthound/validators/sdk/enterprise/licensing.py +709 -0
- truthound/validators/sdk/enterprise/manager.py +543 -0
- truthound/validators/sdk/enterprise/resources.py +628 -0
- truthound/validators/sdk/enterprise/sandbox.py +766 -0
- truthound/validators/sdk/enterprise/signing.py +603 -0
- truthound/validators/sdk/enterprise/templates.py +865 -0
- truthound/validators/sdk/enterprise/versioning.py +659 -0
- truthound/validators/sdk/templates.py +757 -0
- truthound/validators/sdk/testing.py +807 -0
- truthound/validators/security/__init__.py +181 -0
- truthound/validators/security/redos/__init__.py +182 -0
- truthound/validators/security/redos/core.py +861 -0
- truthound/validators/security/redos/cpu_monitor.py +593 -0
- truthound/validators/security/redos/cve_database.py +791 -0
- truthound/validators/security/redos/ml/__init__.py +155 -0
- truthound/validators/security/redos/ml/base.py +785 -0
- truthound/validators/security/redos/ml/datasets.py +618 -0
- truthound/validators/security/redos/ml/features.py +359 -0
- truthound/validators/security/redos/ml/models.py +1000 -0
- truthound/validators/security/redos/ml/predictor.py +507 -0
- truthound/validators/security/redos/ml/storage.py +632 -0
- truthound/validators/security/redos/ml/training.py +571 -0
- truthound/validators/security/redos/ml_analyzer.py +937 -0
- truthound/validators/security/redos/optimizer.py +674 -0
- truthound/validators/security/redos/profiler.py +682 -0
- truthound/validators/security/redos/re2_engine.py +709 -0
- truthound/validators/security/redos.py +886 -0
- truthound/validators/security/sql_security.py +1247 -0
- truthound/validators/streaming/__init__.py +126 -0
- truthound/validators/streaming/base.py +292 -0
- truthound/validators/streaming/completeness.py +210 -0
- truthound/validators/streaming/mixin.py +575 -0
- truthound/validators/streaming/range.py +308 -0
- truthound/validators/streaming/sources.py +846 -0
- truthound/validators/string/__init__.py +57 -0
- truthound/validators/string/casing.py +158 -0
- truthound/validators/string/charset.py +96 -0
- truthound/validators/string/format.py +501 -0
- truthound/validators/string/json.py +77 -0
- truthound/validators/string/json_schema.py +184 -0
- truthound/validators/string/length.py +104 -0
- truthound/validators/string/like_pattern.py +237 -0
- truthound/validators/string/regex.py +202 -0
- truthound/validators/string/regex_extended.py +435 -0
- truthound/validators/table/__init__.py +88 -0
- truthound/validators/table/base.py +78 -0
- truthound/validators/table/column_count.py +198 -0
- truthound/validators/table/freshness.py +362 -0
- truthound/validators/table/row_count.py +251 -0
- truthound/validators/table/schema.py +333 -0
- truthound/validators/table/size.py +285 -0
- truthound/validators/timeout/__init__.py +102 -0
- truthound/validators/timeout/advanced/__init__.py +247 -0
- truthound/validators/timeout/advanced/circuit_breaker.py +675 -0
- truthound/validators/timeout/advanced/prediction.py +773 -0
- truthound/validators/timeout/advanced/priority.py +618 -0
- truthound/validators/timeout/advanced/redis_backend.py +770 -0
- truthound/validators/timeout/advanced/retry.py +721 -0
- truthound/validators/timeout/advanced/sampling.py +788 -0
- truthound/validators/timeout/advanced/sla.py +661 -0
- truthound/validators/timeout/advanced/telemetry.py +804 -0
- truthound/validators/timeout/cascade.py +477 -0
- truthound/validators/timeout/deadline.py +657 -0
- truthound/validators/timeout/degradation.py +525 -0
- truthound/validators/timeout/distributed.py +597 -0
- truthound/validators/timeseries/__init__.py +89 -0
- truthound/validators/timeseries/base.py +326 -0
- truthound/validators/timeseries/completeness.py +617 -0
- truthound/validators/timeseries/gap.py +485 -0
- truthound/validators/timeseries/monotonic.py +310 -0
- truthound/validators/timeseries/seasonality.py +422 -0
- truthound/validators/timeseries/trend.py +510 -0
- truthound/validators/uniqueness/__init__.py +59 -0
- truthound/validators/uniqueness/approximate.py +475 -0
- truthound/validators/uniqueness/distinct_values.py +253 -0
- truthound/validators/uniqueness/duplicate.py +118 -0
- truthound/validators/uniqueness/primary_key.py +140 -0
- truthound/validators/uniqueness/unique.py +191 -0
- truthound/validators/uniqueness/within_record.py +599 -0
- truthound/validators/utils.py +756 -0
- truthound-1.0.8.dist-info/METADATA +474 -0
- truthound-1.0.8.dist-info/RECORD +877 -0
- truthound-1.0.8.dist-info/WHEEL +4 -0
- truthound-1.0.8.dist-info/entry_points.txt +2 -0
- truthound-1.0.8.dist-info/licenses/LICENSE +190 -0
|
@@ -0,0 +1,479 @@
|
|
|
1
|
+
"""Spark data source implementation.
|
|
2
|
+
|
|
3
|
+
This module provides a data source for PySpark DataFrames,
|
|
4
|
+
with automatic sampling for large datasets to prevent memory issues.
|
|
5
|
+
|
|
6
|
+
Requires: pip install pyspark
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
from __future__ import annotations
|
|
10
|
+
|
|
11
|
+
from dataclasses import dataclass
|
|
12
|
+
from typing import TYPE_CHECKING, Any
|
|
13
|
+
|
|
14
|
+
from truthound.datasources._protocols import (
|
|
15
|
+
ColumnType,
|
|
16
|
+
DataSourceCapability,
|
|
17
|
+
)
|
|
18
|
+
from truthound.datasources.base import (
|
|
19
|
+
BaseDataSource,
|
|
20
|
+
DataSourceConfig,
|
|
21
|
+
DataSourceError,
|
|
22
|
+
DataSourceSizeError,
|
|
23
|
+
)
|
|
24
|
+
|
|
25
|
+
if TYPE_CHECKING:
|
|
26
|
+
import polars as pl
|
|
27
|
+
from pyspark.sql import DataFrame as SparkDataFrame
|
|
28
|
+
from truthound.execution.base import BaseExecutionEngine
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
def _check_pyspark_available() -> None:
|
|
32
|
+
"""Check if PySpark is available."""
|
|
33
|
+
try:
|
|
34
|
+
import pyspark # noqa: F401
|
|
35
|
+
except ImportError:
|
|
36
|
+
raise ImportError(
|
|
37
|
+
"pyspark is required for SparkDataSource. "
|
|
38
|
+
"Install with: pip install pyspark"
|
|
39
|
+
)
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
def _spark_type_to_column_type(spark_type: Any) -> ColumnType:
    """Convert a Spark data type instance to the unified ``ColumnType``.

    Args:
        spark_type: A ``pyspark.sql.types`` data type instance (for example
            the ``dataType`` of a schema field).

    Returns:
        The matching ``ColumnType``, or ``ColumnType.UNKNOWN`` when the Spark
        type is not one of the recognised classes.
    """
    from pyspark.sql import types as spark_types

    # TimestampNTZType is not present in older PySpark releases (it was added
    # in 3.4). Look it up defensively so the mapping keeps working there
    # instead of failing with ImportError on every call.
    timestamp_types: tuple[type, ...] = (spark_types.TimestampType,)
    timestamp_ntz = getattr(spark_types, "TimestampNTZType", None)
    if timestamp_ntz is not None:
        timestamp_types += (timestamp_ntz,)

    # Checked in order; the first isinstance match wins.
    type_table = (
        (
            (
                spark_types.ByteType,
                spark_types.ShortType,
                spark_types.IntegerType,
                spark_types.LongType,
            ),
            ColumnType.INTEGER,
        ),
        ((spark_types.FloatType, spark_types.DoubleType), ColumnType.FLOAT),
        ((spark_types.DecimalType,), ColumnType.DECIMAL),
        ((spark_types.StringType,), ColumnType.STRING),
        ((spark_types.BinaryType,), ColumnType.BINARY),
        ((spark_types.BooleanType,), ColumnType.BOOLEAN),
        ((spark_types.DateType,), ColumnType.DATE),
        (timestamp_types, ColumnType.DATETIME),
        ((spark_types.ArrayType,), ColumnType.LIST),
        ((spark_types.MapType, spark_types.StructType), ColumnType.STRUCT),
    )

    for spark_classes, column_type in type_table:
        if isinstance(spark_type, spark_classes):
            return column_type

    return ColumnType.UNKNOWN
|
|
74
|
+
|
|
75
|
+
|
|
76
|
+
# =============================================================================
|
|
77
|
+
# Configuration
|
|
78
|
+
# =============================================================================
|
|
79
|
+
|
|
80
|
+
|
|
81
|
+
@dataclass
class SparkDataSourceConfig(DataSourceConfig):
    """Spark-specific settings layered on top of ``DataSourceConfig``.

    Attributes:
        max_rows_for_local: Largest row count that may be collected to the
            driver for local operations.
        sampling_fraction: Fraction of the data to sample once limits are
            exceeded; ``None`` means it is calculated from the data size.
        persist_sampled: Keep the sampled DataFrame persisted in memory.
        force_sampling: Sample regardless of the dataset size.
        repartition_for_sampling: Partition count to repartition to before
            sampling for a better distribution; ``None`` leaves it unset.
    """

    # Deliberately conservative: collecting to the Spark driver is costly.
    max_rows_for_local: int = 100_000
    # None -> auto-calculate based on size.
    sampling_fraction: float | None = None
    persist_sampled: bool = True
    force_sampling: bool = False
    repartition_for_sampling: int | None = None
|
|
98
|
+
|
|
99
|
+
|
|
100
|
+
# =============================================================================
|
|
101
|
+
# Spark Data Source
|
|
102
|
+
# =============================================================================
|
|
103
|
+
|
|
104
|
+
|
|
105
|
+
class SparkDataSource(BaseDataSource[SparkDataSourceConfig]):
    """Data source for PySpark DataFrames.

    This data source wraps a Spark DataFrame. Validation runs on a Polars
    engine, which means the data has to be collected to the driver; the
    conversion methods refuse to run when the row count exceeds
    ``max_rows_for_local``, so large inputs must be sampled first.

    WARNING: Many validation operations require collecting data to the
    driver node. For very large datasets, always use sampling.

    Example:
        >>> from pyspark.sql import SparkSession
        >>> spark = SparkSession.builder.getOrCreate()
        >>> df = spark.read.parquet("large_data.parquet")
        >>>
        >>> # Sample explicitly when the data is too large
        >>> source = SparkDataSource(df)
        >>> if source.needs_sampling():
        ...     source = source.sample(n=100_000)
        >>>
        >>> engine = source.get_execution_engine()
        >>> print(engine.count_rows())

        >>> # Force sampling for safety
        >>> source = SparkDataSource(df, force_sampling=True)
    """

    source_type = "spark"

    def __init__(
        self,
        data: "SparkDataFrame",
        config: SparkDataSourceConfig | None = None,
        force_sampling: bool = False,
    ) -> None:
        """Initialize Spark data source.

        Args:
            data: PySpark DataFrame.
            config: Optional configuration.
            force_sampling: Force sampling even for smaller datasets. When
                True, ``force_sampling`` is switched on in the active config.

        Raises:
            ImportError: If ``pyspark`` is not installed.
        """
        _check_pyspark_available()
        super().__init__(config)

        self._df = data
        # Captured once; the unified schema is derived from it lazily.
        self._spark_schema = data.schema

        if force_sampling:
            self._config.force_sampling = True

    @classmethod
    def _default_config(cls) -> SparkDataSourceConfig:
        """Return a ``SparkDataSourceConfig`` with default values."""
        return SparkDataSourceConfig()

    @property
    def spark_dataframe(self) -> "SparkDataFrame":
        """Get the underlying Spark DataFrame."""
        return self._df

    @property
    def spark_schema(self) -> Any:
        """Get the native Spark schema."""
        return self._spark_schema

    @property
    def schema(self) -> dict[str, ColumnType]:
        """Get the schema as column name to type mapping.

        The mapping is computed on first access and cached afterwards.
        """
        if self._cached_schema is None:
            self._cached_schema = {
                field.name: _spark_type_to_column_type(field.dataType)
                for field in self._spark_schema.fields
            }
        return self._cached_schema

    @property
    def columns(self) -> list[str]:
        """Get list of column names."""
        return self._df.columns

    @property
    def row_count(self) -> int | None:
        """Get row count.

        Note: The first access triggers a Spark ``count()`` action, which may
        be expensive; the result is cached for later accesses.
        """
        if self._cached_row_count is None:
            self._cached_row_count = self._df.count()
        return self._cached_row_count

    @property
    def capabilities(self) -> set[DataSourceCapability]:
        """Get data source capabilities."""
        return {
            DataSourceCapability.LAZY_EVALUATION,
            DataSourceCapability.SAMPLING,
            DataSourceCapability.SCHEMA_INFERENCE,
            DataSourceCapability.STREAMING,
        }

    def needs_sampling(self) -> bool:
        """Check if sampling is needed due to size.

        Returns:
            True when ``force_sampling`` is set, when the row count is
            unknown, or when it exceeds ``max_rows_for_local``.
        """
        if self._config.force_sampling:
            return True

        row_count = self.row_count
        if row_count is None:
            return True  # Unknown size, sample to be safe
        return row_count > self._config.max_rows_for_local

    def get_execution_engine(self) -> "BaseExecutionEngine":
        """Get an execution engine for this data source.

        Returns a Polars engine built from the collected data; direct Spark
        execution is not yet supported. No sampling is applied here, so call
        :meth:`sample` first when :meth:`needs_sampling` is True.

        Raises:
            DataSourceSizeError: If the row count exceeds
                ``max_rows_for_local`` (raised by the Polars conversion).
        """
        from truthound.execution.polars_engine import PolarsExecutionEngine

        lf = self.to_polars_lazyframe()
        return PolarsExecutionEngine(lf, self._config)

    def sample(
        self,
        n: int = 1000,
        seed: int | None = None,
    ) -> "SparkDataSource":
        """Create a new data source with sampled data.

        Uses Spark's native sampling for efficiency.

        Args:
            n: Target number of rows.
            seed: Random seed for reproducibility.

        Returns:
            New SparkDataSource with sampled data, or ``self`` when the data
            already has at most ``n`` rows and sampling is not forced.

        Raises:
            ValueError: If ``n`` is negative.
        """
        if n < 0:
            raise ValueError(f"n must be non-negative, got {n}")

        row_count = self.row_count or 0

        if row_count <= n and not self._config.force_sampling:
            return self

        # NOTE(review): ``sampling_fraction`` and ``repartition_for_sampling``
        # from the config are not consulted by this method; the fraction is
        # always derived from ``n``. Confirm whether that is intended.
        if row_count > 0:
            # Over-sample slightly to account for sampling variance
            fraction = min((n * 1.1) / row_count, 1.0)
        else:
            fraction = 0.1  # Default if row count unknown

        # ``seed=None`` lets Spark choose a random seed, so a single call
        # covers both the seeded and unseeded case. The limit trims any
        # over-sampling down to at most ``n`` rows.
        sampled_df = self._df.sample(
            withReplacement=False, fraction=fraction, seed=seed
        ).limit(n)

        # Optionally persist for performance
        if self._config.persist_sampled:
            sampled_df = sampled_df.persist()

        # Carry the Spark-specific settings over so the sampled source keeps
        # behaving like its parent (e.g. an opt-out of persisting).
        config = SparkDataSourceConfig(
            name=f"{self.name}_sample",
            max_rows=self._config.max_rows,
            max_rows_for_local=self._config.max_rows_for_local,
            sample_size=n,
            sampling_fraction=self._config.sampling_fraction,
            persist_sampled=self._config.persist_sampled,
            force_sampling=False,
            repartition_for_sampling=self._config.repartition_for_sampling,
        )

        return SparkDataSource(sampled_df, config)

    def _ensure_fits_locally(self, unit: str) -> None:
        """Raise if the DataFrame has too many rows to collect to the driver.

        Args:
            unit: Unit label passed through to ``DataSourceSizeError``.

        Raises:
            DataSourceSizeError: If the row count exceeds
                ``max_rows_for_local``.
        """
        row_count = self.row_count or 0
        if row_count > self._config.max_rows_for_local:
            raise DataSourceSizeError(
                current_size=row_count,
                max_size=self._config.max_rows_for_local,
                unit=unit,
            )

    def to_polars_lazyframe(self) -> "pl.LazyFrame":
        """Convert to Polars LazyFrame.

        WARNING: This collects data to the driver node!
        For large datasets, sample first.

        Returns:
            Polars LazyFrame.

        Raises:
            DataSourceSizeError: If data exceeds size limits.
        """
        import polars as pl

        self._ensure_fits_locally("rows (Spark to local conversion limit)")

        # Convert via Pandas (most reliable method)
        pandas_df = self._df.toPandas()
        return pl.from_pandas(pandas_df).lazy()

    def to_pandas(self) -> Any:
        """Convert to Pandas DataFrame.

        WARNING: This collects all data to the driver!

        Raises:
            DataSourceSizeError: If data exceeds size limits.
        """
        self._ensure_fits_locally("rows")
        return self._df.toPandas()

    def validate_connection(self) -> bool:
        """Validate by checking if the DataFrame schema is accessible.

        Returns:
            True if the schema could be read, False if reading it raised.
        """
        try:
            _ = self._df.schema
            return True
        except Exception:
            # Deliberate best-effort probe: any failure means "not usable".
            return False

    # -------------------------------------------------------------------------
    # Spark-specific Methods
    # -------------------------------------------------------------------------

    def repartition(self, num_partitions: int) -> "SparkDataSource":
        """Create a new data source with repartitioned data.

        Args:
            num_partitions: Number of partitions.

        Returns:
            New SparkDataSource with repartitioned data (same config object).
        """
        repartitioned = self._df.repartition(num_partitions)
        return SparkDataSource(repartitioned, self._config)

    def coalesce(self, num_partitions: int) -> "SparkDataSource":
        """Create a new data source with coalesced partitions.

        Args:
            num_partitions: Number of partitions.

        Returns:
            New SparkDataSource with coalesced data (same config object).
        """
        coalesced = self._df.coalesce(num_partitions)
        return SparkDataSource(coalesced, self._config)

    def persist(self) -> "SparkDataSource":
        """Persist the DataFrame in memory.

        Returns:
            Self after persisting.
        """
        self._df.persist()
        return self

    def unpersist(self) -> "SparkDataSource":
        """Unpersist the DataFrame from memory.

        Returns:
            Self after unpersisting.
        """
        self._df.unpersist()
        return self

    def cache(self) -> "SparkDataSource":
        """Cache the DataFrame (alias for persist).

        Returns:
            Self after caching.
        """
        self._df.cache()
        return self

    def explain(self, extended: bool = False) -> str:
        """Get the execution plan.

        Args:
            extended: If True, show extended plan.

        Returns:
            Execution plan as string.
        """
        import contextlib
        import io

        # DataFrame.explain() prints the plan instead of returning it, so
        # capture stdout for the duration of the call. The context manager
        # restores the previous stdout even if explain() raises.
        buffer = io.StringIO()
        with contextlib.redirect_stdout(buffer):
            self._df.explain(extended=extended)
        return buffer.getvalue()

    def get_num_partitions(self) -> int:
        """Get the number of partitions."""
        return self._df.rdd.getNumPartitions()

    def get_storage_level(self) -> str:
        """Get the storage level (if persisted)."""
        return str(self._df.storageLevel)

    @classmethod
    def from_table(
        cls,
        spark: Any,
        table_name: str,
        database: str | None = None,
        config: SparkDataSourceConfig | None = None,
    ) -> "SparkDataSource":
        """Create data source from a Spark table.

        Args:
            spark: SparkSession instance.
            table_name: Name of the table.
            database: Optional database name.
            config: Optional configuration. If it has no name, the qualified
                table name is assigned to it.

        Returns:
            SparkDataSource for the table.

        Example:
            >>> source = SparkDataSource.from_table(spark, "users", database="mydb")
        """
        _check_pyspark_available()

        full_name = f"{database}.{table_name}" if database else table_name
        df = spark.table(full_name)

        if config is None:
            config = SparkDataSourceConfig(name=full_name)
        else:
            config.name = config.name or full_name

        return cls(df, config)

    @classmethod
    def from_parquet(
        cls,
        spark: Any,
        path: str,
        config: SparkDataSourceConfig | None = None,
    ) -> "SparkDataSource":
        """Create data source from Parquet files.

        Args:
            spark: SparkSession instance.
            path: Path to Parquet file(s).
            config: Optional configuration. If it has no name, ``path`` is
                assigned to it (same convention as ``from_table``).

        Returns:
            SparkDataSource for the Parquet data.
        """
        _check_pyspark_available()

        df = spark.read.parquet(path)

        if config is None:
            config = SparkDataSourceConfig(name=path)
        else:
            # Mirror from_table: only fill in a name the caller left unset.
            config.name = config.name or path

        return cls(df, config)
|
|
@@ -0,0 +1,154 @@
|
|
|
1
|
+
"""SQL data source implementations.
|
|
2
|
+
|
|
3
|
+
This subpackage provides data sources for SQL databases,
|
|
4
|
+
enabling validation directly on database tables with SQL pushdown
|
|
5
|
+
optimization where possible.
|
|
6
|
+
|
|
7
|
+
Supported databases:
|
|
8
|
+
|
|
9
|
+
Traditional RDBMS:
|
|
10
|
+
- SQLite (built-in)
|
|
11
|
+
- PostgreSQL (requires: pip install psycopg2-binary)
|
|
12
|
+
- MySQL (requires: pip install pymysql)
|
|
13
|
+
- Oracle (requires: pip install oracledb)
|
|
14
|
+
- SQL Server (requires: pip install pyodbc or pymssql)
|
|
15
|
+
|
|
16
|
+
Cloud Data Warehouses:
|
|
17
|
+
- BigQuery (requires: pip install google-cloud-bigquery db-dtypes)
|
|
18
|
+
- Snowflake (requires: pip install snowflake-connector-python)
|
|
19
|
+
- Redshift (requires: pip install redshift-connector)
|
|
20
|
+
- Databricks (requires: pip install databricks-sql-connector)
|
|
21
|
+
"""
|
|
22
|
+
|
|
23
|
+
from truthound.datasources.sql.base import (
|
|
24
|
+
BaseSQLDataSource,
|
|
25
|
+
SQLDataSourceConfig,
|
|
26
|
+
SQLConnectionPool,
|
|
27
|
+
)
|
|
28
|
+
from truthound.datasources.sql.cloud_base import (
|
|
29
|
+
CloudDWConfig,
|
|
30
|
+
CloudDWDataSource,
|
|
31
|
+
load_credentials_from_env,
|
|
32
|
+
load_service_account_json,
|
|
33
|
+
)
|
|
34
|
+
|
|
35
|
+
# Core SQL databases (always available)
|
|
36
|
+
from truthound.datasources.sql.sqlite import SQLiteDataSource
|
|
37
|
+
|
|
38
|
+
# Optional backends. Each import is attempted on its own; when it raises
# ImportError, the names it would have provided are bound to None so that
# importing this package still succeeds.
try:
    from truthound.datasources.sql.postgresql import PostgreSQLDataSource
except ImportError:
    PostgreSQLDataSource = None  # type: ignore

try:
    from truthound.datasources.sql.mysql import MySQLDataSource
except ImportError:
    MySQLDataSource = None  # type: ignore

try:
    from truthound.datasources.sql.oracle import OracleDataSource, OracleConfig
except ImportError:
    # Both names fall back together.
    OracleDataSource = OracleConfig = None  # type: ignore

try:
    from truthound.datasources.sql.sqlserver import SQLServerDataSource, SQLServerConfig
except ImportError:
    SQLServerDataSource = SQLServerConfig = None  # type: ignore

try:
    from truthound.datasources.sql.bigquery import BigQueryDataSource, BigQueryConfig
except ImportError:
    BigQueryDataSource = BigQueryConfig = None  # type: ignore

try:
    from truthound.datasources.sql.snowflake import SnowflakeDataSource, SnowflakeConfig
except ImportError:
    SnowflakeDataSource = SnowflakeConfig = None  # type: ignore

try:
    from truthound.datasources.sql.redshift import RedshiftDataSource, RedshiftConfig
except ImportError:
    RedshiftDataSource = RedshiftConfig = None  # type: ignore

try:
    from truthound.datasources.sql.databricks import DatabricksDataSource, DatabricksConfig, DatabricksSQLDataSource
except ImportError:
    DatabricksDataSource = DatabricksConfig = DatabricksSQLDataSource = None  # type: ignore
|
|
89
|
+
|
|
90
|
+
|
|
91
|
+
def get_available_sources() -> dict[str, type | None]:
    """Map every known SQL source name to its data source class.

    All source names are always present as keys. A backend whose import
    fell back to ``None`` at module load appears with ``None`` as its value.

    Returns:
        Dictionary keyed by source name; each value is the data source
        class, or None if that source is not available.
    """
    registry: dict[str, type | None] = dict(
        sqlite=SQLiteDataSource,
        postgresql=PostgreSQLDataSource,
        mysql=MySQLDataSource,
        oracle=OracleDataSource,
        sqlserver=SQLServerDataSource,
        bigquery=BigQueryDataSource,
        snowflake=SnowflakeDataSource,
        redshift=RedshiftDataSource,
        databricks=DatabricksDataSource,
    )
    return registry
|
|
108
|
+
|
|
109
|
+
|
|
110
|
+
def check_source_available(source_type: str) -> bool:
    """Report whether the named SQL source type can be used.

    Args:
        source_type: Source type name, looked up as a key of the mapping
            returned by ``get_available_sources()``.

    Returns:
        True if the name is known and mapped to a class; False if the name
        is unknown or its entry is None.
    """
    source_cls = get_available_sources().get(source_type)
    # A missing key and an unavailable backend both yield None here.
    if source_cls is None:
        return False
    return True
|
|
121
|
+
|
|
122
|
+
|
|
123
|
+
# Public API of this package. Names in the optional groups are always
# exported, but are bound to None when their import raised ImportError.
__all__ = [
    # Base classes and configuration shared by the SQL data sources
    "BaseSQLDataSource",
    "SQLDataSourceConfig",
    "SQLConnectionPool",
    "CloudDWConfig",
    "CloudDWDataSource",
    # Utilities: credential loaders and availability helpers
    "load_credentials_from_env",
    "load_service_account_json",
    "get_available_sources",
    "check_source_available",
    # Core implementations (always available; imported without a fallback)
    "SQLiteDataSource",
    # Traditional RDBMS (optional; None if the import failed)
    "PostgreSQLDataSource",
    "MySQLDataSource",
    "OracleDataSource",
    "OracleConfig",
    "SQLServerDataSource",
    "SQLServerConfig",
    # Cloud Data Warehouses (optional; None if the import failed)
    "BigQueryDataSource",
    "BigQueryConfig",
    "SnowflakeDataSource",
    "SnowflakeConfig",
    "RedshiftDataSource",
    "RedshiftConfig",
    "DatabricksDataSource",
    "DatabricksConfig",
    "DatabricksSQLDataSource",
]
|