truthound 1.0.8__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- truthound/__init__.py +162 -0
- truthound/adapters.py +100 -0
- truthound/api.py +365 -0
- truthound/audit/__init__.py +248 -0
- truthound/audit/core.py +967 -0
- truthound/audit/filters.py +620 -0
- truthound/audit/formatters.py +707 -0
- truthound/audit/logger.py +902 -0
- truthound/audit/middleware.py +571 -0
- truthound/audit/storage.py +1083 -0
- truthound/benchmark/__init__.py +123 -0
- truthound/benchmark/base.py +757 -0
- truthound/benchmark/comparison.py +635 -0
- truthound/benchmark/generators.py +706 -0
- truthound/benchmark/reporters.py +718 -0
- truthound/benchmark/runner.py +635 -0
- truthound/benchmark/scenarios.py +712 -0
- truthound/cache.py +252 -0
- truthound/checkpoint/__init__.py +136 -0
- truthound/checkpoint/actions/__init__.py +164 -0
- truthound/checkpoint/actions/base.py +324 -0
- truthound/checkpoint/actions/custom.py +234 -0
- truthound/checkpoint/actions/discord_notify.py +290 -0
- truthound/checkpoint/actions/email_notify.py +405 -0
- truthound/checkpoint/actions/github_action.py +406 -0
- truthound/checkpoint/actions/opsgenie.py +1499 -0
- truthound/checkpoint/actions/pagerduty.py +226 -0
- truthound/checkpoint/actions/slack_notify.py +233 -0
- truthound/checkpoint/actions/store_result.py +249 -0
- truthound/checkpoint/actions/teams_notify.py +1570 -0
- truthound/checkpoint/actions/telegram_notify.py +419 -0
- truthound/checkpoint/actions/update_docs.py +552 -0
- truthound/checkpoint/actions/webhook.py +293 -0
- truthound/checkpoint/analytics/__init__.py +147 -0
- truthound/checkpoint/analytics/aggregations/__init__.py +23 -0
- truthound/checkpoint/analytics/aggregations/rollup.py +481 -0
- truthound/checkpoint/analytics/aggregations/time_bucket.py +306 -0
- truthound/checkpoint/analytics/analyzers/__init__.py +17 -0
- truthound/checkpoint/analytics/analyzers/anomaly.py +386 -0
- truthound/checkpoint/analytics/analyzers/base.py +270 -0
- truthound/checkpoint/analytics/analyzers/forecast.py +421 -0
- truthound/checkpoint/analytics/analyzers/trend.py +314 -0
- truthound/checkpoint/analytics/models.py +292 -0
- truthound/checkpoint/analytics/protocols.py +549 -0
- truthound/checkpoint/analytics/service.py +718 -0
- truthound/checkpoint/analytics/stores/__init__.py +16 -0
- truthound/checkpoint/analytics/stores/base.py +306 -0
- truthound/checkpoint/analytics/stores/memory_store.py +353 -0
- truthound/checkpoint/analytics/stores/sqlite_store.py +557 -0
- truthound/checkpoint/analytics/stores/timescale_store.py +501 -0
- truthound/checkpoint/async_actions.py +794 -0
- truthound/checkpoint/async_base.py +708 -0
- truthound/checkpoint/async_checkpoint.py +617 -0
- truthound/checkpoint/async_runner.py +639 -0
- truthound/checkpoint/checkpoint.py +527 -0
- truthound/checkpoint/ci/__init__.py +61 -0
- truthound/checkpoint/ci/detector.py +355 -0
- truthound/checkpoint/ci/reporter.py +436 -0
- truthound/checkpoint/ci/templates.py +454 -0
- truthound/checkpoint/circuitbreaker/__init__.py +133 -0
- truthound/checkpoint/circuitbreaker/breaker.py +542 -0
- truthound/checkpoint/circuitbreaker/core.py +252 -0
- truthound/checkpoint/circuitbreaker/detection.py +459 -0
- truthound/checkpoint/circuitbreaker/middleware.py +389 -0
- truthound/checkpoint/circuitbreaker/registry.py +357 -0
- truthound/checkpoint/distributed/__init__.py +139 -0
- truthound/checkpoint/distributed/backends/__init__.py +35 -0
- truthound/checkpoint/distributed/backends/celery_backend.py +503 -0
- truthound/checkpoint/distributed/backends/kubernetes_backend.py +696 -0
- truthound/checkpoint/distributed/backends/local_backend.py +397 -0
- truthound/checkpoint/distributed/backends/ray_backend.py +625 -0
- truthound/checkpoint/distributed/base.py +774 -0
- truthound/checkpoint/distributed/orchestrator.py +765 -0
- truthound/checkpoint/distributed/protocols.py +842 -0
- truthound/checkpoint/distributed/registry.py +449 -0
- truthound/checkpoint/idempotency/__init__.py +120 -0
- truthound/checkpoint/idempotency/core.py +295 -0
- truthound/checkpoint/idempotency/fingerprint.py +454 -0
- truthound/checkpoint/idempotency/locking.py +604 -0
- truthound/checkpoint/idempotency/service.py +592 -0
- truthound/checkpoint/idempotency/stores.py +653 -0
- truthound/checkpoint/monitoring/__init__.py +134 -0
- truthound/checkpoint/monitoring/aggregators/__init__.py +15 -0
- truthound/checkpoint/monitoring/aggregators/base.py +372 -0
- truthound/checkpoint/monitoring/aggregators/realtime.py +300 -0
- truthound/checkpoint/monitoring/aggregators/window.py +493 -0
- truthound/checkpoint/monitoring/collectors/__init__.py +17 -0
- truthound/checkpoint/monitoring/collectors/base.py +257 -0
- truthound/checkpoint/monitoring/collectors/memory_collector.py +617 -0
- truthound/checkpoint/monitoring/collectors/prometheus_collector.py +451 -0
- truthound/checkpoint/monitoring/collectors/redis_collector.py +518 -0
- truthound/checkpoint/monitoring/events.py +410 -0
- truthound/checkpoint/monitoring/protocols.py +636 -0
- truthound/checkpoint/monitoring/service.py +578 -0
- truthound/checkpoint/monitoring/views/__init__.py +17 -0
- truthound/checkpoint/monitoring/views/base.py +172 -0
- truthound/checkpoint/monitoring/views/queue_view.py +220 -0
- truthound/checkpoint/monitoring/views/task_view.py +240 -0
- truthound/checkpoint/monitoring/views/worker_view.py +263 -0
- truthound/checkpoint/registry.py +337 -0
- truthound/checkpoint/runner.py +356 -0
- truthound/checkpoint/transaction/__init__.py +133 -0
- truthound/checkpoint/transaction/base.py +389 -0
- truthound/checkpoint/transaction/compensatable.py +537 -0
- truthound/checkpoint/transaction/coordinator.py +576 -0
- truthound/checkpoint/transaction/executor.py +622 -0
- truthound/checkpoint/transaction/idempotency.py +534 -0
- truthound/checkpoint/transaction/saga/__init__.py +143 -0
- truthound/checkpoint/transaction/saga/builder.py +584 -0
- truthound/checkpoint/transaction/saga/definition.py +515 -0
- truthound/checkpoint/transaction/saga/event_store.py +542 -0
- truthound/checkpoint/transaction/saga/patterns.py +833 -0
- truthound/checkpoint/transaction/saga/runner.py +718 -0
- truthound/checkpoint/transaction/saga/state_machine.py +793 -0
- truthound/checkpoint/transaction/saga/strategies.py +780 -0
- truthound/checkpoint/transaction/saga/testing.py +886 -0
- truthound/checkpoint/triggers/__init__.py +58 -0
- truthound/checkpoint/triggers/base.py +237 -0
- truthound/checkpoint/triggers/event.py +385 -0
- truthound/checkpoint/triggers/schedule.py +355 -0
- truthound/cli.py +2358 -0
- truthound/cli_modules/__init__.py +124 -0
- truthound/cli_modules/advanced/__init__.py +45 -0
- truthound/cli_modules/advanced/benchmark.py +343 -0
- truthound/cli_modules/advanced/docs.py +225 -0
- truthound/cli_modules/advanced/lineage.py +209 -0
- truthound/cli_modules/advanced/ml.py +320 -0
- truthound/cli_modules/advanced/realtime.py +196 -0
- truthound/cli_modules/checkpoint/__init__.py +46 -0
- truthound/cli_modules/checkpoint/init.py +114 -0
- truthound/cli_modules/checkpoint/list.py +71 -0
- truthound/cli_modules/checkpoint/run.py +159 -0
- truthound/cli_modules/checkpoint/validate.py +67 -0
- truthound/cli_modules/common/__init__.py +71 -0
- truthound/cli_modules/common/errors.py +414 -0
- truthound/cli_modules/common/options.py +419 -0
- truthound/cli_modules/common/output.py +507 -0
- truthound/cli_modules/common/protocol.py +552 -0
- truthound/cli_modules/core/__init__.py +48 -0
- truthound/cli_modules/core/check.py +123 -0
- truthound/cli_modules/core/compare.py +104 -0
- truthound/cli_modules/core/learn.py +57 -0
- truthound/cli_modules/core/mask.py +77 -0
- truthound/cli_modules/core/profile.py +65 -0
- truthound/cli_modules/core/scan.py +61 -0
- truthound/cli_modules/profiler/__init__.py +51 -0
- truthound/cli_modules/profiler/auto_profile.py +175 -0
- truthound/cli_modules/profiler/metadata.py +107 -0
- truthound/cli_modules/profiler/suite.py +283 -0
- truthound/cli_modules/registry.py +431 -0
- truthound/cli_modules/scaffolding/__init__.py +89 -0
- truthound/cli_modules/scaffolding/base.py +631 -0
- truthound/cli_modules/scaffolding/commands.py +545 -0
- truthound/cli_modules/scaffolding/plugins.py +1072 -0
- truthound/cli_modules/scaffolding/reporters.py +594 -0
- truthound/cli_modules/scaffolding/validators.py +1127 -0
- truthound/common/__init__.py +18 -0
- truthound/common/resilience/__init__.py +130 -0
- truthound/common/resilience/bulkhead.py +266 -0
- truthound/common/resilience/circuit_breaker.py +516 -0
- truthound/common/resilience/composite.py +332 -0
- truthound/common/resilience/config.py +292 -0
- truthound/common/resilience/protocols.py +217 -0
- truthound/common/resilience/rate_limiter.py +404 -0
- truthound/common/resilience/retry.py +341 -0
- truthound/datadocs/__init__.py +260 -0
- truthound/datadocs/base.py +571 -0
- truthound/datadocs/builder.py +761 -0
- truthound/datadocs/charts.py +764 -0
- truthound/datadocs/dashboard/__init__.py +63 -0
- truthound/datadocs/dashboard/app.py +576 -0
- truthound/datadocs/dashboard/components.py +584 -0
- truthound/datadocs/dashboard/state.py +240 -0
- truthound/datadocs/engine/__init__.py +46 -0
- truthound/datadocs/engine/context.py +376 -0
- truthound/datadocs/engine/pipeline.py +618 -0
- truthound/datadocs/engine/registry.py +469 -0
- truthound/datadocs/exporters/__init__.py +49 -0
- truthound/datadocs/exporters/base.py +198 -0
- truthound/datadocs/exporters/html.py +178 -0
- truthound/datadocs/exporters/json_exporter.py +253 -0
- truthound/datadocs/exporters/markdown.py +284 -0
- truthound/datadocs/exporters/pdf.py +392 -0
- truthound/datadocs/i18n/__init__.py +86 -0
- truthound/datadocs/i18n/catalog.py +960 -0
- truthound/datadocs/i18n/formatting.py +505 -0
- truthound/datadocs/i18n/loader.py +256 -0
- truthound/datadocs/i18n/plurals.py +378 -0
- truthound/datadocs/renderers/__init__.py +42 -0
- truthound/datadocs/renderers/base.py +401 -0
- truthound/datadocs/renderers/custom.py +342 -0
- truthound/datadocs/renderers/jinja.py +697 -0
- truthound/datadocs/sections.py +736 -0
- truthound/datadocs/styles.py +931 -0
- truthound/datadocs/themes/__init__.py +101 -0
- truthound/datadocs/themes/base.py +336 -0
- truthound/datadocs/themes/default.py +417 -0
- truthound/datadocs/themes/enterprise.py +419 -0
- truthound/datadocs/themes/loader.py +336 -0
- truthound/datadocs/themes.py +301 -0
- truthound/datadocs/transformers/__init__.py +57 -0
- truthound/datadocs/transformers/base.py +268 -0
- truthound/datadocs/transformers/enrichers.py +544 -0
- truthound/datadocs/transformers/filters.py +447 -0
- truthound/datadocs/transformers/i18n.py +468 -0
- truthound/datadocs/versioning/__init__.py +62 -0
- truthound/datadocs/versioning/diff.py +639 -0
- truthound/datadocs/versioning/storage.py +497 -0
- truthound/datadocs/versioning/version.py +358 -0
- truthound/datasources/__init__.py +223 -0
- truthound/datasources/_async_protocols.py +222 -0
- truthound/datasources/_protocols.py +159 -0
- truthound/datasources/adapters.py +428 -0
- truthound/datasources/async_base.py +599 -0
- truthound/datasources/async_factory.py +511 -0
- truthound/datasources/base.py +516 -0
- truthound/datasources/factory.py +433 -0
- truthound/datasources/nosql/__init__.py +47 -0
- truthound/datasources/nosql/base.py +487 -0
- truthound/datasources/nosql/elasticsearch.py +801 -0
- truthound/datasources/nosql/mongodb.py +636 -0
- truthound/datasources/pandas_optimized.py +582 -0
- truthound/datasources/pandas_source.py +216 -0
- truthound/datasources/polars_source.py +395 -0
- truthound/datasources/spark_source.py +479 -0
- truthound/datasources/sql/__init__.py +154 -0
- truthound/datasources/sql/base.py +710 -0
- truthound/datasources/sql/bigquery.py +410 -0
- truthound/datasources/sql/cloud_base.py +199 -0
- truthound/datasources/sql/databricks.py +471 -0
- truthound/datasources/sql/mysql.py +316 -0
- truthound/datasources/sql/oracle.py +427 -0
- truthound/datasources/sql/postgresql.py +321 -0
- truthound/datasources/sql/redshift.py +479 -0
- truthound/datasources/sql/snowflake.py +439 -0
- truthound/datasources/sql/sqlite.py +286 -0
- truthound/datasources/sql/sqlserver.py +437 -0
- truthound/datasources/streaming/__init__.py +47 -0
- truthound/datasources/streaming/base.py +350 -0
- truthound/datasources/streaming/kafka.py +670 -0
- truthound/decorators.py +98 -0
- truthound/docs/__init__.py +69 -0
- truthound/docs/extractor.py +971 -0
- truthound/docs/generator.py +601 -0
- truthound/docs/parser.py +1037 -0
- truthound/docs/renderer.py +999 -0
- truthound/drift/__init__.py +22 -0
- truthound/drift/compare.py +189 -0
- truthound/drift/detectors.py +464 -0
- truthound/drift/report.py +160 -0
- truthound/execution/__init__.py +65 -0
- truthound/execution/_protocols.py +324 -0
- truthound/execution/base.py +576 -0
- truthound/execution/distributed/__init__.py +179 -0
- truthound/execution/distributed/aggregations.py +731 -0
- truthound/execution/distributed/arrow_bridge.py +817 -0
- truthound/execution/distributed/base.py +550 -0
- truthound/execution/distributed/dask_engine.py +976 -0
- truthound/execution/distributed/mixins.py +766 -0
- truthound/execution/distributed/protocols.py +756 -0
- truthound/execution/distributed/ray_engine.py +1127 -0
- truthound/execution/distributed/registry.py +446 -0
- truthound/execution/distributed/spark_engine.py +1011 -0
- truthound/execution/distributed/validator_adapter.py +682 -0
- truthound/execution/pandas_engine.py +401 -0
- truthound/execution/polars_engine.py +497 -0
- truthound/execution/pushdown/__init__.py +230 -0
- truthound/execution/pushdown/ast.py +1550 -0
- truthound/execution/pushdown/builder.py +1550 -0
- truthound/execution/pushdown/dialects.py +1072 -0
- truthound/execution/pushdown/executor.py +829 -0
- truthound/execution/pushdown/optimizer.py +1041 -0
- truthound/execution/sql_engine.py +518 -0
- truthound/infrastructure/__init__.py +189 -0
- truthound/infrastructure/audit.py +1515 -0
- truthound/infrastructure/config.py +1133 -0
- truthound/infrastructure/encryption.py +1132 -0
- truthound/infrastructure/logging.py +1503 -0
- truthound/infrastructure/metrics.py +1220 -0
- truthound/lineage/__init__.py +89 -0
- truthound/lineage/base.py +746 -0
- truthound/lineage/impact_analysis.py +474 -0
- truthound/lineage/integrations/__init__.py +22 -0
- truthound/lineage/integrations/openlineage.py +548 -0
- truthound/lineage/tracker.py +512 -0
- truthound/lineage/visualization/__init__.py +33 -0
- truthound/lineage/visualization/protocols.py +145 -0
- truthound/lineage/visualization/renderers/__init__.py +20 -0
- truthound/lineage/visualization/renderers/cytoscape.py +329 -0
- truthound/lineage/visualization/renderers/d3.py +331 -0
- truthound/lineage/visualization/renderers/graphviz.py +276 -0
- truthound/lineage/visualization/renderers/mermaid.py +308 -0
- truthound/maskers.py +113 -0
- truthound/ml/__init__.py +124 -0
- truthound/ml/anomaly_models/__init__.py +31 -0
- truthound/ml/anomaly_models/ensemble.py +362 -0
- truthound/ml/anomaly_models/isolation_forest.py +444 -0
- truthound/ml/anomaly_models/statistical.py +392 -0
- truthound/ml/base.py +1178 -0
- truthound/ml/drift_detection/__init__.py +26 -0
- truthound/ml/drift_detection/concept.py +381 -0
- truthound/ml/drift_detection/distribution.py +361 -0
- truthound/ml/drift_detection/feature.py +442 -0
- truthound/ml/drift_detection/multivariate.py +495 -0
- truthound/ml/monitoring/__init__.py +88 -0
- truthound/ml/monitoring/alerting/__init__.py +33 -0
- truthound/ml/monitoring/alerting/handlers.py +427 -0
- truthound/ml/monitoring/alerting/rules.py +508 -0
- truthound/ml/monitoring/collectors/__init__.py +19 -0
- truthound/ml/monitoring/collectors/composite.py +105 -0
- truthound/ml/monitoring/collectors/drift.py +324 -0
- truthound/ml/monitoring/collectors/performance.py +179 -0
- truthound/ml/monitoring/collectors/quality.py +369 -0
- truthound/ml/monitoring/monitor.py +536 -0
- truthound/ml/monitoring/protocols.py +451 -0
- truthound/ml/monitoring/stores/__init__.py +15 -0
- truthound/ml/monitoring/stores/memory.py +201 -0
- truthound/ml/monitoring/stores/prometheus.py +296 -0
- truthound/ml/rule_learning/__init__.py +25 -0
- truthound/ml/rule_learning/constraint_miner.py +443 -0
- truthound/ml/rule_learning/pattern_learner.py +499 -0
- truthound/ml/rule_learning/profile_learner.py +462 -0
- truthound/multitenancy/__init__.py +326 -0
- truthound/multitenancy/core.py +852 -0
- truthound/multitenancy/integration.py +597 -0
- truthound/multitenancy/isolation.py +630 -0
- truthound/multitenancy/manager.py +770 -0
- truthound/multitenancy/middleware.py +765 -0
- truthound/multitenancy/quota.py +537 -0
- truthound/multitenancy/resolvers.py +603 -0
- truthound/multitenancy/storage.py +703 -0
- truthound/observability/__init__.py +307 -0
- truthound/observability/context.py +531 -0
- truthound/observability/instrumentation.py +611 -0
- truthound/observability/logging.py +887 -0
- truthound/observability/metrics.py +1157 -0
- truthound/observability/tracing/__init__.py +178 -0
- truthound/observability/tracing/baggage.py +310 -0
- truthound/observability/tracing/config.py +426 -0
- truthound/observability/tracing/exporter.py +787 -0
- truthound/observability/tracing/integration.py +1018 -0
- truthound/observability/tracing/otel/__init__.py +146 -0
- truthound/observability/tracing/otel/adapter.py +982 -0
- truthound/observability/tracing/otel/bridge.py +1177 -0
- truthound/observability/tracing/otel/compat.py +681 -0
- truthound/observability/tracing/otel/config.py +691 -0
- truthound/observability/tracing/otel/detection.py +327 -0
- truthound/observability/tracing/otel/protocols.py +426 -0
- truthound/observability/tracing/processor.py +561 -0
- truthound/observability/tracing/propagator.py +757 -0
- truthound/observability/tracing/provider.py +569 -0
- truthound/observability/tracing/resource.py +515 -0
- truthound/observability/tracing/sampler.py +487 -0
- truthound/observability/tracing/span.py +676 -0
- truthound/plugins/__init__.py +198 -0
- truthound/plugins/base.py +599 -0
- truthound/plugins/cli.py +680 -0
- truthound/plugins/dependencies/__init__.py +42 -0
- truthound/plugins/dependencies/graph.py +422 -0
- truthound/plugins/dependencies/resolver.py +417 -0
- truthound/plugins/discovery.py +379 -0
- truthound/plugins/docs/__init__.py +46 -0
- truthound/plugins/docs/extractor.py +444 -0
- truthound/plugins/docs/renderer.py +499 -0
- truthound/plugins/enterprise_manager.py +877 -0
- truthound/plugins/examples/__init__.py +19 -0
- truthound/plugins/examples/custom_validators.py +317 -0
- truthound/plugins/examples/slack_notifier.py +312 -0
- truthound/plugins/examples/xml_reporter.py +254 -0
- truthound/plugins/hooks.py +558 -0
- truthound/plugins/lifecycle/__init__.py +43 -0
- truthound/plugins/lifecycle/hot_reload.py +402 -0
- truthound/plugins/lifecycle/manager.py +371 -0
- truthound/plugins/manager.py +736 -0
- truthound/plugins/registry.py +338 -0
- truthound/plugins/security/__init__.py +93 -0
- truthound/plugins/security/exceptions.py +332 -0
- truthound/plugins/security/policies.py +348 -0
- truthound/plugins/security/protocols.py +643 -0
- truthound/plugins/security/sandbox/__init__.py +45 -0
- truthound/plugins/security/sandbox/context.py +158 -0
- truthound/plugins/security/sandbox/engines/__init__.py +19 -0
- truthound/plugins/security/sandbox/engines/container.py +379 -0
- truthound/plugins/security/sandbox/engines/noop.py +144 -0
- truthound/plugins/security/sandbox/engines/process.py +336 -0
- truthound/plugins/security/sandbox/factory.py +211 -0
- truthound/plugins/security/signing/__init__.py +57 -0
- truthound/plugins/security/signing/service.py +330 -0
- truthound/plugins/security/signing/trust_store.py +368 -0
- truthound/plugins/security/signing/verifier.py +459 -0
- truthound/plugins/versioning/__init__.py +41 -0
- truthound/plugins/versioning/constraints.py +297 -0
- truthound/plugins/versioning/resolver.py +329 -0
- truthound/profiler/__init__.py +1729 -0
- truthound/profiler/_lazy.py +452 -0
- truthound/profiler/ab_testing/__init__.py +80 -0
- truthound/profiler/ab_testing/analysis.py +449 -0
- truthound/profiler/ab_testing/base.py +257 -0
- truthound/profiler/ab_testing/experiment.py +395 -0
- truthound/profiler/ab_testing/tracking.py +368 -0
- truthound/profiler/auto_threshold.py +1170 -0
- truthound/profiler/base.py +579 -0
- truthound/profiler/cache_patterns.py +911 -0
- truthound/profiler/caching.py +1303 -0
- truthound/profiler/column_profiler.py +712 -0
- truthound/profiler/comparison.py +1007 -0
- truthound/profiler/custom_patterns.py +1170 -0
- truthound/profiler/dashboard/__init__.py +50 -0
- truthound/profiler/dashboard/app.py +476 -0
- truthound/profiler/dashboard/components.py +457 -0
- truthound/profiler/dashboard/config.py +72 -0
- truthound/profiler/distributed/__init__.py +83 -0
- truthound/profiler/distributed/base.py +281 -0
- truthound/profiler/distributed/dask_backend.py +498 -0
- truthound/profiler/distributed/local_backend.py +293 -0
- truthound/profiler/distributed/profiler.py +304 -0
- truthound/profiler/distributed/ray_backend.py +374 -0
- truthound/profiler/distributed/spark_backend.py +375 -0
- truthound/profiler/distributed.py +1366 -0
- truthound/profiler/enterprise_sampling.py +1065 -0
- truthound/profiler/errors.py +488 -0
- truthound/profiler/evolution/__init__.py +91 -0
- truthound/profiler/evolution/alerts.py +426 -0
- truthound/profiler/evolution/changes.py +206 -0
- truthound/profiler/evolution/compatibility.py +365 -0
- truthound/profiler/evolution/detector.py +372 -0
- truthound/profiler/evolution/protocols.py +121 -0
- truthound/profiler/generators/__init__.py +48 -0
- truthound/profiler/generators/base.py +384 -0
- truthound/profiler/generators/ml_rules.py +375 -0
- truthound/profiler/generators/pattern_rules.py +384 -0
- truthound/profiler/generators/schema_rules.py +267 -0
- truthound/profiler/generators/stats_rules.py +324 -0
- truthound/profiler/generators/suite_generator.py +857 -0
- truthound/profiler/i18n.py +1542 -0
- truthound/profiler/incremental.py +554 -0
- truthound/profiler/incremental_validation.py +1710 -0
- truthound/profiler/integration/__init__.py +73 -0
- truthound/profiler/integration/adapters.py +345 -0
- truthound/profiler/integration/context.py +371 -0
- truthound/profiler/integration/executor.py +527 -0
- truthound/profiler/integration/naming.py +75 -0
- truthound/profiler/integration/protocols.py +243 -0
- truthound/profiler/memory.py +1185 -0
- truthound/profiler/migration/__init__.py +60 -0
- truthound/profiler/migration/base.py +345 -0
- truthound/profiler/migration/manager.py +444 -0
- truthound/profiler/migration/v1_0_to_v1_1.py +484 -0
- truthound/profiler/ml/__init__.py +73 -0
- truthound/profiler/ml/base.py +244 -0
- truthound/profiler/ml/classifier.py +507 -0
- truthound/profiler/ml/feature_extraction.py +604 -0
- truthound/profiler/ml/pretrained.py +448 -0
- truthound/profiler/ml_inference.py +1276 -0
- truthound/profiler/native_patterns.py +815 -0
- truthound/profiler/observability.py +1184 -0
- truthound/profiler/process_timeout.py +1566 -0
- truthound/profiler/progress.py +568 -0
- truthound/profiler/progress_callbacks.py +1734 -0
- truthound/profiler/quality.py +1345 -0
- truthound/profiler/resilience.py +1180 -0
- truthound/profiler/sampled_matcher.py +794 -0
- truthound/profiler/sampling.py +1288 -0
- truthound/profiler/scheduling/__init__.py +82 -0
- truthound/profiler/scheduling/protocols.py +214 -0
- truthound/profiler/scheduling/scheduler.py +474 -0
- truthound/profiler/scheduling/storage.py +457 -0
- truthound/profiler/scheduling/triggers.py +449 -0
- truthound/profiler/schema.py +603 -0
- truthound/profiler/streaming.py +685 -0
- truthound/profiler/streaming_patterns.py +1354 -0
- truthound/profiler/suite_cli.py +625 -0
- truthound/profiler/suite_config.py +789 -0
- truthound/profiler/suite_export.py +1268 -0
- truthound/profiler/table_profiler.py +547 -0
- truthound/profiler/timeout.py +565 -0
- truthound/profiler/validation.py +1532 -0
- truthound/profiler/visualization/__init__.py +118 -0
- truthound/profiler/visualization/base.py +346 -0
- truthound/profiler/visualization/generator.py +1259 -0
- truthound/profiler/visualization/plotly_renderer.py +811 -0
- truthound/profiler/visualization/renderers.py +669 -0
- truthound/profiler/visualization/sections.py +540 -0
- truthound/profiler/visualization.py +2122 -0
- truthound/profiler/yaml_validation.py +1151 -0
- truthound/py.typed +0 -0
- truthound/ratelimit/__init__.py +248 -0
- truthound/ratelimit/algorithms.py +1108 -0
- truthound/ratelimit/core.py +573 -0
- truthound/ratelimit/integration.py +532 -0
- truthound/ratelimit/limiter.py +663 -0
- truthound/ratelimit/middleware.py +700 -0
- truthound/ratelimit/policy.py +792 -0
- truthound/ratelimit/storage.py +763 -0
- truthound/rbac/__init__.py +340 -0
- truthound/rbac/core.py +976 -0
- truthound/rbac/integration.py +760 -0
- truthound/rbac/manager.py +1052 -0
- truthound/rbac/middleware.py +842 -0
- truthound/rbac/policy.py +954 -0
- truthound/rbac/storage.py +878 -0
- truthound/realtime/__init__.py +141 -0
- truthound/realtime/adapters/__init__.py +43 -0
- truthound/realtime/adapters/base.py +533 -0
- truthound/realtime/adapters/kafka.py +487 -0
- truthound/realtime/adapters/kinesis.py +479 -0
- truthound/realtime/adapters/mock.py +243 -0
- truthound/realtime/base.py +553 -0
- truthound/realtime/factory.py +382 -0
- truthound/realtime/incremental.py +660 -0
- truthound/realtime/processing/__init__.py +67 -0
- truthound/realtime/processing/exactly_once.py +575 -0
- truthound/realtime/processing/state.py +547 -0
- truthound/realtime/processing/windows.py +647 -0
- truthound/realtime/protocols.py +569 -0
- truthound/realtime/streaming.py +605 -0
- truthound/realtime/testing/__init__.py +32 -0
- truthound/realtime/testing/containers.py +615 -0
- truthound/realtime/testing/fixtures.py +484 -0
- truthound/report.py +280 -0
- truthound/reporters/__init__.py +46 -0
- truthound/reporters/_protocols.py +30 -0
- truthound/reporters/base.py +324 -0
- truthound/reporters/ci/__init__.py +66 -0
- truthound/reporters/ci/azure.py +436 -0
- truthound/reporters/ci/base.py +509 -0
- truthound/reporters/ci/bitbucket.py +567 -0
- truthound/reporters/ci/circleci.py +547 -0
- truthound/reporters/ci/detection.py +364 -0
- truthound/reporters/ci/factory.py +182 -0
- truthound/reporters/ci/github.py +388 -0
- truthound/reporters/ci/gitlab.py +471 -0
- truthound/reporters/ci/jenkins.py +525 -0
- truthound/reporters/console_reporter.py +299 -0
- truthound/reporters/factory.py +211 -0
- truthound/reporters/html_reporter.py +524 -0
- truthound/reporters/json_reporter.py +256 -0
- truthound/reporters/markdown_reporter.py +280 -0
- truthound/reporters/sdk/__init__.py +174 -0
- truthound/reporters/sdk/builder.py +558 -0
- truthound/reporters/sdk/mixins.py +1150 -0
- truthound/reporters/sdk/schema.py +1493 -0
- truthound/reporters/sdk/templates.py +666 -0
- truthound/reporters/sdk/testing.py +968 -0
- truthound/scanners.py +170 -0
- truthound/scheduling/__init__.py +122 -0
- truthound/scheduling/cron.py +1136 -0
- truthound/scheduling/presets.py +212 -0
- truthound/schema.py +275 -0
- truthound/secrets/__init__.py +173 -0
- truthound/secrets/base.py +618 -0
- truthound/secrets/cloud.py +682 -0
- truthound/secrets/integration.py +507 -0
- truthound/secrets/manager.py +633 -0
- truthound/secrets/oidc/__init__.py +172 -0
- truthound/secrets/oidc/base.py +902 -0
- truthound/secrets/oidc/credential_provider.py +623 -0
- truthound/secrets/oidc/exchangers.py +1001 -0
- truthound/secrets/oidc/github/__init__.py +110 -0
- truthound/secrets/oidc/github/claims.py +718 -0
- truthound/secrets/oidc/github/enhanced_provider.py +693 -0
- truthound/secrets/oidc/github/trust_policy.py +742 -0
- truthound/secrets/oidc/github/verification.py +723 -0
- truthound/secrets/oidc/github/workflow.py +691 -0
- truthound/secrets/oidc/providers.py +825 -0
- truthound/secrets/providers.py +506 -0
- truthound/secrets/resolver.py +495 -0
- truthound/stores/__init__.py +177 -0
- truthound/stores/backends/__init__.py +18 -0
- truthound/stores/backends/_protocols.py +340 -0
- truthound/stores/backends/azure_blob.py +530 -0
- truthound/stores/backends/concurrent_filesystem.py +915 -0
- truthound/stores/backends/connection_pool.py +1365 -0
- truthound/stores/backends/database.py +743 -0
- truthound/stores/backends/filesystem.py +538 -0
- truthound/stores/backends/gcs.py +399 -0
- truthound/stores/backends/memory.py +354 -0
- truthound/stores/backends/s3.py +434 -0
- truthound/stores/backpressure/__init__.py +84 -0
- truthound/stores/backpressure/base.py +375 -0
- truthound/stores/backpressure/circuit_breaker.py +434 -0
- truthound/stores/backpressure/monitor.py +376 -0
- truthound/stores/backpressure/strategies.py +677 -0
- truthound/stores/base.py +551 -0
- truthound/stores/batching/__init__.py +65 -0
- truthound/stores/batching/base.py +305 -0
- truthound/stores/batching/buffer.py +370 -0
- truthound/stores/batching/store.py +248 -0
- truthound/stores/batching/writer.py +521 -0
- truthound/stores/caching/__init__.py +60 -0
- truthound/stores/caching/backends.py +684 -0
- truthound/stores/caching/base.py +356 -0
- truthound/stores/caching/store.py +305 -0
- truthound/stores/compression/__init__.py +193 -0
- truthound/stores/compression/adaptive.py +694 -0
- truthound/stores/compression/base.py +514 -0
- truthound/stores/compression/pipeline.py +868 -0
- truthound/stores/compression/providers.py +672 -0
- truthound/stores/compression/streaming.py +832 -0
- truthound/stores/concurrency/__init__.py +81 -0
- truthound/stores/concurrency/atomic.py +556 -0
- truthound/stores/concurrency/index.py +775 -0
- truthound/stores/concurrency/locks.py +576 -0
- truthound/stores/concurrency/manager.py +482 -0
- truthound/stores/encryption/__init__.py +297 -0
- truthound/stores/encryption/base.py +952 -0
- truthound/stores/encryption/keys.py +1191 -0
- truthound/stores/encryption/pipeline.py +903 -0
- truthound/stores/encryption/providers.py +953 -0
- truthound/stores/encryption/streaming.py +950 -0
- truthound/stores/expectations.py +227 -0
- truthound/stores/factory.py +246 -0
- truthound/stores/migration/__init__.py +75 -0
- truthound/stores/migration/base.py +480 -0
- truthound/stores/migration/manager.py +347 -0
- truthound/stores/migration/registry.py +382 -0
- truthound/stores/migration/store.py +559 -0
- truthound/stores/observability/__init__.py +106 -0
- truthound/stores/observability/audit.py +718 -0
- truthound/stores/observability/config.py +270 -0
- truthound/stores/observability/factory.py +208 -0
- truthound/stores/observability/metrics.py +636 -0
- truthound/stores/observability/protocols.py +410 -0
- truthound/stores/observability/store.py +570 -0
- truthound/stores/observability/tracing.py +784 -0
- truthound/stores/replication/__init__.py +76 -0
- truthound/stores/replication/base.py +260 -0
- truthound/stores/replication/monitor.py +269 -0
- truthound/stores/replication/store.py +439 -0
- truthound/stores/replication/syncer.py +391 -0
- truthound/stores/results.py +359 -0
- truthound/stores/retention/__init__.py +77 -0
- truthound/stores/retention/base.py +378 -0
- truthound/stores/retention/policies.py +621 -0
- truthound/stores/retention/scheduler.py +279 -0
- truthound/stores/retention/store.py +526 -0
- truthound/stores/streaming/__init__.py +138 -0
- truthound/stores/streaming/base.py +801 -0
- truthound/stores/streaming/database.py +984 -0
- truthound/stores/streaming/filesystem.py +719 -0
- truthound/stores/streaming/reader.py +629 -0
- truthound/stores/streaming/s3.py +843 -0
- truthound/stores/streaming/writer.py +790 -0
- truthound/stores/tiering/__init__.py +108 -0
- truthound/stores/tiering/base.py +462 -0
- truthound/stores/tiering/manager.py +249 -0
- truthound/stores/tiering/policies.py +692 -0
- truthound/stores/tiering/store.py +526 -0
- truthound/stores/versioning/__init__.py +56 -0
- truthound/stores/versioning/base.py +376 -0
- truthound/stores/versioning/store.py +660 -0
- truthound/stores/versioning/strategies.py +353 -0
- truthound/types.py +56 -0
- truthound/validators/__init__.py +774 -0
- truthound/validators/aggregate/__init__.py +27 -0
- truthound/validators/aggregate/central.py +116 -0
- truthound/validators/aggregate/extremes.py +116 -0
- truthound/validators/aggregate/spread.py +118 -0
- truthound/validators/aggregate/sum.py +64 -0
- truthound/validators/aggregate/type.py +78 -0
- truthound/validators/anomaly/__init__.py +93 -0
- truthound/validators/anomaly/base.py +431 -0
- truthound/validators/anomaly/ml_based.py +1190 -0
- truthound/validators/anomaly/multivariate.py +647 -0
- truthound/validators/anomaly/statistical.py +599 -0
- truthound/validators/base.py +1089 -0
- truthound/validators/business_rule/__init__.py +46 -0
- truthound/validators/business_rule/base.py +147 -0
- truthound/validators/business_rule/checksum.py +509 -0
- truthound/validators/business_rule/financial.py +526 -0
- truthound/validators/cache.py +733 -0
- truthound/validators/completeness/__init__.py +39 -0
- truthound/validators/completeness/conditional.py +73 -0
- truthound/validators/completeness/default.py +98 -0
- truthound/validators/completeness/empty.py +103 -0
- truthound/validators/completeness/nan.py +337 -0
- truthound/validators/completeness/null.py +152 -0
- truthound/validators/cross_table/__init__.py +17 -0
- truthound/validators/cross_table/aggregate.py +333 -0
- truthound/validators/cross_table/row_count.py +122 -0
- truthound/validators/datetime/__init__.py +29 -0
- truthound/validators/datetime/format.py +78 -0
- truthound/validators/datetime/freshness.py +269 -0
- truthound/validators/datetime/order.py +73 -0
- truthound/validators/datetime/parseable.py +185 -0
- truthound/validators/datetime/range.py +202 -0
- truthound/validators/datetime/timezone.py +69 -0
- truthound/validators/distribution/__init__.py +49 -0
- truthound/validators/distribution/distribution.py +128 -0
- truthound/validators/distribution/monotonic.py +119 -0
- truthound/validators/distribution/outlier.py +178 -0
- truthound/validators/distribution/quantile.py +80 -0
- truthound/validators/distribution/range.py +254 -0
- truthound/validators/distribution/set.py +125 -0
- truthound/validators/distribution/statistical.py +459 -0
- truthound/validators/drift/__init__.py +79 -0
- truthound/validators/drift/base.py +427 -0
- truthound/validators/drift/multi_feature.py +401 -0
- truthound/validators/drift/numeric.py +395 -0
- truthound/validators/drift/psi.py +446 -0
- truthound/validators/drift/statistical.py +510 -0
- truthound/validators/enterprise.py +1658 -0
- truthound/validators/geospatial/__init__.py +80 -0
- truthound/validators/geospatial/base.py +97 -0
- truthound/validators/geospatial/boundary.py +238 -0
- truthound/validators/geospatial/coordinate.py +351 -0
- truthound/validators/geospatial/distance.py +399 -0
- truthound/validators/geospatial/polygon.py +665 -0
- truthound/validators/i18n/__init__.py +308 -0
- truthound/validators/i18n/bidi.py +571 -0
- truthound/validators/i18n/catalogs.py +570 -0
- truthound/validators/i18n/dialects.py +763 -0
- truthound/validators/i18n/extended_catalogs.py +549 -0
- truthound/validators/i18n/formatting.py +1434 -0
- truthound/validators/i18n/loader.py +1020 -0
- truthound/validators/i18n/messages.py +521 -0
- truthound/validators/i18n/plural.py +683 -0
- truthound/validators/i18n/protocols.py +855 -0
- truthound/validators/i18n/tms.py +1162 -0
- truthound/validators/localization/__init__.py +53 -0
- truthound/validators/localization/base.py +122 -0
- truthound/validators/localization/chinese.py +362 -0
- truthound/validators/localization/japanese.py +275 -0
- truthound/validators/localization/korean.py +524 -0
- truthound/validators/memory/__init__.py +94 -0
- truthound/validators/memory/approximate_knn.py +506 -0
- truthound/validators/memory/base.py +547 -0
- truthound/validators/memory/sgd_online.py +719 -0
- truthound/validators/memory/streaming_ecdf.py +753 -0
- truthound/validators/ml_feature/__init__.py +54 -0
- truthound/validators/ml_feature/base.py +249 -0
- truthound/validators/ml_feature/correlation.py +299 -0
- truthound/validators/ml_feature/leakage.py +344 -0
- truthound/validators/ml_feature/null_impact.py +270 -0
- truthound/validators/ml_feature/scale.py +264 -0
- truthound/validators/multi_column/__init__.py +89 -0
- truthound/validators/multi_column/arithmetic.py +284 -0
- truthound/validators/multi_column/base.py +231 -0
- truthound/validators/multi_column/comparison.py +273 -0
- truthound/validators/multi_column/consistency.py +312 -0
- truthound/validators/multi_column/statistical.py +299 -0
- truthound/validators/optimization/__init__.py +164 -0
- truthound/validators/optimization/aggregation.py +563 -0
- truthound/validators/optimization/covariance.py +556 -0
- truthound/validators/optimization/geo.py +626 -0
- truthound/validators/optimization/graph.py +587 -0
- truthound/validators/optimization/orchestrator.py +970 -0
- truthound/validators/optimization/profiling.py +1312 -0
- truthound/validators/privacy/__init__.py +223 -0
- truthound/validators/privacy/base.py +635 -0
- truthound/validators/privacy/ccpa.py +670 -0
- truthound/validators/privacy/gdpr.py +728 -0
- truthound/validators/privacy/global_patterns.py +604 -0
- truthound/validators/privacy/plugins.py +867 -0
- truthound/validators/profiling/__init__.py +52 -0
- truthound/validators/profiling/base.py +175 -0
- truthound/validators/profiling/cardinality.py +312 -0
- truthound/validators/profiling/entropy.py +391 -0
- truthound/validators/profiling/frequency.py +455 -0
- truthound/validators/pushdown_support.py +660 -0
- truthound/validators/query/__init__.py +91 -0
- truthound/validators/query/aggregate.py +346 -0
- truthound/validators/query/base.py +246 -0
- truthound/validators/query/column.py +249 -0
- truthound/validators/query/expression.py +274 -0
- truthound/validators/query/result.py +323 -0
- truthound/validators/query/row_count.py +264 -0
- truthound/validators/referential/__init__.py +80 -0
- truthound/validators/referential/base.py +395 -0
- truthound/validators/referential/cascade.py +391 -0
- truthound/validators/referential/circular.py +563 -0
- truthound/validators/referential/foreign_key.py +624 -0
- truthound/validators/referential/orphan.py +485 -0
- truthound/validators/registry.py +112 -0
- truthound/validators/schema/__init__.py +41 -0
- truthound/validators/schema/column_count.py +142 -0
- truthound/validators/schema/column_exists.py +80 -0
- truthound/validators/schema/column_order.py +82 -0
- truthound/validators/schema/column_pair.py +85 -0
- truthound/validators/schema/column_pair_set.py +195 -0
- truthound/validators/schema/column_type.py +94 -0
- truthound/validators/schema/multi_column.py +53 -0
- truthound/validators/schema/multi_column_aggregate.py +175 -0
- truthound/validators/schema/referential.py +274 -0
- truthound/validators/schema/table_schema.py +91 -0
- truthound/validators/schema_validator.py +219 -0
- truthound/validators/sdk/__init__.py +250 -0
- truthound/validators/sdk/builder.py +680 -0
- truthound/validators/sdk/decorators.py +474 -0
- truthound/validators/sdk/enterprise/__init__.py +211 -0
- truthound/validators/sdk/enterprise/docs.py +725 -0
- truthound/validators/sdk/enterprise/fuzzing.py +659 -0
- truthound/validators/sdk/enterprise/licensing.py +709 -0
- truthound/validators/sdk/enterprise/manager.py +543 -0
- truthound/validators/sdk/enterprise/resources.py +628 -0
- truthound/validators/sdk/enterprise/sandbox.py +766 -0
- truthound/validators/sdk/enterprise/signing.py +603 -0
- truthound/validators/sdk/enterprise/templates.py +865 -0
- truthound/validators/sdk/enterprise/versioning.py +659 -0
- truthound/validators/sdk/templates.py +757 -0
- truthound/validators/sdk/testing.py +807 -0
- truthound/validators/security/__init__.py +181 -0
- truthound/validators/security/redos/__init__.py +182 -0
- truthound/validators/security/redos/core.py +861 -0
- truthound/validators/security/redos/cpu_monitor.py +593 -0
- truthound/validators/security/redos/cve_database.py +791 -0
- truthound/validators/security/redos/ml/__init__.py +155 -0
- truthound/validators/security/redos/ml/base.py +785 -0
- truthound/validators/security/redos/ml/datasets.py +618 -0
- truthound/validators/security/redos/ml/features.py +359 -0
- truthound/validators/security/redos/ml/models.py +1000 -0
- truthound/validators/security/redos/ml/predictor.py +507 -0
- truthound/validators/security/redos/ml/storage.py +632 -0
- truthound/validators/security/redos/ml/training.py +571 -0
- truthound/validators/security/redos/ml_analyzer.py +937 -0
- truthound/validators/security/redos/optimizer.py +674 -0
- truthound/validators/security/redos/profiler.py +682 -0
- truthound/validators/security/redos/re2_engine.py +709 -0
- truthound/validators/security/redos.py +886 -0
- truthound/validators/security/sql_security.py +1247 -0
- truthound/validators/streaming/__init__.py +126 -0
- truthound/validators/streaming/base.py +292 -0
- truthound/validators/streaming/completeness.py +210 -0
- truthound/validators/streaming/mixin.py +575 -0
- truthound/validators/streaming/range.py +308 -0
- truthound/validators/streaming/sources.py +846 -0
- truthound/validators/string/__init__.py +57 -0
- truthound/validators/string/casing.py +158 -0
- truthound/validators/string/charset.py +96 -0
- truthound/validators/string/format.py +501 -0
- truthound/validators/string/json.py +77 -0
- truthound/validators/string/json_schema.py +184 -0
- truthound/validators/string/length.py +104 -0
- truthound/validators/string/like_pattern.py +237 -0
- truthound/validators/string/regex.py +202 -0
- truthound/validators/string/regex_extended.py +435 -0
- truthound/validators/table/__init__.py +88 -0
- truthound/validators/table/base.py +78 -0
- truthound/validators/table/column_count.py +198 -0
- truthound/validators/table/freshness.py +362 -0
- truthound/validators/table/row_count.py +251 -0
- truthound/validators/table/schema.py +333 -0
- truthound/validators/table/size.py +285 -0
- truthound/validators/timeout/__init__.py +102 -0
- truthound/validators/timeout/advanced/__init__.py +247 -0
- truthound/validators/timeout/advanced/circuit_breaker.py +675 -0
- truthound/validators/timeout/advanced/prediction.py +773 -0
- truthound/validators/timeout/advanced/priority.py +618 -0
- truthound/validators/timeout/advanced/redis_backend.py +770 -0
- truthound/validators/timeout/advanced/retry.py +721 -0
- truthound/validators/timeout/advanced/sampling.py +788 -0
- truthound/validators/timeout/advanced/sla.py +661 -0
- truthound/validators/timeout/advanced/telemetry.py +804 -0
- truthound/validators/timeout/cascade.py +477 -0
- truthound/validators/timeout/deadline.py +657 -0
- truthound/validators/timeout/degradation.py +525 -0
- truthound/validators/timeout/distributed.py +597 -0
- truthound/validators/timeseries/__init__.py +89 -0
- truthound/validators/timeseries/base.py +326 -0
- truthound/validators/timeseries/completeness.py +617 -0
- truthound/validators/timeseries/gap.py +485 -0
- truthound/validators/timeseries/monotonic.py +310 -0
- truthound/validators/timeseries/seasonality.py +422 -0
- truthound/validators/timeseries/trend.py +510 -0
- truthound/validators/uniqueness/__init__.py +59 -0
- truthound/validators/uniqueness/approximate.py +475 -0
- truthound/validators/uniqueness/distinct_values.py +253 -0
- truthound/validators/uniqueness/duplicate.py +118 -0
- truthound/validators/uniqueness/primary_key.py +140 -0
- truthound/validators/uniqueness/unique.py +191 -0
- truthound/validators/uniqueness/within_record.py +599 -0
- truthound/validators/utils.py +756 -0
- truthound-1.0.8.dist-info/METADATA +474 -0
- truthound-1.0.8.dist-info/RECORD +877 -0
- truthound-1.0.8.dist-info/WHEEL +4 -0
- truthound-1.0.8.dist-info/entry_points.txt +2 -0
- truthound-1.0.8.dist-info/licenses/LICENSE +190 -0
truthound/__init__.py
ADDED
|
@@ -0,0 +1,162 @@
|
|
|
1
|
+
"""Truthound - Zero-Configuration Data Quality Framework Powered by Polars."""
|
|
2
|
+
|
|
3
|
+
from truthound.api import check, mask, profile, scan
|
|
4
|
+
from truthound.decorators import validator
|
|
5
|
+
from truthound.drift import compare
|
|
6
|
+
from truthound.report import Report
|
|
7
|
+
from truthound.schema import Schema, learn
|
|
8
|
+
|
|
9
|
+
# Data sources and execution engines (Phase 5)
|
|
10
|
+
from truthound import datasources
|
|
11
|
+
from truthound import execution
|
|
12
|
+
from truthound.datasources import get_datasource, get_sql_datasource
|
|
13
|
+
|
|
14
|
+
# Checkpoint and CI/CD integration (Phase 6)
|
|
15
|
+
from truthound import checkpoint
|
|
16
|
+
|
|
17
|
+
# Auto-profiling and rule generation (Phase 7)
|
|
18
|
+
from truthound import profiler
|
|
19
|
+
from truthound.profiler import (
|
|
20
|
+
DataProfiler,
|
|
21
|
+
profile_file,
|
|
22
|
+
profile_dataframe,
|
|
23
|
+
generate_suite,
|
|
24
|
+
ValidationSuite,
|
|
25
|
+
TableProfile,
|
|
26
|
+
ColumnProfile,
|
|
27
|
+
)
|
|
28
|
+
|
|
29
|
+
# Data Docs - HTML Reports and Dashboard (Phase 8)
|
|
30
|
+
from truthound import datadocs
|
|
31
|
+
from truthound.datadocs import (
|
|
32
|
+
HTMLReportBuilder,
|
|
33
|
+
generate_html_report,
|
|
34
|
+
generate_report_from_file,
|
|
35
|
+
ReportConfig,
|
|
36
|
+
ReportTheme,
|
|
37
|
+
ChartLibrary,
|
|
38
|
+
)
|
|
39
|
+
|
|
40
|
+
# Advanced Features - ML, Lineage, Realtime (Phase 10)
|
|
41
|
+
from truthound import ml
|
|
42
|
+
from truthound import lineage
|
|
43
|
+
from truthound import realtime
|
|
44
|
+
|
|
45
|
+
# ML exports
|
|
46
|
+
from truthound.ml import (
|
|
47
|
+
ModelRegistry,
|
|
48
|
+
AnomalyDetector,
|
|
49
|
+
MLDriftDetector,
|
|
50
|
+
RuleLearner,
|
|
51
|
+
ModelType,
|
|
52
|
+
ModelState,
|
|
53
|
+
)
|
|
54
|
+
from truthound.ml.anomaly_models import (
|
|
55
|
+
ZScoreAnomalyDetector,
|
|
56
|
+
IQRAnomalyDetector,
|
|
57
|
+
IsolationForestDetector,
|
|
58
|
+
EnsembleAnomalyDetector,
|
|
59
|
+
)
|
|
60
|
+
from truthound.ml.drift_detection import (
|
|
61
|
+
DistributionDriftDetector,
|
|
62
|
+
FeatureDriftDetector,
|
|
63
|
+
)
|
|
64
|
+
|
|
65
|
+
# Lineage exports
|
|
66
|
+
from truthound.lineage import (
|
|
67
|
+
LineageGraph,
|
|
68
|
+
LineageNode,
|
|
69
|
+
LineageEdge,
|
|
70
|
+
LineageTracker,
|
|
71
|
+
ImpactAnalyzer,
|
|
72
|
+
NodeType,
|
|
73
|
+
EdgeType,
|
|
74
|
+
)
|
|
75
|
+
|
|
76
|
+
# Realtime exports
|
|
77
|
+
from truthound.realtime import (
|
|
78
|
+
StreamingValidator,
|
|
79
|
+
IncrementalValidator,
|
|
80
|
+
StreamingConfig,
|
|
81
|
+
CheckpointManager,
|
|
82
|
+
MemoryStateStore,
|
|
83
|
+
BatchResult,
|
|
84
|
+
StreamingMode,
|
|
85
|
+
)
|
|
86
|
+
|
|
87
|
+
# Version: resolved from the installed distribution metadata, so the value
# declared in pyproject.toml stays the single source of truth.
from importlib.metadata import PackageNotFoundError, version

try:
    __version__ = version("truthound")
except PackageNotFoundError:
    # No installed distribution (e.g. running from a source checkout):
    # fall back to a development placeholder.
    __version__ = "0.0.0.dev"
|
|
95
|
+
__all__ = [
|
|
96
|
+
# Core API
|
|
97
|
+
"check",
|
|
98
|
+
"scan",
|
|
99
|
+
"mask",
|
|
100
|
+
"profile",
|
|
101
|
+
"learn",
|
|
102
|
+
"compare",
|
|
103
|
+
"validator",
|
|
104
|
+
"Report",
|
|
105
|
+
"Schema",
|
|
106
|
+
# Phase 5: Data sources
|
|
107
|
+
"datasources",
|
|
108
|
+
"execution",
|
|
109
|
+
"get_datasource",
|
|
110
|
+
"get_sql_datasource",
|
|
111
|
+
# Phase 6: Checkpoint & CI/CD
|
|
112
|
+
"checkpoint",
|
|
113
|
+
# Phase 7: Auto-profiling & Rule Generation
|
|
114
|
+
"profiler",
|
|
115
|
+
"DataProfiler",
|
|
116
|
+
"profile_file",
|
|
117
|
+
"profile_dataframe",
|
|
118
|
+
"generate_suite",
|
|
119
|
+
"ValidationSuite",
|
|
120
|
+
"TableProfile",
|
|
121
|
+
"ColumnProfile",
|
|
122
|
+
# Phase 8: Data Docs (HTML Reports & Dashboard)
|
|
123
|
+
"datadocs",
|
|
124
|
+
"HTMLReportBuilder",
|
|
125
|
+
"generate_html_report",
|
|
126
|
+
"generate_report_from_file",
|
|
127
|
+
"ReportConfig",
|
|
128
|
+
"ReportTheme",
|
|
129
|
+
"ChartLibrary",
|
|
130
|
+
# Phase 10: Advanced Features - ML
|
|
131
|
+
"ml",
|
|
132
|
+
"ModelRegistry",
|
|
133
|
+
"AnomalyDetector",
|
|
134
|
+
"MLDriftDetector",
|
|
135
|
+
"RuleLearner",
|
|
136
|
+
"ModelType",
|
|
137
|
+
"ModelState",
|
|
138
|
+
"ZScoreAnomalyDetector",
|
|
139
|
+
"IQRAnomalyDetector",
|
|
140
|
+
"IsolationForestDetector",
|
|
141
|
+
"EnsembleAnomalyDetector",
|
|
142
|
+
"DistributionDriftDetector",
|
|
143
|
+
"FeatureDriftDetector",
|
|
144
|
+
# Phase 10: Advanced Features - Lineage
|
|
145
|
+
"lineage",
|
|
146
|
+
"LineageGraph",
|
|
147
|
+
"LineageNode",
|
|
148
|
+
"LineageEdge",
|
|
149
|
+
"LineageTracker",
|
|
150
|
+
"ImpactAnalyzer",
|
|
151
|
+
"NodeType",
|
|
152
|
+
"EdgeType",
|
|
153
|
+
# Phase 10: Advanced Features - Realtime
|
|
154
|
+
"realtime",
|
|
155
|
+
"StreamingValidator",
|
|
156
|
+
"IncrementalValidator",
|
|
157
|
+
"StreamingConfig",
|
|
158
|
+
"CheckpointManager",
|
|
159
|
+
"MemoryStateStore",
|
|
160
|
+
"BatchResult",
|
|
161
|
+
"StreamingMode",
|
|
162
|
+
]
|
truthound/adapters.py
ADDED
|
@@ -0,0 +1,100 @@
|
|
|
1
|
+
"""Input adapters for converting various data formats to Polars LazyFrame."""
|
|
2
|
+
|
|
3
|
+
from pathlib import Path
|
|
4
|
+
from typing import Any
|
|
5
|
+
|
|
6
|
+
import polars as pl
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
def to_lazyframe(data: Any) -> pl.LazyFrame:
    """Convert various input formats to a Polars LazyFrame.

    Supports:
        - str or pathlib.Path: File path (CSV, JSON, Parquet, NDJSON/JSONL)
        - pl.DataFrame: Polars DataFrame
        - pl.LazyFrame: Polars LazyFrame (passthrough)
        - dict: Python dictionary
        - pd.DataFrame: pandas DataFrame

    Args:
        data: Input data in any supported format.

    Returns:
        Polars LazyFrame for lazy evaluation.

    Raises:
        ValueError: If the input format is not supported.
        FileNotFoundError: If a file path is provided but doesn't exist.
    """
    # Already lazy: hand it back untouched.
    if isinstance(data, pl.LazyFrame):
        return data

    # Eager Polars frame.
    if isinstance(data, pl.DataFrame):
        return data.lazy()

    # Plain dictionary: build an eager frame first, then go lazy.
    if isinstance(data, dict):
        return pl.DataFrame(data).lazy()

    # File path. pathlib.Path objects are accepted alongside plain strings;
    # the loader takes a string, so normalise before delegating.
    if isinstance(data, (str, Path)):
        return _load_file(str(data))

    # pandas is detected structurally so it never has to be imported here.
    if _is_pandas_dataframe(data):
        return pl.from_pandas(data).lazy()

    raise ValueError(
        f"Unsupported input type: {type(data).__name__}. "
        "Supported types: str or Path (file path), pl.DataFrame, pl.LazyFrame, dict, pd.DataFrame"
    )
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
def _load_file(path: str) -> pl.LazyFrame:
    """Open the file at ``path`` as a Polars LazyFrame, chosen by its extension.

    The extension is compared case-insensitively.

    Args:
        path: Path to the file.

    Returns:
        Polars LazyFrame.

    Raises:
        FileNotFoundError: If the file doesn't exist.
        ValueError: If the file extension is not supported.
    """
    target = Path(path)
    if not target.exists():
        raise FileNotFoundError(f"File not found: {path}")

    extension = target.suffix.lower()

    if extension in (".ndjson", ".jsonl"):
        return pl.scan_ndjson(path)

    if extension == ".parquet":
        return pl.scan_parquet(path)

    if extension == ".json":
        # Plain JSON has no scan_ reader: read it eagerly, then switch to lazy.
        eager_frame = pl.read_json(path)
        return eager_frame.lazy()

    if extension == ".csv":
        return pl.scan_csv(path)

    raise ValueError(
        f"Unsupported file extension: {extension}. "
        "Supported extensions: .csv, .json, .parquet, .ndjson, .jsonl"
    )
|
|
89
|
+
|
|
90
|
+
|
|
91
|
+
def _is_pandas_dataframe(obj: Any) -> bool:
    """Check if an object is a pandas DataFrame without importing pandas.

    The check is structural: it looks for a class named ``DataFrame`` whose
    module name starts with ``pandas``. The whole method resolution order is
    inspected, so subclasses of the pandas DataFrame that are defined in
    other modules are recognised as well.

    Args:
        obj: Object to check.

    Returns:
        True if the object is a pandas DataFrame or an instance of a
        subclass of it.
    """
    return any(
        klass.__name__ == "DataFrame" and klass.__module__.startswith("pandas")
        for klass in type(obj).__mro__
    )
|
truthound/api.py
ADDED
|
@@ -0,0 +1,365 @@
|
|
|
1
|
+
"""Main API functions for Truthound."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from pathlib import Path
|
|
6
|
+
from typing import TYPE_CHECKING, Any
|
|
7
|
+
|
|
8
|
+
import polars as pl
|
|
9
|
+
|
|
10
|
+
from truthound.adapters import to_lazyframe
|
|
11
|
+
from truthound.maskers import mask_data
|
|
12
|
+
from truthound.profiler import profile_data
|
|
13
|
+
from truthound.report import PIIReport, ProfileReport, Report
|
|
14
|
+
from truthound.scanners import scan_pii
|
|
15
|
+
from truthound.types import Severity
|
|
16
|
+
from truthound.validators import BUILTIN_VALIDATORS, Validator, get_validator
|
|
17
|
+
|
|
18
|
+
if TYPE_CHECKING:
|
|
19
|
+
from truthound.schema import Schema
|
|
20
|
+
from truthound.datasources.base import BaseDataSource
|
|
21
|
+
from truthound.execution.base import BaseExecutionEngine
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
def check(
    data: Any = None,
    source: "BaseDataSource | None" = None,
    validators: list[str | Validator] | None = None,
    min_severity: str | Severity | None = None,
    schema: str | Path | Schema | None = None,
    auto_schema: bool = False,
    use_engine: bool = False,
    parallel: bool = False,
    max_workers: int | None = None,
    pushdown: bool | None = None,
) -> Report:
    """Perform data quality validation on the input data.

    This is the main entry point for data quality validation. It accepts various
    input types and automatically converts them to Polars LazyFrame internally.

    Supported Input Types:
        - str: File path (CSV, JSON, Parquet)
        - pl.DataFrame: Polars DataFrame (converted to LazyFrame)
        - pl.LazyFrame: Polars LazyFrame (used directly)
        - pd.DataFrame: pandas DataFrame (converted via Polars)
        - dict: Python dictionary (converted to DataFrame then LazyFrame)
        - BaseDataSource: DataSource instance for SQL databases, Spark, etc.

    Note:
        Individual Validator classes only accept pl.LazyFrame directly.
        This API handles the conversion for convenience. If using validators
        directly, use ``truthound.adapters.to_lazyframe()`` to convert your data.

    Args:
        data: Input data (file path, DataFrame, dict, etc.)
        source: Optional DataSource instance. If provided, data is ignored.
            This enables validation on SQL databases, Spark, etc.
        validators: Optional list of validator names or Validator instances.
            If None, all built-in validators are used.
        min_severity: Minimum severity level to include in results.
            Can be "low", "medium", "high", or "critical".
        schema: Optional schema for validation. Can be:
            - Path to a schema YAML file
            - Schema object from th.learn()
            When provided, schema validation runs in addition to other validators.
        auto_schema: If True, automatically learns and caches a schema from the data.
            On subsequent runs with the same data source, validates against
            the cached schema. This enables true "zero-config" validation.
            Ignored when ``schema`` is given explicitly.
        use_engine: If True, uses execution engine for validation (experimental).
            Currently validators still use Polars LazyFrame fallback; this
            function does not read the flag.
        parallel: If True, uses DAG-based parallel execution for validators.
            Validators are grouped by dependency and executed in parallel
            when possible. This can significantly improve performance for
            large datasets with many validators. Only takes effect when more
            than one validator is selected and pushdown is not in use.
        max_workers: Maximum number of worker threads for parallel execution.
            Only used when parallel=True. Defaults to min(32, cpu_count + 4).
        pushdown: If True, enables query pushdown for SQL data sources.
            Validation logic is executed server-side when possible,
            reducing data transfer and improving performance.
            If None (default), auto-detects based on data source type.
            Pushdown is silently disabled when ``source`` is not a SQL
            data source.

    Returns:
        Report containing all validation issues found.

    Raises:
        ValueError: If neither ``data`` nor ``source`` is provided, if
            ``source`` is not a DataSource instance, or if an entry of
            ``validators`` is neither a name nor a Validator instance.

    Example:
        >>> import truthound as th
        >>> report = th.check("data.csv")
        >>> print(report)

        >>> # With specific validators
        >>> report = th.check(df, validators=["null", "duplicate"])

        >>> # Filter by severity
        >>> report = th.check(df, min_severity="medium")

        >>> # With schema validation
        >>> schema = th.learn("baseline.csv")
        >>> report = th.check("new_data.csv", schema=schema)

        >>> # Zero-config with auto schema caching
        >>> report = th.check("data.csv", auto_schema=True)

        >>> # Using DataSource for SQL database
        >>> from truthound.datasources.sql import PostgreSQLDataSource
        >>> source = PostgreSQLDataSource(
        ...     table="users",
        ...     host="localhost",
        ...     database="mydb",
        ...     user="postgres",
        ... )
        >>> report = th.check(source=source, validators=["null", "duplicate"])

        >>> # Using auto-detection with DataSource
        >>> from truthound.datasources import get_datasource
        >>> source = get_datasource(spark_df)  # PySpark DataFrame
        >>> if source.needs_sampling():
        ...     source = source.sample(n=100_000)
        >>> report = th.check(source=source)

        >>> # With query pushdown for SQL data sources
        >>> from truthound.datasources.sql import PostgreSQLDataSource
        >>> source = PostgreSQLDataSource(table="users", host="localhost", database="mydb")
        >>> report = th.check(source=source, pushdown=True)  # Execute validations server-side
    """
    # ------------------------------------------------------------------
    # 1. Resolve the input: DataSource path vs. legacy in-memory/file path
    # ------------------------------------------------------------------
    use_pushdown = False
    sql_source = None

    if source is not None:
        from truthound.datasources.base import BaseDataSource
        from truthound.datasources._protocols import DataSourceCapability

        if not isinstance(source, BaseDataSource):
            raise ValueError(
                f"source must be a DataSource instance, got {type(source).__name__}"
            )

        # Determine if pushdown should be used
        if pushdown is True:
            use_pushdown = True
        elif pushdown is None:
            # Auto-detect: use pushdown for SQL sources with SQL_PUSHDOWN capability
            use_pushdown = DataSourceCapability.SQL_PUSHDOWN in source.capabilities

        if use_pushdown:
            # Pushdown only makes sense for SQL sources; fall back otherwise
            # (including when the optional SQL module cannot be imported).
            try:
                from truthound.datasources.sql.base import BaseSQLDataSource

                if isinstance(source, BaseSQLDataSource):
                    sql_source = source
                else:
                    use_pushdown = False
            except ImportError:
                use_pushdown = False

        # Check size limits and warn if needed (only if not using pushdown)
        if not use_pushdown and source.needs_sampling():
            import warnings

            warnings.warn(
                f"Data source '{source.name}' has {source.row_count:,} rows, "
                f"which exceeds the limit of {source.config.max_rows:,}. "
                "Consider using source.sample() for better performance.",
                UserWarning,
                stacklevel=2,  # attribute the warning to the caller of check()
            )

        source_name = source.name
    else:
        if data is None:
            raise ValueError("Either 'data' or 'source' must be provided")

        # Legacy path: label the report with the path or the input's type name
        source_name = str(data) if isinstance(data, str) else type(data).__name__

    # ------------------------------------------------------------------
    # 2. Collect row/column metadata (and the frame, unless pushing down)
    # ------------------------------------------------------------------
    if use_pushdown and sql_source is not None:
        # Pushdown path: take metadata from the source without loading the data
        row_count = sql_source.row_count or 0
        column_count = len(sql_source.columns)
        lf = None  # not materialised on the pushdown path
    else:
        # Standard path: load data into Polars
        if source is not None:
            lf = source.to_polars_lazyframe()
        else:
            lf = to_lazyframe(data)

        # Materialise once and read both counts from the collected frame,
        # instead of resolving the schema separately before collecting.
        df_collected = lf.collect()
        row_count = df_collected.height
        column_count = df_collected.width

        # Validators receive a lazy view over the already-collected data
        lf = df_collected.lazy()

    # ------------------------------------------------------------------
    # 3. Build the list of validator instances
    # ------------------------------------------------------------------
    validator_instances: list[Validator] = []

    # Add schema validator if schema is provided or auto_schema is enabled
    if schema is not None or auto_schema:
        from truthound.schema import Schema as SchemaClass
        from truthound.validators.schema_validator import SchemaValidator

        if schema is not None:
            if isinstance(schema, (str, Path)):
                schema_obj = SchemaClass.load(schema)
            else:
                schema_obj = schema
        else:
            # Auto schema mode: get from cache or learn new
            from truthound.cache import get_or_learn_schema

            # NOTE(review): `data` is passed here even when only `source` was
            # supplied, in which case it is None — TODO confirm that
            # get_or_learn_schema handles that combination as intended.
            schema_obj, _ = get_or_learn_schema(data)

        validator_instances.append(SchemaValidator(schema_obj))

    if validators is None:
        # Use all built-in validators
        validator_instances.extend(cls() for cls in BUILTIN_VALIDATORS.values())
    else:
        for v in validators:
            if isinstance(v, str):
                validator_cls = get_validator(v)
                validator_instances.append(validator_cls())
            elif isinstance(v, Validator):
                validator_instances.append(v)
            else:
                raise ValueError(f"Invalid validator: {v}. Expected str or Validator instance.")

    # ------------------------------------------------------------------
    # 4. Run the validators: pushdown, parallel DAG, or sequential
    # ------------------------------------------------------------------
    all_issues = []

    if use_pushdown and sql_source is not None:
        # Use pushdown validation engine for SQL data sources
        from truthound.validators.pushdown_support import PushdownValidationEngine

        engine = PushdownValidationEngine(sql_source)
        all_issues = engine.validate(validator_instances)

    elif parallel and len(validator_instances) > 1:
        # Use DAG-based parallel execution
        from truthound.validators.optimization.orchestrator import (
            ValidatorDAG,
            ParallelExecutionStrategy,
            AdaptiveExecutionStrategy,
        )

        dag = ValidatorDAG()
        dag.add_validators(validator_instances)
        plan = dag.build_execution_plan()

        # An explicit worker count selects the fixed-size parallel strategy;
        # otherwise the adaptive strategy is used.
        if max_workers is not None:
            strategy = ParallelExecutionStrategy(max_workers=max_workers)
        else:
            strategy = AdaptiveExecutionStrategy()

        result = plan.execute(lf, strategy)
        all_issues = result.all_issues
    else:
        # Sequential execution (original behavior)
        for validator in validator_instances:
            all_issues.extend(validator.validate(lf))

    # ------------------------------------------------------------------
    # 5. Assemble the report and apply the optional severity filter
    # ------------------------------------------------------------------
    report = Report(
        issues=all_issues,
        source=source_name,
        row_count=row_count,
        column_count=column_count,
    )

    if min_severity is not None:
        if isinstance(min_severity, str):
            # Severity names are matched case-insensitively
            min_severity = Severity(min_severity.lower())
        report = report.filter_by_severity(min_severity)

    return report
|
|
279
|
+
|
|
280
|
+
|
|
281
|
+
def scan(data: Any) -> PIIReport:
    """Detect personally identifiable information (PII) in a dataset.

    Args:
        data: Input data (file path, DataFrame, dict, etc.)

    Returns:
        PIIReport holding the PII findings together with the source label
        and the number of rows that were scanned.

    Example:
        >>> import truthound as th
        >>> pii_report = th.scan("data.csv")
        >>> print(pii_report)
    """
    # Collect eagerly: the row count is taken from the collected frame, and
    # the same frame (re-wrapped as lazy) is what gets scanned.
    materialized = to_lazyframe(data).collect()

    # String inputs are reported verbatim; anything else is labelled by the
    # name of its type.
    if isinstance(data, str):
        source_label = str(data)
    else:
        source_label = type(data).__name__

    return PIIReport(
        findings=scan_pii(materialized.lazy()),
        source=source_label,
        row_count=len(materialized),
    )
|
|
308
|
+
|
|
309
|
+
|
|
310
|
+
def mask(
    data: Any,
    columns: list[str] | None = None,
    strategy: str = "redact",
) -> pl.DataFrame:
    """Return a copy of the input with sensitive values masked.

    The input is converted with ``to_lazyframe`` and handed to ``mask_data``
    together with the column selection and strategy, unchanged.

    Args:
        data: Input data (file path, DataFrame, dict, etc.)
        columns: Optional list of columns to mask.
            If None, auto-detects PII columns.
        strategy: Masking strategy - "redact", "hash", or "fake".

    Returns:
        Polars DataFrame with masked values.

    Example:
        >>> import truthound as th
        >>> masked_df = th.mask("data.csv")

        >>> # Mask specific columns
        >>> masked_df = th.mask(df, columns=["email", "phone"])

        >>> # Use hash strategy
        >>> masked_df = th.mask(df, strategy="hash")
    """
    return mask_data(to_lazyframe(data), columns=columns, strategy=strategy)
|
|
338
|
+
|
|
339
|
+
|
|
340
|
+
def profile(data: Any) -> ProfileReport:
    """Build a statistical profile of the dataset.

    Args:
        data: Input data (file path, DataFrame, dict, etc.)

    Returns:
        ProfileReport containing statistical summary.

    Example:
        >>> import truthound as th
        >>> profile = th.profile("data.csv")
        >>> print(profile)
    """
    frame = to_lazyframe(data)

    # String inputs are reported verbatim; anything else is labelled by the
    # name of its type.
    if isinstance(data, str):
        source_label = str(data)
    else:
        source_label = type(data).__name__

    stats = profile_data(frame, source=source_label)

    # Only these entries of the profiling result are forwarded to the report;
    # a missing key raises KeyError, exactly as direct indexing would.
    report_fields = ("source", "row_count", "column_count", "size_bytes", "columns")
    return ProfileReport(**{name: stats[name] for name in report_fields})
|