truthound 1.0.8__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- truthound/__init__.py +162 -0
- truthound/adapters.py +100 -0
- truthound/api.py +365 -0
- truthound/audit/__init__.py +248 -0
- truthound/audit/core.py +967 -0
- truthound/audit/filters.py +620 -0
- truthound/audit/formatters.py +707 -0
- truthound/audit/logger.py +902 -0
- truthound/audit/middleware.py +571 -0
- truthound/audit/storage.py +1083 -0
- truthound/benchmark/__init__.py +123 -0
- truthound/benchmark/base.py +757 -0
- truthound/benchmark/comparison.py +635 -0
- truthound/benchmark/generators.py +706 -0
- truthound/benchmark/reporters.py +718 -0
- truthound/benchmark/runner.py +635 -0
- truthound/benchmark/scenarios.py +712 -0
- truthound/cache.py +252 -0
- truthound/checkpoint/__init__.py +136 -0
- truthound/checkpoint/actions/__init__.py +164 -0
- truthound/checkpoint/actions/base.py +324 -0
- truthound/checkpoint/actions/custom.py +234 -0
- truthound/checkpoint/actions/discord_notify.py +290 -0
- truthound/checkpoint/actions/email_notify.py +405 -0
- truthound/checkpoint/actions/github_action.py +406 -0
- truthound/checkpoint/actions/opsgenie.py +1499 -0
- truthound/checkpoint/actions/pagerduty.py +226 -0
- truthound/checkpoint/actions/slack_notify.py +233 -0
- truthound/checkpoint/actions/store_result.py +249 -0
- truthound/checkpoint/actions/teams_notify.py +1570 -0
- truthound/checkpoint/actions/telegram_notify.py +419 -0
- truthound/checkpoint/actions/update_docs.py +552 -0
- truthound/checkpoint/actions/webhook.py +293 -0
- truthound/checkpoint/analytics/__init__.py +147 -0
- truthound/checkpoint/analytics/aggregations/__init__.py +23 -0
- truthound/checkpoint/analytics/aggregations/rollup.py +481 -0
- truthound/checkpoint/analytics/aggregations/time_bucket.py +306 -0
- truthound/checkpoint/analytics/analyzers/__init__.py +17 -0
- truthound/checkpoint/analytics/analyzers/anomaly.py +386 -0
- truthound/checkpoint/analytics/analyzers/base.py +270 -0
- truthound/checkpoint/analytics/analyzers/forecast.py +421 -0
- truthound/checkpoint/analytics/analyzers/trend.py +314 -0
- truthound/checkpoint/analytics/models.py +292 -0
- truthound/checkpoint/analytics/protocols.py +549 -0
- truthound/checkpoint/analytics/service.py +718 -0
- truthound/checkpoint/analytics/stores/__init__.py +16 -0
- truthound/checkpoint/analytics/stores/base.py +306 -0
- truthound/checkpoint/analytics/stores/memory_store.py +353 -0
- truthound/checkpoint/analytics/stores/sqlite_store.py +557 -0
- truthound/checkpoint/analytics/stores/timescale_store.py +501 -0
- truthound/checkpoint/async_actions.py +794 -0
- truthound/checkpoint/async_base.py +708 -0
- truthound/checkpoint/async_checkpoint.py +617 -0
- truthound/checkpoint/async_runner.py +639 -0
- truthound/checkpoint/checkpoint.py +527 -0
- truthound/checkpoint/ci/__init__.py +61 -0
- truthound/checkpoint/ci/detector.py +355 -0
- truthound/checkpoint/ci/reporter.py +436 -0
- truthound/checkpoint/ci/templates.py +454 -0
- truthound/checkpoint/circuitbreaker/__init__.py +133 -0
- truthound/checkpoint/circuitbreaker/breaker.py +542 -0
- truthound/checkpoint/circuitbreaker/core.py +252 -0
- truthound/checkpoint/circuitbreaker/detection.py +459 -0
- truthound/checkpoint/circuitbreaker/middleware.py +389 -0
- truthound/checkpoint/circuitbreaker/registry.py +357 -0
- truthound/checkpoint/distributed/__init__.py +139 -0
- truthound/checkpoint/distributed/backends/__init__.py +35 -0
- truthound/checkpoint/distributed/backends/celery_backend.py +503 -0
- truthound/checkpoint/distributed/backends/kubernetes_backend.py +696 -0
- truthound/checkpoint/distributed/backends/local_backend.py +397 -0
- truthound/checkpoint/distributed/backends/ray_backend.py +625 -0
- truthound/checkpoint/distributed/base.py +774 -0
- truthound/checkpoint/distributed/orchestrator.py +765 -0
- truthound/checkpoint/distributed/protocols.py +842 -0
- truthound/checkpoint/distributed/registry.py +449 -0
- truthound/checkpoint/idempotency/__init__.py +120 -0
- truthound/checkpoint/idempotency/core.py +295 -0
- truthound/checkpoint/idempotency/fingerprint.py +454 -0
- truthound/checkpoint/idempotency/locking.py +604 -0
- truthound/checkpoint/idempotency/service.py +592 -0
- truthound/checkpoint/idempotency/stores.py +653 -0
- truthound/checkpoint/monitoring/__init__.py +134 -0
- truthound/checkpoint/monitoring/aggregators/__init__.py +15 -0
- truthound/checkpoint/monitoring/aggregators/base.py +372 -0
- truthound/checkpoint/monitoring/aggregators/realtime.py +300 -0
- truthound/checkpoint/monitoring/aggregators/window.py +493 -0
- truthound/checkpoint/monitoring/collectors/__init__.py +17 -0
- truthound/checkpoint/monitoring/collectors/base.py +257 -0
- truthound/checkpoint/monitoring/collectors/memory_collector.py +617 -0
- truthound/checkpoint/monitoring/collectors/prometheus_collector.py +451 -0
- truthound/checkpoint/monitoring/collectors/redis_collector.py +518 -0
- truthound/checkpoint/monitoring/events.py +410 -0
- truthound/checkpoint/monitoring/protocols.py +636 -0
- truthound/checkpoint/monitoring/service.py +578 -0
- truthound/checkpoint/monitoring/views/__init__.py +17 -0
- truthound/checkpoint/monitoring/views/base.py +172 -0
- truthound/checkpoint/monitoring/views/queue_view.py +220 -0
- truthound/checkpoint/monitoring/views/task_view.py +240 -0
- truthound/checkpoint/monitoring/views/worker_view.py +263 -0
- truthound/checkpoint/registry.py +337 -0
- truthound/checkpoint/runner.py +356 -0
- truthound/checkpoint/transaction/__init__.py +133 -0
- truthound/checkpoint/transaction/base.py +389 -0
- truthound/checkpoint/transaction/compensatable.py +537 -0
- truthound/checkpoint/transaction/coordinator.py +576 -0
- truthound/checkpoint/transaction/executor.py +622 -0
- truthound/checkpoint/transaction/idempotency.py +534 -0
- truthound/checkpoint/transaction/saga/__init__.py +143 -0
- truthound/checkpoint/transaction/saga/builder.py +584 -0
- truthound/checkpoint/transaction/saga/definition.py +515 -0
- truthound/checkpoint/transaction/saga/event_store.py +542 -0
- truthound/checkpoint/transaction/saga/patterns.py +833 -0
- truthound/checkpoint/transaction/saga/runner.py +718 -0
- truthound/checkpoint/transaction/saga/state_machine.py +793 -0
- truthound/checkpoint/transaction/saga/strategies.py +780 -0
- truthound/checkpoint/transaction/saga/testing.py +886 -0
- truthound/checkpoint/triggers/__init__.py +58 -0
- truthound/checkpoint/triggers/base.py +237 -0
- truthound/checkpoint/triggers/event.py +385 -0
- truthound/checkpoint/triggers/schedule.py +355 -0
- truthound/cli.py +2358 -0
- truthound/cli_modules/__init__.py +124 -0
- truthound/cli_modules/advanced/__init__.py +45 -0
- truthound/cli_modules/advanced/benchmark.py +343 -0
- truthound/cli_modules/advanced/docs.py +225 -0
- truthound/cli_modules/advanced/lineage.py +209 -0
- truthound/cli_modules/advanced/ml.py +320 -0
- truthound/cli_modules/advanced/realtime.py +196 -0
- truthound/cli_modules/checkpoint/__init__.py +46 -0
- truthound/cli_modules/checkpoint/init.py +114 -0
- truthound/cli_modules/checkpoint/list.py +71 -0
- truthound/cli_modules/checkpoint/run.py +159 -0
- truthound/cli_modules/checkpoint/validate.py +67 -0
- truthound/cli_modules/common/__init__.py +71 -0
- truthound/cli_modules/common/errors.py +414 -0
- truthound/cli_modules/common/options.py +419 -0
- truthound/cli_modules/common/output.py +507 -0
- truthound/cli_modules/common/protocol.py +552 -0
- truthound/cli_modules/core/__init__.py +48 -0
- truthound/cli_modules/core/check.py +123 -0
- truthound/cli_modules/core/compare.py +104 -0
- truthound/cli_modules/core/learn.py +57 -0
- truthound/cli_modules/core/mask.py +77 -0
- truthound/cli_modules/core/profile.py +65 -0
- truthound/cli_modules/core/scan.py +61 -0
- truthound/cli_modules/profiler/__init__.py +51 -0
- truthound/cli_modules/profiler/auto_profile.py +175 -0
- truthound/cli_modules/profiler/metadata.py +107 -0
- truthound/cli_modules/profiler/suite.py +283 -0
- truthound/cli_modules/registry.py +431 -0
- truthound/cli_modules/scaffolding/__init__.py +89 -0
- truthound/cli_modules/scaffolding/base.py +631 -0
- truthound/cli_modules/scaffolding/commands.py +545 -0
- truthound/cli_modules/scaffolding/plugins.py +1072 -0
- truthound/cli_modules/scaffolding/reporters.py +594 -0
- truthound/cli_modules/scaffolding/validators.py +1127 -0
- truthound/common/__init__.py +18 -0
- truthound/common/resilience/__init__.py +130 -0
- truthound/common/resilience/bulkhead.py +266 -0
- truthound/common/resilience/circuit_breaker.py +516 -0
- truthound/common/resilience/composite.py +332 -0
- truthound/common/resilience/config.py +292 -0
- truthound/common/resilience/protocols.py +217 -0
- truthound/common/resilience/rate_limiter.py +404 -0
- truthound/common/resilience/retry.py +341 -0
- truthound/datadocs/__init__.py +260 -0
- truthound/datadocs/base.py +571 -0
- truthound/datadocs/builder.py +761 -0
- truthound/datadocs/charts.py +764 -0
- truthound/datadocs/dashboard/__init__.py +63 -0
- truthound/datadocs/dashboard/app.py +576 -0
- truthound/datadocs/dashboard/components.py +584 -0
- truthound/datadocs/dashboard/state.py +240 -0
- truthound/datadocs/engine/__init__.py +46 -0
- truthound/datadocs/engine/context.py +376 -0
- truthound/datadocs/engine/pipeline.py +618 -0
- truthound/datadocs/engine/registry.py +469 -0
- truthound/datadocs/exporters/__init__.py +49 -0
- truthound/datadocs/exporters/base.py +198 -0
- truthound/datadocs/exporters/html.py +178 -0
- truthound/datadocs/exporters/json_exporter.py +253 -0
- truthound/datadocs/exporters/markdown.py +284 -0
- truthound/datadocs/exporters/pdf.py +392 -0
- truthound/datadocs/i18n/__init__.py +86 -0
- truthound/datadocs/i18n/catalog.py +960 -0
- truthound/datadocs/i18n/formatting.py +505 -0
- truthound/datadocs/i18n/loader.py +256 -0
- truthound/datadocs/i18n/plurals.py +378 -0
- truthound/datadocs/renderers/__init__.py +42 -0
- truthound/datadocs/renderers/base.py +401 -0
- truthound/datadocs/renderers/custom.py +342 -0
- truthound/datadocs/renderers/jinja.py +697 -0
- truthound/datadocs/sections.py +736 -0
- truthound/datadocs/styles.py +931 -0
- truthound/datadocs/themes/__init__.py +101 -0
- truthound/datadocs/themes/base.py +336 -0
- truthound/datadocs/themes/default.py +417 -0
- truthound/datadocs/themes/enterprise.py +419 -0
- truthound/datadocs/themes/loader.py +336 -0
- truthound/datadocs/themes.py +301 -0
- truthound/datadocs/transformers/__init__.py +57 -0
- truthound/datadocs/transformers/base.py +268 -0
- truthound/datadocs/transformers/enrichers.py +544 -0
- truthound/datadocs/transformers/filters.py +447 -0
- truthound/datadocs/transformers/i18n.py +468 -0
- truthound/datadocs/versioning/__init__.py +62 -0
- truthound/datadocs/versioning/diff.py +639 -0
- truthound/datadocs/versioning/storage.py +497 -0
- truthound/datadocs/versioning/version.py +358 -0
- truthound/datasources/__init__.py +223 -0
- truthound/datasources/_async_protocols.py +222 -0
- truthound/datasources/_protocols.py +159 -0
- truthound/datasources/adapters.py +428 -0
- truthound/datasources/async_base.py +599 -0
- truthound/datasources/async_factory.py +511 -0
- truthound/datasources/base.py +516 -0
- truthound/datasources/factory.py +433 -0
- truthound/datasources/nosql/__init__.py +47 -0
- truthound/datasources/nosql/base.py +487 -0
- truthound/datasources/nosql/elasticsearch.py +801 -0
- truthound/datasources/nosql/mongodb.py +636 -0
- truthound/datasources/pandas_optimized.py +582 -0
- truthound/datasources/pandas_source.py +216 -0
- truthound/datasources/polars_source.py +395 -0
- truthound/datasources/spark_source.py +479 -0
- truthound/datasources/sql/__init__.py +154 -0
- truthound/datasources/sql/base.py +710 -0
- truthound/datasources/sql/bigquery.py +410 -0
- truthound/datasources/sql/cloud_base.py +199 -0
- truthound/datasources/sql/databricks.py +471 -0
- truthound/datasources/sql/mysql.py +316 -0
- truthound/datasources/sql/oracle.py +427 -0
- truthound/datasources/sql/postgresql.py +321 -0
- truthound/datasources/sql/redshift.py +479 -0
- truthound/datasources/sql/snowflake.py +439 -0
- truthound/datasources/sql/sqlite.py +286 -0
- truthound/datasources/sql/sqlserver.py +437 -0
- truthound/datasources/streaming/__init__.py +47 -0
- truthound/datasources/streaming/base.py +350 -0
- truthound/datasources/streaming/kafka.py +670 -0
- truthound/decorators.py +98 -0
- truthound/docs/__init__.py +69 -0
- truthound/docs/extractor.py +971 -0
- truthound/docs/generator.py +601 -0
- truthound/docs/parser.py +1037 -0
- truthound/docs/renderer.py +999 -0
- truthound/drift/__init__.py +22 -0
- truthound/drift/compare.py +189 -0
- truthound/drift/detectors.py +464 -0
- truthound/drift/report.py +160 -0
- truthound/execution/__init__.py +65 -0
- truthound/execution/_protocols.py +324 -0
- truthound/execution/base.py +576 -0
- truthound/execution/distributed/__init__.py +179 -0
- truthound/execution/distributed/aggregations.py +731 -0
- truthound/execution/distributed/arrow_bridge.py +817 -0
- truthound/execution/distributed/base.py +550 -0
- truthound/execution/distributed/dask_engine.py +976 -0
- truthound/execution/distributed/mixins.py +766 -0
- truthound/execution/distributed/protocols.py +756 -0
- truthound/execution/distributed/ray_engine.py +1127 -0
- truthound/execution/distributed/registry.py +446 -0
- truthound/execution/distributed/spark_engine.py +1011 -0
- truthound/execution/distributed/validator_adapter.py +682 -0
- truthound/execution/pandas_engine.py +401 -0
- truthound/execution/polars_engine.py +497 -0
- truthound/execution/pushdown/__init__.py +230 -0
- truthound/execution/pushdown/ast.py +1550 -0
- truthound/execution/pushdown/builder.py +1550 -0
- truthound/execution/pushdown/dialects.py +1072 -0
- truthound/execution/pushdown/executor.py +829 -0
- truthound/execution/pushdown/optimizer.py +1041 -0
- truthound/execution/sql_engine.py +518 -0
- truthound/infrastructure/__init__.py +189 -0
- truthound/infrastructure/audit.py +1515 -0
- truthound/infrastructure/config.py +1133 -0
- truthound/infrastructure/encryption.py +1132 -0
- truthound/infrastructure/logging.py +1503 -0
- truthound/infrastructure/metrics.py +1220 -0
- truthound/lineage/__init__.py +89 -0
- truthound/lineage/base.py +746 -0
- truthound/lineage/impact_analysis.py +474 -0
- truthound/lineage/integrations/__init__.py +22 -0
- truthound/lineage/integrations/openlineage.py +548 -0
- truthound/lineage/tracker.py +512 -0
- truthound/lineage/visualization/__init__.py +33 -0
- truthound/lineage/visualization/protocols.py +145 -0
- truthound/lineage/visualization/renderers/__init__.py +20 -0
- truthound/lineage/visualization/renderers/cytoscape.py +329 -0
- truthound/lineage/visualization/renderers/d3.py +331 -0
- truthound/lineage/visualization/renderers/graphviz.py +276 -0
- truthound/lineage/visualization/renderers/mermaid.py +308 -0
- truthound/maskers.py +113 -0
- truthound/ml/__init__.py +124 -0
- truthound/ml/anomaly_models/__init__.py +31 -0
- truthound/ml/anomaly_models/ensemble.py +362 -0
- truthound/ml/anomaly_models/isolation_forest.py +444 -0
- truthound/ml/anomaly_models/statistical.py +392 -0
- truthound/ml/base.py +1178 -0
- truthound/ml/drift_detection/__init__.py +26 -0
- truthound/ml/drift_detection/concept.py +381 -0
- truthound/ml/drift_detection/distribution.py +361 -0
- truthound/ml/drift_detection/feature.py +442 -0
- truthound/ml/drift_detection/multivariate.py +495 -0
- truthound/ml/monitoring/__init__.py +88 -0
- truthound/ml/monitoring/alerting/__init__.py +33 -0
- truthound/ml/monitoring/alerting/handlers.py +427 -0
- truthound/ml/monitoring/alerting/rules.py +508 -0
- truthound/ml/monitoring/collectors/__init__.py +19 -0
- truthound/ml/monitoring/collectors/composite.py +105 -0
- truthound/ml/monitoring/collectors/drift.py +324 -0
- truthound/ml/monitoring/collectors/performance.py +179 -0
- truthound/ml/monitoring/collectors/quality.py +369 -0
- truthound/ml/monitoring/monitor.py +536 -0
- truthound/ml/monitoring/protocols.py +451 -0
- truthound/ml/monitoring/stores/__init__.py +15 -0
- truthound/ml/monitoring/stores/memory.py +201 -0
- truthound/ml/monitoring/stores/prometheus.py +296 -0
- truthound/ml/rule_learning/__init__.py +25 -0
- truthound/ml/rule_learning/constraint_miner.py +443 -0
- truthound/ml/rule_learning/pattern_learner.py +499 -0
- truthound/ml/rule_learning/profile_learner.py +462 -0
- truthound/multitenancy/__init__.py +326 -0
- truthound/multitenancy/core.py +852 -0
- truthound/multitenancy/integration.py +597 -0
- truthound/multitenancy/isolation.py +630 -0
- truthound/multitenancy/manager.py +770 -0
- truthound/multitenancy/middleware.py +765 -0
- truthound/multitenancy/quota.py +537 -0
- truthound/multitenancy/resolvers.py +603 -0
- truthound/multitenancy/storage.py +703 -0
- truthound/observability/__init__.py +307 -0
- truthound/observability/context.py +531 -0
- truthound/observability/instrumentation.py +611 -0
- truthound/observability/logging.py +887 -0
- truthound/observability/metrics.py +1157 -0
- truthound/observability/tracing/__init__.py +178 -0
- truthound/observability/tracing/baggage.py +310 -0
- truthound/observability/tracing/config.py +426 -0
- truthound/observability/tracing/exporter.py +787 -0
- truthound/observability/tracing/integration.py +1018 -0
- truthound/observability/tracing/otel/__init__.py +146 -0
- truthound/observability/tracing/otel/adapter.py +982 -0
- truthound/observability/tracing/otel/bridge.py +1177 -0
- truthound/observability/tracing/otel/compat.py +681 -0
- truthound/observability/tracing/otel/config.py +691 -0
- truthound/observability/tracing/otel/detection.py +327 -0
- truthound/observability/tracing/otel/protocols.py +426 -0
- truthound/observability/tracing/processor.py +561 -0
- truthound/observability/tracing/propagator.py +757 -0
- truthound/observability/tracing/provider.py +569 -0
- truthound/observability/tracing/resource.py +515 -0
- truthound/observability/tracing/sampler.py +487 -0
- truthound/observability/tracing/span.py +676 -0
- truthound/plugins/__init__.py +198 -0
- truthound/plugins/base.py +599 -0
- truthound/plugins/cli.py +680 -0
- truthound/plugins/dependencies/__init__.py +42 -0
- truthound/plugins/dependencies/graph.py +422 -0
- truthound/plugins/dependencies/resolver.py +417 -0
- truthound/plugins/discovery.py +379 -0
- truthound/plugins/docs/__init__.py +46 -0
- truthound/plugins/docs/extractor.py +444 -0
- truthound/plugins/docs/renderer.py +499 -0
- truthound/plugins/enterprise_manager.py +877 -0
- truthound/plugins/examples/__init__.py +19 -0
- truthound/plugins/examples/custom_validators.py +317 -0
- truthound/plugins/examples/slack_notifier.py +312 -0
- truthound/plugins/examples/xml_reporter.py +254 -0
- truthound/plugins/hooks.py +558 -0
- truthound/plugins/lifecycle/__init__.py +43 -0
- truthound/plugins/lifecycle/hot_reload.py +402 -0
- truthound/plugins/lifecycle/manager.py +371 -0
- truthound/plugins/manager.py +736 -0
- truthound/plugins/registry.py +338 -0
- truthound/plugins/security/__init__.py +93 -0
- truthound/plugins/security/exceptions.py +332 -0
- truthound/plugins/security/policies.py +348 -0
- truthound/plugins/security/protocols.py +643 -0
- truthound/plugins/security/sandbox/__init__.py +45 -0
- truthound/plugins/security/sandbox/context.py +158 -0
- truthound/plugins/security/sandbox/engines/__init__.py +19 -0
- truthound/plugins/security/sandbox/engines/container.py +379 -0
- truthound/plugins/security/sandbox/engines/noop.py +144 -0
- truthound/plugins/security/sandbox/engines/process.py +336 -0
- truthound/plugins/security/sandbox/factory.py +211 -0
- truthound/plugins/security/signing/__init__.py +57 -0
- truthound/plugins/security/signing/service.py +330 -0
- truthound/plugins/security/signing/trust_store.py +368 -0
- truthound/plugins/security/signing/verifier.py +459 -0
- truthound/plugins/versioning/__init__.py +41 -0
- truthound/plugins/versioning/constraints.py +297 -0
- truthound/plugins/versioning/resolver.py +329 -0
- truthound/profiler/__init__.py +1729 -0
- truthound/profiler/_lazy.py +452 -0
- truthound/profiler/ab_testing/__init__.py +80 -0
- truthound/profiler/ab_testing/analysis.py +449 -0
- truthound/profiler/ab_testing/base.py +257 -0
- truthound/profiler/ab_testing/experiment.py +395 -0
- truthound/profiler/ab_testing/tracking.py +368 -0
- truthound/profiler/auto_threshold.py +1170 -0
- truthound/profiler/base.py +579 -0
- truthound/profiler/cache_patterns.py +911 -0
- truthound/profiler/caching.py +1303 -0
- truthound/profiler/column_profiler.py +712 -0
- truthound/profiler/comparison.py +1007 -0
- truthound/profiler/custom_patterns.py +1170 -0
- truthound/profiler/dashboard/__init__.py +50 -0
- truthound/profiler/dashboard/app.py +476 -0
- truthound/profiler/dashboard/components.py +457 -0
- truthound/profiler/dashboard/config.py +72 -0
- truthound/profiler/distributed/__init__.py +83 -0
- truthound/profiler/distributed/base.py +281 -0
- truthound/profiler/distributed/dask_backend.py +498 -0
- truthound/profiler/distributed/local_backend.py +293 -0
- truthound/profiler/distributed/profiler.py +304 -0
- truthound/profiler/distributed/ray_backend.py +374 -0
- truthound/profiler/distributed/spark_backend.py +375 -0
- truthound/profiler/distributed.py +1366 -0
- truthound/profiler/enterprise_sampling.py +1065 -0
- truthound/profiler/errors.py +488 -0
- truthound/profiler/evolution/__init__.py +91 -0
- truthound/profiler/evolution/alerts.py +426 -0
- truthound/profiler/evolution/changes.py +206 -0
- truthound/profiler/evolution/compatibility.py +365 -0
- truthound/profiler/evolution/detector.py +372 -0
- truthound/profiler/evolution/protocols.py +121 -0
- truthound/profiler/generators/__init__.py +48 -0
- truthound/profiler/generators/base.py +384 -0
- truthound/profiler/generators/ml_rules.py +375 -0
- truthound/profiler/generators/pattern_rules.py +384 -0
- truthound/profiler/generators/schema_rules.py +267 -0
- truthound/profiler/generators/stats_rules.py +324 -0
- truthound/profiler/generators/suite_generator.py +857 -0
- truthound/profiler/i18n.py +1542 -0
- truthound/profiler/incremental.py +554 -0
- truthound/profiler/incremental_validation.py +1710 -0
- truthound/profiler/integration/__init__.py +73 -0
- truthound/profiler/integration/adapters.py +345 -0
- truthound/profiler/integration/context.py +371 -0
- truthound/profiler/integration/executor.py +527 -0
- truthound/profiler/integration/naming.py +75 -0
- truthound/profiler/integration/protocols.py +243 -0
- truthound/profiler/memory.py +1185 -0
- truthound/profiler/migration/__init__.py +60 -0
- truthound/profiler/migration/base.py +345 -0
- truthound/profiler/migration/manager.py +444 -0
- truthound/profiler/migration/v1_0_to_v1_1.py +484 -0
- truthound/profiler/ml/__init__.py +73 -0
- truthound/profiler/ml/base.py +244 -0
- truthound/profiler/ml/classifier.py +507 -0
- truthound/profiler/ml/feature_extraction.py +604 -0
- truthound/profiler/ml/pretrained.py +448 -0
- truthound/profiler/ml_inference.py +1276 -0
- truthound/profiler/native_patterns.py +815 -0
- truthound/profiler/observability.py +1184 -0
- truthound/profiler/process_timeout.py +1566 -0
- truthound/profiler/progress.py +568 -0
- truthound/profiler/progress_callbacks.py +1734 -0
- truthound/profiler/quality.py +1345 -0
- truthound/profiler/resilience.py +1180 -0
- truthound/profiler/sampled_matcher.py +794 -0
- truthound/profiler/sampling.py +1288 -0
- truthound/profiler/scheduling/__init__.py +82 -0
- truthound/profiler/scheduling/protocols.py +214 -0
- truthound/profiler/scheduling/scheduler.py +474 -0
- truthound/profiler/scheduling/storage.py +457 -0
- truthound/profiler/scheduling/triggers.py +449 -0
- truthound/profiler/schema.py +603 -0
- truthound/profiler/streaming.py +685 -0
- truthound/profiler/streaming_patterns.py +1354 -0
- truthound/profiler/suite_cli.py +625 -0
- truthound/profiler/suite_config.py +789 -0
- truthound/profiler/suite_export.py +1268 -0
- truthound/profiler/table_profiler.py +547 -0
- truthound/profiler/timeout.py +565 -0
- truthound/profiler/validation.py +1532 -0
- truthound/profiler/visualization/__init__.py +118 -0
- truthound/profiler/visualization/base.py +346 -0
- truthound/profiler/visualization/generator.py +1259 -0
- truthound/profiler/visualization/plotly_renderer.py +811 -0
- truthound/profiler/visualization/renderers.py +669 -0
- truthound/profiler/visualization/sections.py +540 -0
- truthound/profiler/visualization.py +2122 -0
- truthound/profiler/yaml_validation.py +1151 -0
- truthound/py.typed +0 -0
- truthound/ratelimit/__init__.py +248 -0
- truthound/ratelimit/algorithms.py +1108 -0
- truthound/ratelimit/core.py +573 -0
- truthound/ratelimit/integration.py +532 -0
- truthound/ratelimit/limiter.py +663 -0
- truthound/ratelimit/middleware.py +700 -0
- truthound/ratelimit/policy.py +792 -0
- truthound/ratelimit/storage.py +763 -0
- truthound/rbac/__init__.py +340 -0
- truthound/rbac/core.py +976 -0
- truthound/rbac/integration.py +760 -0
- truthound/rbac/manager.py +1052 -0
- truthound/rbac/middleware.py +842 -0
- truthound/rbac/policy.py +954 -0
- truthound/rbac/storage.py +878 -0
- truthound/realtime/__init__.py +141 -0
- truthound/realtime/adapters/__init__.py +43 -0
- truthound/realtime/adapters/base.py +533 -0
- truthound/realtime/adapters/kafka.py +487 -0
- truthound/realtime/adapters/kinesis.py +479 -0
- truthound/realtime/adapters/mock.py +243 -0
- truthound/realtime/base.py +553 -0
- truthound/realtime/factory.py +382 -0
- truthound/realtime/incremental.py +660 -0
- truthound/realtime/processing/__init__.py +67 -0
- truthound/realtime/processing/exactly_once.py +575 -0
- truthound/realtime/processing/state.py +547 -0
- truthound/realtime/processing/windows.py +647 -0
- truthound/realtime/protocols.py +569 -0
- truthound/realtime/streaming.py +605 -0
- truthound/realtime/testing/__init__.py +32 -0
- truthound/realtime/testing/containers.py +615 -0
- truthound/realtime/testing/fixtures.py +484 -0
- truthound/report.py +280 -0
- truthound/reporters/__init__.py +46 -0
- truthound/reporters/_protocols.py +30 -0
- truthound/reporters/base.py +324 -0
- truthound/reporters/ci/__init__.py +66 -0
- truthound/reporters/ci/azure.py +436 -0
- truthound/reporters/ci/base.py +509 -0
- truthound/reporters/ci/bitbucket.py +567 -0
- truthound/reporters/ci/circleci.py +547 -0
- truthound/reporters/ci/detection.py +364 -0
- truthound/reporters/ci/factory.py +182 -0
- truthound/reporters/ci/github.py +388 -0
- truthound/reporters/ci/gitlab.py +471 -0
- truthound/reporters/ci/jenkins.py +525 -0
- truthound/reporters/console_reporter.py +299 -0
- truthound/reporters/factory.py +211 -0
- truthound/reporters/html_reporter.py +524 -0
- truthound/reporters/json_reporter.py +256 -0
- truthound/reporters/markdown_reporter.py +280 -0
- truthound/reporters/sdk/__init__.py +174 -0
- truthound/reporters/sdk/builder.py +558 -0
- truthound/reporters/sdk/mixins.py +1150 -0
- truthound/reporters/sdk/schema.py +1493 -0
- truthound/reporters/sdk/templates.py +666 -0
- truthound/reporters/sdk/testing.py +968 -0
- truthound/scanners.py +170 -0
- truthound/scheduling/__init__.py +122 -0
- truthound/scheduling/cron.py +1136 -0
- truthound/scheduling/presets.py +212 -0
- truthound/schema.py +275 -0
- truthound/secrets/__init__.py +173 -0
- truthound/secrets/base.py +618 -0
- truthound/secrets/cloud.py +682 -0
- truthound/secrets/integration.py +507 -0
- truthound/secrets/manager.py +633 -0
- truthound/secrets/oidc/__init__.py +172 -0
- truthound/secrets/oidc/base.py +902 -0
- truthound/secrets/oidc/credential_provider.py +623 -0
- truthound/secrets/oidc/exchangers.py +1001 -0
- truthound/secrets/oidc/github/__init__.py +110 -0
- truthound/secrets/oidc/github/claims.py +718 -0
- truthound/secrets/oidc/github/enhanced_provider.py +693 -0
- truthound/secrets/oidc/github/trust_policy.py +742 -0
- truthound/secrets/oidc/github/verification.py +723 -0
- truthound/secrets/oidc/github/workflow.py +691 -0
- truthound/secrets/oidc/providers.py +825 -0
- truthound/secrets/providers.py +506 -0
- truthound/secrets/resolver.py +495 -0
- truthound/stores/__init__.py +177 -0
- truthound/stores/backends/__init__.py +18 -0
- truthound/stores/backends/_protocols.py +340 -0
- truthound/stores/backends/azure_blob.py +530 -0
- truthound/stores/backends/concurrent_filesystem.py +915 -0
- truthound/stores/backends/connection_pool.py +1365 -0
- truthound/stores/backends/database.py +743 -0
- truthound/stores/backends/filesystem.py +538 -0
- truthound/stores/backends/gcs.py +399 -0
- truthound/stores/backends/memory.py +354 -0
- truthound/stores/backends/s3.py +434 -0
- truthound/stores/backpressure/__init__.py +84 -0
- truthound/stores/backpressure/base.py +375 -0
- truthound/stores/backpressure/circuit_breaker.py +434 -0
- truthound/stores/backpressure/monitor.py +376 -0
- truthound/stores/backpressure/strategies.py +677 -0
- truthound/stores/base.py +551 -0
- truthound/stores/batching/__init__.py +65 -0
- truthound/stores/batching/base.py +305 -0
- truthound/stores/batching/buffer.py +370 -0
- truthound/stores/batching/store.py +248 -0
- truthound/stores/batching/writer.py +521 -0
- truthound/stores/caching/__init__.py +60 -0
- truthound/stores/caching/backends.py +684 -0
- truthound/stores/caching/base.py +356 -0
- truthound/stores/caching/store.py +305 -0
- truthound/stores/compression/__init__.py +193 -0
- truthound/stores/compression/adaptive.py +694 -0
- truthound/stores/compression/base.py +514 -0
- truthound/stores/compression/pipeline.py +868 -0
- truthound/stores/compression/providers.py +672 -0
- truthound/stores/compression/streaming.py +832 -0
- truthound/stores/concurrency/__init__.py +81 -0
- truthound/stores/concurrency/atomic.py +556 -0
- truthound/stores/concurrency/index.py +775 -0
- truthound/stores/concurrency/locks.py +576 -0
- truthound/stores/concurrency/manager.py +482 -0
- truthound/stores/encryption/__init__.py +297 -0
- truthound/stores/encryption/base.py +952 -0
- truthound/stores/encryption/keys.py +1191 -0
- truthound/stores/encryption/pipeline.py +903 -0
- truthound/stores/encryption/providers.py +953 -0
- truthound/stores/encryption/streaming.py +950 -0
- truthound/stores/expectations.py +227 -0
- truthound/stores/factory.py +246 -0
- truthound/stores/migration/__init__.py +75 -0
- truthound/stores/migration/base.py +480 -0
- truthound/stores/migration/manager.py +347 -0
- truthound/stores/migration/registry.py +382 -0
- truthound/stores/migration/store.py +559 -0
- truthound/stores/observability/__init__.py +106 -0
- truthound/stores/observability/audit.py +718 -0
- truthound/stores/observability/config.py +270 -0
- truthound/stores/observability/factory.py +208 -0
- truthound/stores/observability/metrics.py +636 -0
- truthound/stores/observability/protocols.py +410 -0
- truthound/stores/observability/store.py +570 -0
- truthound/stores/observability/tracing.py +784 -0
- truthound/stores/replication/__init__.py +76 -0
- truthound/stores/replication/base.py +260 -0
- truthound/stores/replication/monitor.py +269 -0
- truthound/stores/replication/store.py +439 -0
- truthound/stores/replication/syncer.py +391 -0
- truthound/stores/results.py +359 -0
- truthound/stores/retention/__init__.py +77 -0
- truthound/stores/retention/base.py +378 -0
- truthound/stores/retention/policies.py +621 -0
- truthound/stores/retention/scheduler.py +279 -0
- truthound/stores/retention/store.py +526 -0
- truthound/stores/streaming/__init__.py +138 -0
- truthound/stores/streaming/base.py +801 -0
- truthound/stores/streaming/database.py +984 -0
- truthound/stores/streaming/filesystem.py +719 -0
- truthound/stores/streaming/reader.py +629 -0
- truthound/stores/streaming/s3.py +843 -0
- truthound/stores/streaming/writer.py +790 -0
- truthound/stores/tiering/__init__.py +108 -0
- truthound/stores/tiering/base.py +462 -0
- truthound/stores/tiering/manager.py +249 -0
- truthound/stores/tiering/policies.py +692 -0
- truthound/stores/tiering/store.py +526 -0
- truthound/stores/versioning/__init__.py +56 -0
- truthound/stores/versioning/base.py +376 -0
- truthound/stores/versioning/store.py +660 -0
- truthound/stores/versioning/strategies.py +353 -0
- truthound/types.py +56 -0
- truthound/validators/__init__.py +774 -0
- truthound/validators/aggregate/__init__.py +27 -0
- truthound/validators/aggregate/central.py +116 -0
- truthound/validators/aggregate/extremes.py +116 -0
- truthound/validators/aggregate/spread.py +118 -0
- truthound/validators/aggregate/sum.py +64 -0
- truthound/validators/aggregate/type.py +78 -0
- truthound/validators/anomaly/__init__.py +93 -0
- truthound/validators/anomaly/base.py +431 -0
- truthound/validators/anomaly/ml_based.py +1190 -0
- truthound/validators/anomaly/multivariate.py +647 -0
- truthound/validators/anomaly/statistical.py +599 -0
- truthound/validators/base.py +1089 -0
- truthound/validators/business_rule/__init__.py +46 -0
- truthound/validators/business_rule/base.py +147 -0
- truthound/validators/business_rule/checksum.py +509 -0
- truthound/validators/business_rule/financial.py +526 -0
- truthound/validators/cache.py +733 -0
- truthound/validators/completeness/__init__.py +39 -0
- truthound/validators/completeness/conditional.py +73 -0
- truthound/validators/completeness/default.py +98 -0
- truthound/validators/completeness/empty.py +103 -0
- truthound/validators/completeness/nan.py +337 -0
- truthound/validators/completeness/null.py +152 -0
- truthound/validators/cross_table/__init__.py +17 -0
- truthound/validators/cross_table/aggregate.py +333 -0
- truthound/validators/cross_table/row_count.py +122 -0
- truthound/validators/datetime/__init__.py +29 -0
- truthound/validators/datetime/format.py +78 -0
- truthound/validators/datetime/freshness.py +269 -0
- truthound/validators/datetime/order.py +73 -0
- truthound/validators/datetime/parseable.py +185 -0
- truthound/validators/datetime/range.py +202 -0
- truthound/validators/datetime/timezone.py +69 -0
- truthound/validators/distribution/__init__.py +49 -0
- truthound/validators/distribution/distribution.py +128 -0
- truthound/validators/distribution/monotonic.py +119 -0
- truthound/validators/distribution/outlier.py +178 -0
- truthound/validators/distribution/quantile.py +80 -0
- truthound/validators/distribution/range.py +254 -0
- truthound/validators/distribution/set.py +125 -0
- truthound/validators/distribution/statistical.py +459 -0
- truthound/validators/drift/__init__.py +79 -0
- truthound/validators/drift/base.py +427 -0
- truthound/validators/drift/multi_feature.py +401 -0
- truthound/validators/drift/numeric.py +395 -0
- truthound/validators/drift/psi.py +446 -0
- truthound/validators/drift/statistical.py +510 -0
- truthound/validators/enterprise.py +1658 -0
- truthound/validators/geospatial/__init__.py +80 -0
- truthound/validators/geospatial/base.py +97 -0
- truthound/validators/geospatial/boundary.py +238 -0
- truthound/validators/geospatial/coordinate.py +351 -0
- truthound/validators/geospatial/distance.py +399 -0
- truthound/validators/geospatial/polygon.py +665 -0
- truthound/validators/i18n/__init__.py +308 -0
- truthound/validators/i18n/bidi.py +571 -0
- truthound/validators/i18n/catalogs.py +570 -0
- truthound/validators/i18n/dialects.py +763 -0
- truthound/validators/i18n/extended_catalogs.py +549 -0
- truthound/validators/i18n/formatting.py +1434 -0
- truthound/validators/i18n/loader.py +1020 -0
- truthound/validators/i18n/messages.py +521 -0
- truthound/validators/i18n/plural.py +683 -0
- truthound/validators/i18n/protocols.py +855 -0
- truthound/validators/i18n/tms.py +1162 -0
- truthound/validators/localization/__init__.py +53 -0
- truthound/validators/localization/base.py +122 -0
- truthound/validators/localization/chinese.py +362 -0
- truthound/validators/localization/japanese.py +275 -0
- truthound/validators/localization/korean.py +524 -0
- truthound/validators/memory/__init__.py +94 -0
- truthound/validators/memory/approximate_knn.py +506 -0
- truthound/validators/memory/base.py +547 -0
- truthound/validators/memory/sgd_online.py +719 -0
- truthound/validators/memory/streaming_ecdf.py +753 -0
- truthound/validators/ml_feature/__init__.py +54 -0
- truthound/validators/ml_feature/base.py +249 -0
- truthound/validators/ml_feature/correlation.py +299 -0
- truthound/validators/ml_feature/leakage.py +344 -0
- truthound/validators/ml_feature/null_impact.py +270 -0
- truthound/validators/ml_feature/scale.py +264 -0
- truthound/validators/multi_column/__init__.py +89 -0
- truthound/validators/multi_column/arithmetic.py +284 -0
- truthound/validators/multi_column/base.py +231 -0
- truthound/validators/multi_column/comparison.py +273 -0
- truthound/validators/multi_column/consistency.py +312 -0
- truthound/validators/multi_column/statistical.py +299 -0
- truthound/validators/optimization/__init__.py +164 -0
- truthound/validators/optimization/aggregation.py +563 -0
- truthound/validators/optimization/covariance.py +556 -0
- truthound/validators/optimization/geo.py +626 -0
- truthound/validators/optimization/graph.py +587 -0
- truthound/validators/optimization/orchestrator.py +970 -0
- truthound/validators/optimization/profiling.py +1312 -0
- truthound/validators/privacy/__init__.py +223 -0
- truthound/validators/privacy/base.py +635 -0
- truthound/validators/privacy/ccpa.py +670 -0
- truthound/validators/privacy/gdpr.py +728 -0
- truthound/validators/privacy/global_patterns.py +604 -0
- truthound/validators/privacy/plugins.py +867 -0
- truthound/validators/profiling/__init__.py +52 -0
- truthound/validators/profiling/base.py +175 -0
- truthound/validators/profiling/cardinality.py +312 -0
- truthound/validators/profiling/entropy.py +391 -0
- truthound/validators/profiling/frequency.py +455 -0
- truthound/validators/pushdown_support.py +660 -0
- truthound/validators/query/__init__.py +91 -0
- truthound/validators/query/aggregate.py +346 -0
- truthound/validators/query/base.py +246 -0
- truthound/validators/query/column.py +249 -0
- truthound/validators/query/expression.py +274 -0
- truthound/validators/query/result.py +323 -0
- truthound/validators/query/row_count.py +264 -0
- truthound/validators/referential/__init__.py +80 -0
- truthound/validators/referential/base.py +395 -0
- truthound/validators/referential/cascade.py +391 -0
- truthound/validators/referential/circular.py +563 -0
- truthound/validators/referential/foreign_key.py +624 -0
- truthound/validators/referential/orphan.py +485 -0
- truthound/validators/registry.py +112 -0
- truthound/validators/schema/__init__.py +41 -0
- truthound/validators/schema/column_count.py +142 -0
- truthound/validators/schema/column_exists.py +80 -0
- truthound/validators/schema/column_order.py +82 -0
- truthound/validators/schema/column_pair.py +85 -0
- truthound/validators/schema/column_pair_set.py +195 -0
- truthound/validators/schema/column_type.py +94 -0
- truthound/validators/schema/multi_column.py +53 -0
- truthound/validators/schema/multi_column_aggregate.py +175 -0
- truthound/validators/schema/referential.py +274 -0
- truthound/validators/schema/table_schema.py +91 -0
- truthound/validators/schema_validator.py +219 -0
- truthound/validators/sdk/__init__.py +250 -0
- truthound/validators/sdk/builder.py +680 -0
- truthound/validators/sdk/decorators.py +474 -0
- truthound/validators/sdk/enterprise/__init__.py +211 -0
- truthound/validators/sdk/enterprise/docs.py +725 -0
- truthound/validators/sdk/enterprise/fuzzing.py +659 -0
- truthound/validators/sdk/enterprise/licensing.py +709 -0
- truthound/validators/sdk/enterprise/manager.py +543 -0
- truthound/validators/sdk/enterprise/resources.py +628 -0
- truthound/validators/sdk/enterprise/sandbox.py +766 -0
- truthound/validators/sdk/enterprise/signing.py +603 -0
- truthound/validators/sdk/enterprise/templates.py +865 -0
- truthound/validators/sdk/enterprise/versioning.py +659 -0
- truthound/validators/sdk/templates.py +757 -0
- truthound/validators/sdk/testing.py +807 -0
- truthound/validators/security/__init__.py +181 -0
- truthound/validators/security/redos/__init__.py +182 -0
- truthound/validators/security/redos/core.py +861 -0
- truthound/validators/security/redos/cpu_monitor.py +593 -0
- truthound/validators/security/redos/cve_database.py +791 -0
- truthound/validators/security/redos/ml/__init__.py +155 -0
- truthound/validators/security/redos/ml/base.py +785 -0
- truthound/validators/security/redos/ml/datasets.py +618 -0
- truthound/validators/security/redos/ml/features.py +359 -0
- truthound/validators/security/redos/ml/models.py +1000 -0
- truthound/validators/security/redos/ml/predictor.py +507 -0
- truthound/validators/security/redos/ml/storage.py +632 -0
- truthound/validators/security/redos/ml/training.py +571 -0
- truthound/validators/security/redos/ml_analyzer.py +937 -0
- truthound/validators/security/redos/optimizer.py +674 -0
- truthound/validators/security/redos/profiler.py +682 -0
- truthound/validators/security/redos/re2_engine.py +709 -0
- truthound/validators/security/redos.py +886 -0
- truthound/validators/security/sql_security.py +1247 -0
- truthound/validators/streaming/__init__.py +126 -0
- truthound/validators/streaming/base.py +292 -0
- truthound/validators/streaming/completeness.py +210 -0
- truthound/validators/streaming/mixin.py +575 -0
- truthound/validators/streaming/range.py +308 -0
- truthound/validators/streaming/sources.py +846 -0
- truthound/validators/string/__init__.py +57 -0
- truthound/validators/string/casing.py +158 -0
- truthound/validators/string/charset.py +96 -0
- truthound/validators/string/format.py +501 -0
- truthound/validators/string/json.py +77 -0
- truthound/validators/string/json_schema.py +184 -0
- truthound/validators/string/length.py +104 -0
- truthound/validators/string/like_pattern.py +237 -0
- truthound/validators/string/regex.py +202 -0
- truthound/validators/string/regex_extended.py +435 -0
- truthound/validators/table/__init__.py +88 -0
- truthound/validators/table/base.py +78 -0
- truthound/validators/table/column_count.py +198 -0
- truthound/validators/table/freshness.py +362 -0
- truthound/validators/table/row_count.py +251 -0
- truthound/validators/table/schema.py +333 -0
- truthound/validators/table/size.py +285 -0
- truthound/validators/timeout/__init__.py +102 -0
- truthound/validators/timeout/advanced/__init__.py +247 -0
- truthound/validators/timeout/advanced/circuit_breaker.py +675 -0
- truthound/validators/timeout/advanced/prediction.py +773 -0
- truthound/validators/timeout/advanced/priority.py +618 -0
- truthound/validators/timeout/advanced/redis_backend.py +770 -0
- truthound/validators/timeout/advanced/retry.py +721 -0
- truthound/validators/timeout/advanced/sampling.py +788 -0
- truthound/validators/timeout/advanced/sla.py +661 -0
- truthound/validators/timeout/advanced/telemetry.py +804 -0
- truthound/validators/timeout/cascade.py +477 -0
- truthound/validators/timeout/deadline.py +657 -0
- truthound/validators/timeout/degradation.py +525 -0
- truthound/validators/timeout/distributed.py +597 -0
- truthound/validators/timeseries/__init__.py +89 -0
- truthound/validators/timeseries/base.py +326 -0
- truthound/validators/timeseries/completeness.py +617 -0
- truthound/validators/timeseries/gap.py +485 -0
- truthound/validators/timeseries/monotonic.py +310 -0
- truthound/validators/timeseries/seasonality.py +422 -0
- truthound/validators/timeseries/trend.py +510 -0
- truthound/validators/uniqueness/__init__.py +59 -0
- truthound/validators/uniqueness/approximate.py +475 -0
- truthound/validators/uniqueness/distinct_values.py +253 -0
- truthound/validators/uniqueness/duplicate.py +118 -0
- truthound/validators/uniqueness/primary_key.py +140 -0
- truthound/validators/uniqueness/unique.py +191 -0
- truthound/validators/uniqueness/within_record.py +599 -0
- truthound/validators/utils.py +756 -0
- truthound-1.0.8.dist-info/METADATA +474 -0
- truthound-1.0.8.dist-info/RECORD +877 -0
- truthound-1.0.8.dist-info/WHEEL +4 -0
- truthound-1.0.8.dist-info/entry_points.txt +2 -0
- truthound-1.0.8.dist-info/licenses/LICENSE +190 -0
truthound/cli.py
ADDED
|
@@ -0,0 +1,2358 @@
|
|
|
1
|
+
"""Command-line interface for Truthound."""
|
|
2
|
+
|
|
3
|
+
import json
|
|
4
|
+
from pathlib import Path
|
|
5
|
+
from typing import Annotated, Optional
|
|
6
|
+
|
|
7
|
+
import typer
|
|
8
|
+
|
|
9
|
+
from truthound.api import check, mask, profile, scan
|
|
10
|
+
from truthound.drift import compare
|
|
11
|
+
from truthound.schema import learn
|
|
12
|
+
|
|
13
|
+
# Phase 7: Auto-profiling imports (lazy loaded to avoid startup overhead)
|
|
14
|
+
|
|
15
|
+
def _version_callback(value: bool) -> None:
|
|
16
|
+
"""Print version and exit."""
|
|
17
|
+
if value:
|
|
18
|
+
from truthound import __version__
|
|
19
|
+
typer.echo(f"truthound {__version__}")
|
|
20
|
+
raise typer.Exit()
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
# Root Typer application. Every command and sub-application defined in this
# module is registered on this object.
app = typer.Typer(
    add_completion=False,
    help="Zero-Configuration Data Quality Framework Powered by Polars",
    name="truthound",
)
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
# Root callback of the CLI. It exists to host the global ``--version``/``-V``
# flag; the flag is marked eager and is handled entirely by
# ``_version_callback``.
@app.callback()
def main(
    version: Annotated[
        bool,
        typer.Option(
            "--version",
            "-V",
            help="Show version and exit.",
            is_eager=True,
            callback=_version_callback,
        ),
    ] = False,
) -> None:
    """Truthound - Zero-Configuration Data Quality Framework."""
    # Intentionally empty: the option callback does all the work.
    return None
|
|
45
|
+
|
|
46
|
+
# Sub-application grouping the checkpoint / CI-CD commands; it is mounted on
# the root app under the "checkpoint" name.
checkpoint_app = typer.Typer(
    help="Checkpoint and CI/CD integration commands",
    name="checkpoint",
)
app.add_typer(checkpoint_app, name="checkpoint")

# Phase 9: plugin management commands, mounted as "plugin".
from truthound.plugins.cli import app as plugin_app

app.add_typer(plugin_app, name="plugin")

# Scaffolding commands (th new validator, th new reporter, th new plugin),
# mounted as "new".
from truthound.cli_modules.scaffolding.commands import app as new_app

app.add_typer(new_app, name="new")
|
|
60
|
+
|
|
61
|
+
|
|
62
|
+
# ``learn`` command: infer a schema from FILE and save it to ``--output``
# (default ``schema.yaml``).
#
# Exit status is 1 when FILE does not exist or when learning/saving raises.
# ``--no-constraints`` is inverted into the ``infer_constraints`` argument of
# ``learn()``. The docstring is left short because it doubles as the command's
# help text.
@app.command(name="learn")
def learn_cmd(
    file: Annotated[Path, typer.Argument(help="Path to the data file to learn from")],
    output: Annotated[
        Path,
        typer.Option("--output", "-o", help="Output schema file path"),
    ] = Path("schema.yaml"),
    no_constraints: Annotated[
        bool,
        typer.Option("--no-constraints", help="Don't infer constraints from data"),
    ] = False,
) -> None:
    """Learn schema from a data file."""
    source_missing = not file.exists()
    if source_missing:
        typer.echo(f"Error: File not found: {file}", err=True)
        raise typer.Exit(1)

    infer = not no_constraints
    try:
        learned = learn(str(file), infer_constraints=infer)
        learned.save(output)
        # Short summary of what was learned.
        typer.echo(f"Schema saved to {output}")
        typer.echo(f" Columns: {len(learned.columns)}")
        typer.echo(f" Rows: {learned.row_count:,}")
    except Exception as exc:
        # Any failure while learning or saving becomes a one-line CLI error.
        typer.echo(f"Error: {exc}", err=True)
        raise typer.Exit(1)
|
|
88
|
+
|
|
89
|
+
|
|
90
|
+
# ``check`` command: validate FILE and report the result.
#
# Inputs
#   --validators / -v   values are joined and re-split on commas, so both
#                       ``-v a,b`` and ``-v a -v b`` give ["a", "b"].
#   --min-severity, --schema, --auto-schema are forwarded to ``check()``.
#   --format            "json" and "html" are handled explicitly; any other
#                       value falls back to the console report.
#   --output            destination file for json (optional) and html
#                       (required); not used for the console report.
#
# Exit status 1 when: FILE or the schema file is missing, ``check()`` raises,
# html is requested without --output, the HTML reporter cannot be imported,
# or --strict is set and the report has issues.
#
# The docstring is left short because it doubles as the command's help text.
@app.command(name="check")
def check_cmd(
    file: Annotated[Path, typer.Argument(help="Path to the data file")],
    validators: Annotated[
        Optional[list[str]],
        typer.Option("--validators", "-v", help="Comma-separated list of validators"),
    ] = None,
    min_severity: Annotated[
        Optional[str],
        typer.Option("--min-severity", "-s", help="Minimum severity level (low, medium, high, critical)"),
    ] = None,
    schema_file: Annotated[
        Optional[Path],
        typer.Option("--schema", help="Schema file for validation"),
    ] = None,
    auto_schema: Annotated[
        bool,
        typer.Option("--auto-schema", help="Auto-learn and cache schema (zero-config mode)"),
    ] = False,
    format: Annotated[
        str,
        typer.Option("--format", "-f", help="Output format (console, json, html)"),
    ] = "console",
    output: Annotated[
        Optional[Path],
        typer.Option("--output", "-o", help="Output file path"),
    ] = None,
    strict: Annotated[
        bool,
        typer.Option("--strict", help="Exit with code 1 if issues are found"),
    ] = False,
) -> None:
    """Validate data quality in a file."""
    if not file.exists():
        typer.echo(f"Error: File not found: {file}", err=True)
        raise typer.Exit(1)

    if schema_file and not schema_file.exists():
        typer.echo(f"Error: Schema file not found: {schema_file}", err=True)
        raise typer.Exit(1)

    # Parse validators if provided
    validator_list = None
    if validators:
        validator_list = [v.strip() for v in ",".join(validators).split(",")]

    try:
        report = check(
            str(file),
            validators=validator_list,
            min_severity=min_severity,
            schema=schema_file,
            auto_schema=auto_schema,
        )
    except Exception as e:
        typer.echo(f"Error: {e}", err=True)
        raise typer.Exit(1)

    # Output the report
    if format == "json":
        result = report.to_json()
        if output:
            # Pin the encoding: without it write_text() uses the platform's
            # locale encoding, which can fail on (or mis-encode) non-ASCII
            # content such as column names.
            output.write_text(result, encoding="utf-8")
            typer.echo(f"Report written to {output}")
        else:
            typer.echo(result)
    elif format == "html":
        if not output:
            typer.echo("Error: --output is required for HTML format", err=True)
            raise typer.Exit(1)
        # HTML output requires jinja2. Only the import and the rendering are
        # guarded; the file write below cannot be the source of the
        # ImportError this handler reports.
        try:
            from truthound.html_report import generate_html_report

            html = generate_html_report(report)
        except ImportError:
            typer.echo("Error: HTML reports require jinja2. Install with: pip install truthound[reports]", err=True)
            raise typer.Exit(1)

        output.write_text(html, encoding="utf-8")
        typer.echo(f"HTML report written to {output}")
    else:
        report.print()

    # Exit with error if strict mode and issues found
    if strict and report.has_issues:
        raise typer.Exit(1)
|
|
176
|
+
|
|
177
|
+
|
|
178
|
+
# ``scan`` command: run the PII scan over FILE and report the findings.
#
# With ``--format json`` the report's JSON is printed, or saved to
# ``--output`` when one is given. Any other format value prints the console
# report, and ``--output`` is not used in that case.
#
# Exit status 1 when FILE does not exist or ``scan()`` raises.
# The docstring is left short because it doubles as the command's help text.
@app.command(name="scan")
def scan_cmd(
    file: Annotated[Path, typer.Argument(help="Path to the data file")],
    format: Annotated[
        str,
        typer.Option("--format", "-f", help="Output format (console, json)"),
    ] = "console",
    output: Annotated[
        Optional[Path],
        typer.Option("--output", "-o", help="Output file path"),
    ] = None,
) -> None:
    """Scan for personally identifiable information (PII)."""
    if not file.exists():
        typer.echo(f"Error: File not found: {file}", err=True)
        raise typer.Exit(1)

    try:
        pii_report = scan(str(file))
    except Exception as e:
        typer.echo(f"Error: {e}", err=True)
        raise typer.Exit(1)

    if format == "json":
        result = pii_report.to_json()
        if output:
            # Pin the encoding: without it write_text() uses the platform's
            # locale encoding, which can fail on (or mis-encode) non-ASCII
            # content in the report.
            output.write_text(result, encoding="utf-8")
            typer.echo(f"Report written to {output}")
        else:
            typer.echo(result)
    else:
        pii_report.print()
|
|
210
|
+
|
|
211
|
+
|
|
212
|
+
# ``mask`` command: mask sensitive columns of FILE and write the result to
# ``--output``.
#
# ``--columns`` values are joined and re-split on commas, so ``-c a,b`` and
# ``-c a -c b`` both give ["a", "b"]; when the option is absent ``None`` is
# passed to ``mask()``. The writer is picked from the lower-cased suffix of
# ``--output``: ".parquet" and ".json" have dedicated writers, everything
# else (including ".csv") is written as CSV.
#
# Exit status 1 when FILE does not exist or ``mask()`` raises.
# The docstring is left short because it doubles as the command's help text.
@app.command(name="mask")
def mask_cmd(
    file: Annotated[Path, typer.Argument(help="Path to the data file")],
    output: Annotated[
        Path,
        typer.Option("--output", "-o", help="Output file path"),
    ],
    columns: Annotated[
        Optional[list[str]],
        typer.Option("--columns", "-c", help="Columns to mask (comma-separated)"),
    ] = None,
    strategy: Annotated[
        str,
        typer.Option("--strategy", "-s", help="Masking strategy (redact, hash, fake)"),
    ] = "redact",
) -> None:
    """Mask sensitive data in a file."""
    if not file.exists():
        typer.echo(f"Error: File not found: {file}", err=True)
        raise typer.Exit(1)

    # Flatten repeated / comma-separated --columns values into one list.
    column_list = (
        [name.strip() for name in ",".join(columns).split(",")] if columns else None
    )

    try:
        masked_df = mask(str(file), columns=column_list, strategy=strategy)
    except Exception as exc:
        typer.echo(f"Error: {exc}", err=True)
        raise typer.Exit(1)

    # Resolve the writer by name so that only the chosen method is looked up
    # on the frame; CSV is the fallback for unknown suffixes.
    writer_by_suffix = {".parquet": "write_parquet", ".json": "write_json"}
    writer_name = writer_by_suffix.get(output.suffix.lower(), "write_csv")
    getattr(masked_df, writer_name)(output)

    typer.echo(f"Masked data written to {output}")
|
|
256
|
+
|
|
257
|
+
|
|
258
|
+
# ``profile`` command: build a statistical profile of FILE and report it.
#
# With ``--format json`` the profile's JSON is printed, or saved to
# ``--output`` when one is given. Any other format value prints the console
# report, and ``--output`` is not used in that case.
#
# Exit status 1 when FILE does not exist or ``profile()`` raises.
# The docstring is left short because it doubles as the command's help text.
@app.command(name="profile")
def profile_cmd(
    file: Annotated[Path, typer.Argument(help="Path to the data file")],
    format: Annotated[
        str,
        typer.Option("--format", "-f", help="Output format (console, json)"),
    ] = "console",
    output: Annotated[
        Optional[Path],
        typer.Option("--output", "-o", help="Output file path"),
    ] = None,
) -> None:
    """Generate a statistical profile of the data."""
    if not file.exists():
        typer.echo(f"Error: File not found: {file}", err=True)
        raise typer.Exit(1)

    try:
        profile_report = profile(str(file))
    except Exception as e:
        typer.echo(f"Error: {e}", err=True)
        raise typer.Exit(1)

    if format == "json":
        result = profile_report.to_json()
        if output:
            # Pin the encoding: without it write_text() uses the platform's
            # locale encoding, which can fail on (or mis-encode) non-ASCII
            # content such as column names.
            output.write_text(result, encoding="utf-8")
            typer.echo(f"Profile written to {output}")
        else:
            typer.echo(result)
    else:
        profile_report.print()
|
|
290
|
+
|
|
291
|
+
|
|
292
|
+
# ``compare`` command: compare BASELINE against CURRENT and report drift.
#
# Inputs
#   --columns / -c   values are joined and re-split on commas, so ``-c a,b``
#                    and ``-c a -c b`` both give ["a", "b"]; ``None`` is
#                    passed to ``compare()`` when the option is absent.
#   --method, --threshold are forwarded to ``compare()`` unchanged.
#   --format         "json" prints the report's JSON or saves it to --output;
#                    any other value prints the console report.
#
# Exit status 1 when either file is missing, ``compare()`` raises, or
# --strict is set and the report says drift was detected.
# The docstring is left short because it doubles as the command's help text.
@app.command(name="compare")
def compare_cmd(
    baseline: Annotated[Path, typer.Argument(help="Baseline (reference) data file")],
    current: Annotated[Path, typer.Argument(help="Current data file to compare")],
    columns: Annotated[
        Optional[list[str]],
        typer.Option("--columns", "-c", help="Columns to compare (comma-separated)"),
    ] = None,
    method: Annotated[
        str,
        typer.Option("--method", "-m", help="Detection method (auto, ks, psi, chi2, js)"),
    ] = "auto",
    threshold: Annotated[
        Optional[float],
        typer.Option("--threshold", "-t", help="Custom drift threshold"),
    ] = None,
    format: Annotated[
        str,
        typer.Option("--format", "-f", help="Output format (console, json)"),
    ] = "console",
    output: Annotated[
        Optional[Path],
        typer.Option("--output", "-o", help="Output file path"),
    ] = None,
    strict: Annotated[
        bool,
        typer.Option("--strict", help="Exit with code 1 if drift is detected"),
    ] = False,
) -> None:
    """Compare two datasets and detect data drift."""
    if not baseline.exists():
        typer.echo(f"Error: Baseline file not found: {baseline}", err=True)
        raise typer.Exit(1)

    if not current.exists():
        typer.echo(f"Error: Current file not found: {current}", err=True)
        raise typer.Exit(1)

    # Parse columns if provided
    column_list = None
    if columns:
        column_list = [c.strip() for c in ",".join(columns).split(",")]

    try:
        drift_report = compare(
            str(baseline),
            str(current),
            columns=column_list,
            method=method,
            threshold=threshold,
        )
    except Exception as e:
        typer.echo(f"Error: {e}", err=True)
        raise typer.Exit(1)

    if format == "json":
        result = drift_report.to_json()
        if output:
            # Pin the encoding: without it write_text() uses the platform's
            # locale encoding, which can fail on (or mis-encode) non-ASCII
            # content such as column names.
            output.write_text(result, encoding="utf-8")
            typer.echo(f"Drift report written to {output}")
        else:
            typer.echo(result)
    else:
        drift_report.print()

    # Exit with error if strict mode and drift found
    if strict and drift_report.has_drift:
        raise typer.Exit(1)
|
|
360
|
+
|
|
361
|
+
|
|
362
|
+
# =============================================================================
|
|
363
|
+
# Checkpoint Commands
|
|
364
|
+
# =============================================================================
|
|
365
|
+
|
|
366
|
+
|
|
367
|
+
# ``checkpoint run`` command: run the checkpoint called NAME.
#
# Two modes:
#   * ``--config`` given: the file is loaded into a CheckpointRegistry (YAML
#     loader for .yaml/.yml, JSON loader otherwise) and NAME is looked up.
#   * no ``--config``: an ad-hoc Checkpoint is built from ``--data``
#     (required), ``--validators`` and the action options (``--store``,
#     ``--slack``, ``--webhook``, ``--github-summary``).
#
# NOTE(review): in ``--config`` mode the --data/--validators/--store/--slack/
# --webhook/--github-summary options are not referenced at all, although the
# help texts call some of them overrides. Confirm whether that is intended.
#
# Output: ``--format json`` dumps ``result.to_dict()`` to stdout or to
# ``--output``; any other value prints ``result.summary()``.
#
# Exit status 1 when: the config or data file is missing, NAME is not in the
# config, ``--data`` is missing in ad-hoc mode, anything raises, or --strict
# is set and the result status is "failure" or "error".
# The docstring is left short because it doubles as the command's help text.
@checkpoint_app.command(name="run")
def checkpoint_run_cmd(
    name: Annotated[str, typer.Argument(help="Name of checkpoint to run")],
    config_file: Annotated[
        Optional[Path],
        typer.Option("--config", "-c", help="Checkpoint configuration file (YAML/JSON)"),
    ] = None,
    data_source: Annotated[
        Optional[Path],
        typer.Option("--data", "-d", help="Override data source path"),
    ] = None,
    validators: Annotated[
        Optional[list[str]],
        typer.Option("--validators", "-v", help="Override validators (comma-separated)"),
    ] = None,
    output: Annotated[
        Optional[Path],
        typer.Option("--output", "-o", help="Output file for results (JSON)"),
    ] = None,
    format: Annotated[
        str,
        typer.Option("--format", "-f", help="Output format (console, json)"),
    ] = "console",
    strict: Annotated[
        bool,
        typer.Option("--strict", help="Exit with code 1 if issues are found"),
    ] = False,
    store_result: Annotated[
        Optional[Path],
        typer.Option("--store", help="Store results to directory"),
    ] = None,
    notify_slack: Annotated[
        Optional[str],
        typer.Option("--slack", help="Slack webhook URL for notifications"),
    ] = None,
    notify_webhook: Annotated[
        Optional[str],
        typer.Option("--webhook", help="Webhook URL for notifications"),
    ] = None,
    github_summary: Annotated[
        bool,
        typer.Option("--github-summary", help="Write GitHub Actions job summary"),
    ] = False,
) -> None:
    """Run a checkpoint validation pipeline."""
    from truthound.checkpoint import Checkpoint, CheckpointRegistry
    from truthound.checkpoint.actions import (
        StoreValidationResult,
        SlackNotification,
        WebhookAction,
        GitHubAction,
    )

    try:
        # Load from config file or create ad-hoc
        if config_file:
            if not config_file.exists():
                typer.echo(f"Error: Config file not found: {config_file}", err=True)
                raise typer.Exit(1)

            registry = CheckpointRegistry()
            # Plain if/else instead of a conditional expression evaluated only
            # for its side effect; the suffix is lower-cased so ".YAML" and
            # ".YML" also reach the YAML loader.
            if config_file.suffix.lower() in (".yaml", ".yml"):
                registry.load_from_yaml(config_file)
            else:
                registry.load_from_json(config_file)

            if name not in registry:
                typer.echo(f"Error: Checkpoint '{name}' not found in config", err=True)
                # Part of the error message, so it goes to stderr as well and
                # stdout stays clean for --format json consumers.
                typer.echo(f"Available: {', '.join(registry.list_names())}", err=True)
                raise typer.Exit(1)

            checkpoint = registry.get(name)
        else:
            # Create ad-hoc checkpoint
            if not data_source:
                typer.echo("Error: --data is required when not using config file", err=True)
                raise typer.Exit(1)

            if not data_source.exists():
                typer.echo(f"Error: Data file not found: {data_source}", err=True)
                raise typer.Exit(1)

            # Flatten repeated / comma-separated --validators values.
            validator_list = None
            if validators:
                validator_list = [v.strip() for v in ",".join(validators).split(",")]

            actions = []

            # Add actions based on CLI options
            if store_result:
                actions.append(StoreValidationResult(store_path=str(store_result)))

            if notify_slack:
                actions.append(SlackNotification(
                    webhook_url=notify_slack,
                    notify_on="failure",
                ))

            if notify_webhook:
                actions.append(WebhookAction(url=notify_webhook))

            if github_summary:
                actions.append(GitHubAction(
                    set_summary=True,
                    set_output=True,
                ))

            checkpoint = Checkpoint(
                name=name,
                data_source=str(data_source),
                validators=validator_list,
                actions=actions,
            )

        # Run checkpoint
        result = checkpoint.run()

        # Output results
        if format == "json":
            # default=str stringifies any value json cannot encode natively.
            result_json = json.dumps(result.to_dict(), indent=2, default=str)
            if output:
                output.write_text(result_json)
                typer.echo(f"Results written to {output}")
            else:
                typer.echo(result_json)
        else:
            typer.echo(result.summary())

        # Exit code based on status
        if strict and result.status.value in ("failure", "error"):
            raise typer.Exit(1)

    except typer.Exit:
        # Deliberate exits raised above must not be reported as errors below.
        raise
    except Exception as e:
        typer.echo(f"Error: {e}", err=True)
        raise typer.Exit(1)
|
|
501
|
+
|
|
502
|
+
|
|
503
|
+
# ``checkpoint list`` command: list the checkpoints known to a registry.
#
# A fresh CheckpointRegistry is created; when ``--config`` is given the file
# is loaded into it first (YAML loader for .yaml/.yml, JSON loader otherwise).
#
# Output
#   --format json   a JSON array with one ``to_dict()`` entry per checkpoint;
#                   an empty registry yields ``[]`` so the output always
#                   parses as JSON.
#   anything else   a human-readable listing, or "No checkpoints registered."
#
# Exit status 1 when the config file is missing or anything raises.
# The docstring is left short because it doubles as the command's help text.
@checkpoint_app.command(name="list")
def checkpoint_list_cmd(
    config_file: Annotated[
        Optional[Path],
        typer.Option("--config", "-c", help="Checkpoint configuration file"),
    ] = None,
    format: Annotated[
        str,
        typer.Option("--format", "-f", help="Output format (console, json)"),
    ] = "console",
) -> None:
    """List available checkpoints."""
    from truthound.checkpoint import CheckpointRegistry

    try:
        registry = CheckpointRegistry()

        if config_file:
            if not config_file.exists():
                typer.echo(f"Error: Config file not found: {config_file}", err=True)
                raise typer.Exit(1)

            # Lower-cased so ".YAML"/".YML" also reach the YAML loader.
            if config_file.suffix.lower() in (".yaml", ".yml"):
                registry.load_from_yaml(config_file)
            else:
                registry.load_from_json(config_file)

        checkpoints = registry.list_all()

        if format == "json":
            # Handled before the empty check so JSON consumers never receive
            # the plain-text "No checkpoints registered." sentence.
            # default=str matches the JSON output of ``checkpoint run``.
            result = json.dumps(
                [cp.to_dict() for cp in checkpoints], indent=2, default=str
            )
            typer.echo(result)
            return

        if not checkpoints:
            typer.echo("No checkpoints registered.")
            return

        typer.echo(f"Checkpoints ({len(checkpoints)}):")
        for cp in checkpoints:
            typer.echo(f" - {cp.name}")
            typer.echo(f" Data: {cp.config.data_source}")
            typer.echo(f" Actions: {len(cp.actions)}")
            typer.echo(f" Triggers: {len(cp.triggers)}")

    except typer.Exit:
        # Deliberate exits raised above must not be reported as errors below.
        raise
    except Exception as e:
        typer.echo(f"Error: {e}", err=True)
        raise typer.Exit(1)
|
|
552
|
+
|
|
553
|
+
|
|
554
|
+
@checkpoint_app.command(name="validate")
def checkpoint_validate_cmd(
    config_file: Annotated[
        Path,
        typer.Argument(help="Checkpoint configuration file to validate"),
    ],
) -> None:
    """Validate a checkpoint configuration file."""
    # Flow: load every checkpoint from ``config_file``, call ``validate()``
    # on each one and print its errors. Exits with 1 when the file is
    # missing, when loading fails, or when any checkpoint reports errors.
    from truthound.checkpoint import CheckpointRegistry

    try:
        if not config_file.exists():
            typer.echo(f"Error: Config file not found: {config_file}", err=True)
            raise typer.Exit(1)

        registry = CheckpointRegistry()

        # Compare the extension case-insensitively so that e.g.
        # "CONFIG.YML" is not handed to the JSON loader by mistake.
        if config_file.suffix.lower() in (".yaml", ".yml"):
            checkpoints = registry.load_from_yaml(config_file)
        else:
            # Every non-YAML extension is treated as JSON.
            checkpoints = registry.load_from_json(config_file)

        all_valid = True

        for cp in checkpoints:
            errors = cp.validate()
            if errors:
                all_valid = False
                typer.echo(f"Checkpoint '{cp.name}' has errors:")
                for err in errors:
                    typer.echo(f" - {err}")
            else:
                typer.echo(f"Checkpoint '{cp.name}' is valid")

        if all_valid:
            typer.echo(f"\nAll {len(checkpoints)} checkpoint(s) are valid.")
        else:
            typer.echo("\nSome checkpoints have validation errors.", err=True)
            raise typer.Exit(1)

    except typer.Exit:
        # Deliberate exits above must keep their own exit code.
        raise
    except Exception as e:
        typer.echo(f"Error: {e}", err=True)
        raise typer.Exit(1)
|
|
599
|
+
|
|
600
|
+
|
|
601
|
+
@checkpoint_app.command(name="init")
def checkpoint_init_cmd(
    output: Annotated[
        Path,
        typer.Option("--output", "-o", help="Output file path"),
    ] = Path("truthound.yaml"),
    format: Annotated[
        str,
        typer.Option("--format", "-f", help="Config format (yaml, json)"),
    ] = "yaml",
) -> None:
    """Initialize a sample checkpoint configuration file."""
    # Writes a two-checkpoint sample configuration to ``output``. The file
    # suffix is always replaced to match the chosen format (".json" for
    # "json", ".yaml" for anything else).
    sample_config = {
        "checkpoints": [
            {
                "name": "daily_data_validation",
                "data_source": "data/production.csv",
                "validators": ["null", "duplicate", "range", "regex"],
                "min_severity": "medium",
                "auto_schema": True,
                "tags": {
                    "environment": "production",
                    "team": "data-platform",
                },
                "actions": [
                    {
                        "type": "store_result",
                        "store_path": "./truthound_results",
                        "partition_by": "date",
                    },
                    {
                        "type": "update_docs",
                        "site_path": "./truthound_docs",
                        "include_history": True,
                    },
                    {
                        "type": "slack",
                        "webhook_url": "https://hooks.slack.com/services/YOUR/WEBHOOK/URL",
                        "notify_on": "failure",
                        "channel": "#data-quality",
                    },
                ],
                "triggers": [
                    {
                        "type": "schedule",
                        "interval_hours": 24,
                        "run_on_weekdays": [0, 1, 2, 3, 4],  # Mon-Fri
                    },
                ],
            },
            {
                "name": "hourly_metrics_check",
                "data_source": "data/metrics.parquet",
                "validators": ["null", "range"],
                "actions": [
                    {
                        "type": "webhook",
                        "url": "https://api.example.com/data-quality/events",
                        "auth_type": "bearer",
                        "auth_credentials": {"token": "${API_TOKEN}"},
                    },
                ],
                "triggers": [
                    {
                        "type": "cron",
                        "expression": "0 * * * *",  # Every hour
                    },
                ],
            },
        ],
    }

    if format == "json":
        output = output.with_suffix(".json")
        output.write_text(json.dumps(sample_config, indent=2), encoding="utf-8")
    else:
        # PyYAML is imported only on this path so that ``--format json``
        # keeps working when the YAML dependency is not installed.
        import yaml

        output = output.with_suffix(".yaml")
        output.write_text(
            yaml.dump(sample_config, default_flow_style=False, sort_keys=False),
            encoding="utf-8",
        )

    typer.echo(f"Sample checkpoint config created: {output}")
    typer.echo("\nEdit the file to configure your checkpoints, then run:")
    typer.echo(f" truthound checkpoint run <checkpoint_name> --config {output}")
|
|
686
|
+
|
|
687
|
+
|
|
688
|
+
# =============================================================================
|
|
689
|
+
# Auto-Profiling Commands (Phase 7)
|
|
690
|
+
# =============================================================================
|
|
691
|
+
|
|
692
|
+
|
|
693
|
+
@app.command(name="auto-profile")
def auto_profile_cmd(
    file: Annotated[Path, typer.Argument(help="Path to the data file")],
    output: Annotated[
        Optional[Path],
        typer.Option("--output", "-o", help="Output file path for profile JSON"),
    ] = None,
    format: Annotated[
        str,
        typer.Option("--format", "-f", help="Output format (console, json, yaml)"),
    ] = "console",
    include_patterns: Annotated[
        bool,
        typer.Option("--patterns/--no-patterns", help="Include pattern detection"),
    ] = True,
    include_correlations: Annotated[
        bool,
        typer.Option("--correlations/--no-correlations", help="Include correlation analysis"),
    ] = False,
    sample_size: Annotated[
        Optional[int],
        typer.Option("--sample", "-s", help="Sample size for profiling (default: all rows)"),
    ] = None,
    top_n: Annotated[
        int,
        typer.Option("--top-n", help="Number of top/bottom values to include"),
    ] = 10,
) -> None:
    """Profile data with auto-detection of types and patterns.

    This performs comprehensive profiling including:
    - Column statistics (null ratio, unique ratio, distribution)
    - Type inference (email, phone, UUID, etc.)
    - Pattern detection
    - Suggested validation rules
    """
    if not file.exists():
        typer.echo(f"Error: File not found: {file}", err=True)
        raise typer.Exit(1)

    try:
        from truthound.profiler import (
            DataProfiler,
            ProfilerConfig,
            save_profile,
        )

        config = ProfilerConfig(
            include_patterns=include_patterns,
            include_correlations=include_correlations,
            sample_size=sample_size,
            top_n_values=top_n,
        )

        profiler = DataProfiler(config=config)

        typer.echo(f"Profiling {file}...")
        profile_result = profiler.profile(
            _read_file_as_lazy(file),
            name=file.stem,
            source=str(file),
        )

        if format in ("json", "yaml"):
            # Both text formats serialise the same dict and share one
            # write-or-print path; only the serialiser differs.
            payload = profile_result.to_dict()
            if format == "json":
                import json as json_mod

                rendered = json_mod.dumps(payload, indent=2, default=str)
            else:
                import yaml

                rendered = yaml.dump(payload, default_flow_style=False)

            if output:
                output.write_text(rendered, encoding="utf-8")
                typer.echo(f"Profile saved to {output}")
            else:
                typer.echo(rendered)

        else:  # console (also the fallback for unrecognised formats)
            _print_profile_summary(profile_result)
            if output:
                save_profile(profile_result, output)
                typer.echo(f"\nFull profile saved to {output}")

    except typer.Exit:
        # Keep the exit code of any deliberate exit raised further down.
        raise
    except Exception as e:
        typer.echo(f"Error: {e}", err=True)
        raise typer.Exit(1)
|
|
786
|
+
|
|
787
|
+
|
|
788
|
+
@app.command(name="generate-suite")
def generate_suite_cmd(
    profile_file: Annotated[
        Path,
        typer.Argument(help="Path to profile JSON file (from auto-profile)"),
    ],
    output: Annotated[
        Optional[Path],
        typer.Option("--output", "-o", help="Output file path"),
    ] = None,
    format: Annotated[
        str,
        typer.Option(
            "--format", "-f",
            help="Output format (yaml, json, python, toml, checkpoint)",
        ),
    ] = "yaml",
    strictness: Annotated[
        str,
        typer.Option("--strictness", "-s", help="Rule strictness (loose, medium, strict)"),
    ] = "medium",
    include: Annotated[
        Optional[list[str]],
        typer.Option("--include", "-i", help="Include only these categories"),
    ] = None,
    exclude: Annotated[
        Optional[list[str]],
        typer.Option("--exclude", "-e", help="Exclude these categories"),
    ] = None,
    min_confidence: Annotated[
        Optional[str],
        typer.Option("--min-confidence", help="Minimum rule confidence (low, medium, high)"),
    ] = None,
    name: Annotated[
        Optional[str],
        typer.Option("--name", "-n", help="Name for the validation suite"),
    ] = None,
    preset: Annotated[
        Optional[str],
        typer.Option(
            "--preset", "-p",
            help="Configuration preset (default, strict, loose, minimal, comprehensive, ci_cd)",
        ),
    ] = None,
    config: Annotated[
        Optional[Path],
        typer.Option("--config", "-c", help="Path to configuration file"),
    ] = None,
    group_by_category: Annotated[
        bool,
        typer.Option("--group-by-category", help="Group rules by category in output"),
    ] = False,
    code_style: Annotated[
        str,
        typer.Option(
            "--code-style",
            help="Python code style (functional, class_based, declarative)",
        ),
    ] = "functional",
) -> None:
    """Generate validation rules from a profile.

    This creates a validation suite based on the data profile.
    Categories available: schema, completeness, uniqueness, format,
    distribution, pattern, temporal, relationship, anomaly

    Output formats:
    - yaml: Human-readable YAML (default)
    - json: Machine-readable JSON
    - python: Executable Python code
    - toml: TOML configuration
    - checkpoint: Truthound checkpoint format for CI/CD

    Examples:
        # Generate from profile
        truthound generate-suite profile.json -o rules.yaml

        # Only schema and format rules
        truthound generate-suite profile.json -i schema -i format

        # Strict mode with preset
        truthound generate-suite profile.json --preset strict

        # Generate Python code with class-based style
        truthound generate-suite profile.json -f python --code-style class_based

        # Generate CI/CD checkpoint
        truthound generate-suite profile.json -f checkpoint -o ci_rules.yaml

        # Use configuration file
        truthound generate-suite profile.json --config suite_config.yaml
    """
    # NOTE(review): ``code_style`` is accepted as an option but is not passed
    # to ``run_generate_suite`` in this function -- confirm whether the
    # handler is supposed to receive it.
    if not profile_file.exists():
        typer.echo(f"Error: Profile file not found: {profile_file}", err=True)
        raise typer.Exit(1)

    try:
        from truthound.profiler import (
            get_available_formats,
            get_available_presets,
            run_generate_suite,
        )

        # Reject an unknown output format before doing any work.
        known_formats = get_available_formats()
        if format not in known_formats:
            format_error = (
                f"Error: Invalid format '{format}'. "
                f"Available: {', '.join(known_formats)}"
            )
            typer.echo(format_error, err=True)
            raise typer.Exit(1)

        # The preset is optional; it is only checked when one was given.
        if preset:
            known_presets = get_available_presets()
            if preset not in known_presets:
                preset_error = (
                    f"Error: Invalid preset '{preset}'. "
                    f"Available: {', '.join(known_presets)}"
                )
                typer.echo(preset_error, err=True)
                raise typer.Exit(1)

        # Each -i/-e occurrence may itself hold a comma-separated list, so
        # every occurrence is split and the pieces are flattened in order.
        include_cats = (
            [piece.strip() for entry in include for piece in entry.split(",")]
            if include
            else None
        )
        exclude_cats = (
            [piece.strip() for entry in exclude for piece in entry.split(",")]
            if exclude
            else None
        )

        # Delegate the actual generation to the profiler handler.
        status = run_generate_suite(
            profile_file=profile_file,
            output=output,
            format=format,
            strictness=strictness,
            include=include_cats,
            exclude=exclude_cats,
            min_confidence=min_confidence,
            name=name,
            preset=preset,
            config=config,
            group_by_category=group_by_category,
            echo=typer.echo,
            verbose=True,
        )

        if status != 0:
            raise typer.Exit(status)

    except typer.Exit:
        # Deliberate exits keep their own exit code.
        raise
    except Exception as e:
        typer.echo(f"Error: {e}", err=True)
        raise typer.Exit(1)
|
|
946
|
+
|
|
947
|
+
|
|
948
|
+
@app.command(name="quick-suite")
def quick_suite_cmd(
    file: Annotated[Path, typer.Argument(help="Path to the data file")],
    output: Annotated[
        Optional[Path],
        typer.Option("--output", "-o", help="Output file path"),
    ] = None,
    format: Annotated[
        str,
        typer.Option(
            "--format", "-f",
            help="Output format (yaml, json, python, toml, checkpoint)",
        ),
    ] = "yaml",
    strictness: Annotated[
        str,
        typer.Option("--strictness", "-s", help="Rule strictness (loose, medium, strict)"),
    ] = "medium",
    include: Annotated[
        Optional[list[str]],
        typer.Option("--include", "-i", help="Include only these categories"),
    ] = None,
    exclude: Annotated[
        Optional[list[str]],
        typer.Option("--exclude", "-e", help="Exclude these categories"),
    ] = None,
    min_confidence: Annotated[
        Optional[str],
        typer.Option("--min-confidence", help="Minimum rule confidence (low, medium, high)"),
    ] = None,
    name: Annotated[
        Optional[str],
        typer.Option("--name", "-n", help="Name for the validation suite"),
    ] = None,
    preset: Annotated[
        Optional[str],
        typer.Option(
            "--preset", "-p",
            help="Configuration preset (default, strict, loose, minimal, comprehensive, ci_cd)",
        ),
    ] = None,
    sample_size: Annotated[
        Optional[int],
        typer.Option("--sample-size", help="Sample size for profiling (default: auto)"),
    ] = None,
) -> None:
    """Profile data and generate validation rules in one step.

    This is a convenience command that combines auto-profile and generate-suite.

    Output formats:
    - yaml: Human-readable YAML (default)
    - json: Machine-readable JSON
    - python: Executable Python code
    - toml: TOML configuration
    - checkpoint: Truthound checkpoint format for CI/CD

    Examples:
        # Basic usage
        truthound quick-suite data.parquet -o rules.yaml

        # Strict mode with Python output
        truthound quick-suite data.csv -s strict -f python -o validators.py

        # CI/CD checkpoint
        truthound quick-suite data.parquet --preset ci_cd -o ci_rules.yaml

        # With sampling for large files
        truthound quick-suite large_data.parquet --sample-size 10000
    """
    if not file.exists():
        typer.echo(f"Error: File not found: {file}", err=True)
        raise typer.Exit(1)

    try:
        from truthound.profiler import (
            get_available_formats,
            get_available_presets,
            run_quick_suite,
        )

        # Reject an unknown output format before doing any work.
        known_formats = get_available_formats()
        if format not in known_formats:
            format_error = (
                f"Error: Invalid format '{format}'. "
                f"Available: {', '.join(known_formats)}"
            )
            typer.echo(format_error, err=True)
            raise typer.Exit(1)

        # The preset is optional; it is only checked when one was given.
        if preset:
            known_presets = get_available_presets()
            if preset not in known_presets:
                preset_error = (
                    f"Error: Invalid preset '{preset}'. "
                    f"Available: {', '.join(known_presets)}"
                )
                typer.echo(preset_error, err=True)
                raise typer.Exit(1)

        # Each -i/-e occurrence may itself hold a comma-separated list, so
        # every occurrence is split and the pieces are flattened in order.
        include_cats = (
            [piece.strip() for entry in include for piece in entry.split(",")]
            if include
            else None
        )
        exclude_cats = (
            [piece.strip() for entry in exclude for piece in entry.split(",")]
            if exclude
            else None
        )

        # Delegate profiling and rule generation to the profiler handler.
        status = run_quick_suite(
            file=file,
            output=output,
            format=format,
            strictness=strictness,
            include=include_cats,
            exclude=exclude_cats,
            min_confidence=min_confidence,
            name=name,
            preset=preset,
            sample_size=sample_size,
            echo=typer.echo,
            verbose=True,
        )

        if status != 0:
            raise typer.Exit(status)

    except typer.Exit:
        # Deliberate exits keep their own exit code.
        raise
    except Exception as e:
        typer.echo(f"Error: {e}", err=True)
        raise typer.Exit(1)
|
|
1083
|
+
|
|
1084
|
+
|
|
1085
|
+
@app.command(name="list-formats")
def list_formats_cmd() -> None:
    """List available output formats for suite generation."""
    # Descriptions for the formats this command knows about; a format
    # reported by the profiler but missing here prints an empty description.
    descriptions = {
        "yaml": "Human-readable YAML format (default)",
        "json": "Machine-readable JSON format",
        "python": "Executable Python code with validators",
        "toml": "TOML configuration format",
        "checkpoint": "Truthound checkpoint format for CI/CD",
    }

    try:
        from truthound.profiler import get_available_formats

        typer.echo("Available output formats:")
        typer.echo("")

        for format_name in get_available_formats():
            summary = descriptions.get(format_name, "")
            typer.echo(f" {format_name:12} - {summary}")

    except Exception as e:
        typer.echo(f"Error: {e}", err=True)
        raise typer.Exit(1)
|
|
1108
|
+
|
|
1109
|
+
|
|
1110
|
+
@app.command(name="list-presets")
def list_presets_cmd() -> None:
    """List available configuration presets for suite generation."""
    # Descriptions for the presets this command knows about; a preset
    # reported by the profiler but missing here prints an empty description.
    descriptions = {
        "default": "Balanced settings (medium strictness, all categories)",
        "strict": "Strict validation rules with high confidence",
        "loose": "Relaxed validation for flexible data",
        "minimal": "Only high-confidence schema rules",
        "comprehensive": "All generators with detailed output",
        "schema_only": "Schema and completeness rules only",
        "format_only": "Format and pattern rules only",
        "ci_cd": "Optimized for CI/CD pipelines (checkpoint format)",
        "development": "Development-friendly (Python code output)",
        "production": "Production-ready (strict, high confidence)",
    }

    try:
        from truthound.profiler import get_available_presets

        typer.echo("Available configuration presets:")
        typer.echo("")

        for preset_name in get_available_presets():
            summary = descriptions.get(preset_name, "")
            typer.echo(f" {preset_name:16} - {summary}")

    except Exception as e:
        typer.echo(f"Error: {e}", err=True)
        raise typer.Exit(1)
|
|
1138
|
+
|
|
1139
|
+
|
|
1140
|
+
@app.command(name="list-categories")
def list_categories_cmd() -> None:
    """List available rule categories for suite generation."""
    # Descriptions for the categories this command knows about; a category
    # reported by the profiler but missing here prints an empty description.
    descriptions = {
        "schema": "Column existence, types, and structure",
        "completeness": "Null values and data completeness",
        "uniqueness": "Unique constraints and cardinality",
        "format": "Data format validation (email, phone, etc.)",
        "distribution": "Statistical distribution checks",
        "pattern": "Regex pattern matching",
        "temporal": "Date/time validation",
        "relationship": "Cross-column relationships",
        "anomaly": "Anomaly detection rules",
    }

    try:
        from truthound.profiler import get_available_categories

        typer.echo("Available rule categories:")
        typer.echo("")

        for category_name in get_available_categories():
            summary = descriptions.get(category_name, "")
            typer.echo(f" {category_name:14} - {summary}")

    except Exception as e:
        typer.echo(f"Error: {e}", err=True)
        raise typer.Exit(1)
|
|
1167
|
+
|
|
1168
|
+
|
|
1169
|
+
# =============================================================================
|
|
1170
|
+
# Helper Functions
|
|
1171
|
+
# =============================================================================
|
|
1172
|
+
|
|
1173
|
+
|
|
1174
|
+
def _read_file_as_lazy(path: Path):
    """Read a file as a Polars LazyFrame.

    The reader is chosen from the lower-cased file suffix.

    Args:
        path: Location of the data file.

    Returns:
        Whatever the matching ``polars.scan_*`` function returns for ``path``.

    Raises:
        ValueError: If the suffix has no registered reader.
    """
    # Suffix -> name of the polars scan function. Names are stored as strings
    # so the suffix can be validated before polars is imported.
    # NOTE(review): ".json" is routed to the NDJSON scanner, as in the
    # original code; a file holding one regular JSON document is presumably
    # not readable this way -- confirm against the polars documentation.
    scanners = {
        ".parquet": "scan_parquet",
        ".csv": "scan_csv",
        ".json": "scan_ndjson",
        ".ndjson": "scan_ndjson",
        ".jsonl": "scan_ndjson",
    }

    suffix = path.suffix.lower()
    if suffix not in scanners:
        raise ValueError(
            f"Unsupported file type: {suffix}. "
            f"Supported: {list(scanners)}"
        )

    # Imported only after validation: rejecting an unsupported file should
    # not depend on (or pay for) importing polars.
    import polars as pl

    return getattr(pl, scanners[suffix])(path)
|
|
1193
|
+
|
|
1194
|
+
|
|
1195
|
+
# =============================================================================
|
|
1196
|
+
# Benchmark Commands
|
|
1197
|
+
# =============================================================================
|
|
1198
|
+
|
|
1199
|
+
# Sub-application that groups the benchmark commands; it is mounted on the
# main CLI under the "benchmark" name.
benchmark_app = typer.Typer(name="benchmark", help="Performance benchmarking commands")
app.add_typer(benchmark_app, name="benchmark")
|
|
1204
|
+
|
|
1205
|
+
|
|
1206
|
+
@benchmark_app.command(name="run")
def benchmark_run_cmd(
    benchmark: Annotated[
        Optional[str],
        typer.Argument(help="Benchmark name to run (e.g., 'profile', 'check')"),
    ] = None,
    suite: Annotated[
        Optional[str],
        typer.Option("--suite", "-s", help="Predefined suite to run (quick, ci, full, profiling, validation)"),
    ] = None,
    size: Annotated[
        str,
        typer.Option("--size", help="Data size (tiny, small, medium, large, xlarge)"),
    ] = "medium",
    rows: Annotated[
        Optional[int],
        typer.Option("--rows", "-r", help="Custom row count (overrides size)"),
    ] = None,
    iterations: Annotated[
        int,
        typer.Option("--iterations", "-i", help="Number of measurement iterations"),
    ] = 5,
    warmup: Annotated[
        int,
        typer.Option("--warmup", "-w", help="Number of warmup iterations"),
    ] = 2,
    output: Annotated[
        Optional[Path],
        typer.Option("--output", "-o", help="Output file path"),
    ] = None,
    format: Annotated[
        str,
        typer.Option("--format", "-f", help="Output format (console, json, markdown, html)"),
    ] = "console",
    save_baseline: Annotated[
        bool,
        typer.Option("--save-baseline", help="Save results as baseline for regression detection"),
    ] = False,
    compare_baseline: Annotated[
        bool,
        typer.Option("--compare-baseline", help="Compare against saved baseline"),
    ] = False,
    verbose: Annotated[
        bool,
        typer.Option("--verbose", "-v", help="Verbose output"),
    ] = False,
) -> None:
    """Run performance benchmarks.

    Examples:
        # Run a single benchmark
        truthound benchmark run profile --size medium

        # Run a predefined suite
        truthound benchmark run --suite quick

        # Run with custom row count
        truthound benchmark run check --rows 1000000

        # Save as baseline
        truthound benchmark run --suite ci --save-baseline

        # Compare against baseline
        truthound benchmark run --suite ci --compare-baseline
    """
    from truthound.benchmark import (
        BenchmarkRunner,
        BenchmarkSuite,
        BenchmarkConfig,
        BenchmarkSize,
        RunnerConfig,
        ConsoleReporter,
        JSONReporter,
        MarkdownReporter,
        HTMLReporter,
        RegressionDetector,
    )

    try:
        # Determine row count. An unrecognised --size silently falls back to
        # MEDIUM; an explicit --rows value takes precedence over the size.
        size_map = {
            "tiny": BenchmarkSize.TINY,
            "small": BenchmarkSize.SMALL,
            "medium": BenchmarkSize.MEDIUM,
            "large": BenchmarkSize.LARGE,
            "xlarge": BenchmarkSize.XLARGE,
        }
        benchmark_size = size_map.get(size, BenchmarkSize.MEDIUM)
        row_count = rows if rows else benchmark_size.row_count

        # Configure benchmark
        benchmark_config = BenchmarkConfig(
            warmup_iterations=warmup,
            measure_iterations=iterations,
            default_size=benchmark_size,
            verbose=verbose,
        )

        runner_config = RunnerConfig(
            # No size override when the caller supplied a custom row count.
            size_override=benchmark_size if not rows else None,
            verbose=verbose,
        )

        runner = BenchmarkRunner(
            config=runner_config,
            benchmark_config=benchmark_config,
        )

        # Determine what to run; --suite wins over a positional benchmark.
        if suite:
            suite_map = {
                "quick": BenchmarkSuite.quick,
                "ci": BenchmarkSuite.ci,
                "full": lambda: BenchmarkSuite.full(benchmark_size),
                "profiling": lambda: BenchmarkSuite.profiling(benchmark_size),
                "validation": lambda: BenchmarkSuite.validation(benchmark_size),
            }
            if suite not in suite_map:
                typer.echo(f"Unknown suite: {suite}. Available: {list(suite_map.keys())}", err=True)
                raise typer.Exit(1)

            benchmark_suite = suite_map[suite]()
            results = runner.run_suite(benchmark_suite)

        elif benchmark:
            result = runner.run(benchmark, row_count=row_count)
            # Wrap single result in suite result for consistent handling
            from truthound.benchmark.base import EnvironmentInfo
            from truthound.benchmark.runner import SuiteResult

            results = SuiteResult(
                suite_name=f"single:{benchmark}",
                results=[result],
                environment=EnvironmentInfo.capture(),
            )
            results.completed_at = result.completed_at

        else:
            typer.echo("Specify either a benchmark name or --suite", err=True)
            raise typer.Exit(1)

        # Compare against baseline if requested. Detected regressions end
        # the command here with exit code 1, before any report is written.
        if compare_baseline:
            detector = RegressionDetector()
            report = detector.generate_report(results)
            typer.echo(report)

            regressions = detector.check(results)
            if regressions:
                typer.echo("\n⚠️ Performance regressions detected!", err=True)
                raise typer.Exit(1)

        # Generate output. An unrecognised --format falls back to a default
        # console reporter.
        reporters = {
            "console": ConsoleReporter(use_colors=True),
            "json": JSONReporter(pretty=True),
            "markdown": MarkdownReporter(),
            "html": HTMLReporter(),
        }

        reporter = reporters.get(format, ConsoleReporter())
        report_content = reporter.report_suite(results)

        if output:
            output.parent.mkdir(parents=True, exist_ok=True)
            # Written as UTF-8 explicitly so the file does not depend on the
            # platform's default encoding.
            output.write_text(report_content, encoding="utf-8")
            typer.echo(f"Results saved to: {output}")
        else:
            # Without --output every format is printed to stdout.
            typer.echo(report_content)

        # Save baseline if requested
        if save_baseline:
            detector = RegressionDetector()
            detector.save_baseline(results)
            typer.echo(f"Baseline saved to: {detector.history_path}")

    except typer.Exit:
        # Deliberate exits keep their own exit code.
        raise
    except Exception as e:
        typer.echo(f"Error: {e}", err=True)
        if verbose:
            import traceback

            typer.echo(traceback.format_exc(), err=True)
        raise typer.Exit(1)
|
|
1391
|
+
|
|
1392
|
+
|
|
1393
|
+
@benchmark_app.command(name="list")
def benchmark_list_cmd(
    format: Annotated[
        str, typer.Option("--format", "-f", help="Output format (console, json)")
    ] = "console",
) -> None:
    """List available benchmarks."""
    # Every entry returned by ``benchmark_registry.list_all()`` is printed
    # either as a JSON array (``--format json``) or, for any other format
    # value, as a plain-text listing grouped by category.
    from truthound.benchmark import benchmark_registry

    registered = benchmark_registry.list_all()

    if format == "json":
        payload = []
        for entry in registered:
            payload.append(
                {
                    "name": entry.name,
                    "category": entry.category.value,
                    "description": entry.description,
                }
            )
        typer.echo(json.dumps(payload, indent=2))
        return

    typer.echo("\nAvailable Benchmarks:")
    typer.echo("=" * 60)

    # Bucket the benchmarks under their category value; entries keep the
    # order in which the registry returned them.
    grouped = {}
    for entry in registered:
        grouped.setdefault(entry.category.value, []).append(entry)

    # Categories are printed alphabetically, one bracketed heading each.
    for category_name in sorted(grouped):
        typer.echo(f"\n[{category_name.upper()}]")
        for entry in grouped[category_name]:
            typer.echo(f" {entry.name:20} - {entry.description}")
|
|
1429
|
+
|
|
1430
|
+
|
|
1431
|
+
def _diff_benchmark_timings(baseline_data, current_data, threshold):
    """Classify mean-time changes between two benchmark result payloads.

    Args:
        baseline_data: Parsed baseline JSON; its ``"results"`` list holds
            entries with ``benchmark_name`` and
            ``metrics.timing.mean_seconds``.
        current_data: Parsed current JSON with the same layout.
        threshold: Percentage change beyond which a benchmark is reported.

    Returns:
        A ``(regressions, improvements)`` pair. Each item is a tuple
        ``(name, baseline_seconds, current_seconds, percent_change)``.

    Raises:
        KeyError: If a result entry lacks one of the keys read here.
    """
    baseline_results = {r["benchmark_name"]: r for r in baseline_data.get("results", [])}
    current_results = {r["benchmark_name"]: r for r in current_data.get("results", [])}

    regressions = []
    improvements = []

    for name, curr in current_results.items():
        # Benchmarks without a baseline counterpart cannot be compared.
        if name not in baseline_results:
            continue

        base_duration = baseline_results[name]["metrics"]["timing"]["mean_seconds"]
        curr_duration = curr["metrics"]["timing"]["mean_seconds"]

        # A non-positive baseline gives no meaningful percentage; skip it.
        if base_duration > 0:
            pct_change = ((curr_duration - base_duration) / base_duration) * 100

            if pct_change > threshold:
                regressions.append((name, base_duration, curr_duration, pct_change))
            elif pct_change < -threshold:
                improvements.append((name, base_duration, curr_duration, pct_change))

    return regressions, improvements


@benchmark_app.command(name="compare")
def benchmark_compare_cmd(
    baseline: Annotated[
        Path,
        typer.Argument(help="Baseline results JSON file"),
    ],
    current: Annotated[
        Path,
        typer.Argument(help="Current results JSON file"),
    ],
    threshold: Annotated[
        float,
        typer.Option("--threshold", "-t", help="Regression threshold percentage"),
    ] = 10.0,
    format: Annotated[
        str,
        typer.Option("--format", "-f", help="Output format (console, json, markdown)"),
    ] = "console",
) -> None:
    """Compare two benchmark results for regressions."""
    # Behaviour summary:
    #   * Both files must exist, otherwise the command exits with status 1.
    #   * Mean timings of benchmarks present in both files are compared; a
    #     change above +threshold % is a regression, below -threshold % an
    #     improvement.
    #   * ``--format json`` and ``--format markdown`` emit a structured
    #     report; every other value keeps the plain console report.
    #   * The command exits with status 1 when any regression is found.
    if not baseline.exists():
        typer.echo(f"Baseline file not found: {baseline}", err=True)
        raise typer.Exit(1)

    if not current.exists():
        typer.echo(f"Current file not found: {current}", err=True)
        raise typer.Exit(1)

    try:
        baseline_data = json.loads(baseline.read_text())
        current_data = json.loads(current.read_text())

        structured = format in ("json", "markdown")

        if not structured:
            # The console header is printed before the comparison so it is
            # still shown when a malformed payload aborts the run.
            typer.echo("\nBenchmark Comparison")
            typer.echo("=" * 60)
            typer.echo(f"Baseline: {baseline}")
            typer.echo(f"Current: {current}")
            typer.echo(f"Threshold: {threshold}%")
            typer.echo("-" * 60)

        regressions, improvements = _diff_benchmark_timings(
            baseline_data, current_data, threshold
        )

        if format == "json":
            keys = ("benchmark", "baseline_seconds", "current_seconds", "percent_change")
            report = {
                "baseline": str(baseline),
                "current": str(current),
                "threshold_percent": threshold,
                "regressions": [dict(zip(keys, row)) for row in regressions],
                "improvements": [dict(zip(keys, row)) for row in improvements],
            }
            typer.echo(json.dumps(report, indent=2))

        elif format == "markdown":
            lines = [
                "## Benchmark Comparison",
                "",
                f"- **Baseline:** `{baseline}`",
                f"- **Current:** `{current}`",
                f"- **Threshold:** {threshold}%",
            ]
            for heading, rows in (("Regressions", regressions), ("Improvements", improvements)):
                if not rows:
                    continue
                lines.extend(
                    [
                        "",
                        f"### {heading}",
                        "",
                        "| Benchmark | Baseline (s) | Current (s) | Change |",
                        "| --- | ---: | ---: | ---: |",
                    ]
                )
                lines.extend(
                    f"| {name} | {base_d:.3f} | {curr_d:.3f} | {pct:+.1f}% |"
                    for name, base_d, curr_d, pct in rows
                )
            if not regressions and not improvements:
                lines.extend(["", "No significant changes detected."])
            typer.echo("\n".join(lines))

        else:
            if regressions:
                typer.echo("\n🔴 REGRESSIONS:")
                for name, base_d, curr_d, pct in regressions:
                    typer.echo(f" {name}: {base_d:.3f}s -> {curr_d:.3f}s ({pct:+.1f}%)")

            if improvements:
                typer.echo("\n🟢 IMPROVEMENTS:")
                for name, base_d, curr_d, pct in improvements:
                    typer.echo(f" {name}: {base_d:.3f}s -> {curr_d:.3f}s ({pct:+.1f}%)")

            if not regressions and not improvements:
                typer.echo("\n✅ No significant changes detected.")

            typer.echo("")

        # A non-zero exit status lets CI pipelines fail on regressions.
        if regressions:
            raise typer.Exit(1)

    except typer.Exit:
        # Exit requests must pass through untouched.
        raise
    except Exception as e:
        typer.echo(f"Error: {e}", err=True)
        raise typer.Exit(1)
|
|
1522
|
+
|
|
1523
|
+
|
|
1524
|
+
def _print_profile_summary(profile) -> None:
    """Echo a human-readable overview of ``profile`` via ``typer.echo``.

    The output starts with table-level figures (name, row and column counts,
    estimated memory in MB and, when the duplicate ratio is positive, the
    duplicate-row count), followed by one section per entry of
    ``profile.columns``.

    Args:
        profile: Object exposing the attributes read below, e.g. ``name``,
            ``row_count``, ``column_count``, ``estimated_memory_bytes``,
            ``duplicate_row_ratio``, ``duplicate_row_count`` and ``columns``.
    """
    say = typer.echo
    rule = "=" * 60

    # --- table-level header -------------------------------------------------
    say(f"\n{rule}")
    title = profile.name or "unnamed"
    say(f"Profile: {title}")
    say(f"{rule}")
    say(f"Rows: {profile.row_count:,}")
    say(f"Columns: {profile.column_count}")
    say(f"Estimated Memory: {profile.estimated_memory_bytes / 1024 / 1024:.2f} MB")

    # Duplicate statistics are only shown when duplicates were found.
    if profile.duplicate_row_ratio > 0:
        say(f"Duplicate Rows: {profile.duplicate_row_count:,} ({profile.duplicate_row_ratio*100:.1f}%)")

    # --- per-column details -------------------------------------------------
    say(f"\n{'Column Details':^60}")
    say("-" * 60)

    for column in profile.columns:
        say(f"\n{column.name}")
        say(f" Type: {column.physical_type} -> {column.inferred_type.value}")
        say(f" Nulls: {column.null_count:,} ({column.null_ratio*100:.1f}%)")
        say(f" Unique: {column.distinct_count:,} ({column.unique_ratio*100:.1f}%)")

        # Distribution figures are optional; mean/std are printed only when
        # a mean is available.
        if column.distribution:
            stats = column.distribution
            say(f" Range: [{stats.min}, {stats.max}]")
            if stats.mean is not None:
                say(f" Mean: {stats.mean:.2f}, Std: {stats.std:.2f}")

        # Length statistics are printed when a minimum length is set.
        if column.min_length is not None:
            say(f" Length: [{column.min_length}, {column.max_length}], avg={column.avg_length:.1f}")

        # At most the first three detected patterns are listed.
        if column.detected_patterns:
            shown = ", ".join([found.pattern for found in column.detected_patterns[:3]])
            say(f" Patterns: {shown}")

        if column.suggested_validators:
            say(f" Suggested: {len(column.suggested_validators)} validators")
|
|
1560
|
+
|
|
1561
|
+
|
|
1562
|
+
# =============================================================================
|
|
1563
|
+
# Data Docs Commands (Phase 8)
|
|
1564
|
+
# =============================================================================
|
|
1565
|
+
|
|
1566
|
+
# Typer sub-application that groups the ``docs`` commands; it is mounted on
# the root ``app`` under the name "docs".
docs_app = typer.Typer(name="docs", help="Generate data documentation and reports (Phase 8)")
app.add_typer(docs_app, name="docs")
|
|
1571
|
+
|
|
1572
|
+
|
|
1573
|
+
@docs_app.command(name="generate")
def docs_generate_cmd(
    profile_file: Annotated[
        Path,
        typer.Argument(help="Path to profile JSON file (from auto-profile)"),
    ],
    output: Annotated[
        Optional[Path],
        typer.Option("--output", "-o", help="Output file path"),
    ] = None,
    title: Annotated[
        str,
        typer.Option("--title", "-t", help="Report title"),
    ] = "Data Profile Report",
    subtitle: Annotated[
        str,
        typer.Option("--subtitle", "-s", help="Report subtitle"),
    ] = "",
    theme: Annotated[
        str,
        typer.Option("--theme", help="Report theme (light, dark, professional, minimal, modern)"),
    ] = "professional",
    chart_library: Annotated[
        str,
        typer.Option("--charts", "-c", help="Chart library (apexcharts, chartjs, plotly, svg)"),
    ] = "apexcharts",
    format: Annotated[
        str,
        typer.Option("--format", "-f", help="Output format (html, pdf)"),
    ] = "html",
) -> None:
    """Generate HTML report from profile data.

    This creates a static, self-contained HTML report that can be:
    - Saved as CI/CD artifact
    - Shared via email or Slack
    - Viewed offline in any browser

    Examples:
        # Basic usage
        truthound docs generate profile.json -o report.html

        # With custom title and theme
        truthound docs generate profile.json -o report.html --title "Q4 Data Report" --theme dark

        # Using different chart library
        truthound docs generate profile.json -o report.html --charts chartjs

        # Export to PDF (requires weasyprint)
        truthound docs generate profile.json -o report.pdf --format pdf
    """
    # Exit status is 1 for: a missing profile file, an unsupported format,
    # a missing PDF dependency (ImportError from export_to_pdf) or any other
    # failure while loading the profile or rendering the report.
    if not profile_file.exists():
        typer.echo(f"Error: Profile file not found: {profile_file}", err=True)
        raise typer.Exit(1)

    # Validate the format before it is used to derive the default output
    # path; an arbitrary value could otherwise make with_suffix() raise.
    if format not in ("html", "pdf"):
        typer.echo(f"Error: Unsupported format '{format}'", err=True)
        raise typer.Exit(1)

    # Default output path: the profile path with the format as extension.
    if not output:
        output = profile_file.with_suffix(f".{format}")

    try:
        from truthound.datadocs import (
            generate_html_report,
            export_to_pdf,
        )

        # Load profile
        with open(profile_file, "r", encoding="utf-8") as f:
            profile = json.load(f)

        typer.echo(f"Generating {format.upper()} report...")
        typer.echo(f" Profile: {profile_file}")
        typer.echo(f" Theme: {theme}")
        typer.echo(f" Charts: {chart_library}")

        if format == "html":
            html_content = generate_html_report(
                profile=profile,
                title=title,
                subtitle=subtitle,
                theme=theme,
                chart_library=chart_library,
                output_path=output,
            )
            typer.echo(f"\nReport saved to: {output}")
            typer.echo(f" Size: {len(html_content):,} bytes")

        else:  # format == "pdf" (validated above)
            try:
                output_path = export_to_pdf(
                    profile=profile,
                    output_path=output,
                    title=title,
                    subtitle=subtitle,
                    theme=theme,
                    chart_library="svg",  # SVG works best for PDF
                )
                typer.echo(f"\nPDF report saved to: {output_path}")
            except ImportError:
                typer.echo(
                    "Error: PDF export requires weasyprint. "
                    "Install with: pip install truthound[pdf]",
                    err=True,
                )
                raise typer.Exit(1)

    except typer.Exit:
        # typer.Exit derives from RuntimeError; without this clause the
        # generic handler below would report it as a second, bogus error.
        raise
    except Exception as e:
        typer.echo(f"Error: {e}", err=True)
        raise typer.Exit(1)
|
|
1687
|
+
|
|
1688
|
+
|
|
1689
|
+
@docs_app.command(name="themes")
def docs_themes_cmd() -> None:
    """List available report themes."""
    # One line is printed per theme returned by ``get_available_themes()``;
    # themes without an entry in the table below get an empty description.
    # Any failure (including the import) is reported and exits with 1.
    blurbs = dict(
        [
            ("light", "Clean and bright, suitable for most use cases"),
            ("dark", "Dark mode with vibrant colors, easy on the eyes"),
            ("professional", "Corporate style, subdued colors (default)"),
            ("minimal", "Minimalist design with monochrome accents"),
            ("modern", "Contemporary design with vibrant gradients"),
        ]
    )

    try:
        from truthound.datadocs import get_available_themes

        typer.echo("Available report themes:")
        typer.echo("")

        for theme_name in get_available_themes():
            blurb = blurbs.get(theme_name, "")
            typer.echo(f" {theme_name:14} - {blurb}")

    except Exception as exc:
        typer.echo(f"Error: {exc}", err=True)
        raise typer.Exit(1)
|
|
1713
|
+
|
|
1714
|
+
|
|
1715
|
+
@app.command(name="dashboard")
def dashboard_cmd(
    profile: Annotated[
        Optional[Path],
        typer.Option("--profile", "-p", help="Path to profile JSON file"),
    ] = None,
    port: Annotated[
        int,
        typer.Option("--port", help="Server port"),
    ] = 8080,
    host: Annotated[
        str,
        typer.Option("--host", help="Server host"),
    ] = "localhost",
    title: Annotated[
        str,
        typer.Option("--title", "-t", help="Dashboard title"),
    ] = "Truthound Dashboard",
    debug: Annotated[
        bool,
        typer.Option("--debug", help="Enable debug mode"),
    ] = False,
) -> None:
    """Launch interactive dashboard for data exploration.

    This requires the dashboard extra to be installed:
        pip install truthound[dashboard]

    The dashboard provides:
    - Interactive data exploration
    - Column filtering and search
    - Real-time quality metrics
    - Pattern visualization

    Examples:
        # Launch with profile
        truthound dashboard --profile profile.json

        # Custom port and title
        truthound dashboard --profile profile.json --port 3000 --title "My Dashboard"
    """
    # Exit status is 1 when the profile file is missing, when the dashboard
    # dependencies cannot be imported, or when launching fails.
    try:
        from truthound.datadocs import launch_dashboard

        if profile and not profile.exists():
            typer.echo(f"Error: Profile file not found: {profile}", err=True)
            raise typer.Exit(1)

        typer.echo(f"Launching dashboard on http://{host}:{port}")
        if profile:
            typer.echo(f" Profile: {profile}")

        launch_dashboard(
            profile_path=profile,
            port=port,
            host=host,
            title=title,
            debug=debug,
        )

    except typer.Exit:
        # typer.Exit derives from RuntimeError; re-raise it so the generic
        # handler below does not print a second, meaningless "Error: 1".
        raise
    except ImportError:
        typer.echo(
            "Error: Dashboard requires additional dependencies. "
            "Install with: pip install truthound[dashboard]",
            err=True,
        )
        raise typer.Exit(1)
    except Exception as e:
        typer.echo(f"Error: {e}", err=True)
        raise typer.Exit(1)
|
|
1785
|
+
|
|
1786
|
+
|
|
1787
|
+
# =============================================================================
|
|
1788
|
+
# Phase 10: ML, Lineage, and Realtime Commands
|
|
1789
|
+
# =============================================================================
|
|
1790
|
+
|
|
1791
|
+
# ML subcommand group
|
|
1792
|
+
# Typer sub-application that groups the ``ml`` commands; it is mounted on
# the root ``app`` under the name "ml".
ml_app = typer.Typer(name="ml", help="Machine learning based validation commands")
app.add_typer(ml_app, name="ml")
|
|
1797
|
+
|
|
1798
|
+
|
|
1799
|
+
@ml_app.command(name="anomaly")
def ml_anomaly_cmd(
    file: Annotated[Path, typer.Argument(help="Path to the data file")],
    method: Annotated[
        str,
        typer.Option("--method", "-m", help="Detection method (zscore, iqr, mad, isolation_forest)"),
    ] = "zscore",
    contamination: Annotated[
        float,
        typer.Option("--contamination", "-c", help="Expected proportion of outliers (0.0 to 0.5)"),
    ] = 0.1,
    columns: Annotated[
        Optional[str],
        typer.Option("--columns", help="Comma-separated columns to analyze"),
    ] = None,
    output: Annotated[
        Optional[Path],
        typer.Option("--output", "-o", help="Output file path for results"),
    ] = None,
    format: Annotated[
        str,
        typer.Option("--format", "-f", help="Output format (console, json)"),
    ] = "console",
) -> None:
    """Detect anomalies in data using ML methods.

    Examples:
        truthound ml anomaly data.csv
        truthound ml anomaly data.csv --method isolation_forest --contamination 0.05
        truthound ml anomaly data.csv --columns "amount,price" --output anomalies.json
    """
    # Exit status is 1 when the file is missing, the method is unknown, or
    # loading/fitting/prediction raises.
    # NOTE(review): ``--output`` is only consulted in the JSON branch below;
    # with the console format the results are printed and no file is written.
    import polars as pl
    from truthound.ml import (
        ZScoreAnomalyDetector,
        IQRAnomalyDetector,
        MADAnomalyDetector,
        IsolationForestDetector,
    )
    from truthound.ml.anomaly_models.statistical import StatisticalConfig
    from truthound.ml.anomaly_models.isolation_forest import IsolationForestConfig

    if not file.exists():
        typer.echo(f"Error: File not found: {file}", err=True)
        raise typer.Exit(1)

    try:
        # Load data: paths ending in ".csv" are read as CSV, anything else
        # is handed to the Parquet reader.
        df = pl.read_csv(file) if str(file).endswith(".csv") else pl.read_parquet(file)

        # Parse columns
        cols = [c.strip() for c in columns.split(",")] if columns else None

        # Select detector and appropriate config
        # Use min_samples=10 for CLI to allow smaller datasets
        if method == "isolation_forest":
            config = IsolationForestConfig(
                contamination=contamination, columns=cols, min_samples=10
            )
            detector = IsolationForestDetector(config=config)
        elif method in ("zscore", "iqr", "mad"):
            config = StatisticalConfig(
                contamination=contamination, columns=cols, min_samples=10
            )
            detector_map = {
                "zscore": ZScoreAnomalyDetector,
                "iqr": IQRAnomalyDetector,
                "mad": MADAnomalyDetector,
            }
            detector = detector_map[method](config=config)
        else:
            typer.echo(f"Error: Unknown method '{method}'. Available: zscore, iqr, mad, isolation_forest", err=True)
            raise typer.Exit(1)

        # The detector is fitted and evaluated on the same frame.
        detector.fit(df.lazy())
        result = detector.predict(df.lazy())

        # Output results
        if format == "json":
            output_data = result.to_dict()
            if output:
                with open(output, "w") as f:
                    json.dump(output_data, f, indent=2)
                typer.echo(f"Results saved to {output}")
            else:
                typer.echo(json.dumps(output_data, indent=2))
        else:
            typer.echo(f"\nAnomaly Detection Results ({method})")
            typer.echo("=" * 50)
            typer.echo(f"Total points: {result.total_points}")
            typer.echo(f"Anomalies found: {result.anomaly_count}")
            typer.echo(f"Anomaly ratio: {result.anomaly_ratio:.2%}")
            typer.echo(f"Threshold used: {result.threshold_used:.4f}")

            if result.anomaly_count > 0:
                typer.echo("\nTop anomalies:")
                # Show the ten highest-scoring anomalies.
                anomalies = sorted(result.get_anomalies(), key=lambda x: x.score, reverse=True)[:10]
                for a in anomalies:
                    typer.echo(f" Index {a.index}: score={a.score:.4f}, confidence={a.confidence:.2%}")

    except typer.Exit:
        # typer.Exit derives from RuntimeError; re-raise it so the generic
        # handler below does not print a second, meaningless "Error: 1".
        raise
    except Exception as e:
        typer.echo(f"Error: {e}", err=True)
        raise typer.Exit(1)
|
|
1900
|
+
|
|
1901
|
+
|
|
1902
|
+
@ml_app.command(name="drift")
def ml_drift_cmd(
    baseline: Annotated[Path, typer.Argument(help="Path to baseline/reference data file")],
    current: Annotated[Path, typer.Argument(help="Path to current data file")],
    method: Annotated[
        str,
        typer.Option("--method", "-m", help="Detection method (distribution, feature, multivariate)"),
    ] = "feature",
    threshold: Annotated[
        float,
        typer.Option("--threshold", "-t", help="Drift detection threshold"),
    ] = 0.1,
    columns: Annotated[
        Optional[str],
        typer.Option("--columns", help="Comma-separated columns to analyze"),
    ] = None,
    output: Annotated[
        Optional[Path],
        typer.Option("--output", "-o", help="Output file path"),
    ] = None,
) -> None:
    """Detect data drift between baseline and current datasets.

    Examples:
        truthound ml drift baseline.csv current.csv
        truthound ml drift ref.parquet new.parquet --method multivariate
        truthound ml drift old.csv new.csv --threshold 0.2 --output drift_report.json
    """
    # Exit status is 1 when either file is missing, the method is unknown,
    # or loading/detection raises. Results are always printed; they are also
    # written as JSON when ``--output`` is given.
    import polars as pl
    from truthound.ml.drift_detection import (
        DistributionDriftDetector,
        FeatureDriftDetector,
        MultivariateDriftDetector,
    )

    if not baseline.exists():
        typer.echo(f"Error: Baseline file not found: {baseline}", err=True)
        raise typer.Exit(1)
    if not current.exists():
        typer.echo(f"Error: Current file not found: {current}", err=True)
        raise typer.Exit(1)

    def _read_table(path):
        """Read ``path`` as CSV when it ends in ".csv", else as Parquet."""
        if str(path).endswith(".csv"):
            return pl.read_csv(path)
        return pl.read_parquet(path)

    try:
        # Load data
        baseline_df = _read_table(baseline)
        current_df = _read_table(current)

        detector_map = {
            "distribution": DistributionDriftDetector,
            "feature": FeatureDriftDetector,
            "multivariate": MultivariateDriftDetector,
        }

        if method not in detector_map:
            typer.echo(f"Error: Unknown method '{method}'. Available: {list(detector_map.keys())}", err=True)
            raise typer.Exit(1)

        detector = detector_map[method](threshold=threshold)
        detector.fit(baseline_df.lazy())

        cols = [c.strip() for c in columns.split(",")] if columns else None
        result = detector.detect(baseline_df.lazy(), current_df.lazy(), columns=cols)

        # Output results
        typer.echo(f"\nDrift Detection Results ({method})")
        typer.echo("=" * 50)
        typer.echo(f"Drift detected: {'YES' if result.is_drifted else 'NO'}")
        typer.echo(f"Drift score: {result.drift_score:.4f}")
        typer.echo(f"Drift type: {result.drift_type}")

        if result.column_scores:
            typer.echo("\nPer-column drift scores:")
            # Highest score first; columns at or above the threshold are
            # flagged.
            for col, score in sorted(result.column_scores, key=lambda x: x[1], reverse=True):
                status = "[DRIFTED]" if score >= threshold else ""
                typer.echo(f" {col}: {score:.4f} {status}")

        if output:
            with open(output, "w") as f:
                json.dump(result.to_dict(), f, indent=2)
            typer.echo(f"\nResults saved to {output}")

    except typer.Exit:
        # typer.Exit derives from RuntimeError; re-raise it so the generic
        # handler below does not print a second, meaningless "Error: 1".
        raise
    except Exception as e:
        typer.echo(f"Error: {e}", err=True)
        raise typer.Exit(1)
|
|
1987
|
+
|
|
1988
|
+
|
|
1989
|
+
@ml_app.command(name="learn-rules")
def ml_learn_rules_cmd(
    file: Annotated[Path, typer.Argument(help="Path to the data file")],
    output: Annotated[
        Path, typer.Option("--output", "-o", help="Output file for learned rules")
    ] = Path("learned_rules.json"),
    strictness: Annotated[
        str, typer.Option("--strictness", "-s", help="Rule strictness (loose, medium, strict)")
    ] = "medium",
    min_confidence: Annotated[
        float, typer.Option("--min-confidence", help="Minimum rule confidence")
    ] = 0.9,
    max_rules: Annotated[
        int, typer.Option("--max-rules", help="Maximum number of rules to generate")
    ] = 100,
) -> None:
    """Learn validation rules from data.

    Examples:
        truthound ml learn-rules data.csv
        truthound ml learn-rules data.csv --strictness strict --min-confidence 0.95
        truthound ml learn-rules data.parquet --output my_rules.json
    """
    # Flow: load the file, run DataProfileRuleLearner on it, print a per-type
    # tally of the learned rules, then dump the result as JSON to ``output``.
    # A missing file or any failure inside the try block exits with 1.
    import polars as pl
    from truthound.ml.rule_learning import DataProfileRuleLearner, PatternRuleLearner

    if not file.exists():
        typer.echo(f"Error: File not found: {file}", err=True)
        raise typer.Exit(1)

    try:
        # Paths ending in ".csv" are read as CSV, everything else as Parquet.
        if str(file).endswith(".csv"):
            frame = pl.read_csv(file)
        else:
            frame = pl.read_parquet(file)

        typer.echo(f"Learning rules from {file}...")
        typer.echo(f" Rows: {len(frame):,}, Columns: {len(frame.columns)}")

        # Use profile learner
        profile_learner = DataProfileRuleLearner(
            strictness=strictness, min_confidence=min_confidence, max_rules=max_rules
        )
        learned = profile_learner.learn_rules(frame.lazy())

        typer.echo(f"\nLearned {len(learned.rules)} rules ({learned.filtered_rules} filtered)")
        typer.echo(f"Learning time: {learned.learning_time_ms:.1f}ms")

        # Count how many rules were produced for each rule type.
        tally = {}
        for learned_rule in learned.rules:
            kind = learned_rule.rule_type
            if kind in tally:
                tally[kind] += 1
            else:
                tally[kind] = 1

        typer.echo("\nRules by type:")
        by_count = sorted(tally.items(), key=lambda pair: pair[1], reverse=True)
        for kind, how_many in by_count:
            typer.echo(f" {kind}: {how_many}")

        # Save rules
        with open(output, "w") as sink:
            json.dump(learned.to_dict(), sink, indent=2)
        typer.echo(f"\nRules saved to {output}")

    except Exception as exc:
        typer.echo(f"Error: {exc}", err=True)
        raise typer.Exit(1)
|
|
2058
|
+
|
|
2059
|
+
|
|
2060
|
+
# Lineage subcommand group
|
|
2061
|
+
# Typer sub-application that groups the ``lineage`` commands; it is mounted
# on the root ``app`` under the name "lineage".
lineage_app = typer.Typer(name="lineage", help="Data lineage tracking and analysis commands")
app.add_typer(lineage_app, name="lineage")
|
|
2066
|
+
|
|
2067
|
+
|
|
2068
|
+
@lineage_app.command(name="show")
def lineage_show_cmd(
    lineage_file: Annotated[Path, typer.Argument(help="Path to lineage JSON file")],
    node: Annotated[
        Optional[str],
        typer.Option("--node", "-n", help="Show lineage for specific node"),
    ] = None,
    direction: Annotated[
        str,
        typer.Option("--direction", "-d", help="Direction (upstream, downstream, both)"),
    ] = "both",
    format: Annotated[
        str,
        typer.Option("--format", "-f", help="Output format (console, json, dot)"),
    ] = "console",
) -> None:
    """Display lineage information.

    Examples:
        truthound lineage show lineage.json
        truthound lineage show lineage.json --node my_table --direction upstream
        truthound lineage show lineage.json --format dot > lineage.dot
    """
    # With ``--node`` the node's type and its upstream/downstream neighbours
    # are printed; without it a graph summary (counts plus up to ten roots
    # and leaves) is printed. Exit status is 1 when the file or the node is
    # missing, or when loading/querying the graph raises.
    # NOTE(review): ``format`` is accepted but not read anywhere in this
    # command; output is always the console rendering.
    from truthound.lineage import LineageGraph

    if not lineage_file.exists():
        typer.echo(f"Error: File not found: {lineage_file}", err=True)
        raise typer.Exit(1)

    try:
        graph = LineageGraph.load(lineage_file)

        if node:
            if not graph.has_node(node):
                typer.echo(f"Error: Node '{node}' not found", err=True)
                raise typer.Exit(1)

            node_obj = graph.get_node(node)
            typer.echo(f"\nLineage for: {node}")
            typer.echo(f"Type: {node_obj.node_type.value}")

            # Any direction value other than the three documented ones
            # matches neither branch, so only the header above is printed.
            if direction in ("upstream", "both"):
                upstream = graph.get_upstream(node)
                typer.echo(f"\nUpstream ({len(upstream)} nodes):")
                for n in upstream:
                    typer.echo(f" <- {n.name} ({n.node_type.value})")

            if direction in ("downstream", "both"):
                downstream = graph.get_downstream(node)
                typer.echo(f"\nDownstream ({len(downstream)} nodes):")
                for n in downstream:
                    typer.echo(f" -> {n.name} ({n.node_type.value})")
        else:
            typer.echo("\nLineage Graph Summary")
            typer.echo("=" * 40)
            typer.echo(f"Nodes: {graph.node_count}")
            typer.echo(f"Edges: {graph.edge_count}")

            # Only the first ten roots and leaves are listed.
            roots = graph.get_roots()
            typer.echo(f"\nRoot nodes ({len(roots)}):")
            for root in roots[:10]:
                typer.echo(f" {root.name} ({root.node_type.value})")

            leaves = graph.get_leaves()
            typer.echo(f"\nLeaf nodes ({len(leaves)}):")
            for leaf in leaves[:10]:
                typer.echo(f" {leaf.name} ({leaf.node_type.value})")

    except typer.Exit:
        # typer.Exit derives from RuntimeError; re-raise it so the generic
        # handler below does not print a second, meaningless "Error: 1".
        raise
    except Exception as e:
        typer.echo(f"Error: {e}", err=True)
        raise typer.Exit(1)
|
|
2139
|
+
|
|
2140
|
+
|
|
2141
|
+
@lineage_app.command(name="impact")
def lineage_impact_cmd(
    lineage_file: Annotated[Path, typer.Argument(help="Path to lineage JSON file")],
    node: Annotated[str, typer.Argument(help="Node to analyze impact for")],
    max_depth: Annotated[
        int, typer.Option("--max-depth", help="Maximum depth for impact analysis")
    ] = -1,
    output: Annotated[
        Optional[Path], typer.Option("--output", "-o", help="Output file for results")
    ] = None,
) -> None:
    """Analyze impact of changes to a data asset.

    Examples:
        truthound lineage impact lineage.json raw_data
        truthound lineage impact lineage.json my_table --max-depth 3
    """
    # Flow: load the graph, run ImpactAnalyzer for ``node``, print the
    # analyzer's summary and up to twenty affected nodes, and optionally dump
    # the result as JSON. A missing file or any failure exits with 1.
    from truthound.lineage import LineageGraph, ImpactAnalyzer

    if not lineage_file.exists():
        typer.echo(f"Error: File not found: {lineage_file}", err=True)
        raise typer.Exit(1)

    # Text marker shown in front of each affected node, keyed by the value
    # of its impact level; unknown levels get an empty marker.
    markers = {
        "critical": "[!!!]",
        "high": "[!!]",
        "medium": "[!]",
        "low": "[-]",
        "none": "[ ]",
    }

    try:
        lineage = LineageGraph.load(lineage_file)
        impact = ImpactAnalyzer(lineage).analyze_impact(node, max_depth=max_depth)

        typer.echo(impact.summary())

        if impact.affected_nodes:
            typer.echo("\nAffected nodes:")
            for hit in impact.affected_nodes[:20]:
                marker = markers.get(hit.impact_level.value, "")
                typer.echo(f" {marker} {hit.node.name} (depth={hit.distance})")

        if output:
            with open(output, "w") as sink:
                json.dump(impact.to_dict(), sink, indent=2)
            typer.echo(f"\nResults saved to {output}")

    except Exception as exc:
        typer.echo(f"Error: {exc}", err=True)
        raise typer.Exit(1)
|
|
2194
|
+
|
|
2195
|
+
|
|
2196
|
+
# Sub-application that groups the real-time / streaming commands; it is
# registered on `app` under the "realtime" name.
realtime_app = typer.Typer(name="realtime", help="Real-time and streaming validation commands")
app.add_typer(realtime_app, name="realtime")
|
|
2202
|
+
|
|
2203
|
+
|
|
2204
|
+
@realtime_app.command(name="validate")
def realtime_validate_cmd(
    source: Annotated[str, typer.Argument(help="Streaming source (mock, kafka:topic, kinesis:stream)")],
    validators: Annotated[
        Optional[str],
        typer.Option("--validators", "-v", help="Comma-separated validators"),
    ] = None,
    batch_size: Annotated[
        int,
        typer.Option("--batch-size", "-b", help="Batch size"),
    ] = 1000,
    max_batches: Annotated[
        int,
        typer.Option("--max-batches", help="Maximum batches to process (0=unlimited)"),
    ] = 10,
    output: Annotated[
        Optional[Path],
        typer.Option("--output", "-o", help="Output file for results"),
    ] = None,
) -> None:
    """Validate streaming data in real-time.

    Examples:
        truthound realtime validate mock --max-batches 5
        truthound realtime validate mock --validators null,range --batch-size 500
    """
    # NOTE: the docstring above doubles as the command's help text, so it is
    # kept verbatim; implementation notes live in comments instead.
    from truthound.realtime import MockStreamingSource, StreamingValidator, StreamingConfig

    try:
        # Every source is currently served by the mock stream; anything that
        # does not start with "mock" only gets an explanatory notice first.
        if not source.startswith("mock"):
            typer.echo(f"Source '{source}' requires additional configuration.")
            typer.echo("For now, using mock source for demonstration.")

        # A non-positive --max-batches means "no limit" for the validator,
        # while the mock stream itself is then sized to 100 batches.
        bounded = max_batches > 0
        stream = MockStreamingSource(
            records_per_batch=batch_size,
            num_batches=max_batches if bounded else 100,
        )
        batch_limit = max_batches if bounded else None

        # None (no explicit selection) is passed through to the validator.
        validator_list = None
        if validators:
            validator_list = [name.strip() for name in validators.split(",")]

        streaming_validator = StreamingValidator(
            validators=validator_list,
            config=StreamingConfig(batch_size=batch_size),
        )

        batch_reports = []
        with stream:
            typer.echo("Starting streaming validation...")
            typer.echo(f" Source: {source}")
            typer.echo(f" Batch size: {batch_size}")
            typer.echo(f" Validators: {validator_list or 'all'}")
            typer.echo()

            for batch in streaming_validator.validate_stream(stream, max_batches=batch_limit):
                status = "[ISSUES]" if batch.has_issues else "[OK]"
                typer.echo(
                    f"Batch {batch.batch_id}: {batch.record_count} records, "
                    f"{batch.issue_count} issues {status}"
                )
                batch_reports.append(batch.to_dict())

        stats = streaming_validator.get_stats()
        typer.echo("\nSummary")
        typer.echo("=" * 40)
        typer.echo(f"Batches processed: {stats['batch_count']}")
        typer.echo(f"Total records: {stats['total_records']}")
        typer.echo(f"Total issues: {stats['total_issues']}")
        typer.echo(f"Issue rate: {stats['issue_rate']:.2%}")
        typer.echo(f"Avg processing time: {stats['avg_processing_time_ms']:.1f}ms")

        # Per-batch reports and the aggregate stats are written together.
        if output is not None:
            with output.open("w") as handle:
                json.dump({"batches": batch_reports, "stats": stats}, handle, indent=2)
            typer.echo(f"\nResults saved to {output}")

    except Exception as exc:
        # Any failure is reported on stderr and mapped to exit code 1.
        typer.echo(f"Error: {exc}", err=True)
        raise typer.Exit(1)
|
|
2284
|
+
|
|
2285
|
+
|
|
2286
|
+
def _discover_and_register_plugins() -> None:
    """Attach CLI extensions published under the 'truthound.cli' entry point group.

    External packages (for example truthound-dashboard) can extend the CLI by
    declaring an entry point in their pyproject.toml:

        [project.entry-points."truthound.cli"]
        serve = "truthound_dashboard.cli:register_commands"

    The object each entry point resolves to is handled by the first rule
    that matches:

    - it has a `register_commands` attribute: `register_commands(app)` is called;
    - it has an `app` attribute: that attribute is added as a sub-typer named
      after the entry point;
    - it is itself callable: it is called with `app`.

    Anything else is reported with a warning. Failures while loading or
    registering a plugin, and failures of discovery itself, are only logged
    at debug level so that plugins cannot break the core CLI.
    """
    import logging

    log = logging.getLogger(__name__)

    try:
        from importlib.metadata import entry_points

        # Selecting by `group=` needs a recent importlib.metadata; if the
        # call is rejected, the outer handler below logs it and moves on.
        for entry in entry_points(group="truthound.cli"):
            try:
                loaded = entry.load()

                # Preferred shape: an explicit registration hook.
                if hasattr(loaded, "register_commands"):
                    loaded.register_commands(app)
                    log.debug(f"Registered CLI plugin via register_commands: {entry.name}")
                    continue

                # A ready-made Typer app is added as a sub-command group.
                if hasattr(loaded, "app"):
                    app.add_typer(loaded.app, name=entry.name)
                    log.debug(f"Registered CLI plugin via app typer: {entry.name}")
                    continue

                # The entry point may point straight at a register function.
                if callable(loaded):
                    loaded(app)
                    log.debug(f"Registered CLI plugin via callable: {entry.name}")
                    continue

                log.warning(
                    f"CLI plugin '{entry.name}' has no register_commands, app, "
                    "or is not callable"
                )

            except Exception as exc:
                # Log but don't fail - plugins shouldn't break core functionality
                log.debug(f"Failed to load CLI plugin '{entry.name}': {exc}")

    except Exception as exc:
        # Entry points not available or other error - continue without plugins
        log.debug(f"Entry point discovery not available: {exc}")
|
|
2344
|
+
|
|
2345
|
+
|
|
2346
|
+
# Plugin discovery is performed as a module-load side effect so that commands
# contributed through entry points are already attached by the time the CLI
# is invoked.
_discover_and_register_plugins()
|
|
2350
|
+
|
|
2351
|
+
|
|
2352
|
+
def main() -> None:
    """Run the CLI by invoking the module's Typer application."""
    app()


# Allow running this module directly as a script.
if __name__ == "__main__":
    main()
|