truthound 1.0.8__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- truthound/__init__.py +162 -0
- truthound/adapters.py +100 -0
- truthound/api.py +365 -0
- truthound/audit/__init__.py +248 -0
- truthound/audit/core.py +967 -0
- truthound/audit/filters.py +620 -0
- truthound/audit/formatters.py +707 -0
- truthound/audit/logger.py +902 -0
- truthound/audit/middleware.py +571 -0
- truthound/audit/storage.py +1083 -0
- truthound/benchmark/__init__.py +123 -0
- truthound/benchmark/base.py +757 -0
- truthound/benchmark/comparison.py +635 -0
- truthound/benchmark/generators.py +706 -0
- truthound/benchmark/reporters.py +718 -0
- truthound/benchmark/runner.py +635 -0
- truthound/benchmark/scenarios.py +712 -0
- truthound/cache.py +252 -0
- truthound/checkpoint/__init__.py +136 -0
- truthound/checkpoint/actions/__init__.py +164 -0
- truthound/checkpoint/actions/base.py +324 -0
- truthound/checkpoint/actions/custom.py +234 -0
- truthound/checkpoint/actions/discord_notify.py +290 -0
- truthound/checkpoint/actions/email_notify.py +405 -0
- truthound/checkpoint/actions/github_action.py +406 -0
- truthound/checkpoint/actions/opsgenie.py +1499 -0
- truthound/checkpoint/actions/pagerduty.py +226 -0
- truthound/checkpoint/actions/slack_notify.py +233 -0
- truthound/checkpoint/actions/store_result.py +249 -0
- truthound/checkpoint/actions/teams_notify.py +1570 -0
- truthound/checkpoint/actions/telegram_notify.py +419 -0
- truthound/checkpoint/actions/update_docs.py +552 -0
- truthound/checkpoint/actions/webhook.py +293 -0
- truthound/checkpoint/analytics/__init__.py +147 -0
- truthound/checkpoint/analytics/aggregations/__init__.py +23 -0
- truthound/checkpoint/analytics/aggregations/rollup.py +481 -0
- truthound/checkpoint/analytics/aggregations/time_bucket.py +306 -0
- truthound/checkpoint/analytics/analyzers/__init__.py +17 -0
- truthound/checkpoint/analytics/analyzers/anomaly.py +386 -0
- truthound/checkpoint/analytics/analyzers/base.py +270 -0
- truthound/checkpoint/analytics/analyzers/forecast.py +421 -0
- truthound/checkpoint/analytics/analyzers/trend.py +314 -0
- truthound/checkpoint/analytics/models.py +292 -0
- truthound/checkpoint/analytics/protocols.py +549 -0
- truthound/checkpoint/analytics/service.py +718 -0
- truthound/checkpoint/analytics/stores/__init__.py +16 -0
- truthound/checkpoint/analytics/stores/base.py +306 -0
- truthound/checkpoint/analytics/stores/memory_store.py +353 -0
- truthound/checkpoint/analytics/stores/sqlite_store.py +557 -0
- truthound/checkpoint/analytics/stores/timescale_store.py +501 -0
- truthound/checkpoint/async_actions.py +794 -0
- truthound/checkpoint/async_base.py +708 -0
- truthound/checkpoint/async_checkpoint.py +617 -0
- truthound/checkpoint/async_runner.py +639 -0
- truthound/checkpoint/checkpoint.py +527 -0
- truthound/checkpoint/ci/__init__.py +61 -0
- truthound/checkpoint/ci/detector.py +355 -0
- truthound/checkpoint/ci/reporter.py +436 -0
- truthound/checkpoint/ci/templates.py +454 -0
- truthound/checkpoint/circuitbreaker/__init__.py +133 -0
- truthound/checkpoint/circuitbreaker/breaker.py +542 -0
- truthound/checkpoint/circuitbreaker/core.py +252 -0
- truthound/checkpoint/circuitbreaker/detection.py +459 -0
- truthound/checkpoint/circuitbreaker/middleware.py +389 -0
- truthound/checkpoint/circuitbreaker/registry.py +357 -0
- truthound/checkpoint/distributed/__init__.py +139 -0
- truthound/checkpoint/distributed/backends/__init__.py +35 -0
- truthound/checkpoint/distributed/backends/celery_backend.py +503 -0
- truthound/checkpoint/distributed/backends/kubernetes_backend.py +696 -0
- truthound/checkpoint/distributed/backends/local_backend.py +397 -0
- truthound/checkpoint/distributed/backends/ray_backend.py +625 -0
- truthound/checkpoint/distributed/base.py +774 -0
- truthound/checkpoint/distributed/orchestrator.py +765 -0
- truthound/checkpoint/distributed/protocols.py +842 -0
- truthound/checkpoint/distributed/registry.py +449 -0
- truthound/checkpoint/idempotency/__init__.py +120 -0
- truthound/checkpoint/idempotency/core.py +295 -0
- truthound/checkpoint/idempotency/fingerprint.py +454 -0
- truthound/checkpoint/idempotency/locking.py +604 -0
- truthound/checkpoint/idempotency/service.py +592 -0
- truthound/checkpoint/idempotency/stores.py +653 -0
- truthound/checkpoint/monitoring/__init__.py +134 -0
- truthound/checkpoint/monitoring/aggregators/__init__.py +15 -0
- truthound/checkpoint/monitoring/aggregators/base.py +372 -0
- truthound/checkpoint/monitoring/aggregators/realtime.py +300 -0
- truthound/checkpoint/monitoring/aggregators/window.py +493 -0
- truthound/checkpoint/monitoring/collectors/__init__.py +17 -0
- truthound/checkpoint/monitoring/collectors/base.py +257 -0
- truthound/checkpoint/monitoring/collectors/memory_collector.py +617 -0
- truthound/checkpoint/monitoring/collectors/prometheus_collector.py +451 -0
- truthound/checkpoint/monitoring/collectors/redis_collector.py +518 -0
- truthound/checkpoint/monitoring/events.py +410 -0
- truthound/checkpoint/monitoring/protocols.py +636 -0
- truthound/checkpoint/monitoring/service.py +578 -0
- truthound/checkpoint/monitoring/views/__init__.py +17 -0
- truthound/checkpoint/monitoring/views/base.py +172 -0
- truthound/checkpoint/monitoring/views/queue_view.py +220 -0
- truthound/checkpoint/monitoring/views/task_view.py +240 -0
- truthound/checkpoint/monitoring/views/worker_view.py +263 -0
- truthound/checkpoint/registry.py +337 -0
- truthound/checkpoint/runner.py +356 -0
- truthound/checkpoint/transaction/__init__.py +133 -0
- truthound/checkpoint/transaction/base.py +389 -0
- truthound/checkpoint/transaction/compensatable.py +537 -0
- truthound/checkpoint/transaction/coordinator.py +576 -0
- truthound/checkpoint/transaction/executor.py +622 -0
- truthound/checkpoint/transaction/idempotency.py +534 -0
- truthound/checkpoint/transaction/saga/__init__.py +143 -0
- truthound/checkpoint/transaction/saga/builder.py +584 -0
- truthound/checkpoint/transaction/saga/definition.py +515 -0
- truthound/checkpoint/transaction/saga/event_store.py +542 -0
- truthound/checkpoint/transaction/saga/patterns.py +833 -0
- truthound/checkpoint/transaction/saga/runner.py +718 -0
- truthound/checkpoint/transaction/saga/state_machine.py +793 -0
- truthound/checkpoint/transaction/saga/strategies.py +780 -0
- truthound/checkpoint/transaction/saga/testing.py +886 -0
- truthound/checkpoint/triggers/__init__.py +58 -0
- truthound/checkpoint/triggers/base.py +237 -0
- truthound/checkpoint/triggers/event.py +385 -0
- truthound/checkpoint/triggers/schedule.py +355 -0
- truthound/cli.py +2358 -0
- truthound/cli_modules/__init__.py +124 -0
- truthound/cli_modules/advanced/__init__.py +45 -0
- truthound/cli_modules/advanced/benchmark.py +343 -0
- truthound/cli_modules/advanced/docs.py +225 -0
- truthound/cli_modules/advanced/lineage.py +209 -0
- truthound/cli_modules/advanced/ml.py +320 -0
- truthound/cli_modules/advanced/realtime.py +196 -0
- truthound/cli_modules/checkpoint/__init__.py +46 -0
- truthound/cli_modules/checkpoint/init.py +114 -0
- truthound/cli_modules/checkpoint/list.py +71 -0
- truthound/cli_modules/checkpoint/run.py +159 -0
- truthound/cli_modules/checkpoint/validate.py +67 -0
- truthound/cli_modules/common/__init__.py +71 -0
- truthound/cli_modules/common/errors.py +414 -0
- truthound/cli_modules/common/options.py +419 -0
- truthound/cli_modules/common/output.py +507 -0
- truthound/cli_modules/common/protocol.py +552 -0
- truthound/cli_modules/core/__init__.py +48 -0
- truthound/cli_modules/core/check.py +123 -0
- truthound/cli_modules/core/compare.py +104 -0
- truthound/cli_modules/core/learn.py +57 -0
- truthound/cli_modules/core/mask.py +77 -0
- truthound/cli_modules/core/profile.py +65 -0
- truthound/cli_modules/core/scan.py +61 -0
- truthound/cli_modules/profiler/__init__.py +51 -0
- truthound/cli_modules/profiler/auto_profile.py +175 -0
- truthound/cli_modules/profiler/metadata.py +107 -0
- truthound/cli_modules/profiler/suite.py +283 -0
- truthound/cli_modules/registry.py +431 -0
- truthound/cli_modules/scaffolding/__init__.py +89 -0
- truthound/cli_modules/scaffolding/base.py +631 -0
- truthound/cli_modules/scaffolding/commands.py +545 -0
- truthound/cli_modules/scaffolding/plugins.py +1072 -0
- truthound/cli_modules/scaffolding/reporters.py +594 -0
- truthound/cli_modules/scaffolding/validators.py +1127 -0
- truthound/common/__init__.py +18 -0
- truthound/common/resilience/__init__.py +130 -0
- truthound/common/resilience/bulkhead.py +266 -0
- truthound/common/resilience/circuit_breaker.py +516 -0
- truthound/common/resilience/composite.py +332 -0
- truthound/common/resilience/config.py +292 -0
- truthound/common/resilience/protocols.py +217 -0
- truthound/common/resilience/rate_limiter.py +404 -0
- truthound/common/resilience/retry.py +341 -0
- truthound/datadocs/__init__.py +260 -0
- truthound/datadocs/base.py +571 -0
- truthound/datadocs/builder.py +761 -0
- truthound/datadocs/charts.py +764 -0
- truthound/datadocs/dashboard/__init__.py +63 -0
- truthound/datadocs/dashboard/app.py +576 -0
- truthound/datadocs/dashboard/components.py +584 -0
- truthound/datadocs/dashboard/state.py +240 -0
- truthound/datadocs/engine/__init__.py +46 -0
- truthound/datadocs/engine/context.py +376 -0
- truthound/datadocs/engine/pipeline.py +618 -0
- truthound/datadocs/engine/registry.py +469 -0
- truthound/datadocs/exporters/__init__.py +49 -0
- truthound/datadocs/exporters/base.py +198 -0
- truthound/datadocs/exporters/html.py +178 -0
- truthound/datadocs/exporters/json_exporter.py +253 -0
- truthound/datadocs/exporters/markdown.py +284 -0
- truthound/datadocs/exporters/pdf.py +392 -0
- truthound/datadocs/i18n/__init__.py +86 -0
- truthound/datadocs/i18n/catalog.py +960 -0
- truthound/datadocs/i18n/formatting.py +505 -0
- truthound/datadocs/i18n/loader.py +256 -0
- truthound/datadocs/i18n/plurals.py +378 -0
- truthound/datadocs/renderers/__init__.py +42 -0
- truthound/datadocs/renderers/base.py +401 -0
- truthound/datadocs/renderers/custom.py +342 -0
- truthound/datadocs/renderers/jinja.py +697 -0
- truthound/datadocs/sections.py +736 -0
- truthound/datadocs/styles.py +931 -0
- truthound/datadocs/themes/__init__.py +101 -0
- truthound/datadocs/themes/base.py +336 -0
- truthound/datadocs/themes/default.py +417 -0
- truthound/datadocs/themes/enterprise.py +419 -0
- truthound/datadocs/themes/loader.py +336 -0
- truthound/datadocs/themes.py +301 -0
- truthound/datadocs/transformers/__init__.py +57 -0
- truthound/datadocs/transformers/base.py +268 -0
- truthound/datadocs/transformers/enrichers.py +544 -0
- truthound/datadocs/transformers/filters.py +447 -0
- truthound/datadocs/transformers/i18n.py +468 -0
- truthound/datadocs/versioning/__init__.py +62 -0
- truthound/datadocs/versioning/diff.py +639 -0
- truthound/datadocs/versioning/storage.py +497 -0
- truthound/datadocs/versioning/version.py +358 -0
- truthound/datasources/__init__.py +223 -0
- truthound/datasources/_async_protocols.py +222 -0
- truthound/datasources/_protocols.py +159 -0
- truthound/datasources/adapters.py +428 -0
- truthound/datasources/async_base.py +599 -0
- truthound/datasources/async_factory.py +511 -0
- truthound/datasources/base.py +516 -0
- truthound/datasources/factory.py +433 -0
- truthound/datasources/nosql/__init__.py +47 -0
- truthound/datasources/nosql/base.py +487 -0
- truthound/datasources/nosql/elasticsearch.py +801 -0
- truthound/datasources/nosql/mongodb.py +636 -0
- truthound/datasources/pandas_optimized.py +582 -0
- truthound/datasources/pandas_source.py +216 -0
- truthound/datasources/polars_source.py +395 -0
- truthound/datasources/spark_source.py +479 -0
- truthound/datasources/sql/__init__.py +154 -0
- truthound/datasources/sql/base.py +710 -0
- truthound/datasources/sql/bigquery.py +410 -0
- truthound/datasources/sql/cloud_base.py +199 -0
- truthound/datasources/sql/databricks.py +471 -0
- truthound/datasources/sql/mysql.py +316 -0
- truthound/datasources/sql/oracle.py +427 -0
- truthound/datasources/sql/postgresql.py +321 -0
- truthound/datasources/sql/redshift.py +479 -0
- truthound/datasources/sql/snowflake.py +439 -0
- truthound/datasources/sql/sqlite.py +286 -0
- truthound/datasources/sql/sqlserver.py +437 -0
- truthound/datasources/streaming/__init__.py +47 -0
- truthound/datasources/streaming/base.py +350 -0
- truthound/datasources/streaming/kafka.py +670 -0
- truthound/decorators.py +98 -0
- truthound/docs/__init__.py +69 -0
- truthound/docs/extractor.py +971 -0
- truthound/docs/generator.py +601 -0
- truthound/docs/parser.py +1037 -0
- truthound/docs/renderer.py +999 -0
- truthound/drift/__init__.py +22 -0
- truthound/drift/compare.py +189 -0
- truthound/drift/detectors.py +464 -0
- truthound/drift/report.py +160 -0
- truthound/execution/__init__.py +65 -0
- truthound/execution/_protocols.py +324 -0
- truthound/execution/base.py +576 -0
- truthound/execution/distributed/__init__.py +179 -0
- truthound/execution/distributed/aggregations.py +731 -0
- truthound/execution/distributed/arrow_bridge.py +817 -0
- truthound/execution/distributed/base.py +550 -0
- truthound/execution/distributed/dask_engine.py +976 -0
- truthound/execution/distributed/mixins.py +766 -0
- truthound/execution/distributed/protocols.py +756 -0
- truthound/execution/distributed/ray_engine.py +1127 -0
- truthound/execution/distributed/registry.py +446 -0
- truthound/execution/distributed/spark_engine.py +1011 -0
- truthound/execution/distributed/validator_adapter.py +682 -0
- truthound/execution/pandas_engine.py +401 -0
- truthound/execution/polars_engine.py +497 -0
- truthound/execution/pushdown/__init__.py +230 -0
- truthound/execution/pushdown/ast.py +1550 -0
- truthound/execution/pushdown/builder.py +1550 -0
- truthound/execution/pushdown/dialects.py +1072 -0
- truthound/execution/pushdown/executor.py +829 -0
- truthound/execution/pushdown/optimizer.py +1041 -0
- truthound/execution/sql_engine.py +518 -0
- truthound/infrastructure/__init__.py +189 -0
- truthound/infrastructure/audit.py +1515 -0
- truthound/infrastructure/config.py +1133 -0
- truthound/infrastructure/encryption.py +1132 -0
- truthound/infrastructure/logging.py +1503 -0
- truthound/infrastructure/metrics.py +1220 -0
- truthound/lineage/__init__.py +89 -0
- truthound/lineage/base.py +746 -0
- truthound/lineage/impact_analysis.py +474 -0
- truthound/lineage/integrations/__init__.py +22 -0
- truthound/lineage/integrations/openlineage.py +548 -0
- truthound/lineage/tracker.py +512 -0
- truthound/lineage/visualization/__init__.py +33 -0
- truthound/lineage/visualization/protocols.py +145 -0
- truthound/lineage/visualization/renderers/__init__.py +20 -0
- truthound/lineage/visualization/renderers/cytoscape.py +329 -0
- truthound/lineage/visualization/renderers/d3.py +331 -0
- truthound/lineage/visualization/renderers/graphviz.py +276 -0
- truthound/lineage/visualization/renderers/mermaid.py +308 -0
- truthound/maskers.py +113 -0
- truthound/ml/__init__.py +124 -0
- truthound/ml/anomaly_models/__init__.py +31 -0
- truthound/ml/anomaly_models/ensemble.py +362 -0
- truthound/ml/anomaly_models/isolation_forest.py +444 -0
- truthound/ml/anomaly_models/statistical.py +392 -0
- truthound/ml/base.py +1178 -0
- truthound/ml/drift_detection/__init__.py +26 -0
- truthound/ml/drift_detection/concept.py +381 -0
- truthound/ml/drift_detection/distribution.py +361 -0
- truthound/ml/drift_detection/feature.py +442 -0
- truthound/ml/drift_detection/multivariate.py +495 -0
- truthound/ml/monitoring/__init__.py +88 -0
- truthound/ml/monitoring/alerting/__init__.py +33 -0
- truthound/ml/monitoring/alerting/handlers.py +427 -0
- truthound/ml/monitoring/alerting/rules.py +508 -0
- truthound/ml/monitoring/collectors/__init__.py +19 -0
- truthound/ml/monitoring/collectors/composite.py +105 -0
- truthound/ml/monitoring/collectors/drift.py +324 -0
- truthound/ml/monitoring/collectors/performance.py +179 -0
- truthound/ml/monitoring/collectors/quality.py +369 -0
- truthound/ml/monitoring/monitor.py +536 -0
- truthound/ml/monitoring/protocols.py +451 -0
- truthound/ml/monitoring/stores/__init__.py +15 -0
- truthound/ml/monitoring/stores/memory.py +201 -0
- truthound/ml/monitoring/stores/prometheus.py +296 -0
- truthound/ml/rule_learning/__init__.py +25 -0
- truthound/ml/rule_learning/constraint_miner.py +443 -0
- truthound/ml/rule_learning/pattern_learner.py +499 -0
- truthound/ml/rule_learning/profile_learner.py +462 -0
- truthound/multitenancy/__init__.py +326 -0
- truthound/multitenancy/core.py +852 -0
- truthound/multitenancy/integration.py +597 -0
- truthound/multitenancy/isolation.py +630 -0
- truthound/multitenancy/manager.py +770 -0
- truthound/multitenancy/middleware.py +765 -0
- truthound/multitenancy/quota.py +537 -0
- truthound/multitenancy/resolvers.py +603 -0
- truthound/multitenancy/storage.py +703 -0
- truthound/observability/__init__.py +307 -0
- truthound/observability/context.py +531 -0
- truthound/observability/instrumentation.py +611 -0
- truthound/observability/logging.py +887 -0
- truthound/observability/metrics.py +1157 -0
- truthound/observability/tracing/__init__.py +178 -0
- truthound/observability/tracing/baggage.py +310 -0
- truthound/observability/tracing/config.py +426 -0
- truthound/observability/tracing/exporter.py +787 -0
- truthound/observability/tracing/integration.py +1018 -0
- truthound/observability/tracing/otel/__init__.py +146 -0
- truthound/observability/tracing/otel/adapter.py +982 -0
- truthound/observability/tracing/otel/bridge.py +1177 -0
- truthound/observability/tracing/otel/compat.py +681 -0
- truthound/observability/tracing/otel/config.py +691 -0
- truthound/observability/tracing/otel/detection.py +327 -0
- truthound/observability/tracing/otel/protocols.py +426 -0
- truthound/observability/tracing/processor.py +561 -0
- truthound/observability/tracing/propagator.py +757 -0
- truthound/observability/tracing/provider.py +569 -0
- truthound/observability/tracing/resource.py +515 -0
- truthound/observability/tracing/sampler.py +487 -0
- truthound/observability/tracing/span.py +676 -0
- truthound/plugins/__init__.py +198 -0
- truthound/plugins/base.py +599 -0
- truthound/plugins/cli.py +680 -0
- truthound/plugins/dependencies/__init__.py +42 -0
- truthound/plugins/dependencies/graph.py +422 -0
- truthound/plugins/dependencies/resolver.py +417 -0
- truthound/plugins/discovery.py +379 -0
- truthound/plugins/docs/__init__.py +46 -0
- truthound/plugins/docs/extractor.py +444 -0
- truthound/plugins/docs/renderer.py +499 -0
- truthound/plugins/enterprise_manager.py +877 -0
- truthound/plugins/examples/__init__.py +19 -0
- truthound/plugins/examples/custom_validators.py +317 -0
- truthound/plugins/examples/slack_notifier.py +312 -0
- truthound/plugins/examples/xml_reporter.py +254 -0
- truthound/plugins/hooks.py +558 -0
- truthound/plugins/lifecycle/__init__.py +43 -0
- truthound/plugins/lifecycle/hot_reload.py +402 -0
- truthound/plugins/lifecycle/manager.py +371 -0
- truthound/plugins/manager.py +736 -0
- truthound/plugins/registry.py +338 -0
- truthound/plugins/security/__init__.py +93 -0
- truthound/plugins/security/exceptions.py +332 -0
- truthound/plugins/security/policies.py +348 -0
- truthound/plugins/security/protocols.py +643 -0
- truthound/plugins/security/sandbox/__init__.py +45 -0
- truthound/plugins/security/sandbox/context.py +158 -0
- truthound/plugins/security/sandbox/engines/__init__.py +19 -0
- truthound/plugins/security/sandbox/engines/container.py +379 -0
- truthound/plugins/security/sandbox/engines/noop.py +144 -0
- truthound/plugins/security/sandbox/engines/process.py +336 -0
- truthound/plugins/security/sandbox/factory.py +211 -0
- truthound/plugins/security/signing/__init__.py +57 -0
- truthound/plugins/security/signing/service.py +330 -0
- truthound/plugins/security/signing/trust_store.py +368 -0
- truthound/plugins/security/signing/verifier.py +459 -0
- truthound/plugins/versioning/__init__.py +41 -0
- truthound/plugins/versioning/constraints.py +297 -0
- truthound/plugins/versioning/resolver.py +329 -0
- truthound/profiler/__init__.py +1729 -0
- truthound/profiler/_lazy.py +452 -0
- truthound/profiler/ab_testing/__init__.py +80 -0
- truthound/profiler/ab_testing/analysis.py +449 -0
- truthound/profiler/ab_testing/base.py +257 -0
- truthound/profiler/ab_testing/experiment.py +395 -0
- truthound/profiler/ab_testing/tracking.py +368 -0
- truthound/profiler/auto_threshold.py +1170 -0
- truthound/profiler/base.py +579 -0
- truthound/profiler/cache_patterns.py +911 -0
- truthound/profiler/caching.py +1303 -0
- truthound/profiler/column_profiler.py +712 -0
- truthound/profiler/comparison.py +1007 -0
- truthound/profiler/custom_patterns.py +1170 -0
- truthound/profiler/dashboard/__init__.py +50 -0
- truthound/profiler/dashboard/app.py +476 -0
- truthound/profiler/dashboard/components.py +457 -0
- truthound/profiler/dashboard/config.py +72 -0
- truthound/profiler/distributed/__init__.py +83 -0
- truthound/profiler/distributed/base.py +281 -0
- truthound/profiler/distributed/dask_backend.py +498 -0
- truthound/profiler/distributed/local_backend.py +293 -0
- truthound/profiler/distributed/profiler.py +304 -0
- truthound/profiler/distributed/ray_backend.py +374 -0
- truthound/profiler/distributed/spark_backend.py +375 -0
- truthound/profiler/distributed.py +1366 -0
- truthound/profiler/enterprise_sampling.py +1065 -0
- truthound/profiler/errors.py +488 -0
- truthound/profiler/evolution/__init__.py +91 -0
- truthound/profiler/evolution/alerts.py +426 -0
- truthound/profiler/evolution/changes.py +206 -0
- truthound/profiler/evolution/compatibility.py +365 -0
- truthound/profiler/evolution/detector.py +372 -0
- truthound/profiler/evolution/protocols.py +121 -0
- truthound/profiler/generators/__init__.py +48 -0
- truthound/profiler/generators/base.py +384 -0
- truthound/profiler/generators/ml_rules.py +375 -0
- truthound/profiler/generators/pattern_rules.py +384 -0
- truthound/profiler/generators/schema_rules.py +267 -0
- truthound/profiler/generators/stats_rules.py +324 -0
- truthound/profiler/generators/suite_generator.py +857 -0
- truthound/profiler/i18n.py +1542 -0
- truthound/profiler/incremental.py +554 -0
- truthound/profiler/incremental_validation.py +1710 -0
- truthound/profiler/integration/__init__.py +73 -0
- truthound/profiler/integration/adapters.py +345 -0
- truthound/profiler/integration/context.py +371 -0
- truthound/profiler/integration/executor.py +527 -0
- truthound/profiler/integration/naming.py +75 -0
- truthound/profiler/integration/protocols.py +243 -0
- truthound/profiler/memory.py +1185 -0
- truthound/profiler/migration/__init__.py +60 -0
- truthound/profiler/migration/base.py +345 -0
- truthound/profiler/migration/manager.py +444 -0
- truthound/profiler/migration/v1_0_to_v1_1.py +484 -0
- truthound/profiler/ml/__init__.py +73 -0
- truthound/profiler/ml/base.py +244 -0
- truthound/profiler/ml/classifier.py +507 -0
- truthound/profiler/ml/feature_extraction.py +604 -0
- truthound/profiler/ml/pretrained.py +448 -0
- truthound/profiler/ml_inference.py +1276 -0
- truthound/profiler/native_patterns.py +815 -0
- truthound/profiler/observability.py +1184 -0
- truthound/profiler/process_timeout.py +1566 -0
- truthound/profiler/progress.py +568 -0
- truthound/profiler/progress_callbacks.py +1734 -0
- truthound/profiler/quality.py +1345 -0
- truthound/profiler/resilience.py +1180 -0
- truthound/profiler/sampled_matcher.py +794 -0
- truthound/profiler/sampling.py +1288 -0
- truthound/profiler/scheduling/__init__.py +82 -0
- truthound/profiler/scheduling/protocols.py +214 -0
- truthound/profiler/scheduling/scheduler.py +474 -0
- truthound/profiler/scheduling/storage.py +457 -0
- truthound/profiler/scheduling/triggers.py +449 -0
- truthound/profiler/schema.py +603 -0
- truthound/profiler/streaming.py +685 -0
- truthound/profiler/streaming_patterns.py +1354 -0
- truthound/profiler/suite_cli.py +625 -0
- truthound/profiler/suite_config.py +789 -0
- truthound/profiler/suite_export.py +1268 -0
- truthound/profiler/table_profiler.py +547 -0
- truthound/profiler/timeout.py +565 -0
- truthound/profiler/validation.py +1532 -0
- truthound/profiler/visualization/__init__.py +118 -0
- truthound/profiler/visualization/base.py +346 -0
- truthound/profiler/visualization/generator.py +1259 -0
- truthound/profiler/visualization/plotly_renderer.py +811 -0
- truthound/profiler/visualization/renderers.py +669 -0
- truthound/profiler/visualization/sections.py +540 -0
- truthound/profiler/visualization.py +2122 -0
- truthound/profiler/yaml_validation.py +1151 -0
- truthound/py.typed +0 -0
- truthound/ratelimit/__init__.py +248 -0
- truthound/ratelimit/algorithms.py +1108 -0
- truthound/ratelimit/core.py +573 -0
- truthound/ratelimit/integration.py +532 -0
- truthound/ratelimit/limiter.py +663 -0
- truthound/ratelimit/middleware.py +700 -0
- truthound/ratelimit/policy.py +792 -0
- truthound/ratelimit/storage.py +763 -0
- truthound/rbac/__init__.py +340 -0
- truthound/rbac/core.py +976 -0
- truthound/rbac/integration.py +760 -0
- truthound/rbac/manager.py +1052 -0
- truthound/rbac/middleware.py +842 -0
- truthound/rbac/policy.py +954 -0
- truthound/rbac/storage.py +878 -0
- truthound/realtime/__init__.py +141 -0
- truthound/realtime/adapters/__init__.py +43 -0
- truthound/realtime/adapters/base.py +533 -0
- truthound/realtime/adapters/kafka.py +487 -0
- truthound/realtime/adapters/kinesis.py +479 -0
- truthound/realtime/adapters/mock.py +243 -0
- truthound/realtime/base.py +553 -0
- truthound/realtime/factory.py +382 -0
- truthound/realtime/incremental.py +660 -0
- truthound/realtime/processing/__init__.py +67 -0
- truthound/realtime/processing/exactly_once.py +575 -0
- truthound/realtime/processing/state.py +547 -0
- truthound/realtime/processing/windows.py +647 -0
- truthound/realtime/protocols.py +569 -0
- truthound/realtime/streaming.py +605 -0
- truthound/realtime/testing/__init__.py +32 -0
- truthound/realtime/testing/containers.py +615 -0
- truthound/realtime/testing/fixtures.py +484 -0
- truthound/report.py +280 -0
- truthound/reporters/__init__.py +46 -0
- truthound/reporters/_protocols.py +30 -0
- truthound/reporters/base.py +324 -0
- truthound/reporters/ci/__init__.py +66 -0
- truthound/reporters/ci/azure.py +436 -0
- truthound/reporters/ci/base.py +509 -0
- truthound/reporters/ci/bitbucket.py +567 -0
- truthound/reporters/ci/circleci.py +547 -0
- truthound/reporters/ci/detection.py +364 -0
- truthound/reporters/ci/factory.py +182 -0
- truthound/reporters/ci/github.py +388 -0
- truthound/reporters/ci/gitlab.py +471 -0
- truthound/reporters/ci/jenkins.py +525 -0
- truthound/reporters/console_reporter.py +299 -0
- truthound/reporters/factory.py +211 -0
- truthound/reporters/html_reporter.py +524 -0
- truthound/reporters/json_reporter.py +256 -0
- truthound/reporters/markdown_reporter.py +280 -0
- truthound/reporters/sdk/__init__.py +174 -0
- truthound/reporters/sdk/builder.py +558 -0
- truthound/reporters/sdk/mixins.py +1150 -0
- truthound/reporters/sdk/schema.py +1493 -0
- truthound/reporters/sdk/templates.py +666 -0
- truthound/reporters/sdk/testing.py +968 -0
- truthound/scanners.py +170 -0
- truthound/scheduling/__init__.py +122 -0
- truthound/scheduling/cron.py +1136 -0
- truthound/scheduling/presets.py +212 -0
- truthound/schema.py +275 -0
- truthound/secrets/__init__.py +173 -0
- truthound/secrets/base.py +618 -0
- truthound/secrets/cloud.py +682 -0
- truthound/secrets/integration.py +507 -0
- truthound/secrets/manager.py +633 -0
- truthound/secrets/oidc/__init__.py +172 -0
- truthound/secrets/oidc/base.py +902 -0
- truthound/secrets/oidc/credential_provider.py +623 -0
- truthound/secrets/oidc/exchangers.py +1001 -0
- truthound/secrets/oidc/github/__init__.py +110 -0
- truthound/secrets/oidc/github/claims.py +718 -0
- truthound/secrets/oidc/github/enhanced_provider.py +693 -0
- truthound/secrets/oidc/github/trust_policy.py +742 -0
- truthound/secrets/oidc/github/verification.py +723 -0
- truthound/secrets/oidc/github/workflow.py +691 -0
- truthound/secrets/oidc/providers.py +825 -0
- truthound/secrets/providers.py +506 -0
- truthound/secrets/resolver.py +495 -0
- truthound/stores/__init__.py +177 -0
- truthound/stores/backends/__init__.py +18 -0
- truthound/stores/backends/_protocols.py +340 -0
- truthound/stores/backends/azure_blob.py +530 -0
- truthound/stores/backends/concurrent_filesystem.py +915 -0
- truthound/stores/backends/connection_pool.py +1365 -0
- truthound/stores/backends/database.py +743 -0
- truthound/stores/backends/filesystem.py +538 -0
- truthound/stores/backends/gcs.py +399 -0
- truthound/stores/backends/memory.py +354 -0
- truthound/stores/backends/s3.py +434 -0
- truthound/stores/backpressure/__init__.py +84 -0
- truthound/stores/backpressure/base.py +375 -0
- truthound/stores/backpressure/circuit_breaker.py +434 -0
- truthound/stores/backpressure/monitor.py +376 -0
- truthound/stores/backpressure/strategies.py +677 -0
- truthound/stores/base.py +551 -0
- truthound/stores/batching/__init__.py +65 -0
- truthound/stores/batching/base.py +305 -0
- truthound/stores/batching/buffer.py +370 -0
- truthound/stores/batching/store.py +248 -0
- truthound/stores/batching/writer.py +521 -0
- truthound/stores/caching/__init__.py +60 -0
- truthound/stores/caching/backends.py +684 -0
- truthound/stores/caching/base.py +356 -0
- truthound/stores/caching/store.py +305 -0
- truthound/stores/compression/__init__.py +193 -0
- truthound/stores/compression/adaptive.py +694 -0
- truthound/stores/compression/base.py +514 -0
- truthound/stores/compression/pipeline.py +868 -0
- truthound/stores/compression/providers.py +672 -0
- truthound/stores/compression/streaming.py +832 -0
- truthound/stores/concurrency/__init__.py +81 -0
- truthound/stores/concurrency/atomic.py +556 -0
- truthound/stores/concurrency/index.py +775 -0
- truthound/stores/concurrency/locks.py +576 -0
- truthound/stores/concurrency/manager.py +482 -0
- truthound/stores/encryption/__init__.py +297 -0
- truthound/stores/encryption/base.py +952 -0
- truthound/stores/encryption/keys.py +1191 -0
- truthound/stores/encryption/pipeline.py +903 -0
- truthound/stores/encryption/providers.py +953 -0
- truthound/stores/encryption/streaming.py +950 -0
- truthound/stores/expectations.py +227 -0
- truthound/stores/factory.py +246 -0
- truthound/stores/migration/__init__.py +75 -0
- truthound/stores/migration/base.py +480 -0
- truthound/stores/migration/manager.py +347 -0
- truthound/stores/migration/registry.py +382 -0
- truthound/stores/migration/store.py +559 -0
- truthound/stores/observability/__init__.py +106 -0
- truthound/stores/observability/audit.py +718 -0
- truthound/stores/observability/config.py +270 -0
- truthound/stores/observability/factory.py +208 -0
- truthound/stores/observability/metrics.py +636 -0
- truthound/stores/observability/protocols.py +410 -0
- truthound/stores/observability/store.py +570 -0
- truthound/stores/observability/tracing.py +784 -0
- truthound/stores/replication/__init__.py +76 -0
- truthound/stores/replication/base.py +260 -0
- truthound/stores/replication/monitor.py +269 -0
- truthound/stores/replication/store.py +439 -0
- truthound/stores/replication/syncer.py +391 -0
- truthound/stores/results.py +359 -0
- truthound/stores/retention/__init__.py +77 -0
- truthound/stores/retention/base.py +378 -0
- truthound/stores/retention/policies.py +621 -0
- truthound/stores/retention/scheduler.py +279 -0
- truthound/stores/retention/store.py +526 -0
- truthound/stores/streaming/__init__.py +138 -0
- truthound/stores/streaming/base.py +801 -0
- truthound/stores/streaming/database.py +984 -0
- truthound/stores/streaming/filesystem.py +719 -0
- truthound/stores/streaming/reader.py +629 -0
- truthound/stores/streaming/s3.py +843 -0
- truthound/stores/streaming/writer.py +790 -0
- truthound/stores/tiering/__init__.py +108 -0
- truthound/stores/tiering/base.py +462 -0
- truthound/stores/tiering/manager.py +249 -0
- truthound/stores/tiering/policies.py +692 -0
- truthound/stores/tiering/store.py +526 -0
- truthound/stores/versioning/__init__.py +56 -0
- truthound/stores/versioning/base.py +376 -0
- truthound/stores/versioning/store.py +660 -0
- truthound/stores/versioning/strategies.py +353 -0
- truthound/types.py +56 -0
- truthound/validators/__init__.py +774 -0
- truthound/validators/aggregate/__init__.py +27 -0
- truthound/validators/aggregate/central.py +116 -0
- truthound/validators/aggregate/extremes.py +116 -0
- truthound/validators/aggregate/spread.py +118 -0
- truthound/validators/aggregate/sum.py +64 -0
- truthound/validators/aggregate/type.py +78 -0
- truthound/validators/anomaly/__init__.py +93 -0
- truthound/validators/anomaly/base.py +431 -0
- truthound/validators/anomaly/ml_based.py +1190 -0
- truthound/validators/anomaly/multivariate.py +647 -0
- truthound/validators/anomaly/statistical.py +599 -0
- truthound/validators/base.py +1089 -0
- truthound/validators/business_rule/__init__.py +46 -0
- truthound/validators/business_rule/base.py +147 -0
- truthound/validators/business_rule/checksum.py +509 -0
- truthound/validators/business_rule/financial.py +526 -0
- truthound/validators/cache.py +733 -0
- truthound/validators/completeness/__init__.py +39 -0
- truthound/validators/completeness/conditional.py +73 -0
- truthound/validators/completeness/default.py +98 -0
- truthound/validators/completeness/empty.py +103 -0
- truthound/validators/completeness/nan.py +337 -0
- truthound/validators/completeness/null.py +152 -0
- truthound/validators/cross_table/__init__.py +17 -0
- truthound/validators/cross_table/aggregate.py +333 -0
- truthound/validators/cross_table/row_count.py +122 -0
- truthound/validators/datetime/__init__.py +29 -0
- truthound/validators/datetime/format.py +78 -0
- truthound/validators/datetime/freshness.py +269 -0
- truthound/validators/datetime/order.py +73 -0
- truthound/validators/datetime/parseable.py +185 -0
- truthound/validators/datetime/range.py +202 -0
- truthound/validators/datetime/timezone.py +69 -0
- truthound/validators/distribution/__init__.py +49 -0
- truthound/validators/distribution/distribution.py +128 -0
- truthound/validators/distribution/monotonic.py +119 -0
- truthound/validators/distribution/outlier.py +178 -0
- truthound/validators/distribution/quantile.py +80 -0
- truthound/validators/distribution/range.py +254 -0
- truthound/validators/distribution/set.py +125 -0
- truthound/validators/distribution/statistical.py +459 -0
- truthound/validators/drift/__init__.py +79 -0
- truthound/validators/drift/base.py +427 -0
- truthound/validators/drift/multi_feature.py +401 -0
- truthound/validators/drift/numeric.py +395 -0
- truthound/validators/drift/psi.py +446 -0
- truthound/validators/drift/statistical.py +510 -0
- truthound/validators/enterprise.py +1658 -0
- truthound/validators/geospatial/__init__.py +80 -0
- truthound/validators/geospatial/base.py +97 -0
- truthound/validators/geospatial/boundary.py +238 -0
- truthound/validators/geospatial/coordinate.py +351 -0
- truthound/validators/geospatial/distance.py +399 -0
- truthound/validators/geospatial/polygon.py +665 -0
- truthound/validators/i18n/__init__.py +308 -0
- truthound/validators/i18n/bidi.py +571 -0
- truthound/validators/i18n/catalogs.py +570 -0
- truthound/validators/i18n/dialects.py +763 -0
- truthound/validators/i18n/extended_catalogs.py +549 -0
- truthound/validators/i18n/formatting.py +1434 -0
- truthound/validators/i18n/loader.py +1020 -0
- truthound/validators/i18n/messages.py +521 -0
- truthound/validators/i18n/plural.py +683 -0
- truthound/validators/i18n/protocols.py +855 -0
- truthound/validators/i18n/tms.py +1162 -0
- truthound/validators/localization/__init__.py +53 -0
- truthound/validators/localization/base.py +122 -0
- truthound/validators/localization/chinese.py +362 -0
- truthound/validators/localization/japanese.py +275 -0
- truthound/validators/localization/korean.py +524 -0
- truthound/validators/memory/__init__.py +94 -0
- truthound/validators/memory/approximate_knn.py +506 -0
- truthound/validators/memory/base.py +547 -0
- truthound/validators/memory/sgd_online.py +719 -0
- truthound/validators/memory/streaming_ecdf.py +753 -0
- truthound/validators/ml_feature/__init__.py +54 -0
- truthound/validators/ml_feature/base.py +249 -0
- truthound/validators/ml_feature/correlation.py +299 -0
- truthound/validators/ml_feature/leakage.py +344 -0
- truthound/validators/ml_feature/null_impact.py +270 -0
- truthound/validators/ml_feature/scale.py +264 -0
- truthound/validators/multi_column/__init__.py +89 -0
- truthound/validators/multi_column/arithmetic.py +284 -0
- truthound/validators/multi_column/base.py +231 -0
- truthound/validators/multi_column/comparison.py +273 -0
- truthound/validators/multi_column/consistency.py +312 -0
- truthound/validators/multi_column/statistical.py +299 -0
- truthound/validators/optimization/__init__.py +164 -0
- truthound/validators/optimization/aggregation.py +563 -0
- truthound/validators/optimization/covariance.py +556 -0
- truthound/validators/optimization/geo.py +626 -0
- truthound/validators/optimization/graph.py +587 -0
- truthound/validators/optimization/orchestrator.py +970 -0
- truthound/validators/optimization/profiling.py +1312 -0
- truthound/validators/privacy/__init__.py +223 -0
- truthound/validators/privacy/base.py +635 -0
- truthound/validators/privacy/ccpa.py +670 -0
- truthound/validators/privacy/gdpr.py +728 -0
- truthound/validators/privacy/global_patterns.py +604 -0
- truthound/validators/privacy/plugins.py +867 -0
- truthound/validators/profiling/__init__.py +52 -0
- truthound/validators/profiling/base.py +175 -0
- truthound/validators/profiling/cardinality.py +312 -0
- truthound/validators/profiling/entropy.py +391 -0
- truthound/validators/profiling/frequency.py +455 -0
- truthound/validators/pushdown_support.py +660 -0
- truthound/validators/query/__init__.py +91 -0
- truthound/validators/query/aggregate.py +346 -0
- truthound/validators/query/base.py +246 -0
- truthound/validators/query/column.py +249 -0
- truthound/validators/query/expression.py +274 -0
- truthound/validators/query/result.py +323 -0
- truthound/validators/query/row_count.py +264 -0
- truthound/validators/referential/__init__.py +80 -0
- truthound/validators/referential/base.py +395 -0
- truthound/validators/referential/cascade.py +391 -0
- truthound/validators/referential/circular.py +563 -0
- truthound/validators/referential/foreign_key.py +624 -0
- truthound/validators/referential/orphan.py +485 -0
- truthound/validators/registry.py +112 -0
- truthound/validators/schema/__init__.py +41 -0
- truthound/validators/schema/column_count.py +142 -0
- truthound/validators/schema/column_exists.py +80 -0
- truthound/validators/schema/column_order.py +82 -0
- truthound/validators/schema/column_pair.py +85 -0
- truthound/validators/schema/column_pair_set.py +195 -0
- truthound/validators/schema/column_type.py +94 -0
- truthound/validators/schema/multi_column.py +53 -0
- truthound/validators/schema/multi_column_aggregate.py +175 -0
- truthound/validators/schema/referential.py +274 -0
- truthound/validators/schema/table_schema.py +91 -0
- truthound/validators/schema_validator.py +219 -0
- truthound/validators/sdk/__init__.py +250 -0
- truthound/validators/sdk/builder.py +680 -0
- truthound/validators/sdk/decorators.py +474 -0
- truthound/validators/sdk/enterprise/__init__.py +211 -0
- truthound/validators/sdk/enterprise/docs.py +725 -0
- truthound/validators/sdk/enterprise/fuzzing.py +659 -0
- truthound/validators/sdk/enterprise/licensing.py +709 -0
- truthound/validators/sdk/enterprise/manager.py +543 -0
- truthound/validators/sdk/enterprise/resources.py +628 -0
- truthound/validators/sdk/enterprise/sandbox.py +766 -0
- truthound/validators/sdk/enterprise/signing.py +603 -0
- truthound/validators/sdk/enterprise/templates.py +865 -0
- truthound/validators/sdk/enterprise/versioning.py +659 -0
- truthound/validators/sdk/templates.py +757 -0
- truthound/validators/sdk/testing.py +807 -0
- truthound/validators/security/__init__.py +181 -0
- truthound/validators/security/redos/__init__.py +182 -0
- truthound/validators/security/redos/core.py +861 -0
- truthound/validators/security/redos/cpu_monitor.py +593 -0
- truthound/validators/security/redos/cve_database.py +791 -0
- truthound/validators/security/redos/ml/__init__.py +155 -0
- truthound/validators/security/redos/ml/base.py +785 -0
- truthound/validators/security/redos/ml/datasets.py +618 -0
- truthound/validators/security/redos/ml/features.py +359 -0
- truthound/validators/security/redos/ml/models.py +1000 -0
- truthound/validators/security/redos/ml/predictor.py +507 -0
- truthound/validators/security/redos/ml/storage.py +632 -0
- truthound/validators/security/redos/ml/training.py +571 -0
- truthound/validators/security/redos/ml_analyzer.py +937 -0
- truthound/validators/security/redos/optimizer.py +674 -0
- truthound/validators/security/redos/profiler.py +682 -0
- truthound/validators/security/redos/re2_engine.py +709 -0
- truthound/validators/security/redos.py +886 -0
- truthound/validators/security/sql_security.py +1247 -0
- truthound/validators/streaming/__init__.py +126 -0
- truthound/validators/streaming/base.py +292 -0
- truthound/validators/streaming/completeness.py +210 -0
- truthound/validators/streaming/mixin.py +575 -0
- truthound/validators/streaming/range.py +308 -0
- truthound/validators/streaming/sources.py +846 -0
- truthound/validators/string/__init__.py +57 -0
- truthound/validators/string/casing.py +158 -0
- truthound/validators/string/charset.py +96 -0
- truthound/validators/string/format.py +501 -0
- truthound/validators/string/json.py +77 -0
- truthound/validators/string/json_schema.py +184 -0
- truthound/validators/string/length.py +104 -0
- truthound/validators/string/like_pattern.py +237 -0
- truthound/validators/string/regex.py +202 -0
- truthound/validators/string/regex_extended.py +435 -0
- truthound/validators/table/__init__.py +88 -0
- truthound/validators/table/base.py +78 -0
- truthound/validators/table/column_count.py +198 -0
- truthound/validators/table/freshness.py +362 -0
- truthound/validators/table/row_count.py +251 -0
- truthound/validators/table/schema.py +333 -0
- truthound/validators/table/size.py +285 -0
- truthound/validators/timeout/__init__.py +102 -0
- truthound/validators/timeout/advanced/__init__.py +247 -0
- truthound/validators/timeout/advanced/circuit_breaker.py +675 -0
- truthound/validators/timeout/advanced/prediction.py +773 -0
- truthound/validators/timeout/advanced/priority.py +618 -0
- truthound/validators/timeout/advanced/redis_backend.py +770 -0
- truthound/validators/timeout/advanced/retry.py +721 -0
- truthound/validators/timeout/advanced/sampling.py +788 -0
- truthound/validators/timeout/advanced/sla.py +661 -0
- truthound/validators/timeout/advanced/telemetry.py +804 -0
- truthound/validators/timeout/cascade.py +477 -0
- truthound/validators/timeout/deadline.py +657 -0
- truthound/validators/timeout/degradation.py +525 -0
- truthound/validators/timeout/distributed.py +597 -0
- truthound/validators/timeseries/__init__.py +89 -0
- truthound/validators/timeseries/base.py +326 -0
- truthound/validators/timeseries/completeness.py +617 -0
- truthound/validators/timeseries/gap.py +485 -0
- truthound/validators/timeseries/monotonic.py +310 -0
- truthound/validators/timeseries/seasonality.py +422 -0
- truthound/validators/timeseries/trend.py +510 -0
- truthound/validators/uniqueness/__init__.py +59 -0
- truthound/validators/uniqueness/approximate.py +475 -0
- truthound/validators/uniqueness/distinct_values.py +253 -0
- truthound/validators/uniqueness/duplicate.py +118 -0
- truthound/validators/uniqueness/primary_key.py +140 -0
- truthound/validators/uniqueness/unique.py +191 -0
- truthound/validators/uniqueness/within_record.py +599 -0
- truthound/validators/utils.py +756 -0
- truthound-1.0.8.dist-info/METADATA +474 -0
- truthound-1.0.8.dist-info/RECORD +877 -0
- truthound-1.0.8.dist-info/WHEEL +4 -0
- truthound-1.0.8.dist-info/entry_points.txt +2 -0
- truthound-1.0.8.dist-info/licenses/LICENSE +190 -0
|
@@ -0,0 +1,635 @@
|
|
|
1
|
+
"""Base classes for privacy compliance validators.
|
|
2
|
+
|
|
3
|
+
This module provides extensible base classes for implementing
|
|
4
|
+
GDPR, CCPA, and other privacy regulation compliance validators.
|
|
5
|
+
|
|
6
|
+
Privacy Regulations Supported:
|
|
7
|
+
- GDPR (General Data Protection Regulation) - EU
|
|
8
|
+
- CCPA (California Consumer Privacy Act) - US/California
|
|
9
|
+
- LGPD (Lei Geral de Proteção de Dados) - Brazil
|
|
10
|
+
- PIPEDA (Personal Information Protection and Electronic Documents Act) - Canada
|
|
11
|
+
- APPI (Act on Protection of Personal Information) - Japan
- HIPAA (Health Insurance Portability and Accountability Act) - US
|
|
12
|
+
"""
|
|
13
|
+
|
|
14
|
+
from abc import abstractmethod
|
|
15
|
+
from dataclasses import dataclass, field
|
|
16
|
+
from enum import Enum
|
|
17
|
+
from typing import Any, Callable
|
|
18
|
+
import re
|
|
19
|
+
|
|
20
|
+
import polars as pl
|
|
21
|
+
|
|
22
|
+
from truthound.validators.base import (
|
|
23
|
+
Validator,
|
|
24
|
+
ValidationIssue,
|
|
25
|
+
ValidatorConfig,
|
|
26
|
+
StringValidatorMixin,
|
|
27
|
+
)
|
|
28
|
+
from truthound.types import Severity
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
class PrivacyRegulation(str, Enum):
    """Privacy regulations that a validator can target.

    Members are ``str``-valued, so each one compares equal to its
    lowercase code (for example ``PrivacyRegulation.GDPR == "gdpr"``).
    """

    # European Union: General Data Protection Regulation
    GDPR = "gdpr"
    # United States (California): California Consumer Privacy Act
    CCPA = "ccpa"
    # Brazil: Lei Geral de Proteção de Dados
    LGPD = "lgpd"
    # Canada: Personal Information Protection and Electronic Documents Act
    PIPEDA = "pipeda"
    # Japan: Act on the Protection of Personal Information
    APPI = "appi"
    # United States: Health Insurance Portability and Accountability Act
    HIPAA = "hipaa"
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
class PIICategory(str, Enum):
    """Categories of personally identifiable information.

    The first group holds ordinary identifiers. The second group holds the
    sensitive categories that this module treats as GDPR "special category"
    data (Article 9), which generally call for explicit consent or another
    specific legal basis.
    """

    # --- Standard PII ---------------------------------------------------
    # Name, email, phone
    DIRECT_IDENTIFIER = "direct_identifier"
    # IP address, device ID, cookie
    INDIRECT_IDENTIFIER = "indirect_identifier"
    # Credit card, bank account
    FINANCIAL = "financial"
    # SSN, passport, national ID
    GOVERNMENT_ID = "government_id"

    # --- Special categories (GDPR Article 9) ----------------------------
    # Racial or ethnic origin
    RACIAL_ETHNIC = "racial_ethnic"
    # Political opinions
    POLITICAL = "political"
    # Religious or philosophical beliefs
    RELIGIOUS = "religious"
    # Trade union membership
    TRADE_UNION = "trade_union"
    # Genetic data
    GENETIC = "genetic"
    # Biometric data
    BIOMETRIC = "biometric"
    # Health data
    HEALTH = "health"
    # Sex life or sexual orientation
    SEX_LIFE = "sex_life"
    # Criminal convictions (covered by GDPR Article 10; grouped here with
    # the special categories because it is handled with the same severity)
    CRIMINAL = "criminal"
|
|
61
|
+
|
|
62
|
+
|
|
63
|
+
class ConsentStatus(str, Enum):
    """Consent state recorded for a data subject (GDPR compliance)."""

    # The subject actively opted in.
    EXPLICIT = "explicit"
    # Consent was only implied; this may not satisfy GDPR.
    IMPLICIT = "implicit"
    # The subject has withdrawn a previously given consent.
    WITHDRAWN = "withdrawn"
    # No consent needed, e.g. processing rests on legitimate interest.
    NOT_REQUIRED = "not_required"
    # Nothing is known about the consent state.
    UNKNOWN = "unknown"
|
|
71
|
+
|
|
72
|
+
|
|
73
|
+
class LegalBasis(str, Enum):
    """Lawful bases for processing personal data (GDPR Article 6)."""

    # The data subject has given consent.
    CONSENT = "consent"
    # Processing is needed to perform a contract.
    CONTRACT = "contract"
    # Processing is needed to comply with a legal obligation.
    LEGAL_OBLIGATION = "legal_obligation"
    # Processing is needed to protect someone's vital interests.
    VITAL_INTERESTS = "vital_interests"
    # Processing is needed for a task carried out in the public interest.
    PUBLIC_TASK = "public_task"
    # Processing is needed for legitimate interests.
    LEGITIMATE_INTEREST = "legitimate_interest"
|
|
82
|
+
|
|
83
|
+
|
|
84
|
+
@dataclass
class PIIFieldDefinition:
    """Describe one kind of PII and how to recognise it.

    A definition can be recognised in two independent ways: by the column
    name (``column_hints``) and by the cell values (``pattern``).

    Attributes:
        name: Human-readable name of the PII type.
        pattern: Compiled regex applied to individual values, or None when
            the type is recognised by column name only.
        column_hints: Substrings looked for in the column name.
        category: PII category this definition belongs to.
        regulations: Regulations this definition is relevant to.
        requires_consent: Whether processing this data requires consent.
        is_special_category: Whether this is GDPR Article 9 data.
        retention_sensitive: Whether retention limits apply to this data.
        confidence_base: Starting confidence score before adjustments.
        description: Free-text description.
    """

    name: str
    pattern: re.Pattern | None = None
    column_hints: list[str] = field(default_factory=list)
    category: PIICategory = PIICategory.DIRECT_IDENTIFIER
    regulations: list[PrivacyRegulation] = field(default_factory=list)
    requires_consent: bool = True
    is_special_category: bool = False  # GDPR Article 9
    retention_sensitive: bool = True
    confidence_base: int = 85
    description: str = ""

    @staticmethod
    def _normalize_name(text: str) -> str:
        """Lower-case *text* and turn ``_`` / ``-`` separators into spaces."""
        return text.lower().replace("_", " ").replace("-", " ")

    def matches_column_name(self, column_name: str) -> bool:
        """Check if column name matches hints.

        Both the column name and every hint are normalised the same way
        (lower-cased, ``_`` and ``-`` replaced by spaces) before the
        substring test, so a hint written as ``"first_name"`` matches the
        columns ``first_name``, ``first-name`` and ``First Name`` alike.
        Empty hints are ignored; otherwise they would match every column.

        Args:
            column_name: Name of the column to test.

        Returns:
            True if any non-empty hint occurs in the normalised column name.
        """
        normalized_column = self._normalize_name(column_name)
        return any(
            self._normalize_name(hint) in normalized_column
            for hint in self.column_hints
            if hint
        )

    def matches_value(self, value: str) -> bool:
        """Check if value matches pattern.

        The pattern is applied with ``re.Pattern.match``, i.e. it must match
        starting at the beginning of *value*.

        Args:
            value: String value to test.

        Returns:
            False when no pattern is configured, otherwise whether the
            pattern matches at the start of *value*.
        """
        if self.pattern is None:
            return False
        return bool(self.pattern.match(value))
|
|
109
|
+
|
|
110
|
+
|
|
111
|
+
@dataclass
class PrivacyFinding:
    """A single privacy compliance finding for one column.

    Attributes:
        column: Column the finding refers to.
        pii_type: Name of the detected PII type.
        category: PII category of the detection.
        regulation: Regulation the finding was raised under.
        violation_type: Identifier of the kind of violation.
        count: Number of (estimated) affected values.
        confidence: Confidence score of the detection.
        severity: Severity assigned to the finding.
        recommendation: Suggested remediation text.
        legal_basis_required: Whether a legal basis must be documented.
        requires_consent: Whether consent is required for this data.
        sample_values: Optional example values from the column.
    """

    column: str
    pii_type: str
    category: PIICategory
    regulation: PrivacyRegulation
    violation_type: str
    count: int
    confidence: int
    severity: Severity
    recommendation: str
    legal_basis_required: bool = True
    requires_consent: bool = True
    sample_values: list[Any] | None = None

    def to_dict(self) -> dict:
        """Return the finding as a plain dictionary.

        Enum-valued attributes (category, regulation, severity) are emitted
        as their ``.value``; all other attributes are copied as they are.
        Keys appear in field declaration order.
        """
        ordered_keys = (
            "column",
            "pii_type",
            "category",
            "regulation",
            "violation_type",
            "count",
            "confidence",
            "severity",
            "recommendation",
            "legal_basis_required",
            "requires_consent",
            "sample_values",
        )
        enum_keys = ("category", "regulation", "severity")

        exported: dict = {}
        for key in ordered_keys:
            attribute = getattr(self, key)
            exported[key] = attribute.value if key in enum_keys else attribute
        return exported
|
|
144
|
+
|
|
145
|
+
|
|
146
|
+
class PrivacyValidator(Validator, StringValidatorMixin):
    """Base class for privacy compliance validators.

    Provides common functionality for detecting PII and
    validating compliance with privacy regulations.

    Subclasses should implement:
        - get_pii_definitions(): Return list of PII patterns to check
        - validate(): Full validation logic
    """

    category = "privacy"
    regulation: PrivacyRegulation = PrivacyRegulation.GDPR

    def __init__(
        self,
        columns: list[str] | None = None,
        sample_size: int = 1000,
        min_confidence: int = 70,
        detect_special_categories: bool = True,
        check_retention: bool = False,
        retention_days: int | None = None,
        date_column: str | None = None,
        **kwargs: Any,
    ):
        """Initialize privacy validator.

        Args:
            columns: Specific columns to check (None = all string columns)
            sample_size: Number of rows to sample for pattern detection
            min_confidence: Minimum confidence threshold for reporting
            detect_special_categories: Whether to detect GDPR Article 9 data
            check_retention: Whether to check data retention compliance
            retention_days: Maximum retention period in days
            date_column: Column containing record date for retention check
            **kwargs: Additional config
        """
        super().__init__(**kwargs)
        self.columns = columns
        self.sample_size = sample_size
        self.min_confidence = min_confidence
        self.detect_special_categories = detect_special_categories
        self.check_retention = check_retention
        self.retention_days = retention_days
        self.date_column = date_column

    @abstractmethod
    def get_pii_definitions(self) -> list[PIIFieldDefinition]:
        """Get PII field definitions for this regulation.

        Returns:
            List of PIIFieldDefinition objects
        """
        pass

    def _detect_pii_in_column(
        self,
        df: pl.DataFrame,
        column: str,
        pii_defs: list[PIIFieldDefinition],
    ) -> list[PrivacyFinding]:
        """Detect PII in a single column.

        Only the first ``sample_size`` non-null values are inspected; the
        number of matches is then scaled up to the full non-null column.

        Args:
            df: Input DataFrame
            column: Column to check
            pii_defs: PII definitions to check against

        Returns:
            List of privacy findings
        """
        findings: list[PrivacyFinding] = []

        col_data = df.get_column(column).drop_nulls()
        total_values = len(col_data)
        if total_values == 0:
            return findings

        # Sample for performance
        sample = col_data.head(min(total_values, self.sample_size))
        sample_values = [str(v) for v in sample.to_list() if v is not None]

        for pii_def in pii_defs:
            # Skip special categories if not requested
            if pii_def.is_special_category and not self.detect_special_categories:
                continue

            # Calculate confidence
            confidence = self._calculate_confidence(
                column, sample_values, pii_def
            )

            if confidence < self.min_confidence:
                continue

            # Count matches
            match_count = sum(
                1 for v in sample_values
                if pii_def.matches_value(v)
            )

            # Without a value match, the column name is the only evidence.
            if match_count == 0 and not pii_def.matches_column_name(column):
                continue

            # Extrapolate to the full dataset with integer arithmetic.
            # A float ratio can land just below the true value (for
            # example 49 * (1 / 49) == 0.9999999999999999), which would
            # truncate the estimate to one less than the real match count.
            if sample_values:
                estimated_count = total_values * match_count // len(sample_values)
            else:
                estimated_count = 0

            # Determine severity based on category
            severity = self._get_severity_for_category(pii_def.category)

            # Create finding. When only the column name matched there is no
            # value-based estimate, so the whole non-null column is counted.
            finding = PrivacyFinding(
                column=column,
                pii_type=pii_def.name,
                category=pii_def.category,
                regulation=self.regulation,
                violation_type=f"potential_{pii_def.category.value}_detected",
                count=estimated_count or total_values,
                confidence=confidence,
                severity=severity,
                recommendation=self._get_recommendation(pii_def),
                legal_basis_required=True,
                requires_consent=pii_def.requires_consent,
                sample_values=sample_values[:3] if sample_values else None,
            )
            findings.append(finding)

        return findings

    def _calculate_confidence(
        self,
        column: str,
        values: list[str],
        pii_def: PIIFieldDefinition,
    ) -> int:
        """Calculate confidence score for PII detection.

        Starts from ``pii_def.confidence_base``, adds 15 when the column
        name matches a hint, then adjusts by the share of sample values
        matching the pattern: +10 above 80%, +5 above 50%, -20 below 10%.
        Boosts are capped at 99. The penalty does not push the score below
        50, and it never raises a score that is already below 50.

        Args:
            column: Column name
            values: Sample values
            pii_def: PII definition

        Returns:
            Confidence score (0-100)
        """
        confidence = pii_def.confidence_base

        # Boost for column name match
        if pii_def.matches_column_name(column):
            confidence = min(99, confidence + 15)

        # Adjust based on pattern match ratio
        if values and pii_def.pattern:
            match_count = sum(1 for v in values if pii_def.matches_value(v))
            match_ratio = match_count / len(values)

            if match_ratio > 0.8:
                confidence = min(99, confidence + 10)
            elif match_ratio > 0.5:
                confidence = min(99, confidence + 5)
            elif match_ratio < 0.1:
                # Floor the penalised score at 50, but keep the outer min()
                # so a score already below 50 is not increased by the
                # "penalty".
                confidence = min(confidence, max(50, confidence - 20))

        return confidence

    def _get_severity_for_category(self, category: PIICategory) -> Severity:
        """Get severity level for PII category.

        Args:
            category: PII category

        Returns:
            CRITICAL for the special categories, HIGH for government IDs
            and financial data, MEDIUM for direct identifiers, LOW otherwise.
        """
        # Special categories (GDPR Article 9) are always critical
        special_categories = {
            PIICategory.RACIAL_ETHNIC,
            PIICategory.POLITICAL,
            PIICategory.RELIGIOUS,
            PIICategory.TRADE_UNION,
            PIICategory.GENETIC,
            PIICategory.BIOMETRIC,
            PIICategory.HEALTH,
            PIICategory.SEX_LIFE,
            PIICategory.CRIMINAL,
        }

        if category in special_categories:
            return Severity.CRITICAL

        # Government IDs and financial data are high severity
        if category in {PIICategory.GOVERNMENT_ID, PIICategory.FINANCIAL}:
            return Severity.HIGH

        # Direct identifiers are medium severity
        if category == PIICategory.DIRECT_IDENTIFIER:
            return Severity.MEDIUM

        return Severity.LOW

    def _get_recommendation(self, pii_def: PIIFieldDefinition) -> str:
        """Get compliance recommendation for PII finding.

        The text is chosen from ``pii_def.is_special_category`` first, then
        from ``pii_def.category``; every other case gets the generic text.

        Args:
            pii_def: PII definition

        Returns:
            Recommendation string
        """
        if pii_def.is_special_category:
            return (
                f"CRITICAL: {pii_def.name} is a special category under GDPR Article 9. "
                "Explicit consent or specific legal basis required. "
                "Consider data minimization or pseudonymization."
            )

        if pii_def.category == PIICategory.GOVERNMENT_ID:
            return (
                f"HIGH: {pii_def.name} detected. "
                "Ensure proper encryption, access controls, and documented legal basis. "
                "Consider masking or tokenization."
            )

        if pii_def.category == PIICategory.FINANCIAL:
            return (
                f"HIGH: {pii_def.name} detected. "
                "Apply PCI-DSS compliant handling. "
                "Use encryption and limit access."
            )

        return (
            f"MEDIUM: {pii_def.name} detected. "
            "Document legal basis for processing and ensure appropriate consent."
        )

    def _convert_findings_to_issues(
        self,
        findings: list[PrivacyFinding],
    ) -> list[ValidationIssue]:
        """Convert privacy findings to validation issues.

        The issue type is prefixed with this validator's regulation code.

        Args:
            findings: List of privacy findings

        Returns:
            List of ValidationIssue objects, one per finding, in order
        """
        issues: list[ValidationIssue] = []

        for finding in findings:
            issue = ValidationIssue(
                column=finding.column,
                issue_type=f"{self.regulation.value}_{finding.violation_type}",
                count=finding.count,
                severity=finding.severity,
                details=(
                    f"{finding.pii_type} ({finding.category.value}) detected with "
                    f"{finding.confidence}% confidence. {finding.recommendation}"
                ),
                expected=f"No unprotected {finding.pii_type}",
                actual=f"Found {finding.count} potential instances",
                sample_values=finding.sample_values,
            )
            issues.append(issue)

        return issues
|
|
415
|
+
|
|
416
|
+
|
|
417
|
+
class DataRetentionValidator(PrivacyValidator):
    """Validates data retention compliance.

    Checks that personal data is not retained beyond the specified
    retention period as required by GDPR Article 5(1)(e).
    """

    name = "data_retention"

    def __init__(
        self,
        date_column: str,
        retention_days: int,
        pii_columns: list[str] | None = None,
        **kwargs: Any,
    ):
        """Initialize retention validator.

        Args:
            date_column: Column containing record creation/update date
            retention_days: Maximum retention period in days
            pii_columns: Columns containing PII to check (stored on the
                instance; not read by this class's own validation)
            **kwargs: Additional config
        """
        super().__init__(
            check_retention=True,
            retention_days=retention_days,
            date_column=date_column,
            **kwargs,
        )
        self.pii_columns = pii_columns

    def get_pii_definitions(self) -> list[PIIFieldDefinition]:
        """Return empty list - retention validator doesn't detect PII types."""
        return []

    def validate(self, lf: pl.LazyFrame) -> list[ValidationIssue]:
        """Validate data retention compliance.

        Counts the rows whose date is older than ``retention_days`` before
        now. Date and Datetime columns are compared directly; any other
        column is parsed from strings first. Time-zone-aware columns are
        compared against an aware cutoff in UTC, naive columns against the
        local naive ``datetime.now()``.

        Args:
            lf: Input LazyFrame

        Returns:
            List of validation issues: a single "retention_check_failed"
            issue when the date column is missing or cannot be evaluated,
            a "retention_exceeded" issue when expired rows exist, otherwise
            an empty list.
        """
        from datetime import datetime, timedelta, timezone

        issues: list[ValidationIssue] = []

        schema = lf.collect_schema()

        # Check if date column exists
        if self.date_column not in schema.names():
            return [ValidationIssue(
                column=self.date_column,
                issue_type=f"{self.regulation.value}_retention_check_failed",
                count=0,
                severity=Severity.HIGH,
                details=f"Date column '{self.date_column}' not found for retention check",
            )]

        # Length of the allowed retention window
        retention_window = timedelta(days=self.retention_days)

        # Count records beyond retention
        df = lf.collect()

        try:
            # Handle different date formats
            date_col = df[self.date_column]

            if date_col.dtype in (pl.Date, pl.Datetime):
                dates = df.select(pl.col(self.date_column))
            else:
                # Try to parse as string
                dates = df.select(pl.col(self.date_column).str.to_datetime())

            date_expr = pl.col(self.date_column)
            resolved_dtype = dates.schema[self.date_column]

            if getattr(resolved_dtype, "time_zone", None) is not None:
                # Aware timestamps cannot be compared with a naive cutoff;
                # normalise the column to UTC and use an aware "now".
                date_expr = date_expr.dt.convert_time_zone("UTC")
                threshold = datetime.now(timezone.utc) - retention_window
            else:
                threshold = datetime.now() - retention_window

            expired_count = dates.filter(date_expr < threshold).height
        except Exception:
            # Best effort: any failure while parsing or comparing is
            # reported as an issue instead of aborting the validation run.
            return [ValidationIssue(
                column=self.date_column,
                issue_type=f"{self.regulation.value}_retention_check_failed",
                count=0,
                severity=Severity.MEDIUM,
                details=f"Could not parse date column '{self.date_column}'",
            )]

        if expired_count > 0:
            total_rows = df.height
            ratio = expired_count / total_rows if total_rows > 0 else 0

            issues.append(ValidationIssue(
                column=self.date_column,
                issue_type=f"{self.regulation.value}_retention_exceeded",
                count=expired_count,
                # More than 10% expired rows escalates the finding to HIGH
                severity=Severity.HIGH if ratio > 0.1 else Severity.MEDIUM,
                details=(
                    f"Found {expired_count} records ({ratio:.1%}) exceeding "
                    f"{self.retention_days}-day retention period. "
                    "Consider implementing automated data purging."
                ),
                expected=f"All records within {self.retention_days} days",
                actual=f"{expired_count} records exceed retention period",
            ))

        return issues
|
|
524
|
+
|
|
525
|
+
|
|
526
|
+
class ConsentValidator(PrivacyValidator):
    """Validates consent tracking compliance.

    Checks that proper consent records exist for PII processing
    as required by GDPR Article 7.

    For every configured PII column that is present in the data, each row
    holding a non-null PII value must carry a consent value that matches
    (case-insensitively) one of ``valid_consent_values``. Rows whose consent
    value is null or not in that list are reported.
    """

    name = "consent_tracking"

    def __init__(
        self,
        consent_column: str,
        pii_columns: list[str],
        valid_consent_values: list[str] | None = None,
        require_explicit: bool = True,
        **kwargs: Any,
    ):
        """Initialize consent validator.

        Args:
            consent_column: Column containing consent status
            pii_columns: Columns containing PII that require consent
            valid_consent_values: Values indicating valid consent. When
                omitted (or empty) a built-in list of affirmative values
                is used.
            require_explicit: Whether explicit consent is required. The
                flag is stored on the instance; ``validate`` in this class
                does not consult it.
            **kwargs: Additional config
        """
        super().__init__(**kwargs)
        self.consent_column = consent_column
        self.pii_columns = pii_columns
        # NOTE: ``or`` means an explicitly passed empty list also falls back
        # to the defaults below.
        self.valid_consent_values = valid_consent_values or [
            "yes", "true", "1", "explicit", "granted", "accepted"
        ]
        self.require_explicit = require_explicit

    def get_pii_definitions(self) -> list[PIIFieldDefinition]:
        """Return empty list - consent validator uses explicit columns."""
        return []

    def validate(self, lf: pl.LazyFrame) -> list[ValidationIssue]:
        """Validate consent tracking compliance.

        Args:
            lf: Input LazyFrame

        Returns:
            List of validation issues. A single CRITICAL issue is returned
            when the consent column is absent; otherwise one issue per PII
            column that has rows with PII but without valid consent.
            Configured PII columns that are not in the frame are skipped.
        """
        issues: list[ValidationIssue] = []

        # Resolve the schema once; every membership test below reuses it.
        column_names = set(lf.collect_schema().names())

        # Check if consent column exists
        if self.consent_column not in column_names:
            return [ValidationIssue(
                column=self.consent_column,
                issue_type=f"{self.regulation.value}_consent_column_missing",
                count=0,
                severity=Severity.CRITICAL,
                details=(
                    f"Consent tracking column '{self.consent_column}' not found. "
                    "GDPR requires documented consent for PII processing."
                ),
            )]

        # Only PII columns that actually exist can be checked. Order and
        # duplicates from the configuration are preserved so the emitted
        # issues match the configured list.
        present_pii = [col for col in self.pii_columns if col in column_names]
        if not present_pii:
            # Nothing to check, so avoid materializing the frame at all.
            return issues

        # Materialize only the columns this check reads instead of the whole
        # frame. dict.fromkeys de-duplicates while keeping order, because
        # selecting the same column name twice is an error.
        needed = list(dict.fromkeys([self.consent_column, *present_pii]))
        df = lf.select(needed).collect()

        if df.height == 0:
            return issues

        # Consent is compared case-insensitively against the accepted values.
        valid_values = [v.lower() for v in self.valid_consent_values]
        consent = pl.col(self.consent_column)
        # Built once: true for rows whose consent is null or not accepted.
        lacks_valid_consent = (
            consent.is_null()
            | ~consent.cast(pl.Utf8).str.to_lowercase().is_in(valid_values)
        )

        # Count records with PII but without valid consent
        for pii_col in present_pii:
            # Records with PII data
            has_pii = df.filter(pl.col(pii_col).is_not_null())
            pii_rows = has_pii.height

            if pii_rows == 0:
                continue

            # Records with PII but invalid/missing consent
            missing_count = has_pii.filter(lacks_valid_consent).height

            if missing_count > 0:
                ratio = missing_count / pii_rows

                issues.append(ValidationIssue(
                    column=pii_col,
                    issue_type=f"{self.regulation.value}_consent_missing",
                    count=missing_count,
                    # More than 10% of PII rows lacking consent is CRITICAL.
                    severity=Severity.CRITICAL if ratio > 0.1 else Severity.HIGH,
                    details=(
                        f"Found {missing_count} records ({ratio:.1%}) with PII in "
                        f"'{pii_col}' but without valid consent in '{self.consent_column}'. "
                        "GDPR Article 7 requires demonstrable consent."
                    ),
                    expected="Valid consent for all PII records",
                    actual=f"{missing_count} records lack consent",
                ))

        return issues
|