truthound 1.0.8__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- truthound/__init__.py +162 -0
- truthound/adapters.py +100 -0
- truthound/api.py +365 -0
- truthound/audit/__init__.py +248 -0
- truthound/audit/core.py +967 -0
- truthound/audit/filters.py +620 -0
- truthound/audit/formatters.py +707 -0
- truthound/audit/logger.py +902 -0
- truthound/audit/middleware.py +571 -0
- truthound/audit/storage.py +1083 -0
- truthound/benchmark/__init__.py +123 -0
- truthound/benchmark/base.py +757 -0
- truthound/benchmark/comparison.py +635 -0
- truthound/benchmark/generators.py +706 -0
- truthound/benchmark/reporters.py +718 -0
- truthound/benchmark/runner.py +635 -0
- truthound/benchmark/scenarios.py +712 -0
- truthound/cache.py +252 -0
- truthound/checkpoint/__init__.py +136 -0
- truthound/checkpoint/actions/__init__.py +164 -0
- truthound/checkpoint/actions/base.py +324 -0
- truthound/checkpoint/actions/custom.py +234 -0
- truthound/checkpoint/actions/discord_notify.py +290 -0
- truthound/checkpoint/actions/email_notify.py +405 -0
- truthound/checkpoint/actions/github_action.py +406 -0
- truthound/checkpoint/actions/opsgenie.py +1499 -0
- truthound/checkpoint/actions/pagerduty.py +226 -0
- truthound/checkpoint/actions/slack_notify.py +233 -0
- truthound/checkpoint/actions/store_result.py +249 -0
- truthound/checkpoint/actions/teams_notify.py +1570 -0
- truthound/checkpoint/actions/telegram_notify.py +419 -0
- truthound/checkpoint/actions/update_docs.py +552 -0
- truthound/checkpoint/actions/webhook.py +293 -0
- truthound/checkpoint/analytics/__init__.py +147 -0
- truthound/checkpoint/analytics/aggregations/__init__.py +23 -0
- truthound/checkpoint/analytics/aggregations/rollup.py +481 -0
- truthound/checkpoint/analytics/aggregations/time_bucket.py +306 -0
- truthound/checkpoint/analytics/analyzers/__init__.py +17 -0
- truthound/checkpoint/analytics/analyzers/anomaly.py +386 -0
- truthound/checkpoint/analytics/analyzers/base.py +270 -0
- truthound/checkpoint/analytics/analyzers/forecast.py +421 -0
- truthound/checkpoint/analytics/analyzers/trend.py +314 -0
- truthound/checkpoint/analytics/models.py +292 -0
- truthound/checkpoint/analytics/protocols.py +549 -0
- truthound/checkpoint/analytics/service.py +718 -0
- truthound/checkpoint/analytics/stores/__init__.py +16 -0
- truthound/checkpoint/analytics/stores/base.py +306 -0
- truthound/checkpoint/analytics/stores/memory_store.py +353 -0
- truthound/checkpoint/analytics/stores/sqlite_store.py +557 -0
- truthound/checkpoint/analytics/stores/timescale_store.py +501 -0
- truthound/checkpoint/async_actions.py +794 -0
- truthound/checkpoint/async_base.py +708 -0
- truthound/checkpoint/async_checkpoint.py +617 -0
- truthound/checkpoint/async_runner.py +639 -0
- truthound/checkpoint/checkpoint.py +527 -0
- truthound/checkpoint/ci/__init__.py +61 -0
- truthound/checkpoint/ci/detector.py +355 -0
- truthound/checkpoint/ci/reporter.py +436 -0
- truthound/checkpoint/ci/templates.py +454 -0
- truthound/checkpoint/circuitbreaker/__init__.py +133 -0
- truthound/checkpoint/circuitbreaker/breaker.py +542 -0
- truthound/checkpoint/circuitbreaker/core.py +252 -0
- truthound/checkpoint/circuitbreaker/detection.py +459 -0
- truthound/checkpoint/circuitbreaker/middleware.py +389 -0
- truthound/checkpoint/circuitbreaker/registry.py +357 -0
- truthound/checkpoint/distributed/__init__.py +139 -0
- truthound/checkpoint/distributed/backends/__init__.py +35 -0
- truthound/checkpoint/distributed/backends/celery_backend.py +503 -0
- truthound/checkpoint/distributed/backends/kubernetes_backend.py +696 -0
- truthound/checkpoint/distributed/backends/local_backend.py +397 -0
- truthound/checkpoint/distributed/backends/ray_backend.py +625 -0
- truthound/checkpoint/distributed/base.py +774 -0
- truthound/checkpoint/distributed/orchestrator.py +765 -0
- truthound/checkpoint/distributed/protocols.py +842 -0
- truthound/checkpoint/distributed/registry.py +449 -0
- truthound/checkpoint/idempotency/__init__.py +120 -0
- truthound/checkpoint/idempotency/core.py +295 -0
- truthound/checkpoint/idempotency/fingerprint.py +454 -0
- truthound/checkpoint/idempotency/locking.py +604 -0
- truthound/checkpoint/idempotency/service.py +592 -0
- truthound/checkpoint/idempotency/stores.py +653 -0
- truthound/checkpoint/monitoring/__init__.py +134 -0
- truthound/checkpoint/monitoring/aggregators/__init__.py +15 -0
- truthound/checkpoint/monitoring/aggregators/base.py +372 -0
- truthound/checkpoint/monitoring/aggregators/realtime.py +300 -0
- truthound/checkpoint/monitoring/aggregators/window.py +493 -0
- truthound/checkpoint/monitoring/collectors/__init__.py +17 -0
- truthound/checkpoint/monitoring/collectors/base.py +257 -0
- truthound/checkpoint/monitoring/collectors/memory_collector.py +617 -0
- truthound/checkpoint/monitoring/collectors/prometheus_collector.py +451 -0
- truthound/checkpoint/monitoring/collectors/redis_collector.py +518 -0
- truthound/checkpoint/monitoring/events.py +410 -0
- truthound/checkpoint/monitoring/protocols.py +636 -0
- truthound/checkpoint/monitoring/service.py +578 -0
- truthound/checkpoint/monitoring/views/__init__.py +17 -0
- truthound/checkpoint/monitoring/views/base.py +172 -0
- truthound/checkpoint/monitoring/views/queue_view.py +220 -0
- truthound/checkpoint/monitoring/views/task_view.py +240 -0
- truthound/checkpoint/monitoring/views/worker_view.py +263 -0
- truthound/checkpoint/registry.py +337 -0
- truthound/checkpoint/runner.py +356 -0
- truthound/checkpoint/transaction/__init__.py +133 -0
- truthound/checkpoint/transaction/base.py +389 -0
- truthound/checkpoint/transaction/compensatable.py +537 -0
- truthound/checkpoint/transaction/coordinator.py +576 -0
- truthound/checkpoint/transaction/executor.py +622 -0
- truthound/checkpoint/transaction/idempotency.py +534 -0
- truthound/checkpoint/transaction/saga/__init__.py +143 -0
- truthound/checkpoint/transaction/saga/builder.py +584 -0
- truthound/checkpoint/transaction/saga/definition.py +515 -0
- truthound/checkpoint/transaction/saga/event_store.py +542 -0
- truthound/checkpoint/transaction/saga/patterns.py +833 -0
- truthound/checkpoint/transaction/saga/runner.py +718 -0
- truthound/checkpoint/transaction/saga/state_machine.py +793 -0
- truthound/checkpoint/transaction/saga/strategies.py +780 -0
- truthound/checkpoint/transaction/saga/testing.py +886 -0
- truthound/checkpoint/triggers/__init__.py +58 -0
- truthound/checkpoint/triggers/base.py +237 -0
- truthound/checkpoint/triggers/event.py +385 -0
- truthound/checkpoint/triggers/schedule.py +355 -0
- truthound/cli.py +2358 -0
- truthound/cli_modules/__init__.py +124 -0
- truthound/cli_modules/advanced/__init__.py +45 -0
- truthound/cli_modules/advanced/benchmark.py +343 -0
- truthound/cli_modules/advanced/docs.py +225 -0
- truthound/cli_modules/advanced/lineage.py +209 -0
- truthound/cli_modules/advanced/ml.py +320 -0
- truthound/cli_modules/advanced/realtime.py +196 -0
- truthound/cli_modules/checkpoint/__init__.py +46 -0
- truthound/cli_modules/checkpoint/init.py +114 -0
- truthound/cli_modules/checkpoint/list.py +71 -0
- truthound/cli_modules/checkpoint/run.py +159 -0
- truthound/cli_modules/checkpoint/validate.py +67 -0
- truthound/cli_modules/common/__init__.py +71 -0
- truthound/cli_modules/common/errors.py +414 -0
- truthound/cli_modules/common/options.py +419 -0
- truthound/cli_modules/common/output.py +507 -0
- truthound/cli_modules/common/protocol.py +552 -0
- truthound/cli_modules/core/__init__.py +48 -0
- truthound/cli_modules/core/check.py +123 -0
- truthound/cli_modules/core/compare.py +104 -0
- truthound/cli_modules/core/learn.py +57 -0
- truthound/cli_modules/core/mask.py +77 -0
- truthound/cli_modules/core/profile.py +65 -0
- truthound/cli_modules/core/scan.py +61 -0
- truthound/cli_modules/profiler/__init__.py +51 -0
- truthound/cli_modules/profiler/auto_profile.py +175 -0
- truthound/cli_modules/profiler/metadata.py +107 -0
- truthound/cli_modules/profiler/suite.py +283 -0
- truthound/cli_modules/registry.py +431 -0
- truthound/cli_modules/scaffolding/__init__.py +89 -0
- truthound/cli_modules/scaffolding/base.py +631 -0
- truthound/cli_modules/scaffolding/commands.py +545 -0
- truthound/cli_modules/scaffolding/plugins.py +1072 -0
- truthound/cli_modules/scaffolding/reporters.py +594 -0
- truthound/cli_modules/scaffolding/validators.py +1127 -0
- truthound/common/__init__.py +18 -0
- truthound/common/resilience/__init__.py +130 -0
- truthound/common/resilience/bulkhead.py +266 -0
- truthound/common/resilience/circuit_breaker.py +516 -0
- truthound/common/resilience/composite.py +332 -0
- truthound/common/resilience/config.py +292 -0
- truthound/common/resilience/protocols.py +217 -0
- truthound/common/resilience/rate_limiter.py +404 -0
- truthound/common/resilience/retry.py +341 -0
- truthound/datadocs/__init__.py +260 -0
- truthound/datadocs/base.py +571 -0
- truthound/datadocs/builder.py +761 -0
- truthound/datadocs/charts.py +764 -0
- truthound/datadocs/dashboard/__init__.py +63 -0
- truthound/datadocs/dashboard/app.py +576 -0
- truthound/datadocs/dashboard/components.py +584 -0
- truthound/datadocs/dashboard/state.py +240 -0
- truthound/datadocs/engine/__init__.py +46 -0
- truthound/datadocs/engine/context.py +376 -0
- truthound/datadocs/engine/pipeline.py +618 -0
- truthound/datadocs/engine/registry.py +469 -0
- truthound/datadocs/exporters/__init__.py +49 -0
- truthound/datadocs/exporters/base.py +198 -0
- truthound/datadocs/exporters/html.py +178 -0
- truthound/datadocs/exporters/json_exporter.py +253 -0
- truthound/datadocs/exporters/markdown.py +284 -0
- truthound/datadocs/exporters/pdf.py +392 -0
- truthound/datadocs/i18n/__init__.py +86 -0
- truthound/datadocs/i18n/catalog.py +960 -0
- truthound/datadocs/i18n/formatting.py +505 -0
- truthound/datadocs/i18n/loader.py +256 -0
- truthound/datadocs/i18n/plurals.py +378 -0
- truthound/datadocs/renderers/__init__.py +42 -0
- truthound/datadocs/renderers/base.py +401 -0
- truthound/datadocs/renderers/custom.py +342 -0
- truthound/datadocs/renderers/jinja.py +697 -0
- truthound/datadocs/sections.py +736 -0
- truthound/datadocs/styles.py +931 -0
- truthound/datadocs/themes/__init__.py +101 -0
- truthound/datadocs/themes/base.py +336 -0
- truthound/datadocs/themes/default.py +417 -0
- truthound/datadocs/themes/enterprise.py +419 -0
- truthound/datadocs/themes/loader.py +336 -0
- truthound/datadocs/themes.py +301 -0
- truthound/datadocs/transformers/__init__.py +57 -0
- truthound/datadocs/transformers/base.py +268 -0
- truthound/datadocs/transformers/enrichers.py +544 -0
- truthound/datadocs/transformers/filters.py +447 -0
- truthound/datadocs/transformers/i18n.py +468 -0
- truthound/datadocs/versioning/__init__.py +62 -0
- truthound/datadocs/versioning/diff.py +639 -0
- truthound/datadocs/versioning/storage.py +497 -0
- truthound/datadocs/versioning/version.py +358 -0
- truthound/datasources/__init__.py +223 -0
- truthound/datasources/_async_protocols.py +222 -0
- truthound/datasources/_protocols.py +159 -0
- truthound/datasources/adapters.py +428 -0
- truthound/datasources/async_base.py +599 -0
- truthound/datasources/async_factory.py +511 -0
- truthound/datasources/base.py +516 -0
- truthound/datasources/factory.py +433 -0
- truthound/datasources/nosql/__init__.py +47 -0
- truthound/datasources/nosql/base.py +487 -0
- truthound/datasources/nosql/elasticsearch.py +801 -0
- truthound/datasources/nosql/mongodb.py +636 -0
- truthound/datasources/pandas_optimized.py +582 -0
- truthound/datasources/pandas_source.py +216 -0
- truthound/datasources/polars_source.py +395 -0
- truthound/datasources/spark_source.py +479 -0
- truthound/datasources/sql/__init__.py +154 -0
- truthound/datasources/sql/base.py +710 -0
- truthound/datasources/sql/bigquery.py +410 -0
- truthound/datasources/sql/cloud_base.py +199 -0
- truthound/datasources/sql/databricks.py +471 -0
- truthound/datasources/sql/mysql.py +316 -0
- truthound/datasources/sql/oracle.py +427 -0
- truthound/datasources/sql/postgresql.py +321 -0
- truthound/datasources/sql/redshift.py +479 -0
- truthound/datasources/sql/snowflake.py +439 -0
- truthound/datasources/sql/sqlite.py +286 -0
- truthound/datasources/sql/sqlserver.py +437 -0
- truthound/datasources/streaming/__init__.py +47 -0
- truthound/datasources/streaming/base.py +350 -0
- truthound/datasources/streaming/kafka.py +670 -0
- truthound/decorators.py +98 -0
- truthound/docs/__init__.py +69 -0
- truthound/docs/extractor.py +971 -0
- truthound/docs/generator.py +601 -0
- truthound/docs/parser.py +1037 -0
- truthound/docs/renderer.py +999 -0
- truthound/drift/__init__.py +22 -0
- truthound/drift/compare.py +189 -0
- truthound/drift/detectors.py +464 -0
- truthound/drift/report.py +160 -0
- truthound/execution/__init__.py +65 -0
- truthound/execution/_protocols.py +324 -0
- truthound/execution/base.py +576 -0
- truthound/execution/distributed/__init__.py +179 -0
- truthound/execution/distributed/aggregations.py +731 -0
- truthound/execution/distributed/arrow_bridge.py +817 -0
- truthound/execution/distributed/base.py +550 -0
- truthound/execution/distributed/dask_engine.py +976 -0
- truthound/execution/distributed/mixins.py +766 -0
- truthound/execution/distributed/protocols.py +756 -0
- truthound/execution/distributed/ray_engine.py +1127 -0
- truthound/execution/distributed/registry.py +446 -0
- truthound/execution/distributed/spark_engine.py +1011 -0
- truthound/execution/distributed/validator_adapter.py +682 -0
- truthound/execution/pandas_engine.py +401 -0
- truthound/execution/polars_engine.py +497 -0
- truthound/execution/pushdown/__init__.py +230 -0
- truthound/execution/pushdown/ast.py +1550 -0
- truthound/execution/pushdown/builder.py +1550 -0
- truthound/execution/pushdown/dialects.py +1072 -0
- truthound/execution/pushdown/executor.py +829 -0
- truthound/execution/pushdown/optimizer.py +1041 -0
- truthound/execution/sql_engine.py +518 -0
- truthound/infrastructure/__init__.py +189 -0
- truthound/infrastructure/audit.py +1515 -0
- truthound/infrastructure/config.py +1133 -0
- truthound/infrastructure/encryption.py +1132 -0
- truthound/infrastructure/logging.py +1503 -0
- truthound/infrastructure/metrics.py +1220 -0
- truthound/lineage/__init__.py +89 -0
- truthound/lineage/base.py +746 -0
- truthound/lineage/impact_analysis.py +474 -0
- truthound/lineage/integrations/__init__.py +22 -0
- truthound/lineage/integrations/openlineage.py +548 -0
- truthound/lineage/tracker.py +512 -0
- truthound/lineage/visualization/__init__.py +33 -0
- truthound/lineage/visualization/protocols.py +145 -0
- truthound/lineage/visualization/renderers/__init__.py +20 -0
- truthound/lineage/visualization/renderers/cytoscape.py +329 -0
- truthound/lineage/visualization/renderers/d3.py +331 -0
- truthound/lineage/visualization/renderers/graphviz.py +276 -0
- truthound/lineage/visualization/renderers/mermaid.py +308 -0
- truthound/maskers.py +113 -0
- truthound/ml/__init__.py +124 -0
- truthound/ml/anomaly_models/__init__.py +31 -0
- truthound/ml/anomaly_models/ensemble.py +362 -0
- truthound/ml/anomaly_models/isolation_forest.py +444 -0
- truthound/ml/anomaly_models/statistical.py +392 -0
- truthound/ml/base.py +1178 -0
- truthound/ml/drift_detection/__init__.py +26 -0
- truthound/ml/drift_detection/concept.py +381 -0
- truthound/ml/drift_detection/distribution.py +361 -0
- truthound/ml/drift_detection/feature.py +442 -0
- truthound/ml/drift_detection/multivariate.py +495 -0
- truthound/ml/monitoring/__init__.py +88 -0
- truthound/ml/monitoring/alerting/__init__.py +33 -0
- truthound/ml/monitoring/alerting/handlers.py +427 -0
- truthound/ml/monitoring/alerting/rules.py +508 -0
- truthound/ml/monitoring/collectors/__init__.py +19 -0
- truthound/ml/monitoring/collectors/composite.py +105 -0
- truthound/ml/monitoring/collectors/drift.py +324 -0
- truthound/ml/monitoring/collectors/performance.py +179 -0
- truthound/ml/monitoring/collectors/quality.py +369 -0
- truthound/ml/monitoring/monitor.py +536 -0
- truthound/ml/monitoring/protocols.py +451 -0
- truthound/ml/monitoring/stores/__init__.py +15 -0
- truthound/ml/monitoring/stores/memory.py +201 -0
- truthound/ml/monitoring/stores/prometheus.py +296 -0
- truthound/ml/rule_learning/__init__.py +25 -0
- truthound/ml/rule_learning/constraint_miner.py +443 -0
- truthound/ml/rule_learning/pattern_learner.py +499 -0
- truthound/ml/rule_learning/profile_learner.py +462 -0
- truthound/multitenancy/__init__.py +326 -0
- truthound/multitenancy/core.py +852 -0
- truthound/multitenancy/integration.py +597 -0
- truthound/multitenancy/isolation.py +630 -0
- truthound/multitenancy/manager.py +770 -0
- truthound/multitenancy/middleware.py +765 -0
- truthound/multitenancy/quota.py +537 -0
- truthound/multitenancy/resolvers.py +603 -0
- truthound/multitenancy/storage.py +703 -0
- truthound/observability/__init__.py +307 -0
- truthound/observability/context.py +531 -0
- truthound/observability/instrumentation.py +611 -0
- truthound/observability/logging.py +887 -0
- truthound/observability/metrics.py +1157 -0
- truthound/observability/tracing/__init__.py +178 -0
- truthound/observability/tracing/baggage.py +310 -0
- truthound/observability/tracing/config.py +426 -0
- truthound/observability/tracing/exporter.py +787 -0
- truthound/observability/tracing/integration.py +1018 -0
- truthound/observability/tracing/otel/__init__.py +146 -0
- truthound/observability/tracing/otel/adapter.py +982 -0
- truthound/observability/tracing/otel/bridge.py +1177 -0
- truthound/observability/tracing/otel/compat.py +681 -0
- truthound/observability/tracing/otel/config.py +691 -0
- truthound/observability/tracing/otel/detection.py +327 -0
- truthound/observability/tracing/otel/protocols.py +426 -0
- truthound/observability/tracing/processor.py +561 -0
- truthound/observability/tracing/propagator.py +757 -0
- truthound/observability/tracing/provider.py +569 -0
- truthound/observability/tracing/resource.py +515 -0
- truthound/observability/tracing/sampler.py +487 -0
- truthound/observability/tracing/span.py +676 -0
- truthound/plugins/__init__.py +198 -0
- truthound/plugins/base.py +599 -0
- truthound/plugins/cli.py +680 -0
- truthound/plugins/dependencies/__init__.py +42 -0
- truthound/plugins/dependencies/graph.py +422 -0
- truthound/plugins/dependencies/resolver.py +417 -0
- truthound/plugins/discovery.py +379 -0
- truthound/plugins/docs/__init__.py +46 -0
- truthound/plugins/docs/extractor.py +444 -0
- truthound/plugins/docs/renderer.py +499 -0
- truthound/plugins/enterprise_manager.py +877 -0
- truthound/plugins/examples/__init__.py +19 -0
- truthound/plugins/examples/custom_validators.py +317 -0
- truthound/plugins/examples/slack_notifier.py +312 -0
- truthound/plugins/examples/xml_reporter.py +254 -0
- truthound/plugins/hooks.py +558 -0
- truthound/plugins/lifecycle/__init__.py +43 -0
- truthound/plugins/lifecycle/hot_reload.py +402 -0
- truthound/plugins/lifecycle/manager.py +371 -0
- truthound/plugins/manager.py +736 -0
- truthound/plugins/registry.py +338 -0
- truthound/plugins/security/__init__.py +93 -0
- truthound/plugins/security/exceptions.py +332 -0
- truthound/plugins/security/policies.py +348 -0
- truthound/plugins/security/protocols.py +643 -0
- truthound/plugins/security/sandbox/__init__.py +45 -0
- truthound/plugins/security/sandbox/context.py +158 -0
- truthound/plugins/security/sandbox/engines/__init__.py +19 -0
- truthound/plugins/security/sandbox/engines/container.py +379 -0
- truthound/plugins/security/sandbox/engines/noop.py +144 -0
- truthound/plugins/security/sandbox/engines/process.py +336 -0
- truthound/plugins/security/sandbox/factory.py +211 -0
- truthound/plugins/security/signing/__init__.py +57 -0
- truthound/plugins/security/signing/service.py +330 -0
- truthound/plugins/security/signing/trust_store.py +368 -0
- truthound/plugins/security/signing/verifier.py +459 -0
- truthound/plugins/versioning/__init__.py +41 -0
- truthound/plugins/versioning/constraints.py +297 -0
- truthound/plugins/versioning/resolver.py +329 -0
- truthound/profiler/__init__.py +1729 -0
- truthound/profiler/_lazy.py +452 -0
- truthound/profiler/ab_testing/__init__.py +80 -0
- truthound/profiler/ab_testing/analysis.py +449 -0
- truthound/profiler/ab_testing/base.py +257 -0
- truthound/profiler/ab_testing/experiment.py +395 -0
- truthound/profiler/ab_testing/tracking.py +368 -0
- truthound/profiler/auto_threshold.py +1170 -0
- truthound/profiler/base.py +579 -0
- truthound/profiler/cache_patterns.py +911 -0
- truthound/profiler/caching.py +1303 -0
- truthound/profiler/column_profiler.py +712 -0
- truthound/profiler/comparison.py +1007 -0
- truthound/profiler/custom_patterns.py +1170 -0
- truthound/profiler/dashboard/__init__.py +50 -0
- truthound/profiler/dashboard/app.py +476 -0
- truthound/profiler/dashboard/components.py +457 -0
- truthound/profiler/dashboard/config.py +72 -0
- truthound/profiler/distributed/__init__.py +83 -0
- truthound/profiler/distributed/base.py +281 -0
- truthound/profiler/distributed/dask_backend.py +498 -0
- truthound/profiler/distributed/local_backend.py +293 -0
- truthound/profiler/distributed/profiler.py +304 -0
- truthound/profiler/distributed/ray_backend.py +374 -0
- truthound/profiler/distributed/spark_backend.py +375 -0
- truthound/profiler/distributed.py +1366 -0
- truthound/profiler/enterprise_sampling.py +1065 -0
- truthound/profiler/errors.py +488 -0
- truthound/profiler/evolution/__init__.py +91 -0
- truthound/profiler/evolution/alerts.py +426 -0
- truthound/profiler/evolution/changes.py +206 -0
- truthound/profiler/evolution/compatibility.py +365 -0
- truthound/profiler/evolution/detector.py +372 -0
- truthound/profiler/evolution/protocols.py +121 -0
- truthound/profiler/generators/__init__.py +48 -0
- truthound/profiler/generators/base.py +384 -0
- truthound/profiler/generators/ml_rules.py +375 -0
- truthound/profiler/generators/pattern_rules.py +384 -0
- truthound/profiler/generators/schema_rules.py +267 -0
- truthound/profiler/generators/stats_rules.py +324 -0
- truthound/profiler/generators/suite_generator.py +857 -0
- truthound/profiler/i18n.py +1542 -0
- truthound/profiler/incremental.py +554 -0
- truthound/profiler/incremental_validation.py +1710 -0
- truthound/profiler/integration/__init__.py +73 -0
- truthound/profiler/integration/adapters.py +345 -0
- truthound/profiler/integration/context.py +371 -0
- truthound/profiler/integration/executor.py +527 -0
- truthound/profiler/integration/naming.py +75 -0
- truthound/profiler/integration/protocols.py +243 -0
- truthound/profiler/memory.py +1185 -0
- truthound/profiler/migration/__init__.py +60 -0
- truthound/profiler/migration/base.py +345 -0
- truthound/profiler/migration/manager.py +444 -0
- truthound/profiler/migration/v1_0_to_v1_1.py +484 -0
- truthound/profiler/ml/__init__.py +73 -0
- truthound/profiler/ml/base.py +244 -0
- truthound/profiler/ml/classifier.py +507 -0
- truthound/profiler/ml/feature_extraction.py +604 -0
- truthound/profiler/ml/pretrained.py +448 -0
- truthound/profiler/ml_inference.py +1276 -0
- truthound/profiler/native_patterns.py +815 -0
- truthound/profiler/observability.py +1184 -0
- truthound/profiler/process_timeout.py +1566 -0
- truthound/profiler/progress.py +568 -0
- truthound/profiler/progress_callbacks.py +1734 -0
- truthound/profiler/quality.py +1345 -0
- truthound/profiler/resilience.py +1180 -0
- truthound/profiler/sampled_matcher.py +794 -0
- truthound/profiler/sampling.py +1288 -0
- truthound/profiler/scheduling/__init__.py +82 -0
- truthound/profiler/scheduling/protocols.py +214 -0
- truthound/profiler/scheduling/scheduler.py +474 -0
- truthound/profiler/scheduling/storage.py +457 -0
- truthound/profiler/scheduling/triggers.py +449 -0
- truthound/profiler/schema.py +603 -0
- truthound/profiler/streaming.py +685 -0
- truthound/profiler/streaming_patterns.py +1354 -0
- truthound/profiler/suite_cli.py +625 -0
- truthound/profiler/suite_config.py +789 -0
- truthound/profiler/suite_export.py +1268 -0
- truthound/profiler/table_profiler.py +547 -0
- truthound/profiler/timeout.py +565 -0
- truthound/profiler/validation.py +1532 -0
- truthound/profiler/visualization/__init__.py +118 -0
- truthound/profiler/visualization/base.py +346 -0
- truthound/profiler/visualization/generator.py +1259 -0
- truthound/profiler/visualization/plotly_renderer.py +811 -0
- truthound/profiler/visualization/renderers.py +669 -0
- truthound/profiler/visualization/sections.py +540 -0
- truthound/profiler/visualization.py +2122 -0
- truthound/profiler/yaml_validation.py +1151 -0
- truthound/py.typed +0 -0
- truthound/ratelimit/__init__.py +248 -0
- truthound/ratelimit/algorithms.py +1108 -0
- truthound/ratelimit/core.py +573 -0
- truthound/ratelimit/integration.py +532 -0
- truthound/ratelimit/limiter.py +663 -0
- truthound/ratelimit/middleware.py +700 -0
- truthound/ratelimit/policy.py +792 -0
- truthound/ratelimit/storage.py +763 -0
- truthound/rbac/__init__.py +340 -0
- truthound/rbac/core.py +976 -0
- truthound/rbac/integration.py +760 -0
- truthound/rbac/manager.py +1052 -0
- truthound/rbac/middleware.py +842 -0
- truthound/rbac/policy.py +954 -0
- truthound/rbac/storage.py +878 -0
- truthound/realtime/__init__.py +141 -0
- truthound/realtime/adapters/__init__.py +43 -0
- truthound/realtime/adapters/base.py +533 -0
- truthound/realtime/adapters/kafka.py +487 -0
- truthound/realtime/adapters/kinesis.py +479 -0
- truthound/realtime/adapters/mock.py +243 -0
- truthound/realtime/base.py +553 -0
- truthound/realtime/factory.py +382 -0
- truthound/realtime/incremental.py +660 -0
- truthound/realtime/processing/__init__.py +67 -0
- truthound/realtime/processing/exactly_once.py +575 -0
- truthound/realtime/processing/state.py +547 -0
- truthound/realtime/processing/windows.py +647 -0
- truthound/realtime/protocols.py +569 -0
- truthound/realtime/streaming.py +605 -0
- truthound/realtime/testing/__init__.py +32 -0
- truthound/realtime/testing/containers.py +615 -0
- truthound/realtime/testing/fixtures.py +484 -0
- truthound/report.py +280 -0
- truthound/reporters/__init__.py +46 -0
- truthound/reporters/_protocols.py +30 -0
- truthound/reporters/base.py +324 -0
- truthound/reporters/ci/__init__.py +66 -0
- truthound/reporters/ci/azure.py +436 -0
- truthound/reporters/ci/base.py +509 -0
- truthound/reporters/ci/bitbucket.py +567 -0
- truthound/reporters/ci/circleci.py +547 -0
- truthound/reporters/ci/detection.py +364 -0
- truthound/reporters/ci/factory.py +182 -0
- truthound/reporters/ci/github.py +388 -0
- truthound/reporters/ci/gitlab.py +471 -0
- truthound/reporters/ci/jenkins.py +525 -0
- truthound/reporters/console_reporter.py +299 -0
- truthound/reporters/factory.py +211 -0
- truthound/reporters/html_reporter.py +524 -0
- truthound/reporters/json_reporter.py +256 -0
- truthound/reporters/markdown_reporter.py +280 -0
- truthound/reporters/sdk/__init__.py +174 -0
- truthound/reporters/sdk/builder.py +558 -0
- truthound/reporters/sdk/mixins.py +1150 -0
- truthound/reporters/sdk/schema.py +1493 -0
- truthound/reporters/sdk/templates.py +666 -0
- truthound/reporters/sdk/testing.py +968 -0
- truthound/scanners.py +170 -0
- truthound/scheduling/__init__.py +122 -0
- truthound/scheduling/cron.py +1136 -0
- truthound/scheduling/presets.py +212 -0
- truthound/schema.py +275 -0
- truthound/secrets/__init__.py +173 -0
- truthound/secrets/base.py +618 -0
- truthound/secrets/cloud.py +682 -0
- truthound/secrets/integration.py +507 -0
- truthound/secrets/manager.py +633 -0
- truthound/secrets/oidc/__init__.py +172 -0
- truthound/secrets/oidc/base.py +902 -0
- truthound/secrets/oidc/credential_provider.py +623 -0
- truthound/secrets/oidc/exchangers.py +1001 -0
- truthound/secrets/oidc/github/__init__.py +110 -0
- truthound/secrets/oidc/github/claims.py +718 -0
- truthound/secrets/oidc/github/enhanced_provider.py +693 -0
- truthound/secrets/oidc/github/trust_policy.py +742 -0
- truthound/secrets/oidc/github/verification.py +723 -0
- truthound/secrets/oidc/github/workflow.py +691 -0
- truthound/secrets/oidc/providers.py +825 -0
- truthound/secrets/providers.py +506 -0
- truthound/secrets/resolver.py +495 -0
- truthound/stores/__init__.py +177 -0
- truthound/stores/backends/__init__.py +18 -0
- truthound/stores/backends/_protocols.py +340 -0
- truthound/stores/backends/azure_blob.py +530 -0
- truthound/stores/backends/concurrent_filesystem.py +915 -0
- truthound/stores/backends/connection_pool.py +1365 -0
- truthound/stores/backends/database.py +743 -0
- truthound/stores/backends/filesystem.py +538 -0
- truthound/stores/backends/gcs.py +399 -0
- truthound/stores/backends/memory.py +354 -0
- truthound/stores/backends/s3.py +434 -0
- truthound/stores/backpressure/__init__.py +84 -0
- truthound/stores/backpressure/base.py +375 -0
- truthound/stores/backpressure/circuit_breaker.py +434 -0
- truthound/stores/backpressure/monitor.py +376 -0
- truthound/stores/backpressure/strategies.py +677 -0
- truthound/stores/base.py +551 -0
- truthound/stores/batching/__init__.py +65 -0
- truthound/stores/batching/base.py +305 -0
- truthound/stores/batching/buffer.py +370 -0
- truthound/stores/batching/store.py +248 -0
- truthound/stores/batching/writer.py +521 -0
- truthound/stores/caching/__init__.py +60 -0
- truthound/stores/caching/backends.py +684 -0
- truthound/stores/caching/base.py +356 -0
- truthound/stores/caching/store.py +305 -0
- truthound/stores/compression/__init__.py +193 -0
- truthound/stores/compression/adaptive.py +694 -0
- truthound/stores/compression/base.py +514 -0
- truthound/stores/compression/pipeline.py +868 -0
- truthound/stores/compression/providers.py +672 -0
- truthound/stores/compression/streaming.py +832 -0
- truthound/stores/concurrency/__init__.py +81 -0
- truthound/stores/concurrency/atomic.py +556 -0
- truthound/stores/concurrency/index.py +775 -0
- truthound/stores/concurrency/locks.py +576 -0
- truthound/stores/concurrency/manager.py +482 -0
- truthound/stores/encryption/__init__.py +297 -0
- truthound/stores/encryption/base.py +952 -0
- truthound/stores/encryption/keys.py +1191 -0
- truthound/stores/encryption/pipeline.py +903 -0
- truthound/stores/encryption/providers.py +953 -0
- truthound/stores/encryption/streaming.py +950 -0
- truthound/stores/expectations.py +227 -0
- truthound/stores/factory.py +246 -0
- truthound/stores/migration/__init__.py +75 -0
- truthound/stores/migration/base.py +480 -0
- truthound/stores/migration/manager.py +347 -0
- truthound/stores/migration/registry.py +382 -0
- truthound/stores/migration/store.py +559 -0
- truthound/stores/observability/__init__.py +106 -0
- truthound/stores/observability/audit.py +718 -0
- truthound/stores/observability/config.py +270 -0
- truthound/stores/observability/factory.py +208 -0
- truthound/stores/observability/metrics.py +636 -0
- truthound/stores/observability/protocols.py +410 -0
- truthound/stores/observability/store.py +570 -0
- truthound/stores/observability/tracing.py +784 -0
- truthound/stores/replication/__init__.py +76 -0
- truthound/stores/replication/base.py +260 -0
- truthound/stores/replication/monitor.py +269 -0
- truthound/stores/replication/store.py +439 -0
- truthound/stores/replication/syncer.py +391 -0
- truthound/stores/results.py +359 -0
- truthound/stores/retention/__init__.py +77 -0
- truthound/stores/retention/base.py +378 -0
- truthound/stores/retention/policies.py +621 -0
- truthound/stores/retention/scheduler.py +279 -0
- truthound/stores/retention/store.py +526 -0
- truthound/stores/streaming/__init__.py +138 -0
- truthound/stores/streaming/base.py +801 -0
- truthound/stores/streaming/database.py +984 -0
- truthound/stores/streaming/filesystem.py +719 -0
- truthound/stores/streaming/reader.py +629 -0
- truthound/stores/streaming/s3.py +843 -0
- truthound/stores/streaming/writer.py +790 -0
- truthound/stores/tiering/__init__.py +108 -0
- truthound/stores/tiering/base.py +462 -0
- truthound/stores/tiering/manager.py +249 -0
- truthound/stores/tiering/policies.py +692 -0
- truthound/stores/tiering/store.py +526 -0
- truthound/stores/versioning/__init__.py +56 -0
- truthound/stores/versioning/base.py +376 -0
- truthound/stores/versioning/store.py +660 -0
- truthound/stores/versioning/strategies.py +353 -0
- truthound/types.py +56 -0
- truthound/validators/__init__.py +774 -0
- truthound/validators/aggregate/__init__.py +27 -0
- truthound/validators/aggregate/central.py +116 -0
- truthound/validators/aggregate/extremes.py +116 -0
- truthound/validators/aggregate/spread.py +118 -0
- truthound/validators/aggregate/sum.py +64 -0
- truthound/validators/aggregate/type.py +78 -0
- truthound/validators/anomaly/__init__.py +93 -0
- truthound/validators/anomaly/base.py +431 -0
- truthound/validators/anomaly/ml_based.py +1190 -0
- truthound/validators/anomaly/multivariate.py +647 -0
- truthound/validators/anomaly/statistical.py +599 -0
- truthound/validators/base.py +1089 -0
- truthound/validators/business_rule/__init__.py +46 -0
- truthound/validators/business_rule/base.py +147 -0
- truthound/validators/business_rule/checksum.py +509 -0
- truthound/validators/business_rule/financial.py +526 -0
- truthound/validators/cache.py +733 -0
- truthound/validators/completeness/__init__.py +39 -0
- truthound/validators/completeness/conditional.py +73 -0
- truthound/validators/completeness/default.py +98 -0
- truthound/validators/completeness/empty.py +103 -0
- truthound/validators/completeness/nan.py +337 -0
- truthound/validators/completeness/null.py +152 -0
- truthound/validators/cross_table/__init__.py +17 -0
- truthound/validators/cross_table/aggregate.py +333 -0
- truthound/validators/cross_table/row_count.py +122 -0
- truthound/validators/datetime/__init__.py +29 -0
- truthound/validators/datetime/format.py +78 -0
- truthound/validators/datetime/freshness.py +269 -0
- truthound/validators/datetime/order.py +73 -0
- truthound/validators/datetime/parseable.py +185 -0
- truthound/validators/datetime/range.py +202 -0
- truthound/validators/datetime/timezone.py +69 -0
- truthound/validators/distribution/__init__.py +49 -0
- truthound/validators/distribution/distribution.py +128 -0
- truthound/validators/distribution/monotonic.py +119 -0
- truthound/validators/distribution/outlier.py +178 -0
- truthound/validators/distribution/quantile.py +80 -0
- truthound/validators/distribution/range.py +254 -0
- truthound/validators/distribution/set.py +125 -0
- truthound/validators/distribution/statistical.py +459 -0
- truthound/validators/drift/__init__.py +79 -0
- truthound/validators/drift/base.py +427 -0
- truthound/validators/drift/multi_feature.py +401 -0
- truthound/validators/drift/numeric.py +395 -0
- truthound/validators/drift/psi.py +446 -0
- truthound/validators/drift/statistical.py +510 -0
- truthound/validators/enterprise.py +1658 -0
- truthound/validators/geospatial/__init__.py +80 -0
- truthound/validators/geospatial/base.py +97 -0
- truthound/validators/geospatial/boundary.py +238 -0
- truthound/validators/geospatial/coordinate.py +351 -0
- truthound/validators/geospatial/distance.py +399 -0
- truthound/validators/geospatial/polygon.py +665 -0
- truthound/validators/i18n/__init__.py +308 -0
- truthound/validators/i18n/bidi.py +571 -0
- truthound/validators/i18n/catalogs.py +570 -0
- truthound/validators/i18n/dialects.py +763 -0
- truthound/validators/i18n/extended_catalogs.py +549 -0
- truthound/validators/i18n/formatting.py +1434 -0
- truthound/validators/i18n/loader.py +1020 -0
- truthound/validators/i18n/messages.py +521 -0
- truthound/validators/i18n/plural.py +683 -0
- truthound/validators/i18n/protocols.py +855 -0
- truthound/validators/i18n/tms.py +1162 -0
- truthound/validators/localization/__init__.py +53 -0
- truthound/validators/localization/base.py +122 -0
- truthound/validators/localization/chinese.py +362 -0
- truthound/validators/localization/japanese.py +275 -0
- truthound/validators/localization/korean.py +524 -0
- truthound/validators/memory/__init__.py +94 -0
- truthound/validators/memory/approximate_knn.py +506 -0
- truthound/validators/memory/base.py +547 -0
- truthound/validators/memory/sgd_online.py +719 -0
- truthound/validators/memory/streaming_ecdf.py +753 -0
- truthound/validators/ml_feature/__init__.py +54 -0
- truthound/validators/ml_feature/base.py +249 -0
- truthound/validators/ml_feature/correlation.py +299 -0
- truthound/validators/ml_feature/leakage.py +344 -0
- truthound/validators/ml_feature/null_impact.py +270 -0
- truthound/validators/ml_feature/scale.py +264 -0
- truthound/validators/multi_column/__init__.py +89 -0
- truthound/validators/multi_column/arithmetic.py +284 -0
- truthound/validators/multi_column/base.py +231 -0
- truthound/validators/multi_column/comparison.py +273 -0
- truthound/validators/multi_column/consistency.py +312 -0
- truthound/validators/multi_column/statistical.py +299 -0
- truthound/validators/optimization/__init__.py +164 -0
- truthound/validators/optimization/aggregation.py +563 -0
- truthound/validators/optimization/covariance.py +556 -0
- truthound/validators/optimization/geo.py +626 -0
- truthound/validators/optimization/graph.py +587 -0
- truthound/validators/optimization/orchestrator.py +970 -0
- truthound/validators/optimization/profiling.py +1312 -0
- truthound/validators/privacy/__init__.py +223 -0
- truthound/validators/privacy/base.py +635 -0
- truthound/validators/privacy/ccpa.py +670 -0
- truthound/validators/privacy/gdpr.py +728 -0
- truthound/validators/privacy/global_patterns.py +604 -0
- truthound/validators/privacy/plugins.py +867 -0
- truthound/validators/profiling/__init__.py +52 -0
- truthound/validators/profiling/base.py +175 -0
- truthound/validators/profiling/cardinality.py +312 -0
- truthound/validators/profiling/entropy.py +391 -0
- truthound/validators/profiling/frequency.py +455 -0
- truthound/validators/pushdown_support.py +660 -0
- truthound/validators/query/__init__.py +91 -0
- truthound/validators/query/aggregate.py +346 -0
- truthound/validators/query/base.py +246 -0
- truthound/validators/query/column.py +249 -0
- truthound/validators/query/expression.py +274 -0
- truthound/validators/query/result.py +323 -0
- truthound/validators/query/row_count.py +264 -0
- truthound/validators/referential/__init__.py +80 -0
- truthound/validators/referential/base.py +395 -0
- truthound/validators/referential/cascade.py +391 -0
- truthound/validators/referential/circular.py +563 -0
- truthound/validators/referential/foreign_key.py +624 -0
- truthound/validators/referential/orphan.py +485 -0
- truthound/validators/registry.py +112 -0
- truthound/validators/schema/__init__.py +41 -0
- truthound/validators/schema/column_count.py +142 -0
- truthound/validators/schema/column_exists.py +80 -0
- truthound/validators/schema/column_order.py +82 -0
- truthound/validators/schema/column_pair.py +85 -0
- truthound/validators/schema/column_pair_set.py +195 -0
- truthound/validators/schema/column_type.py +94 -0
- truthound/validators/schema/multi_column.py +53 -0
- truthound/validators/schema/multi_column_aggregate.py +175 -0
- truthound/validators/schema/referential.py +274 -0
- truthound/validators/schema/table_schema.py +91 -0
- truthound/validators/schema_validator.py +219 -0
- truthound/validators/sdk/__init__.py +250 -0
- truthound/validators/sdk/builder.py +680 -0
- truthound/validators/sdk/decorators.py +474 -0
- truthound/validators/sdk/enterprise/__init__.py +211 -0
- truthound/validators/sdk/enterprise/docs.py +725 -0
- truthound/validators/sdk/enterprise/fuzzing.py +659 -0
- truthound/validators/sdk/enterprise/licensing.py +709 -0
- truthound/validators/sdk/enterprise/manager.py +543 -0
- truthound/validators/sdk/enterprise/resources.py +628 -0
- truthound/validators/sdk/enterprise/sandbox.py +766 -0
- truthound/validators/sdk/enterprise/signing.py +603 -0
- truthound/validators/sdk/enterprise/templates.py +865 -0
- truthound/validators/sdk/enterprise/versioning.py +659 -0
- truthound/validators/sdk/templates.py +757 -0
- truthound/validators/sdk/testing.py +807 -0
- truthound/validators/security/__init__.py +181 -0
- truthound/validators/security/redos/__init__.py +182 -0
- truthound/validators/security/redos/core.py +861 -0
- truthound/validators/security/redos/cpu_monitor.py +593 -0
- truthound/validators/security/redos/cve_database.py +791 -0
- truthound/validators/security/redos/ml/__init__.py +155 -0
- truthound/validators/security/redos/ml/base.py +785 -0
- truthound/validators/security/redos/ml/datasets.py +618 -0
- truthound/validators/security/redos/ml/features.py +359 -0
- truthound/validators/security/redos/ml/models.py +1000 -0
- truthound/validators/security/redos/ml/predictor.py +507 -0
- truthound/validators/security/redos/ml/storage.py +632 -0
- truthound/validators/security/redos/ml/training.py +571 -0
- truthound/validators/security/redos/ml_analyzer.py +937 -0
- truthound/validators/security/redos/optimizer.py +674 -0
- truthound/validators/security/redos/profiler.py +682 -0
- truthound/validators/security/redos/re2_engine.py +709 -0
- truthound/validators/security/redos.py +886 -0
- truthound/validators/security/sql_security.py +1247 -0
- truthound/validators/streaming/__init__.py +126 -0
- truthound/validators/streaming/base.py +292 -0
- truthound/validators/streaming/completeness.py +210 -0
- truthound/validators/streaming/mixin.py +575 -0
- truthound/validators/streaming/range.py +308 -0
- truthound/validators/streaming/sources.py +846 -0
- truthound/validators/string/__init__.py +57 -0
- truthound/validators/string/casing.py +158 -0
- truthound/validators/string/charset.py +96 -0
- truthound/validators/string/format.py +501 -0
- truthound/validators/string/json.py +77 -0
- truthound/validators/string/json_schema.py +184 -0
- truthound/validators/string/length.py +104 -0
- truthound/validators/string/like_pattern.py +237 -0
- truthound/validators/string/regex.py +202 -0
- truthound/validators/string/regex_extended.py +435 -0
- truthound/validators/table/__init__.py +88 -0
- truthound/validators/table/base.py +78 -0
- truthound/validators/table/column_count.py +198 -0
- truthound/validators/table/freshness.py +362 -0
- truthound/validators/table/row_count.py +251 -0
- truthound/validators/table/schema.py +333 -0
- truthound/validators/table/size.py +285 -0
- truthound/validators/timeout/__init__.py +102 -0
- truthound/validators/timeout/advanced/__init__.py +247 -0
- truthound/validators/timeout/advanced/circuit_breaker.py +675 -0
- truthound/validators/timeout/advanced/prediction.py +773 -0
- truthound/validators/timeout/advanced/priority.py +618 -0
- truthound/validators/timeout/advanced/redis_backend.py +770 -0
- truthound/validators/timeout/advanced/retry.py +721 -0
- truthound/validators/timeout/advanced/sampling.py +788 -0
- truthound/validators/timeout/advanced/sla.py +661 -0
- truthound/validators/timeout/advanced/telemetry.py +804 -0
- truthound/validators/timeout/cascade.py +477 -0
- truthound/validators/timeout/deadline.py +657 -0
- truthound/validators/timeout/degradation.py +525 -0
- truthound/validators/timeout/distributed.py +597 -0
- truthound/validators/timeseries/__init__.py +89 -0
- truthound/validators/timeseries/base.py +326 -0
- truthound/validators/timeseries/completeness.py +617 -0
- truthound/validators/timeseries/gap.py +485 -0
- truthound/validators/timeseries/monotonic.py +310 -0
- truthound/validators/timeseries/seasonality.py +422 -0
- truthound/validators/timeseries/trend.py +510 -0
- truthound/validators/uniqueness/__init__.py +59 -0
- truthound/validators/uniqueness/approximate.py +475 -0
- truthound/validators/uniqueness/distinct_values.py +253 -0
- truthound/validators/uniqueness/duplicate.py +118 -0
- truthound/validators/uniqueness/primary_key.py +140 -0
- truthound/validators/uniqueness/unique.py +191 -0
- truthound/validators/uniqueness/within_record.py +599 -0
- truthound/validators/utils.py +756 -0
- truthound-1.0.8.dist-info/METADATA +474 -0
- truthound-1.0.8.dist-info/RECORD +877 -0
- truthound-1.0.8.dist-info/WHEEL +4 -0
- truthound-1.0.8.dist-info/entry_points.txt +2 -0
- truthound-1.0.8.dist-info/licenses/LICENSE +190 -0
|
@@ -0,0 +1,1247 @@
|
|
|
1
|
+
"""Comprehensive SQL security module.
|
|
2
|
+
|
|
3
|
+
Provides extensible SQL injection protection with:
|
|
4
|
+
- Multi-level security policies
|
|
5
|
+
- Parameterized query support
|
|
6
|
+
- Whitelist-based validation
|
|
7
|
+
- Query audit logging
|
|
8
|
+
- Pluggable security rules
|
|
9
|
+
|
|
10
|
+
Security Levels:
|
|
11
|
+
STRICT: Maximum security, minimal allowed operations
|
|
12
|
+
STANDARD: Balanced security for typical use cases (default)
|
|
13
|
+
PERMISSIVE: Relaxed security for trusted environments
|
|
14
|
+
"""
|
|
15
|
+
|
|
16
|
+
from __future__ import annotations
|
|
17
|
+
|
|
18
|
+
import hashlib
|
|
19
|
+
import logging
|
|
20
|
+
import re
|
|
21
|
+
from abc import ABC, abstractmethod
|
|
22
|
+
from dataclasses import dataclass, field
|
|
23
|
+
from datetime import datetime
|
|
24
|
+
from enum import Enum, auto
|
|
25
|
+
from typing import Any, Callable, Iterator
|
|
26
|
+
|
|
27
|
+
import polars as pl
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
# =============================================================================
|
|
31
|
+
# Exceptions
|
|
32
|
+
# =============================================================================
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
class SQLSecurityError(Exception):
    """Common base class for the SQL security exceptions in this module.

    It adds nothing to ``Exception``; it exists so callers can catch the
    more specific SQL security errors through a single type.
    """
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
class SQLInjectionError(SQLSecurityError):
    """Signals that a query looks like a SQL injection attempt.

    Attributes:
        pattern: Value supplied by the raiser to identify what matched
            (``None`` when not given).
        query: The offending query. Queries longer than 100 characters are
            stored as their first 100 characters followed by ``"..."``.
    """

    def __init__(self, message: str, pattern: str | None = None, query: str | None = None):
        super().__init__(message)
        self.pattern = pattern

        # Store only a short preview so long queries do not bloat the error.
        preview = query
        if query and len(query) > 100:
            preview = query[:100] + "..."
        self.query = preview
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
class QueryValidationError(SQLSecurityError):
    """Signals that a query was rejected by validation.

    Carries no extra state beyond the message passed to the constructor.
    """
|
|
54
|
+
|
|
55
|
+
|
|
56
|
+
# =============================================================================
|
|
57
|
+
# Security Levels and Policies
|
|
58
|
+
# =============================================================================
|
|
59
|
+
|
|
60
|
+
|
|
61
|
+
class SecurityLevel(Enum):
    """Named presets describing how strict SQL validation should be."""

    # Maximum security
    STRICT = 1
    # Balanced (default)
    STANDARD = 2
    # Relaxed for trusted environments
    PERMISSIVE = 3
|
|
67
|
+
|
|
68
|
+
|
|
69
|
+
@dataclass
class SecurityPolicy:
    """Tunable rule set describing which SQL constructs are acceptable.

    Each field either toggles a category of SQL construct or bounds the size
    of what is accepted. The ``strict``, ``standard`` and ``permissive``
    constructors return ready-made combinations.

    Example:
        # Custom policy for analytics queries
        policy = SecurityPolicy(
            level=SecurityLevel.STANDARD,
            max_query_length=20000,
            allow_joins=True,
            allow_subqueries=True,
            allow_aggregations=True,
            blocked_functions=["SLEEP", "BENCHMARK", "LOAD_FILE"],
        )
    """

    # --- General limits ---
    level: SecurityLevel = SecurityLevel.STANDARD
    max_query_length: int = 10000
    max_identifier_length: int = 128

    # --- Structural permissions ---
    allow_joins: bool = True
    allow_subqueries: bool = True
    allow_aggregations: bool = True
    allow_window_functions: bool = True
    # Common Table Expressions (WITH clause)
    allow_cte: bool = True
    # Off by default: UNION can be used for injection
    allow_union: bool = False

    # --- Permitted statement types ---
    allowed_statements: set[str] = field(
        default_factory=lambda: set(("SELECT", "WITH"))
    )

    # --- Extra blocked patterns (regex source strings) ---
    blocked_patterns: list[str] = field(default_factory=list)

    # --- Blocked SQL functions ---
    blocked_functions: list[str] = field(
        default_factory=lambda: list(
            (
                "SLEEP",
                "BENCHMARK",
                "LOAD_FILE",
                "INTO OUTFILE",
                "INTO DUMPFILE",
            )
        )
    )

    # --- Table/column whitelists (an empty set means everything is allowed) ---
    allowed_tables: set[str] = field(default_factory=set)
    allowed_columns: set[str] = field(default_factory=set)

    # --- Optional hook taking two strings ---
    on_violation: Callable[[str, str], None] | None = None

    @classmethod
    def strict(cls) -> "SecurityPolicy":
        """Return the STRICT preset: 5000-char limit, no joins/subqueries/UNION/CTE."""
        switched_off = dict.fromkeys(
            ("allow_joins", "allow_subqueries", "allow_union", "allow_cte"),
            False,
        )
        return cls(level=SecurityLevel.STRICT, max_query_length=5000, **switched_off)

    @classmethod
    def standard(cls) -> "SecurityPolicy":
        """Return the STANDARD preset, i.e. every field at its default."""
        return cls(level=SecurityLevel.STANDARD)

    @classmethod
    def permissive(cls) -> "SecurityPolicy":
        """Return the PERMISSIVE preset: 50000-char limit, joins/subqueries/UNION/CTE on."""
        switched_on = dict.fromkeys(
            ("allow_joins", "allow_subqueries", "allow_union", "allow_cte"),
            True,
        )
        return cls(
            level=SecurityLevel.PERMISSIVE, max_query_length=50000, **switched_on
        )
|
|
154
|
+
|
|
155
|
+
|
|
156
|
+
# =============================================================================
|
|
157
|
+
# Pattern-based Validation
|
|
158
|
+
# =============================================================================
|
|
159
|
+
|
|
160
|
+
|
|
161
|
+
@dataclass
class DangerousPattern:
    """A dangerous SQL pattern to detect.

    Fields:
        name: Identifier used to look the pattern up (see
            ``PatternRegistry.unregister``).
        pattern: Regular-expression source text.
        severity: One of "HIGH", "MEDIUM" or "LOW".
        description: Human-readable explanation of what the pattern flags.
    """

    name: str
    pattern: str
    severity: str = "HIGH"  # HIGH, MEDIUM, LOW
    description: str = ""


class PatternRegistry:
    r"""Registry of dangerous SQL patterns.

    Extensible registry for SQL injection patterns. Every registered pattern
    is compiled once, case-insensitively and in multi-line mode, and
    ``check`` reports the first hit of each pattern in a query.

    Example:
        registry = PatternRegistry()
        registry.register(DangerousPattern(
            name="time_based_injection",
            pattern=r"WAITFOR\s+DELAY",
            severity="HIGH",
            description="Time-based SQL injection"
        ))
    """

    # Default dangerous patterns
    DEFAULT_PATTERNS = [
        # DDL statements
        DangerousPattern(
            "ddl_create",
            r"\b(CREATE)\s+(TABLE|DATABASE|INDEX|VIEW|SCHEMA|PROCEDURE|FUNCTION)\b",
            "HIGH",
            "DDL CREATE statement",
        ),
        DangerousPattern(
            "ddl_alter",
            r"\b(ALTER)\s+(TABLE|DATABASE|INDEX|VIEW|SCHEMA)\b",
            "HIGH",
            "DDL ALTER statement",
        ),
        DangerousPattern(
            "ddl_drop",
            r"\b(DROP)\s+(TABLE|DATABASE|INDEX|VIEW|SCHEMA)\b",
            "HIGH",
            "DDL DROP statement",
        ),
        DangerousPattern(
            "ddl_truncate",
            r"\bTRUNCATE\s+TABLE\b",
            "HIGH",
            "DDL TRUNCATE statement",
        ),
        # DCL statements
        DangerousPattern(
            "dcl_grant",
            r"\b(GRANT|REVOKE|DENY)\b",
            "HIGH",
            "DCL statement",
        ),
        # DML modification
        DangerousPattern(
            "dml_insert",
            r"\bINSERT\s+INTO\b",
            "HIGH",
            "INSERT statement",
        ),
        DangerousPattern(
            "dml_update",
            r"\bUPDATE\s+\w+\s+SET\b",
            "HIGH",
            "UPDATE statement",
        ),
        DangerousPattern(
            "dml_delete",
            r"\bDELETE\s+FROM\b",
            "HIGH",
            "DELETE statement",
        ),
        # Transaction control
        DangerousPattern(
            "transaction",
            r"\b(COMMIT|ROLLBACK|SAVEPOINT|BEGIN\s+TRANSACTION)\b",
            "MEDIUM",
            "Transaction control",
        ),
        # System/Exec
        DangerousPattern(
            "exec",
            r"\b(EXEC|EXECUTE|CALL)\s*\(",
            "HIGH",
            "Execute/call statement",
        ),
        # File operations
        DangerousPattern(
            "file_ops",
            r"\b(LOAD_FILE|INTO\s+OUTFILE|INTO\s+DUMPFILE)\b",
            "HIGH",
            "File operation",
        ),
        # Comment injection
        DangerousPattern(
            "line_comment",
            r"--\s*$",
            "MEDIUM",
            "Line comment at end (potential injection)",
        ),
        DangerousPattern(
            "block_comment",
            r"/\*[^*]*\*+(?:[^/*][^*]*\*+)*/",
            "LOW",
            "Block comment",
        ),
        # Stacked queries
        DangerousPattern(
            "stacked_query",
            r";\s*(SELECT|INSERT|UPDATE|DELETE|DROP|CREATE|ALTER|EXEC)",
            "HIGH",
            "Stacked query",
        ),
        # Union injection
        DangerousPattern(
            "union_select",
            r"\bUNION\s+(ALL\s+)?SELECT\b",
            "MEDIUM",
            "UNION SELECT (potential injection)",
        ),
        # Time-based injection
        DangerousPattern(
            "sleep",
            r"\b(SLEEP|WAITFOR\s+DELAY|BENCHMARK)\s*\(",
            "HIGH",
            "Time-based injection",
        ),
        # Error-based injection
        DangerousPattern(
            "extractvalue",
            r"\b(EXTRACTVALUE|UPDATEXML|EXP|FLOOR\s*\(\s*RAND)\b",
            "MEDIUM",
            "Error-based injection function",
        ),
        # Boolean-based injection patterns
        DangerousPattern(
            "always_true",
            r"(?:OR|AND)\s+['\"0-9]+\s*=\s*['\"0-9]+",
            "MEDIUM",
            "Always true/false condition",
        ),
        DangerousPattern(
            "or_1_eq_1",
            r"\bOR\s+1\s*=\s*1\b",
            "HIGH",
            "Classic OR 1=1 injection",
        ),
    ]

    def __init__(self) -> None:
        """Create a registry pre-loaded with ``DEFAULT_PATTERNS``."""
        # The two lists are index-aligned: _compiled[i] belongs to _patterns[i].
        self._patterns: list[DangerousPattern] = []
        self._compiled: list[tuple[DangerousPattern, re.Pattern]] = []

        # Register default patterns
        for pattern in self.DEFAULT_PATTERNS:
            self.register(pattern)

    def register(self, pattern: DangerousPattern) -> None:
        """Register a new dangerous pattern.

        Args:
            pattern: Pattern to add. Its regex is compiled with
                ``re.IGNORECASE | re.MULTILINE``.

        Raises:
            re.error: If ``pattern.pattern`` is not a valid regular
                expression. The registry is left unchanged in that case.
        """
        # Compile before storing anything: if compilation fails, appending
        # first would leave _patterns one entry longer than _compiled and
        # break the index alignment that unregister() relies on.
        compiled = re.compile(pattern.pattern, re.IGNORECASE | re.MULTILINE)
        self._patterns.append(pattern)
        self._compiled.append((pattern, compiled))

    def unregister(self, name: str) -> bool:
        """Unregister a pattern by name.

        Only the first pattern carrying ``name`` is removed; register() does
        not reject duplicate names, so later duplicates stay active.

        Returns:
            True if a pattern was removed, False if no pattern has that name.
        """
        for i, p in enumerate(self._patterns):
            if p.name == name:
                # Safe to delete while enumerating because we return at once.
                del self._patterns[i]
                del self._compiled[i]
                return True
        return False

    def check(self, query: str) -> list[tuple[DangerousPattern, str]]:
        """Check query against all patterns.

        Args:
            query: SQL text to scan.

        Returns:
            List of (pattern, matched_text) tuples in registration order,
            with at most one entry (the first hit) per registered pattern.
        """
        matches = []
        for pattern, compiled in self._compiled:
            match = compiled.search(query)
            if match:
                matches.append((pattern, match.group()))
        return matches

    def __iter__(self) -> Iterator[DangerousPattern]:
        """Iterate over the registered patterns in registration order."""
        return iter(self._patterns)
|
|
354
|
+
|
|
355
|
+
|
|
356
|
+
# =============================================================================
|
|
357
|
+
# Core SQL Validator
|
|
358
|
+
# =============================================================================
|
|
359
|
+
|
|
360
|
+
|
|
361
|
+
class SQLQueryValidator:
    """Enhanced SQL query validator with pluggable policies.

    Validates SQL queries for security issues using configurable policies
    and pattern-based detection.

    Example:
        # With default policy
        validator = SQLQueryValidator()
        validator.validate("SELECT * FROM users")  # OK

        # With custom policy
        policy = SecurityPolicy.strict()
        validator = SQLQueryValidator(policy=policy)
        validator.validate("SELECT * FROM users JOIN orders")  # Raises error
    """

    # Table reference following FROM/JOIN: either a bare identifier or an
    # identifier quoted with "...", `...` or [...]. Exactly one of the four
    # groups is non-empty for every match.
    _TABLE_REF = re.compile(
        r"\b(?:FROM|JOIN)\s+"
        r'(?:"([^"]+)"|`([^`]+)`|\[([^\]]+)\]|([a-zA-Z_][a-zA-Z0-9_]*))',
        re.IGNORECASE,
    )

    def __init__(
        self,
        policy: SecurityPolicy | None = None,
        pattern_registry: PatternRegistry | None = None,
        audit_logger: "QueryAuditLogger | None" = None,
    ):
        """Initialize SQL query validator.

        Args:
            policy: Security policy to use (default: STANDARD)
            pattern_registry: Custom pattern registry. Policy-derived
                patterns are registered INTO this object, so a registry
                shared between validators accumulates the patterns of all
                of them.
            audit_logger: Optional audit logger
        """
        self.policy = policy or SecurityPolicy.standard()
        self.pattern_registry = pattern_registry or PatternRegistry()
        self.audit_logger = audit_logger

        # Apply policy-specific patterns
        self._apply_policy_patterns()

    def _apply_policy_patterns(self) -> None:
        """Translate policy switches into patterns on the registry.

        NOTE(review): ``allow_aggregations`` and ``allow_window_functions``
        are not consulted anywhere in this class.
        """
        # Block UNION if not allowed
        if not self.policy.allow_union:
            self.pattern_registry.register(
                DangerousPattern(
                    "policy_union",
                    r"\bUNION\b",
                    "MEDIUM",
                    "UNION blocked by policy",
                )
            )

        # Block subqueries if not allowed
        if not self.policy.allow_subqueries:
            self.pattern_registry.register(
                DangerousPattern(
                    "policy_subquery",
                    r"\(\s*SELECT\b",
                    "MEDIUM",
                    "Subquery blocked by policy",
                )
            )

        # Block joins if not allowed
        if not self.policy.allow_joins:
            self.pattern_registry.register(
                DangerousPattern(
                    "policy_join",
                    r"\b(INNER|LEFT|RIGHT|FULL|CROSS)?\s*JOIN\b",
                    "MEDIUM",
                    "JOIN blocked by policy",
                )
            )

        # Block CTEs if not allowed. \A anchors at the very start of the
        # query even though the registry compiles with re.MULTILINE, so only
        # a statement that begins with WITH is flagged (not "WITH ROLLUP" or
        # table hints further in). Registered after the checks above so the
        # first-reported pattern for queries they already reject is unchanged.
        if not self.policy.allow_cte:
            self.pattern_registry.register(
                DangerousPattern(
                    "policy_cte",
                    r"\A\s*WITH\b",
                    "MEDIUM",
                    "CTE (WITH clause) blocked by policy",
                )
            )

        # Add custom blocked patterns
        for i, pattern in enumerate(self.policy.blocked_patterns):
            self.pattern_registry.register(
                DangerousPattern(
                    f"custom_blocked_{i}",
                    pattern,
                    "HIGH",
                    "Custom blocked pattern",
                )
            )

        # Add blocked functions (matched as "<name>(", name escaped literally)
        for func in self.policy.blocked_functions:
            self.pattern_registry.register(
                DangerousPattern(
                    f"blocked_func_{func.lower()}",
                    rf"\b{re.escape(func)}\s*\(",
                    "HIGH",
                    f"Blocked function: {func}",
                )
            )

    def validate(self, query: str) -> None:
        """Validate a SQL query for security issues.

        Checks run in order: non-empty, length, statement type, multiple
        statements, registered patterns, then the table whitelist (only when
        the policy defines one). The audit logger, if any, is called only
        after every check has passed.

        Args:
            query: SQL query string to validate

        Raises:
            QueryValidationError: If query fails basic validation
            SQLInjectionError: If potential injection is detected
        """
        if not query or not query.strip():
            raise QueryValidationError("Empty query")

        # Check length (measured on the raw, unstripped query)
        if len(query) > self.policy.max_query_length:
            raise QueryValidationError(
                f"Query exceeds maximum length of {self.policy.max_query_length}"
            )

        normalized = query.strip()

        # Check statement type
        self._validate_statement_type(normalized)

        # Check for multiple statements
        self._check_multiple_statements(normalized)

        # Check against pattern registry; only the first hit is reported
        matches = self.pattern_registry.check(normalized)
        if matches:
            pattern, matched = matches[0]
            if self.policy.on_violation:
                self.policy.on_violation(pattern.name, matched)
            raise SQLInjectionError(
                f"Dangerous pattern detected: {pattern.description}",
                pattern=pattern.pattern,
                query=query,
            )

        # Validate table names
        if self.policy.allowed_tables:
            self._validate_table_names(normalized)

        # Log successful validation
        if self.audit_logger:
            self.audit_logger.log_query(query, success=True)

    def _validate_statement_type(self, query: str) -> None:
        """Validate statement type is allowed.

        The statement type is the first word of the query, upper-cased.

        Raises:
            QueryValidationError: If no leading word is found or it is not
                in ``policy.allowed_statements``.
        """
        match = re.match(r"^\s*(\w+)", query, re.IGNORECASE)
        if not match:
            raise QueryValidationError("Could not determine SQL statement type")

        statement_type = match.group(1).upper()
        if statement_type not in self.policy.allowed_statements:
            raise QueryValidationError(
                f"Statement type '{statement_type}' not allowed. "
                f"Allowed: {', '.join(self.policy.allowed_statements)}"
            )

    def _check_multiple_statements(self, query: str) -> None:
        """Check for multiple statements.

        A semicolon followed by any non-whitespace character counts as a
        second statement; a trailing semicolon alone is accepted.

        Raises:
            SQLInjectionError: If a second statement is detected.
        """
        # Remove string literals so semicolons inside quotes are ignored
        cleaned = re.sub(r"'[^']*'", "", query)
        cleaned = re.sub(r'"[^"]*"', "", cleaned)

        if re.search(r";\s*\S", cleaned):
            raise SQLInjectionError(
                "Multiple statements detected",
                pattern="stacked_query",
                query=query,
            )

    def _validate_table_names(self, query: str) -> None:
        """Validate table names against whitelist.

        Every identifier that directly follows FROM or JOIN is compared
        case-insensitively with ``policy.allowed_tables``. Quoted identifiers
        ("t", `t`, [t]) are checked by their full quoted content so that
        quoting cannot be used to slip past the whitelist.

        NOTE(review): only the first name after FROM/JOIN is inspected, so
        comma-separated table lists (``FROM a, b``) and the table part of a
        schema-qualified name (``FROM s.t``) are not checked here.

        Raises:
            QueryValidationError: If a referenced table is not whitelisted.
        """
        allowed_lower = {t.lower() for t in self.policy.allowed_tables}
        for groups in self._TABLE_REF.findall(query):
            table = next(name for name in groups if name)
            if table.lower() not in allowed_lower:
                raise QueryValidationError(
                    f"Table '{table}' not in allowed list: "
                    f"{', '.join(self.policy.allowed_tables)}"
                )
|
|
540
|
+
|
|
541
|
+
|
|
542
|
+
def validate_sql_query(
    query: str,
    policy: SecurityPolicy | None = None,
    allowed_tables: list[str] | None = None,
) -> None:
    """Convenience function to validate SQL query.

    Builds a one-off ``SQLQueryValidator`` and runs it on ``query``.

    Args:
        query: SQL query to validate
        policy: Optional security policy (default: the STANDARD preset).
            The object passed in is never modified.
        allowed_tables: Optional table whitelist. When non-empty it replaces
            the policy's ``allowed_tables`` for this call only.

    Raises:
        SQLSecurityError: If validation fails
    """
    import copy

    if policy is None:
        policy = SecurityPolicy.standard()
    elif allowed_tables:
        # Shallow-copy the caller's policy before overriding its whitelist;
        # assigning on the original would silently change every later
        # validation that reuses the same policy object.
        policy = copy.copy(policy)

    if allowed_tables:
        policy.allowed_tables = set(allowed_tables)

    validator = SQLQueryValidator(policy=policy)
    validator.validate(query)
|
|
565
|
+
|
|
566
|
+
|
|
567
|
+
# =============================================================================
|
|
568
|
+
# Whitelist Validation
|
|
569
|
+
# =============================================================================
|
|
570
|
+
|
|
571
|
+
|
|
572
|
+
@dataclass
class SchemaWhitelist:
    """Schema-aware whitelist for tables and columns.

    Table and column names are lower-cased when stored and when looked up,
    so every check is case-insensitive.

    Example:
        whitelist = SchemaWhitelist()
        whitelist.add_table("orders", ["id", "customer_id", "amount", "status"])
        whitelist.add_table("customers", ["id", "name", "email"])

        whitelist.validate_table("orders")  # OK
        whitelist.validate_column("orders", "amount")  # OK
        whitelist.validate_column("orders", "password")  # Raises error
    """

    # Lower-cased table name -> lower-cased allowed column names.
    # An empty set means the table has no column restriction.
    tables: dict[str, set[str]] = field(default_factory=dict)
    # When True, validate_column() accepts any column of a whitelisted table.
    allow_all_columns: bool = False

    def add_table(self, table: str, columns: list[str] | None = None) -> None:
        """Add a table to the whitelist, replacing any existing entry.

        Args:
            table: Table name
            columns: Allowed columns (None or empty = all columns allowed)
        """
        self.tables[table.lower()] = (
            {column.lower() for column in columns} if columns else set()
        )

    def remove_table(self, table: str) -> None:
        """Remove a table from the whitelist (no-op if it is not present)."""
        self.tables.pop(table.lower(), None)

    def validate_table(self, table: str) -> None:
        """Validate table is in whitelist.

        Raises:
            QueryValidationError: If the table has not been added.
        """
        if table.lower() not in self.tables:
            raise QueryValidationError(
                f"Table '{table}' not in whitelist. "
                f"Allowed: {', '.join(self.tables)}"
            )

    def validate_column(self, table: str, column: str) -> None:
        """Validate column is in whitelist for table.

        The table itself is validated first. The column check is skipped
        when ``allow_all_columns`` is set or when the table was added without
        a column list.

        Raises:
            QueryValidationError: If the table is not whitelisted, or the
                column is not among the table's allowed columns.
        """
        self.validate_table(table)
        if self.allow_all_columns:
            return

        columns = self.tables[table.lower()]
        if columns and column.lower() not in columns:
            # Sorted so the message is stable across runs (sets are unordered).
            raise QueryValidationError(
                f"Column '{column}' not allowed for table '{table}'. "
                f"Allowed: {', '.join(sorted(columns))}"
            )

    def get_tables(self) -> list[str]:
        """Get list of allowed tables (lower-cased, in insertion order)."""
        return list(self.tables)

    def get_columns(self, table: str) -> list[str]:
        """Get sorted list of allowed columns for table.

        Returns an empty list both for unknown tables and for tables that
        allow every column.
        """
        return sorted(self.tables.get(table.lower(), ()))
|
|
628
|
+
|
|
629
|
+
|
|
630
|
+
class WhitelistValidator:
    """Checks the tables and selected columns of a query against a schema whitelist.

    Extraction is regex-based and deliberately simplified; it is not a SQL
    parser.

    Example:
        whitelist = SchemaWhitelist()
        whitelist.add_table("orders", ["id", "amount"])

        validator = WhitelistValidator(whitelist)
        validator.validate_query("SELECT id, amount FROM orders")  # OK
        validator.validate_query("SELECT password FROM users")  # Raises error
    """

    def __init__(self, schema: SchemaWhitelist):
        self.schema = schema

    def validate_query(self, query: str) -> None:
        """Validate the query's table and column references.

        Every identifier following FROM or JOIN is passed to
        ``schema.validate_table``. The select list of the first
        ``SELECT ... FROM`` is then split on commas and each item (with any
        table qualifier and ``AS alias`` removed) is passed to
        ``schema.validate_column`` together with the first referenced table.
        Items that are empty or start with "(" are skipped, as is a bare "*".
        """
        referenced = re.findall(
            r"\b(?:FROM|JOIN)\s+([a-zA-Z_][a-zA-Z0-9_]*)\b", query, re.IGNORECASE
        )
        for name in referenced:
            self.schema.validate_table(name)

        # Column extraction is simplified; full SQL parsing would need a
        # proper parser.
        projection = re.search(
            r"SELECT\s+(.+?)\s+FROM", query, re.IGNORECASE | re.DOTALL
        )
        if projection is None or not referenced:
            return

        select_list = projection.group(1)
        if select_list.strip() == "*":
            return

        # All columns are checked against the first table only (simplified).
        primary_table = referenced[0]
        for item in select_list.split(","):
            # Drop a "table." qualifier, then a trailing "AS alias".
            bare = item.strip().rsplit(".", 1)[-1]
            bare = re.sub(r"\s+AS\s+\w+$", "", bare, flags=re.IGNORECASE).strip()
            if not bare or bare.startswith("("):
                continue
            self.schema.validate_column(primary_table, bare)
|
|
669
|
+
|
|
670
|
+
|
|
671
|
+
# =============================================================================
|
|
672
|
+
# Parameterized Queries
|
|
673
|
+
# =============================================================================
|
|
674
|
+
|
|
675
|
+
|
|
676
|
+
@dataclass
class ParameterizedQuery:
    """A parameterized SQL query.

    Stores query template and parameters separately for safe execution.
    Placeholders are written ``:name`` where ``name`` is one or more word
    characters.

    Example:
        query = ParameterizedQuery(
            template="SELECT * FROM orders WHERE amount > :min_amount",
            parameters={"min_amount": 100}
        )
    """

    template: str
    parameters: dict[str, Any] = field(default_factory=dict)

    # Shared by validation and rendering so both agree on what a placeholder
    # is. Unannotated on purpose: this is a class constant, not a dataclass
    # field.
    _PLACEHOLDER = re.compile(r":(\w+)")

    def __post_init__(self) -> None:
        """Validate that every placeholder in the template has a parameter.

        Raises:
            QueryValidationError: If one or more placeholders have no entry
                in ``parameters``.
        """
        placeholders = set(self._PLACEHOLDER.findall(self.template))
        missing = placeholders.difference(self.parameters)
        if missing:
            raise QueryValidationError(
                f"Missing parameters: {', '.join(sorted(missing))}"
            )

    def render(self) -> str:
        """Render the query with parameters.

        Note: For Polars SQL, parameters are substituted directly.
        Values are escaped to prevent injection.

        Substitution is done in a single pass over the template, so:

        * a placeholder is only replaced as a whole token (``:min`` never
          touches ``:min_amount``), and
        * text inside an already substituted value is never scanned again,
          even if it looks like another placeholder.

        Returns:
            The SQL text with every known placeholder replaced by its
            escaped value. Placeholders without a parameter are left as-is.

        Raises:
            QueryValidationError: If a parameter value has an unsupported type.
        """

        def substitute(match: re.Match[str]) -> str:
            name = match.group(1)
            if name not in self.parameters:
                return match.group(0)
            return self._escape_value(self.parameters[name])

        # A callable replacement is inserted literally, so backslashes in
        # values are not interpreted as regex escapes.
        return self._PLACEHOLDER.sub(substitute, self.template)

    def _escape_value(self, value: Any) -> str:
        """Escape a parameter value for SQL.

        ``None`` becomes ``NULL``, booleans ``TRUE``/``FALSE``, numbers their
        ``str()`` form, strings a single-quoted literal with embedded quotes
        doubled, and lists/tuples a parenthesised comma-separated list of
        escaped items.

        Raises:
            QueryValidationError: For any other value type.
        """
        if value is None:
            return "NULL"
        # bool must be tested before int: bool is a subclass of int.
        if isinstance(value, bool):
            return "TRUE" if value else "FALSE"
        if isinstance(value, (int, float)):
            # NOTE(review): non-finite floats render as "nan"/"inf", which
            # are probably not valid numeric literals - confirm and reject.
            return str(value)
        if isinstance(value, str):
            # Escape single quotes by doubling them.
            escaped = value.replace("'", "''")
            return f"'{escaped}'"
        if isinstance(value, (list, tuple)):
            escaped_items = [self._escape_value(item) for item in value]
            return f"({', '.join(escaped_items)})"
        raise QueryValidationError(
            f"Unsupported parameter type: {type(value)}"
        )
|
|
736
|
+
|
|
737
|
+
|
|
738
|
+
class SecureSQLBuilder:
    """Builder for secure SQL queries with parameterization.

    Provides a fluent interface for building secure SQL queries
    with automatic parameter escaping and validation.

    Table and column names are validated as identifiers when they are added.
    WHERE, HAVING and JOIN ... ON conditions are stored verbatim; they are
    only checked when ``execute()`` runs the rendered query through the
    validator.

    Example:
        builder = SecureSQLBuilder(allowed_tables=["orders", "customers"])

        query = (
            builder
            .select("orders", ["id", "amount", "status"])
            .where("amount > :min_amount")
            .where("status = :status")
            .order_by("amount", desc=True)
            .limit(100)
            .build({"min_amount": 100, "status": "pending"})
        )

        # Execute with context
        result = builder.execute(ctx, query)
    """

    def __init__(
        self,
        allowed_tables: list[str] | None = None,
        policy: SecurityPolicy | None = None,
    ):
        """Create an empty builder.

        Args:
            allowed_tables: Optional table allowlist (matched exactly, case
                included). ``None`` or empty disables the check.
            policy: Security policy; ``SecurityPolicy.standard()`` if omitted.
        """
        self.allowed_tables = set(allowed_tables) if allowed_tables else None
        self.policy = policy or SecurityPolicy.standard()
        self.validator = SQLQueryValidator(policy=self.policy)

        # Query parts
        self._select_table: str | None = None
        self._select_columns: list[str] = []
        self._joins: list[str] = []
        self._where_clauses: list[str] = []
        self._group_by: list[str] = []
        self._having_clauses: list[str] = []
        self._order_by: list[str] = []
        self._limit_value: int | None = None
        self._offset_value: int | None = None

    def select(
        self,
        table: str,
        columns: list[str] | None = None,
    ) -> "SecureSQLBuilder":
        """Set SELECT table and columns.

        Args:
            table: Table name
            columns: Columns to select (None = all)

        Raises:
            QueryValidationError: If the table or a column is not a valid
                identifier, or the table is not in the allowlist.
        """
        self._check_table_allowed(table)
        self._select_table = table

        if columns:
            for col in columns:
                self._validate_identifier(col)
            # Copy so later changes to the caller's list cannot slip
            # unvalidated names into the query.
            self._select_columns = list(columns)
        else:
            self._select_columns = ["*"]

        return self

    def join(
        self,
        table: str,
        on: str,
        join_type: str = "INNER",
    ) -> "SecureSQLBuilder":
        """Add a JOIN clause.

        Args:
            table: Table to join
            on: Join condition (stored verbatim)
            join_type: Type of join (INNER, LEFT, RIGHT, FULL, CROSS;
                case-insensitive)

        Raises:
            QueryValidationError: If the policy forbids joins, the table is
                invalid or not allowed, or the join type is unknown.
        """
        if not self.policy.allow_joins:
            raise QueryValidationError("JOINs not allowed by policy")

        self._check_table_allowed(table)

        join_type = join_type.upper()
        if join_type not in {"INNER", "LEFT", "RIGHT", "FULL", "CROSS"}:
            raise QueryValidationError(f"Invalid join type: {join_type}")

        self._joins.append(f"{join_type} JOIN {table} ON {on}")
        return self

    def where(self, condition: str) -> "SecureSQLBuilder":
        """Add a WHERE condition.

        Args:
            condition: WHERE condition (can include :param placeholders)
        """
        self._where_clauses.append(condition)
        return self

    def group_by(self, *columns: str) -> "SecureSQLBuilder":
        """Add GROUP BY columns (optionally qualified as ``table.column``).

        Nothing is added unless every column validates.
        """
        for col in columns:
            self._validate_qualified_identifier(col)
        self._group_by.extend(columns)
        return self

    def having(self, condition: str) -> "SecureSQLBuilder":
        """Add HAVING condition (stored verbatim)."""
        self._having_clauses.append(condition)
        return self

    def order_by(self, column: str, desc: bool = False) -> "SecureSQLBuilder":
        """Add ORDER BY column (optionally qualified as ``table.column``)."""
        self._validate_qualified_identifier(column)
        direction = "DESC" if desc else "ASC"
        self._order_by.append(f"{column} {direction}")
        return self

    def limit(self, n: int) -> "SecureSQLBuilder":
        """Set LIMIT.

        Raises:
            QueryValidationError: If ``n`` is negative.
        """
        if n < 0:
            raise QueryValidationError("LIMIT must be non-negative")
        self._limit_value = n
        return self

    def offset(self, n: int) -> "SecureSQLBuilder":
        """Set OFFSET.

        Raises:
            QueryValidationError: If ``n`` is negative.
        """
        if n < 0:
            raise QueryValidationError("OFFSET must be non-negative")
        self._offset_value = n
        return self

    def build(self, parameters: dict[str, Any] | None = None) -> ParameterizedQuery:
        """Build the parameterized query.

        Clauses are emitted in the order SELECT, FROM, JOINs, WHERE,
        GROUP BY, HAVING, ORDER BY, LIMIT, OFFSET. Multiple WHERE (or HAVING)
        conditions are each parenthesised and combined with AND. The builder
        state is not reset.

        Args:
            parameters: Query parameters

        Returns:
            ParameterizedQuery ready for execution

        Raises:
            QueryValidationError: If ``select()`` has not been called.
        """
        if not self._select_table:
            raise QueryValidationError("No table selected")

        parts: list[str] = [
            f"SELECT {', '.join(self._select_columns)}",
            f"FROM {self._select_table}",
        ]
        parts.extend(self._joins)

        if self._where_clauses:
            conditions = " AND ".join(f"({c})" for c in self._where_clauses)
            parts.append(f"WHERE {conditions}")

        if self._group_by:
            parts.append(f"GROUP BY {', '.join(self._group_by)}")

        if self._having_clauses:
            conditions = " AND ".join(f"({c})" for c in self._having_clauses)
            parts.append(f"HAVING {conditions}")

        if self._order_by:
            parts.append(f"ORDER BY {', '.join(self._order_by)}")

        # "is not None" so that an explicit 0 is still emitted.
        if self._limit_value is not None:
            parts.append(f"LIMIT {self._limit_value}")

        if self._offset_value is not None:
            parts.append(f"OFFSET {self._offset_value}")

        return ParameterizedQuery(
            template=" ".join(parts),
            parameters=parameters or {},
        )

    def execute(
        self,
        ctx: pl.SQLContext,
        query: ParameterizedQuery,
    ) -> pl.DataFrame:
        """Execute a parameterized query.

        The query is rendered, passed through this builder's validator, and
        only then executed on ``ctx``.

        Args:
            ctx: Polars SQL context
            query: Parameterized query to execute

        Returns:
            Query result as DataFrame
        """
        rendered = query.render()

        # Validate the rendered query
        self.validator.validate(rendered)

        return ctx.execute(rendered).collect()

    def reset(self) -> "SecureSQLBuilder":
        """Reset builder state (policy and allowlist are kept)."""
        self._select_table = None
        self._select_columns = []
        self._joins = []
        self._where_clauses = []
        self._group_by = []
        self._having_clauses = []
        self._order_by = []
        self._limit_value = None
        self._offset_value = None
        return self

    def _check_table_allowed(self, table: str) -> None:
        """Validate *table* as an identifier and against the allowlist, if set."""
        self._validate_identifier(table)
        if self.allowed_tables and table not in self.allowed_tables:
            raise QueryValidationError(
                f"Table '{table}' not in allowed list"
            )

    def _validate_qualified_identifier(self, name: str) -> None:
        """Validate every dot-separated part of a possibly qualified name.

        Checking only the last part would let arbitrary text in the
        qualifier through to the generated SQL.
        """
        for part in name.split("."):
            self._validate_identifier(part)

    def _validate_identifier(self, identifier: str) -> None:
        """Validate SQL identifier.

        Accepts ``*`` or a name made of letters, digits and underscores that
        does not start with a digit and does not exceed the policy's
        ``max_identifier_length``.

        Raises:
            QueryValidationError: If the identifier is empty, too long, or
                contains other characters.
        """
        if not identifier:
            raise QueryValidationError("Empty identifier")

        if len(identifier) > self.policy.max_identifier_length:
            raise QueryValidationError(
                f"Identifier too long: {len(identifier)} > {self.policy.max_identifier_length}"
            )

        if identifier == "*":
            return

        # fullmatch, not match(...$): "$" also matches before a trailing
        # newline, which would let "name\n" through.
        if not re.fullmatch(r"[a-zA-Z_][a-zA-Z0-9_]*", identifier):
            raise QueryValidationError(
                f"Invalid identifier '{identifier}': must be alphanumeric with underscores"
            )
|
|
987
|
+
|
|
988
|
+
|
|
989
|
+
# =============================================================================
|
|
990
|
+
# Secure Query Mixin
|
|
991
|
+
# =============================================================================
|
|
992
|
+
|
|
993
|
+
|
|
994
|
+
class SecureQueryMixin:
    """Mixin that gives validators a safe way to build and run SQL queries.

    Queries are built through ``SecureSQLBuilder`` and are validated with a
    ``SQLQueryValidator`` before they are executed.

    Example:
        class MyValidator(BaseValidator, SecureQueryMixin):
            def validate(self, lf):
                query = self.build_secure_query(
                    table="data",
                    columns=["id", "value"],
                    where="value > :threshold",
                    parameters={"threshold": 100},
                )
                result = self.execute_secure_query(lf, query)
                return self.process_result(result)
    """

    # Class-level defaults; set_security_policy() overrides them per instance.
    _security_policy: SecurityPolicy = SecurityPolicy.standard()
    _sql_validator: SQLQueryValidator | None = None

    def set_security_policy(self, policy: SecurityPolicy) -> None:
        """Install ``policy`` and a fresh validator built from it."""
        self._security_policy = policy
        self._sql_validator = SQLQueryValidator(policy=policy)

    def get_sql_validator(self) -> SQLQueryValidator:
        """Return the cached SQL validator, creating it on first use."""
        validator = self._sql_validator
        if validator is None:
            validator = SQLQueryValidator(policy=self._security_policy)
            self._sql_validator = validator
        return validator

    def validate_query(self, query: str) -> None:
        """Run the SQL validator on ``query``.

        Args:
            query: Query to validate

        Raises:
            SQLSecurityError: If validation fails
        """
        validator = self.get_sql_validator()
        validator.validate(query)

    def build_secure_query(
        self,
        table: str,
        columns: list[str] | None = None,
        where: str | None = None,
        parameters: dict[str, Any] | None = None,
        allowed_tables: list[str] | None = None,
    ) -> ParameterizedQuery:
        """Assemble a ``SELECT`` with an optional ``WHERE`` clause.

        Args:
            table: Table name
            columns: Columns to select
            where: WHERE clause with :param placeholders (skipped when
                empty or None)
            parameters: Parameter values
            allowed_tables: Optional table whitelist

        Returns:
            ParameterizedQuery
        """
        sql = SecureSQLBuilder(
            policy=self._security_policy,
            allowed_tables=allowed_tables,
        )
        sql.select(table, columns)
        if where:
            sql.where(where)
        return sql.build(parameters)

    def execute_secure_query(
        self,
        lf: pl.LazyFrame,
        query: ParameterizedQuery,
        table_name: str = "data",
    ) -> pl.DataFrame:
        """Render, validate and execute ``query`` against ``lf``.

        Args:
            lf: LazyFrame to query
            query: Parameterized query
            table_name: Name under which ``lf`` is registered in the SQL
                context

        Returns:
            Query result
        """
        sql_text = query.render()
        # Validation happens on the final text, after parameter substitution.
        self.validate_query(sql_text)

        context = pl.SQLContext()
        context.register(table_name, lf)
        lazy_result = context.execute(sql_text)
        return lazy_result.collect()
|
|
1090
|
+
|
|
1091
|
+
|
|
1092
|
+
# =============================================================================
|
|
1093
|
+
# Audit Logging
|
|
1094
|
+
# =============================================================================
|
|
1095
|
+
|
|
1096
|
+
|
|
1097
|
+
@dataclass
class AuditEntry:
    """A single audit log entry.

    Entries are created by ``QueryAuditLogger.log_query``; the field
    comments below describe the values that method stores.
    """

    # Creation time of the entry (log_query passes datetime.now()).
    timestamp: datetime
    # First 16 hex characters of the SHA-256 digest of the query text.
    query_hash: str
    # Shortened query text; string literals are masked unless the logger
    # was created with log_full_queries=True.
    query_preview: str
    # Whether the logged execution attempt succeeded.
    success: bool
    # Class name of the error passed to log_query, if any.
    error_type: str | None = None
    # str() of the error passed to log_query, if any.
    error_message: str | None = None
    # Optional user identifier supplied by the caller.
    user: str | None = None
    # Optional additional context supplied by the caller.
    context: dict[str, Any] = field(default_factory=dict)
|
|
1109
|
+
|
|
1110
|
+
|
|
1111
|
+
class QueryAuditLogger:
    """Audit logger for SQL query execution.

    Logs all query attempts for security monitoring. Entries are kept in
    memory (oldest dropped first once ``max_entries`` is exceeded) and can
    optionally be mirrored to a standard ``logging.Logger``.

    Example:
        logger = QueryAuditLogger()
        logger.log_query("SELECT * FROM users", success=True)

        # Get recent entries
        for entry in logger.get_recent(10):
            print(f"{entry.timestamp}: {entry.query_preview}")

        # Export to file
        logger.export_to_file("audit.log")
    """

    # Number of query characters kept in a preview before "..." is appended.
    _PREVIEW_LENGTH = 100

    def __init__(
        self,
        max_entries: int = 10000,
        log_full_queries: bool = False,
        python_logger: logging.Logger | None = None,
    ):
        """Initialize audit logger.

        Args:
            max_entries: Maximum entries to keep in memory
            log_full_queries: When False (default), quoted string literals
                in previews are replaced by ``'***'``. Previews are
                truncated in either mode.
            python_logger: Optional Python logger for external logging
        """
        self.max_entries = max_entries
        self.log_full_queries = log_full_queries
        self.python_logger = python_logger
        self._entries: list[AuditEntry] = []

    def log_query(
        self,
        query: str,
        success: bool,
        error: Exception | None = None,
        user: str | None = None,
        context: dict[str, Any] | None = None,
    ) -> None:
        """Log a query execution attempt.

        Args:
            query: SQL query
            success: Whether execution succeeded
            error: Optional error that occurred
            user: Optional user identifier
            context: Optional additional context
        """
        # Short, stable fingerprint of the query text.
        query_hash = hashlib.sha256(query.encode()).hexdigest()[:16]
        preview = self._make_preview(query)

        self._entries.append(
            AuditEntry(
                timestamp=datetime.now(),
                query_hash=query_hash,
                query_preview=preview,
                success=success,
                error_type=type(error).__name__ if error else None,
                error_message=str(error) if error else None,
                user=user,
                context=context or {},
            )
        )

        # Drop the oldest entries once over the limit. Deleting a prefix
        # (instead of slicing with a negative index) also behaves correctly
        # when max_entries is 0.
        overflow = len(self._entries) - self.max_entries
        if overflow > 0:
            del self._entries[:overflow]

        # Mirror to the Python logger if configured.
        if self.python_logger is not None:
            self.python_logger.log(
                logging.INFO if success else logging.WARNING,
                "SQL %s [%s]: %s",
                "OK" if success else "FAIL",
                query_hash,
                preview,
            )

    def _make_preview(self, query: str) -> str:
        """Return the (optionally masked) preview text for ``query``.

        Masking is applied to the whole query before truncation, so a string
        literal that straddles the cut-off is still masked. A literal with
        no closing quote anywhere in the query cannot be matched and is left
        as-is.
        """
        text = query
        if not self.log_full_queries:
            text = re.sub(r"'[^']*'", "'***'", text)  # Mask string values
        if len(text) > self._PREVIEW_LENGTH:
            return text[: self._PREVIEW_LENGTH] + "..."
        return text

    def get_recent(self, n: int = 100) -> list[AuditEntry]:
        """Get the ``n`` most recent audit entries, oldest first.

        Returns an empty list when ``n`` is zero or negative.
        """
        # Guard needed because entries[-0:] would return the whole list.
        if n <= 0:
            return []
        return self._entries[-n:]

    def get_failures(self, n: int = 100) -> list[AuditEntry]:
        """Get the ``n`` most recent failed queries, oldest first.

        Returns an empty list when ``n`` is zero or negative.
        """
        if n <= 0:
            return []
        failures = [e for e in self._entries if not e.success]
        return failures[-n:]

    def get_by_hash(self, query_hash: str) -> list[AuditEntry]:
        """Get entries by query hash."""
        return [e for e in self._entries if e.query_hash == query_hash]

    def clear(self) -> None:
        """Clear all entries."""
        self._entries.clear()

    def export_to_file(self, filepath: str) -> None:
        """Export audit log to file as JSON Lines (one object per entry).

        All entries are serialized before the file is opened, so a
        non-serializable ``context`` value raises without truncating an
        existing file.

        Args:
            filepath: Output file path (overwritten if it exists)
        """
        import json

        lines = [
            json.dumps(
                {
                    "timestamp": entry.timestamp.isoformat(),
                    "query_hash": entry.query_hash,
                    "query_preview": entry.query_preview,
                    "success": entry.success,
                    "error_type": entry.error_type,
                    "error_message": entry.error_message,
                    "user": entry.user,
                    "context": entry.context,
                }
            )
            + "\n"
            for entry in self._entries
        ]

        with open(filepath, "w", encoding="utf-8") as f:
            f.writelines(lines)

    def get_stats(self) -> dict[str, Any]:
        """Get audit statistics over the entries currently in memory.

        ``success_rate`` is 1.0 when there are no entries.
        """
        total = len(self._entries)
        successes = sum(1 for e in self._entries if e.success)
        failures = total - successes

        return {
            "total_queries": total,
            "successful": successes,
            "failed": failures,
            "success_rate": successes / total if total > 0 else 1.0,
            "unique_queries": len({e.query_hash for e in self._entries}),
        }
|