truthound 1.0.8__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- truthound/__init__.py +162 -0
- truthound/adapters.py +100 -0
- truthound/api.py +365 -0
- truthound/audit/__init__.py +248 -0
- truthound/audit/core.py +967 -0
- truthound/audit/filters.py +620 -0
- truthound/audit/formatters.py +707 -0
- truthound/audit/logger.py +902 -0
- truthound/audit/middleware.py +571 -0
- truthound/audit/storage.py +1083 -0
- truthound/benchmark/__init__.py +123 -0
- truthound/benchmark/base.py +757 -0
- truthound/benchmark/comparison.py +635 -0
- truthound/benchmark/generators.py +706 -0
- truthound/benchmark/reporters.py +718 -0
- truthound/benchmark/runner.py +635 -0
- truthound/benchmark/scenarios.py +712 -0
- truthound/cache.py +252 -0
- truthound/checkpoint/__init__.py +136 -0
- truthound/checkpoint/actions/__init__.py +164 -0
- truthound/checkpoint/actions/base.py +324 -0
- truthound/checkpoint/actions/custom.py +234 -0
- truthound/checkpoint/actions/discord_notify.py +290 -0
- truthound/checkpoint/actions/email_notify.py +405 -0
- truthound/checkpoint/actions/github_action.py +406 -0
- truthound/checkpoint/actions/opsgenie.py +1499 -0
- truthound/checkpoint/actions/pagerduty.py +226 -0
- truthound/checkpoint/actions/slack_notify.py +233 -0
- truthound/checkpoint/actions/store_result.py +249 -0
- truthound/checkpoint/actions/teams_notify.py +1570 -0
- truthound/checkpoint/actions/telegram_notify.py +419 -0
- truthound/checkpoint/actions/update_docs.py +552 -0
- truthound/checkpoint/actions/webhook.py +293 -0
- truthound/checkpoint/analytics/__init__.py +147 -0
- truthound/checkpoint/analytics/aggregations/__init__.py +23 -0
- truthound/checkpoint/analytics/aggregations/rollup.py +481 -0
- truthound/checkpoint/analytics/aggregations/time_bucket.py +306 -0
- truthound/checkpoint/analytics/analyzers/__init__.py +17 -0
- truthound/checkpoint/analytics/analyzers/anomaly.py +386 -0
- truthound/checkpoint/analytics/analyzers/base.py +270 -0
- truthound/checkpoint/analytics/analyzers/forecast.py +421 -0
- truthound/checkpoint/analytics/analyzers/trend.py +314 -0
- truthound/checkpoint/analytics/models.py +292 -0
- truthound/checkpoint/analytics/protocols.py +549 -0
- truthound/checkpoint/analytics/service.py +718 -0
- truthound/checkpoint/analytics/stores/__init__.py +16 -0
- truthound/checkpoint/analytics/stores/base.py +306 -0
- truthound/checkpoint/analytics/stores/memory_store.py +353 -0
- truthound/checkpoint/analytics/stores/sqlite_store.py +557 -0
- truthound/checkpoint/analytics/stores/timescale_store.py +501 -0
- truthound/checkpoint/async_actions.py +794 -0
- truthound/checkpoint/async_base.py +708 -0
- truthound/checkpoint/async_checkpoint.py +617 -0
- truthound/checkpoint/async_runner.py +639 -0
- truthound/checkpoint/checkpoint.py +527 -0
- truthound/checkpoint/ci/__init__.py +61 -0
- truthound/checkpoint/ci/detector.py +355 -0
- truthound/checkpoint/ci/reporter.py +436 -0
- truthound/checkpoint/ci/templates.py +454 -0
- truthound/checkpoint/circuitbreaker/__init__.py +133 -0
- truthound/checkpoint/circuitbreaker/breaker.py +542 -0
- truthound/checkpoint/circuitbreaker/core.py +252 -0
- truthound/checkpoint/circuitbreaker/detection.py +459 -0
- truthound/checkpoint/circuitbreaker/middleware.py +389 -0
- truthound/checkpoint/circuitbreaker/registry.py +357 -0
- truthound/checkpoint/distributed/__init__.py +139 -0
- truthound/checkpoint/distributed/backends/__init__.py +35 -0
- truthound/checkpoint/distributed/backends/celery_backend.py +503 -0
- truthound/checkpoint/distributed/backends/kubernetes_backend.py +696 -0
- truthound/checkpoint/distributed/backends/local_backend.py +397 -0
- truthound/checkpoint/distributed/backends/ray_backend.py +625 -0
- truthound/checkpoint/distributed/base.py +774 -0
- truthound/checkpoint/distributed/orchestrator.py +765 -0
- truthound/checkpoint/distributed/protocols.py +842 -0
- truthound/checkpoint/distributed/registry.py +449 -0
- truthound/checkpoint/idempotency/__init__.py +120 -0
- truthound/checkpoint/idempotency/core.py +295 -0
- truthound/checkpoint/idempotency/fingerprint.py +454 -0
- truthound/checkpoint/idempotency/locking.py +604 -0
- truthound/checkpoint/idempotency/service.py +592 -0
- truthound/checkpoint/idempotency/stores.py +653 -0
- truthound/checkpoint/monitoring/__init__.py +134 -0
- truthound/checkpoint/monitoring/aggregators/__init__.py +15 -0
- truthound/checkpoint/monitoring/aggregators/base.py +372 -0
- truthound/checkpoint/monitoring/aggregators/realtime.py +300 -0
- truthound/checkpoint/monitoring/aggregators/window.py +493 -0
- truthound/checkpoint/monitoring/collectors/__init__.py +17 -0
- truthound/checkpoint/monitoring/collectors/base.py +257 -0
- truthound/checkpoint/monitoring/collectors/memory_collector.py +617 -0
- truthound/checkpoint/monitoring/collectors/prometheus_collector.py +451 -0
- truthound/checkpoint/monitoring/collectors/redis_collector.py +518 -0
- truthound/checkpoint/monitoring/events.py +410 -0
- truthound/checkpoint/monitoring/protocols.py +636 -0
- truthound/checkpoint/monitoring/service.py +578 -0
- truthound/checkpoint/monitoring/views/__init__.py +17 -0
- truthound/checkpoint/monitoring/views/base.py +172 -0
- truthound/checkpoint/monitoring/views/queue_view.py +220 -0
- truthound/checkpoint/monitoring/views/task_view.py +240 -0
- truthound/checkpoint/monitoring/views/worker_view.py +263 -0
- truthound/checkpoint/registry.py +337 -0
- truthound/checkpoint/runner.py +356 -0
- truthound/checkpoint/transaction/__init__.py +133 -0
- truthound/checkpoint/transaction/base.py +389 -0
- truthound/checkpoint/transaction/compensatable.py +537 -0
- truthound/checkpoint/transaction/coordinator.py +576 -0
- truthound/checkpoint/transaction/executor.py +622 -0
- truthound/checkpoint/transaction/idempotency.py +534 -0
- truthound/checkpoint/transaction/saga/__init__.py +143 -0
- truthound/checkpoint/transaction/saga/builder.py +584 -0
- truthound/checkpoint/transaction/saga/definition.py +515 -0
- truthound/checkpoint/transaction/saga/event_store.py +542 -0
- truthound/checkpoint/transaction/saga/patterns.py +833 -0
- truthound/checkpoint/transaction/saga/runner.py +718 -0
- truthound/checkpoint/transaction/saga/state_machine.py +793 -0
- truthound/checkpoint/transaction/saga/strategies.py +780 -0
- truthound/checkpoint/transaction/saga/testing.py +886 -0
- truthound/checkpoint/triggers/__init__.py +58 -0
- truthound/checkpoint/triggers/base.py +237 -0
- truthound/checkpoint/triggers/event.py +385 -0
- truthound/checkpoint/triggers/schedule.py +355 -0
- truthound/cli.py +2358 -0
- truthound/cli_modules/__init__.py +124 -0
- truthound/cli_modules/advanced/__init__.py +45 -0
- truthound/cli_modules/advanced/benchmark.py +343 -0
- truthound/cli_modules/advanced/docs.py +225 -0
- truthound/cli_modules/advanced/lineage.py +209 -0
- truthound/cli_modules/advanced/ml.py +320 -0
- truthound/cli_modules/advanced/realtime.py +196 -0
- truthound/cli_modules/checkpoint/__init__.py +46 -0
- truthound/cli_modules/checkpoint/init.py +114 -0
- truthound/cli_modules/checkpoint/list.py +71 -0
- truthound/cli_modules/checkpoint/run.py +159 -0
- truthound/cli_modules/checkpoint/validate.py +67 -0
- truthound/cli_modules/common/__init__.py +71 -0
- truthound/cli_modules/common/errors.py +414 -0
- truthound/cli_modules/common/options.py +419 -0
- truthound/cli_modules/common/output.py +507 -0
- truthound/cli_modules/common/protocol.py +552 -0
- truthound/cli_modules/core/__init__.py +48 -0
- truthound/cli_modules/core/check.py +123 -0
- truthound/cli_modules/core/compare.py +104 -0
- truthound/cli_modules/core/learn.py +57 -0
- truthound/cli_modules/core/mask.py +77 -0
- truthound/cli_modules/core/profile.py +65 -0
- truthound/cli_modules/core/scan.py +61 -0
- truthound/cli_modules/profiler/__init__.py +51 -0
- truthound/cli_modules/profiler/auto_profile.py +175 -0
- truthound/cli_modules/profiler/metadata.py +107 -0
- truthound/cli_modules/profiler/suite.py +283 -0
- truthound/cli_modules/registry.py +431 -0
- truthound/cli_modules/scaffolding/__init__.py +89 -0
- truthound/cli_modules/scaffolding/base.py +631 -0
- truthound/cli_modules/scaffolding/commands.py +545 -0
- truthound/cli_modules/scaffolding/plugins.py +1072 -0
- truthound/cli_modules/scaffolding/reporters.py +594 -0
- truthound/cli_modules/scaffolding/validators.py +1127 -0
- truthound/common/__init__.py +18 -0
- truthound/common/resilience/__init__.py +130 -0
- truthound/common/resilience/bulkhead.py +266 -0
- truthound/common/resilience/circuit_breaker.py +516 -0
- truthound/common/resilience/composite.py +332 -0
- truthound/common/resilience/config.py +292 -0
- truthound/common/resilience/protocols.py +217 -0
- truthound/common/resilience/rate_limiter.py +404 -0
- truthound/common/resilience/retry.py +341 -0
- truthound/datadocs/__init__.py +260 -0
- truthound/datadocs/base.py +571 -0
- truthound/datadocs/builder.py +761 -0
- truthound/datadocs/charts.py +764 -0
- truthound/datadocs/dashboard/__init__.py +63 -0
- truthound/datadocs/dashboard/app.py +576 -0
- truthound/datadocs/dashboard/components.py +584 -0
- truthound/datadocs/dashboard/state.py +240 -0
- truthound/datadocs/engine/__init__.py +46 -0
- truthound/datadocs/engine/context.py +376 -0
- truthound/datadocs/engine/pipeline.py +618 -0
- truthound/datadocs/engine/registry.py +469 -0
- truthound/datadocs/exporters/__init__.py +49 -0
- truthound/datadocs/exporters/base.py +198 -0
- truthound/datadocs/exporters/html.py +178 -0
- truthound/datadocs/exporters/json_exporter.py +253 -0
- truthound/datadocs/exporters/markdown.py +284 -0
- truthound/datadocs/exporters/pdf.py +392 -0
- truthound/datadocs/i18n/__init__.py +86 -0
- truthound/datadocs/i18n/catalog.py +960 -0
- truthound/datadocs/i18n/formatting.py +505 -0
- truthound/datadocs/i18n/loader.py +256 -0
- truthound/datadocs/i18n/plurals.py +378 -0
- truthound/datadocs/renderers/__init__.py +42 -0
- truthound/datadocs/renderers/base.py +401 -0
- truthound/datadocs/renderers/custom.py +342 -0
- truthound/datadocs/renderers/jinja.py +697 -0
- truthound/datadocs/sections.py +736 -0
- truthound/datadocs/styles.py +931 -0
- truthound/datadocs/themes/__init__.py +101 -0
- truthound/datadocs/themes/base.py +336 -0
- truthound/datadocs/themes/default.py +417 -0
- truthound/datadocs/themes/enterprise.py +419 -0
- truthound/datadocs/themes/loader.py +336 -0
- truthound/datadocs/themes.py +301 -0
- truthound/datadocs/transformers/__init__.py +57 -0
- truthound/datadocs/transformers/base.py +268 -0
- truthound/datadocs/transformers/enrichers.py +544 -0
- truthound/datadocs/transformers/filters.py +447 -0
- truthound/datadocs/transformers/i18n.py +468 -0
- truthound/datadocs/versioning/__init__.py +62 -0
- truthound/datadocs/versioning/diff.py +639 -0
- truthound/datadocs/versioning/storage.py +497 -0
- truthound/datadocs/versioning/version.py +358 -0
- truthound/datasources/__init__.py +223 -0
- truthound/datasources/_async_protocols.py +222 -0
- truthound/datasources/_protocols.py +159 -0
- truthound/datasources/adapters.py +428 -0
- truthound/datasources/async_base.py +599 -0
- truthound/datasources/async_factory.py +511 -0
- truthound/datasources/base.py +516 -0
- truthound/datasources/factory.py +433 -0
- truthound/datasources/nosql/__init__.py +47 -0
- truthound/datasources/nosql/base.py +487 -0
- truthound/datasources/nosql/elasticsearch.py +801 -0
- truthound/datasources/nosql/mongodb.py +636 -0
- truthound/datasources/pandas_optimized.py +582 -0
- truthound/datasources/pandas_source.py +216 -0
- truthound/datasources/polars_source.py +395 -0
- truthound/datasources/spark_source.py +479 -0
- truthound/datasources/sql/__init__.py +154 -0
- truthound/datasources/sql/base.py +710 -0
- truthound/datasources/sql/bigquery.py +410 -0
- truthound/datasources/sql/cloud_base.py +199 -0
- truthound/datasources/sql/databricks.py +471 -0
- truthound/datasources/sql/mysql.py +316 -0
- truthound/datasources/sql/oracle.py +427 -0
- truthound/datasources/sql/postgresql.py +321 -0
- truthound/datasources/sql/redshift.py +479 -0
- truthound/datasources/sql/snowflake.py +439 -0
- truthound/datasources/sql/sqlite.py +286 -0
- truthound/datasources/sql/sqlserver.py +437 -0
- truthound/datasources/streaming/__init__.py +47 -0
- truthound/datasources/streaming/base.py +350 -0
- truthound/datasources/streaming/kafka.py +670 -0
- truthound/decorators.py +98 -0
- truthound/docs/__init__.py +69 -0
- truthound/docs/extractor.py +971 -0
- truthound/docs/generator.py +601 -0
- truthound/docs/parser.py +1037 -0
- truthound/docs/renderer.py +999 -0
- truthound/drift/__init__.py +22 -0
- truthound/drift/compare.py +189 -0
- truthound/drift/detectors.py +464 -0
- truthound/drift/report.py +160 -0
- truthound/execution/__init__.py +65 -0
- truthound/execution/_protocols.py +324 -0
- truthound/execution/base.py +576 -0
- truthound/execution/distributed/__init__.py +179 -0
- truthound/execution/distributed/aggregations.py +731 -0
- truthound/execution/distributed/arrow_bridge.py +817 -0
- truthound/execution/distributed/base.py +550 -0
- truthound/execution/distributed/dask_engine.py +976 -0
- truthound/execution/distributed/mixins.py +766 -0
- truthound/execution/distributed/protocols.py +756 -0
- truthound/execution/distributed/ray_engine.py +1127 -0
- truthound/execution/distributed/registry.py +446 -0
- truthound/execution/distributed/spark_engine.py +1011 -0
- truthound/execution/distributed/validator_adapter.py +682 -0
- truthound/execution/pandas_engine.py +401 -0
- truthound/execution/polars_engine.py +497 -0
- truthound/execution/pushdown/__init__.py +230 -0
- truthound/execution/pushdown/ast.py +1550 -0
- truthound/execution/pushdown/builder.py +1550 -0
- truthound/execution/pushdown/dialects.py +1072 -0
- truthound/execution/pushdown/executor.py +829 -0
- truthound/execution/pushdown/optimizer.py +1041 -0
- truthound/execution/sql_engine.py +518 -0
- truthound/infrastructure/__init__.py +189 -0
- truthound/infrastructure/audit.py +1515 -0
- truthound/infrastructure/config.py +1133 -0
- truthound/infrastructure/encryption.py +1132 -0
- truthound/infrastructure/logging.py +1503 -0
- truthound/infrastructure/metrics.py +1220 -0
- truthound/lineage/__init__.py +89 -0
- truthound/lineage/base.py +746 -0
- truthound/lineage/impact_analysis.py +474 -0
- truthound/lineage/integrations/__init__.py +22 -0
- truthound/lineage/integrations/openlineage.py +548 -0
- truthound/lineage/tracker.py +512 -0
- truthound/lineage/visualization/__init__.py +33 -0
- truthound/lineage/visualization/protocols.py +145 -0
- truthound/lineage/visualization/renderers/__init__.py +20 -0
- truthound/lineage/visualization/renderers/cytoscape.py +329 -0
- truthound/lineage/visualization/renderers/d3.py +331 -0
- truthound/lineage/visualization/renderers/graphviz.py +276 -0
- truthound/lineage/visualization/renderers/mermaid.py +308 -0
- truthound/maskers.py +113 -0
- truthound/ml/__init__.py +124 -0
- truthound/ml/anomaly_models/__init__.py +31 -0
- truthound/ml/anomaly_models/ensemble.py +362 -0
- truthound/ml/anomaly_models/isolation_forest.py +444 -0
- truthound/ml/anomaly_models/statistical.py +392 -0
- truthound/ml/base.py +1178 -0
- truthound/ml/drift_detection/__init__.py +26 -0
- truthound/ml/drift_detection/concept.py +381 -0
- truthound/ml/drift_detection/distribution.py +361 -0
- truthound/ml/drift_detection/feature.py +442 -0
- truthound/ml/drift_detection/multivariate.py +495 -0
- truthound/ml/monitoring/__init__.py +88 -0
- truthound/ml/monitoring/alerting/__init__.py +33 -0
- truthound/ml/monitoring/alerting/handlers.py +427 -0
- truthound/ml/monitoring/alerting/rules.py +508 -0
- truthound/ml/monitoring/collectors/__init__.py +19 -0
- truthound/ml/monitoring/collectors/composite.py +105 -0
- truthound/ml/monitoring/collectors/drift.py +324 -0
- truthound/ml/monitoring/collectors/performance.py +179 -0
- truthound/ml/monitoring/collectors/quality.py +369 -0
- truthound/ml/monitoring/monitor.py +536 -0
- truthound/ml/monitoring/protocols.py +451 -0
- truthound/ml/monitoring/stores/__init__.py +15 -0
- truthound/ml/monitoring/stores/memory.py +201 -0
- truthound/ml/monitoring/stores/prometheus.py +296 -0
- truthound/ml/rule_learning/__init__.py +25 -0
- truthound/ml/rule_learning/constraint_miner.py +443 -0
- truthound/ml/rule_learning/pattern_learner.py +499 -0
- truthound/ml/rule_learning/profile_learner.py +462 -0
- truthound/multitenancy/__init__.py +326 -0
- truthound/multitenancy/core.py +852 -0
- truthound/multitenancy/integration.py +597 -0
- truthound/multitenancy/isolation.py +630 -0
- truthound/multitenancy/manager.py +770 -0
- truthound/multitenancy/middleware.py +765 -0
- truthound/multitenancy/quota.py +537 -0
- truthound/multitenancy/resolvers.py +603 -0
- truthound/multitenancy/storage.py +703 -0
- truthound/observability/__init__.py +307 -0
- truthound/observability/context.py +531 -0
- truthound/observability/instrumentation.py +611 -0
- truthound/observability/logging.py +887 -0
- truthound/observability/metrics.py +1157 -0
- truthound/observability/tracing/__init__.py +178 -0
- truthound/observability/tracing/baggage.py +310 -0
- truthound/observability/tracing/config.py +426 -0
- truthound/observability/tracing/exporter.py +787 -0
- truthound/observability/tracing/integration.py +1018 -0
- truthound/observability/tracing/otel/__init__.py +146 -0
- truthound/observability/tracing/otel/adapter.py +982 -0
- truthound/observability/tracing/otel/bridge.py +1177 -0
- truthound/observability/tracing/otel/compat.py +681 -0
- truthound/observability/tracing/otel/config.py +691 -0
- truthound/observability/tracing/otel/detection.py +327 -0
- truthound/observability/tracing/otel/protocols.py +426 -0
- truthound/observability/tracing/processor.py +561 -0
- truthound/observability/tracing/propagator.py +757 -0
- truthound/observability/tracing/provider.py +569 -0
- truthound/observability/tracing/resource.py +515 -0
- truthound/observability/tracing/sampler.py +487 -0
- truthound/observability/tracing/span.py +676 -0
- truthound/plugins/__init__.py +198 -0
- truthound/plugins/base.py +599 -0
- truthound/plugins/cli.py +680 -0
- truthound/plugins/dependencies/__init__.py +42 -0
- truthound/plugins/dependencies/graph.py +422 -0
- truthound/plugins/dependencies/resolver.py +417 -0
- truthound/plugins/discovery.py +379 -0
- truthound/plugins/docs/__init__.py +46 -0
- truthound/plugins/docs/extractor.py +444 -0
- truthound/plugins/docs/renderer.py +499 -0
- truthound/plugins/enterprise_manager.py +877 -0
- truthound/plugins/examples/__init__.py +19 -0
- truthound/plugins/examples/custom_validators.py +317 -0
- truthound/plugins/examples/slack_notifier.py +312 -0
- truthound/plugins/examples/xml_reporter.py +254 -0
- truthound/plugins/hooks.py +558 -0
- truthound/plugins/lifecycle/__init__.py +43 -0
- truthound/plugins/lifecycle/hot_reload.py +402 -0
- truthound/plugins/lifecycle/manager.py +371 -0
- truthound/plugins/manager.py +736 -0
- truthound/plugins/registry.py +338 -0
- truthound/plugins/security/__init__.py +93 -0
- truthound/plugins/security/exceptions.py +332 -0
- truthound/plugins/security/policies.py +348 -0
- truthound/plugins/security/protocols.py +643 -0
- truthound/plugins/security/sandbox/__init__.py +45 -0
- truthound/plugins/security/sandbox/context.py +158 -0
- truthound/plugins/security/sandbox/engines/__init__.py +19 -0
- truthound/plugins/security/sandbox/engines/container.py +379 -0
- truthound/plugins/security/sandbox/engines/noop.py +144 -0
- truthound/plugins/security/sandbox/engines/process.py +336 -0
- truthound/plugins/security/sandbox/factory.py +211 -0
- truthound/plugins/security/signing/__init__.py +57 -0
- truthound/plugins/security/signing/service.py +330 -0
- truthound/plugins/security/signing/trust_store.py +368 -0
- truthound/plugins/security/signing/verifier.py +459 -0
- truthound/plugins/versioning/__init__.py +41 -0
- truthound/plugins/versioning/constraints.py +297 -0
- truthound/plugins/versioning/resolver.py +329 -0
- truthound/profiler/__init__.py +1729 -0
- truthound/profiler/_lazy.py +452 -0
- truthound/profiler/ab_testing/__init__.py +80 -0
- truthound/profiler/ab_testing/analysis.py +449 -0
- truthound/profiler/ab_testing/base.py +257 -0
- truthound/profiler/ab_testing/experiment.py +395 -0
- truthound/profiler/ab_testing/tracking.py +368 -0
- truthound/profiler/auto_threshold.py +1170 -0
- truthound/profiler/base.py +579 -0
- truthound/profiler/cache_patterns.py +911 -0
- truthound/profiler/caching.py +1303 -0
- truthound/profiler/column_profiler.py +712 -0
- truthound/profiler/comparison.py +1007 -0
- truthound/profiler/custom_patterns.py +1170 -0
- truthound/profiler/dashboard/__init__.py +50 -0
- truthound/profiler/dashboard/app.py +476 -0
- truthound/profiler/dashboard/components.py +457 -0
- truthound/profiler/dashboard/config.py +72 -0
- truthound/profiler/distributed/__init__.py +83 -0
- truthound/profiler/distributed/base.py +281 -0
- truthound/profiler/distributed/dask_backend.py +498 -0
- truthound/profiler/distributed/local_backend.py +293 -0
- truthound/profiler/distributed/profiler.py +304 -0
- truthound/profiler/distributed/ray_backend.py +374 -0
- truthound/profiler/distributed/spark_backend.py +375 -0
- truthound/profiler/distributed.py +1366 -0
- truthound/profiler/enterprise_sampling.py +1065 -0
- truthound/profiler/errors.py +488 -0
- truthound/profiler/evolution/__init__.py +91 -0
- truthound/profiler/evolution/alerts.py +426 -0
- truthound/profiler/evolution/changes.py +206 -0
- truthound/profiler/evolution/compatibility.py +365 -0
- truthound/profiler/evolution/detector.py +372 -0
- truthound/profiler/evolution/protocols.py +121 -0
- truthound/profiler/generators/__init__.py +48 -0
- truthound/profiler/generators/base.py +384 -0
- truthound/profiler/generators/ml_rules.py +375 -0
- truthound/profiler/generators/pattern_rules.py +384 -0
- truthound/profiler/generators/schema_rules.py +267 -0
- truthound/profiler/generators/stats_rules.py +324 -0
- truthound/profiler/generators/suite_generator.py +857 -0
- truthound/profiler/i18n.py +1542 -0
- truthound/profiler/incremental.py +554 -0
- truthound/profiler/incremental_validation.py +1710 -0
- truthound/profiler/integration/__init__.py +73 -0
- truthound/profiler/integration/adapters.py +345 -0
- truthound/profiler/integration/context.py +371 -0
- truthound/profiler/integration/executor.py +527 -0
- truthound/profiler/integration/naming.py +75 -0
- truthound/profiler/integration/protocols.py +243 -0
- truthound/profiler/memory.py +1185 -0
- truthound/profiler/migration/__init__.py +60 -0
- truthound/profiler/migration/base.py +345 -0
- truthound/profiler/migration/manager.py +444 -0
- truthound/profiler/migration/v1_0_to_v1_1.py +484 -0
- truthound/profiler/ml/__init__.py +73 -0
- truthound/profiler/ml/base.py +244 -0
- truthound/profiler/ml/classifier.py +507 -0
- truthound/profiler/ml/feature_extraction.py +604 -0
- truthound/profiler/ml/pretrained.py +448 -0
- truthound/profiler/ml_inference.py +1276 -0
- truthound/profiler/native_patterns.py +815 -0
- truthound/profiler/observability.py +1184 -0
- truthound/profiler/process_timeout.py +1566 -0
- truthound/profiler/progress.py +568 -0
- truthound/profiler/progress_callbacks.py +1734 -0
- truthound/profiler/quality.py +1345 -0
- truthound/profiler/resilience.py +1180 -0
- truthound/profiler/sampled_matcher.py +794 -0
- truthound/profiler/sampling.py +1288 -0
- truthound/profiler/scheduling/__init__.py +82 -0
- truthound/profiler/scheduling/protocols.py +214 -0
- truthound/profiler/scheduling/scheduler.py +474 -0
- truthound/profiler/scheduling/storage.py +457 -0
- truthound/profiler/scheduling/triggers.py +449 -0
- truthound/profiler/schema.py +603 -0
- truthound/profiler/streaming.py +685 -0
- truthound/profiler/streaming_patterns.py +1354 -0
- truthound/profiler/suite_cli.py +625 -0
- truthound/profiler/suite_config.py +789 -0
- truthound/profiler/suite_export.py +1268 -0
- truthound/profiler/table_profiler.py +547 -0
- truthound/profiler/timeout.py +565 -0
- truthound/profiler/validation.py +1532 -0
- truthound/profiler/visualization/__init__.py +118 -0
- truthound/profiler/visualization/base.py +346 -0
- truthound/profiler/visualization/generator.py +1259 -0
- truthound/profiler/visualization/plotly_renderer.py +811 -0
- truthound/profiler/visualization/renderers.py +669 -0
- truthound/profiler/visualization/sections.py +540 -0
- truthound/profiler/visualization.py +2122 -0
- truthound/profiler/yaml_validation.py +1151 -0
- truthound/py.typed +0 -0
- truthound/ratelimit/__init__.py +248 -0
- truthound/ratelimit/algorithms.py +1108 -0
- truthound/ratelimit/core.py +573 -0
- truthound/ratelimit/integration.py +532 -0
- truthound/ratelimit/limiter.py +663 -0
- truthound/ratelimit/middleware.py +700 -0
- truthound/ratelimit/policy.py +792 -0
- truthound/ratelimit/storage.py +763 -0
- truthound/rbac/__init__.py +340 -0
- truthound/rbac/core.py +976 -0
- truthound/rbac/integration.py +760 -0
- truthound/rbac/manager.py +1052 -0
- truthound/rbac/middleware.py +842 -0
- truthound/rbac/policy.py +954 -0
- truthound/rbac/storage.py +878 -0
- truthound/realtime/__init__.py +141 -0
- truthound/realtime/adapters/__init__.py +43 -0
- truthound/realtime/adapters/base.py +533 -0
- truthound/realtime/adapters/kafka.py +487 -0
- truthound/realtime/adapters/kinesis.py +479 -0
- truthound/realtime/adapters/mock.py +243 -0
- truthound/realtime/base.py +553 -0
- truthound/realtime/factory.py +382 -0
- truthound/realtime/incremental.py +660 -0
- truthound/realtime/processing/__init__.py +67 -0
- truthound/realtime/processing/exactly_once.py +575 -0
- truthound/realtime/processing/state.py +547 -0
- truthound/realtime/processing/windows.py +647 -0
- truthound/realtime/protocols.py +569 -0
- truthound/realtime/streaming.py +605 -0
- truthound/realtime/testing/__init__.py +32 -0
- truthound/realtime/testing/containers.py +615 -0
- truthound/realtime/testing/fixtures.py +484 -0
- truthound/report.py +280 -0
- truthound/reporters/__init__.py +46 -0
- truthound/reporters/_protocols.py +30 -0
- truthound/reporters/base.py +324 -0
- truthound/reporters/ci/__init__.py +66 -0
- truthound/reporters/ci/azure.py +436 -0
- truthound/reporters/ci/base.py +509 -0
- truthound/reporters/ci/bitbucket.py +567 -0
- truthound/reporters/ci/circleci.py +547 -0
- truthound/reporters/ci/detection.py +364 -0
- truthound/reporters/ci/factory.py +182 -0
- truthound/reporters/ci/github.py +388 -0
- truthound/reporters/ci/gitlab.py +471 -0
- truthound/reporters/ci/jenkins.py +525 -0
- truthound/reporters/console_reporter.py +299 -0
- truthound/reporters/factory.py +211 -0
- truthound/reporters/html_reporter.py +524 -0
- truthound/reporters/json_reporter.py +256 -0
- truthound/reporters/markdown_reporter.py +280 -0
- truthound/reporters/sdk/__init__.py +174 -0
- truthound/reporters/sdk/builder.py +558 -0
- truthound/reporters/sdk/mixins.py +1150 -0
- truthound/reporters/sdk/schema.py +1493 -0
- truthound/reporters/sdk/templates.py +666 -0
- truthound/reporters/sdk/testing.py +968 -0
- truthound/scanners.py +170 -0
- truthound/scheduling/__init__.py +122 -0
- truthound/scheduling/cron.py +1136 -0
- truthound/scheduling/presets.py +212 -0
- truthound/schema.py +275 -0
- truthound/secrets/__init__.py +173 -0
- truthound/secrets/base.py +618 -0
- truthound/secrets/cloud.py +682 -0
- truthound/secrets/integration.py +507 -0
- truthound/secrets/manager.py +633 -0
- truthound/secrets/oidc/__init__.py +172 -0
- truthound/secrets/oidc/base.py +902 -0
- truthound/secrets/oidc/credential_provider.py +623 -0
- truthound/secrets/oidc/exchangers.py +1001 -0
- truthound/secrets/oidc/github/__init__.py +110 -0
- truthound/secrets/oidc/github/claims.py +718 -0
- truthound/secrets/oidc/github/enhanced_provider.py +693 -0
- truthound/secrets/oidc/github/trust_policy.py +742 -0
- truthound/secrets/oidc/github/verification.py +723 -0
- truthound/secrets/oidc/github/workflow.py +691 -0
- truthound/secrets/oidc/providers.py +825 -0
- truthound/secrets/providers.py +506 -0
- truthound/secrets/resolver.py +495 -0
- truthound/stores/__init__.py +177 -0
- truthound/stores/backends/__init__.py +18 -0
- truthound/stores/backends/_protocols.py +340 -0
- truthound/stores/backends/azure_blob.py +530 -0
- truthound/stores/backends/concurrent_filesystem.py +915 -0
- truthound/stores/backends/connection_pool.py +1365 -0
- truthound/stores/backends/database.py +743 -0
- truthound/stores/backends/filesystem.py +538 -0
- truthound/stores/backends/gcs.py +399 -0
- truthound/stores/backends/memory.py +354 -0
- truthound/stores/backends/s3.py +434 -0
- truthound/stores/backpressure/__init__.py +84 -0
- truthound/stores/backpressure/base.py +375 -0
- truthound/stores/backpressure/circuit_breaker.py +434 -0
- truthound/stores/backpressure/monitor.py +376 -0
- truthound/stores/backpressure/strategies.py +677 -0
- truthound/stores/base.py +551 -0
- truthound/stores/batching/__init__.py +65 -0
- truthound/stores/batching/base.py +305 -0
- truthound/stores/batching/buffer.py +370 -0
- truthound/stores/batching/store.py +248 -0
- truthound/stores/batching/writer.py +521 -0
- truthound/stores/caching/__init__.py +60 -0
- truthound/stores/caching/backends.py +684 -0
- truthound/stores/caching/base.py +356 -0
- truthound/stores/caching/store.py +305 -0
- truthound/stores/compression/__init__.py +193 -0
- truthound/stores/compression/adaptive.py +694 -0
- truthound/stores/compression/base.py +514 -0
- truthound/stores/compression/pipeline.py +868 -0
- truthound/stores/compression/providers.py +672 -0
- truthound/stores/compression/streaming.py +832 -0
- truthound/stores/concurrency/__init__.py +81 -0
- truthound/stores/concurrency/atomic.py +556 -0
- truthound/stores/concurrency/index.py +775 -0
- truthound/stores/concurrency/locks.py +576 -0
- truthound/stores/concurrency/manager.py +482 -0
- truthound/stores/encryption/__init__.py +297 -0
- truthound/stores/encryption/base.py +952 -0
- truthound/stores/encryption/keys.py +1191 -0
- truthound/stores/encryption/pipeline.py +903 -0
- truthound/stores/encryption/providers.py +953 -0
- truthound/stores/encryption/streaming.py +950 -0
- truthound/stores/expectations.py +227 -0
- truthound/stores/factory.py +246 -0
- truthound/stores/migration/__init__.py +75 -0
- truthound/stores/migration/base.py +480 -0
- truthound/stores/migration/manager.py +347 -0
- truthound/stores/migration/registry.py +382 -0
- truthound/stores/migration/store.py +559 -0
- truthound/stores/observability/__init__.py +106 -0
- truthound/stores/observability/audit.py +718 -0
- truthound/stores/observability/config.py +270 -0
- truthound/stores/observability/factory.py +208 -0
- truthound/stores/observability/metrics.py +636 -0
- truthound/stores/observability/protocols.py +410 -0
- truthound/stores/observability/store.py +570 -0
- truthound/stores/observability/tracing.py +784 -0
- truthound/stores/replication/__init__.py +76 -0
- truthound/stores/replication/base.py +260 -0
- truthound/stores/replication/monitor.py +269 -0
- truthound/stores/replication/store.py +439 -0
- truthound/stores/replication/syncer.py +391 -0
- truthound/stores/results.py +359 -0
- truthound/stores/retention/__init__.py +77 -0
- truthound/stores/retention/base.py +378 -0
- truthound/stores/retention/policies.py +621 -0
- truthound/stores/retention/scheduler.py +279 -0
- truthound/stores/retention/store.py +526 -0
- truthound/stores/streaming/__init__.py +138 -0
- truthound/stores/streaming/base.py +801 -0
- truthound/stores/streaming/database.py +984 -0
- truthound/stores/streaming/filesystem.py +719 -0
- truthound/stores/streaming/reader.py +629 -0
- truthound/stores/streaming/s3.py +843 -0
- truthound/stores/streaming/writer.py +790 -0
- truthound/stores/tiering/__init__.py +108 -0
- truthound/stores/tiering/base.py +462 -0
- truthound/stores/tiering/manager.py +249 -0
- truthound/stores/tiering/policies.py +692 -0
- truthound/stores/tiering/store.py +526 -0
- truthound/stores/versioning/__init__.py +56 -0
- truthound/stores/versioning/base.py +376 -0
- truthound/stores/versioning/store.py +660 -0
- truthound/stores/versioning/strategies.py +353 -0
- truthound/types.py +56 -0
- truthound/validators/__init__.py +774 -0
- truthound/validators/aggregate/__init__.py +27 -0
- truthound/validators/aggregate/central.py +116 -0
- truthound/validators/aggregate/extremes.py +116 -0
- truthound/validators/aggregate/spread.py +118 -0
- truthound/validators/aggregate/sum.py +64 -0
- truthound/validators/aggregate/type.py +78 -0
- truthound/validators/anomaly/__init__.py +93 -0
- truthound/validators/anomaly/base.py +431 -0
- truthound/validators/anomaly/ml_based.py +1190 -0
- truthound/validators/anomaly/multivariate.py +647 -0
- truthound/validators/anomaly/statistical.py +599 -0
- truthound/validators/base.py +1089 -0
- truthound/validators/business_rule/__init__.py +46 -0
- truthound/validators/business_rule/base.py +147 -0
- truthound/validators/business_rule/checksum.py +509 -0
- truthound/validators/business_rule/financial.py +526 -0
- truthound/validators/cache.py +733 -0
- truthound/validators/completeness/__init__.py +39 -0
- truthound/validators/completeness/conditional.py +73 -0
- truthound/validators/completeness/default.py +98 -0
- truthound/validators/completeness/empty.py +103 -0
- truthound/validators/completeness/nan.py +337 -0
- truthound/validators/completeness/null.py +152 -0
- truthound/validators/cross_table/__init__.py +17 -0
- truthound/validators/cross_table/aggregate.py +333 -0
- truthound/validators/cross_table/row_count.py +122 -0
- truthound/validators/datetime/__init__.py +29 -0
- truthound/validators/datetime/format.py +78 -0
- truthound/validators/datetime/freshness.py +269 -0
- truthound/validators/datetime/order.py +73 -0
- truthound/validators/datetime/parseable.py +185 -0
- truthound/validators/datetime/range.py +202 -0
- truthound/validators/datetime/timezone.py +69 -0
- truthound/validators/distribution/__init__.py +49 -0
- truthound/validators/distribution/distribution.py +128 -0
- truthound/validators/distribution/monotonic.py +119 -0
- truthound/validators/distribution/outlier.py +178 -0
- truthound/validators/distribution/quantile.py +80 -0
- truthound/validators/distribution/range.py +254 -0
- truthound/validators/distribution/set.py +125 -0
- truthound/validators/distribution/statistical.py +459 -0
- truthound/validators/drift/__init__.py +79 -0
- truthound/validators/drift/base.py +427 -0
- truthound/validators/drift/multi_feature.py +401 -0
- truthound/validators/drift/numeric.py +395 -0
- truthound/validators/drift/psi.py +446 -0
- truthound/validators/drift/statistical.py +510 -0
- truthound/validators/enterprise.py +1658 -0
- truthound/validators/geospatial/__init__.py +80 -0
- truthound/validators/geospatial/base.py +97 -0
- truthound/validators/geospatial/boundary.py +238 -0
- truthound/validators/geospatial/coordinate.py +351 -0
- truthound/validators/geospatial/distance.py +399 -0
- truthound/validators/geospatial/polygon.py +665 -0
- truthound/validators/i18n/__init__.py +308 -0
- truthound/validators/i18n/bidi.py +571 -0
- truthound/validators/i18n/catalogs.py +570 -0
- truthound/validators/i18n/dialects.py +763 -0
- truthound/validators/i18n/extended_catalogs.py +549 -0
- truthound/validators/i18n/formatting.py +1434 -0
- truthound/validators/i18n/loader.py +1020 -0
- truthound/validators/i18n/messages.py +521 -0
- truthound/validators/i18n/plural.py +683 -0
- truthound/validators/i18n/protocols.py +855 -0
- truthound/validators/i18n/tms.py +1162 -0
- truthound/validators/localization/__init__.py +53 -0
- truthound/validators/localization/base.py +122 -0
- truthound/validators/localization/chinese.py +362 -0
- truthound/validators/localization/japanese.py +275 -0
- truthound/validators/localization/korean.py +524 -0
- truthound/validators/memory/__init__.py +94 -0
- truthound/validators/memory/approximate_knn.py +506 -0
- truthound/validators/memory/base.py +547 -0
- truthound/validators/memory/sgd_online.py +719 -0
- truthound/validators/memory/streaming_ecdf.py +753 -0
- truthound/validators/ml_feature/__init__.py +54 -0
- truthound/validators/ml_feature/base.py +249 -0
- truthound/validators/ml_feature/correlation.py +299 -0
- truthound/validators/ml_feature/leakage.py +344 -0
- truthound/validators/ml_feature/null_impact.py +270 -0
- truthound/validators/ml_feature/scale.py +264 -0
- truthound/validators/multi_column/__init__.py +89 -0
- truthound/validators/multi_column/arithmetic.py +284 -0
- truthound/validators/multi_column/base.py +231 -0
- truthound/validators/multi_column/comparison.py +273 -0
- truthound/validators/multi_column/consistency.py +312 -0
- truthound/validators/multi_column/statistical.py +299 -0
- truthound/validators/optimization/__init__.py +164 -0
- truthound/validators/optimization/aggregation.py +563 -0
- truthound/validators/optimization/covariance.py +556 -0
- truthound/validators/optimization/geo.py +626 -0
- truthound/validators/optimization/graph.py +587 -0
- truthound/validators/optimization/orchestrator.py +970 -0
- truthound/validators/optimization/profiling.py +1312 -0
- truthound/validators/privacy/__init__.py +223 -0
- truthound/validators/privacy/base.py +635 -0
- truthound/validators/privacy/ccpa.py +670 -0
- truthound/validators/privacy/gdpr.py +728 -0
- truthound/validators/privacy/global_patterns.py +604 -0
- truthound/validators/privacy/plugins.py +867 -0
- truthound/validators/profiling/__init__.py +52 -0
- truthound/validators/profiling/base.py +175 -0
- truthound/validators/profiling/cardinality.py +312 -0
- truthound/validators/profiling/entropy.py +391 -0
- truthound/validators/profiling/frequency.py +455 -0
- truthound/validators/pushdown_support.py +660 -0
- truthound/validators/query/__init__.py +91 -0
- truthound/validators/query/aggregate.py +346 -0
- truthound/validators/query/base.py +246 -0
- truthound/validators/query/column.py +249 -0
- truthound/validators/query/expression.py +274 -0
- truthound/validators/query/result.py +323 -0
- truthound/validators/query/row_count.py +264 -0
- truthound/validators/referential/__init__.py +80 -0
- truthound/validators/referential/base.py +395 -0
- truthound/validators/referential/cascade.py +391 -0
- truthound/validators/referential/circular.py +563 -0
- truthound/validators/referential/foreign_key.py +624 -0
- truthound/validators/referential/orphan.py +485 -0
- truthound/validators/registry.py +112 -0
- truthound/validators/schema/__init__.py +41 -0
- truthound/validators/schema/column_count.py +142 -0
- truthound/validators/schema/column_exists.py +80 -0
- truthound/validators/schema/column_order.py +82 -0
- truthound/validators/schema/column_pair.py +85 -0
- truthound/validators/schema/column_pair_set.py +195 -0
- truthound/validators/schema/column_type.py +94 -0
- truthound/validators/schema/multi_column.py +53 -0
- truthound/validators/schema/multi_column_aggregate.py +175 -0
- truthound/validators/schema/referential.py +274 -0
- truthound/validators/schema/table_schema.py +91 -0
- truthound/validators/schema_validator.py +219 -0
- truthound/validators/sdk/__init__.py +250 -0
- truthound/validators/sdk/builder.py +680 -0
- truthound/validators/sdk/decorators.py +474 -0
- truthound/validators/sdk/enterprise/__init__.py +211 -0
- truthound/validators/sdk/enterprise/docs.py +725 -0
- truthound/validators/sdk/enterprise/fuzzing.py +659 -0
- truthound/validators/sdk/enterprise/licensing.py +709 -0
- truthound/validators/sdk/enterprise/manager.py +543 -0
- truthound/validators/sdk/enterprise/resources.py +628 -0
- truthound/validators/sdk/enterprise/sandbox.py +766 -0
- truthound/validators/sdk/enterprise/signing.py +603 -0
- truthound/validators/sdk/enterprise/templates.py +865 -0
- truthound/validators/sdk/enterprise/versioning.py +659 -0
- truthound/validators/sdk/templates.py +757 -0
- truthound/validators/sdk/testing.py +807 -0
- truthound/validators/security/__init__.py +181 -0
- truthound/validators/security/redos/__init__.py +182 -0
- truthound/validators/security/redos/core.py +861 -0
- truthound/validators/security/redos/cpu_monitor.py +593 -0
- truthound/validators/security/redos/cve_database.py +791 -0
- truthound/validators/security/redos/ml/__init__.py +155 -0
- truthound/validators/security/redos/ml/base.py +785 -0
- truthound/validators/security/redos/ml/datasets.py +618 -0
- truthound/validators/security/redos/ml/features.py +359 -0
- truthound/validators/security/redos/ml/models.py +1000 -0
- truthound/validators/security/redos/ml/predictor.py +507 -0
- truthound/validators/security/redos/ml/storage.py +632 -0
- truthound/validators/security/redos/ml/training.py +571 -0
- truthound/validators/security/redos/ml_analyzer.py +937 -0
- truthound/validators/security/redos/optimizer.py +674 -0
- truthound/validators/security/redos/profiler.py +682 -0
- truthound/validators/security/redos/re2_engine.py +709 -0
- truthound/validators/security/redos.py +886 -0
- truthound/validators/security/sql_security.py +1247 -0
- truthound/validators/streaming/__init__.py +126 -0
- truthound/validators/streaming/base.py +292 -0
- truthound/validators/streaming/completeness.py +210 -0
- truthound/validators/streaming/mixin.py +575 -0
- truthound/validators/streaming/range.py +308 -0
- truthound/validators/streaming/sources.py +846 -0
- truthound/validators/string/__init__.py +57 -0
- truthound/validators/string/casing.py +158 -0
- truthound/validators/string/charset.py +96 -0
- truthound/validators/string/format.py +501 -0
- truthound/validators/string/json.py +77 -0
- truthound/validators/string/json_schema.py +184 -0
- truthound/validators/string/length.py +104 -0
- truthound/validators/string/like_pattern.py +237 -0
- truthound/validators/string/regex.py +202 -0
- truthound/validators/string/regex_extended.py +435 -0
- truthound/validators/table/__init__.py +88 -0
- truthound/validators/table/base.py +78 -0
- truthound/validators/table/column_count.py +198 -0
- truthound/validators/table/freshness.py +362 -0
- truthound/validators/table/row_count.py +251 -0
- truthound/validators/table/schema.py +333 -0
- truthound/validators/table/size.py +285 -0
- truthound/validators/timeout/__init__.py +102 -0
- truthound/validators/timeout/advanced/__init__.py +247 -0
- truthound/validators/timeout/advanced/circuit_breaker.py +675 -0
- truthound/validators/timeout/advanced/prediction.py +773 -0
- truthound/validators/timeout/advanced/priority.py +618 -0
- truthound/validators/timeout/advanced/redis_backend.py +770 -0
- truthound/validators/timeout/advanced/retry.py +721 -0
- truthound/validators/timeout/advanced/sampling.py +788 -0
- truthound/validators/timeout/advanced/sla.py +661 -0
- truthound/validators/timeout/advanced/telemetry.py +804 -0
- truthound/validators/timeout/cascade.py +477 -0
- truthound/validators/timeout/deadline.py +657 -0
- truthound/validators/timeout/degradation.py +525 -0
- truthound/validators/timeout/distributed.py +597 -0
- truthound/validators/timeseries/__init__.py +89 -0
- truthound/validators/timeseries/base.py +326 -0
- truthound/validators/timeseries/completeness.py +617 -0
- truthound/validators/timeseries/gap.py +485 -0
- truthound/validators/timeseries/monotonic.py +310 -0
- truthound/validators/timeseries/seasonality.py +422 -0
- truthound/validators/timeseries/trend.py +510 -0
- truthound/validators/uniqueness/__init__.py +59 -0
- truthound/validators/uniqueness/approximate.py +475 -0
- truthound/validators/uniqueness/distinct_values.py +253 -0
- truthound/validators/uniqueness/duplicate.py +118 -0
- truthound/validators/uniqueness/primary_key.py +140 -0
- truthound/validators/uniqueness/unique.py +191 -0
- truthound/validators/uniqueness/within_record.py +599 -0
- truthound/validators/utils.py +756 -0
- truthound-1.0.8.dist-info/METADATA +474 -0
- truthound-1.0.8.dist-info/RECORD +877 -0
- truthound-1.0.8.dist-info/WHEEL +4 -0
- truthound-1.0.8.dist-info/entry_points.txt +2 -0
- truthound-1.0.8.dist-info/licenses/LICENSE +190 -0
|
@@ -0,0 +1,1303 @@
|
|
|
1
|
+
"""Caching layer for data profiling with file hash based caching.
|
|
2
|
+
|
|
3
|
+
This module provides a flexible caching system for profile results:
|
|
4
|
+
- File hash based cache key generation
|
|
5
|
+
- Multiple backend support (memory, file, Redis)
|
|
6
|
+
- TTL-based expiration
|
|
7
|
+
- Lazy evaluation with cache-through pattern
|
|
8
|
+
|
|
9
|
+
Key features:
|
|
10
|
+
- Pluggable backend architecture
|
|
11
|
+
- Content-based cache invalidation
|
|
12
|
+
- Compression support for large profiles
|
|
13
|
+
- Thread-safe operations
|
|
14
|
+
|
|
15
|
+
Example:
|
|
16
|
+
from truthound.profiler.caching import ProfileCache, FileHashCacheKey
|
|
17
|
+
|
|
18
|
+
# Create cache with memory backend
|
|
19
|
+
cache = ProfileCache()
|
|
20
|
+
|
|
21
|
+
# Generate cache key from file
|
|
22
|
+
key = FileHashCacheKey.from_file("data.parquet")
|
|
23
|
+
|
|
24
|
+
# Cache-through pattern
|
|
25
|
+
profile = cache.get_or_compute(key, lambda: expensive_profile())
|
|
26
|
+
"""
|
|
27
|
+
|
|
28
|
+
from __future__ import annotations
|
|
29
|
+
|
|
30
|
+
import gzip
|
|
31
|
+
import hashlib
|
|
32
|
+
import json
|
|
33
|
+
import os
|
|
34
|
+
import pickle
|
|
35
|
+
import shutil
|
|
36
|
+
import threading
|
|
37
|
+
import time
|
|
38
|
+
from abc import ABC, abstractmethod
|
|
39
|
+
from dataclasses import dataclass, field
|
|
40
|
+
from datetime import datetime, timedelta
|
|
41
|
+
from enum import Enum
|
|
42
|
+
from pathlib import Path
|
|
43
|
+
from typing import Any, Callable, Generic, Protocol, TypeVar
|
|
44
|
+
|
|
45
|
+
from truthound.profiler.base import TableProfile
|
|
46
|
+
from truthound.profiler.schema import ProfileSerializer
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
# =============================================================================
|
|
50
|
+
# Cache Key Protocol
|
|
51
|
+
# =============================================================================
|
|
52
|
+
|
|
53
|
+
|
|
54
|
+
class CacheKeyProtocol(Protocol):
    """Structural interface for objects used as cache keys.

    A conforming object exposes a string form via ``to_string`` and
    supports hashing and equality comparison.
    """

    def to_string(self) -> str:
        """Return the string representation of this key."""
        ...

    def __hash__(self) -> int:
        """Return the hash value of this key."""
        ...

    def __eq__(self, other: object) -> bool:
        """Report whether this key is equal to ``other``."""
        ...
|
|
66
|
+
|
|
67
|
+
|
|
68
|
+
@dataclass(frozen=True)
class CacheKey:
    """Immutable cache key made of a namespace, a version and a raw key.

    Attributes:
        key: The raw key value
        namespace: Namespace prefix placed first in the string form
        version: Version tag placed between namespace and key
    """

    key: str
    namespace: str = "default"
    version: str = "1"

    def to_string(self) -> str:
        """Return ``namespace:version:key`` as a single string."""
        return "{}:{}:{}".format(self.namespace, self.version, self.key)

    def __hash__(self) -> int:
        # Hash the string form so the hash follows to_string().
        rendered = self.to_string()
        return hash(rendered)
|
|
82
|
+
|
|
83
|
+
|
|
84
|
+
@dataclass(frozen=True)
class FileHashCacheKey(CacheKey):
    """Cache key derived from a SHA-256 digest of a file's bytes.

    Because the key is computed from the file contents, a change to
    those contents yields a different key.

    Attributes:
        file_path: Path the key was built from
        file_hash: Hex SHA-256 digest (full file or sampled)
        file_size: File size in bytes when the key was built
        file_mtime: File modification time when the key was built
    """

    file_path: str = ""
    file_hash: str = ""
    file_size: int = 0
    file_mtime: float = 0.0

    @classmethod
    def from_file(
        cls,
        path: str | Path,
        *,
        namespace: str = "profile",
        version: str = "1",
        quick_hash: bool = False,
        sample_size: int = 1024 * 1024,  # 1MB sample for quick hash
    ) -> "FileHashCacheKey":
        """Build a key for the file at ``path``.

        Args:
            path: Location of the file to hash
            namespace: Cache namespace
            version: Cache version
            quick_hash: Hash only the head and tail of the file when it
                is larger than twice ``sample_size``
            sample_size: Number of bytes read from each end in quick mode

        Returns:
            FileHashCacheKey instance

        Raises:
            FileNotFoundError: If ``path`` does not exist
        """
        source = Path(path)
        if not source.exists():
            raise FileNotFoundError(f"File not found: {source}")

        info = source.stat()

        # Sampling only applies when head and tail samples cannot overlap.
        use_sampling = quick_hash and info.st_size > sample_size * 2
        digest = (
            cls._quick_hash(source, sample_size)
            if use_sampling
            else cls._full_hash(source)
        )

        return cls(
            key=digest,
            namespace=namespace,
            version=version,
            file_path=str(source),
            file_hash=digest,
            file_size=info.st_size,
            file_mtime=info.st_mtime,
        )

    @staticmethod
    def _full_hash(path: Path, chunk_size: int = 8192) -> str:
        """Return the SHA-256 hex digest of the whole file, read in chunks."""
        digest = hashlib.sha256()
        with open(path, "rb") as stream:
            for block in iter(lambda: stream.read(chunk_size), b""):
                digest.update(block)
        return digest.hexdigest()

    @staticmethod
    def _quick_hash(path: Path, sample_size: int) -> str:
        """Return a SHA-256 hex digest of the file's head, tail and size."""
        digest = hashlib.sha256()
        total_bytes = path.stat().st_size

        with open(path, "rb") as stream:
            head = stream.read(sample_size)
            digest.update(head)

            # Jump to sample_size bytes before the end of the file.
            stream.seek(-sample_size, os.SEEK_END)
            tail = stream.read(sample_size)
            digest.update(tail)

        # Mix in the size so the length of the unsampled middle matters.
        digest.update(str(total_bytes).encode())

        return digest.hexdigest()

    def to_string(self) -> str:
        """Return ``namespace:version:file_hash:file_size`` as a string."""
        return "{}:{}:{}:{}".format(
            self.namespace, self.version, self.file_hash, self.file_size
        )
|
|
181
|
+
|
|
182
|
+
|
|
183
|
+
@dataclass(frozen=True)
class DataFrameHashCacheKey(CacheKey):
    """Cache key based on DataFrame content hash.

    The key combines a hash of the schema, a hash of the first
    ``sample_rows`` rows, a row count and the column count.

    Attributes:
        schema_hash: First 16 hex chars of the SHA-256 of the sorted schema
        sample_hash: First 16 hex chars of the SHA-256 of the sampled rows
        row_count: Total rows for an eager DataFrame; sampled rows for a
            LazyFrame
        column_count: Number of columns in the schema
    """

    schema_hash: str = ""
    sample_hash: str = ""
    row_count: int = 0
    column_count: int = 0

    @classmethod
    def from_dataframe(
        cls,
        df: Any,  # pl.DataFrame or pl.LazyFrame
        *,
        namespace: str = "profile",
        version: str = "1",
        sample_rows: int = 1000,
    ) -> "DataFrameHashCacheKey":
        """Create cache key from DataFrame.

        Args:
            df: Polars DataFrame or LazyFrame
            namespace: Cache namespace
            version: Cache version
            sample_rows: Number of leading rows to sample for the hash

        Returns:
            DataFrameHashCacheKey instance

        Raises:
            TypeError: If ``df`` is neither a Polars DataFrame nor LazyFrame
        """
        import polars as pl

        if isinstance(df, pl.LazyFrame):
            schema = df.collect_schema()
            sample_df = df.head(sample_rows).collect()
            # Counting every row would force a full scan of the lazy query,
            # so only the sampled length is recorded here.
            row_count = len(sample_df)
        elif isinstance(df, pl.DataFrame):
            schema = df.schema
            sample_df = df.head(sample_rows)
            # Use the real length, not the sample length: otherwise two
            # frames that share their first ``sample_rows`` rows but differ
            # in size would produce the same key.
            row_count = len(df)
        else:
            raise TypeError(f"Expected Polars DataFrame, got {type(df)}")

        column_count = len(schema)

        # Hash schema (sorted by column name, so column order is ignored)
        schema_str = str(sorted(schema.items()))
        schema_hash = hashlib.sha256(schema_str.encode()).hexdigest()[:16]

        # Hash sample data via its pandas CSV rendering
        sample_bytes = sample_df.to_pandas().to_csv().encode()
        sample_hash = hashlib.sha256(sample_bytes).hexdigest()[:16]

        # Combined key
        key = f"{schema_hash}:{sample_hash}:{row_count}:{column_count}"

        return cls(
            key=key,
            namespace=namespace,
            version=version,
            schema_hash=schema_hash,
            sample_hash=sample_hash,
            row_count=row_count,
            column_count=column_count,
        )
|
|
247
|
+
|
|
248
|
+
|
|
249
|
+
# =============================================================================
|
|
250
|
+
# Cache Entry
|
|
251
|
+
# =============================================================================
|
|
252
|
+
|
|
253
|
+
|
|
254
|
+
@dataclass
class CacheEntry:
    """Cached profile entry with metadata.

    Attributes:
        profile: The cached table profile
        created_at: When the entry object was created
        expires_at: Expiry time, or None for an entry that never expires
        access_count: Number of times ``touch`` has been called
        last_accessed: Time of the most recent ``touch`` (or creation)
        compressed: Compression flag carried with the entry
        size_bytes: Size figure carried with the entry
        metadata: Free-form extra data stored alongside the profile
    """

    profile: TableProfile
    created_at: datetime = field(default_factory=datetime.now)
    expires_at: datetime | None = None
    access_count: int = 0
    last_accessed: datetime = field(default_factory=datetime.now)
    compressed: bool = False
    size_bytes: int = 0
    metadata: dict[str, Any] = field(default_factory=dict)

    def is_expired(self) -> bool:
        """Check if entry has expired.

        Returns:
            False when no expiry is set, otherwise whether the current
            time is past ``expires_at``
        """
        if self.expires_at is None:
            return False
        return datetime.now() > self.expires_at

    def touch(self) -> None:
        """Update access statistics (count and last-access time)."""
        self.access_count += 1
        self.last_accessed = datetime.now()

    def to_dict(self) -> dict[str, Any]:
        """Serialize to dictionary.

        Timestamps are written as ISO-8601 strings and the profile is
        converted through ``ProfileSerializer``.
        """
        serializer = ProfileSerializer()
        return {
            "profile": serializer.serialize(self.profile),
            "created_at": self.created_at.isoformat(),
            "expires_at": self.expires_at.isoformat() if self.expires_at else None,
            "access_count": self.access_count,
            "last_accessed": self.last_accessed.isoformat(),
            # Persist these too so a to_dict/from_dict round trip does not
            # silently reset them to their defaults.
            "compressed": self.compressed,
            "size_bytes": self.size_bytes,
            "metadata": self.metadata,
        }

    @classmethod
    def from_dict(cls, data: dict[str, Any]) -> "CacheEntry":
        """Deserialize from dictionary.

        Args:
            data: Mapping as produced by ``to_dict``. ``profile`` and
                ``created_at`` are required; every other key is optional,
                which keeps payloads written without ``compressed`` or
                ``size_bytes`` readable.

        Returns:
            CacheEntry instance

        Raises:
            KeyError: If a required key is missing
            ValueError: If a timestamp is not a valid ISO-8601 string
        """
        serializer = ProfileSerializer()
        profile = serializer.deserialize(data["profile"])

        expires_at = None
        if data.get("expires_at"):
            expires_at = datetime.fromisoformat(data["expires_at"])

        return cls(
            profile=profile,
            created_at=datetime.fromisoformat(data["created_at"]),
            expires_at=expires_at,
            access_count=data.get("access_count", 0),
            last_accessed=datetime.fromisoformat(
                data.get("last_accessed", data["created_at"])
            ),
            compressed=data.get("compressed", False),
            size_bytes=data.get("size_bytes", 0),
            metadata=data.get("metadata", {}),
        )
|
|
308
|
+
|
|
309
|
+
|
|
310
|
+
# =============================================================================
|
|
311
|
+
# Cache Backends
|
|
312
|
+
# =============================================================================
|
|
313
|
+
|
|
314
|
+
|
|
315
|
+
class CacheBackend(ABC):
    """Interface that every cache storage backend implements.

    Subclass this and provide the five abstract operations to plug in a
    custom store (Redis, Memcached, etc.).
    """

    @abstractmethod
    def get(self, key: str) -> CacheEntry | None:
        """Look up the entry stored under ``key``.

        Args:
            key: Cache key string

        Returns:
            The stored CacheEntry, or None when nothing is found
        """

    @abstractmethod
    def set(
        self,
        key: str,
        entry: CacheEntry,
        ttl: timedelta | None = None,
    ) -> None:
        """Store ``entry`` under ``key``.

        Args:
            key: Cache key string
            entry: Entry to cache
            ttl: Time-to-live for the entry
        """

    @abstractmethod
    def delete(self, key: str) -> bool:
        """Remove the entry stored under ``key``.

        Args:
            key: Cache key string

        Returns:
            True if an entry was removed, False if there was none
        """

    @abstractmethod
    def clear(self) -> int:
        """Remove every entry.

        Returns:
            Number of entries removed
        """

    @abstractmethod
    def exists(self, key: str) -> bool:
        """Report whether ``key`` is present.

        Args:
            key: Cache key string

        Returns:
            True if the key is present
        """

    def get_stats(self) -> dict[str, Any]:
        """Return backend statistics.

        Returns:
            Dictionary with cache statistics; the base implementation
            returns an empty dictionary
        """
        return dict()
|
|
389
|
+
|
|
390
|
+
|
|
391
|
+
class MemoryCacheBackend(CacheBackend):
    """In-memory cache backend with LRU eviction.

    Every public operation runs while holding a re-entrant lock.

    Attributes:
        max_size: Maximum number of entries
        max_memory_bytes: Intended memory cap in bytes (0 = unlimited).
            NOTE: the value is stored but this class never reads it, so
            no memory limit is currently enforced.
    """

    def __init__(
        self,
        *,
        max_size: int = 1000,
        max_memory_bytes: int = 0,
    ):
        self.max_size = max_size
        self.max_memory_bytes = max_memory_bytes
        self._cache: dict[str, CacheEntry] = {}
        self._lock = threading.RLock()
        self._hits = 0
        self._misses = 0

    def _live_entry(self, key: str) -> CacheEntry | None:
        """Return the unexpired entry for ``key``, dropping it if expired.

        The caller must already hold ``self._lock``.
        """
        entry = self._cache.get(key)
        if entry is not None and entry.is_expired():
            del self._cache[key]
            return None
        return entry

    def get(self, key: str) -> CacheEntry | None:
        """Return the entry for ``key`` and record a hit or a miss.

        Args:
            key: Cache key string

        Returns:
            The entry (with its access statistics updated), or None when
            the key is absent or its entry has expired
        """
        with self._lock:
            entry = self._live_entry(key)
            if entry is None:
                self._misses += 1
                return None

            entry.touch()
            self._hits += 1
            return entry

    def set(
        self,
        key: str,
        entry: CacheEntry,
        ttl: timedelta | None = None,
    ) -> None:
        """Store ``entry`` under ``key``, evicting entries when over capacity.

        Args:
            key: Cache key string
            entry: Entry to cache; its ``expires_at`` is overwritten when
                ``ttl`` is given
            ttl: Time-to-live for the entry; None leaves ``expires_at``
                untouched
        """
        with self._lock:
            # Compare against None: timedelta(0) is falsy, and a plain
            # truthiness test would turn a zero TTL into "never expires".
            if ttl is not None:
                entry.expires_at = datetime.now() + ttl

            self._cache[key] = entry

            # Loop rather than evict once, so the cache also shrinks back
            # under the limit if max_size was lowered after it was filled.
            # The emptiness check stops the loop for max_size <= 0.
            while self._cache and len(self._cache) > self.max_size:
                self._evict_lru()

    def delete(self, key: str) -> bool:
        """Remove ``key``; return True if it was present."""
        with self._lock:
            if key in self._cache:
                del self._cache[key]
                return True
            return False

    def clear(self) -> int:
        """Remove every entry and return how many were removed."""
        with self._lock:
            count = len(self._cache)
            self._cache.clear()
            return count

    def exists(self, key: str) -> bool:
        """Report whether ``key`` holds an unexpired entry.

        An expired entry is removed as a side effect. Hit and miss
        counters are not changed.
        """
        with self._lock:
            return self._live_entry(key) is not None

    def get_stats(self) -> dict[str, Any]:
        """Return entry count, capacity and hit/miss counters."""
        with self._lock:
            total = self._hits + self._misses
            return {
                "type": "memory",
                "size": len(self._cache),
                "max_size": self.max_size,
                "hits": self._hits,
                "misses": self._misses,
                "hit_ratio": self._hits / total if total > 0 else 0.0,
            }

    def _evict_lru(self) -> None:
        """Evict the single entry with the oldest ``last_accessed`` time."""
        if not self._cache:
            return

        lru_key = min(
            self._cache,
            key=lambda k: self._cache[k].last_accessed,
        )
        del self._cache[lru_key]
|
|
492
|
+
|
|
493
|
+
|
|
494
|
+
class FileCacheBackend(CacheBackend):
|
|
495
|
+
"""File-based cache backend with optional compression.
|
|
496
|
+
|
|
497
|
+
Stores cached profiles as JSON files with gzip compression.
|
|
498
|
+
|
|
499
|
+
Attributes:
|
|
500
|
+
cache_dir: Directory for cache files
|
|
501
|
+
compress: Whether to compress cache files
|
|
502
|
+
"""
|
|
503
|
+
|
|
504
|
+
def __init__(
|
|
505
|
+
self,
|
|
506
|
+
cache_dir: str | Path = ".truthound_cache",
|
|
507
|
+
*,
|
|
508
|
+
compress: bool = True,
|
|
509
|
+
max_size_mb: int = 1000,
|
|
510
|
+
):
|
|
511
|
+
self.cache_dir = Path(cache_dir)
|
|
512
|
+
self.compress = compress
|
|
513
|
+
self.max_size_mb = max_size_mb
|
|
514
|
+
self._lock = threading.RLock()
|
|
515
|
+
self._hits = 0
|
|
516
|
+
self._misses = 0
|
|
517
|
+
|
|
518
|
+
# Ensure cache directory exists
|
|
519
|
+
self.cache_dir.mkdir(parents=True, exist_ok=True)
|
|
520
|
+
|
|
521
|
+
def _get_path(self, key: str) -> Path:
|
|
522
|
+
"""Get file path for cache key."""
|
|
523
|
+
# Use hash to avoid filesystem issues with long keys
|
|
524
|
+
key_hash = hashlib.sha256(key.encode()).hexdigest()
|
|
525
|
+
suffix = ".json.gz" if self.compress else ".json"
|
|
526
|
+
return self.cache_dir / f"{key_hash}{suffix}"
|
|
527
|
+
|
|
528
|
+
def get(self, key: str) -> CacheEntry | None:
|
|
529
|
+
path = self._get_path(key)
|
|
530
|
+
|
|
531
|
+
with self._lock:
|
|
532
|
+
if not path.exists():
|
|
533
|
+
self._misses += 1
|
|
534
|
+
return None
|
|
535
|
+
|
|
536
|
+
try:
|
|
537
|
+
if self.compress:
|
|
538
|
+
with gzip.open(path, "rt", encoding="utf-8") as f:
|
|
539
|
+
data = json.load(f)
|
|
540
|
+
else:
|
|
541
|
+
with open(path, "r", encoding="utf-8") as f:
|
|
542
|
+
data = json.load(f)
|
|
543
|
+
|
|
544
|
+
entry = CacheEntry.from_dict(data)
|
|
545
|
+
|
|
546
|
+
if entry.is_expired():
|
|
547
|
+
path.unlink(missing_ok=True)
|
|
548
|
+
self._misses += 1
|
|
549
|
+
return None
|
|
550
|
+
|
|
551
|
+
entry.touch()
|
|
552
|
+
self._hits += 1
|
|
553
|
+
|
|
554
|
+
# Update file with new access stats
|
|
555
|
+
self._save_entry(path, entry)
|
|
556
|
+
|
|
557
|
+
return entry
|
|
558
|
+
|
|
559
|
+
except (json.JSONDecodeError, KeyError, OSError):
|
|
560
|
+
path.unlink(missing_ok=True)
|
|
561
|
+
self._misses += 1
|
|
562
|
+
return None
|
|
563
|
+
|
|
564
|
+
def set(
|
|
565
|
+
self,
|
|
566
|
+
key: str,
|
|
567
|
+
entry: CacheEntry,
|
|
568
|
+
ttl: timedelta | None = None,
|
|
569
|
+
) -> None:
|
|
570
|
+
if ttl:
|
|
571
|
+
entry.expires_at = datetime.now() + ttl
|
|
572
|
+
|
|
573
|
+
path = self._get_path(key)
|
|
574
|
+
|
|
575
|
+
with self._lock:
|
|
576
|
+
self._save_entry(path, entry)
|
|
577
|
+
|
|
578
|
+
# Check cache size and cleanup if needed
|
|
579
|
+
self._maybe_cleanup()
|
|
580
|
+
|
|
581
|
+
def _save_entry(self, path: Path, entry: CacheEntry) -> None:
|
|
582
|
+
"""Save entry to file."""
|
|
583
|
+
data = entry.to_dict()
|
|
584
|
+
|
|
585
|
+
if self.compress:
|
|
586
|
+
with gzip.open(path, "wt", encoding="utf-8") as f:
|
|
587
|
+
json.dump(data, f)
|
|
588
|
+
else:
|
|
589
|
+
with open(path, "w", encoding="utf-8") as f:
|
|
590
|
+
json.dump(data, f)
|
|
591
|
+
|
|
592
|
+
def delete(self, key: str) -> bool:
    """Remove the file backing ``key``.

    Args:
        key: Cache key string.

    Returns:
        ``True`` if a file was removed, ``False`` if there was none.
    """
    path = self._get_path(key)

    with self._lock:
        # Try the unlink directly instead of checking exists() first: the
        # file may be removed by another process between the two calls.
        try:
            path.unlink()
        except FileNotFoundError:
            return False
        return True
|
|
600
|
+
|
|
601
|
+
def clear(self) -> int:
    """Delete every ``*.json*`` file in the cache directory.

    Returns:
        Number of files actually removed.
    """
    removed = 0
    with self._lock:
        # Materialize the listing first so deletions cannot disturb the
        # directory scan.
        for path in list(self.cache_dir.glob("*.json*")):
            try:
                path.unlink()
            except FileNotFoundError:
                # Already removed by someone else; nothing to count.
                continue
            removed += 1
    return removed
|
|
608
|
+
|
|
609
|
+
def exists(self, key: str) -> bool:
    """Report whether a file for ``key`` is present on disk.

    Only the file's presence is checked; its contents are not read.
    """
    return self._get_path(key).exists()
|
|
612
|
+
|
|
613
|
+
def get_stats(self) -> dict[str, Any]:
    """Summarize the file cache: location, file count, size and hit ratio.

    Returns:
        Dictionary with the keys ``type``, ``cache_dir``, ``file_count``,
        ``total_size_mb``, ``hits``, ``misses`` and ``hit_ratio``.
    """
    with self._lock:
        entries = [*self.cache_dir.glob("*.json*")]

        size_bytes = 0
        for item in entries:
            size_bytes += item.stat().st_size

        lookups = self._hits + self._misses
        # Avoid dividing by zero before any lookup has happened.
        ratio = self._hits / lookups if lookups > 0 else 0.0

        return {
            "type": "file",
            "cache_dir": str(self.cache_dir),
            "file_count": len(entries),
            "total_size_mb": size_bytes / (1024 * 1024),
            "hits": self._hits,
            "misses": self._misses,
            "hit_ratio": ratio,
        }
|
|
628
|
+
|
|
629
|
+
def _maybe_cleanup(self) -> None:
    """Evict the oldest cache files when the cache exceeds its size limit.

    The limit is ``self.max_size_mb`` megabytes. Once it is exceeded, files
    are removed in order of ascending modification time until the total
    size is at most 80% of the limit.
    """
    # Stat every file exactly once; a file that disappears between the
    # directory listing and the stat simply no longer counts.
    snapshot = []
    for path in self.cache_dir.glob("*.json*"):
        try:
            info = path.stat()
        except FileNotFoundError:
            continue
        snapshot.append((info.st_mtime, info.st_size, path))

    total_size = sum(size for _, size, _ in snapshot)
    max_bytes = self.max_size_mb * 1024 * 1024

    if total_size <= max_bytes:
        return

    # Sort by modification time, delete oldest
    snapshot.sort(key=lambda item: item[0])
    target = max_bytes * 0.8  # Clean to 80%

    for _, size, path in snapshot:
        if total_size <= target:
            break
        # missing_ok: the file may already have been removed elsewhere.
        path.unlink(missing_ok=True)
        total_size -= size
|
|
647
|
+
|
|
648
|
+
|
|
649
|
+
class RedisConnectionError(Exception):
    """Error raised when talking to Redis fails (connection or command)."""
|
|
653
|
+
|
|
654
|
+
|
|
655
|
+
class RedisCacheBackend(CacheBackend):
    """Redis-based cache backend for distributed caching.

    Requires redis package to be installed. Redis errors raised by cache
    operations are recorded (see ``connection_info`` / ``get_stats``) and
    re-raised as ``RedisConnectionError``.

    For production use with automatic fallback, consider using
    `ResilientCacheBackend` from `truthound.profiler.resilience`.

    Example:
        backend = RedisCacheBackend(
            host="localhost",
            port=6379,
            prefix="truthound:cache:",
            connect_timeout=5.0,
            socket_timeout=2.0,
        )

    Attributes:
        host: Redis server hostname
        port: Redis server port
        prefix: Key prefix for namespace isolation
        connection_info: Connection details for diagnostics
    """

    # Keys requested per SCAN round-trip and deleted per DEL call.
    _SCAN_BATCH_SIZE = 500

    def __init__(
        self,
        host: str = "localhost",
        port: int = 6379,
        db: int = 0,
        password: str | None = None,
        prefix: str = "truthound:cache:",
        connect_timeout: float = 5.0,
        socket_timeout: float = 2.0,
        retry_on_timeout: bool = True,
        max_connections: int = 10,
        health_check_interval: int = 30,
        lazy_connect: bool = True,
        **kwargs: Any,
    ):
        """Create the connection pool and client.

        Args:
            host: Redis server hostname.
            port: Redis server port.
            db: Redis database index.
            password: Optional password.
            prefix: Prefix prepended to every cache key.
            connect_timeout: Passed as ``socket_connect_timeout``.
            socket_timeout: Passed as ``socket_timeout``.
            retry_on_timeout: Passed through to the connection pool.
            max_connections: Passed through to the connection pool.
            health_check_interval: Passed through to the connection pool.
            lazy_connect: When ``False``, ping the server immediately and
                raise ``RedisConnectionError`` if setup or the ping fails.
                When ``True``, setup errors are only recorded.
            **kwargs: Extra keyword arguments for ``redis.ConnectionPool``.

        Raises:
            ImportError: If the ``redis`` package is not installed.
            RedisConnectionError: If ``lazy_connect`` is ``False`` and the
                connection cannot be established.
        """
        self.host = host
        self.port = port
        self.prefix = prefix
        self._hits = 0
        self._misses = 0
        self._errors = 0
        self._lock = threading.RLock()
        self._connected = False
        self._last_error: str | None = None
        self._last_error_time: datetime | None = None

        try:
            import redis
            from redis.exceptions import RedisError
        except ImportError as e:
            raise ImportError(
                "Redis support requires the 'redis' package. "
                "Install with: pip install redis"
            ) from e
        self._redis_module = redis
        self._RedisError = RedisError

        # Create connection pool with timeout settings
        try:
            self._pool = redis.ConnectionPool(
                host=host,
                port=port,
                db=db,
                password=password,
                decode_responses=False,
                socket_connect_timeout=connect_timeout,
                socket_timeout=socket_timeout,
                retry_on_timeout=retry_on_timeout,
                max_connections=max_connections,
                health_check_interval=health_check_interval,
                **kwargs,
            )
            self._client = redis.Redis(connection_pool=self._pool)

            # Test connection unless lazy
            if not lazy_connect:
                self._client.ping()
                self._connected = True

        except Exception as e:
            self._connected = False
            self._last_error = str(e)
            self._last_error_time = datetime.now()
            if not lazy_connect:
                raise RedisConnectionError(
                    f"Failed to connect to Redis at {host}:{port}: {e}"
                ) from e

    @property
    def connection_info(self) -> dict[str, Any]:
        """Get connection information."""
        return {
            "host": self.host,
            "port": self.port,
            "prefix": self.prefix,
            "connected": self._connected,
            "last_error": self._last_error,
            "last_error_time": (
                self._last_error_time.isoformat()
                if self._last_error_time else None
            ),
        }

    def _make_key(self, key: str) -> str:
        """Create Redis key with prefix."""
        return f"{self.prefix}{key}"

    def _iter_keys(self) -> Any:
        """Iterate over all Redis keys under this backend's prefix.

        Uses incremental SCAN instead of KEYS so a large keyspace is walked
        in batches rather than in one blocking server call.
        """
        return self._client.scan_iter(
            match=f"{self.prefix}*",
            count=self._SCAN_BATCH_SIZE,
        )

    def _handle_error(self, e: Exception, operation: str) -> None:
        """Handle and record errors."""
        with self._lock:
            self._errors += 1
            self._last_error = f"{operation}: {e}"
            self._last_error_time = datetime.now()

            # Check if it's a connection error
            error_name = type(e).__name__
            if "Connection" in error_name or "Timeout" in error_name:
                self._connected = False

    def ping(self) -> bool:
        """Check if Redis is reachable.

        Returns:
            True if Redis responds to ping
        """
        try:
            self._client.ping()
            self._connected = True
            return True
        except Exception as e:
            self._handle_error(e, "ping")
            return False

    def get(self, key: str) -> CacheEntry | None:
        """Fetch the entry stored under ``key``.

        Expired and corrupted entries are deleted (best effort) and counted
        as misses.

        Raises:
            RedisConnectionError: If the Redis GET itself fails.
        """
        redis_key = self._make_key(key)

        try:
            data = self._client.get(redis_key)
            self._connected = True
        except self._RedisError as e:
            self._handle_error(e, "get")
            raise RedisConnectionError(f"Redis get failed: {e}") from e

        if data is None:
            with self._lock:
                self._misses += 1
            return None

        try:
            entry_dict = json.loads(data.decode("utf-8"))
            entry = CacheEntry.from_dict(entry_dict)

            if entry.is_expired():
                try:
                    self._client.delete(redis_key)
                except self._RedisError:
                    pass  # Ignore delete errors for expired entries
                with self._lock:
                    self._misses += 1
                return None

            entry.touch()
            with self._lock:
                self._hits += 1

            return entry

        except (ValueError, KeyError, TypeError):
            # Corrupted entry - try to delete. ValueError covers
            # json.JSONDecodeError and UnicodeDecodeError.
            try:
                self._client.delete(redis_key)
            except self._RedisError:
                pass
            with self._lock:
                self._misses += 1
            return None

    def set(
        self,
        key: str,
        entry: CacheEntry,
        ttl: timedelta | None = None,
    ) -> None:
        """Store ``entry`` under ``key``, optionally with a Redis expiry.

        Raises:
            ValueError: If the entry cannot be serialized to JSON.
            RedisConnectionError: If the Redis write fails.
        """
        if ttl:
            entry.expires_at = datetime.now() + ttl

        redis_key = self._make_key(key)

        try:
            data = json.dumps(entry.to_dict()).encode("utf-8")
        except (TypeError, ValueError) as e:
            raise ValueError(f"Failed to serialize cache entry: {e}") from e

        try:
            if ttl:
                self._client.setex(redis_key, ttl, data)
            else:
                self._client.set(redis_key, data)
            self._connected = True
        except self._RedisError as e:
            self._handle_error(e, "set")
            raise RedisConnectionError(f"Redis set failed: {e}") from e

    def delete(self, key: str) -> bool:
        """Delete ``key``; return ``True`` if Redis removed something.

        Raises:
            RedisConnectionError: If the Redis DEL fails.
        """
        redis_key = self._make_key(key)
        try:
            result = self._client.delete(redis_key) > 0
            self._connected = True
            return result
        except self._RedisError as e:
            self._handle_error(e, "delete")
            raise RedisConnectionError(f"Redis delete failed: {e}") from e

    def clear(self) -> int:
        """Delete every key under this backend's prefix.

        Returns:
            Number of keys Redis reported as deleted.

        Raises:
            RedisConnectionError: If scanning or deleting fails.
        """
        deleted = 0
        batch: list[Any] = []
        try:
            for redis_key in self._iter_keys():
                batch.append(redis_key)
                # Delete in bounded batches so one DEL never carries an
                # unbounded argument list.
                if len(batch) >= self._SCAN_BATCH_SIZE:
                    deleted += self._client.delete(*batch)
                    batch.clear()
            if batch:
                deleted += self._client.delete(*batch)
            # The scan succeeded even if nothing matched.
            self._connected = True
            return deleted
        except self._RedisError as e:
            self._handle_error(e, "clear")
            raise RedisConnectionError(f"Redis clear failed: {e}") from e

    def exists(self, key: str) -> bool:
        """Report whether ``key`` is present in Redis.

        Raises:
            RedisConnectionError: If the Redis EXISTS fails.
        """
        redis_key = self._make_key(key)
        try:
            result = self._client.exists(redis_key) > 0
            self._connected = True
            return result
        except self._RedisError as e:
            self._handle_error(e, "exists")
            raise RedisConnectionError(f"Redis exists failed: {e}") from e

    def get_stats(self) -> dict[str, Any]:
        """Return counters, connection state and the number of cached keys.

        ``key_count`` is ``-1`` when the key scan fails.
        """
        with self._lock:
            total = self._hits + self._misses

            stats = {
                "type": "redis",
                "host": self.host,
                "port": self.port,
                "prefix": self.prefix,
                "connected": self._connected,
                "hits": self._hits,
                "misses": self._misses,
                "errors": self._errors,
                "hit_ratio": self._hits / total if total > 0 else 0.0,
            }

        # Try to get key count
        try:
            stats["key_count"] = sum(1 for _ in self._iter_keys())
        except self._RedisError:
            stats["key_count"] = -1

        if self._last_error:
            stats["last_error"] = self._last_error
            stats["last_error_time"] = (
                self._last_error_time.isoformat()
                if self._last_error_time else None
            )

        return stats

    def close(self) -> None:
        """Close the connection pool.

        Never raises: a failure while disconnecting is recorded through the
        normal error bookkeeping instead.
        """
        try:
            self._pool.disconnect()
        except Exception as e:
            self._handle_error(e, "close")
        finally:
            self._connected = False
|
|
935
|
+
|
|
936
|
+
|
|
937
|
+
# =============================================================================
|
|
938
|
+
# Cache Backend Registry
|
|
939
|
+
# =============================================================================
|
|
940
|
+
|
|
941
|
+
|
|
942
|
+
class CacheBackendRegistry:
    """Name-to-class lookup table for cache backends.

    Backend classes are registered under a string name and instantiated on
    demand, which lets callers plug in their own backend types.

    Example:
        registry = CacheBackendRegistry()
        registry.register("custom", CustomBackend)
        backend = registry.create("custom", **kwargs)
    """

    def __init__(self) -> None:
        self._backends: dict[str, type[CacheBackend]] = {}

    def register(
        self,
        name: str,
        backend_class: type[CacheBackend],
    ) -> None:
        """Associate ``name`` with ``backend_class``, replacing any earlier entry."""
        self._backends[name] = backend_class

    def create(self, name: str, **kwargs: Any) -> CacheBackend:
        """Instantiate the backend registered as ``name``.

        Args:
            name: Registered backend name.
            **kwargs: Forwarded to the backend class constructor.

        Raises:
            KeyError: If no backend is registered under ``name``.
        """
        try:
            factory = self._backends[name]
        except KeyError:
            known = list(self._backends.keys())
            raise KeyError(
                f"Unknown cache backend: {name}. "
                f"Available: {known}"
            ) from None
        return factory(**kwargs)

    def list_backends(self) -> list[str]:
        """Return the registered backend names in registration order."""
        return [*self._backends]
|
|
976
|
+
|
|
977
|
+
|
|
978
|
+
# Module-wide registry, pre-populated with the built-in backends.
cache_backend_registry = CacheBackendRegistry()
for _backend_name, _backend_class in (
    ("memory", MemoryCacheBackend),
    ("file", FileCacheBackend),
    ("redis", RedisCacheBackend),
):
    cache_backend_registry.register(_backend_name, _backend_class)
# Do not leave the loop variables behind in the module namespace.
del _backend_name, _backend_class
|
|
983
|
+
|
|
984
|
+
|
|
985
|
+
# =============================================================================
|
|
986
|
+
# Profile Cache
|
|
987
|
+
# =============================================================================
|
|
988
|
+
|
|
989
|
+
|
|
990
|
+
@dataclass
class CacheConfig:
    """Settings bundle for profile caching.

    NOTE(review): the fields look like they mirror the ``ProfileCache``
    constructor arguments; ``compression`` has no visible consumer in this
    part of the file - confirm where it is read.
    """

    # Name of the backend to use; defaults to the in-memory backend name.
    backend: str = "memory"
    # Keyword options for the backend; a fresh dict per instance.
    backend_options: dict[str, Any] = field(default_factory=dict)
    # Default lifetime for entries; None leaves it unset.
    default_ttl: timedelta | None = None
    # Caching on/off switch.
    enabled: bool = True
    # Compression on/off switch.
    compression: bool = True
|
|
999
|
+
|
|
1000
|
+
|
|
1001
|
+
class ProfileCache:
    """High-level profile caching with cache-through pattern.

    This is the main interface for caching profile results.
    It wraps a cache backend and provides convenience methods.

    Example:
        # Create cache with default memory backend
        cache = ProfileCache()

        # Or with file backend
        cache = ProfileCache(
            backend="file",
            backend_options={"cache_dir": ".cache"}
        )

        # Cache-through pattern
        key = FileHashCacheKey.from_file("data.parquet")
        profile = cache.get_or_compute(
            key,
            compute_fn=lambda: profile_file("data.parquet")
        )
    """

    def __init__(
        self,
        backend: str | CacheBackend = "memory",
        backend_options: dict[str, Any] | None = None,
        default_ttl: timedelta | None = None,
        enabled: bool = True,
    ):
        """Initialize profile cache.

        Args:
            backend: Backend name or instance
            backend_options: Options for backend construction (ignored when
                a backend instance is passed)
            default_ttl: Default time-to-live for entries
            enabled: Whether caching is enabled
        """
        self.enabled = enabled
        self.default_ttl = default_ttl

        if isinstance(backend, CacheBackend):
            self._backend = backend
        else:
            options = backend_options or {}
            self._backend = cache_backend_registry.create(backend, **options)

    @property
    def backend(self) -> CacheBackend:
        """Access the underlying backend."""
        return self._backend

    def get(self, key: CacheKeyProtocol) -> TableProfile | None:
        """Get profile from cache.

        Args:
            key: Cache key

        Returns:
            Cached profile or None (always None when caching is disabled)
        """
        if not self.enabled:
            return None

        entry = self._backend.get(key.to_string())
        return entry.profile if entry else None

    def set(
        self,
        key: CacheKeyProtocol,
        profile: TableProfile,
        ttl: timedelta | None = None,
        metadata: dict[str, Any] | None = None,
    ) -> None:
        """Store profile in cache.

        Does nothing when caching is disabled.

        Args:
            key: Cache key
            profile: Profile to cache
            ttl: Time-to-live (uses default if not specified)
            metadata: Additional metadata to store
        """
        if not self.enabled:
            return

        entry = CacheEntry(
            profile=profile,
            metadata=metadata or {},
        )

        self._backend.set(
            key.to_string(),
            entry,
            ttl=ttl or self.default_ttl,
        )

    def get_or_compute(
        self,
        key: CacheKeyProtocol,
        compute_fn: Callable[[], TableProfile],
        ttl: timedelta | None = None,
        metadata: dict[str, Any] | None = None,
    ) -> TableProfile:
        """Get from cache or compute and cache.

        This implements the cache-through pattern:
        1. Try to get from cache
        2. If miss, compute the profile
        3. Store in cache
        4. Return the profile

        Args:
            key: Cache key
            compute_fn: Function to compute profile on cache miss
            ttl: Time-to-live for cached entry
            metadata: Additional metadata to store

        Returns:
            Cached or computed profile
        """
        # Try cache first
        cached = self.get(key)
        if cached is not None:
            return cached

        # Compute profile
        profile = compute_fn()

        # Store in cache
        self.set(key, profile, ttl=ttl, metadata=metadata)

        return profile

    def invalidate(self, key: CacheKeyProtocol) -> bool:
        """Invalidate a cache entry.

        Args:
            key: Cache key

        Returns:
            True if entry was invalidated
        """
        return self._backend.delete(key.to_string())

    def invalidate_by_pattern(self, pattern: str) -> int:
        """Invalidate entries matching a pattern.

        Note: Not implemented yet; no entries are removed and 0 is
        returned regardless of ``pattern``.

        Args:
            pattern: Pattern to match (glob-style)

        Returns:
            Number of entries invalidated (currently always 0)
        """
        # This is a simplified implementation
        # Full pattern matching would require backend support
        return 0

    def clear(self) -> int:
        """Clear all cache entries.

        Returns:
            Number of entries cleared
        """
        return self._backend.clear()

    def get_stats(self) -> dict[str, Any]:
        """Get cache statistics.

        Returns:
            The backend's statistics dictionary extended with ``enabled``
            and ``default_ttl_seconds``
        """
        stats = self._backend.get_stats()
        stats["enabled"] = self.enabled
        stats["default_ttl_seconds"] = (
            self.default_ttl.total_seconds() if self.default_ttl else None
        )
        return stats

    def warm(
        self,
        keys: list[CacheKeyProtocol],
        compute_fn: Callable[[CacheKeyProtocol], TableProfile],
        *,
        parallel: bool = False,
    ) -> dict[str, bool]:
        """Warm cache with multiple entries.

        Keys that already exist in the backend are left untouched and
        reported as successful. Any exception raised while checking,
        computing or storing a key marks that key as failed without
        aborting the remaining keys.

        Args:
            keys: Cache keys to warm
            compute_fn: Function to compute each profile
            parallel: Whether to compute in parallel. When true,
                ``compute_fn`` is called from worker threads and must be
                safe to call concurrently.

        Returns:
            Dictionary mapping key strings to success status
        """
        key_strings = [key.to_string() for key in keys]

        def warm_one(item: tuple[CacheKeyProtocol, str]) -> bool:
            """Ensure one key is cached; report success instead of raising."""
            key, key_str = item
            try:
                if not self._backend.exists(key_str):
                    self.set(key, compute_fn(key))
                return True
            except Exception:
                # Failure is reported to the caller through the result map.
                return False

        work = list(zip(keys, key_strings))

        if parallel and len(work) > 1:
            # Local import keeps the dependency confined to the parallel path.
            from concurrent.futures import ThreadPoolExecutor

            with ThreadPoolExecutor(max_workers=min(32, len(work))) as pool:
                # map() yields results in input order.
                outcomes = list(pool.map(warm_one, work))
        else:
            outcomes = [warm_one(item) for item in work]

        return dict(zip(key_strings, outcomes))
|
|
1212
|
+
|
|
1213
|
+
|
|
1214
|
+
# =============================================================================
|
|
1215
|
+
# Caching Decorator
|
|
1216
|
+
# =============================================================================
|
|
1217
|
+
|
|
1218
|
+
|
|
1219
|
+
def cached_profile(
    cache: ProfileCache | None = None,
    ttl: timedelta | None = None,
    key_fn: Callable[..., CacheKeyProtocol] | None = None,
) -> Callable:
    """Decorator to cache profile function results.

    Example:
        cache = ProfileCache()

        @cached_profile(cache, ttl=timedelta(hours=1))
        def profile_file(path: str) -> TableProfile:
            # expensive profiling...
            return profile

    The cache key is chosen as follows:
    1. ``key_fn(*args, **kwargs)`` when ``key_fn`` is given;
    2. otherwise a file-hash key when the first positional argument is a
       ``str`` or ``Path``;
    3. otherwise a SHA-256 hash of the function name and the ``repr`` of
       the arguments.

    Args:
        cache: ProfileCache instance (creates default if not provided)
        ttl: Time-to-live for cached entries
        key_fn: Function to generate cache key from arguments

    Returns:
        Decorated function
    """
    import functools  # local import: keeps this block self-contained

    _cache = cache or ProfileCache()

    def decorator(func: Callable[..., TableProfile]) -> Callable[..., TableProfile]:
        # wraps() preserves the decorated function's name, docstring and
        # signature metadata on the wrapper.
        @functools.wraps(func)
        def wrapper(*args: Any, **kwargs: Any) -> TableProfile:
            # Generate cache key
            if key_fn:
                key = key_fn(*args, **kwargs)
            elif args and isinstance(args[0], (str, Path)):
                # Default: use first argument as file path
                key = FileHashCacheKey.from_file(args[0])
            else:
                # Fallback to function call hash. Keyword arguments are
                # sorted so the same call always hashes to the same key,
                # whatever order they were passed in.
                ordered_kwargs = dict(sorted(kwargs.items()))
                call_hash = hashlib.sha256(
                    f"{func.__name__}:{args}:{ordered_kwargs}".encode()
                ).hexdigest()
                key = CacheKey(key=call_hash)

            return _cache.get_or_compute(
                key,
                compute_fn=lambda: func(*args, **kwargs),
                ttl=ttl,
            )

        return wrapper

    return decorator
|
|
1269
|
+
|
|
1270
|
+
|
|
1271
|
+
# =============================================================================
|
|
1272
|
+
# Convenience Functions
|
|
1273
|
+
# =============================================================================
|
|
1274
|
+
|
|
1275
|
+
|
|
1276
|
+
def create_cache(backend: str = "memory", **kwargs: Any) -> ProfileCache:
    """Build a ``ProfileCache`` on top of the named backend.

    Args:
        backend: Backend type ("memory", "file", "redis")
        **kwargs: Backend-specific options, handed over as ``backend_options``

    Returns:
        Configured ProfileCache instance
    """
    backend_options = kwargs
    return ProfileCache(backend=backend, backend_options=backend_options)
|
|
1290
|
+
|
|
1291
|
+
|
|
1292
|
+
def hash_file(path: str | Path, quick: bool = False) -> str:
    """Return the content hash of a file.

    The hash is whatever ``FileHashCacheKey.from_file`` stores in
    ``file_hash`` (documented here as a SHA-256 hash string).

    Args:
        path: Path to file
        quick: Use quick hash for large files (forwarded as ``quick_hash``)

    Returns:
        SHA-256 hash string
    """
    return FileHashCacheKey.from_file(path, quick_hash=quick).file_hash
|