truthound 1.0.8__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- truthound/__init__.py +162 -0
- truthound/adapters.py +100 -0
- truthound/api.py +365 -0
- truthound/audit/__init__.py +248 -0
- truthound/audit/core.py +967 -0
- truthound/audit/filters.py +620 -0
- truthound/audit/formatters.py +707 -0
- truthound/audit/logger.py +902 -0
- truthound/audit/middleware.py +571 -0
- truthound/audit/storage.py +1083 -0
- truthound/benchmark/__init__.py +123 -0
- truthound/benchmark/base.py +757 -0
- truthound/benchmark/comparison.py +635 -0
- truthound/benchmark/generators.py +706 -0
- truthound/benchmark/reporters.py +718 -0
- truthound/benchmark/runner.py +635 -0
- truthound/benchmark/scenarios.py +712 -0
- truthound/cache.py +252 -0
- truthound/checkpoint/__init__.py +136 -0
- truthound/checkpoint/actions/__init__.py +164 -0
- truthound/checkpoint/actions/base.py +324 -0
- truthound/checkpoint/actions/custom.py +234 -0
- truthound/checkpoint/actions/discord_notify.py +290 -0
- truthound/checkpoint/actions/email_notify.py +405 -0
- truthound/checkpoint/actions/github_action.py +406 -0
- truthound/checkpoint/actions/opsgenie.py +1499 -0
- truthound/checkpoint/actions/pagerduty.py +226 -0
- truthound/checkpoint/actions/slack_notify.py +233 -0
- truthound/checkpoint/actions/store_result.py +249 -0
- truthound/checkpoint/actions/teams_notify.py +1570 -0
- truthound/checkpoint/actions/telegram_notify.py +419 -0
- truthound/checkpoint/actions/update_docs.py +552 -0
- truthound/checkpoint/actions/webhook.py +293 -0
- truthound/checkpoint/analytics/__init__.py +147 -0
- truthound/checkpoint/analytics/aggregations/__init__.py +23 -0
- truthound/checkpoint/analytics/aggregations/rollup.py +481 -0
- truthound/checkpoint/analytics/aggregations/time_bucket.py +306 -0
- truthound/checkpoint/analytics/analyzers/__init__.py +17 -0
- truthound/checkpoint/analytics/analyzers/anomaly.py +386 -0
- truthound/checkpoint/analytics/analyzers/base.py +270 -0
- truthound/checkpoint/analytics/analyzers/forecast.py +421 -0
- truthound/checkpoint/analytics/analyzers/trend.py +314 -0
- truthound/checkpoint/analytics/models.py +292 -0
- truthound/checkpoint/analytics/protocols.py +549 -0
- truthound/checkpoint/analytics/service.py +718 -0
- truthound/checkpoint/analytics/stores/__init__.py +16 -0
- truthound/checkpoint/analytics/stores/base.py +306 -0
- truthound/checkpoint/analytics/stores/memory_store.py +353 -0
- truthound/checkpoint/analytics/stores/sqlite_store.py +557 -0
- truthound/checkpoint/analytics/stores/timescale_store.py +501 -0
- truthound/checkpoint/async_actions.py +794 -0
- truthound/checkpoint/async_base.py +708 -0
- truthound/checkpoint/async_checkpoint.py +617 -0
- truthound/checkpoint/async_runner.py +639 -0
- truthound/checkpoint/checkpoint.py +527 -0
- truthound/checkpoint/ci/__init__.py +61 -0
- truthound/checkpoint/ci/detector.py +355 -0
- truthound/checkpoint/ci/reporter.py +436 -0
- truthound/checkpoint/ci/templates.py +454 -0
- truthound/checkpoint/circuitbreaker/__init__.py +133 -0
- truthound/checkpoint/circuitbreaker/breaker.py +542 -0
- truthound/checkpoint/circuitbreaker/core.py +252 -0
- truthound/checkpoint/circuitbreaker/detection.py +459 -0
- truthound/checkpoint/circuitbreaker/middleware.py +389 -0
- truthound/checkpoint/circuitbreaker/registry.py +357 -0
- truthound/checkpoint/distributed/__init__.py +139 -0
- truthound/checkpoint/distributed/backends/__init__.py +35 -0
- truthound/checkpoint/distributed/backends/celery_backend.py +503 -0
- truthound/checkpoint/distributed/backends/kubernetes_backend.py +696 -0
- truthound/checkpoint/distributed/backends/local_backend.py +397 -0
- truthound/checkpoint/distributed/backends/ray_backend.py +625 -0
- truthound/checkpoint/distributed/base.py +774 -0
- truthound/checkpoint/distributed/orchestrator.py +765 -0
- truthound/checkpoint/distributed/protocols.py +842 -0
- truthound/checkpoint/distributed/registry.py +449 -0
- truthound/checkpoint/idempotency/__init__.py +120 -0
- truthound/checkpoint/idempotency/core.py +295 -0
- truthound/checkpoint/idempotency/fingerprint.py +454 -0
- truthound/checkpoint/idempotency/locking.py +604 -0
- truthound/checkpoint/idempotency/service.py +592 -0
- truthound/checkpoint/idempotency/stores.py +653 -0
- truthound/checkpoint/monitoring/__init__.py +134 -0
- truthound/checkpoint/monitoring/aggregators/__init__.py +15 -0
- truthound/checkpoint/monitoring/aggregators/base.py +372 -0
- truthound/checkpoint/monitoring/aggregators/realtime.py +300 -0
- truthound/checkpoint/monitoring/aggregators/window.py +493 -0
- truthound/checkpoint/monitoring/collectors/__init__.py +17 -0
- truthound/checkpoint/monitoring/collectors/base.py +257 -0
- truthound/checkpoint/monitoring/collectors/memory_collector.py +617 -0
- truthound/checkpoint/monitoring/collectors/prometheus_collector.py +451 -0
- truthound/checkpoint/monitoring/collectors/redis_collector.py +518 -0
- truthound/checkpoint/monitoring/events.py +410 -0
- truthound/checkpoint/monitoring/protocols.py +636 -0
- truthound/checkpoint/monitoring/service.py +578 -0
- truthound/checkpoint/monitoring/views/__init__.py +17 -0
- truthound/checkpoint/monitoring/views/base.py +172 -0
- truthound/checkpoint/monitoring/views/queue_view.py +220 -0
- truthound/checkpoint/monitoring/views/task_view.py +240 -0
- truthound/checkpoint/monitoring/views/worker_view.py +263 -0
- truthound/checkpoint/registry.py +337 -0
- truthound/checkpoint/runner.py +356 -0
- truthound/checkpoint/transaction/__init__.py +133 -0
- truthound/checkpoint/transaction/base.py +389 -0
- truthound/checkpoint/transaction/compensatable.py +537 -0
- truthound/checkpoint/transaction/coordinator.py +576 -0
- truthound/checkpoint/transaction/executor.py +622 -0
- truthound/checkpoint/transaction/idempotency.py +534 -0
- truthound/checkpoint/transaction/saga/__init__.py +143 -0
- truthound/checkpoint/transaction/saga/builder.py +584 -0
- truthound/checkpoint/transaction/saga/definition.py +515 -0
- truthound/checkpoint/transaction/saga/event_store.py +542 -0
- truthound/checkpoint/transaction/saga/patterns.py +833 -0
- truthound/checkpoint/transaction/saga/runner.py +718 -0
- truthound/checkpoint/transaction/saga/state_machine.py +793 -0
- truthound/checkpoint/transaction/saga/strategies.py +780 -0
- truthound/checkpoint/transaction/saga/testing.py +886 -0
- truthound/checkpoint/triggers/__init__.py +58 -0
- truthound/checkpoint/triggers/base.py +237 -0
- truthound/checkpoint/triggers/event.py +385 -0
- truthound/checkpoint/triggers/schedule.py +355 -0
- truthound/cli.py +2358 -0
- truthound/cli_modules/__init__.py +124 -0
- truthound/cli_modules/advanced/__init__.py +45 -0
- truthound/cli_modules/advanced/benchmark.py +343 -0
- truthound/cli_modules/advanced/docs.py +225 -0
- truthound/cli_modules/advanced/lineage.py +209 -0
- truthound/cli_modules/advanced/ml.py +320 -0
- truthound/cli_modules/advanced/realtime.py +196 -0
- truthound/cli_modules/checkpoint/__init__.py +46 -0
- truthound/cli_modules/checkpoint/init.py +114 -0
- truthound/cli_modules/checkpoint/list.py +71 -0
- truthound/cli_modules/checkpoint/run.py +159 -0
- truthound/cli_modules/checkpoint/validate.py +67 -0
- truthound/cli_modules/common/__init__.py +71 -0
- truthound/cli_modules/common/errors.py +414 -0
- truthound/cli_modules/common/options.py +419 -0
- truthound/cli_modules/common/output.py +507 -0
- truthound/cli_modules/common/protocol.py +552 -0
- truthound/cli_modules/core/__init__.py +48 -0
- truthound/cli_modules/core/check.py +123 -0
- truthound/cli_modules/core/compare.py +104 -0
- truthound/cli_modules/core/learn.py +57 -0
- truthound/cli_modules/core/mask.py +77 -0
- truthound/cli_modules/core/profile.py +65 -0
- truthound/cli_modules/core/scan.py +61 -0
- truthound/cli_modules/profiler/__init__.py +51 -0
- truthound/cli_modules/profiler/auto_profile.py +175 -0
- truthound/cli_modules/profiler/metadata.py +107 -0
- truthound/cli_modules/profiler/suite.py +283 -0
- truthound/cli_modules/registry.py +431 -0
- truthound/cli_modules/scaffolding/__init__.py +89 -0
- truthound/cli_modules/scaffolding/base.py +631 -0
- truthound/cli_modules/scaffolding/commands.py +545 -0
- truthound/cli_modules/scaffolding/plugins.py +1072 -0
- truthound/cli_modules/scaffolding/reporters.py +594 -0
- truthound/cli_modules/scaffolding/validators.py +1127 -0
- truthound/common/__init__.py +18 -0
- truthound/common/resilience/__init__.py +130 -0
- truthound/common/resilience/bulkhead.py +266 -0
- truthound/common/resilience/circuit_breaker.py +516 -0
- truthound/common/resilience/composite.py +332 -0
- truthound/common/resilience/config.py +292 -0
- truthound/common/resilience/protocols.py +217 -0
- truthound/common/resilience/rate_limiter.py +404 -0
- truthound/common/resilience/retry.py +341 -0
- truthound/datadocs/__init__.py +260 -0
- truthound/datadocs/base.py +571 -0
- truthound/datadocs/builder.py +761 -0
- truthound/datadocs/charts.py +764 -0
- truthound/datadocs/dashboard/__init__.py +63 -0
- truthound/datadocs/dashboard/app.py +576 -0
- truthound/datadocs/dashboard/components.py +584 -0
- truthound/datadocs/dashboard/state.py +240 -0
- truthound/datadocs/engine/__init__.py +46 -0
- truthound/datadocs/engine/context.py +376 -0
- truthound/datadocs/engine/pipeline.py +618 -0
- truthound/datadocs/engine/registry.py +469 -0
- truthound/datadocs/exporters/__init__.py +49 -0
- truthound/datadocs/exporters/base.py +198 -0
- truthound/datadocs/exporters/html.py +178 -0
- truthound/datadocs/exporters/json_exporter.py +253 -0
- truthound/datadocs/exporters/markdown.py +284 -0
- truthound/datadocs/exporters/pdf.py +392 -0
- truthound/datadocs/i18n/__init__.py +86 -0
- truthound/datadocs/i18n/catalog.py +960 -0
- truthound/datadocs/i18n/formatting.py +505 -0
- truthound/datadocs/i18n/loader.py +256 -0
- truthound/datadocs/i18n/plurals.py +378 -0
- truthound/datadocs/renderers/__init__.py +42 -0
- truthound/datadocs/renderers/base.py +401 -0
- truthound/datadocs/renderers/custom.py +342 -0
- truthound/datadocs/renderers/jinja.py +697 -0
- truthound/datadocs/sections.py +736 -0
- truthound/datadocs/styles.py +931 -0
- truthound/datadocs/themes/__init__.py +101 -0
- truthound/datadocs/themes/base.py +336 -0
- truthound/datadocs/themes/default.py +417 -0
- truthound/datadocs/themes/enterprise.py +419 -0
- truthound/datadocs/themes/loader.py +336 -0
- truthound/datadocs/themes.py +301 -0
- truthound/datadocs/transformers/__init__.py +57 -0
- truthound/datadocs/transformers/base.py +268 -0
- truthound/datadocs/transformers/enrichers.py +544 -0
- truthound/datadocs/transformers/filters.py +447 -0
- truthound/datadocs/transformers/i18n.py +468 -0
- truthound/datadocs/versioning/__init__.py +62 -0
- truthound/datadocs/versioning/diff.py +639 -0
- truthound/datadocs/versioning/storage.py +497 -0
- truthound/datadocs/versioning/version.py +358 -0
- truthound/datasources/__init__.py +223 -0
- truthound/datasources/_async_protocols.py +222 -0
- truthound/datasources/_protocols.py +159 -0
- truthound/datasources/adapters.py +428 -0
- truthound/datasources/async_base.py +599 -0
- truthound/datasources/async_factory.py +511 -0
- truthound/datasources/base.py +516 -0
- truthound/datasources/factory.py +433 -0
- truthound/datasources/nosql/__init__.py +47 -0
- truthound/datasources/nosql/base.py +487 -0
- truthound/datasources/nosql/elasticsearch.py +801 -0
- truthound/datasources/nosql/mongodb.py +636 -0
- truthound/datasources/pandas_optimized.py +582 -0
- truthound/datasources/pandas_source.py +216 -0
- truthound/datasources/polars_source.py +395 -0
- truthound/datasources/spark_source.py +479 -0
- truthound/datasources/sql/__init__.py +154 -0
- truthound/datasources/sql/base.py +710 -0
- truthound/datasources/sql/bigquery.py +410 -0
- truthound/datasources/sql/cloud_base.py +199 -0
- truthound/datasources/sql/databricks.py +471 -0
- truthound/datasources/sql/mysql.py +316 -0
- truthound/datasources/sql/oracle.py +427 -0
- truthound/datasources/sql/postgresql.py +321 -0
- truthound/datasources/sql/redshift.py +479 -0
- truthound/datasources/sql/snowflake.py +439 -0
- truthound/datasources/sql/sqlite.py +286 -0
- truthound/datasources/sql/sqlserver.py +437 -0
- truthound/datasources/streaming/__init__.py +47 -0
- truthound/datasources/streaming/base.py +350 -0
- truthound/datasources/streaming/kafka.py +670 -0
- truthound/decorators.py +98 -0
- truthound/docs/__init__.py +69 -0
- truthound/docs/extractor.py +971 -0
- truthound/docs/generator.py +601 -0
- truthound/docs/parser.py +1037 -0
- truthound/docs/renderer.py +999 -0
- truthound/drift/__init__.py +22 -0
- truthound/drift/compare.py +189 -0
- truthound/drift/detectors.py +464 -0
- truthound/drift/report.py +160 -0
- truthound/execution/__init__.py +65 -0
- truthound/execution/_protocols.py +324 -0
- truthound/execution/base.py +576 -0
- truthound/execution/distributed/__init__.py +179 -0
- truthound/execution/distributed/aggregations.py +731 -0
- truthound/execution/distributed/arrow_bridge.py +817 -0
- truthound/execution/distributed/base.py +550 -0
- truthound/execution/distributed/dask_engine.py +976 -0
- truthound/execution/distributed/mixins.py +766 -0
- truthound/execution/distributed/protocols.py +756 -0
- truthound/execution/distributed/ray_engine.py +1127 -0
- truthound/execution/distributed/registry.py +446 -0
- truthound/execution/distributed/spark_engine.py +1011 -0
- truthound/execution/distributed/validator_adapter.py +682 -0
- truthound/execution/pandas_engine.py +401 -0
- truthound/execution/polars_engine.py +497 -0
- truthound/execution/pushdown/__init__.py +230 -0
- truthound/execution/pushdown/ast.py +1550 -0
- truthound/execution/pushdown/builder.py +1550 -0
- truthound/execution/pushdown/dialects.py +1072 -0
- truthound/execution/pushdown/executor.py +829 -0
- truthound/execution/pushdown/optimizer.py +1041 -0
- truthound/execution/sql_engine.py +518 -0
- truthound/infrastructure/__init__.py +189 -0
- truthound/infrastructure/audit.py +1515 -0
- truthound/infrastructure/config.py +1133 -0
- truthound/infrastructure/encryption.py +1132 -0
- truthound/infrastructure/logging.py +1503 -0
- truthound/infrastructure/metrics.py +1220 -0
- truthound/lineage/__init__.py +89 -0
- truthound/lineage/base.py +746 -0
- truthound/lineage/impact_analysis.py +474 -0
- truthound/lineage/integrations/__init__.py +22 -0
- truthound/lineage/integrations/openlineage.py +548 -0
- truthound/lineage/tracker.py +512 -0
- truthound/lineage/visualization/__init__.py +33 -0
- truthound/lineage/visualization/protocols.py +145 -0
- truthound/lineage/visualization/renderers/__init__.py +20 -0
- truthound/lineage/visualization/renderers/cytoscape.py +329 -0
- truthound/lineage/visualization/renderers/d3.py +331 -0
- truthound/lineage/visualization/renderers/graphviz.py +276 -0
- truthound/lineage/visualization/renderers/mermaid.py +308 -0
- truthound/maskers.py +113 -0
- truthound/ml/__init__.py +124 -0
- truthound/ml/anomaly_models/__init__.py +31 -0
- truthound/ml/anomaly_models/ensemble.py +362 -0
- truthound/ml/anomaly_models/isolation_forest.py +444 -0
- truthound/ml/anomaly_models/statistical.py +392 -0
- truthound/ml/base.py +1178 -0
- truthound/ml/drift_detection/__init__.py +26 -0
- truthound/ml/drift_detection/concept.py +381 -0
- truthound/ml/drift_detection/distribution.py +361 -0
- truthound/ml/drift_detection/feature.py +442 -0
- truthound/ml/drift_detection/multivariate.py +495 -0
- truthound/ml/monitoring/__init__.py +88 -0
- truthound/ml/monitoring/alerting/__init__.py +33 -0
- truthound/ml/monitoring/alerting/handlers.py +427 -0
- truthound/ml/monitoring/alerting/rules.py +508 -0
- truthound/ml/monitoring/collectors/__init__.py +19 -0
- truthound/ml/monitoring/collectors/composite.py +105 -0
- truthound/ml/monitoring/collectors/drift.py +324 -0
- truthound/ml/monitoring/collectors/performance.py +179 -0
- truthound/ml/monitoring/collectors/quality.py +369 -0
- truthound/ml/monitoring/monitor.py +536 -0
- truthound/ml/monitoring/protocols.py +451 -0
- truthound/ml/monitoring/stores/__init__.py +15 -0
- truthound/ml/monitoring/stores/memory.py +201 -0
- truthound/ml/monitoring/stores/prometheus.py +296 -0
- truthound/ml/rule_learning/__init__.py +25 -0
- truthound/ml/rule_learning/constraint_miner.py +443 -0
- truthound/ml/rule_learning/pattern_learner.py +499 -0
- truthound/ml/rule_learning/profile_learner.py +462 -0
- truthound/multitenancy/__init__.py +326 -0
- truthound/multitenancy/core.py +852 -0
- truthound/multitenancy/integration.py +597 -0
- truthound/multitenancy/isolation.py +630 -0
- truthound/multitenancy/manager.py +770 -0
- truthound/multitenancy/middleware.py +765 -0
- truthound/multitenancy/quota.py +537 -0
- truthound/multitenancy/resolvers.py +603 -0
- truthound/multitenancy/storage.py +703 -0
- truthound/observability/__init__.py +307 -0
- truthound/observability/context.py +531 -0
- truthound/observability/instrumentation.py +611 -0
- truthound/observability/logging.py +887 -0
- truthound/observability/metrics.py +1157 -0
- truthound/observability/tracing/__init__.py +178 -0
- truthound/observability/tracing/baggage.py +310 -0
- truthound/observability/tracing/config.py +426 -0
- truthound/observability/tracing/exporter.py +787 -0
- truthound/observability/tracing/integration.py +1018 -0
- truthound/observability/tracing/otel/__init__.py +146 -0
- truthound/observability/tracing/otel/adapter.py +982 -0
- truthound/observability/tracing/otel/bridge.py +1177 -0
- truthound/observability/tracing/otel/compat.py +681 -0
- truthound/observability/tracing/otel/config.py +691 -0
- truthound/observability/tracing/otel/detection.py +327 -0
- truthound/observability/tracing/otel/protocols.py +426 -0
- truthound/observability/tracing/processor.py +561 -0
- truthound/observability/tracing/propagator.py +757 -0
- truthound/observability/tracing/provider.py +569 -0
- truthound/observability/tracing/resource.py +515 -0
- truthound/observability/tracing/sampler.py +487 -0
- truthound/observability/tracing/span.py +676 -0
- truthound/plugins/__init__.py +198 -0
- truthound/plugins/base.py +599 -0
- truthound/plugins/cli.py +680 -0
- truthound/plugins/dependencies/__init__.py +42 -0
- truthound/plugins/dependencies/graph.py +422 -0
- truthound/plugins/dependencies/resolver.py +417 -0
- truthound/plugins/discovery.py +379 -0
- truthound/plugins/docs/__init__.py +46 -0
- truthound/plugins/docs/extractor.py +444 -0
- truthound/plugins/docs/renderer.py +499 -0
- truthound/plugins/enterprise_manager.py +877 -0
- truthound/plugins/examples/__init__.py +19 -0
- truthound/plugins/examples/custom_validators.py +317 -0
- truthound/plugins/examples/slack_notifier.py +312 -0
- truthound/plugins/examples/xml_reporter.py +254 -0
- truthound/plugins/hooks.py +558 -0
- truthound/plugins/lifecycle/__init__.py +43 -0
- truthound/plugins/lifecycle/hot_reload.py +402 -0
- truthound/plugins/lifecycle/manager.py +371 -0
- truthound/plugins/manager.py +736 -0
- truthound/plugins/registry.py +338 -0
- truthound/plugins/security/__init__.py +93 -0
- truthound/plugins/security/exceptions.py +332 -0
- truthound/plugins/security/policies.py +348 -0
- truthound/plugins/security/protocols.py +643 -0
- truthound/plugins/security/sandbox/__init__.py +45 -0
- truthound/plugins/security/sandbox/context.py +158 -0
- truthound/plugins/security/sandbox/engines/__init__.py +19 -0
- truthound/plugins/security/sandbox/engines/container.py +379 -0
- truthound/plugins/security/sandbox/engines/noop.py +144 -0
- truthound/plugins/security/sandbox/engines/process.py +336 -0
- truthound/plugins/security/sandbox/factory.py +211 -0
- truthound/plugins/security/signing/__init__.py +57 -0
- truthound/plugins/security/signing/service.py +330 -0
- truthound/plugins/security/signing/trust_store.py +368 -0
- truthound/plugins/security/signing/verifier.py +459 -0
- truthound/plugins/versioning/__init__.py +41 -0
- truthound/plugins/versioning/constraints.py +297 -0
- truthound/plugins/versioning/resolver.py +329 -0
- truthound/profiler/__init__.py +1729 -0
- truthound/profiler/_lazy.py +452 -0
- truthound/profiler/ab_testing/__init__.py +80 -0
- truthound/profiler/ab_testing/analysis.py +449 -0
- truthound/profiler/ab_testing/base.py +257 -0
- truthound/profiler/ab_testing/experiment.py +395 -0
- truthound/profiler/ab_testing/tracking.py +368 -0
- truthound/profiler/auto_threshold.py +1170 -0
- truthound/profiler/base.py +579 -0
- truthound/profiler/cache_patterns.py +911 -0
- truthound/profiler/caching.py +1303 -0
- truthound/profiler/column_profiler.py +712 -0
- truthound/profiler/comparison.py +1007 -0
- truthound/profiler/custom_patterns.py +1170 -0
- truthound/profiler/dashboard/__init__.py +50 -0
- truthound/profiler/dashboard/app.py +476 -0
- truthound/profiler/dashboard/components.py +457 -0
- truthound/profiler/dashboard/config.py +72 -0
- truthound/profiler/distributed/__init__.py +83 -0
- truthound/profiler/distributed/base.py +281 -0
- truthound/profiler/distributed/dask_backend.py +498 -0
- truthound/profiler/distributed/local_backend.py +293 -0
- truthound/profiler/distributed/profiler.py +304 -0
- truthound/profiler/distributed/ray_backend.py +374 -0
- truthound/profiler/distributed/spark_backend.py +375 -0
- truthound/profiler/distributed.py +1366 -0
- truthound/profiler/enterprise_sampling.py +1065 -0
- truthound/profiler/errors.py +488 -0
- truthound/profiler/evolution/__init__.py +91 -0
- truthound/profiler/evolution/alerts.py +426 -0
- truthound/profiler/evolution/changes.py +206 -0
- truthound/profiler/evolution/compatibility.py +365 -0
- truthound/profiler/evolution/detector.py +372 -0
- truthound/profiler/evolution/protocols.py +121 -0
- truthound/profiler/generators/__init__.py +48 -0
- truthound/profiler/generators/base.py +384 -0
- truthound/profiler/generators/ml_rules.py +375 -0
- truthound/profiler/generators/pattern_rules.py +384 -0
- truthound/profiler/generators/schema_rules.py +267 -0
- truthound/profiler/generators/stats_rules.py +324 -0
- truthound/profiler/generators/suite_generator.py +857 -0
- truthound/profiler/i18n.py +1542 -0
- truthound/profiler/incremental.py +554 -0
- truthound/profiler/incremental_validation.py +1710 -0
- truthound/profiler/integration/__init__.py +73 -0
- truthound/profiler/integration/adapters.py +345 -0
- truthound/profiler/integration/context.py +371 -0
- truthound/profiler/integration/executor.py +527 -0
- truthound/profiler/integration/naming.py +75 -0
- truthound/profiler/integration/protocols.py +243 -0
- truthound/profiler/memory.py +1185 -0
- truthound/profiler/migration/__init__.py +60 -0
- truthound/profiler/migration/base.py +345 -0
- truthound/profiler/migration/manager.py +444 -0
- truthound/profiler/migration/v1_0_to_v1_1.py +484 -0
- truthound/profiler/ml/__init__.py +73 -0
- truthound/profiler/ml/base.py +244 -0
- truthound/profiler/ml/classifier.py +507 -0
- truthound/profiler/ml/feature_extraction.py +604 -0
- truthound/profiler/ml/pretrained.py +448 -0
- truthound/profiler/ml_inference.py +1276 -0
- truthound/profiler/native_patterns.py +815 -0
- truthound/profiler/observability.py +1184 -0
- truthound/profiler/process_timeout.py +1566 -0
- truthound/profiler/progress.py +568 -0
- truthound/profiler/progress_callbacks.py +1734 -0
- truthound/profiler/quality.py +1345 -0
- truthound/profiler/resilience.py +1180 -0
- truthound/profiler/sampled_matcher.py +794 -0
- truthound/profiler/sampling.py +1288 -0
- truthound/profiler/scheduling/__init__.py +82 -0
- truthound/profiler/scheduling/protocols.py +214 -0
- truthound/profiler/scheduling/scheduler.py +474 -0
- truthound/profiler/scheduling/storage.py +457 -0
- truthound/profiler/scheduling/triggers.py +449 -0
- truthound/profiler/schema.py +603 -0
- truthound/profiler/streaming.py +685 -0
- truthound/profiler/streaming_patterns.py +1354 -0
- truthound/profiler/suite_cli.py +625 -0
- truthound/profiler/suite_config.py +789 -0
- truthound/profiler/suite_export.py +1268 -0
- truthound/profiler/table_profiler.py +547 -0
- truthound/profiler/timeout.py +565 -0
- truthound/profiler/validation.py +1532 -0
- truthound/profiler/visualization/__init__.py +118 -0
- truthound/profiler/visualization/base.py +346 -0
- truthound/profiler/visualization/generator.py +1259 -0
- truthound/profiler/visualization/plotly_renderer.py +811 -0
- truthound/profiler/visualization/renderers.py +669 -0
- truthound/profiler/visualization/sections.py +540 -0
- truthound/profiler/visualization.py +2122 -0
- truthound/profiler/yaml_validation.py +1151 -0
- truthound/py.typed +0 -0
- truthound/ratelimit/__init__.py +248 -0
- truthound/ratelimit/algorithms.py +1108 -0
- truthound/ratelimit/core.py +573 -0
- truthound/ratelimit/integration.py +532 -0
- truthound/ratelimit/limiter.py +663 -0
- truthound/ratelimit/middleware.py +700 -0
- truthound/ratelimit/policy.py +792 -0
- truthound/ratelimit/storage.py +763 -0
- truthound/rbac/__init__.py +340 -0
- truthound/rbac/core.py +976 -0
- truthound/rbac/integration.py +760 -0
- truthound/rbac/manager.py +1052 -0
- truthound/rbac/middleware.py +842 -0
- truthound/rbac/policy.py +954 -0
- truthound/rbac/storage.py +878 -0
- truthound/realtime/__init__.py +141 -0
- truthound/realtime/adapters/__init__.py +43 -0
- truthound/realtime/adapters/base.py +533 -0
- truthound/realtime/adapters/kafka.py +487 -0
- truthound/realtime/adapters/kinesis.py +479 -0
- truthound/realtime/adapters/mock.py +243 -0
- truthound/realtime/base.py +553 -0
- truthound/realtime/factory.py +382 -0
- truthound/realtime/incremental.py +660 -0
- truthound/realtime/processing/__init__.py +67 -0
- truthound/realtime/processing/exactly_once.py +575 -0
- truthound/realtime/processing/state.py +547 -0
- truthound/realtime/processing/windows.py +647 -0
- truthound/realtime/protocols.py +569 -0
- truthound/realtime/streaming.py +605 -0
- truthound/realtime/testing/__init__.py +32 -0
- truthound/realtime/testing/containers.py +615 -0
- truthound/realtime/testing/fixtures.py +484 -0
- truthound/report.py +280 -0
- truthound/reporters/__init__.py +46 -0
- truthound/reporters/_protocols.py +30 -0
- truthound/reporters/base.py +324 -0
- truthound/reporters/ci/__init__.py +66 -0
- truthound/reporters/ci/azure.py +436 -0
- truthound/reporters/ci/base.py +509 -0
- truthound/reporters/ci/bitbucket.py +567 -0
- truthound/reporters/ci/circleci.py +547 -0
- truthound/reporters/ci/detection.py +364 -0
- truthound/reporters/ci/factory.py +182 -0
- truthound/reporters/ci/github.py +388 -0
- truthound/reporters/ci/gitlab.py +471 -0
- truthound/reporters/ci/jenkins.py +525 -0
- truthound/reporters/console_reporter.py +299 -0
- truthound/reporters/factory.py +211 -0
- truthound/reporters/html_reporter.py +524 -0
- truthound/reporters/json_reporter.py +256 -0
- truthound/reporters/markdown_reporter.py +280 -0
- truthound/reporters/sdk/__init__.py +174 -0
- truthound/reporters/sdk/builder.py +558 -0
- truthound/reporters/sdk/mixins.py +1150 -0
- truthound/reporters/sdk/schema.py +1493 -0
- truthound/reporters/sdk/templates.py +666 -0
- truthound/reporters/sdk/testing.py +968 -0
- truthound/scanners.py +170 -0
- truthound/scheduling/__init__.py +122 -0
- truthound/scheduling/cron.py +1136 -0
- truthound/scheduling/presets.py +212 -0
- truthound/schema.py +275 -0
- truthound/secrets/__init__.py +173 -0
- truthound/secrets/base.py +618 -0
- truthound/secrets/cloud.py +682 -0
- truthound/secrets/integration.py +507 -0
- truthound/secrets/manager.py +633 -0
- truthound/secrets/oidc/__init__.py +172 -0
- truthound/secrets/oidc/base.py +902 -0
- truthound/secrets/oidc/credential_provider.py +623 -0
- truthound/secrets/oidc/exchangers.py +1001 -0
- truthound/secrets/oidc/github/__init__.py +110 -0
- truthound/secrets/oidc/github/claims.py +718 -0
- truthound/secrets/oidc/github/enhanced_provider.py +693 -0
- truthound/secrets/oidc/github/trust_policy.py +742 -0
- truthound/secrets/oidc/github/verification.py +723 -0
- truthound/secrets/oidc/github/workflow.py +691 -0
- truthound/secrets/oidc/providers.py +825 -0
- truthound/secrets/providers.py +506 -0
- truthound/secrets/resolver.py +495 -0
- truthound/stores/__init__.py +177 -0
- truthound/stores/backends/__init__.py +18 -0
- truthound/stores/backends/_protocols.py +340 -0
- truthound/stores/backends/azure_blob.py +530 -0
- truthound/stores/backends/concurrent_filesystem.py +915 -0
- truthound/stores/backends/connection_pool.py +1365 -0
- truthound/stores/backends/database.py +743 -0
- truthound/stores/backends/filesystem.py +538 -0
- truthound/stores/backends/gcs.py +399 -0
- truthound/stores/backends/memory.py +354 -0
- truthound/stores/backends/s3.py +434 -0
- truthound/stores/backpressure/__init__.py +84 -0
- truthound/stores/backpressure/base.py +375 -0
- truthound/stores/backpressure/circuit_breaker.py +434 -0
- truthound/stores/backpressure/monitor.py +376 -0
- truthound/stores/backpressure/strategies.py +677 -0
- truthound/stores/base.py +551 -0
- truthound/stores/batching/__init__.py +65 -0
- truthound/stores/batching/base.py +305 -0
- truthound/stores/batching/buffer.py +370 -0
- truthound/stores/batching/store.py +248 -0
- truthound/stores/batching/writer.py +521 -0
- truthound/stores/caching/__init__.py +60 -0
- truthound/stores/caching/backends.py +684 -0
- truthound/stores/caching/base.py +356 -0
- truthound/stores/caching/store.py +305 -0
- truthound/stores/compression/__init__.py +193 -0
- truthound/stores/compression/adaptive.py +694 -0
- truthound/stores/compression/base.py +514 -0
- truthound/stores/compression/pipeline.py +868 -0
- truthound/stores/compression/providers.py +672 -0
- truthound/stores/compression/streaming.py +832 -0
- truthound/stores/concurrency/__init__.py +81 -0
- truthound/stores/concurrency/atomic.py +556 -0
- truthound/stores/concurrency/index.py +775 -0
- truthound/stores/concurrency/locks.py +576 -0
- truthound/stores/concurrency/manager.py +482 -0
- truthound/stores/encryption/__init__.py +297 -0
- truthound/stores/encryption/base.py +952 -0
- truthound/stores/encryption/keys.py +1191 -0
- truthound/stores/encryption/pipeline.py +903 -0
- truthound/stores/encryption/providers.py +953 -0
- truthound/stores/encryption/streaming.py +950 -0
- truthound/stores/expectations.py +227 -0
- truthound/stores/factory.py +246 -0
- truthound/stores/migration/__init__.py +75 -0
- truthound/stores/migration/base.py +480 -0
- truthound/stores/migration/manager.py +347 -0
- truthound/stores/migration/registry.py +382 -0
- truthound/stores/migration/store.py +559 -0
- truthound/stores/observability/__init__.py +106 -0
- truthound/stores/observability/audit.py +718 -0
- truthound/stores/observability/config.py +270 -0
- truthound/stores/observability/factory.py +208 -0
- truthound/stores/observability/metrics.py +636 -0
- truthound/stores/observability/protocols.py +410 -0
- truthound/stores/observability/store.py +570 -0
- truthound/stores/observability/tracing.py +784 -0
- truthound/stores/replication/__init__.py +76 -0
- truthound/stores/replication/base.py +260 -0
- truthound/stores/replication/monitor.py +269 -0
- truthound/stores/replication/store.py +439 -0
- truthound/stores/replication/syncer.py +391 -0
- truthound/stores/results.py +359 -0
- truthound/stores/retention/__init__.py +77 -0
- truthound/stores/retention/base.py +378 -0
- truthound/stores/retention/policies.py +621 -0
- truthound/stores/retention/scheduler.py +279 -0
- truthound/stores/retention/store.py +526 -0
- truthound/stores/streaming/__init__.py +138 -0
- truthound/stores/streaming/base.py +801 -0
- truthound/stores/streaming/database.py +984 -0
- truthound/stores/streaming/filesystem.py +719 -0
- truthound/stores/streaming/reader.py +629 -0
- truthound/stores/streaming/s3.py +843 -0
- truthound/stores/streaming/writer.py +790 -0
- truthound/stores/tiering/__init__.py +108 -0
- truthound/stores/tiering/base.py +462 -0
- truthound/stores/tiering/manager.py +249 -0
- truthound/stores/tiering/policies.py +692 -0
- truthound/stores/tiering/store.py +526 -0
- truthound/stores/versioning/__init__.py +56 -0
- truthound/stores/versioning/base.py +376 -0
- truthound/stores/versioning/store.py +660 -0
- truthound/stores/versioning/strategies.py +353 -0
- truthound/types.py +56 -0
- truthound/validators/__init__.py +774 -0
- truthound/validators/aggregate/__init__.py +27 -0
- truthound/validators/aggregate/central.py +116 -0
- truthound/validators/aggregate/extremes.py +116 -0
- truthound/validators/aggregate/spread.py +118 -0
- truthound/validators/aggregate/sum.py +64 -0
- truthound/validators/aggregate/type.py +78 -0
- truthound/validators/anomaly/__init__.py +93 -0
- truthound/validators/anomaly/base.py +431 -0
- truthound/validators/anomaly/ml_based.py +1190 -0
- truthound/validators/anomaly/multivariate.py +647 -0
- truthound/validators/anomaly/statistical.py +599 -0
- truthound/validators/base.py +1089 -0
- truthound/validators/business_rule/__init__.py +46 -0
- truthound/validators/business_rule/base.py +147 -0
- truthound/validators/business_rule/checksum.py +509 -0
- truthound/validators/business_rule/financial.py +526 -0
- truthound/validators/cache.py +733 -0
- truthound/validators/completeness/__init__.py +39 -0
- truthound/validators/completeness/conditional.py +73 -0
- truthound/validators/completeness/default.py +98 -0
- truthound/validators/completeness/empty.py +103 -0
- truthound/validators/completeness/nan.py +337 -0
- truthound/validators/completeness/null.py +152 -0
- truthound/validators/cross_table/__init__.py +17 -0
- truthound/validators/cross_table/aggregate.py +333 -0
- truthound/validators/cross_table/row_count.py +122 -0
- truthound/validators/datetime/__init__.py +29 -0
- truthound/validators/datetime/format.py +78 -0
- truthound/validators/datetime/freshness.py +269 -0
- truthound/validators/datetime/order.py +73 -0
- truthound/validators/datetime/parseable.py +185 -0
- truthound/validators/datetime/range.py +202 -0
- truthound/validators/datetime/timezone.py +69 -0
- truthound/validators/distribution/__init__.py +49 -0
- truthound/validators/distribution/distribution.py +128 -0
- truthound/validators/distribution/monotonic.py +119 -0
- truthound/validators/distribution/outlier.py +178 -0
- truthound/validators/distribution/quantile.py +80 -0
- truthound/validators/distribution/range.py +254 -0
- truthound/validators/distribution/set.py +125 -0
- truthound/validators/distribution/statistical.py +459 -0
- truthound/validators/drift/__init__.py +79 -0
- truthound/validators/drift/base.py +427 -0
- truthound/validators/drift/multi_feature.py +401 -0
- truthound/validators/drift/numeric.py +395 -0
- truthound/validators/drift/psi.py +446 -0
- truthound/validators/drift/statistical.py +510 -0
- truthound/validators/enterprise.py +1658 -0
- truthound/validators/geospatial/__init__.py +80 -0
- truthound/validators/geospatial/base.py +97 -0
- truthound/validators/geospatial/boundary.py +238 -0
- truthound/validators/geospatial/coordinate.py +351 -0
- truthound/validators/geospatial/distance.py +399 -0
- truthound/validators/geospatial/polygon.py +665 -0
- truthound/validators/i18n/__init__.py +308 -0
- truthound/validators/i18n/bidi.py +571 -0
- truthound/validators/i18n/catalogs.py +570 -0
- truthound/validators/i18n/dialects.py +763 -0
- truthound/validators/i18n/extended_catalogs.py +549 -0
- truthound/validators/i18n/formatting.py +1434 -0
- truthound/validators/i18n/loader.py +1020 -0
- truthound/validators/i18n/messages.py +521 -0
- truthound/validators/i18n/plural.py +683 -0
- truthound/validators/i18n/protocols.py +855 -0
- truthound/validators/i18n/tms.py +1162 -0
- truthound/validators/localization/__init__.py +53 -0
- truthound/validators/localization/base.py +122 -0
- truthound/validators/localization/chinese.py +362 -0
- truthound/validators/localization/japanese.py +275 -0
- truthound/validators/localization/korean.py +524 -0
- truthound/validators/memory/__init__.py +94 -0
- truthound/validators/memory/approximate_knn.py +506 -0
- truthound/validators/memory/base.py +547 -0
- truthound/validators/memory/sgd_online.py +719 -0
- truthound/validators/memory/streaming_ecdf.py +753 -0
- truthound/validators/ml_feature/__init__.py +54 -0
- truthound/validators/ml_feature/base.py +249 -0
- truthound/validators/ml_feature/correlation.py +299 -0
- truthound/validators/ml_feature/leakage.py +344 -0
- truthound/validators/ml_feature/null_impact.py +270 -0
- truthound/validators/ml_feature/scale.py +264 -0
- truthound/validators/multi_column/__init__.py +89 -0
- truthound/validators/multi_column/arithmetic.py +284 -0
- truthound/validators/multi_column/base.py +231 -0
- truthound/validators/multi_column/comparison.py +273 -0
- truthound/validators/multi_column/consistency.py +312 -0
- truthound/validators/multi_column/statistical.py +299 -0
- truthound/validators/optimization/__init__.py +164 -0
- truthound/validators/optimization/aggregation.py +563 -0
- truthound/validators/optimization/covariance.py +556 -0
- truthound/validators/optimization/geo.py +626 -0
- truthound/validators/optimization/graph.py +587 -0
- truthound/validators/optimization/orchestrator.py +970 -0
- truthound/validators/optimization/profiling.py +1312 -0
- truthound/validators/privacy/__init__.py +223 -0
- truthound/validators/privacy/base.py +635 -0
- truthound/validators/privacy/ccpa.py +670 -0
- truthound/validators/privacy/gdpr.py +728 -0
- truthound/validators/privacy/global_patterns.py +604 -0
- truthound/validators/privacy/plugins.py +867 -0
- truthound/validators/profiling/__init__.py +52 -0
- truthound/validators/profiling/base.py +175 -0
- truthound/validators/profiling/cardinality.py +312 -0
- truthound/validators/profiling/entropy.py +391 -0
- truthound/validators/profiling/frequency.py +455 -0
- truthound/validators/pushdown_support.py +660 -0
- truthound/validators/query/__init__.py +91 -0
- truthound/validators/query/aggregate.py +346 -0
- truthound/validators/query/base.py +246 -0
- truthound/validators/query/column.py +249 -0
- truthound/validators/query/expression.py +274 -0
- truthound/validators/query/result.py +323 -0
- truthound/validators/query/row_count.py +264 -0
- truthound/validators/referential/__init__.py +80 -0
- truthound/validators/referential/base.py +395 -0
- truthound/validators/referential/cascade.py +391 -0
- truthound/validators/referential/circular.py +563 -0
- truthound/validators/referential/foreign_key.py +624 -0
- truthound/validators/referential/orphan.py +485 -0
- truthound/validators/registry.py +112 -0
- truthound/validators/schema/__init__.py +41 -0
- truthound/validators/schema/column_count.py +142 -0
- truthound/validators/schema/column_exists.py +80 -0
- truthound/validators/schema/column_order.py +82 -0
- truthound/validators/schema/column_pair.py +85 -0
- truthound/validators/schema/column_pair_set.py +195 -0
- truthound/validators/schema/column_type.py +94 -0
- truthound/validators/schema/multi_column.py +53 -0
- truthound/validators/schema/multi_column_aggregate.py +175 -0
- truthound/validators/schema/referential.py +274 -0
- truthound/validators/schema/table_schema.py +91 -0
- truthound/validators/schema_validator.py +219 -0
- truthound/validators/sdk/__init__.py +250 -0
- truthound/validators/sdk/builder.py +680 -0
- truthound/validators/sdk/decorators.py +474 -0
- truthound/validators/sdk/enterprise/__init__.py +211 -0
- truthound/validators/sdk/enterprise/docs.py +725 -0
- truthound/validators/sdk/enterprise/fuzzing.py +659 -0
- truthound/validators/sdk/enterprise/licensing.py +709 -0
- truthound/validators/sdk/enterprise/manager.py +543 -0
- truthound/validators/sdk/enterprise/resources.py +628 -0
- truthound/validators/sdk/enterprise/sandbox.py +766 -0
- truthound/validators/sdk/enterprise/signing.py +603 -0
- truthound/validators/sdk/enterprise/templates.py +865 -0
- truthound/validators/sdk/enterprise/versioning.py +659 -0
- truthound/validators/sdk/templates.py +757 -0
- truthound/validators/sdk/testing.py +807 -0
- truthound/validators/security/__init__.py +181 -0
- truthound/validators/security/redos/__init__.py +182 -0
- truthound/validators/security/redos/core.py +861 -0
- truthound/validators/security/redos/cpu_monitor.py +593 -0
- truthound/validators/security/redos/cve_database.py +791 -0
- truthound/validators/security/redos/ml/__init__.py +155 -0
- truthound/validators/security/redos/ml/base.py +785 -0
- truthound/validators/security/redos/ml/datasets.py +618 -0
- truthound/validators/security/redos/ml/features.py +359 -0
- truthound/validators/security/redos/ml/models.py +1000 -0
- truthound/validators/security/redos/ml/predictor.py +507 -0
- truthound/validators/security/redos/ml/storage.py +632 -0
- truthound/validators/security/redos/ml/training.py +571 -0
- truthound/validators/security/redos/ml_analyzer.py +937 -0
- truthound/validators/security/redos/optimizer.py +674 -0
- truthound/validators/security/redos/profiler.py +682 -0
- truthound/validators/security/redos/re2_engine.py +709 -0
- truthound/validators/security/redos.py +886 -0
- truthound/validators/security/sql_security.py +1247 -0
- truthound/validators/streaming/__init__.py +126 -0
- truthound/validators/streaming/base.py +292 -0
- truthound/validators/streaming/completeness.py +210 -0
- truthound/validators/streaming/mixin.py +575 -0
- truthound/validators/streaming/range.py +308 -0
- truthound/validators/streaming/sources.py +846 -0
- truthound/validators/string/__init__.py +57 -0
- truthound/validators/string/casing.py +158 -0
- truthound/validators/string/charset.py +96 -0
- truthound/validators/string/format.py +501 -0
- truthound/validators/string/json.py +77 -0
- truthound/validators/string/json_schema.py +184 -0
- truthound/validators/string/length.py +104 -0
- truthound/validators/string/like_pattern.py +237 -0
- truthound/validators/string/regex.py +202 -0
- truthound/validators/string/regex_extended.py +435 -0
- truthound/validators/table/__init__.py +88 -0
- truthound/validators/table/base.py +78 -0
- truthound/validators/table/column_count.py +198 -0
- truthound/validators/table/freshness.py +362 -0
- truthound/validators/table/row_count.py +251 -0
- truthound/validators/table/schema.py +333 -0
- truthound/validators/table/size.py +285 -0
- truthound/validators/timeout/__init__.py +102 -0
- truthound/validators/timeout/advanced/__init__.py +247 -0
- truthound/validators/timeout/advanced/circuit_breaker.py +675 -0
- truthound/validators/timeout/advanced/prediction.py +773 -0
- truthound/validators/timeout/advanced/priority.py +618 -0
- truthound/validators/timeout/advanced/redis_backend.py +770 -0
- truthound/validators/timeout/advanced/retry.py +721 -0
- truthound/validators/timeout/advanced/sampling.py +788 -0
- truthound/validators/timeout/advanced/sla.py +661 -0
- truthound/validators/timeout/advanced/telemetry.py +804 -0
- truthound/validators/timeout/cascade.py +477 -0
- truthound/validators/timeout/deadline.py +657 -0
- truthound/validators/timeout/degradation.py +525 -0
- truthound/validators/timeout/distributed.py +597 -0
- truthound/validators/timeseries/__init__.py +89 -0
- truthound/validators/timeseries/base.py +326 -0
- truthound/validators/timeseries/completeness.py +617 -0
- truthound/validators/timeseries/gap.py +485 -0
- truthound/validators/timeseries/monotonic.py +310 -0
- truthound/validators/timeseries/seasonality.py +422 -0
- truthound/validators/timeseries/trend.py +510 -0
- truthound/validators/uniqueness/__init__.py +59 -0
- truthound/validators/uniqueness/approximate.py +475 -0
- truthound/validators/uniqueness/distinct_values.py +253 -0
- truthound/validators/uniqueness/duplicate.py +118 -0
- truthound/validators/uniqueness/primary_key.py +140 -0
- truthound/validators/uniqueness/unique.py +191 -0
- truthound/validators/uniqueness/within_record.py +599 -0
- truthound/validators/utils.py +756 -0
- truthound-1.0.8.dist-info/METADATA +474 -0
- truthound-1.0.8.dist-info/RECORD +877 -0
- truthound-1.0.8.dist-info/WHEEL +4 -0
- truthound-1.0.8.dist-info/entry_points.txt +2 -0
- truthound-1.0.8.dist-info/licenses/LICENSE +190 -0
|
@@ -0,0 +1,775 @@
|
|
|
1
|
+
"""Concurrent index management for filesystem stores.
|
|
2
|
+
|
|
3
|
+
This module provides thread-safe and process-safe index operations
|
|
4
|
+
for the filesystem store. The index maintains metadata about stored
|
|
5
|
+
items for fast lookups without reading the actual files.
|
|
6
|
+
|
|
7
|
+
Key features:
|
|
8
|
+
- MVCC-like reads (consistent snapshots)
|
|
9
|
+
- Write-ahead logging for durability
|
|
10
|
+
- Automatic index recovery
|
|
11
|
+
- Transaction support for batch updates
|
|
12
|
+
|
|
13
|
+
Example:
|
|
14
|
+
>>> index = ConcurrentIndex(Path(".truthound/store"))
|
|
15
|
+
>>> with index.transaction() as txn:
|
|
16
|
+
... txn.add("item-1", {"data_asset": "customers.csv"})
|
|
17
|
+
... txn.add("item-2", {"data_asset": "orders.csv"})
|
|
18
|
+
... txn.commit()
|
|
19
|
+
"""
|
|
20
|
+
|
|
21
|
+
from __future__ import annotations
|
|
22
|
+
|
|
23
|
+
import json
|
|
24
|
+
import os
|
|
25
|
+
import threading
|
|
26
|
+
import time
|
|
27
|
+
from contextlib import contextmanager
|
|
28
|
+
from dataclasses import dataclass, field
|
|
29
|
+
from datetime import datetime
|
|
30
|
+
from pathlib import Path
|
|
31
|
+
from typing import Any, Callable, Iterator, TypeVar
|
|
32
|
+
|
|
33
|
+
from truthound.stores.concurrency.locks import LockMode
|
|
34
|
+
from truthound.stores.concurrency.manager import FileLockManager, get_default_manager
|
|
35
|
+
from truthound.stores.concurrency.atomic import atomic_write, atomic_read, AtomicFileWriter
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
T = TypeVar("T")
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
@dataclass
class IndexEntry:
    """One record held by the index.

    Attributes:
        item_id: Unique identifier of the stored item.
        metadata: Metadata describing the item.
        created_at: Timestamp of the entry's creation.
        updated_at: Timestamp of the most recent update.
        version: Entry version, used for optimistic locking.
    """

    item_id: str
    metadata: dict[str, Any]
    created_at: datetime = field(default_factory=datetime.now)
    updated_at: datetime = field(default_factory=datetime.now)
    version: int = 1

    def to_dict(self) -> dict[str, Any]:
        """Return a serializable mapping; timestamps become ISO-8601 strings."""
        payload: dict[str, Any] = {
            "item_id": self.item_id,
            "metadata": self.metadata,
        }
        payload["created_at"] = self.created_at.isoformat()
        payload["updated_at"] = self.updated_at.isoformat()
        payload["version"] = self.version
        return payload

    @classmethod
    def from_dict(cls, data: dict[str, Any]) -> "IndexEntry":
        """Build an entry from a mapping shaped like the output of ``to_dict``.

        Only ``item_id`` is required. Missing metadata defaults to an empty
        dict, missing timestamps to the current time, and a missing version
        to 1.
        """

        def _timestamp(key: str) -> datetime:
            # Parse the stored ISO string when the key is present at all.
            if key in data:
                return datetime.fromisoformat(data[key])
            return datetime.now()

        identifier = data["item_id"]
        return cls(
            item_id=identifier,
            metadata=data.get("metadata", {}),
            created_at=_timestamp("created_at"),
            updated_at=_timestamp("updated_at"),
            version=data.get("version", 1),
        )
|
|
83
|
+
|
|
84
|
+
|
|
85
|
+
@dataclass
class IndexSnapshot:
    """Point-in-time view of the index contents.

    Readers work against a snapshot to get an MVCC-like consistent view of
    the index. The dataclass itself is not frozen; it is intended to be
    treated as read-only.
    """

    entries: dict[str, IndexEntry]
    version: int
    timestamp: datetime = field(default_factory=datetime.now)

    def get(self, item_id: str) -> IndexEntry | None:
        """Return the entry stored under ``item_id``, or ``None`` if absent."""
        return self.entries.get(item_id, None)

    def contains(self, item_id: str) -> bool:
        """Report whether ``item_id`` is present in this snapshot."""
        present = item_id in self.entries
        return present

    def list_ids(self) -> list[str]:
        """Return every item ID in the snapshot as a new list."""
        return [*self.entries.keys()]

    def filter(
        self,
        predicate: Callable[[IndexEntry], bool],
    ) -> list[IndexEntry]:
        """Return the entries for which ``predicate`` is truthy."""
        matches: list[IndexEntry] = []
        for candidate in self.entries.values():
            if predicate(candidate):
                matches.append(candidate)
        return matches

    def __len__(self) -> int:
        """Return the number of entries in the snapshot."""
        return len(self.entries)
|
|
117
|
+
|
|
118
|
+
|
|
119
|
+
class IndexTransaction:
    """Transaction for batch index updates.

    Modifications are staged in memory against the snapshot taken when the
    transaction was opened. Nothing is handed to the parent index until
    ``commit()`` passes the staged changes over in a single call;
    ``rollback()`` discards them instead. A transaction can be finished
    (committed or rolled back) only once.

    Example:
        >>> with index.transaction() as txn:
        ...     txn.add("item-1", {"key": "value"})
        ...     txn.update("item-2", {"key": "new-value"})
        ...     txn.remove("item-3")
        ...     txn.commit()
    """

    def __init__(
        self,
        index: "ConcurrentIndex",
        snapshot: IndexSnapshot,
    ) -> None:
        """Initialize the transaction.

        Args:
            index: The parent index that will receive the changes on commit.
            snapshot: Index state the transaction is based on.
        """
        self._index = index
        self._snapshot = snapshot
        # Staged changes, keyed by item id.
        self._pending_adds: dict[str, IndexEntry] = {}
        self._pending_updates: dict[str, IndexEntry] = {}
        self._pending_removes: set[str] = set()
        # Terminal-state flags; at most one of them ever becomes True.
        self._committed = False
        self._rolled_back = False

    def add(self, item_id: str, metadata: dict[str, Any]) -> IndexEntry:
        """Stage a new entry.

        Re-adding an id that was removed earlier in this transaction is
        allowed and cancels the staged removal.

        Args:
            item_id: Unique identifier for the item.
            metadata: Metadata to store.

        Returns:
            The created entry.

        Raises:
            ValueError: If the item already exists.
            RuntimeError: If the transaction is no longer active.
        """
        self._check_active()

        already_present = (
            self._snapshot.contains(item_id) or item_id in self._pending_adds
        )
        if already_present and item_id not in self._pending_removes:
            raise ValueError(f"Item already exists: {item_id}")

        entry = IndexEntry(item_id=item_id, metadata=metadata)
        self._pending_adds[item_id] = entry
        self._pending_removes.discard(item_id)
        return entry

    def update(
        self,
        item_id: str,
        metadata: dict[str, Any],
        merge: bool = True,
    ) -> IndexEntry:
        """Stage an update to an existing entry.

        The new entry keeps the original ``created_at`` and carries the
        previous version plus one.

        Args:
            item_id: Item to update.
            metadata: New metadata.
            merge: If True, merge with existing metadata (new keys win);
                otherwise replace the metadata entirely.

        Returns:
            The updated entry.

        Raises:
            KeyError: If the item doesn't exist.
            RuntimeError: If the transaction is no longer active.
        """
        self._check_active()

        existing = self._get_current(item_id)
        if existing is None:
            raise KeyError(f"Item not found: {item_id}")

        new_metadata = {**existing.metadata, **metadata} if merge else metadata

        entry = IndexEntry(
            item_id=item_id,
            metadata=new_metadata,
            created_at=existing.created_at,
            updated_at=datetime.now(),
            version=existing.version + 1,
        )
        self._pending_updates[item_id] = entry
        return entry

    def upsert(self, item_id: str, metadata: dict[str, Any]) -> IndexEntry:
        """Add the entry if it is not visible to this transaction, else update it.

        Args:
            item_id: Item identifier.
            metadata: Metadata to store.

        Returns:
            The created or updated entry.
        """
        self._check_active()

        if self._get_current(item_id) is None:
            return self.add(item_id, metadata)
        return self.update(item_id, metadata)

    def remove(self, item_id: str) -> bool:
        """Stage the removal of an entry.

        Any add or update staged for the id in this transaction is dropped.
        A removal is queued only when the id exists in the base snapshot:
        an id that is unknown, or that was merely staged by ``add()`` in
        this transaction, has nothing to delete at commit time, so it must
        not count as a pending change.

        Args:
            item_id: Item to remove.

        Returns:
            True if the item was visible to this transaction before the
            call, False otherwise.

        Raises:
            RuntimeError: If the transaction is no longer active.
        """
        self._check_active()

        exists = self._get_current(item_id) is not None

        self._pending_adds.pop(item_id, None)
        self._pending_updates.pop(item_id, None)
        if self._snapshot.contains(item_id):
            self._pending_removes.add(item_id)

        return exists

    def get(self, item_id: str) -> IndexEntry | None:
        """Get an entry, including pending changes.

        Args:
            item_id: Item to get.

        Returns:
            Entry if found, None otherwise.
        """
        return self._get_current(item_id)

    def _get_current(self, item_id: str) -> IndexEntry | None:
        """Resolve an id against staged changes first, then the snapshot."""
        # Precedence: staged removal, staged update, staged add, snapshot.
        if item_id in self._pending_removes:
            return None
        if item_id in self._pending_updates:
            return self._pending_updates[item_id]
        if item_id in self._pending_adds:
            return self._pending_adds[item_id]
        return self._snapshot.get(item_id)

    def _check_active(self) -> None:
        """Raise RuntimeError if the transaction was committed or rolled back."""
        if self._committed:
            raise RuntimeError("Transaction already committed")
        if self._rolled_back:
            raise RuntimeError("Transaction rolled back")

    def commit(self) -> int:
        """Commit the transaction.

        The staged changes are passed to the parent index only when there
        is at least one. If the index raises, the transaction stays active.

        Returns:
            Number of changes applied.

        Raises:
            RuntimeError: If transaction is not active.
        """
        self._check_active()

        changes = self.pending_changes
        if changes > 0:
            self._index._apply_transaction(self)

        self._committed = True
        return changes

    def rollback(self) -> None:
        """Rollback the transaction, discarding all changes.

        Raises:
            RuntimeError: If transaction is not active.
        """
        self._check_active()
        self._pending_adds.clear()
        self._pending_updates.clear()
        self._pending_removes.clear()
        self._rolled_back = True

    @property
    def pending_changes(self) -> int:
        """Number of staged adds, updates and removals."""
        return (
            len(self._pending_adds)
            + len(self._pending_updates)
            + len(self._pending_removes)
        )

    @property
    def is_active(self) -> bool:
        """Whether the transaction is neither committed nor rolled back."""
        return not self._committed and not self._rolled_back
|
|
328
|
+
|
|
329
|
+
|
|
330
|
+
class ConcurrentIndex:
|
|
331
|
+
"""Thread-safe and process-safe index for filesystem stores.
|
|
332
|
+
|
|
333
|
+
This class manages an index file that tracks metadata about stored
|
|
334
|
+
items. It provides:
|
|
335
|
+
- Consistent reads via snapshots
|
|
336
|
+
- Atomic writes via transactions
|
|
337
|
+
- Automatic recovery from corruption
|
|
338
|
+
- Write-ahead logging for durability
|
|
339
|
+
|
|
340
|
+
Example:
|
|
341
|
+
>>> index = ConcurrentIndex(Path(".truthound/store"))
|
|
342
|
+
>>>
|
|
343
|
+
>>> # Read operations (use snapshot)
|
|
344
|
+
>>> snapshot = index.snapshot()
|
|
345
|
+
>>> for item_id in snapshot.list_ids():
|
|
346
|
+
... entry = snapshot.get(item_id)
|
|
347
|
+
>>>
|
|
348
|
+
>>> # Write operations (use transaction)
|
|
349
|
+
>>> with index.transaction() as txn:
|
|
350
|
+
... txn.add("new-item", {"data_asset": "data.csv"})
|
|
351
|
+
... txn.commit()
|
|
352
|
+
"""
|
|
353
|
+
|
|
354
|
+
def __init__(
|
|
355
|
+
self,
|
|
356
|
+
base_path: Path | str,
|
|
357
|
+
index_filename: str = "_index.json",
|
|
358
|
+
lock_manager: FileLockManager | None = None,
|
|
359
|
+
wal_enabled: bool = True,
|
|
360
|
+
) -> None:
|
|
361
|
+
"""Initialize the concurrent index.
|
|
362
|
+
|
|
363
|
+
Args:
|
|
364
|
+
base_path: Base directory for the index.
|
|
365
|
+
index_filename: Name of the index file.
|
|
366
|
+
lock_manager: Lock manager to use.
|
|
367
|
+
wal_enabled: Whether to use write-ahead logging.
|
|
368
|
+
"""
|
|
369
|
+
self._base_path = Path(base_path)
|
|
370
|
+
self._index_path = self._base_path / index_filename
|
|
371
|
+
self._wal_path = self._base_path / f"{index_filename}.wal"
|
|
372
|
+
self._lock_manager = lock_manager or get_default_manager()
|
|
373
|
+
self._wal_enabled = wal_enabled
|
|
374
|
+
|
|
375
|
+
# In-memory cache
|
|
376
|
+
self._cache: dict[str, IndexEntry] = {}
|
|
377
|
+
self._cache_version: int = 0
|
|
378
|
+
self._cache_lock = threading.RLock()
|
|
379
|
+
self._loaded = False
|
|
380
|
+
|
|
381
|
+
def initialize(self) -> None:
|
|
382
|
+
"""Initialize the index, loading from disk if exists."""
|
|
383
|
+
with self._cache_lock:
|
|
384
|
+
if self._loaded:
|
|
385
|
+
return
|
|
386
|
+
|
|
387
|
+
self._base_path.mkdir(parents=True, exist_ok=True)
|
|
388
|
+
|
|
389
|
+
# Recover from WAL if needed
|
|
390
|
+
if self._wal_enabled and self._wal_path.exists():
|
|
391
|
+
self._recover_from_wal()
|
|
392
|
+
|
|
393
|
+
# Load index from disk or create empty
|
|
394
|
+
if self._index_path.exists():
|
|
395
|
+
self._load_from_disk()
|
|
396
|
+
else:
|
|
397
|
+
# Create empty index file
|
|
398
|
+
self._save_to_disk()
|
|
399
|
+
|
|
400
|
+
self._loaded = True
|
|
401
|
+
|
|
402
|
+
def _load_from_disk(self) -> None:
    """Replace the in-memory cache with the contents of the index file.

    Two entry layouts are accepted: the current one (a serialized
    ``IndexEntry`` containing ``item_id``) and the old one, where the value
    is just the metadata dict.

    If the file cannot be read or its content is malformed in any way
    (invalid JSON, wrong top-level shape, an entry missing required keys or
    carrying an unparsable timestamp), the cache is reset to empty with
    version 0 instead of raising.
    """
    try:
        content = atomic_read(self._index_path, lock_manager=self._lock_manager)
        data = json.loads(content.decode("utf-8"))

        # Build into a scratch dict so a failure part-way through cannot
        # leave the cache half-populated.
        loaded: dict[str, IndexEntry] = {}
        for item_id, entry_data in data.get("entries", {}).items():
            # Handle both old format (dict) and new format (IndexEntry)
            if isinstance(entry_data, dict) and "item_id" not in entry_data:
                # Old format: just metadata
                entry = IndexEntry(item_id=item_id, metadata=entry_data)
            else:
                entry = IndexEntry.from_dict(entry_data)
            loaded[item_id] = entry

        version = data.get("version", 0)

    except (ValueError, KeyError, TypeError, AttributeError, OSError):
        # ValueError covers json.JSONDecodeError, UnicodeDecodeError and bad
        # ISO timestamps; KeyError/TypeError/AttributeError cover entries or
        # documents with an unexpected shape.
        # Index corrupted or missing, start fresh
        self._cache.clear()
        self._cache_version = 0
    else:
        # Mutate in place so existing references to the cache stay valid.
        self._cache.clear()
        self._cache.update(loaded)
        self._cache_version = version
|
|
424
|
+
|
|
425
|
+
def _save_to_disk(self) -> None:
    """Serialize the cache to JSON and write it to the index file.

    The document holds the cache version, a write timestamp and every
    entry keyed by item id. The write goes through ``atomic_write`` with
    this index's lock manager.
    """
    document: dict[str, Any] = {
        "version": self._cache_version,
        "updated_at": datetime.now().isoformat(),
    }
    serialized: dict[str, Any] = {}
    for item_id, entry in self._cache.items():
        serialized[item_id] = entry.to_dict()
    document["entries"] = serialized

    atomic_write(
        self._index_path,
        json.dumps(document, indent=2, default=str),
        lock_manager=self._lock_manager,
    )
|
|
441
|
+
|
|
442
|
+
def _recover_from_wal(self) -> None:
|
|
443
|
+
"""Recover uncommitted changes from write-ahead log."""
|
|
444
|
+
try:
|
|
445
|
+
with open(self._wal_path, "r") as f:
|
|
446
|
+
for line in f:
|
|
447
|
+
line = line.strip()
|
|
448
|
+
if not line:
|
|
449
|
+
continue
|
|
450
|
+
|
|
451
|
+
operation = json.loads(line)
|
|
452
|
+
op_type = operation.get("type")
|
|
453
|
+
item_id = operation.get("item_id")
|
|
454
|
+
|
|
455
|
+
if op_type == "add" or op_type == "update":
|
|
456
|
+
entry_data = operation.get("entry", {})
|
|
457
|
+
self._cache[item_id] = IndexEntry.from_dict(entry_data)
|
|
458
|
+
elif op_type == "remove":
|
|
459
|
+
self._cache.pop(item_id, None)
|
|
460
|
+
|
|
461
|
+
# Save recovered state and remove WAL
|
|
462
|
+
self._cache_version += 1
|
|
463
|
+
self._save_to_disk()
|
|
464
|
+
self._wal_path.unlink(missing_ok=True)
|
|
465
|
+
|
|
466
|
+
except (json.JSONDecodeError, OSError):
|
|
467
|
+
# WAL corrupted, ignore
|
|
468
|
+
self._wal_path.unlink(missing_ok=True)
|
|
469
|
+
|
|
470
|
+
def _write_wal(self, operations: list[dict[str, Any]]) -> None:
|
|
471
|
+
"""Write operations to write-ahead log."""
|
|
472
|
+
if not self._wal_enabled:
|
|
473
|
+
return
|
|
474
|
+
|
|
475
|
+
with open(self._wal_path, "a") as f:
|
|
476
|
+
for op in operations:
|
|
477
|
+
f.write(json.dumps(op, default=str) + "\n")
|
|
478
|
+
f.flush()
|
|
479
|
+
os.fsync(f.fileno())
|
|
480
|
+
|
|
481
|
+
def _clear_wal(self) -> None:
|
|
482
|
+
"""Clear the write-ahead log."""
|
|
483
|
+
if self._wal_path.exists():
|
|
484
|
+
self._wal_path.unlink(missing_ok=True)
|
|
485
|
+
|
|
486
|
+
def snapshot(self) -> IndexSnapshot:
    """Return a snapshot of the index as it is right now.

    The index is initialized first if needed. The snapshot gets its own
    copy of the id-to-entry mapping, taken under the cache lock, so later
    additions or removals in the index do not show up in it. The copy is
    shallow: the entry objects themselves are shared with the cache.

    Returns:
        IndexSnapshot with the current entries and cache version.
    """
    self.initialize()

    with self._cache_lock:
        entries_copy = self._cache.copy()
        current_version = self._cache_version
        return IndexSnapshot(entries=entries_copy, version=current_version)
|
|
502
|
+
|
|
503
|
+
def begin_transaction(self) -> IndexTransaction:
    """Open a transaction based on a fresh snapshot of the index.

    Returns:
        IndexTransaction for staging changes. The caller is responsible
        for calling commit() or rollback() on it.

    Example:
        >>> txn = index.begin_transaction()
        >>> try:
        ...     txn.add("item", {"key": "value"})
        ...     txn.commit()
        ... except Exception:
        ...     txn.rollback()
        ...     raise
    """
    self.initialize()
    return IndexTransaction(self, self.snapshot())
|
|
522
|
+
|
|
523
|
+
@contextmanager
def transaction(self) -> Iterator[IndexTransaction]:
    """Context manager wrapping a transaction for batch updates.

    If the ``with`` body raises an ``Exception``, a still-active
    transaction is rolled back and the exception is re-raised. If the body
    finishes normally without an explicit commit or rollback, staged
    changes (if any) are committed automatically.

    Yields:
        IndexTransaction for making changes.

    Example:
        >>> with index.transaction() as txn:
        ...     txn.add("item", {"key": "value"})
        ...     txn.commit()
    """
    txn = self.begin_transaction()

    try:
        yield txn
    except Exception:
        # Body failed: drop staged changes unless the caller already
        # finished the transaction, then let the error propagate.
        if txn.is_active:
            txn.rollback()
        raise

    # Reached only when the body exited normally.
    if not txn.is_active:
        return
    if txn.pending_changes > 0:
        # Auto-commit if not explicitly committed/rolled back
        txn.commit()
|
|
547
|
+
|
|
548
|
+
def _apply_transaction(self, txn: IndexTransaction) -> None:
    """Commit the changes staged in a transaction to the index.

    Everything runs while holding the write lock for the index path and
    the cache lock, in this order:

    1. Conflict check: if the index version differs from the version of
       the transaction's snapshot, every id staged for update or removal
       is compared between the two views.
    2. The staged operations (adds, then updates, then removes) are
       written to the WAL, if there are any.
    3. The operations are applied to the in-memory cache and the cache
       version is incremented.
    4. The index is saved to disk and the WAL is cleared.

    Args:
        txn: The transaction to apply.

    Raises:
        RuntimeError: If an id staged for update or removal was added,
            removed or modified since the transaction's snapshot.
    """
    with self._lock_manager.write_lock(self._index_path):
        with self._cache_lock:
            # Optimistic concurrency: only inspect entries when the index
            # version has moved since the transaction began.
            live = self.snapshot()
            if live.version != txn._snapshot.version:
                # NOTE(review): ids staged only as adds are not part of
                # this check - confirm that is intended.
                touched = [*txn._pending_updates, *txn._pending_removes]
                for item_id in touched:
                    before = txn._snapshot.get(item_id)
                    after = live.get(item_id)

                    if before is None:
                        if after is not None:
                            raise RuntimeError(
                                f"Conflict: {item_id} was added concurrently"
                            )
                    elif after is None:
                        raise RuntimeError(
                            f"Conflict: {item_id} was removed concurrently"
                        )
                    elif before.version != after.version:
                        raise RuntimeError(
                            f"Conflict: {item_id} was modified concurrently"
                        )

            # The WAL record is written before any state is touched.
            journal = [
                {"type": "add", "item_id": item_id, "entry": entry.to_dict()}
                for item_id, entry in txn._pending_adds.items()
            ]
            journal += [
                {"type": "update", "item_id": item_id, "entry": entry.to_dict()}
                for item_id, entry in txn._pending_updates.items()
            ]
            journal += [
                {"type": "remove", "item_id": item_id}
                for item_id in txn._pending_removes
            ]
            if journal:
                self._write_wal(journal)

            # Apply to the in-memory cache: adds, then updates, then removes.
            for staged in (txn._pending_adds, txn._pending_updates):
                for item_id, entry in staged.items():
                    self._cache[item_id] = entry
            for item_id in txn._pending_removes:
                self._cache.pop(item_id, None)

            self._cache_version += 1

            # Persist, and drop the WAL only once the save has returned.
            self._save_to_disk()
            self._clear_wal()
|
|
614
|
+
|
|
615
|
+
# Convenience methods for simple operations
|
|
616
|
+
|
|
617
|
+
def add(self, item_id: str, metadata: dict[str, Any]) -> IndexEntry:
    """Insert one entry using a dedicated transaction.

    Args:
        item_id: Item identifier.
        metadata: Metadata to store.

    Returns:
        The entry returned by the transaction's ``add``.
    """
    with self.transaction() as batch:
        created = batch.add(item_id, metadata)
        batch.commit()
    return created
|
|
631
|
+
|
|
632
|
+
def update(
    self,
    item_id: str,
    metadata: dict[str, Any],
    merge: bool = True,
) -> IndexEntry:
    """Update one entry using a dedicated transaction.

    Args:
        item_id: Item to update.
        metadata: New metadata.
        merge: Forwarded to the transaction's ``update`` as ``merge``;
            whether to merge with the existing metadata.

    Returns:
        The entry returned by the transaction's ``update``.
    """
    with self.transaction() as batch:
        changed = batch.update(item_id, metadata, merge=merge)
        batch.commit()
    return changed
|
|
652
|
+
|
|
653
|
+
def upsert(self, item_id: str, metadata: dict[str, Any]) -> IndexEntry:
    """Add or update one entry using a dedicated transaction.

    Args:
        item_id: Item identifier.
        metadata: Metadata to store.

    Returns:
        The entry returned by the transaction's ``upsert``.
    """
    with self.transaction() as batch:
        stored = batch.upsert(item_id, metadata)
        batch.commit()
    return stored
|
|
667
|
+
|
|
668
|
+
def remove(self, item_id: str) -> bool:
    """Remove one entry using a dedicated transaction.

    Args:
        item_id: Item to remove.

    Returns:
        The value returned by the transaction's ``remove`` (True if the
        item existed).
    """
    with self.transaction() as batch:
        existed = batch.remove(item_id)
        batch.commit()
    return existed
|
|
681
|
+
|
|
682
|
+
def get(self, item_id: str) -> IndexEntry | None:
    """Look up an entry by ID in a fresh snapshot of the index.

    Args:
        item_id: Item to get.

    Returns:
        Entry if found, None otherwise.
    """
    view = self.snapshot()
    return view.get(item_id)
|
|
692
|
+
|
|
693
|
+
def contains(self, item_id: str) -> bool:
    """Report whether an entry exists, judged on a fresh snapshot.

    Args:
        item_id: Item to check.

    Returns:
        True if exists.
    """
    view = self.snapshot()
    return view.contains(item_id)
|
|
703
|
+
|
|
704
|
+
def list_ids(self) -> list[str]:
    """Return the item IDs present in a fresh snapshot of the index.

    Returns:
        List of item IDs.
    """
    view = self.snapshot()
    return view.list_ids()
|
|
711
|
+
|
|
712
|
+
def count(self) -> int:
    """Return the number of entries in a fresh snapshot of the index.

    Returns:
        Entry count.
    """
    view = self.snapshot()
    return len(view)
|
|
719
|
+
|
|
720
|
+
def clear(self) -> int:
    """Remove every entry listed in a snapshot taken at call time.

    When that snapshot is empty no transaction is opened. Otherwise all
    of its IDs are removed and committed in a single transaction.

    Returns:
        Number of entries in the snapshot, i.e. the number removed.
    """
    before = self.snapshot()
    total = len(before)
    if total <= 0:
        return total

    with self.transaction() as batch:
        for item_id in before.list_ids():
            batch.remove(item_id)
        batch.commit()

    return total
|
|
736
|
+
|
|
737
|
+
def rebuild_from_files(
|
|
738
|
+
self,
|
|
739
|
+
file_pattern: str,
|
|
740
|
+
metadata_extractor: Callable[[Path], tuple[str, dict[str, Any]] | None],
|
|
741
|
+
) -> int:
|
|
742
|
+
"""Rebuild index from files in directory.
|
|
743
|
+
|
|
744
|
+
Args:
|
|
745
|
+
file_pattern: Glob pattern for files.
|
|
746
|
+
metadata_extractor: Function to extract (item_id, metadata) from file.
|
|
747
|
+
|
|
748
|
+
Returns:
|
|
749
|
+
Number of entries rebuilt.
|
|
750
|
+
"""
|
|
751
|
+
self.initialize()
|
|
752
|
+
|
|
753
|
+
with self.transaction() as txn:
|
|
754
|
+
# Clear existing entries
|
|
755
|
+
for item_id in self.list_ids():
|
|
756
|
+
txn.remove(item_id)
|
|
757
|
+
|
|
758
|
+
# Scan files
|
|
759
|
+
count = 0
|
|
760
|
+
for file_path in self._base_path.glob(file_pattern):
|
|
761
|
+
if file_path.name.startswith("_"):
|
|
762
|
+
continue
|
|
763
|
+
|
|
764
|
+
try:
|
|
765
|
+
result = metadata_extractor(file_path)
|
|
766
|
+
if result:
|
|
767
|
+
item_id, metadata = result
|
|
768
|
+
txn.add(item_id, metadata)
|
|
769
|
+
count += 1
|
|
770
|
+
except Exception:
|
|
771
|
+
continue
|
|
772
|
+
|
|
773
|
+
txn.commit()
|
|
774
|
+
|
|
775
|
+
return count
|