truthound 1.0.8__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- truthound/__init__.py +162 -0
- truthound/adapters.py +100 -0
- truthound/api.py +365 -0
- truthound/audit/__init__.py +248 -0
- truthound/audit/core.py +967 -0
- truthound/audit/filters.py +620 -0
- truthound/audit/formatters.py +707 -0
- truthound/audit/logger.py +902 -0
- truthound/audit/middleware.py +571 -0
- truthound/audit/storage.py +1083 -0
- truthound/benchmark/__init__.py +123 -0
- truthound/benchmark/base.py +757 -0
- truthound/benchmark/comparison.py +635 -0
- truthound/benchmark/generators.py +706 -0
- truthound/benchmark/reporters.py +718 -0
- truthound/benchmark/runner.py +635 -0
- truthound/benchmark/scenarios.py +712 -0
- truthound/cache.py +252 -0
- truthound/checkpoint/__init__.py +136 -0
- truthound/checkpoint/actions/__init__.py +164 -0
- truthound/checkpoint/actions/base.py +324 -0
- truthound/checkpoint/actions/custom.py +234 -0
- truthound/checkpoint/actions/discord_notify.py +290 -0
- truthound/checkpoint/actions/email_notify.py +405 -0
- truthound/checkpoint/actions/github_action.py +406 -0
- truthound/checkpoint/actions/opsgenie.py +1499 -0
- truthound/checkpoint/actions/pagerduty.py +226 -0
- truthound/checkpoint/actions/slack_notify.py +233 -0
- truthound/checkpoint/actions/store_result.py +249 -0
- truthound/checkpoint/actions/teams_notify.py +1570 -0
- truthound/checkpoint/actions/telegram_notify.py +419 -0
- truthound/checkpoint/actions/update_docs.py +552 -0
- truthound/checkpoint/actions/webhook.py +293 -0
- truthound/checkpoint/analytics/__init__.py +147 -0
- truthound/checkpoint/analytics/aggregations/__init__.py +23 -0
- truthound/checkpoint/analytics/aggregations/rollup.py +481 -0
- truthound/checkpoint/analytics/aggregations/time_bucket.py +306 -0
- truthound/checkpoint/analytics/analyzers/__init__.py +17 -0
- truthound/checkpoint/analytics/analyzers/anomaly.py +386 -0
- truthound/checkpoint/analytics/analyzers/base.py +270 -0
- truthound/checkpoint/analytics/analyzers/forecast.py +421 -0
- truthound/checkpoint/analytics/analyzers/trend.py +314 -0
- truthound/checkpoint/analytics/models.py +292 -0
- truthound/checkpoint/analytics/protocols.py +549 -0
- truthound/checkpoint/analytics/service.py +718 -0
- truthound/checkpoint/analytics/stores/__init__.py +16 -0
- truthound/checkpoint/analytics/stores/base.py +306 -0
- truthound/checkpoint/analytics/stores/memory_store.py +353 -0
- truthound/checkpoint/analytics/stores/sqlite_store.py +557 -0
- truthound/checkpoint/analytics/stores/timescale_store.py +501 -0
- truthound/checkpoint/async_actions.py +794 -0
- truthound/checkpoint/async_base.py +708 -0
- truthound/checkpoint/async_checkpoint.py +617 -0
- truthound/checkpoint/async_runner.py +639 -0
- truthound/checkpoint/checkpoint.py +527 -0
- truthound/checkpoint/ci/__init__.py +61 -0
- truthound/checkpoint/ci/detector.py +355 -0
- truthound/checkpoint/ci/reporter.py +436 -0
- truthound/checkpoint/ci/templates.py +454 -0
- truthound/checkpoint/circuitbreaker/__init__.py +133 -0
- truthound/checkpoint/circuitbreaker/breaker.py +542 -0
- truthound/checkpoint/circuitbreaker/core.py +252 -0
- truthound/checkpoint/circuitbreaker/detection.py +459 -0
- truthound/checkpoint/circuitbreaker/middleware.py +389 -0
- truthound/checkpoint/circuitbreaker/registry.py +357 -0
- truthound/checkpoint/distributed/__init__.py +139 -0
- truthound/checkpoint/distributed/backends/__init__.py +35 -0
- truthound/checkpoint/distributed/backends/celery_backend.py +503 -0
- truthound/checkpoint/distributed/backends/kubernetes_backend.py +696 -0
- truthound/checkpoint/distributed/backends/local_backend.py +397 -0
- truthound/checkpoint/distributed/backends/ray_backend.py +625 -0
- truthound/checkpoint/distributed/base.py +774 -0
- truthound/checkpoint/distributed/orchestrator.py +765 -0
- truthound/checkpoint/distributed/protocols.py +842 -0
- truthound/checkpoint/distributed/registry.py +449 -0
- truthound/checkpoint/idempotency/__init__.py +120 -0
- truthound/checkpoint/idempotency/core.py +295 -0
- truthound/checkpoint/idempotency/fingerprint.py +454 -0
- truthound/checkpoint/idempotency/locking.py +604 -0
- truthound/checkpoint/idempotency/service.py +592 -0
- truthound/checkpoint/idempotency/stores.py +653 -0
- truthound/checkpoint/monitoring/__init__.py +134 -0
- truthound/checkpoint/monitoring/aggregators/__init__.py +15 -0
- truthound/checkpoint/monitoring/aggregators/base.py +372 -0
- truthound/checkpoint/monitoring/aggregators/realtime.py +300 -0
- truthound/checkpoint/monitoring/aggregators/window.py +493 -0
- truthound/checkpoint/monitoring/collectors/__init__.py +17 -0
- truthound/checkpoint/monitoring/collectors/base.py +257 -0
- truthound/checkpoint/monitoring/collectors/memory_collector.py +617 -0
- truthound/checkpoint/monitoring/collectors/prometheus_collector.py +451 -0
- truthound/checkpoint/monitoring/collectors/redis_collector.py +518 -0
- truthound/checkpoint/monitoring/events.py +410 -0
- truthound/checkpoint/monitoring/protocols.py +636 -0
- truthound/checkpoint/monitoring/service.py +578 -0
- truthound/checkpoint/monitoring/views/__init__.py +17 -0
- truthound/checkpoint/monitoring/views/base.py +172 -0
- truthound/checkpoint/monitoring/views/queue_view.py +220 -0
- truthound/checkpoint/monitoring/views/task_view.py +240 -0
- truthound/checkpoint/monitoring/views/worker_view.py +263 -0
- truthound/checkpoint/registry.py +337 -0
- truthound/checkpoint/runner.py +356 -0
- truthound/checkpoint/transaction/__init__.py +133 -0
- truthound/checkpoint/transaction/base.py +389 -0
- truthound/checkpoint/transaction/compensatable.py +537 -0
- truthound/checkpoint/transaction/coordinator.py +576 -0
- truthound/checkpoint/transaction/executor.py +622 -0
- truthound/checkpoint/transaction/idempotency.py +534 -0
- truthound/checkpoint/transaction/saga/__init__.py +143 -0
- truthound/checkpoint/transaction/saga/builder.py +584 -0
- truthound/checkpoint/transaction/saga/definition.py +515 -0
- truthound/checkpoint/transaction/saga/event_store.py +542 -0
- truthound/checkpoint/transaction/saga/patterns.py +833 -0
- truthound/checkpoint/transaction/saga/runner.py +718 -0
- truthound/checkpoint/transaction/saga/state_machine.py +793 -0
- truthound/checkpoint/transaction/saga/strategies.py +780 -0
- truthound/checkpoint/transaction/saga/testing.py +886 -0
- truthound/checkpoint/triggers/__init__.py +58 -0
- truthound/checkpoint/triggers/base.py +237 -0
- truthound/checkpoint/triggers/event.py +385 -0
- truthound/checkpoint/triggers/schedule.py +355 -0
- truthound/cli.py +2358 -0
- truthound/cli_modules/__init__.py +124 -0
- truthound/cli_modules/advanced/__init__.py +45 -0
- truthound/cli_modules/advanced/benchmark.py +343 -0
- truthound/cli_modules/advanced/docs.py +225 -0
- truthound/cli_modules/advanced/lineage.py +209 -0
- truthound/cli_modules/advanced/ml.py +320 -0
- truthound/cli_modules/advanced/realtime.py +196 -0
- truthound/cli_modules/checkpoint/__init__.py +46 -0
- truthound/cli_modules/checkpoint/init.py +114 -0
- truthound/cli_modules/checkpoint/list.py +71 -0
- truthound/cli_modules/checkpoint/run.py +159 -0
- truthound/cli_modules/checkpoint/validate.py +67 -0
- truthound/cli_modules/common/__init__.py +71 -0
- truthound/cli_modules/common/errors.py +414 -0
- truthound/cli_modules/common/options.py +419 -0
- truthound/cli_modules/common/output.py +507 -0
- truthound/cli_modules/common/protocol.py +552 -0
- truthound/cli_modules/core/__init__.py +48 -0
- truthound/cli_modules/core/check.py +123 -0
- truthound/cli_modules/core/compare.py +104 -0
- truthound/cli_modules/core/learn.py +57 -0
- truthound/cli_modules/core/mask.py +77 -0
- truthound/cli_modules/core/profile.py +65 -0
- truthound/cli_modules/core/scan.py +61 -0
- truthound/cli_modules/profiler/__init__.py +51 -0
- truthound/cli_modules/profiler/auto_profile.py +175 -0
- truthound/cli_modules/profiler/metadata.py +107 -0
- truthound/cli_modules/profiler/suite.py +283 -0
- truthound/cli_modules/registry.py +431 -0
- truthound/cli_modules/scaffolding/__init__.py +89 -0
- truthound/cli_modules/scaffolding/base.py +631 -0
- truthound/cli_modules/scaffolding/commands.py +545 -0
- truthound/cli_modules/scaffolding/plugins.py +1072 -0
- truthound/cli_modules/scaffolding/reporters.py +594 -0
- truthound/cli_modules/scaffolding/validators.py +1127 -0
- truthound/common/__init__.py +18 -0
- truthound/common/resilience/__init__.py +130 -0
- truthound/common/resilience/bulkhead.py +266 -0
- truthound/common/resilience/circuit_breaker.py +516 -0
- truthound/common/resilience/composite.py +332 -0
- truthound/common/resilience/config.py +292 -0
- truthound/common/resilience/protocols.py +217 -0
- truthound/common/resilience/rate_limiter.py +404 -0
- truthound/common/resilience/retry.py +341 -0
- truthound/datadocs/__init__.py +260 -0
- truthound/datadocs/base.py +571 -0
- truthound/datadocs/builder.py +761 -0
- truthound/datadocs/charts.py +764 -0
- truthound/datadocs/dashboard/__init__.py +63 -0
- truthound/datadocs/dashboard/app.py +576 -0
- truthound/datadocs/dashboard/components.py +584 -0
- truthound/datadocs/dashboard/state.py +240 -0
- truthound/datadocs/engine/__init__.py +46 -0
- truthound/datadocs/engine/context.py +376 -0
- truthound/datadocs/engine/pipeline.py +618 -0
- truthound/datadocs/engine/registry.py +469 -0
- truthound/datadocs/exporters/__init__.py +49 -0
- truthound/datadocs/exporters/base.py +198 -0
- truthound/datadocs/exporters/html.py +178 -0
- truthound/datadocs/exporters/json_exporter.py +253 -0
- truthound/datadocs/exporters/markdown.py +284 -0
- truthound/datadocs/exporters/pdf.py +392 -0
- truthound/datadocs/i18n/__init__.py +86 -0
- truthound/datadocs/i18n/catalog.py +960 -0
- truthound/datadocs/i18n/formatting.py +505 -0
- truthound/datadocs/i18n/loader.py +256 -0
- truthound/datadocs/i18n/plurals.py +378 -0
- truthound/datadocs/renderers/__init__.py +42 -0
- truthound/datadocs/renderers/base.py +401 -0
- truthound/datadocs/renderers/custom.py +342 -0
- truthound/datadocs/renderers/jinja.py +697 -0
- truthound/datadocs/sections.py +736 -0
- truthound/datadocs/styles.py +931 -0
- truthound/datadocs/themes/__init__.py +101 -0
- truthound/datadocs/themes/base.py +336 -0
- truthound/datadocs/themes/default.py +417 -0
- truthound/datadocs/themes/enterprise.py +419 -0
- truthound/datadocs/themes/loader.py +336 -0
- truthound/datadocs/themes.py +301 -0
- truthound/datadocs/transformers/__init__.py +57 -0
- truthound/datadocs/transformers/base.py +268 -0
- truthound/datadocs/transformers/enrichers.py +544 -0
- truthound/datadocs/transformers/filters.py +447 -0
- truthound/datadocs/transformers/i18n.py +468 -0
- truthound/datadocs/versioning/__init__.py +62 -0
- truthound/datadocs/versioning/diff.py +639 -0
- truthound/datadocs/versioning/storage.py +497 -0
- truthound/datadocs/versioning/version.py +358 -0
- truthound/datasources/__init__.py +223 -0
- truthound/datasources/_async_protocols.py +222 -0
- truthound/datasources/_protocols.py +159 -0
- truthound/datasources/adapters.py +428 -0
- truthound/datasources/async_base.py +599 -0
- truthound/datasources/async_factory.py +511 -0
- truthound/datasources/base.py +516 -0
- truthound/datasources/factory.py +433 -0
- truthound/datasources/nosql/__init__.py +47 -0
- truthound/datasources/nosql/base.py +487 -0
- truthound/datasources/nosql/elasticsearch.py +801 -0
- truthound/datasources/nosql/mongodb.py +636 -0
- truthound/datasources/pandas_optimized.py +582 -0
- truthound/datasources/pandas_source.py +216 -0
- truthound/datasources/polars_source.py +395 -0
- truthound/datasources/spark_source.py +479 -0
- truthound/datasources/sql/__init__.py +154 -0
- truthound/datasources/sql/base.py +710 -0
- truthound/datasources/sql/bigquery.py +410 -0
- truthound/datasources/sql/cloud_base.py +199 -0
- truthound/datasources/sql/databricks.py +471 -0
- truthound/datasources/sql/mysql.py +316 -0
- truthound/datasources/sql/oracle.py +427 -0
- truthound/datasources/sql/postgresql.py +321 -0
- truthound/datasources/sql/redshift.py +479 -0
- truthound/datasources/sql/snowflake.py +439 -0
- truthound/datasources/sql/sqlite.py +286 -0
- truthound/datasources/sql/sqlserver.py +437 -0
- truthound/datasources/streaming/__init__.py +47 -0
- truthound/datasources/streaming/base.py +350 -0
- truthound/datasources/streaming/kafka.py +670 -0
- truthound/decorators.py +98 -0
- truthound/docs/__init__.py +69 -0
- truthound/docs/extractor.py +971 -0
- truthound/docs/generator.py +601 -0
- truthound/docs/parser.py +1037 -0
- truthound/docs/renderer.py +999 -0
- truthound/drift/__init__.py +22 -0
- truthound/drift/compare.py +189 -0
- truthound/drift/detectors.py +464 -0
- truthound/drift/report.py +160 -0
- truthound/execution/__init__.py +65 -0
- truthound/execution/_protocols.py +324 -0
- truthound/execution/base.py +576 -0
- truthound/execution/distributed/__init__.py +179 -0
- truthound/execution/distributed/aggregations.py +731 -0
- truthound/execution/distributed/arrow_bridge.py +817 -0
- truthound/execution/distributed/base.py +550 -0
- truthound/execution/distributed/dask_engine.py +976 -0
- truthound/execution/distributed/mixins.py +766 -0
- truthound/execution/distributed/protocols.py +756 -0
- truthound/execution/distributed/ray_engine.py +1127 -0
- truthound/execution/distributed/registry.py +446 -0
- truthound/execution/distributed/spark_engine.py +1011 -0
- truthound/execution/distributed/validator_adapter.py +682 -0
- truthound/execution/pandas_engine.py +401 -0
- truthound/execution/polars_engine.py +497 -0
- truthound/execution/pushdown/__init__.py +230 -0
- truthound/execution/pushdown/ast.py +1550 -0
- truthound/execution/pushdown/builder.py +1550 -0
- truthound/execution/pushdown/dialects.py +1072 -0
- truthound/execution/pushdown/executor.py +829 -0
- truthound/execution/pushdown/optimizer.py +1041 -0
- truthound/execution/sql_engine.py +518 -0
- truthound/infrastructure/__init__.py +189 -0
- truthound/infrastructure/audit.py +1515 -0
- truthound/infrastructure/config.py +1133 -0
- truthound/infrastructure/encryption.py +1132 -0
- truthound/infrastructure/logging.py +1503 -0
- truthound/infrastructure/metrics.py +1220 -0
- truthound/lineage/__init__.py +89 -0
- truthound/lineage/base.py +746 -0
- truthound/lineage/impact_analysis.py +474 -0
- truthound/lineage/integrations/__init__.py +22 -0
- truthound/lineage/integrations/openlineage.py +548 -0
- truthound/lineage/tracker.py +512 -0
- truthound/lineage/visualization/__init__.py +33 -0
- truthound/lineage/visualization/protocols.py +145 -0
- truthound/lineage/visualization/renderers/__init__.py +20 -0
- truthound/lineage/visualization/renderers/cytoscape.py +329 -0
- truthound/lineage/visualization/renderers/d3.py +331 -0
- truthound/lineage/visualization/renderers/graphviz.py +276 -0
- truthound/lineage/visualization/renderers/mermaid.py +308 -0
- truthound/maskers.py +113 -0
- truthound/ml/__init__.py +124 -0
- truthound/ml/anomaly_models/__init__.py +31 -0
- truthound/ml/anomaly_models/ensemble.py +362 -0
- truthound/ml/anomaly_models/isolation_forest.py +444 -0
- truthound/ml/anomaly_models/statistical.py +392 -0
- truthound/ml/base.py +1178 -0
- truthound/ml/drift_detection/__init__.py +26 -0
- truthound/ml/drift_detection/concept.py +381 -0
- truthound/ml/drift_detection/distribution.py +361 -0
- truthound/ml/drift_detection/feature.py +442 -0
- truthound/ml/drift_detection/multivariate.py +495 -0
- truthound/ml/monitoring/__init__.py +88 -0
- truthound/ml/monitoring/alerting/__init__.py +33 -0
- truthound/ml/monitoring/alerting/handlers.py +427 -0
- truthound/ml/monitoring/alerting/rules.py +508 -0
- truthound/ml/monitoring/collectors/__init__.py +19 -0
- truthound/ml/monitoring/collectors/composite.py +105 -0
- truthound/ml/monitoring/collectors/drift.py +324 -0
- truthound/ml/monitoring/collectors/performance.py +179 -0
- truthound/ml/monitoring/collectors/quality.py +369 -0
- truthound/ml/monitoring/monitor.py +536 -0
- truthound/ml/monitoring/protocols.py +451 -0
- truthound/ml/monitoring/stores/__init__.py +15 -0
- truthound/ml/monitoring/stores/memory.py +201 -0
- truthound/ml/monitoring/stores/prometheus.py +296 -0
- truthound/ml/rule_learning/__init__.py +25 -0
- truthound/ml/rule_learning/constraint_miner.py +443 -0
- truthound/ml/rule_learning/pattern_learner.py +499 -0
- truthound/ml/rule_learning/profile_learner.py +462 -0
- truthound/multitenancy/__init__.py +326 -0
- truthound/multitenancy/core.py +852 -0
- truthound/multitenancy/integration.py +597 -0
- truthound/multitenancy/isolation.py +630 -0
- truthound/multitenancy/manager.py +770 -0
- truthound/multitenancy/middleware.py +765 -0
- truthound/multitenancy/quota.py +537 -0
- truthound/multitenancy/resolvers.py +603 -0
- truthound/multitenancy/storage.py +703 -0
- truthound/observability/__init__.py +307 -0
- truthound/observability/context.py +531 -0
- truthound/observability/instrumentation.py +611 -0
- truthound/observability/logging.py +887 -0
- truthound/observability/metrics.py +1157 -0
- truthound/observability/tracing/__init__.py +178 -0
- truthound/observability/tracing/baggage.py +310 -0
- truthound/observability/tracing/config.py +426 -0
- truthound/observability/tracing/exporter.py +787 -0
- truthound/observability/tracing/integration.py +1018 -0
- truthound/observability/tracing/otel/__init__.py +146 -0
- truthound/observability/tracing/otel/adapter.py +982 -0
- truthound/observability/tracing/otel/bridge.py +1177 -0
- truthound/observability/tracing/otel/compat.py +681 -0
- truthound/observability/tracing/otel/config.py +691 -0
- truthound/observability/tracing/otel/detection.py +327 -0
- truthound/observability/tracing/otel/protocols.py +426 -0
- truthound/observability/tracing/processor.py +561 -0
- truthound/observability/tracing/propagator.py +757 -0
- truthound/observability/tracing/provider.py +569 -0
- truthound/observability/tracing/resource.py +515 -0
- truthound/observability/tracing/sampler.py +487 -0
- truthound/observability/tracing/span.py +676 -0
- truthound/plugins/__init__.py +198 -0
- truthound/plugins/base.py +599 -0
- truthound/plugins/cli.py +680 -0
- truthound/plugins/dependencies/__init__.py +42 -0
- truthound/plugins/dependencies/graph.py +422 -0
- truthound/plugins/dependencies/resolver.py +417 -0
- truthound/plugins/discovery.py +379 -0
- truthound/plugins/docs/__init__.py +46 -0
- truthound/plugins/docs/extractor.py +444 -0
- truthound/plugins/docs/renderer.py +499 -0
- truthound/plugins/enterprise_manager.py +877 -0
- truthound/plugins/examples/__init__.py +19 -0
- truthound/plugins/examples/custom_validators.py +317 -0
- truthound/plugins/examples/slack_notifier.py +312 -0
- truthound/plugins/examples/xml_reporter.py +254 -0
- truthound/plugins/hooks.py +558 -0
- truthound/plugins/lifecycle/__init__.py +43 -0
- truthound/plugins/lifecycle/hot_reload.py +402 -0
- truthound/plugins/lifecycle/manager.py +371 -0
- truthound/plugins/manager.py +736 -0
- truthound/plugins/registry.py +338 -0
- truthound/plugins/security/__init__.py +93 -0
- truthound/plugins/security/exceptions.py +332 -0
- truthound/plugins/security/policies.py +348 -0
- truthound/plugins/security/protocols.py +643 -0
- truthound/plugins/security/sandbox/__init__.py +45 -0
- truthound/plugins/security/sandbox/context.py +158 -0
- truthound/plugins/security/sandbox/engines/__init__.py +19 -0
- truthound/plugins/security/sandbox/engines/container.py +379 -0
- truthound/plugins/security/sandbox/engines/noop.py +144 -0
- truthound/plugins/security/sandbox/engines/process.py +336 -0
- truthound/plugins/security/sandbox/factory.py +211 -0
- truthound/plugins/security/signing/__init__.py +57 -0
- truthound/plugins/security/signing/service.py +330 -0
- truthound/plugins/security/signing/trust_store.py +368 -0
- truthound/plugins/security/signing/verifier.py +459 -0
- truthound/plugins/versioning/__init__.py +41 -0
- truthound/plugins/versioning/constraints.py +297 -0
- truthound/plugins/versioning/resolver.py +329 -0
- truthound/profiler/__init__.py +1729 -0
- truthound/profiler/_lazy.py +452 -0
- truthound/profiler/ab_testing/__init__.py +80 -0
- truthound/profiler/ab_testing/analysis.py +449 -0
- truthound/profiler/ab_testing/base.py +257 -0
- truthound/profiler/ab_testing/experiment.py +395 -0
- truthound/profiler/ab_testing/tracking.py +368 -0
- truthound/profiler/auto_threshold.py +1170 -0
- truthound/profiler/base.py +579 -0
- truthound/profiler/cache_patterns.py +911 -0
- truthound/profiler/caching.py +1303 -0
- truthound/profiler/column_profiler.py +712 -0
- truthound/profiler/comparison.py +1007 -0
- truthound/profiler/custom_patterns.py +1170 -0
- truthound/profiler/dashboard/__init__.py +50 -0
- truthound/profiler/dashboard/app.py +476 -0
- truthound/profiler/dashboard/components.py +457 -0
- truthound/profiler/dashboard/config.py +72 -0
- truthound/profiler/distributed/__init__.py +83 -0
- truthound/profiler/distributed/base.py +281 -0
- truthound/profiler/distributed/dask_backend.py +498 -0
- truthound/profiler/distributed/local_backend.py +293 -0
- truthound/profiler/distributed/profiler.py +304 -0
- truthound/profiler/distributed/ray_backend.py +374 -0
- truthound/profiler/distributed/spark_backend.py +375 -0
- truthound/profiler/distributed.py +1366 -0
- truthound/profiler/enterprise_sampling.py +1065 -0
- truthound/profiler/errors.py +488 -0
- truthound/profiler/evolution/__init__.py +91 -0
- truthound/profiler/evolution/alerts.py +426 -0
- truthound/profiler/evolution/changes.py +206 -0
- truthound/profiler/evolution/compatibility.py +365 -0
- truthound/profiler/evolution/detector.py +372 -0
- truthound/profiler/evolution/protocols.py +121 -0
- truthound/profiler/generators/__init__.py +48 -0
- truthound/profiler/generators/base.py +384 -0
- truthound/profiler/generators/ml_rules.py +375 -0
- truthound/profiler/generators/pattern_rules.py +384 -0
- truthound/profiler/generators/schema_rules.py +267 -0
- truthound/profiler/generators/stats_rules.py +324 -0
- truthound/profiler/generators/suite_generator.py +857 -0
- truthound/profiler/i18n.py +1542 -0
- truthound/profiler/incremental.py +554 -0
- truthound/profiler/incremental_validation.py +1710 -0
- truthound/profiler/integration/__init__.py +73 -0
- truthound/profiler/integration/adapters.py +345 -0
- truthound/profiler/integration/context.py +371 -0
- truthound/profiler/integration/executor.py +527 -0
- truthound/profiler/integration/naming.py +75 -0
- truthound/profiler/integration/protocols.py +243 -0
- truthound/profiler/memory.py +1185 -0
- truthound/profiler/migration/__init__.py +60 -0
- truthound/profiler/migration/base.py +345 -0
- truthound/profiler/migration/manager.py +444 -0
- truthound/profiler/migration/v1_0_to_v1_1.py +484 -0
- truthound/profiler/ml/__init__.py +73 -0
- truthound/profiler/ml/base.py +244 -0
- truthound/profiler/ml/classifier.py +507 -0
- truthound/profiler/ml/feature_extraction.py +604 -0
- truthound/profiler/ml/pretrained.py +448 -0
- truthound/profiler/ml_inference.py +1276 -0
- truthound/profiler/native_patterns.py +815 -0
- truthound/profiler/observability.py +1184 -0
- truthound/profiler/process_timeout.py +1566 -0
- truthound/profiler/progress.py +568 -0
- truthound/profiler/progress_callbacks.py +1734 -0
- truthound/profiler/quality.py +1345 -0
- truthound/profiler/resilience.py +1180 -0
- truthound/profiler/sampled_matcher.py +794 -0
- truthound/profiler/sampling.py +1288 -0
- truthound/profiler/scheduling/__init__.py +82 -0
- truthound/profiler/scheduling/protocols.py +214 -0
- truthound/profiler/scheduling/scheduler.py +474 -0
- truthound/profiler/scheduling/storage.py +457 -0
- truthound/profiler/scheduling/triggers.py +449 -0
- truthound/profiler/schema.py +603 -0
- truthound/profiler/streaming.py +685 -0
- truthound/profiler/streaming_patterns.py +1354 -0
- truthound/profiler/suite_cli.py +625 -0
- truthound/profiler/suite_config.py +789 -0
- truthound/profiler/suite_export.py +1268 -0
- truthound/profiler/table_profiler.py +547 -0
- truthound/profiler/timeout.py +565 -0
- truthound/profiler/validation.py +1532 -0
- truthound/profiler/visualization/__init__.py +118 -0
- truthound/profiler/visualization/base.py +346 -0
- truthound/profiler/visualization/generator.py +1259 -0
- truthound/profiler/visualization/plotly_renderer.py +811 -0
- truthound/profiler/visualization/renderers.py +669 -0
- truthound/profiler/visualization/sections.py +540 -0
- truthound/profiler/visualization.py +2122 -0
- truthound/profiler/yaml_validation.py +1151 -0
- truthound/py.typed +0 -0
- truthound/ratelimit/__init__.py +248 -0
- truthound/ratelimit/algorithms.py +1108 -0
- truthound/ratelimit/core.py +573 -0
- truthound/ratelimit/integration.py +532 -0
- truthound/ratelimit/limiter.py +663 -0
- truthound/ratelimit/middleware.py +700 -0
- truthound/ratelimit/policy.py +792 -0
- truthound/ratelimit/storage.py +763 -0
- truthound/rbac/__init__.py +340 -0
- truthound/rbac/core.py +976 -0
- truthound/rbac/integration.py +760 -0
- truthound/rbac/manager.py +1052 -0
- truthound/rbac/middleware.py +842 -0
- truthound/rbac/policy.py +954 -0
- truthound/rbac/storage.py +878 -0
- truthound/realtime/__init__.py +141 -0
- truthound/realtime/adapters/__init__.py +43 -0
- truthound/realtime/adapters/base.py +533 -0
- truthound/realtime/adapters/kafka.py +487 -0
- truthound/realtime/adapters/kinesis.py +479 -0
- truthound/realtime/adapters/mock.py +243 -0
- truthound/realtime/base.py +553 -0
- truthound/realtime/factory.py +382 -0
- truthound/realtime/incremental.py +660 -0
- truthound/realtime/processing/__init__.py +67 -0
- truthound/realtime/processing/exactly_once.py +575 -0
- truthound/realtime/processing/state.py +547 -0
- truthound/realtime/processing/windows.py +647 -0
- truthound/realtime/protocols.py +569 -0
- truthound/realtime/streaming.py +605 -0
- truthound/realtime/testing/__init__.py +32 -0
- truthound/realtime/testing/containers.py +615 -0
- truthound/realtime/testing/fixtures.py +484 -0
- truthound/report.py +280 -0
- truthound/reporters/__init__.py +46 -0
- truthound/reporters/_protocols.py +30 -0
- truthound/reporters/base.py +324 -0
- truthound/reporters/ci/__init__.py +66 -0
- truthound/reporters/ci/azure.py +436 -0
- truthound/reporters/ci/base.py +509 -0
- truthound/reporters/ci/bitbucket.py +567 -0
- truthound/reporters/ci/circleci.py +547 -0
- truthound/reporters/ci/detection.py +364 -0
- truthound/reporters/ci/factory.py +182 -0
- truthound/reporters/ci/github.py +388 -0
- truthound/reporters/ci/gitlab.py +471 -0
- truthound/reporters/ci/jenkins.py +525 -0
- truthound/reporters/console_reporter.py +299 -0
- truthound/reporters/factory.py +211 -0
- truthound/reporters/html_reporter.py +524 -0
- truthound/reporters/json_reporter.py +256 -0
- truthound/reporters/markdown_reporter.py +280 -0
- truthound/reporters/sdk/__init__.py +174 -0
- truthound/reporters/sdk/builder.py +558 -0
- truthound/reporters/sdk/mixins.py +1150 -0
- truthound/reporters/sdk/schema.py +1493 -0
- truthound/reporters/sdk/templates.py +666 -0
- truthound/reporters/sdk/testing.py +968 -0
- truthound/scanners.py +170 -0
- truthound/scheduling/__init__.py +122 -0
- truthound/scheduling/cron.py +1136 -0
- truthound/scheduling/presets.py +212 -0
- truthound/schema.py +275 -0
- truthound/secrets/__init__.py +173 -0
- truthound/secrets/base.py +618 -0
- truthound/secrets/cloud.py +682 -0
- truthound/secrets/integration.py +507 -0
- truthound/secrets/manager.py +633 -0
- truthound/secrets/oidc/__init__.py +172 -0
- truthound/secrets/oidc/base.py +902 -0
- truthound/secrets/oidc/credential_provider.py +623 -0
- truthound/secrets/oidc/exchangers.py +1001 -0
- truthound/secrets/oidc/github/__init__.py +110 -0
- truthound/secrets/oidc/github/claims.py +718 -0
- truthound/secrets/oidc/github/enhanced_provider.py +693 -0
- truthound/secrets/oidc/github/trust_policy.py +742 -0
- truthound/secrets/oidc/github/verification.py +723 -0
- truthound/secrets/oidc/github/workflow.py +691 -0
- truthound/secrets/oidc/providers.py +825 -0
- truthound/secrets/providers.py +506 -0
- truthound/secrets/resolver.py +495 -0
- truthound/stores/__init__.py +177 -0
- truthound/stores/backends/__init__.py +18 -0
- truthound/stores/backends/_protocols.py +340 -0
- truthound/stores/backends/azure_blob.py +530 -0
- truthound/stores/backends/concurrent_filesystem.py +915 -0
- truthound/stores/backends/connection_pool.py +1365 -0
- truthound/stores/backends/database.py +743 -0
- truthound/stores/backends/filesystem.py +538 -0
- truthound/stores/backends/gcs.py +399 -0
- truthound/stores/backends/memory.py +354 -0
- truthound/stores/backends/s3.py +434 -0
- truthound/stores/backpressure/__init__.py +84 -0
- truthound/stores/backpressure/base.py +375 -0
- truthound/stores/backpressure/circuit_breaker.py +434 -0
- truthound/stores/backpressure/monitor.py +376 -0
- truthound/stores/backpressure/strategies.py +677 -0
- truthound/stores/base.py +551 -0
- truthound/stores/batching/__init__.py +65 -0
- truthound/stores/batching/base.py +305 -0
- truthound/stores/batching/buffer.py +370 -0
- truthound/stores/batching/store.py +248 -0
- truthound/stores/batching/writer.py +521 -0
- truthound/stores/caching/__init__.py +60 -0
- truthound/stores/caching/backends.py +684 -0
- truthound/stores/caching/base.py +356 -0
- truthound/stores/caching/store.py +305 -0
- truthound/stores/compression/__init__.py +193 -0
- truthound/stores/compression/adaptive.py +694 -0
- truthound/stores/compression/base.py +514 -0
- truthound/stores/compression/pipeline.py +868 -0
- truthound/stores/compression/providers.py +672 -0
- truthound/stores/compression/streaming.py +832 -0
- truthound/stores/concurrency/__init__.py +81 -0
- truthound/stores/concurrency/atomic.py +556 -0
- truthound/stores/concurrency/index.py +775 -0
- truthound/stores/concurrency/locks.py +576 -0
- truthound/stores/concurrency/manager.py +482 -0
- truthound/stores/encryption/__init__.py +297 -0
- truthound/stores/encryption/base.py +952 -0
- truthound/stores/encryption/keys.py +1191 -0
- truthound/stores/encryption/pipeline.py +903 -0
- truthound/stores/encryption/providers.py +953 -0
- truthound/stores/encryption/streaming.py +950 -0
- truthound/stores/expectations.py +227 -0
- truthound/stores/factory.py +246 -0
- truthound/stores/migration/__init__.py +75 -0
- truthound/stores/migration/base.py +480 -0
- truthound/stores/migration/manager.py +347 -0
- truthound/stores/migration/registry.py +382 -0
- truthound/stores/migration/store.py +559 -0
- truthound/stores/observability/__init__.py +106 -0
- truthound/stores/observability/audit.py +718 -0
- truthound/stores/observability/config.py +270 -0
- truthound/stores/observability/factory.py +208 -0
- truthound/stores/observability/metrics.py +636 -0
- truthound/stores/observability/protocols.py +410 -0
- truthound/stores/observability/store.py +570 -0
- truthound/stores/observability/tracing.py +784 -0
- truthound/stores/replication/__init__.py +76 -0
- truthound/stores/replication/base.py +260 -0
- truthound/stores/replication/monitor.py +269 -0
- truthound/stores/replication/store.py +439 -0
- truthound/stores/replication/syncer.py +391 -0
- truthound/stores/results.py +359 -0
- truthound/stores/retention/__init__.py +77 -0
- truthound/stores/retention/base.py +378 -0
- truthound/stores/retention/policies.py +621 -0
- truthound/stores/retention/scheduler.py +279 -0
- truthound/stores/retention/store.py +526 -0
- truthound/stores/streaming/__init__.py +138 -0
- truthound/stores/streaming/base.py +801 -0
- truthound/stores/streaming/database.py +984 -0
- truthound/stores/streaming/filesystem.py +719 -0
- truthound/stores/streaming/reader.py +629 -0
- truthound/stores/streaming/s3.py +843 -0
- truthound/stores/streaming/writer.py +790 -0
- truthound/stores/tiering/__init__.py +108 -0
- truthound/stores/tiering/base.py +462 -0
- truthound/stores/tiering/manager.py +249 -0
- truthound/stores/tiering/policies.py +692 -0
- truthound/stores/tiering/store.py +526 -0
- truthound/stores/versioning/__init__.py +56 -0
- truthound/stores/versioning/base.py +376 -0
- truthound/stores/versioning/store.py +660 -0
- truthound/stores/versioning/strategies.py +353 -0
- truthound/types.py +56 -0
- truthound/validators/__init__.py +774 -0
- truthound/validators/aggregate/__init__.py +27 -0
- truthound/validators/aggregate/central.py +116 -0
- truthound/validators/aggregate/extremes.py +116 -0
- truthound/validators/aggregate/spread.py +118 -0
- truthound/validators/aggregate/sum.py +64 -0
- truthound/validators/aggregate/type.py +78 -0
- truthound/validators/anomaly/__init__.py +93 -0
- truthound/validators/anomaly/base.py +431 -0
- truthound/validators/anomaly/ml_based.py +1190 -0
- truthound/validators/anomaly/multivariate.py +647 -0
- truthound/validators/anomaly/statistical.py +599 -0
- truthound/validators/base.py +1089 -0
- truthound/validators/business_rule/__init__.py +46 -0
- truthound/validators/business_rule/base.py +147 -0
- truthound/validators/business_rule/checksum.py +509 -0
- truthound/validators/business_rule/financial.py +526 -0
- truthound/validators/cache.py +733 -0
- truthound/validators/completeness/__init__.py +39 -0
- truthound/validators/completeness/conditional.py +73 -0
- truthound/validators/completeness/default.py +98 -0
- truthound/validators/completeness/empty.py +103 -0
- truthound/validators/completeness/nan.py +337 -0
- truthound/validators/completeness/null.py +152 -0
- truthound/validators/cross_table/__init__.py +17 -0
- truthound/validators/cross_table/aggregate.py +333 -0
- truthound/validators/cross_table/row_count.py +122 -0
- truthound/validators/datetime/__init__.py +29 -0
- truthound/validators/datetime/format.py +78 -0
- truthound/validators/datetime/freshness.py +269 -0
- truthound/validators/datetime/order.py +73 -0
- truthound/validators/datetime/parseable.py +185 -0
- truthound/validators/datetime/range.py +202 -0
- truthound/validators/datetime/timezone.py +69 -0
- truthound/validators/distribution/__init__.py +49 -0
- truthound/validators/distribution/distribution.py +128 -0
- truthound/validators/distribution/monotonic.py +119 -0
- truthound/validators/distribution/outlier.py +178 -0
- truthound/validators/distribution/quantile.py +80 -0
- truthound/validators/distribution/range.py +254 -0
- truthound/validators/distribution/set.py +125 -0
- truthound/validators/distribution/statistical.py +459 -0
- truthound/validators/drift/__init__.py +79 -0
- truthound/validators/drift/base.py +427 -0
- truthound/validators/drift/multi_feature.py +401 -0
- truthound/validators/drift/numeric.py +395 -0
- truthound/validators/drift/psi.py +446 -0
- truthound/validators/drift/statistical.py +510 -0
- truthound/validators/enterprise.py +1658 -0
- truthound/validators/geospatial/__init__.py +80 -0
- truthound/validators/geospatial/base.py +97 -0
- truthound/validators/geospatial/boundary.py +238 -0
- truthound/validators/geospatial/coordinate.py +351 -0
- truthound/validators/geospatial/distance.py +399 -0
- truthound/validators/geospatial/polygon.py +665 -0
- truthound/validators/i18n/__init__.py +308 -0
- truthound/validators/i18n/bidi.py +571 -0
- truthound/validators/i18n/catalogs.py +570 -0
- truthound/validators/i18n/dialects.py +763 -0
- truthound/validators/i18n/extended_catalogs.py +549 -0
- truthound/validators/i18n/formatting.py +1434 -0
- truthound/validators/i18n/loader.py +1020 -0
- truthound/validators/i18n/messages.py +521 -0
- truthound/validators/i18n/plural.py +683 -0
- truthound/validators/i18n/protocols.py +855 -0
- truthound/validators/i18n/tms.py +1162 -0
- truthound/validators/localization/__init__.py +53 -0
- truthound/validators/localization/base.py +122 -0
- truthound/validators/localization/chinese.py +362 -0
- truthound/validators/localization/japanese.py +275 -0
- truthound/validators/localization/korean.py +524 -0
- truthound/validators/memory/__init__.py +94 -0
- truthound/validators/memory/approximate_knn.py +506 -0
- truthound/validators/memory/base.py +547 -0
- truthound/validators/memory/sgd_online.py +719 -0
- truthound/validators/memory/streaming_ecdf.py +753 -0
- truthound/validators/ml_feature/__init__.py +54 -0
- truthound/validators/ml_feature/base.py +249 -0
- truthound/validators/ml_feature/correlation.py +299 -0
- truthound/validators/ml_feature/leakage.py +344 -0
- truthound/validators/ml_feature/null_impact.py +270 -0
- truthound/validators/ml_feature/scale.py +264 -0
- truthound/validators/multi_column/__init__.py +89 -0
- truthound/validators/multi_column/arithmetic.py +284 -0
- truthound/validators/multi_column/base.py +231 -0
- truthound/validators/multi_column/comparison.py +273 -0
- truthound/validators/multi_column/consistency.py +312 -0
- truthound/validators/multi_column/statistical.py +299 -0
- truthound/validators/optimization/__init__.py +164 -0
- truthound/validators/optimization/aggregation.py +563 -0
- truthound/validators/optimization/covariance.py +556 -0
- truthound/validators/optimization/geo.py +626 -0
- truthound/validators/optimization/graph.py +587 -0
- truthound/validators/optimization/orchestrator.py +970 -0
- truthound/validators/optimization/profiling.py +1312 -0
- truthound/validators/privacy/__init__.py +223 -0
- truthound/validators/privacy/base.py +635 -0
- truthound/validators/privacy/ccpa.py +670 -0
- truthound/validators/privacy/gdpr.py +728 -0
- truthound/validators/privacy/global_patterns.py +604 -0
- truthound/validators/privacy/plugins.py +867 -0
- truthound/validators/profiling/__init__.py +52 -0
- truthound/validators/profiling/base.py +175 -0
- truthound/validators/profiling/cardinality.py +312 -0
- truthound/validators/profiling/entropy.py +391 -0
- truthound/validators/profiling/frequency.py +455 -0
- truthound/validators/pushdown_support.py +660 -0
- truthound/validators/query/__init__.py +91 -0
- truthound/validators/query/aggregate.py +346 -0
- truthound/validators/query/base.py +246 -0
- truthound/validators/query/column.py +249 -0
- truthound/validators/query/expression.py +274 -0
- truthound/validators/query/result.py +323 -0
- truthound/validators/query/row_count.py +264 -0
- truthound/validators/referential/__init__.py +80 -0
- truthound/validators/referential/base.py +395 -0
- truthound/validators/referential/cascade.py +391 -0
- truthound/validators/referential/circular.py +563 -0
- truthound/validators/referential/foreign_key.py +624 -0
- truthound/validators/referential/orphan.py +485 -0
- truthound/validators/registry.py +112 -0
- truthound/validators/schema/__init__.py +41 -0
- truthound/validators/schema/column_count.py +142 -0
- truthound/validators/schema/column_exists.py +80 -0
- truthound/validators/schema/column_order.py +82 -0
- truthound/validators/schema/column_pair.py +85 -0
- truthound/validators/schema/column_pair_set.py +195 -0
- truthound/validators/schema/column_type.py +94 -0
- truthound/validators/schema/multi_column.py +53 -0
- truthound/validators/schema/multi_column_aggregate.py +175 -0
- truthound/validators/schema/referential.py +274 -0
- truthound/validators/schema/table_schema.py +91 -0
- truthound/validators/schema_validator.py +219 -0
- truthound/validators/sdk/__init__.py +250 -0
- truthound/validators/sdk/builder.py +680 -0
- truthound/validators/sdk/decorators.py +474 -0
- truthound/validators/sdk/enterprise/__init__.py +211 -0
- truthound/validators/sdk/enterprise/docs.py +725 -0
- truthound/validators/sdk/enterprise/fuzzing.py +659 -0
- truthound/validators/sdk/enterprise/licensing.py +709 -0
- truthound/validators/sdk/enterprise/manager.py +543 -0
- truthound/validators/sdk/enterprise/resources.py +628 -0
- truthound/validators/sdk/enterprise/sandbox.py +766 -0
- truthound/validators/sdk/enterprise/signing.py +603 -0
- truthound/validators/sdk/enterprise/templates.py +865 -0
- truthound/validators/sdk/enterprise/versioning.py +659 -0
- truthound/validators/sdk/templates.py +757 -0
- truthound/validators/sdk/testing.py +807 -0
- truthound/validators/security/__init__.py +181 -0
- truthound/validators/security/redos/__init__.py +182 -0
- truthound/validators/security/redos/core.py +861 -0
- truthound/validators/security/redos/cpu_monitor.py +593 -0
- truthound/validators/security/redos/cve_database.py +791 -0
- truthound/validators/security/redos/ml/__init__.py +155 -0
- truthound/validators/security/redos/ml/base.py +785 -0
- truthound/validators/security/redos/ml/datasets.py +618 -0
- truthound/validators/security/redos/ml/features.py +359 -0
- truthound/validators/security/redos/ml/models.py +1000 -0
- truthound/validators/security/redos/ml/predictor.py +507 -0
- truthound/validators/security/redos/ml/storage.py +632 -0
- truthound/validators/security/redos/ml/training.py +571 -0
- truthound/validators/security/redos/ml_analyzer.py +937 -0
- truthound/validators/security/redos/optimizer.py +674 -0
- truthound/validators/security/redos/profiler.py +682 -0
- truthound/validators/security/redos/re2_engine.py +709 -0
- truthound/validators/security/redos.py +886 -0
- truthound/validators/security/sql_security.py +1247 -0
- truthound/validators/streaming/__init__.py +126 -0
- truthound/validators/streaming/base.py +292 -0
- truthound/validators/streaming/completeness.py +210 -0
- truthound/validators/streaming/mixin.py +575 -0
- truthound/validators/streaming/range.py +308 -0
- truthound/validators/streaming/sources.py +846 -0
- truthound/validators/string/__init__.py +57 -0
- truthound/validators/string/casing.py +158 -0
- truthound/validators/string/charset.py +96 -0
- truthound/validators/string/format.py +501 -0
- truthound/validators/string/json.py +77 -0
- truthound/validators/string/json_schema.py +184 -0
- truthound/validators/string/length.py +104 -0
- truthound/validators/string/like_pattern.py +237 -0
- truthound/validators/string/regex.py +202 -0
- truthound/validators/string/regex_extended.py +435 -0
- truthound/validators/table/__init__.py +88 -0
- truthound/validators/table/base.py +78 -0
- truthound/validators/table/column_count.py +198 -0
- truthound/validators/table/freshness.py +362 -0
- truthound/validators/table/row_count.py +251 -0
- truthound/validators/table/schema.py +333 -0
- truthound/validators/table/size.py +285 -0
- truthound/validators/timeout/__init__.py +102 -0
- truthound/validators/timeout/advanced/__init__.py +247 -0
- truthound/validators/timeout/advanced/circuit_breaker.py +675 -0
- truthound/validators/timeout/advanced/prediction.py +773 -0
- truthound/validators/timeout/advanced/priority.py +618 -0
- truthound/validators/timeout/advanced/redis_backend.py +770 -0
- truthound/validators/timeout/advanced/retry.py +721 -0
- truthound/validators/timeout/advanced/sampling.py +788 -0
- truthound/validators/timeout/advanced/sla.py +661 -0
- truthound/validators/timeout/advanced/telemetry.py +804 -0
- truthound/validators/timeout/cascade.py +477 -0
- truthound/validators/timeout/deadline.py +657 -0
- truthound/validators/timeout/degradation.py +525 -0
- truthound/validators/timeout/distributed.py +597 -0
- truthound/validators/timeseries/__init__.py +89 -0
- truthound/validators/timeseries/base.py +326 -0
- truthound/validators/timeseries/completeness.py +617 -0
- truthound/validators/timeseries/gap.py +485 -0
- truthound/validators/timeseries/monotonic.py +310 -0
- truthound/validators/timeseries/seasonality.py +422 -0
- truthound/validators/timeseries/trend.py +510 -0
- truthound/validators/uniqueness/__init__.py +59 -0
- truthound/validators/uniqueness/approximate.py +475 -0
- truthound/validators/uniqueness/distinct_values.py +253 -0
- truthound/validators/uniqueness/duplicate.py +118 -0
- truthound/validators/uniqueness/primary_key.py +140 -0
- truthound/validators/uniqueness/unique.py +191 -0
- truthound/validators/uniqueness/within_record.py +599 -0
- truthound/validators/utils.py +756 -0
- truthound-1.0.8.dist-info/METADATA +474 -0
- truthound-1.0.8.dist-info/RECORD +877 -0
- truthound-1.0.8.dist-info/WHEEL +4 -0
- truthound-1.0.8.dist-info/entry_points.txt +2 -0
- truthound-1.0.8.dist-info/licenses/LICENSE +190 -0
|
@@ -0,0 +1,1312 @@
|
|
|
1
|
+
"""Validator performance profiling framework.
|
|
2
|
+
|
|
3
|
+
This module provides comprehensive profiling capabilities for validators:
|
|
4
|
+
- Execution time measurement (per-validator, per-column)
|
|
5
|
+
- Memory usage tracking (peak, delta, GC impact)
|
|
6
|
+
- Statistical aggregation (mean, median, p95, p99)
|
|
7
|
+
- Historical performance tracking
|
|
8
|
+
- Regression detection
|
|
9
|
+
- Export to various formats (JSON, Prometheus, HTML)
|
|
10
|
+
|
|
11
|
+
Design Principles:
|
|
12
|
+
- Zero-overhead when disabled
|
|
13
|
+
- Thread-safe for parallel execution
|
|
14
|
+
- Extensible metric types
|
|
15
|
+
- Integration with existing observability infrastructure
|
|
16
|
+
|
|
17
|
+
Usage:
|
|
18
|
+
from truthound.validators.optimization.profiling import (
|
|
19
|
+
ValidatorProfiler,
|
|
20
|
+
ProfilerConfig,
|
|
21
|
+
profile_validator,
|
|
22
|
+
)
|
|
23
|
+
|
|
24
|
+
# Simple profiling
|
|
25
|
+
with profile_validator(my_validator) as profiler:
|
|
26
|
+
issues = my_validator.validate(lf)
|
|
27
|
+
|
|
28
|
+
print(profiler.metrics.to_dict())
|
|
29
|
+
|
|
30
|
+
# Full profiling session
|
|
31
|
+
profiler = ValidatorProfiler()
|
|
32
|
+
profiler.start_session("my_validation_run")
|
|
33
|
+
|
|
34
|
+
for validator in validators:
|
|
35
|
+
with profiler.profile(validator):
|
|
36
|
+
issues = validator.validate(lf)
|
|
37
|
+
|
|
38
|
+
report = profiler.end_session()
|
|
39
|
+
print(report.summary())
|
|
40
|
+
"""
|
|
41
|
+
|
|
42
|
+
from __future__ import annotations
|
|
43
|
+
|
|
44
|
+
import gc
|
|
45
|
+
import json
|
|
46
|
+
import logging
|
|
47
|
+
import statistics
|
|
48
|
+
import threading
|
|
49
|
+
import time
|
|
50
|
+
import traceback
|
|
51
|
+
from abc import ABC, abstractmethod
|
|
52
|
+
from contextlib import contextmanager
|
|
53
|
+
from dataclasses import dataclass, field
|
|
54
|
+
from datetime import datetime
|
|
55
|
+
from enum import Enum, auto
|
|
56
|
+
from typing import Any, Callable, Iterator, TypeVar, Generic
|
|
57
|
+
|
|
58
|
+
logger = logging.getLogger(__name__)
|
|
59
|
+
|
|
60
|
+
|
|
61
|
+
# =============================================================================
|
|
62
|
+
# Constants and Configuration
|
|
63
|
+
# =============================================================================
|
|
64
|
+
|
|
65
|
+
# Default histogram buckets for timing, expressed in milliseconds.
DEFAULT_TIMING_BUCKETS_MS = [
    0.1,
    0.5,
    1,
    2,
    5,
    10,
    25,
    50,
    100,
    250,
    500,
    1000,
    2500,
    5000,
    10000,
]

# Memory tracking thresholds, in megabytes.
MEMORY_WARNING_THRESHOLD_MB = 100
MEMORY_CRITICAL_THRESHOLD_MB = 500

# Maximum number of history entries to keep per validator.
MAX_HISTORY_ENTRIES = 1000
|
|
76
|
+
|
|
77
|
+
|
|
78
|
+
class MetricType(Enum):
    """Kinds of metrics that can be tracked for a validator."""

    # Execution time
    TIMING = auto()
    # Memory usage
    MEMORY = auto()
    # Rows per second
    THROUGHPUT = auto()
    # Validation issues found
    ISSUE_COUNT = auto()
    # Garbage collection overhead
    GC_IMPACT = auto()
|
|
85
|
+
|
|
86
|
+
|
|
87
|
+
class ProfilerMode(Enum):
    """Operating modes of the profiler, from off to most verbose."""

    # No profiling
    DISABLED = auto()
    # Timing only
    BASIC = auto()
    # Timing + memory
    STANDARD = auto()
    # All metrics + tracing
    DETAILED = auto()
    # Maximum detail for debugging
    DIAGNOSTIC = auto()
|
|
94
|
+
|
|
95
|
+
|
|
96
|
+
# =============================================================================
|
|
97
|
+
# Core Data Structures
|
|
98
|
+
# =============================================================================
|
|
99
|
+
|
|
100
|
+
@dataclass
class TimingMetrics:
    """Timing statistics for a validator.

    Stores raw execution durations (in milliseconds) and derives summary
    statistics from them on demand. Every statistic returns ``0.0`` when no
    durations have been recorded.
    """

    # Raw durations in milliseconds, in the order they were added.
    durations_ms: list[float] = field(default_factory=list)

    @property
    def count(self) -> int:
        """Number of recorded durations."""
        return len(self.durations_ms)

    @property
    def total_ms(self) -> float:
        """Sum of all recorded durations."""
        return sum(self.durations_ms) if self.durations_ms else 0.0

    @property
    def mean_ms(self) -> float:
        """Arithmetic mean of the recorded durations."""
        return statistics.mean(self.durations_ms) if self.durations_ms else 0.0

    @property
    def median_ms(self) -> float:
        """Median of the recorded durations."""
        return statistics.median(self.durations_ms) if self.durations_ms else 0.0

    @property
    def std_ms(self) -> float:
        """Sample standard deviation; ``0.0`` with fewer than two samples."""
        if len(self.durations_ms) < 2:
            return 0.0
        return statistics.stdev(self.durations_ms)

    @property
    def min_ms(self) -> float:
        """Smallest recorded duration."""
        return min(self.durations_ms) if self.durations_ms else 0.0

    @property
    def max_ms(self) -> float:
        """Largest recorded duration."""
        return max(self.durations_ms) if self.durations_ms else 0.0

    def percentile(self, p: float) -> float:
        """Return the nearest-rank percentile of the recorded durations.

        Args:
            p: Percentile in the range 0-100. Values outside that range are
                clamped, so ``p <= 0`` yields the minimum and ``p >= 100``
                yields the maximum.

        Returns:
            The smallest recorded duration such that at least ``p`` percent
            of the samples are less than or equal to it, or ``0.0`` when
            nothing has been recorded.
        """
        if not self.durations_ms:
            return 0.0
        ordered = sorted(self.durations_ms)
        # Nearest-rank: ordinal rank = ceil(n * p / 100), written as ceiling
        # division. Plain truncation lands one rank too high whenever
        # n * p / 100 is an exact integer (e.g. p95 of 100 samples).
        rank = int(-(-(len(ordered) * p) // 100))
        # Clamp so p <= 0 cannot produce a negative (wrapping) index and
        # p >= 100 cannot run past the end.
        idx = min(max(rank - 1, 0), len(ordered) - 1)
        return ordered[idx]

    @property
    def p50_ms(self) -> float:
        """50th percentile duration."""
        return self.percentile(50)

    @property
    def p90_ms(self) -> float:
        """90th percentile duration."""
        return self.percentile(90)

    @property
    def p95_ms(self) -> float:
        """95th percentile duration."""
        return self.percentile(95)

    @property
    def p99_ms(self) -> float:
        """99th percentile duration."""
        return self.percentile(99)

    def add(self, duration_ms: float) -> None:
        """Add a duration observation.

        Only the most recent ``MAX_HISTORY_ENTRIES`` observations are kept.
        """
        self.durations_ms.append(duration_ms)
        # Trim if too many entries
        if len(self.durations_ms) > MAX_HISTORY_ENTRIES:
            self.durations_ms = self.durations_ms[-MAX_HISTORY_ENTRIES:]

    def to_dict(self) -> dict[str, Any]:
        """Return the summary statistics, rounded to three decimals."""
        return {
            "count": self.count,
            "total_ms": round(self.total_ms, 3),
            "mean_ms": round(self.mean_ms, 3),
            "median_ms": round(self.median_ms, 3),
            "std_ms": round(self.std_ms, 3),
            "min_ms": round(self.min_ms, 3),
            "max_ms": round(self.max_ms, 3),
            "p90_ms": round(self.p90_ms, 3),
            "p95_ms": round(self.p95_ms, 3),
            "p99_ms": round(self.p99_ms, 3),
        }
|
|
180
|
+
|
|
181
|
+
|
|
182
|
+
@dataclass
class MemoryMetrics:
    """Memory usage statistics for a validator.

    Keeps three parallel series (peak bytes, byte deltas and GC collection
    counts) that are appended to and trimmed together by ``add``.
    """

    peak_bytes: list[int] = field(default_factory=list)
    delta_bytes: list[int] = field(default_factory=list)
    gc_collections: list[int] = field(default_factory=list)

    @property
    def count(self) -> int:
        """Number of recorded observations (length of the peak series)."""
        return len(self.peak_bytes)

    @property
    def mean_peak_mb(self) -> float:
        """Mean of the recorded peaks, converted from bytes to MiB."""
        peaks = self.peak_bytes
        return statistics.mean(peaks) / (1024 * 1024) if peaks else 0.0

    @property
    def max_peak_mb(self) -> float:
        """Largest recorded peak, converted from bytes to MiB."""
        peaks = self.peak_bytes
        return max(peaks) / (1024 * 1024) if peaks else 0.0

    @property
    def mean_delta_mb(self) -> float:
        """Mean of the recorded deltas, converted from bytes to MiB."""
        deltas = self.delta_bytes
        return statistics.mean(deltas) / (1024 * 1024) if deltas else 0.0

    @property
    def total_gc_collections(self) -> int:
        """Sum of the recorded GC collection counts."""
        return sum(self.gc_collections)

    def add(self, peak: int, delta: int, gc_count: int = 0) -> None:
        """Record one observation, keeping only the newest entries.

        Args:
            peak: Peak memory value in bytes.
            delta: Memory change in bytes.
            gc_count: Number of GC collections for this observation.
        """
        self.peak_bytes.append(peak)
        self.delta_bytes.append(delta)
        self.gc_collections.append(gc_count)
        if len(self.peak_bytes) <= MAX_HISTORY_ENTRIES:
            return
        # Over the cap: keep only the most recent entries of every series.
        self.peak_bytes = self.peak_bytes[-MAX_HISTORY_ENTRIES:]
        self.delta_bytes = self.delta_bytes[-MAX_HISTORY_ENTRIES:]
        self.gc_collections = self.gc_collections[-MAX_HISTORY_ENTRIES:]

    def to_dict(self) -> dict[str, Any]:
        """Return the summary statistics (MiB values rounded to 2 decimals)."""
        summary: dict[str, Any] = {"count": self.count}
        summary["mean_peak_mb"] = round(self.mean_peak_mb, 2)
        summary["max_peak_mb"] = round(self.max_peak_mb, 2)
        summary["mean_delta_mb"] = round(self.mean_delta_mb, 2)
        summary["total_gc_collections"] = self.total_gc_collections
        return summary
|
|
234
|
+
|
|
235
|
+
|
|
236
|
+
@dataclass
class ThroughputMetrics:
    """Throughput statistics for a validator.

    Keeps two parallel series, rows processed and the matching duration in
    milliseconds, appended to and trimmed together by ``add``.
    """

    rows_processed: list[int] = field(default_factory=list)
    durations_ms: list[float] = field(default_factory=list)

    @property
    def count(self) -> int:
        """Number of recorded observations."""
        return len(self.rows_processed)

    @property
    def total_rows(self) -> int:
        """Total rows across all recorded observations."""
        return sum(self.rows_processed)

    @property
    def mean_rows_per_sec(self) -> float:
        """Total rows divided by total seconds; ``0.0`` if undefined."""
        if not (self.rows_processed and self.durations_ms):
            return 0.0
        elapsed_seconds = sum(self.durations_ms) / 1000
        if elapsed_seconds > 0:
            return sum(self.rows_processed) / elapsed_seconds
        return 0.0

    def add(self, rows: int, duration_ms: float) -> None:
        """Record one observation, keeping only the newest entries.

        Args:
            rows: Number of rows processed.
            duration_ms: Time taken, in milliseconds.
        """
        self.rows_processed.append(rows)
        self.durations_ms.append(duration_ms)
        if len(self.rows_processed) <= MAX_HISTORY_ENTRIES:
            return
        self.rows_processed = self.rows_processed[-MAX_HISTORY_ENTRIES:]
        self.durations_ms = self.durations_ms[-MAX_HISTORY_ENTRIES:]

    def to_dict(self) -> dict[str, Any]:
        """Return the summary statistics (rate rounded to 2 decimals)."""
        rate = round(self.mean_rows_per_sec, 2)
        return {
            "count": self.count,
            "total_rows": self.total_rows,
            "mean_rows_per_sec": rate,
        }
|
|
272
|
+
|
|
273
|
+
|
|
274
|
+
@dataclass
class ValidatorMetrics:
    """Complete metrics for a single validator.

    Combines timing, memory and throughput series with per-execution issue
    counts, an error counter and the time of the last recorded execution.
    """

    validator_name: str
    validator_category: str
    timing: TimingMetrics = field(default_factory=TimingMetrics)
    memory: MemoryMetrics = field(default_factory=MemoryMetrics)
    throughput: ThroughputMetrics = field(default_factory=ThroughputMetrics)
    issue_counts: list[int] = field(default_factory=list)
    error_counts: int = 0
    last_execution: datetime | None = None

    @property
    def total_issues(self) -> int:
        """Sum of the issue counts over all retained executions."""
        return sum(self.issue_counts)

    @property
    def mean_issues(self) -> float:
        """Mean issue count per retained execution; ``0.0`` when empty."""
        return statistics.mean(self.issue_counts) if self.issue_counts else 0.0

    @property
    def execution_count(self) -> int:
        """Number of executions, taken from the timing series."""
        return self.timing.count

    def record_execution(
        self,
        duration_ms: float,
        issue_count: int = 0,
        rows_processed: int = 0,
        peak_memory: int = 0,
        memory_delta: int = 0,
        gc_collections: int = 0,
        error: bool = False,
    ) -> None:
        """Record a complete execution observation.

        Args:
            duration_ms: Execution time in milliseconds (always recorded).
            issue_count: Number of validation issues (always recorded).
            rows_processed: Rows handled; throughput is recorded only when
                this is positive.
            peak_memory: Peak memory in bytes.
            memory_delta: Memory change in bytes. Memory is recorded only
                when ``peak_memory`` is positive or this is non-zero.
            gc_collections: GC collection count stored with the memory
                observation.
            error: Whether the execution failed; increments the error
                counter when true.
        """
        self.timing.add(duration_ms)
        self.issue_counts.append(issue_count)

        has_rows = rows_processed > 0
        if has_rows:
            self.throughput.add(rows_processed, duration_ms)

        has_memory_data = peak_memory > 0 or memory_delta != 0
        if has_memory_data:
            self.memory.add(peak_memory, memory_delta, gc_collections)

        if error:
            self.error_counts += 1

        self.last_execution = datetime.now()

        # Keep only the newest issue counts once the cap is exceeded.
        if len(self.issue_counts) > MAX_HISTORY_ENTRIES:
            self.issue_counts = self.issue_counts[-MAX_HISTORY_ENTRIES:]

    def to_dict(self) -> dict[str, Any]:
        """Return a dictionary view including the nested metric summaries."""
        if self.last_execution:
            last_seen = self.last_execution.isoformat()
        else:
            last_seen = None
        return {
            "validator_name": self.validator_name,
            "validator_category": self.validator_category,
            "execution_count": self.execution_count,
            "error_count": self.error_counts,
            "total_issues": self.total_issues,
            "mean_issues": round(self.mean_issues, 2),
            "last_execution": last_seen,
            "timing": self.timing.to_dict(),
            "memory": self.memory.to_dict(),
            "throughput": self.throughput.to_dict(),
        }
|
|
342
|
+
|
|
343
|
+
|
|
344
|
+
# =============================================================================
|
|
345
|
+
# Profiling Session and Context
|
|
346
|
+
# =============================================================================
|
|
347
|
+
|
|
348
|
+
@dataclass
class ExecutionSnapshot:
    """Record of a single validator execution, kept for detailed analysis."""

    validator_name: str
    timestamp: datetime
    duration_ms: float
    rows_processed: int
    issue_count: int
    peak_memory_bytes: int
    memory_delta_bytes: int
    # GC counts as (gen0, gen1, gen2), taken before and after the execution.
    gc_before: tuple[int, int, int]
    gc_after: tuple[int, int, int]
    success: bool
    error_message: str | None = None
    attributes: dict[str, Any] = field(default_factory=dict)

    def to_dict(self) -> dict[str, Any]:
        """Return a dictionary view of the snapshot.

        Memory values are converted from bytes to MiB and rounded to two
        decimals; ``gc_collections`` is the difference between the summed
        ``gc_after`` and ``gc_before`` counts.
        """
        bytes_per_mb = 1024 * 1024
        collections_during_run = sum(self.gc_after) - sum(self.gc_before)
        payload: dict[str, Any] = {
            "validator_name": self.validator_name,
            "timestamp": self.timestamp.isoformat(),
            "duration_ms": round(self.duration_ms, 3),
            "rows_processed": self.rows_processed,
            "issue_count": self.issue_count,
        }
        payload["peak_memory_mb"] = round(self.peak_memory_bytes / bytes_per_mb, 2)
        payload["memory_delta_mb"] = round(self.memory_delta_bytes / bytes_per_mb, 2)
        payload["gc_collections"] = collections_during_run
        payload["success"] = self.success
        payload["error_message"] = self.error_message
        payload["attributes"] = self.attributes
        return payload
|
|
378
|
+
|
|
379
|
+
|
|
380
|
+
@dataclass
class ProfilingSession:
    """A profiling session containing multiple validator executions.

    Holds per-validator aggregated metrics keyed by validator name, an
    optional list of per-execution snapshots, and free-form attributes.
    """

    session_id: str
    start_time: datetime
    # None while the session is still running.
    end_time: datetime | None = None
    validator_metrics: dict[str, ValidatorMetrics] = field(default_factory=dict)
    snapshots: list[ExecutionSnapshot] = field(default_factory=list)
    attributes: dict[str, Any] = field(default_factory=dict)

    @property
    def duration_ms(self) -> float:
        """Session duration in milliseconds.

        For a running session (``end_time`` is None) the elapsed time up to
        now is returned.
        """
        end = self.end_time
        if end is None:
            # Take "now" in start_time's own tzinfo: a naive start_time keeps
            # the previous behaviour, and a timezone-aware one no longer
            # raises TypeError when subtracted from a naive "now".
            end = datetime.now(self.start_time.tzinfo)
        return (end - self.start_time).total_seconds() * 1000

    @property
    def total_validators(self) -> int:
        """Number of distinct validators with metrics in this session."""
        return len(self.validator_metrics)

    @property
    def total_executions(self) -> int:
        """Sum of execution counts across all validators."""
        return sum(m.execution_count for m in self.validator_metrics.values())

    @property
    def total_issues(self) -> int:
        """Sum of issue totals across all validators."""
        return sum(m.total_issues for m in self.validator_metrics.values())

    def get_or_create_metrics(
        self,
        validator_name: str,
        validator_category: str = "unknown",
    ) -> ValidatorMetrics:
        """Get or create metrics for a validator.

        Args:
            validator_name: Key under which the metrics are stored.
            validator_category: Category used only when a new entry is
                created; an existing entry keeps its category.

        Returns:
            The metrics object registered under ``validator_name``.
        """
        if validator_name not in self.validator_metrics:
            self.validator_metrics[validator_name] = ValidatorMetrics(
                validator_name=validator_name,
                validator_category=validator_category,
            )
        return self.validator_metrics[validator_name]

    def add_snapshot(self, snapshot: ExecutionSnapshot) -> None:
        """Add an execution snapshot.

        At most ``MAX_HISTORY_ENTRIES * 10`` snapshots are retained; older
        ones are dropped first.
        """
        self.snapshots.append(snapshot)
        # Trim if too many
        if len(self.snapshots) > MAX_HISTORY_ENTRIES * 10:
            self.snapshots = self.snapshots[-MAX_HISTORY_ENTRIES * 10:]

    def summary(self) -> dict[str, Any]:
        """Get session summary (identity, times, totals and attributes)."""
        return {
            "session_id": self.session_id,
            "start_time": self.start_time.isoformat(),
            "end_time": self.end_time.isoformat() if self.end_time else None,
            "duration_ms": round(self.duration_ms, 2),
            "total_validators": self.total_validators,
            "total_executions": self.total_executions,
            "total_issues": self.total_issues,
            "attributes": self.attributes,
        }

    def to_dict(self) -> dict[str, Any]:
        """Return the summary plus per-validator metrics and snapshot count."""
        return {
            **self.summary(),
            "validators": {
                name: metrics.to_dict()
                for name, metrics in self.validator_metrics.items()
            },
            "snapshots_count": len(self.snapshots),
        }

    def to_json(self, include_snapshots: bool = False) -> str:
        """Export to JSON.

        Args:
            include_snapshots: When true, add a ``"snapshots"`` list holding
                every retained snapshot's dictionary form.

        Returns:
            An indented JSON string; values that are not JSON-serializable
            are converted with ``str``.
        """
        data = self.to_dict()
        if include_snapshots:
            data["snapshots"] = [s.to_dict() for s in self.snapshots]
        return json.dumps(data, indent=2, default=str)
|
|
457
|
+
|
|
458
|
+
|
|
459
|
+
# =============================================================================
|
|
460
|
+
# Configuration
|
|
461
|
+
# =============================================================================
|
|
462
|
+
|
|
463
|
+
@dataclass
class ProfilerConfig:
    """Configuration for validator profiling.

    The classmethods below build preset configurations for the individual
    profiler modes.
    """

    mode: ProfilerMode = ProfilerMode.STANDARD
    track_memory: bool = True
    track_gc: bool = True
    track_throughput: bool = True
    record_snapshots: bool = False
    max_snapshots: int = 1000
    memory_warning_mb: float = MEMORY_WARNING_THRESHOLD_MB
    memory_critical_mb: float = MEMORY_CRITICAL_THRESHOLD_MB
    # Each instance gets its own copy of the default bucket list.
    timing_buckets_ms: list[float] = field(
        default_factory=lambda: list(DEFAULT_TIMING_BUCKETS_MS)
    )

    @classmethod
    def disabled(cls) -> "ProfilerConfig":
        """Build a configuration whose mode is ``DISABLED``."""
        return cls(mode=ProfilerMode.DISABLED)

    @classmethod
    def basic(cls) -> "ProfilerConfig":
        """Build a timing-only configuration (``BASIC`` mode).

        Memory, GC and throughput tracking and snapshot recording are all
        switched off.
        """
        switched_off = {
            "track_memory": False,
            "track_gc": False,
            "track_throughput": False,
            "record_snapshots": False,
        }
        return cls(mode=ProfilerMode.BASIC, **switched_off)

    @classmethod
    def detailed(cls) -> "ProfilerConfig":
        """Build a ``DETAILED`` configuration with snapshot recording on."""
        return cls(mode=ProfilerMode.DETAILED, record_snapshots=True)

    @classmethod
    def diagnostic(cls) -> "ProfilerConfig":
        """Build a ``DIAGNOSTIC`` configuration (maximum detail).

        Snapshot recording is on and ``max_snapshots`` is raised to 10000.
        """
        return cls(
            mode=ProfilerMode.DIAGNOSTIC,
            record_snapshots=True,
            max_snapshots=10000,
        )
|
|
510
|
+
|
|
511
|
+
|
|
512
|
+
# =============================================================================
|
|
513
|
+
# Memory Tracker
|
|
514
|
+
# =============================================================================
|
|
515
|
+
|
|
516
|
+
class MemoryTracker:
|
|
517
|
+
"""Tracks memory usage during validator execution."""
|
|
518
|
+
|
|
519
|
+
_psutil_available: bool | None = None
|
|
520
|
+
|
|
521
|
+
def __init__(self):
|
|
522
|
+
self._start_memory: int = 0
|
|
523
|
+
self._peak_memory: int = 0
|
|
524
|
+
self._gc_before: tuple[int, int, int] = (0, 0, 0)
|
|
525
|
+
self._tracking: bool = False
|
|
526
|
+
|
|
527
|
+
@classmethod
|
|
528
|
+
def is_available(cls) -> bool:
|
|
529
|
+
"""Check if memory tracking is available (psutil installed)."""
|
|
530
|
+
if cls._psutil_available is None:
|
|
531
|
+
try:
|
|
532
|
+
import psutil
|
|
533
|
+
cls._psutil_available = True
|
|
534
|
+
except ImportError:
|
|
535
|
+
cls._psutil_available = False
|
|
536
|
+
return cls._psutil_available
|
|
537
|
+
|
|
538
|
+
def _get_current_memory(self) -> int:
|
|
539
|
+
"""Get current process memory in bytes."""
|
|
540
|
+
if not self.is_available():
|
|
541
|
+
return 0
|
|
542
|
+
try:
|
|
543
|
+
import psutil
|
|
544
|
+
process = psutil.Process()
|
|
545
|
+
return process.memory_info().rss
|
|
546
|
+
except Exception:
|
|
547
|
+
return 0
|
|
548
|
+
|
|
549
|
+
def _get_gc_counts(self) -> tuple[int, int, int]:
|
|
550
|
+
"""Get GC collection counts for all generations."""
|
|
551
|
+
stats = gc.get_stats()
|
|
552
|
+
return (
|
|
553
|
+
stats[0].get("collections", 0),
|
|
554
|
+
stats[1].get("collections", 0),
|
|
555
|
+
stats[2].get("collections", 0),
|
|
556
|
+
)
|
|
557
|
+
|
|
558
|
+
def start(self) -> None:
|
|
559
|
+
"""Start memory tracking."""
|
|
560
|
+
self._tracking = True
|
|
561
|
+
self._start_memory = self._get_current_memory()
|
|
562
|
+
self._peak_memory = self._start_memory
|
|
563
|
+
self._gc_before = self._get_gc_counts()
|
|
564
|
+
|
|
565
|
+
def update_peak(self) -> None:
|
|
566
|
+
"""Update peak memory if current is higher."""
|
|
567
|
+
if self._tracking:
|
|
568
|
+
current = self._get_current_memory()
|
|
569
|
+
if current > self._peak_memory:
|
|
570
|
+
self._peak_memory = current
|
|
571
|
+
|
|
572
|
+
def stop(self) -> tuple[int, int, int, tuple[int, int, int], tuple[int, int, int]]:
|
|
573
|
+
"""Stop tracking and return (peak, delta, end_memory, gc_before, gc_after)."""
|
|
574
|
+
if not self._tracking:
|
|
575
|
+
return 0, 0, 0, (0, 0, 0), (0, 0, 0)
|
|
576
|
+
|
|
577
|
+
self._tracking = False
|
|
578
|
+
end_memory = self._get_current_memory()
|
|
579
|
+
self.update_peak()
|
|
580
|
+
gc_after = self._get_gc_counts()
|
|
581
|
+
|
|
582
|
+
delta = end_memory - self._start_memory
|
|
583
|
+
return (
|
|
584
|
+
self._peak_memory,
|
|
585
|
+
delta,
|
|
586
|
+
end_memory,
|
|
587
|
+
self._gc_before,
|
|
588
|
+
gc_after,
|
|
589
|
+
)
|
|
590
|
+
|
|
591
|
+
|
|
592
|
+
# =============================================================================
|
|
593
|
+
# Profiling Context Manager
|
|
594
|
+
# =============================================================================
|
|
595
|
+
|
|
596
|
+
@dataclass
class ProfileContext:
    """Context for a single validator profiling operation.

    ``start()`` records the start time (and memory baseline when
    ``config.track_memory`` is set); ``stop()`` records one execution into a
    ``ValidatorMetrics`` object - the session's when a session is attached,
    otherwise a private one - and optionally builds an ``ExecutionSnapshot``.
    Both calls do nothing beyond marking completion when the mode is DISABLED.
    """

    validator_name: str
    validator_category: str
    config: ProfilerConfig
    session: ProfilingSession | None = None
    rows_processed: int = 0
    attributes: dict[str, Any] = field(default_factory=dict)

    # Internal state
    # Wall-clock start from time.time(); 0.0 means "not started".
    _start_time: float = field(default=0.0, init=False)
    _memory_tracker: MemoryTracker = field(default_factory=MemoryTracker, init=False)
    _completed: bool = field(default=False, init=False)
    _metrics: ValidatorMetrics | None = field(default=None, init=False)
    _snapshot: ExecutionSnapshot | None = field(default=None, init=False)
    _issue_count: int = field(default=0, init=False)
    _error: bool = field(default=False, init=False)
    _error_message: str | None = field(default=None, init=False)

    @property
    def metrics(self) -> ValidatorMetrics | None:
        """Get the metrics object written by ``stop()`` (None before that)."""
        return self._metrics

    @property
    def snapshot(self) -> ExecutionSnapshot | None:
        """Get the execution snapshot, if ``stop()`` created one."""
        return self._snapshot

    @property
    def duration_ms(self) -> float:
        """Get the execution duration in milliseconds.

        After completion this is the last duration stored in the metrics
        object; while running it is the time elapsed since ``start()``;
        before ``start()`` it is 0.0.
        """
        if self._completed and self._metrics:
            recorded = self._metrics.timing.durations_ms
            return recorded[-1] if recorded else 0.0
        if self._start_time > 0:
            return (time.time() - self._start_time) * 1000
        return 0.0

    def start(self) -> None:
        """Start profiling (no-op when the mode is DISABLED)."""
        if self.config.mode == ProfilerMode.DISABLED:
            return

        self._start_time = time.time()

        if self.config.track_memory:
            self._memory_tracker.start()

    def set_issue_count(self, count: int) -> None:
        """Set the number of issues found."""
        self._issue_count = count

    def set_rows_processed(self, rows: int) -> None:
        """Set the number of rows processed."""
        self.rows_processed = rows

    def set_error(self, error_message: str | None = None) -> None:
        """Mark as failed with optional error message."""
        self._error = True
        self._error_message = error_message

    def add_attribute(self, key: str, value: Any) -> None:
        """Add an attribute to the profile."""
        self.attributes[key] = value

    def stop(self) -> None:
        """Stop profiling and record results.

        Idempotent: a second call after completion does nothing.
        """
        if self.config.mode == ProfilerMode.DISABLED:
            self._completed = True
            return

        if self._completed:
            return

        # A context that was never started has no usable start time. Report
        # 0.0, as the duration_ms property does, instead of recording
        # "milliseconds since the epoch" into the metrics.
        if self._start_time > 0:
            duration_ms = (time.time() - self._start_time) * 1000
        else:
            duration_ms = 0.0

        # Get memory stats
        peak_memory = 0
        memory_delta = 0
        gc_before = (0, 0, 0)
        gc_after = (0, 0, 0)
        gc_collections = 0

        # NOTE(review): GC counts are gathered only together with memory
        # tracking; config.track_gc is not consulted here.
        if self.config.track_memory:
            peak_memory, memory_delta, _, gc_before, gc_after = self._memory_tracker.stop()
            gc_collections = sum(gc_after) - sum(gc_before)

        # Get or create metrics
        if self.session:
            self._metrics = self.session.get_or_create_metrics(
                self.validator_name,
                self.validator_category,
            )
        else:
            self._metrics = ValidatorMetrics(
                validator_name=self.validator_name,
                validator_category=self.validator_category,
            )

        # Record execution
        self._metrics.record_execution(
            duration_ms=duration_ms,
            issue_count=self._issue_count,
            rows_processed=self.rows_processed,
            peak_memory=peak_memory,
            memory_delta=memory_delta,
            gc_collections=gc_collections,
            error=self._error,
        )

        # Create snapshot if configured
        if self.config.record_snapshots:
            self._snapshot = ExecutionSnapshot(
                validator_name=self.validator_name,
                timestamp=datetime.now(),
                duration_ms=duration_ms,
                rows_processed=self.rows_processed,
                issue_count=self._issue_count,
                peak_memory_bytes=peak_memory,
                memory_delta_bytes=memory_delta,
                gc_before=gc_before,
                gc_after=gc_after,
                success=not self._error,
                error_message=self._error_message,
                # Copy so later add_attribute() calls do not alter the snapshot.
                attributes=self.attributes.copy(),
            )
            if self.session:
                self.session.add_snapshot(self._snapshot)

        self._completed = True
|
|
726
|
+
|
|
727
|
+
|
|
728
|
+
# =============================================================================
|
|
729
|
+
# Main Profiler Class
|
|
730
|
+
# =============================================================================
|
|
731
|
+
|
|
732
|
+
class ValidatorProfiler:
    """Main profiler for validator performance tracking.

    Holds at most one active ``ProfilingSession``, the list of completed
    sessions, and a per-validator "global" aggregate into which each session
    is merged when it ends. Session bookkeeping and the reporting methods
    run under an internal re-entrant lock.

    Example:
        profiler = ValidatorProfiler()

        # Start a session
        profiler.start_session("validation_run_1")

        for validator in validators:
            with profiler.profile(validator) as ctx:
                issues = validator.validate(lf)
                ctx.set_issue_count(len(issues))

        # Get session results
        session = profiler.end_session()
        print(session.to_json())
    """

    def __init__(self, config: ProfilerConfig | None = None):
        """Initialize profiler.

        Args:
            config: Profiler configuration. Defaults to STANDARD mode.
        """
        self.config = config or ProfilerConfig()
        self._lock = threading.RLock()  # Reentrant lock to allow nested calls
        self._current_session: ProfilingSession | None = None
        self._completed_sessions: list[ProfilingSession] = []
        self._global_metrics: dict[str, ValidatorMetrics] = {}

    @property
    def is_enabled(self) -> bool:
        """Check if profiling is enabled."""
        return self.config.mode != ProfilerMode.DISABLED

    @property
    def current_session(self) -> ProfilingSession | None:
        """Get the current active session."""
        return self._current_session

    @property
    def global_metrics(self) -> dict[str, ValidatorMetrics]:
        """Get global metrics across all sessions (shallow copy of the mapping)."""
        return self._global_metrics.copy()

    def start_session(
        self,
        session_id: str | None = None,
        attributes: dict[str, Any] | None = None,
    ) -> ProfilingSession:
        """Start a new profiling session.

        An already active session is ended (and merged into the global
        metrics) first.

        Args:
            session_id: Optional session identifier; defaults to
                ``session_<YYYYmmdd_HHMMSS>`` of the current local time.
            attributes: Optional session attributes

        Returns:
            The new profiling session
        """
        with self._lock:
            if self._current_session is not None:
                # End previous session
                self._end_session_internal()

            if session_id is None:
                session_id = f"session_{datetime.now().strftime('%Y%m%d_%H%M%S')}"

            self._current_session = ProfilingSession(
                session_id=session_id,
                start_time=datetime.now(),
                attributes=attributes or {},
            )
            return self._current_session

    def end_session(self) -> ProfilingSession | None:
        """End the current profiling session.

        Returns:
            The completed session, or None if no session was active
        """
        with self._lock:
            return self._end_session_internal()

    def _end_session_internal(self) -> ProfilingSession | None:
        """Internal session end (must hold lock).

        Stamps the end time, moves the session to the completed list and
        appends its per-validator samples to the global aggregates.
        """
        if self._current_session is None:
            return None

        session = self._current_session
        session.end_time = datetime.now()
        self._completed_sessions.append(session)
        self._current_session = None

        # Merge into global metrics
        for name, metrics in session.validator_metrics.items():
            if name not in self._global_metrics:
                self._global_metrics[name] = ValidatorMetrics(
                    validator_name=metrics.validator_name,
                    validator_category=metrics.validator_category,
                )
            global_m = self._global_metrics[name]
            # Merge timing
            global_m.timing.durations_ms.extend(metrics.timing.durations_ms)
            # Merge memory
            global_m.memory.peak_bytes.extend(metrics.memory.peak_bytes)
            global_m.memory.delta_bytes.extend(metrics.memory.delta_bytes)
            global_m.memory.gc_collections.extend(metrics.memory.gc_collections)
            # Merge throughput
            global_m.throughput.rows_processed.extend(metrics.throughput.rows_processed)
            global_m.throughput.durations_ms.extend(metrics.throughput.durations_ms)
            # Merge issues
            global_m.issue_counts.extend(metrics.issue_counts)
            global_m.error_counts += metrics.error_counts
            global_m.last_execution = metrics.last_execution

        return session

    def _combined_metrics(self) -> dict[str, ValidatorMetrics]:
        """Return global metrics overlaid with the active session's (must hold lock).

        For a validator present in both, the active session's entry replaces
        the global one; the two are not merged.
        """
        combined = {**self._global_metrics}
        if self._current_session:
            combined.update(self._current_session.validator_metrics)
        return combined

    @contextmanager
    def profile(
        self,
        validator: Any,
        rows_processed: int = 0,
        **attributes: Any,
    ) -> Iterator[ProfileContext]:
        """Profile a validator execution.

        Args:
            validator: The validator being profiled. Its ``name`` attribute
                (falling back to the class name) and ``category`` attribute
                (falling back to ``"unknown"``) label the metrics.
            rows_processed: Number of rows being validated
            **attributes: Additional attributes to record

        Yields:
            ProfileContext for recording metrics

        An exception raised inside the ``with`` body is recorded on the
        context via ``set_error`` and then re-raised.

        Example:
            with profiler.profile(my_validator, rows_processed=10000) as ctx:
                issues = my_validator.validate(lf)
                ctx.set_issue_count(len(issues))
        """
        # Extract validator info
        validator_name = getattr(validator, "name", type(validator).__name__)
        validator_category = getattr(validator, "category", "unknown")

        # Read the session under the lock so a concurrent start/end_session
        # cannot be observed half-way.
        with self._lock:
            session = self._current_session

        ctx = ProfileContext(
            validator_name=validator_name,
            validator_category=validator_category,
            config=self.config,
            session=session,
            rows_processed=rows_processed,
            attributes=dict(attributes),
        )

        ctx.start()
        try:
            yield ctx
        except Exception as e:
            ctx.set_error(str(e))
            raise
        finally:
            ctx.stop()

    def get_metrics(self, validator_name: str) -> ValidatorMetrics | None:
        """Get metrics for a specific validator.

        Args:
            validator_name: Name of the validator

        Returns:
            The active session's metrics for that name if present, otherwise
            the global metrics, or None if not found
        """
        with self._lock:
            # Check current session first
            if self._current_session and validator_name in self._current_session.validator_metrics:
                return self._current_session.validator_metrics[validator_name]
            # Fall back to global
            return self._global_metrics.get(validator_name)

    def get_slowest_validators(self, n: int = 10) -> list[tuple[str, float]]:
        """Get the N slowest validators by mean execution time.

        Args:
            n: Number of validators to return

        Returns:
            List of (validator_name, mean_ms) tuples, slowest first
        """
        with self._lock:
            ranked = sorted(
                self._combined_metrics().items(),
                key=lambda item: item[1].timing.mean_ms,
                reverse=True,
            )
            return [(name, m.timing.mean_ms) for name, m in ranked[:n]]

    def get_memory_intensive_validators(self, n: int = 10) -> list[tuple[str, float]]:
        """Get the N most memory-intensive validators.

        Args:
            n: Number of validators to return

        Returns:
            List of (validator_name, max_peak_mb) tuples, largest first
        """
        with self._lock:
            ranked = sorted(
                self._combined_metrics().items(),
                key=lambda item: item[1].memory.max_peak_mb,
                reverse=True,
            )
            return [(name, m.memory.max_peak_mb) for name, m in ranked[:n]]

    def summary(self) -> dict[str, Any]:
        """Get a summary of all profiling data."""
        with self._lock:
            all_metrics = self._combined_metrics()

            total_executions = sum(m.execution_count for m in all_metrics.values())
            total_issues = sum(m.total_issues for m in all_metrics.values())
            total_time_ms = sum(m.timing.total_ms for m in all_metrics.values())

            return {
                "total_validators": len(all_metrics),
                "total_executions": total_executions,
                "total_issues": total_issues,
                "total_time_ms": round(total_time_ms, 2),
                "completed_sessions": len(self._completed_sessions),
                "current_session_active": self._current_session is not None,
                "memory_tracking_available": MemoryTracker.is_available(),
            }

    def reset(self) -> None:
        """Reset all profiling data (active session, history and aggregates)."""
        with self._lock:
            self._current_session = None
            self._completed_sessions.clear()
            self._global_metrics.clear()

    def to_dict(self) -> dict[str, Any]:
        """Export all profiling data to a dictionary."""
        with self._lock:
            return {
                "summary": self.summary(),
                "global_metrics": {
                    name: m.to_dict() for name, m in self._global_metrics.items()
                },
                "completed_sessions": [
                    s.summary() for s in self._completed_sessions
                ],
                "current_session": (
                    self._current_session.summary()
                    if self._current_session else None
                ),
            }

    def to_json(self) -> str:
        """Export all profiling data to JSON."""
        return json.dumps(self.to_dict(), indent=2, default=str)

    @staticmethod
    def _escape_label_value(value: Any) -> str:
        """Escape a value for use inside a quoted Prometheus label.

        The text exposition format requires backslash, double quote and line
        feed to be written as ``\\\\``, ``\\"`` and ``\\n``.
        """
        # Backslash first, so the escapes added below are not escaped again.
        text = f"{value}".replace("\\", "\\\\")
        return text.replace('"', '\\"').replace("\n", "\\n")

    @classmethod
    def _prometheus_labels(cls, name: str, metrics: Any) -> str:
        """Build the ``validator="...",category="..."`` label string."""
        validator = cls._escape_label_value(name)
        category = cls._escape_label_value(metrics.validator_category)
        return f'validator="{validator}",category="{category}"'

    def to_prometheus(self) -> str:
        """Export metrics in Prometheus text format.

        Emits mean/p95/p99 duration, execution count, peak memory (only for
        validators with memory samples) and issue totals, one sample per
        validator, labelled with the validator name and category.
        """
        lines = []

        with self._lock:
            all_metrics = self._combined_metrics()

            # Render each label set once; it is reused by every section.
            labelled = [
                (self._prometheus_labels(name, m), m)
                for name, m in all_metrics.items()
            ]

            # Timing metrics
            lines.append("# HELP validator_execution_duration_ms Validator execution duration in milliseconds")
            lines.append("# TYPE validator_execution_duration_ms gauge")
            for labels, m in labelled:
                lines.append(f"validator_execution_duration_ms_mean{{{labels}}} {m.timing.mean_ms:.3f}")
                lines.append(f"validator_execution_duration_ms_p95{{{labels}}} {m.timing.p95_ms:.3f}")
                lines.append(f"validator_execution_duration_ms_p99{{{labels}}} {m.timing.p99_ms:.3f}")

            # Execution count
            lines.append("")
            lines.append("# HELP validator_execution_count Total validator executions")
            lines.append("# TYPE validator_execution_count counter")
            for labels, m in labelled:
                lines.append(f"validator_execution_count{{{labels}}} {m.execution_count}")

            # Memory metrics
            lines.append("")
            lines.append("# HELP validator_memory_peak_mb Peak memory usage in MB")
            lines.append("# TYPE validator_memory_peak_mb gauge")
            for labels, m in labelled:
                if m.memory.count > 0:
                    lines.append(f"validator_memory_peak_mb{{{labels}}} {m.memory.max_peak_mb:.2f}")

            # Issue count
            lines.append("")
            lines.append("# HELP validator_issues_total Total validation issues found")
            lines.append("# TYPE validator_issues_total counter")
            for labels, m in labelled:
                lines.append(f"validator_issues_total{{{labels}}} {m.total_issues}")

        return "\n".join(lines)
|
|
1048
|
+
|
|
1049
|
+
|
|
1050
|
+
# =============================================================================
|
|
1051
|
+
# Convenience Functions and Decorators
|
|
1052
|
+
# =============================================================================
|
|
1053
|
+
|
|
1054
|
+
# Global default profiler
|
|
1055
|
+
# Process-wide profiler handed out by get_default_profiler(); None until it
# is first requested, and again after reset_default_profiler().
_default_profiler: ValidatorProfiler | None = None
|
|
1056
|
+
# Held by the get/set/reset functions below while they touch _default_profiler.
_profiler_lock = threading.Lock()
|
|
1057
|
+
|
|
1058
|
+
|
|
1059
|
+
def get_default_profiler() -> ValidatorProfiler:
    """Return the shared module-level profiler, creating it on first use.

    Creation happens under ``_profiler_lock``; the new instance uses the
    default ``ValidatorProfiler()`` configuration.
    """
    global _default_profiler
    with _profiler_lock:
        shared = _default_profiler
        if shared is None:
            shared = _default_profiler = ValidatorProfiler()
        return shared
|
|
1066
|
+
|
|
1067
|
+
|
|
1068
|
+
def set_default_profiler(profiler: ValidatorProfiler) -> None:
    """Install ``profiler`` as the shared module-level profiler.

    Args:
        profiler: Instance that ``get_default_profiler()`` returns from now on.
    """
    global _default_profiler
    _profiler_lock.acquire()
    try:
        _default_profiler = profiler
    finally:
        _profiler_lock.release()
|
|
1073
|
+
|
|
1074
|
+
|
|
1075
|
+
def reset_default_profiler() -> None:
    """Clear and discard the shared module-level profiler.

    An existing profiler has ``reset()`` called on it before the module
    reference is dropped; the next ``get_default_profiler()`` call creates a
    new instance.
    """
    global _default_profiler
    with _profiler_lock:
        existing = _default_profiler
        if existing:
            existing.reset()
        _default_profiler = None
|
|
1082
|
+
|
|
1083
|
+
|
|
1084
|
+
@contextmanager
def profile_validator(
    validator: Any,
    rows_processed: int = 0,
    profiler: ValidatorProfiler | None = None,
    **attributes: Any,
) -> Iterator[ProfileContext]:
    """Profile one validator execution with the given or the global profiler.

    Thin wrapper around ``profiler.profile(...)`` that falls back to
    ``get_default_profiler()`` when no profiler is passed.

    Args:
        validator: The validator to profile
        rows_processed: Number of rows being processed
        profiler: Profiler to record into; the global default when None
        **attributes: Extra attributes forwarded to ``profile``

    Yields:
        The ProfileContext produced by ``profiler.profile``

    Example:
        with profile_validator(my_validator) as ctx:
            issues = my_validator.validate(lf)
            ctx.set_issue_count(len(issues))
    """
    active = get_default_profiler() if profiler is None else profiler
    with active.profile(validator, rows_processed, **attributes) as ctx:
        yield ctx
|
|
1112
|
+
|
|
1113
|
+
|
|
1114
|
+
def profiled(
    profiler: ValidatorProfiler | None = None,
    track_issues: bool = True,
) -> Callable:
    """Decorator for profiling validator methods.

    The decorated method runs inside ``profiler.profile(self, ...)``. When
    its first positional argument has a ``select`` attribute, it is treated
    as a polars LazyFrame and its row count is collected (best effort) to
    fill ``rows_processed``.

    Args:
        profiler: Profiler to record into. When None, the global default is
            looked up on every call, so a later ``set_default_profiler`` or
            ``reset_default_profiler`` takes effect.
        track_issues: Whether to track issue count from return value (only
            applied when the method returns a list)

    Returns:
        Decorator that wraps the method while preserving its metadata

    Example:
        class MyValidator(Validator):
            @profiled()
            def validate(self, lf):
                return [issue1, issue2]
    """
    # Function-scope import keeps this block self-contained.
    import functools

    def decorator(func: Callable) -> Callable:
        @functools.wraps(func)
        def wrapper(self, *args, **kwargs):
            # Resolve per call rather than caching via ``nonlocal``: a cached
            # default would keep pointing at a profiler that has since been
            # replaced or reset.
            active = profiler if profiler is not None else get_default_profiler()

            # Try to get row count from LazyFrame. Counting rows executes the
            # query, so skip it when nothing will be recorded anyway.
            rows = 0
            if (
                getattr(active, "is_enabled", True)
                and args
                and hasattr(args[0], "select")
            ):
                try:
                    import polars as pl

                    rows = args[0].select(pl.len()).collect().item()
                except Exception:
                    # Deliberately best effort: an uncountable frame (or a
                    # missing polars) must not stop the validator itself.
                    pass

            with active.profile(self, rows_processed=rows) as ctx:
                result = func(self, *args, **kwargs)
                if track_issues and isinstance(result, list):
                    ctx.set_issue_count(len(result))
                return result

        return wrapper

    return decorator
|
|
1156
|
+
|
|
1157
|
+
|
|
1158
|
+
# =============================================================================
|
|
1159
|
+
# Report Generation
|
|
1160
|
+
# =============================================================================
|
|
1161
|
+
|
|
1162
|
+
class ProfilingReport:
    """Generates human-readable reports from profiling data."""

    def __init__(self, profiler: ValidatorProfiler):
        """Store the profiler whose data the report methods read."""
        self.profiler = profiler

    def text_summary(self) -> str:
        """Generate a text summary report.

        Contains the overall totals, the ten slowest validators by mean
        execution time and, when memory tracking is available, the ten
        validators with the highest peak memory.
        """
        heavy_rule = "=" * 60
        light_rule = "-" * 60

        lines = []
        lines.append(heavy_rule)
        lines.append("VALIDATOR PROFILING REPORT")
        lines.append(heavy_rule)

        summary = self.profiler.summary()
        lines.append(f"Total Validators: {summary['total_validators']}")
        lines.append(f"Total Executions: {summary['total_executions']}")
        lines.append(f"Total Issues Found: {summary['total_issues']}")
        lines.append(f"Total Time: {summary['total_time_ms']:.2f}ms")
        lines.append("")

        # Slowest validators
        lines.append(light_rule)
        lines.append("TOP 10 SLOWEST VALIDATORS (by mean execution time)")
        lines.append(light_rule)
        slowest = self.profiler.get_slowest_validators(10)
        for i, (name, mean_ms) in enumerate(slowest, 1):
            lines.append(f"{i:2}. {name}: {mean_ms:.2f}ms")
        lines.append("")

        # Memory intensive
        if MemoryTracker.is_available():
            lines.append(light_rule)
            lines.append("TOP 10 MEMORY INTENSIVE VALIDATORS")
            lines.append(light_rule)
            memory_heavy = self.profiler.get_memory_intensive_validators(10)
            for i, (name, peak_mb) in enumerate(memory_heavy, 1):
                lines.append(f"{i:2}. {name}: {peak_mb:.2f}MB peak")
            lines.append("")

        lines.append(heavy_rule)
        return "\n".join(lines)

    def html_report(self) -> str:
        """Generate an HTML report.

        The page has a summary table and one row per entry of the profiler's
        ``global_metrics``. Validator names and categories are HTML-escaped
        because they come from arbitrary validator objects.
        """
        # Imported under its own name: a local called ``html`` would shadow
        # the module.
        from html import escape

        data = self.profiler.to_dict()

        parts = ["""<!DOCTYPE html>
<html>
<head>
    <title>Validator Profiling Report</title>
    <style>
        body { font-family: Arial, sans-serif; margin: 20px; }
        h1 { color: #333; }
        h2 { color: #666; border-bottom: 1px solid #ccc; }
        table { border-collapse: collapse; width: 100%; margin: 10px 0; }
        th, td { border: 1px solid #ddd; padding: 8px; text-align: left; }
        th { background-color: #4CAF50; color: white; }
        tr:nth-child(even) { background-color: #f2f2f2; }
        .metric { font-weight: bold; color: #2196F3; }
        .warning { color: #ff9800; }
        .error { color: #f44336; }
    </style>
</head>
<body>
    <h1>Validator Profiling Report</h1>
"""]

        # Summary section
        summary = data["summary"]
        parts.append(f"""
    <h2>Summary</h2>
    <table>
        <tr><td>Total Validators</td><td class="metric">{summary['total_validators']}</td></tr>
        <tr><td>Total Executions</td><td class="metric">{summary['total_executions']}</td></tr>
        <tr><td>Total Issues</td><td class="metric">{summary['total_issues']}</td></tr>
        <tr><td>Total Time</td><td class="metric">{summary['total_time_ms']:.2f}ms</td></tr>
    </table>
""")

        # Validator details
        parts.append("""
    <h2>Validator Performance</h2>
    <table>
        <tr>
            <th>Validator</th>
            <th>Category</th>
            <th>Executions</th>
            <th>Mean (ms)</th>
            <th>P95 (ms)</th>
            <th>P99 (ms)</th>
            <th>Max Peak (MB)</th>
            <th>Issues</th>
        </tr>
""")
        for name, metrics in data["global_metrics"].items():
            timing = metrics["timing"]
            memory = metrics["memory"]
            # Escape the free-text cells; the rest are numbers.
            safe_name = escape(f"{name}")
            safe_category = escape(f"{metrics['validator_category']}")
            parts.append(f"""
        <tr>
            <td>{safe_name}</td>
            <td>{safe_category}</td>
            <td>{metrics['execution_count']}</td>
            <td>{timing['mean_ms']:.2f}</td>
            <td>{timing['p95_ms']:.2f}</td>
            <td>{timing['p99_ms']:.2f}</td>
            <td>{memory['max_peak_mb']:.2f}</td>
            <td>{metrics['total_issues']}</td>
        </tr>
""")

        parts.append("""
    </table>
</body>
</html>
""")
        # One join instead of repeated string concatenation.
        return "".join(parts)
|
|
1278
|
+
|
|
1279
|
+
|
|
1280
|
+
# =============================================================================
|
|
1281
|
+
# Public API
|
|
1282
|
+
# =============================================================================
|
|
1283
|
+
|
|
1284
|
+
# Names exported by ``from <module> import *``, grouped by kind.
__all__ = [
    # -- enums --
    "MetricType",
    "ProfilerMode",
    # -- metric / session / config data classes --
    "TimingMetrics",
    "MemoryMetrics",
    "ThroughputMetrics",
    "ValidatorMetrics",
    "ExecutionSnapshot",
    "ProfilingSession",
    "ProfilerConfig",
    "ProfileContext",
    # -- profiler, tracker and report classes --
    "ValidatorProfiler",
    "MemoryTracker",
    "ProfilingReport",
    # -- module-level helpers and decorator --
    "get_default_profiler",
    "set_default_profiler",
    "reset_default_profiler",
    "profile_validator",
    "profiled",
    # -- tunable constants --
    "DEFAULT_TIMING_BUCKETS_MS",
    "MEMORY_WARNING_THRESHOLD_MB",
    "MEMORY_CRITICAL_THRESHOLD_MB",
    "MAX_HISTORY_ENTRIES",
]
|