truthound 1.0.8__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- truthound/__init__.py +162 -0
- truthound/adapters.py +100 -0
- truthound/api.py +365 -0
- truthound/audit/__init__.py +248 -0
- truthound/audit/core.py +967 -0
- truthound/audit/filters.py +620 -0
- truthound/audit/formatters.py +707 -0
- truthound/audit/logger.py +902 -0
- truthound/audit/middleware.py +571 -0
- truthound/audit/storage.py +1083 -0
- truthound/benchmark/__init__.py +123 -0
- truthound/benchmark/base.py +757 -0
- truthound/benchmark/comparison.py +635 -0
- truthound/benchmark/generators.py +706 -0
- truthound/benchmark/reporters.py +718 -0
- truthound/benchmark/runner.py +635 -0
- truthound/benchmark/scenarios.py +712 -0
- truthound/cache.py +252 -0
- truthound/checkpoint/__init__.py +136 -0
- truthound/checkpoint/actions/__init__.py +164 -0
- truthound/checkpoint/actions/base.py +324 -0
- truthound/checkpoint/actions/custom.py +234 -0
- truthound/checkpoint/actions/discord_notify.py +290 -0
- truthound/checkpoint/actions/email_notify.py +405 -0
- truthound/checkpoint/actions/github_action.py +406 -0
- truthound/checkpoint/actions/opsgenie.py +1499 -0
- truthound/checkpoint/actions/pagerduty.py +226 -0
- truthound/checkpoint/actions/slack_notify.py +233 -0
- truthound/checkpoint/actions/store_result.py +249 -0
- truthound/checkpoint/actions/teams_notify.py +1570 -0
- truthound/checkpoint/actions/telegram_notify.py +419 -0
- truthound/checkpoint/actions/update_docs.py +552 -0
- truthound/checkpoint/actions/webhook.py +293 -0
- truthound/checkpoint/analytics/__init__.py +147 -0
- truthound/checkpoint/analytics/aggregations/__init__.py +23 -0
- truthound/checkpoint/analytics/aggregations/rollup.py +481 -0
- truthound/checkpoint/analytics/aggregations/time_bucket.py +306 -0
- truthound/checkpoint/analytics/analyzers/__init__.py +17 -0
- truthound/checkpoint/analytics/analyzers/anomaly.py +386 -0
- truthound/checkpoint/analytics/analyzers/base.py +270 -0
- truthound/checkpoint/analytics/analyzers/forecast.py +421 -0
- truthound/checkpoint/analytics/analyzers/trend.py +314 -0
- truthound/checkpoint/analytics/models.py +292 -0
- truthound/checkpoint/analytics/protocols.py +549 -0
- truthound/checkpoint/analytics/service.py +718 -0
- truthound/checkpoint/analytics/stores/__init__.py +16 -0
- truthound/checkpoint/analytics/stores/base.py +306 -0
- truthound/checkpoint/analytics/stores/memory_store.py +353 -0
- truthound/checkpoint/analytics/stores/sqlite_store.py +557 -0
- truthound/checkpoint/analytics/stores/timescale_store.py +501 -0
- truthound/checkpoint/async_actions.py +794 -0
- truthound/checkpoint/async_base.py +708 -0
- truthound/checkpoint/async_checkpoint.py +617 -0
- truthound/checkpoint/async_runner.py +639 -0
- truthound/checkpoint/checkpoint.py +527 -0
- truthound/checkpoint/ci/__init__.py +61 -0
- truthound/checkpoint/ci/detector.py +355 -0
- truthound/checkpoint/ci/reporter.py +436 -0
- truthound/checkpoint/ci/templates.py +454 -0
- truthound/checkpoint/circuitbreaker/__init__.py +133 -0
- truthound/checkpoint/circuitbreaker/breaker.py +542 -0
- truthound/checkpoint/circuitbreaker/core.py +252 -0
- truthound/checkpoint/circuitbreaker/detection.py +459 -0
- truthound/checkpoint/circuitbreaker/middleware.py +389 -0
- truthound/checkpoint/circuitbreaker/registry.py +357 -0
- truthound/checkpoint/distributed/__init__.py +139 -0
- truthound/checkpoint/distributed/backends/__init__.py +35 -0
- truthound/checkpoint/distributed/backends/celery_backend.py +503 -0
- truthound/checkpoint/distributed/backends/kubernetes_backend.py +696 -0
- truthound/checkpoint/distributed/backends/local_backend.py +397 -0
- truthound/checkpoint/distributed/backends/ray_backend.py +625 -0
- truthound/checkpoint/distributed/base.py +774 -0
- truthound/checkpoint/distributed/orchestrator.py +765 -0
- truthound/checkpoint/distributed/protocols.py +842 -0
- truthound/checkpoint/distributed/registry.py +449 -0
- truthound/checkpoint/idempotency/__init__.py +120 -0
- truthound/checkpoint/idempotency/core.py +295 -0
- truthound/checkpoint/idempotency/fingerprint.py +454 -0
- truthound/checkpoint/idempotency/locking.py +604 -0
- truthound/checkpoint/idempotency/service.py +592 -0
- truthound/checkpoint/idempotency/stores.py +653 -0
- truthound/checkpoint/monitoring/__init__.py +134 -0
- truthound/checkpoint/monitoring/aggregators/__init__.py +15 -0
- truthound/checkpoint/monitoring/aggregators/base.py +372 -0
- truthound/checkpoint/monitoring/aggregators/realtime.py +300 -0
- truthound/checkpoint/monitoring/aggregators/window.py +493 -0
- truthound/checkpoint/monitoring/collectors/__init__.py +17 -0
- truthound/checkpoint/monitoring/collectors/base.py +257 -0
- truthound/checkpoint/monitoring/collectors/memory_collector.py +617 -0
- truthound/checkpoint/monitoring/collectors/prometheus_collector.py +451 -0
- truthound/checkpoint/monitoring/collectors/redis_collector.py +518 -0
- truthound/checkpoint/monitoring/events.py +410 -0
- truthound/checkpoint/monitoring/protocols.py +636 -0
- truthound/checkpoint/monitoring/service.py +578 -0
- truthound/checkpoint/monitoring/views/__init__.py +17 -0
- truthound/checkpoint/monitoring/views/base.py +172 -0
- truthound/checkpoint/monitoring/views/queue_view.py +220 -0
- truthound/checkpoint/monitoring/views/task_view.py +240 -0
- truthound/checkpoint/monitoring/views/worker_view.py +263 -0
- truthound/checkpoint/registry.py +337 -0
- truthound/checkpoint/runner.py +356 -0
- truthound/checkpoint/transaction/__init__.py +133 -0
- truthound/checkpoint/transaction/base.py +389 -0
- truthound/checkpoint/transaction/compensatable.py +537 -0
- truthound/checkpoint/transaction/coordinator.py +576 -0
- truthound/checkpoint/transaction/executor.py +622 -0
- truthound/checkpoint/transaction/idempotency.py +534 -0
- truthound/checkpoint/transaction/saga/__init__.py +143 -0
- truthound/checkpoint/transaction/saga/builder.py +584 -0
- truthound/checkpoint/transaction/saga/definition.py +515 -0
- truthound/checkpoint/transaction/saga/event_store.py +542 -0
- truthound/checkpoint/transaction/saga/patterns.py +833 -0
- truthound/checkpoint/transaction/saga/runner.py +718 -0
- truthound/checkpoint/transaction/saga/state_machine.py +793 -0
- truthound/checkpoint/transaction/saga/strategies.py +780 -0
- truthound/checkpoint/transaction/saga/testing.py +886 -0
- truthound/checkpoint/triggers/__init__.py +58 -0
- truthound/checkpoint/triggers/base.py +237 -0
- truthound/checkpoint/triggers/event.py +385 -0
- truthound/checkpoint/triggers/schedule.py +355 -0
- truthound/cli.py +2358 -0
- truthound/cli_modules/__init__.py +124 -0
- truthound/cli_modules/advanced/__init__.py +45 -0
- truthound/cli_modules/advanced/benchmark.py +343 -0
- truthound/cli_modules/advanced/docs.py +225 -0
- truthound/cli_modules/advanced/lineage.py +209 -0
- truthound/cli_modules/advanced/ml.py +320 -0
- truthound/cli_modules/advanced/realtime.py +196 -0
- truthound/cli_modules/checkpoint/__init__.py +46 -0
- truthound/cli_modules/checkpoint/init.py +114 -0
- truthound/cli_modules/checkpoint/list.py +71 -0
- truthound/cli_modules/checkpoint/run.py +159 -0
- truthound/cli_modules/checkpoint/validate.py +67 -0
- truthound/cli_modules/common/__init__.py +71 -0
- truthound/cli_modules/common/errors.py +414 -0
- truthound/cli_modules/common/options.py +419 -0
- truthound/cli_modules/common/output.py +507 -0
- truthound/cli_modules/common/protocol.py +552 -0
- truthound/cli_modules/core/__init__.py +48 -0
- truthound/cli_modules/core/check.py +123 -0
- truthound/cli_modules/core/compare.py +104 -0
- truthound/cli_modules/core/learn.py +57 -0
- truthound/cli_modules/core/mask.py +77 -0
- truthound/cli_modules/core/profile.py +65 -0
- truthound/cli_modules/core/scan.py +61 -0
- truthound/cli_modules/profiler/__init__.py +51 -0
- truthound/cli_modules/profiler/auto_profile.py +175 -0
- truthound/cli_modules/profiler/metadata.py +107 -0
- truthound/cli_modules/profiler/suite.py +283 -0
- truthound/cli_modules/registry.py +431 -0
- truthound/cli_modules/scaffolding/__init__.py +89 -0
- truthound/cli_modules/scaffolding/base.py +631 -0
- truthound/cli_modules/scaffolding/commands.py +545 -0
- truthound/cli_modules/scaffolding/plugins.py +1072 -0
- truthound/cli_modules/scaffolding/reporters.py +594 -0
- truthound/cli_modules/scaffolding/validators.py +1127 -0
- truthound/common/__init__.py +18 -0
- truthound/common/resilience/__init__.py +130 -0
- truthound/common/resilience/bulkhead.py +266 -0
- truthound/common/resilience/circuit_breaker.py +516 -0
- truthound/common/resilience/composite.py +332 -0
- truthound/common/resilience/config.py +292 -0
- truthound/common/resilience/protocols.py +217 -0
- truthound/common/resilience/rate_limiter.py +404 -0
- truthound/common/resilience/retry.py +341 -0
- truthound/datadocs/__init__.py +260 -0
- truthound/datadocs/base.py +571 -0
- truthound/datadocs/builder.py +761 -0
- truthound/datadocs/charts.py +764 -0
- truthound/datadocs/dashboard/__init__.py +63 -0
- truthound/datadocs/dashboard/app.py +576 -0
- truthound/datadocs/dashboard/components.py +584 -0
- truthound/datadocs/dashboard/state.py +240 -0
- truthound/datadocs/engine/__init__.py +46 -0
- truthound/datadocs/engine/context.py +376 -0
- truthound/datadocs/engine/pipeline.py +618 -0
- truthound/datadocs/engine/registry.py +469 -0
- truthound/datadocs/exporters/__init__.py +49 -0
- truthound/datadocs/exporters/base.py +198 -0
- truthound/datadocs/exporters/html.py +178 -0
- truthound/datadocs/exporters/json_exporter.py +253 -0
- truthound/datadocs/exporters/markdown.py +284 -0
- truthound/datadocs/exporters/pdf.py +392 -0
- truthound/datadocs/i18n/__init__.py +86 -0
- truthound/datadocs/i18n/catalog.py +960 -0
- truthound/datadocs/i18n/formatting.py +505 -0
- truthound/datadocs/i18n/loader.py +256 -0
- truthound/datadocs/i18n/plurals.py +378 -0
- truthound/datadocs/renderers/__init__.py +42 -0
- truthound/datadocs/renderers/base.py +401 -0
- truthound/datadocs/renderers/custom.py +342 -0
- truthound/datadocs/renderers/jinja.py +697 -0
- truthound/datadocs/sections.py +736 -0
- truthound/datadocs/styles.py +931 -0
- truthound/datadocs/themes/__init__.py +101 -0
- truthound/datadocs/themes/base.py +336 -0
- truthound/datadocs/themes/default.py +417 -0
- truthound/datadocs/themes/enterprise.py +419 -0
- truthound/datadocs/themes/loader.py +336 -0
- truthound/datadocs/themes.py +301 -0
- truthound/datadocs/transformers/__init__.py +57 -0
- truthound/datadocs/transformers/base.py +268 -0
- truthound/datadocs/transformers/enrichers.py +544 -0
- truthound/datadocs/transformers/filters.py +447 -0
- truthound/datadocs/transformers/i18n.py +468 -0
- truthound/datadocs/versioning/__init__.py +62 -0
- truthound/datadocs/versioning/diff.py +639 -0
- truthound/datadocs/versioning/storage.py +497 -0
- truthound/datadocs/versioning/version.py +358 -0
- truthound/datasources/__init__.py +223 -0
- truthound/datasources/_async_protocols.py +222 -0
- truthound/datasources/_protocols.py +159 -0
- truthound/datasources/adapters.py +428 -0
- truthound/datasources/async_base.py +599 -0
- truthound/datasources/async_factory.py +511 -0
- truthound/datasources/base.py +516 -0
- truthound/datasources/factory.py +433 -0
- truthound/datasources/nosql/__init__.py +47 -0
- truthound/datasources/nosql/base.py +487 -0
- truthound/datasources/nosql/elasticsearch.py +801 -0
- truthound/datasources/nosql/mongodb.py +636 -0
- truthound/datasources/pandas_optimized.py +582 -0
- truthound/datasources/pandas_source.py +216 -0
- truthound/datasources/polars_source.py +395 -0
- truthound/datasources/spark_source.py +479 -0
- truthound/datasources/sql/__init__.py +154 -0
- truthound/datasources/sql/base.py +710 -0
- truthound/datasources/sql/bigquery.py +410 -0
- truthound/datasources/sql/cloud_base.py +199 -0
- truthound/datasources/sql/databricks.py +471 -0
- truthound/datasources/sql/mysql.py +316 -0
- truthound/datasources/sql/oracle.py +427 -0
- truthound/datasources/sql/postgresql.py +321 -0
- truthound/datasources/sql/redshift.py +479 -0
- truthound/datasources/sql/snowflake.py +439 -0
- truthound/datasources/sql/sqlite.py +286 -0
- truthound/datasources/sql/sqlserver.py +437 -0
- truthound/datasources/streaming/__init__.py +47 -0
- truthound/datasources/streaming/base.py +350 -0
- truthound/datasources/streaming/kafka.py +670 -0
- truthound/decorators.py +98 -0
- truthound/docs/__init__.py +69 -0
- truthound/docs/extractor.py +971 -0
- truthound/docs/generator.py +601 -0
- truthound/docs/parser.py +1037 -0
- truthound/docs/renderer.py +999 -0
- truthound/drift/__init__.py +22 -0
- truthound/drift/compare.py +189 -0
- truthound/drift/detectors.py +464 -0
- truthound/drift/report.py +160 -0
- truthound/execution/__init__.py +65 -0
- truthound/execution/_protocols.py +324 -0
- truthound/execution/base.py +576 -0
- truthound/execution/distributed/__init__.py +179 -0
- truthound/execution/distributed/aggregations.py +731 -0
- truthound/execution/distributed/arrow_bridge.py +817 -0
- truthound/execution/distributed/base.py +550 -0
- truthound/execution/distributed/dask_engine.py +976 -0
- truthound/execution/distributed/mixins.py +766 -0
- truthound/execution/distributed/protocols.py +756 -0
- truthound/execution/distributed/ray_engine.py +1127 -0
- truthound/execution/distributed/registry.py +446 -0
- truthound/execution/distributed/spark_engine.py +1011 -0
- truthound/execution/distributed/validator_adapter.py +682 -0
- truthound/execution/pandas_engine.py +401 -0
- truthound/execution/polars_engine.py +497 -0
- truthound/execution/pushdown/__init__.py +230 -0
- truthound/execution/pushdown/ast.py +1550 -0
- truthound/execution/pushdown/builder.py +1550 -0
- truthound/execution/pushdown/dialects.py +1072 -0
- truthound/execution/pushdown/executor.py +829 -0
- truthound/execution/pushdown/optimizer.py +1041 -0
- truthound/execution/sql_engine.py +518 -0
- truthound/infrastructure/__init__.py +189 -0
- truthound/infrastructure/audit.py +1515 -0
- truthound/infrastructure/config.py +1133 -0
- truthound/infrastructure/encryption.py +1132 -0
- truthound/infrastructure/logging.py +1503 -0
- truthound/infrastructure/metrics.py +1220 -0
- truthound/lineage/__init__.py +89 -0
- truthound/lineage/base.py +746 -0
- truthound/lineage/impact_analysis.py +474 -0
- truthound/lineage/integrations/__init__.py +22 -0
- truthound/lineage/integrations/openlineage.py +548 -0
- truthound/lineage/tracker.py +512 -0
- truthound/lineage/visualization/__init__.py +33 -0
- truthound/lineage/visualization/protocols.py +145 -0
- truthound/lineage/visualization/renderers/__init__.py +20 -0
- truthound/lineage/visualization/renderers/cytoscape.py +329 -0
- truthound/lineage/visualization/renderers/d3.py +331 -0
- truthound/lineage/visualization/renderers/graphviz.py +276 -0
- truthound/lineage/visualization/renderers/mermaid.py +308 -0
- truthound/maskers.py +113 -0
- truthound/ml/__init__.py +124 -0
- truthound/ml/anomaly_models/__init__.py +31 -0
- truthound/ml/anomaly_models/ensemble.py +362 -0
- truthound/ml/anomaly_models/isolation_forest.py +444 -0
- truthound/ml/anomaly_models/statistical.py +392 -0
- truthound/ml/base.py +1178 -0
- truthound/ml/drift_detection/__init__.py +26 -0
- truthound/ml/drift_detection/concept.py +381 -0
- truthound/ml/drift_detection/distribution.py +361 -0
- truthound/ml/drift_detection/feature.py +442 -0
- truthound/ml/drift_detection/multivariate.py +495 -0
- truthound/ml/monitoring/__init__.py +88 -0
- truthound/ml/monitoring/alerting/__init__.py +33 -0
- truthound/ml/monitoring/alerting/handlers.py +427 -0
- truthound/ml/monitoring/alerting/rules.py +508 -0
- truthound/ml/monitoring/collectors/__init__.py +19 -0
- truthound/ml/monitoring/collectors/composite.py +105 -0
- truthound/ml/monitoring/collectors/drift.py +324 -0
- truthound/ml/monitoring/collectors/performance.py +179 -0
- truthound/ml/monitoring/collectors/quality.py +369 -0
- truthound/ml/monitoring/monitor.py +536 -0
- truthound/ml/monitoring/protocols.py +451 -0
- truthound/ml/monitoring/stores/__init__.py +15 -0
- truthound/ml/monitoring/stores/memory.py +201 -0
- truthound/ml/monitoring/stores/prometheus.py +296 -0
- truthound/ml/rule_learning/__init__.py +25 -0
- truthound/ml/rule_learning/constraint_miner.py +443 -0
- truthound/ml/rule_learning/pattern_learner.py +499 -0
- truthound/ml/rule_learning/profile_learner.py +462 -0
- truthound/multitenancy/__init__.py +326 -0
- truthound/multitenancy/core.py +852 -0
- truthound/multitenancy/integration.py +597 -0
- truthound/multitenancy/isolation.py +630 -0
- truthound/multitenancy/manager.py +770 -0
- truthound/multitenancy/middleware.py +765 -0
- truthound/multitenancy/quota.py +537 -0
- truthound/multitenancy/resolvers.py +603 -0
- truthound/multitenancy/storage.py +703 -0
- truthound/observability/__init__.py +307 -0
- truthound/observability/context.py +531 -0
- truthound/observability/instrumentation.py +611 -0
- truthound/observability/logging.py +887 -0
- truthound/observability/metrics.py +1157 -0
- truthound/observability/tracing/__init__.py +178 -0
- truthound/observability/tracing/baggage.py +310 -0
- truthound/observability/tracing/config.py +426 -0
- truthound/observability/tracing/exporter.py +787 -0
- truthound/observability/tracing/integration.py +1018 -0
- truthound/observability/tracing/otel/__init__.py +146 -0
- truthound/observability/tracing/otel/adapter.py +982 -0
- truthound/observability/tracing/otel/bridge.py +1177 -0
- truthound/observability/tracing/otel/compat.py +681 -0
- truthound/observability/tracing/otel/config.py +691 -0
- truthound/observability/tracing/otel/detection.py +327 -0
- truthound/observability/tracing/otel/protocols.py +426 -0
- truthound/observability/tracing/processor.py +561 -0
- truthound/observability/tracing/propagator.py +757 -0
- truthound/observability/tracing/provider.py +569 -0
- truthound/observability/tracing/resource.py +515 -0
- truthound/observability/tracing/sampler.py +487 -0
- truthound/observability/tracing/span.py +676 -0
- truthound/plugins/__init__.py +198 -0
- truthound/plugins/base.py +599 -0
- truthound/plugins/cli.py +680 -0
- truthound/plugins/dependencies/__init__.py +42 -0
- truthound/plugins/dependencies/graph.py +422 -0
- truthound/plugins/dependencies/resolver.py +417 -0
- truthound/plugins/discovery.py +379 -0
- truthound/plugins/docs/__init__.py +46 -0
- truthound/plugins/docs/extractor.py +444 -0
- truthound/plugins/docs/renderer.py +499 -0
- truthound/plugins/enterprise_manager.py +877 -0
- truthound/plugins/examples/__init__.py +19 -0
- truthound/plugins/examples/custom_validators.py +317 -0
- truthound/plugins/examples/slack_notifier.py +312 -0
- truthound/plugins/examples/xml_reporter.py +254 -0
- truthound/plugins/hooks.py +558 -0
- truthound/plugins/lifecycle/__init__.py +43 -0
- truthound/plugins/lifecycle/hot_reload.py +402 -0
- truthound/plugins/lifecycle/manager.py +371 -0
- truthound/plugins/manager.py +736 -0
- truthound/plugins/registry.py +338 -0
- truthound/plugins/security/__init__.py +93 -0
- truthound/plugins/security/exceptions.py +332 -0
- truthound/plugins/security/policies.py +348 -0
- truthound/plugins/security/protocols.py +643 -0
- truthound/plugins/security/sandbox/__init__.py +45 -0
- truthound/plugins/security/sandbox/context.py +158 -0
- truthound/plugins/security/sandbox/engines/__init__.py +19 -0
- truthound/plugins/security/sandbox/engines/container.py +379 -0
- truthound/plugins/security/sandbox/engines/noop.py +144 -0
- truthound/plugins/security/sandbox/engines/process.py +336 -0
- truthound/plugins/security/sandbox/factory.py +211 -0
- truthound/plugins/security/signing/__init__.py +57 -0
- truthound/plugins/security/signing/service.py +330 -0
- truthound/plugins/security/signing/trust_store.py +368 -0
- truthound/plugins/security/signing/verifier.py +459 -0
- truthound/plugins/versioning/__init__.py +41 -0
- truthound/plugins/versioning/constraints.py +297 -0
- truthound/plugins/versioning/resolver.py +329 -0
- truthound/profiler/__init__.py +1729 -0
- truthound/profiler/_lazy.py +452 -0
- truthound/profiler/ab_testing/__init__.py +80 -0
- truthound/profiler/ab_testing/analysis.py +449 -0
- truthound/profiler/ab_testing/base.py +257 -0
- truthound/profiler/ab_testing/experiment.py +395 -0
- truthound/profiler/ab_testing/tracking.py +368 -0
- truthound/profiler/auto_threshold.py +1170 -0
- truthound/profiler/base.py +579 -0
- truthound/profiler/cache_patterns.py +911 -0
- truthound/profiler/caching.py +1303 -0
- truthound/profiler/column_profiler.py +712 -0
- truthound/profiler/comparison.py +1007 -0
- truthound/profiler/custom_patterns.py +1170 -0
- truthound/profiler/dashboard/__init__.py +50 -0
- truthound/profiler/dashboard/app.py +476 -0
- truthound/profiler/dashboard/components.py +457 -0
- truthound/profiler/dashboard/config.py +72 -0
- truthound/profiler/distributed/__init__.py +83 -0
- truthound/profiler/distributed/base.py +281 -0
- truthound/profiler/distributed/dask_backend.py +498 -0
- truthound/profiler/distributed/local_backend.py +293 -0
- truthound/profiler/distributed/profiler.py +304 -0
- truthound/profiler/distributed/ray_backend.py +374 -0
- truthound/profiler/distributed/spark_backend.py +375 -0
- truthound/profiler/distributed.py +1366 -0
- truthound/profiler/enterprise_sampling.py +1065 -0
- truthound/profiler/errors.py +488 -0
- truthound/profiler/evolution/__init__.py +91 -0
- truthound/profiler/evolution/alerts.py +426 -0
- truthound/profiler/evolution/changes.py +206 -0
- truthound/profiler/evolution/compatibility.py +365 -0
- truthound/profiler/evolution/detector.py +372 -0
- truthound/profiler/evolution/protocols.py +121 -0
- truthound/profiler/generators/__init__.py +48 -0
- truthound/profiler/generators/base.py +384 -0
- truthound/profiler/generators/ml_rules.py +375 -0
- truthound/profiler/generators/pattern_rules.py +384 -0
- truthound/profiler/generators/schema_rules.py +267 -0
- truthound/profiler/generators/stats_rules.py +324 -0
- truthound/profiler/generators/suite_generator.py +857 -0
- truthound/profiler/i18n.py +1542 -0
- truthound/profiler/incremental.py +554 -0
- truthound/profiler/incremental_validation.py +1710 -0
- truthound/profiler/integration/__init__.py +73 -0
- truthound/profiler/integration/adapters.py +345 -0
- truthound/profiler/integration/context.py +371 -0
- truthound/profiler/integration/executor.py +527 -0
- truthound/profiler/integration/naming.py +75 -0
- truthound/profiler/integration/protocols.py +243 -0
- truthound/profiler/memory.py +1185 -0
- truthound/profiler/migration/__init__.py +60 -0
- truthound/profiler/migration/base.py +345 -0
- truthound/profiler/migration/manager.py +444 -0
- truthound/profiler/migration/v1_0_to_v1_1.py +484 -0
- truthound/profiler/ml/__init__.py +73 -0
- truthound/profiler/ml/base.py +244 -0
- truthound/profiler/ml/classifier.py +507 -0
- truthound/profiler/ml/feature_extraction.py +604 -0
- truthound/profiler/ml/pretrained.py +448 -0
- truthound/profiler/ml_inference.py +1276 -0
- truthound/profiler/native_patterns.py +815 -0
- truthound/profiler/observability.py +1184 -0
- truthound/profiler/process_timeout.py +1566 -0
- truthound/profiler/progress.py +568 -0
- truthound/profiler/progress_callbacks.py +1734 -0
- truthound/profiler/quality.py +1345 -0
- truthound/profiler/resilience.py +1180 -0
- truthound/profiler/sampled_matcher.py +794 -0
- truthound/profiler/sampling.py +1288 -0
- truthound/profiler/scheduling/__init__.py +82 -0
- truthound/profiler/scheduling/protocols.py +214 -0
- truthound/profiler/scheduling/scheduler.py +474 -0
- truthound/profiler/scheduling/storage.py +457 -0
- truthound/profiler/scheduling/triggers.py +449 -0
- truthound/profiler/schema.py +603 -0
- truthound/profiler/streaming.py +685 -0
- truthound/profiler/streaming_patterns.py +1354 -0
- truthound/profiler/suite_cli.py +625 -0
- truthound/profiler/suite_config.py +789 -0
- truthound/profiler/suite_export.py +1268 -0
- truthound/profiler/table_profiler.py +547 -0
- truthound/profiler/timeout.py +565 -0
- truthound/profiler/validation.py +1532 -0
- truthound/profiler/visualization/__init__.py +118 -0
- truthound/profiler/visualization/base.py +346 -0
- truthound/profiler/visualization/generator.py +1259 -0
- truthound/profiler/visualization/plotly_renderer.py +811 -0
- truthound/profiler/visualization/renderers.py +669 -0
- truthound/profiler/visualization/sections.py +540 -0
- truthound/profiler/visualization.py +2122 -0
- truthound/profiler/yaml_validation.py +1151 -0
- truthound/py.typed +0 -0
- truthound/ratelimit/__init__.py +248 -0
- truthound/ratelimit/algorithms.py +1108 -0
- truthound/ratelimit/core.py +573 -0
- truthound/ratelimit/integration.py +532 -0
- truthound/ratelimit/limiter.py +663 -0
- truthound/ratelimit/middleware.py +700 -0
- truthound/ratelimit/policy.py +792 -0
- truthound/ratelimit/storage.py +763 -0
- truthound/rbac/__init__.py +340 -0
- truthound/rbac/core.py +976 -0
- truthound/rbac/integration.py +760 -0
- truthound/rbac/manager.py +1052 -0
- truthound/rbac/middleware.py +842 -0
- truthound/rbac/policy.py +954 -0
- truthound/rbac/storage.py +878 -0
- truthound/realtime/__init__.py +141 -0
- truthound/realtime/adapters/__init__.py +43 -0
- truthound/realtime/adapters/base.py +533 -0
- truthound/realtime/adapters/kafka.py +487 -0
- truthound/realtime/adapters/kinesis.py +479 -0
- truthound/realtime/adapters/mock.py +243 -0
- truthound/realtime/base.py +553 -0
- truthound/realtime/factory.py +382 -0
- truthound/realtime/incremental.py +660 -0
- truthound/realtime/processing/__init__.py +67 -0
- truthound/realtime/processing/exactly_once.py +575 -0
- truthound/realtime/processing/state.py +547 -0
- truthound/realtime/processing/windows.py +647 -0
- truthound/realtime/protocols.py +569 -0
- truthound/realtime/streaming.py +605 -0
- truthound/realtime/testing/__init__.py +32 -0
- truthound/realtime/testing/containers.py +615 -0
- truthound/realtime/testing/fixtures.py +484 -0
- truthound/report.py +280 -0
- truthound/reporters/__init__.py +46 -0
- truthound/reporters/_protocols.py +30 -0
- truthound/reporters/base.py +324 -0
- truthound/reporters/ci/__init__.py +66 -0
- truthound/reporters/ci/azure.py +436 -0
- truthound/reporters/ci/base.py +509 -0
- truthound/reporters/ci/bitbucket.py +567 -0
- truthound/reporters/ci/circleci.py +547 -0
- truthound/reporters/ci/detection.py +364 -0
- truthound/reporters/ci/factory.py +182 -0
- truthound/reporters/ci/github.py +388 -0
- truthound/reporters/ci/gitlab.py +471 -0
- truthound/reporters/ci/jenkins.py +525 -0
- truthound/reporters/console_reporter.py +299 -0
- truthound/reporters/factory.py +211 -0
- truthound/reporters/html_reporter.py +524 -0
- truthound/reporters/json_reporter.py +256 -0
- truthound/reporters/markdown_reporter.py +280 -0
- truthound/reporters/sdk/__init__.py +174 -0
- truthound/reporters/sdk/builder.py +558 -0
- truthound/reporters/sdk/mixins.py +1150 -0
- truthound/reporters/sdk/schema.py +1493 -0
- truthound/reporters/sdk/templates.py +666 -0
- truthound/reporters/sdk/testing.py +968 -0
- truthound/scanners.py +170 -0
- truthound/scheduling/__init__.py +122 -0
- truthound/scheduling/cron.py +1136 -0
- truthound/scheduling/presets.py +212 -0
- truthound/schema.py +275 -0
- truthound/secrets/__init__.py +173 -0
- truthound/secrets/base.py +618 -0
- truthound/secrets/cloud.py +682 -0
- truthound/secrets/integration.py +507 -0
- truthound/secrets/manager.py +633 -0
- truthound/secrets/oidc/__init__.py +172 -0
- truthound/secrets/oidc/base.py +902 -0
- truthound/secrets/oidc/credential_provider.py +623 -0
- truthound/secrets/oidc/exchangers.py +1001 -0
- truthound/secrets/oidc/github/__init__.py +110 -0
- truthound/secrets/oidc/github/claims.py +718 -0
- truthound/secrets/oidc/github/enhanced_provider.py +693 -0
- truthound/secrets/oidc/github/trust_policy.py +742 -0
- truthound/secrets/oidc/github/verification.py +723 -0
- truthound/secrets/oidc/github/workflow.py +691 -0
- truthound/secrets/oidc/providers.py +825 -0
- truthound/secrets/providers.py +506 -0
- truthound/secrets/resolver.py +495 -0
- truthound/stores/__init__.py +177 -0
- truthound/stores/backends/__init__.py +18 -0
- truthound/stores/backends/_protocols.py +340 -0
- truthound/stores/backends/azure_blob.py +530 -0
- truthound/stores/backends/concurrent_filesystem.py +915 -0
- truthound/stores/backends/connection_pool.py +1365 -0
- truthound/stores/backends/database.py +743 -0
- truthound/stores/backends/filesystem.py +538 -0
- truthound/stores/backends/gcs.py +399 -0
- truthound/stores/backends/memory.py +354 -0
- truthound/stores/backends/s3.py +434 -0
- truthound/stores/backpressure/__init__.py +84 -0
- truthound/stores/backpressure/base.py +375 -0
- truthound/stores/backpressure/circuit_breaker.py +434 -0
- truthound/stores/backpressure/monitor.py +376 -0
- truthound/stores/backpressure/strategies.py +677 -0
- truthound/stores/base.py +551 -0
- truthound/stores/batching/__init__.py +65 -0
- truthound/stores/batching/base.py +305 -0
- truthound/stores/batching/buffer.py +370 -0
- truthound/stores/batching/store.py +248 -0
- truthound/stores/batching/writer.py +521 -0
- truthound/stores/caching/__init__.py +60 -0
- truthound/stores/caching/backends.py +684 -0
- truthound/stores/caching/base.py +356 -0
- truthound/stores/caching/store.py +305 -0
- truthound/stores/compression/__init__.py +193 -0
- truthound/stores/compression/adaptive.py +694 -0
- truthound/stores/compression/base.py +514 -0
- truthound/stores/compression/pipeline.py +868 -0
- truthound/stores/compression/providers.py +672 -0
- truthound/stores/compression/streaming.py +832 -0
- truthound/stores/concurrency/__init__.py +81 -0
- truthound/stores/concurrency/atomic.py +556 -0
- truthound/stores/concurrency/index.py +775 -0
- truthound/stores/concurrency/locks.py +576 -0
- truthound/stores/concurrency/manager.py +482 -0
- truthound/stores/encryption/__init__.py +297 -0
- truthound/stores/encryption/base.py +952 -0
- truthound/stores/encryption/keys.py +1191 -0
- truthound/stores/encryption/pipeline.py +903 -0
- truthound/stores/encryption/providers.py +953 -0
- truthound/stores/encryption/streaming.py +950 -0
- truthound/stores/expectations.py +227 -0
- truthound/stores/factory.py +246 -0
- truthound/stores/migration/__init__.py +75 -0
- truthound/stores/migration/base.py +480 -0
- truthound/stores/migration/manager.py +347 -0
- truthound/stores/migration/registry.py +382 -0
- truthound/stores/migration/store.py +559 -0
- truthound/stores/observability/__init__.py +106 -0
- truthound/stores/observability/audit.py +718 -0
- truthound/stores/observability/config.py +270 -0
- truthound/stores/observability/factory.py +208 -0
- truthound/stores/observability/metrics.py +636 -0
- truthound/stores/observability/protocols.py +410 -0
- truthound/stores/observability/store.py +570 -0
- truthound/stores/observability/tracing.py +784 -0
- truthound/stores/replication/__init__.py +76 -0
- truthound/stores/replication/base.py +260 -0
- truthound/stores/replication/monitor.py +269 -0
- truthound/stores/replication/store.py +439 -0
- truthound/stores/replication/syncer.py +391 -0
- truthound/stores/results.py +359 -0
- truthound/stores/retention/__init__.py +77 -0
- truthound/stores/retention/base.py +378 -0
- truthound/stores/retention/policies.py +621 -0
- truthound/stores/retention/scheduler.py +279 -0
- truthound/stores/retention/store.py +526 -0
- truthound/stores/streaming/__init__.py +138 -0
- truthound/stores/streaming/base.py +801 -0
- truthound/stores/streaming/database.py +984 -0
- truthound/stores/streaming/filesystem.py +719 -0
- truthound/stores/streaming/reader.py +629 -0
- truthound/stores/streaming/s3.py +843 -0
- truthound/stores/streaming/writer.py +790 -0
- truthound/stores/tiering/__init__.py +108 -0
- truthound/stores/tiering/base.py +462 -0
- truthound/stores/tiering/manager.py +249 -0
- truthound/stores/tiering/policies.py +692 -0
- truthound/stores/tiering/store.py +526 -0
- truthound/stores/versioning/__init__.py +56 -0
- truthound/stores/versioning/base.py +376 -0
- truthound/stores/versioning/store.py +660 -0
- truthound/stores/versioning/strategies.py +353 -0
- truthound/types.py +56 -0
- truthound/validators/__init__.py +774 -0
- truthound/validators/aggregate/__init__.py +27 -0
- truthound/validators/aggregate/central.py +116 -0
- truthound/validators/aggregate/extremes.py +116 -0
- truthound/validators/aggregate/spread.py +118 -0
- truthound/validators/aggregate/sum.py +64 -0
- truthound/validators/aggregate/type.py +78 -0
- truthound/validators/anomaly/__init__.py +93 -0
- truthound/validators/anomaly/base.py +431 -0
- truthound/validators/anomaly/ml_based.py +1190 -0
- truthound/validators/anomaly/multivariate.py +647 -0
- truthound/validators/anomaly/statistical.py +599 -0
- truthound/validators/base.py +1089 -0
- truthound/validators/business_rule/__init__.py +46 -0
- truthound/validators/business_rule/base.py +147 -0
- truthound/validators/business_rule/checksum.py +509 -0
- truthound/validators/business_rule/financial.py +526 -0
- truthound/validators/cache.py +733 -0
- truthound/validators/completeness/__init__.py +39 -0
- truthound/validators/completeness/conditional.py +73 -0
- truthound/validators/completeness/default.py +98 -0
- truthound/validators/completeness/empty.py +103 -0
- truthound/validators/completeness/nan.py +337 -0
- truthound/validators/completeness/null.py +152 -0
- truthound/validators/cross_table/__init__.py +17 -0
- truthound/validators/cross_table/aggregate.py +333 -0
- truthound/validators/cross_table/row_count.py +122 -0
- truthound/validators/datetime/__init__.py +29 -0
- truthound/validators/datetime/format.py +78 -0
- truthound/validators/datetime/freshness.py +269 -0
- truthound/validators/datetime/order.py +73 -0
- truthound/validators/datetime/parseable.py +185 -0
- truthound/validators/datetime/range.py +202 -0
- truthound/validators/datetime/timezone.py +69 -0
- truthound/validators/distribution/__init__.py +49 -0
- truthound/validators/distribution/distribution.py +128 -0
- truthound/validators/distribution/monotonic.py +119 -0
- truthound/validators/distribution/outlier.py +178 -0
- truthound/validators/distribution/quantile.py +80 -0
- truthound/validators/distribution/range.py +254 -0
- truthound/validators/distribution/set.py +125 -0
- truthound/validators/distribution/statistical.py +459 -0
- truthound/validators/drift/__init__.py +79 -0
- truthound/validators/drift/base.py +427 -0
- truthound/validators/drift/multi_feature.py +401 -0
- truthound/validators/drift/numeric.py +395 -0
- truthound/validators/drift/psi.py +446 -0
- truthound/validators/drift/statistical.py +510 -0
- truthound/validators/enterprise.py +1658 -0
- truthound/validators/geospatial/__init__.py +80 -0
- truthound/validators/geospatial/base.py +97 -0
- truthound/validators/geospatial/boundary.py +238 -0
- truthound/validators/geospatial/coordinate.py +351 -0
- truthound/validators/geospatial/distance.py +399 -0
- truthound/validators/geospatial/polygon.py +665 -0
- truthound/validators/i18n/__init__.py +308 -0
- truthound/validators/i18n/bidi.py +571 -0
- truthound/validators/i18n/catalogs.py +570 -0
- truthound/validators/i18n/dialects.py +763 -0
- truthound/validators/i18n/extended_catalogs.py +549 -0
- truthound/validators/i18n/formatting.py +1434 -0
- truthound/validators/i18n/loader.py +1020 -0
- truthound/validators/i18n/messages.py +521 -0
- truthound/validators/i18n/plural.py +683 -0
- truthound/validators/i18n/protocols.py +855 -0
- truthound/validators/i18n/tms.py +1162 -0
- truthound/validators/localization/__init__.py +53 -0
- truthound/validators/localization/base.py +122 -0
- truthound/validators/localization/chinese.py +362 -0
- truthound/validators/localization/japanese.py +275 -0
- truthound/validators/localization/korean.py +524 -0
- truthound/validators/memory/__init__.py +94 -0
- truthound/validators/memory/approximate_knn.py +506 -0
- truthound/validators/memory/base.py +547 -0
- truthound/validators/memory/sgd_online.py +719 -0
- truthound/validators/memory/streaming_ecdf.py +753 -0
- truthound/validators/ml_feature/__init__.py +54 -0
- truthound/validators/ml_feature/base.py +249 -0
- truthound/validators/ml_feature/correlation.py +299 -0
- truthound/validators/ml_feature/leakage.py +344 -0
- truthound/validators/ml_feature/null_impact.py +270 -0
- truthound/validators/ml_feature/scale.py +264 -0
- truthound/validators/multi_column/__init__.py +89 -0
- truthound/validators/multi_column/arithmetic.py +284 -0
- truthound/validators/multi_column/base.py +231 -0
- truthound/validators/multi_column/comparison.py +273 -0
- truthound/validators/multi_column/consistency.py +312 -0
- truthound/validators/multi_column/statistical.py +299 -0
- truthound/validators/optimization/__init__.py +164 -0
- truthound/validators/optimization/aggregation.py +563 -0
- truthound/validators/optimization/covariance.py +556 -0
- truthound/validators/optimization/geo.py +626 -0
- truthound/validators/optimization/graph.py +587 -0
- truthound/validators/optimization/orchestrator.py +970 -0
- truthound/validators/optimization/profiling.py +1312 -0
- truthound/validators/privacy/__init__.py +223 -0
- truthound/validators/privacy/base.py +635 -0
- truthound/validators/privacy/ccpa.py +670 -0
- truthound/validators/privacy/gdpr.py +728 -0
- truthound/validators/privacy/global_patterns.py +604 -0
- truthound/validators/privacy/plugins.py +867 -0
- truthound/validators/profiling/__init__.py +52 -0
- truthound/validators/profiling/base.py +175 -0
- truthound/validators/profiling/cardinality.py +312 -0
- truthound/validators/profiling/entropy.py +391 -0
- truthound/validators/profiling/frequency.py +455 -0
- truthound/validators/pushdown_support.py +660 -0
- truthound/validators/query/__init__.py +91 -0
- truthound/validators/query/aggregate.py +346 -0
- truthound/validators/query/base.py +246 -0
- truthound/validators/query/column.py +249 -0
- truthound/validators/query/expression.py +274 -0
- truthound/validators/query/result.py +323 -0
- truthound/validators/query/row_count.py +264 -0
- truthound/validators/referential/__init__.py +80 -0
- truthound/validators/referential/base.py +395 -0
- truthound/validators/referential/cascade.py +391 -0
- truthound/validators/referential/circular.py +563 -0
- truthound/validators/referential/foreign_key.py +624 -0
- truthound/validators/referential/orphan.py +485 -0
- truthound/validators/registry.py +112 -0
- truthound/validators/schema/__init__.py +41 -0
- truthound/validators/schema/column_count.py +142 -0
- truthound/validators/schema/column_exists.py +80 -0
- truthound/validators/schema/column_order.py +82 -0
- truthound/validators/schema/column_pair.py +85 -0
- truthound/validators/schema/column_pair_set.py +195 -0
- truthound/validators/schema/column_type.py +94 -0
- truthound/validators/schema/multi_column.py +53 -0
- truthound/validators/schema/multi_column_aggregate.py +175 -0
- truthound/validators/schema/referential.py +274 -0
- truthound/validators/schema/table_schema.py +91 -0
- truthound/validators/schema_validator.py +219 -0
- truthound/validators/sdk/__init__.py +250 -0
- truthound/validators/sdk/builder.py +680 -0
- truthound/validators/sdk/decorators.py +474 -0
- truthound/validators/sdk/enterprise/__init__.py +211 -0
- truthound/validators/sdk/enterprise/docs.py +725 -0
- truthound/validators/sdk/enterprise/fuzzing.py +659 -0
- truthound/validators/sdk/enterprise/licensing.py +709 -0
- truthound/validators/sdk/enterprise/manager.py +543 -0
- truthound/validators/sdk/enterprise/resources.py +628 -0
- truthound/validators/sdk/enterprise/sandbox.py +766 -0
- truthound/validators/sdk/enterprise/signing.py +603 -0
- truthound/validators/sdk/enterprise/templates.py +865 -0
- truthound/validators/sdk/enterprise/versioning.py +659 -0
- truthound/validators/sdk/templates.py +757 -0
- truthound/validators/sdk/testing.py +807 -0
- truthound/validators/security/__init__.py +181 -0
- truthound/validators/security/redos/__init__.py +182 -0
- truthound/validators/security/redos/core.py +861 -0
- truthound/validators/security/redos/cpu_monitor.py +593 -0
- truthound/validators/security/redos/cve_database.py +791 -0
- truthound/validators/security/redos/ml/__init__.py +155 -0
- truthound/validators/security/redos/ml/base.py +785 -0
- truthound/validators/security/redos/ml/datasets.py +618 -0
- truthound/validators/security/redos/ml/features.py +359 -0
- truthound/validators/security/redos/ml/models.py +1000 -0
- truthound/validators/security/redos/ml/predictor.py +507 -0
- truthound/validators/security/redos/ml/storage.py +632 -0
- truthound/validators/security/redos/ml/training.py +571 -0
- truthound/validators/security/redos/ml_analyzer.py +937 -0
- truthound/validators/security/redos/optimizer.py +674 -0
- truthound/validators/security/redos/profiler.py +682 -0
- truthound/validators/security/redos/re2_engine.py +709 -0
- truthound/validators/security/redos.py +886 -0
- truthound/validators/security/sql_security.py +1247 -0
- truthound/validators/streaming/__init__.py +126 -0
- truthound/validators/streaming/base.py +292 -0
- truthound/validators/streaming/completeness.py +210 -0
- truthound/validators/streaming/mixin.py +575 -0
- truthound/validators/streaming/range.py +308 -0
- truthound/validators/streaming/sources.py +846 -0
- truthound/validators/string/__init__.py +57 -0
- truthound/validators/string/casing.py +158 -0
- truthound/validators/string/charset.py +96 -0
- truthound/validators/string/format.py +501 -0
- truthound/validators/string/json.py +77 -0
- truthound/validators/string/json_schema.py +184 -0
- truthound/validators/string/length.py +104 -0
- truthound/validators/string/like_pattern.py +237 -0
- truthound/validators/string/regex.py +202 -0
- truthound/validators/string/regex_extended.py +435 -0
- truthound/validators/table/__init__.py +88 -0
- truthound/validators/table/base.py +78 -0
- truthound/validators/table/column_count.py +198 -0
- truthound/validators/table/freshness.py +362 -0
- truthound/validators/table/row_count.py +251 -0
- truthound/validators/table/schema.py +333 -0
- truthound/validators/table/size.py +285 -0
- truthound/validators/timeout/__init__.py +102 -0
- truthound/validators/timeout/advanced/__init__.py +247 -0
- truthound/validators/timeout/advanced/circuit_breaker.py +675 -0
- truthound/validators/timeout/advanced/prediction.py +773 -0
- truthound/validators/timeout/advanced/priority.py +618 -0
- truthound/validators/timeout/advanced/redis_backend.py +770 -0
- truthound/validators/timeout/advanced/retry.py +721 -0
- truthound/validators/timeout/advanced/sampling.py +788 -0
- truthound/validators/timeout/advanced/sla.py +661 -0
- truthound/validators/timeout/advanced/telemetry.py +804 -0
- truthound/validators/timeout/cascade.py +477 -0
- truthound/validators/timeout/deadline.py +657 -0
- truthound/validators/timeout/degradation.py +525 -0
- truthound/validators/timeout/distributed.py +597 -0
- truthound/validators/timeseries/__init__.py +89 -0
- truthound/validators/timeseries/base.py +326 -0
- truthound/validators/timeseries/completeness.py +617 -0
- truthound/validators/timeseries/gap.py +485 -0
- truthound/validators/timeseries/monotonic.py +310 -0
- truthound/validators/timeseries/seasonality.py +422 -0
- truthound/validators/timeseries/trend.py +510 -0
- truthound/validators/uniqueness/__init__.py +59 -0
- truthound/validators/uniqueness/approximate.py +475 -0
- truthound/validators/uniqueness/distinct_values.py +253 -0
- truthound/validators/uniqueness/duplicate.py +118 -0
- truthound/validators/uniqueness/primary_key.py +140 -0
- truthound/validators/uniqueness/unique.py +191 -0
- truthound/validators/uniqueness/within_record.py +599 -0
- truthound/validators/utils.py +756 -0
- truthound-1.0.8.dist-info/METADATA +474 -0
- truthound-1.0.8.dist-info/RECORD +877 -0
- truthound-1.0.8.dist-info/WHEEL +4 -0
- truthound-1.0.8.dist-info/entry_points.txt +2 -0
- truthound-1.0.8.dist-info/licenses/LICENSE +190 -0
|
@@ -0,0 +1,1185 @@
|
|
|
1
|
+
"""Memory usage monitoring and OOM prevention.
|
|
2
|
+
|
|
3
|
+
This module provides comprehensive memory monitoring capabilities:
|
|
4
|
+
- Real-time memory usage tracking
|
|
5
|
+
- OOM (Out of Memory) risk detection and prevention
|
|
6
|
+
- Memory-aware batch processing
|
|
7
|
+
- Memory profiling for optimization
|
|
8
|
+
- Automatic memory cleanup triggers
|
|
9
|
+
|
|
10
|
+
Key features:
|
|
11
|
+
- psutil-based memory tracking
|
|
12
|
+
- Configurable thresholds and alerts
|
|
13
|
+
- Context managers for scoped monitoring
|
|
14
|
+
- Integration with profiling operations
|
|
15
|
+
- Memory leak detection
|
|
16
|
+
|
|
17
|
+
Example:
|
|
18
|
+
from truthound.profiler.memory import (
|
|
19
|
+
MemoryMonitor,
|
|
20
|
+
memory_guard,
|
|
21
|
+
MemoryTracker,
|
|
22
|
+
)
|
|
23
|
+
|
|
24
|
+
# Simple usage with context manager
|
|
25
|
+
with memory_guard(max_memory_mb=1024):
|
|
26
|
+
process_large_dataset(data)
|
|
27
|
+
|
|
28
|
+
# Detailed monitoring
|
|
29
|
+
monitor = MemoryMonitor(warning_threshold_percent=80)
|
|
30
|
+
monitor.start()
|
|
31
|
+
|
|
32
|
+
for batch in data_batches:
|
|
33
|
+
if monitor.is_critical():
|
|
34
|
+
break
|
|
35
|
+
process_batch(batch)
|
|
36
|
+
|
|
37
|
+
report = monitor.stop()
|
|
38
|
+
print(f"Peak memory: {report.peak_rss_mb:.1f} MB")
|
|
39
|
+
"""
|
|
40
|
+
|
|
41
|
+
from __future__ import annotations
|
|
42
|
+
|
|
43
|
+
import gc
|
|
44
|
+
import os
|
|
45
|
+
import sys
|
|
46
|
+
import threading
|
|
47
|
+
import time
|
|
48
|
+
import traceback
|
|
49
|
+
import warnings
|
|
50
|
+
from abc import ABC, abstractmethod
|
|
51
|
+
from collections import deque
|
|
52
|
+
from contextlib import contextmanager
|
|
53
|
+
from dataclasses import dataclass, field
|
|
54
|
+
from datetime import datetime, timedelta
|
|
55
|
+
from enum import Enum
|
|
56
|
+
from pathlib import Path
|
|
57
|
+
from typing import Any, Callable, Generic, Iterator, Protocol, TypeVar
|
|
58
|
+
|
|
59
|
+
try:
|
|
60
|
+
import psutil
|
|
61
|
+
PSUTIL_AVAILABLE = True
|
|
62
|
+
except ImportError:
|
|
63
|
+
PSUTIL_AVAILABLE = False
|
|
64
|
+
warnings.warn(
|
|
65
|
+
"psutil not installed. Memory monitoring will use fallback methods. "
|
|
66
|
+
"Install with: pip install psutil",
|
|
67
|
+
UserWarning,
|
|
68
|
+
)
|
|
69
|
+
|
|
70
|
+
|
|
71
|
+
# =============================================================================
|
|
72
|
+
# Types and Enums
|
|
73
|
+
# =============================================================================
|
|
74
|
+
|
|
75
|
+
|
|
76
|
+
class MemoryUnit(str, Enum):
    """Units in which a memory size can be expressed (binary multiples)."""

    BYTES = "bytes"
    KB = "kb"
    MB = "mb"
    GB = "gb"

    @classmethod
    def convert(
        cls,
        value: float,
        from_unit: "MemoryUnit",
        to_unit: "MemoryUnit",
    ) -> float:
        """Re-express a memory size given in one unit in another unit.

        Args:
            value: Size expressed in ``from_unit``.
            from_unit: Unit ``value`` is currently expressed in.
            to_unit: Unit the result should be expressed in.

        Returns:
            The same size expressed in ``to_unit``.

        Raises:
            KeyError: If either unit is not one of the known units.
        """
        # Number of bytes in one of each unit; every step up is a factor of 1024.
        bytes_per_unit = {
            cls.BYTES: 1,
            cls.KB: 1 << 10,
            cls.MB: 1 << 20,
            cls.GB: 1 << 30,
        }
        # Normalise to bytes first, then scale into the target unit.
        return value * bytes_per_unit[from_unit] / bytes_per_unit[to_unit]
|
|
101
|
+
|
|
102
|
+
|
|
103
|
+
class MemoryStatus(str, Enum):
    """Severity levels describing the current memory usage."""

    # Normal usage
    OK = "ok"
    # Approaching threshold
    WARNING = "warning"
    # Near limit, action needed
    CRITICAL = "critical"
    # Immediate OOM risk
    OOM_RISK = "oom_risk"
|
|
110
|
+
|
|
111
|
+
|
|
112
|
+
class MemoryAction(str, Enum):
    """Possible responses to a critical memory condition."""

    NONE = "none"
    WARN = "warn"
    GC_COLLECT = "gc_collect"
    RAISE_ERROR = "raise_error"
    CALLBACK = "callback"
|
|
120
|
+
|
|
121
|
+
|
|
122
|
+
# =============================================================================
|
|
123
|
+
# Exceptions
|
|
124
|
+
# =============================================================================
|
|
125
|
+
|
|
126
|
+
|
|
127
|
+
class MemoryLimitExceeded(Exception):
    """Error signalling that memory usage went past a limit.

    Attributes:
        current_mb: Memory usage passed to the constructor, in MB.
        limit_mb: Limit passed to the constructor, in MB.
    """

    def __init__(
        self,
        current_mb: float,
        limit_mb: float,
        message: str = "",
    ):
        """Store the usage figures and build the exception message.

        Args:
            current_mb: Current memory usage in MB.
            limit_mb: The limit that was exceeded, in MB.
            message: Optional custom message; when empty, a default message
                built from the two figures is used.
        """
        self.current_mb = current_mb
        self.limit_mb = limit_mb
        if not message:
            # No custom text supplied: describe the overrun ourselves.
            message = f"Memory limit exceeded: {current_mb:.1f} MB > {limit_mb:.1f} MB"
        super().__init__(message)
|
|
141
|
+
|
|
142
|
+
|
|
143
|
+
class OOMRiskDetected(Exception):
    """Error signalling that an out-of-memory risk was detected.

    Attributes:
        available_mb: Available memory passed to the constructor, in MB.
        required_mb: Required memory passed to the constructor, in MB, or None.
    """

    def __init__(
        self,
        available_mb: float,
        required_mb: float | None = None,
    ):
        """Store the figures and build the exception message.

        Args:
            available_mb: Memory still available, in MB.
            required_mb: Memory the operation needs, in MB. It is only
                mentioned in the message when it is truthy (not None, not 0).
        """
        self.available_mb = available_mb
        self.required_mb = required_mb
        pieces = [f"OOM risk: only {available_mb:.1f} MB available"]
        if required_mb:
            pieces.append(f", but {required_mb:.1f} MB required")
        super().__init__("".join(pieces))
|
|
157
|
+
|
|
158
|
+
|
|
159
|
+
# =============================================================================
|
|
160
|
+
# Memory Information
|
|
161
|
+
# =============================================================================
|
|
162
|
+
|
|
163
|
+
|
|
164
|
+
@dataclass(frozen=True)
class MemorySnapshot:
    """Immutable record of process and system memory usage at one instant.

    Byte counts are stored as given; the ``*_mb`` properties expose them in
    MB, where 1 MB is 1024 * 1024 bytes.
    """

    timestamp: datetime
    # Resident Set Size of the process
    process_rss_bytes: int
    # Virtual Memory Size of the process
    process_vms_bytes: int
    # Process memory as % of total
    process_percent: float
    system_total_bytes: int
    system_available_bytes: int
    system_used_bytes: int
    system_percent: float

    @property
    def process_rss_mb(self) -> float:
        """Resident set size of the process, in MB."""
        return self.process_rss_bytes / 1_048_576

    @property
    def process_vms_mb(self) -> float:
        """Virtual memory size of the process, in MB."""
        return self.process_vms_bytes / 1_048_576

    @property
    def system_available_mb(self) -> float:
        """Memory available on the system, in MB."""
        return self.system_available_bytes / 1_048_576

    @property
    def system_total_mb(self) -> float:
        """Total memory of the system, in MB."""
        return self.system_total_bytes / 1_048_576

    def to_dict(self) -> dict[str, Any]:
        """Serialise the snapshot into a nested, JSON-friendly dictionary.

        Returns:
            A dict with an ISO-formatted ``timestamp`` plus ``process`` and
            ``system`` sub-dicts holding the raw byte counts, their MB
            equivalents and the usage percentages.
        """
        process_section = {
            "rss_bytes": self.process_rss_bytes,
            "rss_mb": self.process_rss_mb,
            "vms_bytes": self.process_vms_bytes,
            "vms_mb": self.process_vms_mb,
            "percent": self.process_percent,
        }
        system_section = {
            "total_bytes": self.system_total_bytes,
            "total_mb": self.system_total_mb,
            "available_bytes": self.system_available_bytes,
            "available_mb": self.system_available_mb,
            "used_bytes": self.system_used_bytes,
            "percent": self.system_percent,
        }
        return {
            "timestamp": self.timestamp.isoformat(),
            "process": process_section,
            "system": system_section,
        }
|
|
217
|
+
|
|
218
|
+
|
|
219
|
+
@dataclass
class MemoryReport:
    """Summary of memory usage over a monitoring period."""

    start_time: datetime
    end_time: datetime
    duration_seconds: float

    # Process memory (RSS) statistics, in MB
    initial_rss_mb: float
    final_rss_mb: float
    peak_rss_mb: float
    min_rss_mb: float
    avg_rss_mb: float

    # System memory usage percentages
    initial_system_percent: float
    final_system_percent: float
    peak_system_percent: float

    # Status tracking
    status_history: list[tuple[datetime, MemoryStatus]] = field(default_factory=list)
    warnings: list[str] = field(default_factory=list)
    gc_collections: int = 0

    # Snapshots (if detailed tracking enabled)
    snapshots: list[MemorySnapshot] = field(default_factory=list)

    @property
    def memory_growth_mb(self) -> float:
        """Difference between final and initial RSS, in MB."""
        return self.final_rss_mb - self.initial_rss_mb

    @property
    def memory_growth_percent(self) -> float:
        """RSS growth relative to the initial RSS, as a percentage.

        Returns 0.0 when the initial RSS is zero, avoiding a division by zero.
        """
        baseline = self.initial_rss_mb
        return 0.0 if baseline == 0 else (self.memory_growth_mb / baseline) * 100

    def to_dict(self) -> dict[str, Any]:
        """Serialise the report into a nested, JSON-friendly dictionary.

        Snapshots are not included; only their count is reported under
        ``n_snapshots``. ``status_history`` is not included either.
        """
        process_section = {
            "initial_rss_mb": self.initial_rss_mb,
            "final_rss_mb": self.final_rss_mb,
            "peak_rss_mb": self.peak_rss_mb,
            "min_rss_mb": self.min_rss_mb,
            "avg_rss_mb": self.avg_rss_mb,
            "growth_mb": self.memory_growth_mb,
            "growth_percent": self.memory_growth_percent,
        }
        system_section = {
            "initial_percent": self.initial_system_percent,
            "final_percent": self.final_system_percent,
            "peak_percent": self.peak_system_percent,
        }
        return {
            "duration_seconds": self.duration_seconds,
            "start_time": self.start_time.isoformat(),
            "end_time": self.end_time.isoformat(),
            "process": process_section,
            "system": system_section,
            "gc_collections": self.gc_collections,
            "warnings": self.warnings,
            "n_snapshots": len(self.snapshots),
        }
|
|
283
|
+
|
|
284
|
+
|
|
285
|
+
# =============================================================================
|
|
286
|
+
# Memory Reader (Platform Abstraction)
|
|
287
|
+
# =============================================================================
|
|
288
|
+
|
|
289
|
+
|
|
290
|
+
class MemoryReader(Protocol):
    """Structural interface for objects that can report memory usage."""

    def get_snapshot(self) -> MemorySnapshot:
        """Return a snapshot of the current memory usage."""
        ...
|
|
296
|
+
|
|
297
|
+
|
|
298
|
+
class PsutilMemoryReader:
    """Memory reader backed by psutil."""

    def __init__(self, pid: int | None = None):
        """Bind the reader to a process.

        Args:
            pid: Process ID to monitor (None for current process)

        Raises:
            ImportError: If psutil could not be imported.
        """
        if not PSUTIL_AVAILABLE:
            raise ImportError("psutil is required for PsutilMemoryReader")
        self._process = psutil.Process(pid)

    def get_snapshot(self) -> MemorySnapshot:
        """Read process and system memory figures and wrap them in a snapshot."""
        process = self._process
        process_memory = process.memory_info()
        process_share = process.memory_percent()
        system_memory = psutil.virtual_memory()

        # The timestamp is taken after the readings, when the snapshot is built.
        return MemorySnapshot(
            timestamp=datetime.now(),
            process_rss_bytes=process_memory.rss,
            process_vms_bytes=process_memory.vms,
            process_percent=process_share,
            system_total_bytes=system_memory.total,
            system_available_bytes=system_memory.available,
            system_used_bytes=system_memory.used,
            system_percent=system_memory.percent,
        )
|
|
327
|
+
|
|
328
|
+
|
|
329
|
+
class FallbackMemoryReader:
    """Best-effort memory reader used when psutil is not available.

    Process memory comes from the ``resource`` module where it can be
    imported, otherwise from a rough estimate based on gc-tracked objects.
    System-wide usage is not measured: the "system" figures of the snapshot
    are derived from the process figure and the total memory size.
    """

    # Total system memory assumed when the platform cannot report it (8 GiB).
    _ASSUMED_TOTAL_BYTES = 8 * 1024 * 1024 * 1024

    def __init__(self) -> None:
        """Probe for the ``resource`` module and remember whether it exists."""
        self._has_resource = False
        try:
            import resource
        except ImportError:
            # resource is a Unix-only module; fall back to the gc estimate.
            return
        self._resource = resource
        self._has_resource = True

    @staticmethod
    def _detect_total_bytes() -> int | None:
        """Return the physical memory size via ``os.sysconf``, or None if unknown."""
        try:
            pages = os.sysconf("SC_PHYS_PAGES")
            page_size = os.sysconf("SC_PAGE_SIZE")
        except (AttributeError, ValueError, OSError):
            # os.sysconf is missing, or the name is unknown on this platform.
            return None
        if pages > 0 and page_size > 0:
            return pages * page_size
        return None

    def get_snapshot(self) -> MemorySnapshot:
        """Get current memory snapshot (limited without psutil).

        Returns:
            A snapshot whose VMS equals the RSS figure (no VMS information is
            available) and whose system "used"/"percent" values reflect this
            process only. ``system_available_bytes`` is never negative.
        """
        timestamp = datetime.now()

        if self._has_resource:
            usage = self._resource.getrusage(self._resource.RUSAGE_SELF)
            # NOTE: ru_maxrss is the *maximum* resident set size, so this is
            # the peak so far rather than the instantaneous usage.
            rss_bytes = usage.ru_maxrss
            # ru_maxrss is in bytes on macOS and in kilobytes on Linux.
            if sys.platform != "darwin":
                rss_bytes *= 1024
        else:
            # Very rough estimate: shallow sizes of the first 1000 gc-tracked objects.
            rss_bytes = sum(sys.getsizeof(obj) for obj in gc.get_objects()[:1000])

        # Prefer the real physical memory size; assume 8 GiB only as a last resort.
        total_bytes = self._detect_total_bytes() or self._ASSUMED_TOTAL_BYTES
        used_percent = rss_bytes / total_bytes * 100

        return MemorySnapshot(
            timestamp=timestamp,
            process_rss_bytes=rss_bytes,
            process_vms_bytes=rss_bytes,  # No VMS info
            process_percent=used_percent,
            system_total_bytes=total_bytes,
            # Clamp so an RSS above the (possibly assumed) total never yields
            # a negative amount of available memory.
            system_available_bytes=max(total_bytes - rss_bytes, 0),
            system_used_bytes=rss_bytes,
            system_percent=used_percent,
        )
|
|
374
|
+
|
|
375
|
+
|
|
376
|
+
def get_memory_reader(pid: int | None = None) -> MemoryReader:
    """Pick the most capable memory reader available in this environment.

    Args:
        pid: Process ID to monitor (None for current process). Only the
            psutil-based reader receives it; the fallback reader is created
            without a pid.

    Returns:
        A ``PsutilMemoryReader`` when psutil is available, otherwise a
        ``FallbackMemoryReader``.
    """
    if not PSUTIL_AVAILABLE:
        return FallbackMemoryReader()
    return PsutilMemoryReader(pid)
|
|
388
|
+
|
|
389
|
+
|
|
390
|
+
# =============================================================================
|
|
391
|
+
# Memory Monitor Configuration
|
|
392
|
+
# =============================================================================
|
|
393
|
+
|
|
394
|
+
|
|
395
|
+
@dataclass
class MemoryConfig:
    """Settings that control memory monitoring.

    Attributes:
        warning_threshold_percent: System memory % to trigger warning
        critical_threshold_percent: System memory % to trigger critical
        oom_threshold_percent: System memory % to consider OOM risk
        max_process_memory_mb: Maximum process memory allowed (None = unlimited)
        check_interval_seconds: How often to check memory
        enable_gc_on_warning: Run GC when warning threshold hit
        enable_gc_on_critical: Run GC when critical threshold hit
        raise_on_oom_risk: Raise exception on OOM risk
        callback_on_warning: Callback when warning threshold hit
        callback_on_critical: Callback when critical threshold hit
        keep_snapshots: Whether to keep all snapshots
        max_snapshots: Maximum snapshots to keep (0 = unlimited)
    """

    # Thresholds
    warning_threshold_percent: float = 70.0
    critical_threshold_percent: float = 85.0
    oom_threshold_percent: float = 95.0
    max_process_memory_mb: float | None = None

    # Behaviour
    check_interval_seconds: float = 1.0
    enable_gc_on_warning: bool = False
    enable_gc_on_critical: bool = True
    raise_on_oom_risk: bool = True

    # Callbacks
    callback_on_warning: Callable[[MemorySnapshot], None] | None = None
    callback_on_critical: Callable[[MemorySnapshot], None] | None = None

    # Snapshot retention
    keep_snapshots: bool = False
    max_snapshots: int = 1000

    def get_status(self, snapshot: MemorySnapshot) -> MemoryStatus:
        """Classify a snapshot against the configured limits.

        The per-process limit is evaluated first: a process above
        ``max_process_memory_mb`` is reported as CRITICAL regardless of the
        system usage (even if that is at or above the OOM threshold).
        Otherwise the system usage percentage is compared against the
        thresholds from most to least severe.

        Args:
            snapshot: Memory snapshot to classify.

        Returns:
            The matching ``MemoryStatus``.
        """
        process_limit_mb = self.max_process_memory_mb
        if process_limit_mb is not None and snapshot.process_rss_mb > process_limit_mb:
            return MemoryStatus.CRITICAL

        system_usage = snapshot.system_percent
        # Ordered from most to least severe; the first threshold reached wins.
        severity_ladder = (
            (self.oom_threshold_percent, MemoryStatus.OOM_RISK),
            (self.critical_threshold_percent, MemoryStatus.CRITICAL),
            (self.warning_threshold_percent, MemoryStatus.WARNING),
        )
        for threshold, status in severity_ladder:
            if system_usage >= threshold:
                return status
        return MemoryStatus.OK
|
|
448
|
+
|
|
449
|
+
|
|
450
|
+
# =============================================================================
|
|
451
|
+
# Memory Monitor
|
|
452
|
+
# =============================================================================
|
|
453
|
+
|
|
454
|
+
|
|
455
|
+
class MemoryMonitor:
|
|
456
|
+
"""Real-time memory usage monitor.
|
|
457
|
+
|
|
458
|
+
Monitors memory usage and provides alerts when thresholds are exceeded.
|
|
459
|
+
|
|
460
|
+
Example:
|
|
461
|
+
monitor = MemoryMonitor(
|
|
462
|
+
warning_threshold_percent=70,
|
|
463
|
+
critical_threshold_percent=85,
|
|
464
|
+
)
|
|
465
|
+
|
|
466
|
+
monitor.start()
|
|
467
|
+
|
|
468
|
+
# Do work...
|
|
469
|
+
for batch in batches:
|
|
470
|
+
if monitor.is_critical():
|
|
471
|
+
print("Memory critical, stopping")
|
|
472
|
+
break
|
|
473
|
+
process(batch)
|
|
474
|
+
|
|
475
|
+
report = monitor.stop()
|
|
476
|
+
print(f"Peak memory: {report.peak_rss_mb:.1f} MB")
|
|
477
|
+
"""
|
|
478
|
+
|
|
479
|
+
def __init__(
|
|
480
|
+
self,
|
|
481
|
+
config: MemoryConfig | None = None,
|
|
482
|
+
warning_threshold_percent: float = 70.0,
|
|
483
|
+
critical_threshold_percent: float = 85.0,
|
|
484
|
+
max_process_memory_mb: float | None = None,
|
|
485
|
+
check_interval_seconds: float = 1.0,
|
|
486
|
+
):
|
|
487
|
+
"""Initialize monitor.
|
|
488
|
+
|
|
489
|
+
Args:
|
|
490
|
+
config: Full configuration (overrides other params)
|
|
491
|
+
warning_threshold_percent: Warning threshold
|
|
492
|
+
critical_threshold_percent: Critical threshold
|
|
493
|
+
max_process_memory_mb: Max process memory
|
|
494
|
+
check_interval_seconds: Check interval
|
|
495
|
+
"""
|
|
496
|
+
if config is not None:
|
|
497
|
+
self._config = config
|
|
498
|
+
else:
|
|
499
|
+
self._config = MemoryConfig(
|
|
500
|
+
warning_threshold_percent=warning_threshold_percent,
|
|
501
|
+
critical_threshold_percent=critical_threshold_percent,
|
|
502
|
+
max_process_memory_mb=max_process_memory_mb,
|
|
503
|
+
check_interval_seconds=check_interval_seconds,
|
|
504
|
+
)
|
|
505
|
+
|
|
506
|
+
self._reader = get_memory_reader()
|
|
507
|
+
self._running = False
|
|
508
|
+
self._thread: threading.Thread | None = None
|
|
509
|
+
self._lock = threading.Lock()
|
|
510
|
+
|
|
511
|
+
# State
|
|
512
|
+
self._snapshots: deque[MemorySnapshot] = deque(
|
|
513
|
+
maxlen=self._config.max_snapshots if self._config.max_snapshots > 0 else None
|
|
514
|
+
)
|
|
515
|
+
self._start_time: datetime | None = None
|
|
516
|
+
self._end_time: datetime | None = None
|
|
517
|
+
self._initial_snapshot: MemorySnapshot | None = None
|
|
518
|
+
self._peak_rss_bytes: int = 0
|
|
519
|
+
self._min_rss_bytes: int = float("inf") # type: ignore
|
|
520
|
+
self._sum_rss_bytes: int = 0
|
|
521
|
+
self._sample_count: int = 0
|
|
522
|
+
self._gc_count: int = 0
|
|
523
|
+
self._status_history: list[tuple[datetime, MemoryStatus]] = []
|
|
524
|
+
self._current_status: MemoryStatus = MemoryStatus.OK
|
|
525
|
+
self._warnings: list[str] = []
|
|
526
|
+
|
|
527
|
+
@property
def is_running(self) -> bool:
    """Report whether the monitor is currently flagged as running."""
    running = self._running
    return running
|
|
531
|
+
|
|
532
|
+
@property
def current_status(self) -> MemoryStatus:
    """Return the most recently recorded memory status."""
    status = self._current_status
    return status
|
|
536
|
+
|
|
537
|
+
def start(self) -> None:
    """Begin sampling memory on a daemon background thread.

    Calling this while the monitor is already running is a no-op.
    Otherwise the start time is recorded, a baseline snapshot is taken
    (it seeds both the peak and the minimum RSS and is stored when
    snapshots are kept), and a thread running ``_monitor_loop`` is
    launched.
    """
    if not self._running:
        self._running = True
        self._start_time = datetime.now()

        baseline = self._reader.get_snapshot()
        self._initial_snapshot = baseline
        baseline_rss = baseline.process_rss_bytes
        self._peak_rss_bytes = baseline_rss
        self._min_rss_bytes = baseline_rss

        if self._config.keep_snapshots:
            self._snapshots.append(baseline)

        worker = threading.Thread(target=self._monitor_loop, daemon=True)
        self._thread = worker
        worker.start()
|
|
553
|
+
|
|
554
|
+
def stop(self) -> MemoryReport:
    """Halt monitoring and build the final report.

    Clears the running flag, stamps the end time, waits up to two
    seconds for the background thread (if any) to finish, and then
    returns the result of ``_generate_report``.

    Returns:
        The memory report for the monitored period.
    """
    self._running = False
    self._end_time = datetime.now()

    worker = self._thread
    if worker is not None:
        # Bounded wait: the loop may be asleep between two checks.
        worker.join(timeout=2.0)
        self._thread = None

    return self._generate_report()
|
|
564
|
+
|
|
565
|
+
def check(self) -> MemorySnapshot:
    """Sample memory once, on demand, and feed it through the monitor.

    Returns:
        The snapshot that was just taken and processed.
    """
    current = self._reader.get_snapshot()
    self._process_snapshot(current)
    return current
|
|
574
|
+
|
|
575
|
+
def is_ok(self) -> bool:
    """Return True when the last recorded status is ``MemoryStatus.OK``."""
    status = self._current_status
    return status == MemoryStatus.OK
|
|
578
|
+
|
|
579
|
+
def is_warning(self) -> bool:
    """Return True when the last recorded status is ``MemoryStatus.WARNING``."""
    status = self._current_status
    return status == MemoryStatus.WARNING
|
|
582
|
+
|
|
583
|
+
def is_critical(self) -> bool:
    """Return True when the last recorded status is CRITICAL or OOM_RISK."""
    severe_states = (MemoryStatus.CRITICAL, MemoryStatus.OOM_RISK)
    return self._current_status in severe_states
|
|
589
|
+
|
|
590
|
+
def get_available_mb(self) -> float:
    """Take a fresh snapshot and return its available system memory in MB."""
    return self._reader.get_snapshot().system_available_mb
|
|
594
|
+
|
|
595
|
+
def get_process_memory_mb(self) -> float:
    """Take a fresh snapshot and return the process RSS in MB."""
    return self._reader.get_snapshot().process_rss_mb
|
|
599
|
+
|
|
600
|
+
def _monitor_loop(self) -> None:
|
|
601
|
+
"""Background monitoring loop."""
|
|
602
|
+
while self._running:
|
|
603
|
+
try:
|
|
604
|
+
snapshot = self._reader.get_snapshot()
|
|
605
|
+
self._process_snapshot(snapshot)
|
|
606
|
+
except Exception as e:
|
|
607
|
+
self._warnings.append(f"Monitor error: {e}")
|
|
608
|
+
|
|
609
|
+
time.sleep(self._config.check_interval_seconds)
|
|
610
|
+
|
|
611
|
+
def _process_snapshot(self, snapshot: MemorySnapshot) -> None:
    """Fold one snapshot into the running statistics and react to its status.

    While holding the monitor lock this updates the peak, minimum and
    summed RSS plus the sample count, stores the snapshot when snapshots
    are kept, and asks the configuration for the snapshot's status. A
    status that differs from the current one is appended to the status
    history and becomes the current status, after which the configured
    reaction runs: the warning/critical callback and optional garbage
    collection, or an exception on OOM risk.

    A failing callback no longer disappears silently: the error is
    appended to the monitor's warnings so it appears in the final
    report, and processing continues.

    Args:
        snapshot: Memory snapshot to process.

    Raises:
        OOMRiskDetected: If the new status is OOM_RISK and the
            configuration has ``raise_on_oom_risk`` enabled.
    """
    with self._lock:
        # Running statistics.
        rss_bytes = snapshot.process_rss_bytes
        self._peak_rss_bytes = max(self._peak_rss_bytes, rss_bytes)
        self._min_rss_bytes = min(self._min_rss_bytes, rss_bytes)
        self._sum_rss_bytes += rss_bytes
        self._sample_count += 1

        if self._config.keep_snapshots:
            self._snapshots.append(snapshot)

        new_status = self._config.get_status(snapshot)

        # NOTE(review): indentation was lost in the source under review, so
        # it is not certain whether the reactions below belong inside the
        # status-change branch. They are kept inside it here (fire once per
        # transition) -- confirm against the upstream file.
        if new_status == self._current_status:
            return

        self._status_history.append((snapshot.timestamp, new_status))
        self._current_status = new_status

        if new_status == MemoryStatus.WARNING:
            callback = self._config.callback_on_warning
            collect_garbage = self._config.enable_gc_on_warning
        elif new_status == MemoryStatus.CRITICAL:
            callback = self._config.callback_on_critical
            collect_garbage = self._config.enable_gc_on_critical
        elif new_status == MemoryStatus.OOM_RISK:
            if self._config.raise_on_oom_risk:
                raise OOMRiskDetected(snapshot.system_available_mb)
            return
        else:
            return

        if callback:
            try:
                callback(snapshot)
            except Exception as exc:
                # Best effort: a broken user callback must not stop
                # monitoring, but the failure should stay visible.
                self._warnings.append(f"Callback error on {new_status}: {exc}")

        if collect_garbage:
            gc.collect()
            self._gc_count += 1
|
|
656
|
+
|
|
657
|
+
def _generate_report(self) -> MemoryReport:
    """Build a ``MemoryReport`` from the statistics gathered so far.

    A final snapshot is taken under the monitor lock and combined with
    the recorded start/end times, the initial snapshot, the RSS
    statistics, the status history, the warnings and the GC counter.
    Missing start/end times fall back to "now" and give a zero duration;
    a missing initial snapshot gives zero initial values.

    If no RSS value was ever recorded (neither ``start`` nor a sample
    ran), the minimum is still its ``inf`` sentinel and the peak is 0.
    Both then fall back to the final snapshot so the report does not
    contain an infinite minimum.

    Returns:
        The assembled memory report.
    """
    with self._lock:
        final_snapshot = self._reader.get_snapshot()
        bytes_per_mb = 1024 * 1024

        started = self._start_time
        ended = self._end_time
        if started and ended:
            duration_seconds = (ended - started).total_seconds()
        else:
            duration_seconds = 0.0

        initial = self._initial_snapshot
        initial_rss_mb = initial.process_rss_mb if initial else 0.0
        initial_system_percent = initial.system_percent if initial else 0.0

        peak_rss_bytes = self._peak_rss_bytes
        min_rss_bytes = self._min_rss_bytes
        if min_rss_bytes == float("inf"):
            # Nothing was ever sampled: use the only reading we have.
            min_rss_bytes = final_snapshot.process_rss_bytes
            peak_rss_bytes = final_snapshot.process_rss_bytes

        if self._sample_count > 0:
            avg_rss_mb = self._sum_rss_bytes / self._sample_count / bytes_per_mb
        else:
            avg_rss_mb = 0.0

        # The system peak is only known from retained snapshots.
        if self._snapshots:
            peak_system_percent = max(s.system_percent for s in self._snapshots)
        else:
            peak_system_percent = final_snapshot.system_percent

        return MemoryReport(
            start_time=started or datetime.now(),
            end_time=ended or datetime.now(),
            duration_seconds=duration_seconds,
            initial_rss_mb=initial_rss_mb,
            final_rss_mb=final_snapshot.process_rss_mb,
            peak_rss_mb=peak_rss_bytes / bytes_per_mb,
            min_rss_mb=min_rss_bytes / bytes_per_mb,
            avg_rss_mb=avg_rss_mb,
            initial_system_percent=initial_system_percent,
            final_system_percent=final_snapshot.system_percent,
            peak_system_percent=peak_system_percent,
            status_history=list(self._status_history),
            warnings=list(self._warnings),
            gc_collections=self._gc_count,
            snapshots=list(self._snapshots) if self._config.keep_snapshots else [],
        )
|
|
697
|
+
|
|
698
|
+
|
|
699
|
+
# =============================================================================
|
|
700
|
+
# Memory Guard Context Manager
|
|
701
|
+
# =============================================================================
|
|
702
|
+
|
|
703
|
+
|
|
704
|
+
@contextmanager
def memory_guard(
    max_memory_mb: float | None = None,
    warning_threshold_percent: float = 70.0,
    critical_threshold_percent: float = 85.0,
    raise_on_critical: bool = False,
    callback: Callable[[MemorySnapshot], None] | None = None,
) -> Iterator[MemoryMonitor]:
    """Context manager for memory-guarded execution.

    Starts a ``MemoryMonitor`` for the duration of the ``with`` block
    and stops it on exit. When ``raise_on_critical`` is set and the
    report's status history contains CRITICAL or OOM_RISK,
    ``MemoryLimitExceeded`` is raised after the block finishes.

    If the guarded block itself raises, the monitor is stopped and that
    exception propagates unchanged; it is never replaced by
    ``MemoryLimitExceeded``.

    Args:
        max_memory_mb: Maximum process memory allowed
        warning_threshold_percent: Warning threshold
        critical_threshold_percent: Critical threshold
        raise_on_critical: Whether to raise on critical status
        callback: Callback on warning/critical

    Yields:
        MemoryMonitor instance

    Raises:
        MemoryLimitExceeded: If ``raise_on_critical`` is true, the block
            completed normally, and a critical status was recorded.

    Example:
        with memory_guard(max_memory_mb=1024) as monitor:
            process_data(data)
            if monitor.is_critical():
                cleanup()
    """
    config = MemoryConfig(
        warning_threshold_percent=warning_threshold_percent,
        critical_threshold_percent=critical_threshold_percent,
        max_process_memory_mb=max_memory_mb,
        callback_on_warning=callback,
        callback_on_critical=callback,
    )

    monitor = MemoryMonitor(config=config)
    monitor.start()

    try:
        yield monitor
    except BaseException:
        # The guarded block failed: shut the monitor down but let the
        # caller's own exception surface instead of masking it.
        monitor.stop()
        raise

    report = monitor.stop()

    if raise_on_critical and any(
        status in (MemoryStatus.CRITICAL, MemoryStatus.OOM_RISK)
        for _, status in report.status_history
    ):
        # Only an absent limit means "unbounded"; an explicit 0 stays 0.
        limit_mb = max_memory_mb if max_memory_mb is not None else float("inf")
        raise MemoryLimitExceeded(
            report.peak_rss_mb,
            limit_mb,
            f"Memory exceeded critical threshold (peak: {report.peak_rss_mb:.1f} MB)",
        )
|
|
758
|
+
|
|
759
|
+
|
|
760
|
+
# =============================================================================
|
|
761
|
+
# Memory Tracker (Lightweight)
|
|
762
|
+
# =============================================================================
|
|
763
|
+
|
|
764
|
+
|
|
765
|
+
class MemoryTracker:
    """Lightweight memory tracker for specific operations.

    Unlike MemoryMonitor, this doesn't run a background thread.
    Instead, it takes snapshots on demand and stores them by name.

    Example:
        tracker = MemoryTracker()

        tracker.checkpoint("start")
        do_operation()
        tracker.checkpoint("after_operation")

        print(tracker.get_delta("start", "after_operation"))
    """

    def __init__(self):
        """Initialize tracker with a memory reader and no checkpoints."""
        self._reader = get_memory_reader()
        self._checkpoints: dict[str, MemorySnapshot] = {}

    def checkpoint(self, name: str) -> MemorySnapshot:
        """Take a memory checkpoint.

        A checkpoint taken under an existing name replaces the earlier
        snapshot stored under that name.

        Args:
            name: Checkpoint name

        Returns:
            Memory snapshot
        """
        snapshot = self._reader.get_snapshot()
        self._checkpoints[name] = snapshot
        return snapshot

    def get_checkpoint(self, name: str) -> MemorySnapshot | None:
        """Get a checkpoint by name, or None if it was never taken."""
        return self._checkpoints.get(name)

    def get_delta(
        self,
        from_name: str,
        to_name: str,
    ) -> dict[str, float]:
        """Get memory delta between checkpoints.

        Args:
            from_name: Starting checkpoint
            to_name: Ending checkpoint

        Returns:
            Dictionary with the RSS, VMS and system-percent differences
            (end minus start) and the elapsed seconds between the two
            snapshots; an empty dictionary if either name is unknown.
        """
        from_snap = self._checkpoints.get(from_name)
        to_snap = self._checkpoints.get(to_name)

        if from_snap is None or to_snap is None:
            return {}

        return {
            "rss_delta_mb": to_snap.process_rss_mb - from_snap.process_rss_mb,
            "vms_delta_mb": to_snap.process_vms_mb - from_snap.process_vms_mb,
            "system_delta_percent": to_snap.system_percent - from_snap.system_percent,
            "duration_seconds": (to_snap.timestamp - from_snap.timestamp).total_seconds(),
        }

    def get_all_checkpoints(self) -> dict[str, MemorySnapshot]:
        """Get a shallow copy of all checkpoints."""
        return dict(self._checkpoints)

    def clear(self) -> None:
        """Clear all checkpoints."""
        self._checkpoints.clear()

    def summary(self) -> dict[str, Any]:
        """Get summary of all checkpoints.

        Returns:
            An empty dictionary when no checkpoint exists. Otherwise the
            checkpoint count and names, the minimum and maximum RSS, and
            the names of the earliest and latest checkpoints by snapshot
            timestamp (not by alphabetical order of the names).
        """
        if not self._checkpoints:
            return {}

        rss_values = [snap.process_rss_mb for snap in self._checkpoints.values()]

        # sorted() is stable, so equal timestamps keep insertion order.
        chronological = sorted(
            self._checkpoints,
            key=lambda name: self._checkpoints[name].timestamp,
        )

        return {
            "n_checkpoints": len(self._checkpoints),
            "checkpoints": list(self._checkpoints.keys()),
            "min_rss_mb": min(rss_values),
            "max_rss_mb": max(rss_values),
            "first_checkpoint": chronological[0],
            "last_checkpoint": chronological[-1],
        }
|
|
854
|
+
|
|
855
|
+
|
|
856
|
+
# =============================================================================
|
|
857
|
+
# Memory-Aware Batch Processor
|
|
858
|
+
# =============================================================================
|
|
859
|
+
|
|
860
|
+
|
|
861
|
+
T = TypeVar("T")
R = TypeVar("R")


class MemoryAwareBatchProcessor(Generic[T, R]):
    """Batch processor that adapts to memory constraints.

    Automatically adjusts batch size based on system memory usage: the
    batch shrinks when usage reaches ``max_memory_percent`` and grows
    when usage is below 70% of that limit.

    Example:
        processor = MemoryAwareBatchProcessor(
            process_fn=process_batch,
            max_memory_percent=80,
        )

        results = processor.process(all_items)
    """

    def __init__(
        self,
        process_fn: Callable[[list[T]], list[R]],
        initial_batch_size: int = 1000,
        min_batch_size: int = 10,
        max_batch_size: int = 100000,
        max_memory_percent: float = 80.0,
        memory_check_frequency: int = 1,
    ):
        """Initialize processor.

        Args:
            process_fn: Function to process a batch
            initial_batch_size: Starting batch size
            min_batch_size: Minimum batch size
            max_batch_size: Maximum batch size
            max_memory_percent: Maximum memory usage percent
            memory_check_frequency: Check memory every N batches
        """
        self.process_fn = process_fn
        self.initial_batch_size = initial_batch_size
        self.min_batch_size = min_batch_size
        self.max_batch_size = max_batch_size
        self.max_memory_percent = max_memory_percent
        self.memory_check_frequency = memory_check_frequency

        self._reader = get_memory_reader()
        self._current_batch_size = initial_batch_size
        self._batch_count = 0

    def process(
        self,
        items: list[T],
        callback: Callable[[int, int], None] | None = None,
    ) -> list[R]:
        """Process all items in adaptive batches.

        Every batch contains at least one item, so a non-positive batch
        size cannot stall the loop. A non-positive
        ``memory_check_frequency`` is treated as 1.

        Args:
            items: Items to process
            callback: Progress callback (processed, total)

        Returns:
            Combined results from all batches
        """
        results: list[R] = []
        total = len(items)
        processed = 0

        # Modulo by zero would raise; fall back to checking every batch.
        check_every = self.memory_check_frequency if self.memory_check_frequency > 0 else 1

        while processed < total:
            # Always advance by at least one item.
            step = max(1, self._current_batch_size)
            batch_end = min(processed + step, total)

            results.extend(self.process_fn(items[processed:batch_end]))

            processed = batch_end
            self._batch_count += 1

            if callback:
                callback(processed, total)

            if self._batch_count % check_every == 0:
                self._adjust_batch_size()

            # Run GC periodically
            if self._batch_count % 10 == 0:
                gc.collect()

        return results

    def _adjust_batch_size(self) -> None:
        """Adjust batch size based on memory usage.

        Shrinks to 70% of the current size (never below
        ``min_batch_size`` or 1) when system usage reaches the limit.
        Grows by 30%, and by at least one item so small sizes are not
        stuck by integer truncation, when usage is below 70% of the
        limit. Growth is capped at ``max_batch_size``.
        """
        current_percent = self._reader.get_snapshot().system_percent
        size = self._current_batch_size

        if current_percent >= self.max_memory_percent:
            self._current_batch_size = max(self.min_batch_size, int(size * 0.7), 1)
        elif current_percent < self.max_memory_percent * 0.7:
            grown = max(int(size * 1.3), size + 1)
            self._current_batch_size = min(self.max_batch_size, grown)
|
|
973
|
+
|
|
974
|
+
|
|
975
|
+
# =============================================================================
|
|
976
|
+
# Memory Leak Detector
|
|
977
|
+
# =============================================================================
|
|
978
|
+
|
|
979
|
+
|
|
980
|
+
@dataclass
class LeakSuspect:
    """One object type whose instance count grew enough to be flagged."""

    # Type name used as the grouping key when counting objects.
    type_name: str
    # Instance count in the baseline.
    count_initial: int
    # Instance count at detection time.
    count_final: int
    # count_final - count_initial.
    count_delta: int
    # Growth relative to the baseline, in percent.
    growth_percent: float
    # Optional descriptions of referrers; empty unless filled by the caller.
    sample_referrers: list[str] = field(default_factory=list)
|
|
990
|
+
|
|
991
|
+
|
|
992
|
+
class MemoryLeakDetector:
    """Flags object types whose live instance count grows between two points.

    ``start`` records a per-type count of the objects reported by the
    garbage collector; ``detect`` counts again and reports the types
    that grew past both configured thresholds.

    Example:
        detector = MemoryLeakDetector()

        detector.start()
        do_operations()
        suspects = detector.detect()

        for suspect in suspects:
            print(f"Possible leak: {suspect.type_name} +{suspect.count_delta}")
    """

    def __init__(
        self,
        min_growth_count: int = 100,
        min_growth_percent: float = 10.0,
    ):
        """Store the thresholds and start with an empty baseline.

        Args:
            min_growth_count: Minimum object count growth to flag
            min_growth_percent: Minimum growth percentage to flag
        """
        self._initial_counts: dict[str, int] = {}
        self.min_growth_percent = min_growth_percent
        self.min_growth_count = min_growth_count

    def start(self) -> None:
        """Run a collection, then record the baseline object counts."""
        gc.collect()
        self._initial_counts = self._count_objects()

    def detect(self) -> list[LeakSuspect]:
        """Compare current object counts with the baseline.

        A type is reported when its count grew by at least
        ``min_growth_count`` and by at least ``min_growth_percent``
        relative to the baseline. A type absent from the baseline is
        treated as 100% growth.

        Returns:
            Suspected leaks, largest count increase first.
        """
        gc.collect()
        baseline = self._initial_counts
        flagged = []

        for name, now in self._count_objects().items():
            before = baseline.get(name, 0)
            grown_by = now - before

            if grown_by >= self.min_growth_count:
                percent = (grown_by / before) * 100 if before > 0 else 100.0

                if percent >= self.min_growth_percent:
                    flagged.append(
                        LeakSuspect(
                            type_name=name,
                            count_initial=before,
                            count_final=now,
                            count_delta=grown_by,
                            growth_percent=percent,
                        )
                    )

        return sorted(flagged, key=lambda suspect: suspect.count_delta, reverse=True)

    def _count_objects(self) -> dict[str, int]:
        """Tally the objects returned by ``gc.get_objects()`` by type name."""
        tally: dict[str, int] = {}

        for tracked in gc.get_objects():
            try:
                key = type(tracked).__name__
                tally[key] = 1 + tally.get(key, 0)
            except Exception:
                # Objects that cannot be inspected are skipped.
                continue

        return tally
|
|
1076
|
+
|
|
1077
|
+
|
|
1078
|
+
# =============================================================================
|
|
1079
|
+
# Convenience Functions
|
|
1080
|
+
# =============================================================================
|
|
1081
|
+
|
|
1082
|
+
|
|
1083
|
+
def get_memory_usage() -> dict[str, float]:
    """Take one snapshot and expose its main figures as a dictionary.

    Returns:
        Process RSS/VMS in MB, process percent, and system
        available/total MB and percent, keyed by the snapshot
        attribute names.
    """
    snap = get_memory_reader().get_snapshot()
    reported_fields = (
        "process_rss_mb",
        "process_vms_mb",
        "process_percent",
        "system_available_mb",
        "system_total_mb",
        "system_percent",
    )
    return {name: getattr(snap, name) for name in reported_fields}
|
|
1100
|
+
|
|
1101
|
+
|
|
1102
|
+
def check_memory_available(
    required_mb: float,
    safety_margin: float = 0.2,
) -> bool:
    """Tell whether the system has room for an allocation plus a buffer.

    Args:
        required_mb: Required memory in MB
        safety_margin: Extra fraction added on top of the requirement
            (0.2 = 20% buffer)

    Returns:
        True if the available system memory from a fresh snapshot is at
        least ``required_mb * (1 + safety_margin)``.
    """
    free_mb = get_memory_reader().get_snapshot().system_available_mb
    needed_mb = (1 + safety_margin) * required_mb
    return not (free_mb < needed_mb)
|
|
1122
|
+
|
|
1123
|
+
|
|
1124
|
+
def estimate_batch_size(
    item_size_bytes: int,
    target_memory_mb: float = 100,
) -> int:
    """Estimate optimal batch size for given item size.

    Args:
        item_size_bytes: Size of each item in bytes; must be positive
        target_memory_mb: Target memory usage per batch

    Returns:
        Number of items that fit in the target memory, never less
        than 1.

    Raises:
        ValueError: If ``item_size_bytes`` is zero or negative.
    """
    if item_size_bytes <= 0:
        # Zero would divide by zero; a negative size has no meaning.
        raise ValueError(
            f"item_size_bytes must be positive, got {item_size_bytes}"
        )

    target_bytes = target_memory_mb * 1024 * 1024
    return max(1, int(target_bytes / item_size_bytes))
|
|
1139
|
+
|
|
1140
|
+
|
|
1141
|
+
def force_gc() -> dict[str, int]:
    """Collect every GC generation and report what happened.

    Returns:
        The value returned by ``gc.collect`` for generations 0, 1 and 2
        under ``"gen0"``..``"gen2"``, plus ``"freed_mb"``: process RSS
        before the collections minus RSS after them.
    """
    rss_before = get_memory_usage()["process_rss_mb"]

    collected = {f"gen{generation}": gc.collect(generation) for generation in range(3)}

    rss_after = get_memory_usage()["process_rss_mb"]
    collected["freed_mb"] = rss_before - rss_after

    return collected
|
|
1160
|
+
|
|
1161
|
+
|
|
1162
|
+
def monitor_function(
    func: Callable[..., R],
    *args: Any,
    **kwargs: Any,
) -> tuple[R, MemoryReport]:
    """Run ``func`` while a default ``MemoryMonitor`` samples memory.

    The monitor is stopped even when ``func`` raises; in that case the
    exception propagates and no tuple is returned.

    Args:
        func: Function to execute
        *args: Positional arguments forwarded to ``func``
        **kwargs: Keyword arguments forwarded to ``func``

    Returns:
        Tuple of (function result, memory report)
    """
    watcher = MemoryMonitor()
    watcher.start()

    try:
        outcome = func(*args, **kwargs)
    finally:
        memory_report = watcher.stop()

    return outcome, memory_report
|