truthound 1.0.8__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- truthound/__init__.py +162 -0
- truthound/adapters.py +100 -0
- truthound/api.py +365 -0
- truthound/audit/__init__.py +248 -0
- truthound/audit/core.py +967 -0
- truthound/audit/filters.py +620 -0
- truthound/audit/formatters.py +707 -0
- truthound/audit/logger.py +902 -0
- truthound/audit/middleware.py +571 -0
- truthound/audit/storage.py +1083 -0
- truthound/benchmark/__init__.py +123 -0
- truthound/benchmark/base.py +757 -0
- truthound/benchmark/comparison.py +635 -0
- truthound/benchmark/generators.py +706 -0
- truthound/benchmark/reporters.py +718 -0
- truthound/benchmark/runner.py +635 -0
- truthound/benchmark/scenarios.py +712 -0
- truthound/cache.py +252 -0
- truthound/checkpoint/__init__.py +136 -0
- truthound/checkpoint/actions/__init__.py +164 -0
- truthound/checkpoint/actions/base.py +324 -0
- truthound/checkpoint/actions/custom.py +234 -0
- truthound/checkpoint/actions/discord_notify.py +290 -0
- truthound/checkpoint/actions/email_notify.py +405 -0
- truthound/checkpoint/actions/github_action.py +406 -0
- truthound/checkpoint/actions/opsgenie.py +1499 -0
- truthound/checkpoint/actions/pagerduty.py +226 -0
- truthound/checkpoint/actions/slack_notify.py +233 -0
- truthound/checkpoint/actions/store_result.py +249 -0
- truthound/checkpoint/actions/teams_notify.py +1570 -0
- truthound/checkpoint/actions/telegram_notify.py +419 -0
- truthound/checkpoint/actions/update_docs.py +552 -0
- truthound/checkpoint/actions/webhook.py +293 -0
- truthound/checkpoint/analytics/__init__.py +147 -0
- truthound/checkpoint/analytics/aggregations/__init__.py +23 -0
- truthound/checkpoint/analytics/aggregations/rollup.py +481 -0
- truthound/checkpoint/analytics/aggregations/time_bucket.py +306 -0
- truthound/checkpoint/analytics/analyzers/__init__.py +17 -0
- truthound/checkpoint/analytics/analyzers/anomaly.py +386 -0
- truthound/checkpoint/analytics/analyzers/base.py +270 -0
- truthound/checkpoint/analytics/analyzers/forecast.py +421 -0
- truthound/checkpoint/analytics/analyzers/trend.py +314 -0
- truthound/checkpoint/analytics/models.py +292 -0
- truthound/checkpoint/analytics/protocols.py +549 -0
- truthound/checkpoint/analytics/service.py +718 -0
- truthound/checkpoint/analytics/stores/__init__.py +16 -0
- truthound/checkpoint/analytics/stores/base.py +306 -0
- truthound/checkpoint/analytics/stores/memory_store.py +353 -0
- truthound/checkpoint/analytics/stores/sqlite_store.py +557 -0
- truthound/checkpoint/analytics/stores/timescale_store.py +501 -0
- truthound/checkpoint/async_actions.py +794 -0
- truthound/checkpoint/async_base.py +708 -0
- truthound/checkpoint/async_checkpoint.py +617 -0
- truthound/checkpoint/async_runner.py +639 -0
- truthound/checkpoint/checkpoint.py +527 -0
- truthound/checkpoint/ci/__init__.py +61 -0
- truthound/checkpoint/ci/detector.py +355 -0
- truthound/checkpoint/ci/reporter.py +436 -0
- truthound/checkpoint/ci/templates.py +454 -0
- truthound/checkpoint/circuitbreaker/__init__.py +133 -0
- truthound/checkpoint/circuitbreaker/breaker.py +542 -0
- truthound/checkpoint/circuitbreaker/core.py +252 -0
- truthound/checkpoint/circuitbreaker/detection.py +459 -0
- truthound/checkpoint/circuitbreaker/middleware.py +389 -0
- truthound/checkpoint/circuitbreaker/registry.py +357 -0
- truthound/checkpoint/distributed/__init__.py +139 -0
- truthound/checkpoint/distributed/backends/__init__.py +35 -0
- truthound/checkpoint/distributed/backends/celery_backend.py +503 -0
- truthound/checkpoint/distributed/backends/kubernetes_backend.py +696 -0
- truthound/checkpoint/distributed/backends/local_backend.py +397 -0
- truthound/checkpoint/distributed/backends/ray_backend.py +625 -0
- truthound/checkpoint/distributed/base.py +774 -0
- truthound/checkpoint/distributed/orchestrator.py +765 -0
- truthound/checkpoint/distributed/protocols.py +842 -0
- truthound/checkpoint/distributed/registry.py +449 -0
- truthound/checkpoint/idempotency/__init__.py +120 -0
- truthound/checkpoint/idempotency/core.py +295 -0
- truthound/checkpoint/idempotency/fingerprint.py +454 -0
- truthound/checkpoint/idempotency/locking.py +604 -0
- truthound/checkpoint/idempotency/service.py +592 -0
- truthound/checkpoint/idempotency/stores.py +653 -0
- truthound/checkpoint/monitoring/__init__.py +134 -0
- truthound/checkpoint/monitoring/aggregators/__init__.py +15 -0
- truthound/checkpoint/monitoring/aggregators/base.py +372 -0
- truthound/checkpoint/monitoring/aggregators/realtime.py +300 -0
- truthound/checkpoint/monitoring/aggregators/window.py +493 -0
- truthound/checkpoint/monitoring/collectors/__init__.py +17 -0
- truthound/checkpoint/monitoring/collectors/base.py +257 -0
- truthound/checkpoint/monitoring/collectors/memory_collector.py +617 -0
- truthound/checkpoint/monitoring/collectors/prometheus_collector.py +451 -0
- truthound/checkpoint/monitoring/collectors/redis_collector.py +518 -0
- truthound/checkpoint/monitoring/events.py +410 -0
- truthound/checkpoint/monitoring/protocols.py +636 -0
- truthound/checkpoint/monitoring/service.py +578 -0
- truthound/checkpoint/monitoring/views/__init__.py +17 -0
- truthound/checkpoint/monitoring/views/base.py +172 -0
- truthound/checkpoint/monitoring/views/queue_view.py +220 -0
- truthound/checkpoint/monitoring/views/task_view.py +240 -0
- truthound/checkpoint/monitoring/views/worker_view.py +263 -0
- truthound/checkpoint/registry.py +337 -0
- truthound/checkpoint/runner.py +356 -0
- truthound/checkpoint/transaction/__init__.py +133 -0
- truthound/checkpoint/transaction/base.py +389 -0
- truthound/checkpoint/transaction/compensatable.py +537 -0
- truthound/checkpoint/transaction/coordinator.py +576 -0
- truthound/checkpoint/transaction/executor.py +622 -0
- truthound/checkpoint/transaction/idempotency.py +534 -0
- truthound/checkpoint/transaction/saga/__init__.py +143 -0
- truthound/checkpoint/transaction/saga/builder.py +584 -0
- truthound/checkpoint/transaction/saga/definition.py +515 -0
- truthound/checkpoint/transaction/saga/event_store.py +542 -0
- truthound/checkpoint/transaction/saga/patterns.py +833 -0
- truthound/checkpoint/transaction/saga/runner.py +718 -0
- truthound/checkpoint/transaction/saga/state_machine.py +793 -0
- truthound/checkpoint/transaction/saga/strategies.py +780 -0
- truthound/checkpoint/transaction/saga/testing.py +886 -0
- truthound/checkpoint/triggers/__init__.py +58 -0
- truthound/checkpoint/triggers/base.py +237 -0
- truthound/checkpoint/triggers/event.py +385 -0
- truthound/checkpoint/triggers/schedule.py +355 -0
- truthound/cli.py +2358 -0
- truthound/cli_modules/__init__.py +124 -0
- truthound/cli_modules/advanced/__init__.py +45 -0
- truthound/cli_modules/advanced/benchmark.py +343 -0
- truthound/cli_modules/advanced/docs.py +225 -0
- truthound/cli_modules/advanced/lineage.py +209 -0
- truthound/cli_modules/advanced/ml.py +320 -0
- truthound/cli_modules/advanced/realtime.py +196 -0
- truthound/cli_modules/checkpoint/__init__.py +46 -0
- truthound/cli_modules/checkpoint/init.py +114 -0
- truthound/cli_modules/checkpoint/list.py +71 -0
- truthound/cli_modules/checkpoint/run.py +159 -0
- truthound/cli_modules/checkpoint/validate.py +67 -0
- truthound/cli_modules/common/__init__.py +71 -0
- truthound/cli_modules/common/errors.py +414 -0
- truthound/cli_modules/common/options.py +419 -0
- truthound/cli_modules/common/output.py +507 -0
- truthound/cli_modules/common/protocol.py +552 -0
- truthound/cli_modules/core/__init__.py +48 -0
- truthound/cli_modules/core/check.py +123 -0
- truthound/cli_modules/core/compare.py +104 -0
- truthound/cli_modules/core/learn.py +57 -0
- truthound/cli_modules/core/mask.py +77 -0
- truthound/cli_modules/core/profile.py +65 -0
- truthound/cli_modules/core/scan.py +61 -0
- truthound/cli_modules/profiler/__init__.py +51 -0
- truthound/cli_modules/profiler/auto_profile.py +175 -0
- truthound/cli_modules/profiler/metadata.py +107 -0
- truthound/cli_modules/profiler/suite.py +283 -0
- truthound/cli_modules/registry.py +431 -0
- truthound/cli_modules/scaffolding/__init__.py +89 -0
- truthound/cli_modules/scaffolding/base.py +631 -0
- truthound/cli_modules/scaffolding/commands.py +545 -0
- truthound/cli_modules/scaffolding/plugins.py +1072 -0
- truthound/cli_modules/scaffolding/reporters.py +594 -0
- truthound/cli_modules/scaffolding/validators.py +1127 -0
- truthound/common/__init__.py +18 -0
- truthound/common/resilience/__init__.py +130 -0
- truthound/common/resilience/bulkhead.py +266 -0
- truthound/common/resilience/circuit_breaker.py +516 -0
- truthound/common/resilience/composite.py +332 -0
- truthound/common/resilience/config.py +292 -0
- truthound/common/resilience/protocols.py +217 -0
- truthound/common/resilience/rate_limiter.py +404 -0
- truthound/common/resilience/retry.py +341 -0
- truthound/datadocs/__init__.py +260 -0
- truthound/datadocs/base.py +571 -0
- truthound/datadocs/builder.py +761 -0
- truthound/datadocs/charts.py +764 -0
- truthound/datadocs/dashboard/__init__.py +63 -0
- truthound/datadocs/dashboard/app.py +576 -0
- truthound/datadocs/dashboard/components.py +584 -0
- truthound/datadocs/dashboard/state.py +240 -0
- truthound/datadocs/engine/__init__.py +46 -0
- truthound/datadocs/engine/context.py +376 -0
- truthound/datadocs/engine/pipeline.py +618 -0
- truthound/datadocs/engine/registry.py +469 -0
- truthound/datadocs/exporters/__init__.py +49 -0
- truthound/datadocs/exporters/base.py +198 -0
- truthound/datadocs/exporters/html.py +178 -0
- truthound/datadocs/exporters/json_exporter.py +253 -0
- truthound/datadocs/exporters/markdown.py +284 -0
- truthound/datadocs/exporters/pdf.py +392 -0
- truthound/datadocs/i18n/__init__.py +86 -0
- truthound/datadocs/i18n/catalog.py +960 -0
- truthound/datadocs/i18n/formatting.py +505 -0
- truthound/datadocs/i18n/loader.py +256 -0
- truthound/datadocs/i18n/plurals.py +378 -0
- truthound/datadocs/renderers/__init__.py +42 -0
- truthound/datadocs/renderers/base.py +401 -0
- truthound/datadocs/renderers/custom.py +342 -0
- truthound/datadocs/renderers/jinja.py +697 -0
- truthound/datadocs/sections.py +736 -0
- truthound/datadocs/styles.py +931 -0
- truthound/datadocs/themes/__init__.py +101 -0
- truthound/datadocs/themes/base.py +336 -0
- truthound/datadocs/themes/default.py +417 -0
- truthound/datadocs/themes/enterprise.py +419 -0
- truthound/datadocs/themes/loader.py +336 -0
- truthound/datadocs/themes.py +301 -0
- truthound/datadocs/transformers/__init__.py +57 -0
- truthound/datadocs/transformers/base.py +268 -0
- truthound/datadocs/transformers/enrichers.py +544 -0
- truthound/datadocs/transformers/filters.py +447 -0
- truthound/datadocs/transformers/i18n.py +468 -0
- truthound/datadocs/versioning/__init__.py +62 -0
- truthound/datadocs/versioning/diff.py +639 -0
- truthound/datadocs/versioning/storage.py +497 -0
- truthound/datadocs/versioning/version.py +358 -0
- truthound/datasources/__init__.py +223 -0
- truthound/datasources/_async_protocols.py +222 -0
- truthound/datasources/_protocols.py +159 -0
- truthound/datasources/adapters.py +428 -0
- truthound/datasources/async_base.py +599 -0
- truthound/datasources/async_factory.py +511 -0
- truthound/datasources/base.py +516 -0
- truthound/datasources/factory.py +433 -0
- truthound/datasources/nosql/__init__.py +47 -0
- truthound/datasources/nosql/base.py +487 -0
- truthound/datasources/nosql/elasticsearch.py +801 -0
- truthound/datasources/nosql/mongodb.py +636 -0
- truthound/datasources/pandas_optimized.py +582 -0
- truthound/datasources/pandas_source.py +216 -0
- truthound/datasources/polars_source.py +395 -0
- truthound/datasources/spark_source.py +479 -0
- truthound/datasources/sql/__init__.py +154 -0
- truthound/datasources/sql/base.py +710 -0
- truthound/datasources/sql/bigquery.py +410 -0
- truthound/datasources/sql/cloud_base.py +199 -0
- truthound/datasources/sql/databricks.py +471 -0
- truthound/datasources/sql/mysql.py +316 -0
- truthound/datasources/sql/oracle.py +427 -0
- truthound/datasources/sql/postgresql.py +321 -0
- truthound/datasources/sql/redshift.py +479 -0
- truthound/datasources/sql/snowflake.py +439 -0
- truthound/datasources/sql/sqlite.py +286 -0
- truthound/datasources/sql/sqlserver.py +437 -0
- truthound/datasources/streaming/__init__.py +47 -0
- truthound/datasources/streaming/base.py +350 -0
- truthound/datasources/streaming/kafka.py +670 -0
- truthound/decorators.py +98 -0
- truthound/docs/__init__.py +69 -0
- truthound/docs/extractor.py +971 -0
- truthound/docs/generator.py +601 -0
- truthound/docs/parser.py +1037 -0
- truthound/docs/renderer.py +999 -0
- truthound/drift/__init__.py +22 -0
- truthound/drift/compare.py +189 -0
- truthound/drift/detectors.py +464 -0
- truthound/drift/report.py +160 -0
- truthound/execution/__init__.py +65 -0
- truthound/execution/_protocols.py +324 -0
- truthound/execution/base.py +576 -0
- truthound/execution/distributed/__init__.py +179 -0
- truthound/execution/distributed/aggregations.py +731 -0
- truthound/execution/distributed/arrow_bridge.py +817 -0
- truthound/execution/distributed/base.py +550 -0
- truthound/execution/distributed/dask_engine.py +976 -0
- truthound/execution/distributed/mixins.py +766 -0
- truthound/execution/distributed/protocols.py +756 -0
- truthound/execution/distributed/ray_engine.py +1127 -0
- truthound/execution/distributed/registry.py +446 -0
- truthound/execution/distributed/spark_engine.py +1011 -0
- truthound/execution/distributed/validator_adapter.py +682 -0
- truthound/execution/pandas_engine.py +401 -0
- truthound/execution/polars_engine.py +497 -0
- truthound/execution/pushdown/__init__.py +230 -0
- truthound/execution/pushdown/ast.py +1550 -0
- truthound/execution/pushdown/builder.py +1550 -0
- truthound/execution/pushdown/dialects.py +1072 -0
- truthound/execution/pushdown/executor.py +829 -0
- truthound/execution/pushdown/optimizer.py +1041 -0
- truthound/execution/sql_engine.py +518 -0
- truthound/infrastructure/__init__.py +189 -0
- truthound/infrastructure/audit.py +1515 -0
- truthound/infrastructure/config.py +1133 -0
- truthound/infrastructure/encryption.py +1132 -0
- truthound/infrastructure/logging.py +1503 -0
- truthound/infrastructure/metrics.py +1220 -0
- truthound/lineage/__init__.py +89 -0
- truthound/lineage/base.py +746 -0
- truthound/lineage/impact_analysis.py +474 -0
- truthound/lineage/integrations/__init__.py +22 -0
- truthound/lineage/integrations/openlineage.py +548 -0
- truthound/lineage/tracker.py +512 -0
- truthound/lineage/visualization/__init__.py +33 -0
- truthound/lineage/visualization/protocols.py +145 -0
- truthound/lineage/visualization/renderers/__init__.py +20 -0
- truthound/lineage/visualization/renderers/cytoscape.py +329 -0
- truthound/lineage/visualization/renderers/d3.py +331 -0
- truthound/lineage/visualization/renderers/graphviz.py +276 -0
- truthound/lineage/visualization/renderers/mermaid.py +308 -0
- truthound/maskers.py +113 -0
- truthound/ml/__init__.py +124 -0
- truthound/ml/anomaly_models/__init__.py +31 -0
- truthound/ml/anomaly_models/ensemble.py +362 -0
- truthound/ml/anomaly_models/isolation_forest.py +444 -0
- truthound/ml/anomaly_models/statistical.py +392 -0
- truthound/ml/base.py +1178 -0
- truthound/ml/drift_detection/__init__.py +26 -0
- truthound/ml/drift_detection/concept.py +381 -0
- truthound/ml/drift_detection/distribution.py +361 -0
- truthound/ml/drift_detection/feature.py +442 -0
- truthound/ml/drift_detection/multivariate.py +495 -0
- truthound/ml/monitoring/__init__.py +88 -0
- truthound/ml/monitoring/alerting/__init__.py +33 -0
- truthound/ml/monitoring/alerting/handlers.py +427 -0
- truthound/ml/monitoring/alerting/rules.py +508 -0
- truthound/ml/monitoring/collectors/__init__.py +19 -0
- truthound/ml/monitoring/collectors/composite.py +105 -0
- truthound/ml/monitoring/collectors/drift.py +324 -0
- truthound/ml/monitoring/collectors/performance.py +179 -0
- truthound/ml/monitoring/collectors/quality.py +369 -0
- truthound/ml/monitoring/monitor.py +536 -0
- truthound/ml/monitoring/protocols.py +451 -0
- truthound/ml/monitoring/stores/__init__.py +15 -0
- truthound/ml/monitoring/stores/memory.py +201 -0
- truthound/ml/monitoring/stores/prometheus.py +296 -0
- truthound/ml/rule_learning/__init__.py +25 -0
- truthound/ml/rule_learning/constraint_miner.py +443 -0
- truthound/ml/rule_learning/pattern_learner.py +499 -0
- truthound/ml/rule_learning/profile_learner.py +462 -0
- truthound/multitenancy/__init__.py +326 -0
- truthound/multitenancy/core.py +852 -0
- truthound/multitenancy/integration.py +597 -0
- truthound/multitenancy/isolation.py +630 -0
- truthound/multitenancy/manager.py +770 -0
- truthound/multitenancy/middleware.py +765 -0
- truthound/multitenancy/quota.py +537 -0
- truthound/multitenancy/resolvers.py +603 -0
- truthound/multitenancy/storage.py +703 -0
- truthound/observability/__init__.py +307 -0
- truthound/observability/context.py +531 -0
- truthound/observability/instrumentation.py +611 -0
- truthound/observability/logging.py +887 -0
- truthound/observability/metrics.py +1157 -0
- truthound/observability/tracing/__init__.py +178 -0
- truthound/observability/tracing/baggage.py +310 -0
- truthound/observability/tracing/config.py +426 -0
- truthound/observability/tracing/exporter.py +787 -0
- truthound/observability/tracing/integration.py +1018 -0
- truthound/observability/tracing/otel/__init__.py +146 -0
- truthound/observability/tracing/otel/adapter.py +982 -0
- truthound/observability/tracing/otel/bridge.py +1177 -0
- truthound/observability/tracing/otel/compat.py +681 -0
- truthound/observability/tracing/otel/config.py +691 -0
- truthound/observability/tracing/otel/detection.py +327 -0
- truthound/observability/tracing/otel/protocols.py +426 -0
- truthound/observability/tracing/processor.py +561 -0
- truthound/observability/tracing/propagator.py +757 -0
- truthound/observability/tracing/provider.py +569 -0
- truthound/observability/tracing/resource.py +515 -0
- truthound/observability/tracing/sampler.py +487 -0
- truthound/observability/tracing/span.py +676 -0
- truthound/plugins/__init__.py +198 -0
- truthound/plugins/base.py +599 -0
- truthound/plugins/cli.py +680 -0
- truthound/plugins/dependencies/__init__.py +42 -0
- truthound/plugins/dependencies/graph.py +422 -0
- truthound/plugins/dependencies/resolver.py +417 -0
- truthound/plugins/discovery.py +379 -0
- truthound/plugins/docs/__init__.py +46 -0
- truthound/plugins/docs/extractor.py +444 -0
- truthound/plugins/docs/renderer.py +499 -0
- truthound/plugins/enterprise_manager.py +877 -0
- truthound/plugins/examples/__init__.py +19 -0
- truthound/plugins/examples/custom_validators.py +317 -0
- truthound/plugins/examples/slack_notifier.py +312 -0
- truthound/plugins/examples/xml_reporter.py +254 -0
- truthound/plugins/hooks.py +558 -0
- truthound/plugins/lifecycle/__init__.py +43 -0
- truthound/plugins/lifecycle/hot_reload.py +402 -0
- truthound/plugins/lifecycle/manager.py +371 -0
- truthound/plugins/manager.py +736 -0
- truthound/plugins/registry.py +338 -0
- truthound/plugins/security/__init__.py +93 -0
- truthound/plugins/security/exceptions.py +332 -0
- truthound/plugins/security/policies.py +348 -0
- truthound/plugins/security/protocols.py +643 -0
- truthound/plugins/security/sandbox/__init__.py +45 -0
- truthound/plugins/security/sandbox/context.py +158 -0
- truthound/plugins/security/sandbox/engines/__init__.py +19 -0
- truthound/plugins/security/sandbox/engines/container.py +379 -0
- truthound/plugins/security/sandbox/engines/noop.py +144 -0
- truthound/plugins/security/sandbox/engines/process.py +336 -0
- truthound/plugins/security/sandbox/factory.py +211 -0
- truthound/plugins/security/signing/__init__.py +57 -0
- truthound/plugins/security/signing/service.py +330 -0
- truthound/plugins/security/signing/trust_store.py +368 -0
- truthound/plugins/security/signing/verifier.py +459 -0
- truthound/plugins/versioning/__init__.py +41 -0
- truthound/plugins/versioning/constraints.py +297 -0
- truthound/plugins/versioning/resolver.py +329 -0
- truthound/profiler/__init__.py +1729 -0
- truthound/profiler/_lazy.py +452 -0
- truthound/profiler/ab_testing/__init__.py +80 -0
- truthound/profiler/ab_testing/analysis.py +449 -0
- truthound/profiler/ab_testing/base.py +257 -0
- truthound/profiler/ab_testing/experiment.py +395 -0
- truthound/profiler/ab_testing/tracking.py +368 -0
- truthound/profiler/auto_threshold.py +1170 -0
- truthound/profiler/base.py +579 -0
- truthound/profiler/cache_patterns.py +911 -0
- truthound/profiler/caching.py +1303 -0
- truthound/profiler/column_profiler.py +712 -0
- truthound/profiler/comparison.py +1007 -0
- truthound/profiler/custom_patterns.py +1170 -0
- truthound/profiler/dashboard/__init__.py +50 -0
- truthound/profiler/dashboard/app.py +476 -0
- truthound/profiler/dashboard/components.py +457 -0
- truthound/profiler/dashboard/config.py +72 -0
- truthound/profiler/distributed/__init__.py +83 -0
- truthound/profiler/distributed/base.py +281 -0
- truthound/profiler/distributed/dask_backend.py +498 -0
- truthound/profiler/distributed/local_backend.py +293 -0
- truthound/profiler/distributed/profiler.py +304 -0
- truthound/profiler/distributed/ray_backend.py +374 -0
- truthound/profiler/distributed/spark_backend.py +375 -0
- truthound/profiler/distributed.py +1366 -0
- truthound/profiler/enterprise_sampling.py +1065 -0
- truthound/profiler/errors.py +488 -0
- truthound/profiler/evolution/__init__.py +91 -0
- truthound/profiler/evolution/alerts.py +426 -0
- truthound/profiler/evolution/changes.py +206 -0
- truthound/profiler/evolution/compatibility.py +365 -0
- truthound/profiler/evolution/detector.py +372 -0
- truthound/profiler/evolution/protocols.py +121 -0
- truthound/profiler/generators/__init__.py +48 -0
- truthound/profiler/generators/base.py +384 -0
- truthound/profiler/generators/ml_rules.py +375 -0
- truthound/profiler/generators/pattern_rules.py +384 -0
- truthound/profiler/generators/schema_rules.py +267 -0
- truthound/profiler/generators/stats_rules.py +324 -0
- truthound/profiler/generators/suite_generator.py +857 -0
- truthound/profiler/i18n.py +1542 -0
- truthound/profiler/incremental.py +554 -0
- truthound/profiler/incremental_validation.py +1710 -0
- truthound/profiler/integration/__init__.py +73 -0
- truthound/profiler/integration/adapters.py +345 -0
- truthound/profiler/integration/context.py +371 -0
- truthound/profiler/integration/executor.py +527 -0
- truthound/profiler/integration/naming.py +75 -0
- truthound/profiler/integration/protocols.py +243 -0
- truthound/profiler/memory.py +1185 -0
- truthound/profiler/migration/__init__.py +60 -0
- truthound/profiler/migration/base.py +345 -0
- truthound/profiler/migration/manager.py +444 -0
- truthound/profiler/migration/v1_0_to_v1_1.py +484 -0
- truthound/profiler/ml/__init__.py +73 -0
- truthound/profiler/ml/base.py +244 -0
- truthound/profiler/ml/classifier.py +507 -0
- truthound/profiler/ml/feature_extraction.py +604 -0
- truthound/profiler/ml/pretrained.py +448 -0
- truthound/profiler/ml_inference.py +1276 -0
- truthound/profiler/native_patterns.py +815 -0
- truthound/profiler/observability.py +1184 -0
- truthound/profiler/process_timeout.py +1566 -0
- truthound/profiler/progress.py +568 -0
- truthound/profiler/progress_callbacks.py +1734 -0
- truthound/profiler/quality.py +1345 -0
- truthound/profiler/resilience.py +1180 -0
- truthound/profiler/sampled_matcher.py +794 -0
- truthound/profiler/sampling.py +1288 -0
- truthound/profiler/scheduling/__init__.py +82 -0
- truthound/profiler/scheduling/protocols.py +214 -0
- truthound/profiler/scheduling/scheduler.py +474 -0
- truthound/profiler/scheduling/storage.py +457 -0
- truthound/profiler/scheduling/triggers.py +449 -0
- truthound/profiler/schema.py +603 -0
- truthound/profiler/streaming.py +685 -0
- truthound/profiler/streaming_patterns.py +1354 -0
- truthound/profiler/suite_cli.py +625 -0
- truthound/profiler/suite_config.py +789 -0
- truthound/profiler/suite_export.py +1268 -0
- truthound/profiler/table_profiler.py +547 -0
- truthound/profiler/timeout.py +565 -0
- truthound/profiler/validation.py +1532 -0
- truthound/profiler/visualization/__init__.py +118 -0
- truthound/profiler/visualization/base.py +346 -0
- truthound/profiler/visualization/generator.py +1259 -0
- truthound/profiler/visualization/plotly_renderer.py +811 -0
- truthound/profiler/visualization/renderers.py +669 -0
- truthound/profiler/visualization/sections.py +540 -0
- truthound/profiler/visualization.py +2122 -0
- truthound/profiler/yaml_validation.py +1151 -0
- truthound/py.typed +0 -0
- truthound/ratelimit/__init__.py +248 -0
- truthound/ratelimit/algorithms.py +1108 -0
- truthound/ratelimit/core.py +573 -0
- truthound/ratelimit/integration.py +532 -0
- truthound/ratelimit/limiter.py +663 -0
- truthound/ratelimit/middleware.py +700 -0
- truthound/ratelimit/policy.py +792 -0
- truthound/ratelimit/storage.py +763 -0
- truthound/rbac/__init__.py +340 -0
- truthound/rbac/core.py +976 -0
- truthound/rbac/integration.py +760 -0
- truthound/rbac/manager.py +1052 -0
- truthound/rbac/middleware.py +842 -0
- truthound/rbac/policy.py +954 -0
- truthound/rbac/storage.py +878 -0
- truthound/realtime/__init__.py +141 -0
- truthound/realtime/adapters/__init__.py +43 -0
- truthound/realtime/adapters/base.py +533 -0
- truthound/realtime/adapters/kafka.py +487 -0
- truthound/realtime/adapters/kinesis.py +479 -0
- truthound/realtime/adapters/mock.py +243 -0
- truthound/realtime/base.py +553 -0
- truthound/realtime/factory.py +382 -0
- truthound/realtime/incremental.py +660 -0
- truthound/realtime/processing/__init__.py +67 -0
- truthound/realtime/processing/exactly_once.py +575 -0
- truthound/realtime/processing/state.py +547 -0
- truthound/realtime/processing/windows.py +647 -0
- truthound/realtime/protocols.py +569 -0
- truthound/realtime/streaming.py +605 -0
- truthound/realtime/testing/__init__.py +32 -0
- truthound/realtime/testing/containers.py +615 -0
- truthound/realtime/testing/fixtures.py +484 -0
- truthound/report.py +280 -0
- truthound/reporters/__init__.py +46 -0
- truthound/reporters/_protocols.py +30 -0
- truthound/reporters/base.py +324 -0
- truthound/reporters/ci/__init__.py +66 -0
- truthound/reporters/ci/azure.py +436 -0
- truthound/reporters/ci/base.py +509 -0
- truthound/reporters/ci/bitbucket.py +567 -0
- truthound/reporters/ci/circleci.py +547 -0
- truthound/reporters/ci/detection.py +364 -0
- truthound/reporters/ci/factory.py +182 -0
- truthound/reporters/ci/github.py +388 -0
- truthound/reporters/ci/gitlab.py +471 -0
- truthound/reporters/ci/jenkins.py +525 -0
- truthound/reporters/console_reporter.py +299 -0
- truthound/reporters/factory.py +211 -0
- truthound/reporters/html_reporter.py +524 -0
- truthound/reporters/json_reporter.py +256 -0
- truthound/reporters/markdown_reporter.py +280 -0
- truthound/reporters/sdk/__init__.py +174 -0
- truthound/reporters/sdk/builder.py +558 -0
- truthound/reporters/sdk/mixins.py +1150 -0
- truthound/reporters/sdk/schema.py +1493 -0
- truthound/reporters/sdk/templates.py +666 -0
- truthound/reporters/sdk/testing.py +968 -0
- truthound/scanners.py +170 -0
- truthound/scheduling/__init__.py +122 -0
- truthound/scheduling/cron.py +1136 -0
- truthound/scheduling/presets.py +212 -0
- truthound/schema.py +275 -0
- truthound/secrets/__init__.py +173 -0
- truthound/secrets/base.py +618 -0
- truthound/secrets/cloud.py +682 -0
- truthound/secrets/integration.py +507 -0
- truthound/secrets/manager.py +633 -0
- truthound/secrets/oidc/__init__.py +172 -0
- truthound/secrets/oidc/base.py +902 -0
- truthound/secrets/oidc/credential_provider.py +623 -0
- truthound/secrets/oidc/exchangers.py +1001 -0
- truthound/secrets/oidc/github/__init__.py +110 -0
- truthound/secrets/oidc/github/claims.py +718 -0
- truthound/secrets/oidc/github/enhanced_provider.py +693 -0
- truthound/secrets/oidc/github/trust_policy.py +742 -0
- truthound/secrets/oidc/github/verification.py +723 -0
- truthound/secrets/oidc/github/workflow.py +691 -0
- truthound/secrets/oidc/providers.py +825 -0
- truthound/secrets/providers.py +506 -0
- truthound/secrets/resolver.py +495 -0
- truthound/stores/__init__.py +177 -0
- truthound/stores/backends/__init__.py +18 -0
- truthound/stores/backends/_protocols.py +340 -0
- truthound/stores/backends/azure_blob.py +530 -0
- truthound/stores/backends/concurrent_filesystem.py +915 -0
- truthound/stores/backends/connection_pool.py +1365 -0
- truthound/stores/backends/database.py +743 -0
- truthound/stores/backends/filesystem.py +538 -0
- truthound/stores/backends/gcs.py +399 -0
- truthound/stores/backends/memory.py +354 -0
- truthound/stores/backends/s3.py +434 -0
- truthound/stores/backpressure/__init__.py +84 -0
- truthound/stores/backpressure/base.py +375 -0
- truthound/stores/backpressure/circuit_breaker.py +434 -0
- truthound/stores/backpressure/monitor.py +376 -0
- truthound/stores/backpressure/strategies.py +677 -0
- truthound/stores/base.py +551 -0
- truthound/stores/batching/__init__.py +65 -0
- truthound/stores/batching/base.py +305 -0
- truthound/stores/batching/buffer.py +370 -0
- truthound/stores/batching/store.py +248 -0
- truthound/stores/batching/writer.py +521 -0
- truthound/stores/caching/__init__.py +60 -0
- truthound/stores/caching/backends.py +684 -0
- truthound/stores/caching/base.py +356 -0
- truthound/stores/caching/store.py +305 -0
- truthound/stores/compression/__init__.py +193 -0
- truthound/stores/compression/adaptive.py +694 -0
- truthound/stores/compression/base.py +514 -0
- truthound/stores/compression/pipeline.py +868 -0
- truthound/stores/compression/providers.py +672 -0
- truthound/stores/compression/streaming.py +832 -0
- truthound/stores/concurrency/__init__.py +81 -0
- truthound/stores/concurrency/atomic.py +556 -0
- truthound/stores/concurrency/index.py +775 -0
- truthound/stores/concurrency/locks.py +576 -0
- truthound/stores/concurrency/manager.py +482 -0
- truthound/stores/encryption/__init__.py +297 -0
- truthound/stores/encryption/base.py +952 -0
- truthound/stores/encryption/keys.py +1191 -0
- truthound/stores/encryption/pipeline.py +903 -0
- truthound/stores/encryption/providers.py +953 -0
- truthound/stores/encryption/streaming.py +950 -0
- truthound/stores/expectations.py +227 -0
- truthound/stores/factory.py +246 -0
- truthound/stores/migration/__init__.py +75 -0
- truthound/stores/migration/base.py +480 -0
- truthound/stores/migration/manager.py +347 -0
- truthound/stores/migration/registry.py +382 -0
- truthound/stores/migration/store.py +559 -0
- truthound/stores/observability/__init__.py +106 -0
- truthound/stores/observability/audit.py +718 -0
- truthound/stores/observability/config.py +270 -0
- truthound/stores/observability/factory.py +208 -0
- truthound/stores/observability/metrics.py +636 -0
- truthound/stores/observability/protocols.py +410 -0
- truthound/stores/observability/store.py +570 -0
- truthound/stores/observability/tracing.py +784 -0
- truthound/stores/replication/__init__.py +76 -0
- truthound/stores/replication/base.py +260 -0
- truthound/stores/replication/monitor.py +269 -0
- truthound/stores/replication/store.py +439 -0
- truthound/stores/replication/syncer.py +391 -0
- truthound/stores/results.py +359 -0
- truthound/stores/retention/__init__.py +77 -0
- truthound/stores/retention/base.py +378 -0
- truthound/stores/retention/policies.py +621 -0
- truthound/stores/retention/scheduler.py +279 -0
- truthound/stores/retention/store.py +526 -0
- truthound/stores/streaming/__init__.py +138 -0
- truthound/stores/streaming/base.py +801 -0
- truthound/stores/streaming/database.py +984 -0
- truthound/stores/streaming/filesystem.py +719 -0
- truthound/stores/streaming/reader.py +629 -0
- truthound/stores/streaming/s3.py +843 -0
- truthound/stores/streaming/writer.py +790 -0
- truthound/stores/tiering/__init__.py +108 -0
- truthound/stores/tiering/base.py +462 -0
- truthound/stores/tiering/manager.py +249 -0
- truthound/stores/tiering/policies.py +692 -0
- truthound/stores/tiering/store.py +526 -0
- truthound/stores/versioning/__init__.py +56 -0
- truthound/stores/versioning/base.py +376 -0
- truthound/stores/versioning/store.py +660 -0
- truthound/stores/versioning/strategies.py +353 -0
- truthound/types.py +56 -0
- truthound/validators/__init__.py +774 -0
- truthound/validators/aggregate/__init__.py +27 -0
- truthound/validators/aggregate/central.py +116 -0
- truthound/validators/aggregate/extremes.py +116 -0
- truthound/validators/aggregate/spread.py +118 -0
- truthound/validators/aggregate/sum.py +64 -0
- truthound/validators/aggregate/type.py +78 -0
- truthound/validators/anomaly/__init__.py +93 -0
- truthound/validators/anomaly/base.py +431 -0
- truthound/validators/anomaly/ml_based.py +1190 -0
- truthound/validators/anomaly/multivariate.py +647 -0
- truthound/validators/anomaly/statistical.py +599 -0
- truthound/validators/base.py +1089 -0
- truthound/validators/business_rule/__init__.py +46 -0
- truthound/validators/business_rule/base.py +147 -0
- truthound/validators/business_rule/checksum.py +509 -0
- truthound/validators/business_rule/financial.py +526 -0
- truthound/validators/cache.py +733 -0
- truthound/validators/completeness/__init__.py +39 -0
- truthound/validators/completeness/conditional.py +73 -0
- truthound/validators/completeness/default.py +98 -0
- truthound/validators/completeness/empty.py +103 -0
- truthound/validators/completeness/nan.py +337 -0
- truthound/validators/completeness/null.py +152 -0
- truthound/validators/cross_table/__init__.py +17 -0
- truthound/validators/cross_table/aggregate.py +333 -0
- truthound/validators/cross_table/row_count.py +122 -0
- truthound/validators/datetime/__init__.py +29 -0
- truthound/validators/datetime/format.py +78 -0
- truthound/validators/datetime/freshness.py +269 -0
- truthound/validators/datetime/order.py +73 -0
- truthound/validators/datetime/parseable.py +185 -0
- truthound/validators/datetime/range.py +202 -0
- truthound/validators/datetime/timezone.py +69 -0
- truthound/validators/distribution/__init__.py +49 -0
- truthound/validators/distribution/distribution.py +128 -0
- truthound/validators/distribution/monotonic.py +119 -0
- truthound/validators/distribution/outlier.py +178 -0
- truthound/validators/distribution/quantile.py +80 -0
- truthound/validators/distribution/range.py +254 -0
- truthound/validators/distribution/set.py +125 -0
- truthound/validators/distribution/statistical.py +459 -0
- truthound/validators/drift/__init__.py +79 -0
- truthound/validators/drift/base.py +427 -0
- truthound/validators/drift/multi_feature.py +401 -0
- truthound/validators/drift/numeric.py +395 -0
- truthound/validators/drift/psi.py +446 -0
- truthound/validators/drift/statistical.py +510 -0
- truthound/validators/enterprise.py +1658 -0
- truthound/validators/geospatial/__init__.py +80 -0
- truthound/validators/geospatial/base.py +97 -0
- truthound/validators/geospatial/boundary.py +238 -0
- truthound/validators/geospatial/coordinate.py +351 -0
- truthound/validators/geospatial/distance.py +399 -0
- truthound/validators/geospatial/polygon.py +665 -0
- truthound/validators/i18n/__init__.py +308 -0
- truthound/validators/i18n/bidi.py +571 -0
- truthound/validators/i18n/catalogs.py +570 -0
- truthound/validators/i18n/dialects.py +763 -0
- truthound/validators/i18n/extended_catalogs.py +549 -0
- truthound/validators/i18n/formatting.py +1434 -0
- truthound/validators/i18n/loader.py +1020 -0
- truthound/validators/i18n/messages.py +521 -0
- truthound/validators/i18n/plural.py +683 -0
- truthound/validators/i18n/protocols.py +855 -0
- truthound/validators/i18n/tms.py +1162 -0
- truthound/validators/localization/__init__.py +53 -0
- truthound/validators/localization/base.py +122 -0
- truthound/validators/localization/chinese.py +362 -0
- truthound/validators/localization/japanese.py +275 -0
- truthound/validators/localization/korean.py +524 -0
- truthound/validators/memory/__init__.py +94 -0
- truthound/validators/memory/approximate_knn.py +506 -0
- truthound/validators/memory/base.py +547 -0
- truthound/validators/memory/sgd_online.py +719 -0
- truthound/validators/memory/streaming_ecdf.py +753 -0
- truthound/validators/ml_feature/__init__.py +54 -0
- truthound/validators/ml_feature/base.py +249 -0
- truthound/validators/ml_feature/correlation.py +299 -0
- truthound/validators/ml_feature/leakage.py +344 -0
- truthound/validators/ml_feature/null_impact.py +270 -0
- truthound/validators/ml_feature/scale.py +264 -0
- truthound/validators/multi_column/__init__.py +89 -0
- truthound/validators/multi_column/arithmetic.py +284 -0
- truthound/validators/multi_column/base.py +231 -0
- truthound/validators/multi_column/comparison.py +273 -0
- truthound/validators/multi_column/consistency.py +312 -0
- truthound/validators/multi_column/statistical.py +299 -0
- truthound/validators/optimization/__init__.py +164 -0
- truthound/validators/optimization/aggregation.py +563 -0
- truthound/validators/optimization/covariance.py +556 -0
- truthound/validators/optimization/geo.py +626 -0
- truthound/validators/optimization/graph.py +587 -0
- truthound/validators/optimization/orchestrator.py +970 -0
- truthound/validators/optimization/profiling.py +1312 -0
- truthound/validators/privacy/__init__.py +223 -0
- truthound/validators/privacy/base.py +635 -0
- truthound/validators/privacy/ccpa.py +670 -0
- truthound/validators/privacy/gdpr.py +728 -0
- truthound/validators/privacy/global_patterns.py +604 -0
- truthound/validators/privacy/plugins.py +867 -0
- truthound/validators/profiling/__init__.py +52 -0
- truthound/validators/profiling/base.py +175 -0
- truthound/validators/profiling/cardinality.py +312 -0
- truthound/validators/profiling/entropy.py +391 -0
- truthound/validators/profiling/frequency.py +455 -0
- truthound/validators/pushdown_support.py +660 -0
- truthound/validators/query/__init__.py +91 -0
- truthound/validators/query/aggregate.py +346 -0
- truthound/validators/query/base.py +246 -0
- truthound/validators/query/column.py +249 -0
- truthound/validators/query/expression.py +274 -0
- truthound/validators/query/result.py +323 -0
- truthound/validators/query/row_count.py +264 -0
- truthound/validators/referential/__init__.py +80 -0
- truthound/validators/referential/base.py +395 -0
- truthound/validators/referential/cascade.py +391 -0
- truthound/validators/referential/circular.py +563 -0
- truthound/validators/referential/foreign_key.py +624 -0
- truthound/validators/referential/orphan.py +485 -0
- truthound/validators/registry.py +112 -0
- truthound/validators/schema/__init__.py +41 -0
- truthound/validators/schema/column_count.py +142 -0
- truthound/validators/schema/column_exists.py +80 -0
- truthound/validators/schema/column_order.py +82 -0
- truthound/validators/schema/column_pair.py +85 -0
- truthound/validators/schema/column_pair_set.py +195 -0
- truthound/validators/schema/column_type.py +94 -0
- truthound/validators/schema/multi_column.py +53 -0
- truthound/validators/schema/multi_column_aggregate.py +175 -0
- truthound/validators/schema/referential.py +274 -0
- truthound/validators/schema/table_schema.py +91 -0
- truthound/validators/schema_validator.py +219 -0
- truthound/validators/sdk/__init__.py +250 -0
- truthound/validators/sdk/builder.py +680 -0
- truthound/validators/sdk/decorators.py +474 -0
- truthound/validators/sdk/enterprise/__init__.py +211 -0
- truthound/validators/sdk/enterprise/docs.py +725 -0
- truthound/validators/sdk/enterprise/fuzzing.py +659 -0
- truthound/validators/sdk/enterprise/licensing.py +709 -0
- truthound/validators/sdk/enterprise/manager.py +543 -0
- truthound/validators/sdk/enterprise/resources.py +628 -0
- truthound/validators/sdk/enterprise/sandbox.py +766 -0
- truthound/validators/sdk/enterprise/signing.py +603 -0
- truthound/validators/sdk/enterprise/templates.py +865 -0
- truthound/validators/sdk/enterprise/versioning.py +659 -0
- truthound/validators/sdk/templates.py +757 -0
- truthound/validators/sdk/testing.py +807 -0
- truthound/validators/security/__init__.py +181 -0
- truthound/validators/security/redos/__init__.py +182 -0
- truthound/validators/security/redos/core.py +861 -0
- truthound/validators/security/redos/cpu_monitor.py +593 -0
- truthound/validators/security/redos/cve_database.py +791 -0
- truthound/validators/security/redos/ml/__init__.py +155 -0
- truthound/validators/security/redos/ml/base.py +785 -0
- truthound/validators/security/redos/ml/datasets.py +618 -0
- truthound/validators/security/redos/ml/features.py +359 -0
- truthound/validators/security/redos/ml/models.py +1000 -0
- truthound/validators/security/redos/ml/predictor.py +507 -0
- truthound/validators/security/redos/ml/storage.py +632 -0
- truthound/validators/security/redos/ml/training.py +571 -0
- truthound/validators/security/redos/ml_analyzer.py +937 -0
- truthound/validators/security/redos/optimizer.py +674 -0
- truthound/validators/security/redos/profiler.py +682 -0
- truthound/validators/security/redos/re2_engine.py +709 -0
- truthound/validators/security/redos.py +886 -0
- truthound/validators/security/sql_security.py +1247 -0
- truthound/validators/streaming/__init__.py +126 -0
- truthound/validators/streaming/base.py +292 -0
- truthound/validators/streaming/completeness.py +210 -0
- truthound/validators/streaming/mixin.py +575 -0
- truthound/validators/streaming/range.py +308 -0
- truthound/validators/streaming/sources.py +846 -0
- truthound/validators/string/__init__.py +57 -0
- truthound/validators/string/casing.py +158 -0
- truthound/validators/string/charset.py +96 -0
- truthound/validators/string/format.py +501 -0
- truthound/validators/string/json.py +77 -0
- truthound/validators/string/json_schema.py +184 -0
- truthound/validators/string/length.py +104 -0
- truthound/validators/string/like_pattern.py +237 -0
- truthound/validators/string/regex.py +202 -0
- truthound/validators/string/regex_extended.py +435 -0
- truthound/validators/table/__init__.py +88 -0
- truthound/validators/table/base.py +78 -0
- truthound/validators/table/column_count.py +198 -0
- truthound/validators/table/freshness.py +362 -0
- truthound/validators/table/row_count.py +251 -0
- truthound/validators/table/schema.py +333 -0
- truthound/validators/table/size.py +285 -0
- truthound/validators/timeout/__init__.py +102 -0
- truthound/validators/timeout/advanced/__init__.py +247 -0
- truthound/validators/timeout/advanced/circuit_breaker.py +675 -0
- truthound/validators/timeout/advanced/prediction.py +773 -0
- truthound/validators/timeout/advanced/priority.py +618 -0
- truthound/validators/timeout/advanced/redis_backend.py +770 -0
- truthound/validators/timeout/advanced/retry.py +721 -0
- truthound/validators/timeout/advanced/sampling.py +788 -0
- truthound/validators/timeout/advanced/sla.py +661 -0
- truthound/validators/timeout/advanced/telemetry.py +804 -0
- truthound/validators/timeout/cascade.py +477 -0
- truthound/validators/timeout/deadline.py +657 -0
- truthound/validators/timeout/degradation.py +525 -0
- truthound/validators/timeout/distributed.py +597 -0
- truthound/validators/timeseries/__init__.py +89 -0
- truthound/validators/timeseries/base.py +326 -0
- truthound/validators/timeseries/completeness.py +617 -0
- truthound/validators/timeseries/gap.py +485 -0
- truthound/validators/timeseries/monotonic.py +310 -0
- truthound/validators/timeseries/seasonality.py +422 -0
- truthound/validators/timeseries/trend.py +510 -0
- truthound/validators/uniqueness/__init__.py +59 -0
- truthound/validators/uniqueness/approximate.py +475 -0
- truthound/validators/uniqueness/distinct_values.py +253 -0
- truthound/validators/uniqueness/duplicate.py +118 -0
- truthound/validators/uniqueness/primary_key.py +140 -0
- truthound/validators/uniqueness/unique.py +191 -0
- truthound/validators/uniqueness/within_record.py +599 -0
- truthound/validators/utils.py +756 -0
- truthound-1.0.8.dist-info/METADATA +474 -0
- truthound-1.0.8.dist-info/RECORD +877 -0
- truthound-1.0.8.dist-info/WHEEL +4 -0
- truthound-1.0.8.dist-info/entry_points.txt +2 -0
- truthound-1.0.8.dist-info/licenses/LICENSE +190 -0
|
@@ -0,0 +1,1170 @@
|
|
|
1
|
+
"""Custom pattern configuration via YAML files.
|
|
2
|
+
|
|
3
|
+
This module provides a flexible system for defining custom validation patterns
|
|
4
|
+
through YAML configuration files, eliminating the need for code changes.
|
|
5
|
+
|
|
6
|
+
Key features:
|
|
7
|
+
- YAML-based pattern definitions
|
|
8
|
+
- Hierarchical pattern organization
|
|
9
|
+
- Pattern inheritance and composition
|
|
10
|
+
- Hot-reload support for development
|
|
11
|
+
- Pattern validation and testing
|
|
12
|
+
|
|
13
|
+
Example YAML configuration:
|
|
14
|
+
patterns:
|
|
15
|
+
korean_phone:
|
|
16
|
+
name: Korean Phone Number
|
|
17
|
+
regex: "^01[0-9]-[0-9]{3,4}-[0-9]{4}$"
|
|
18
|
+
priority: 90
|
|
19
|
+
data_type: korean_phone
|
|
20
|
+
examples:
|
|
21
|
+
- "010-1234-5678"
|
|
22
|
+
- "011-123-4567"
|
|
23
|
+
|
|
24
|
+
email:
|
|
25
|
+
name: Email Address
|
|
26
|
+
regex: "^[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\\.[a-zA-Z]{2,}$"
|
|
27
|
+
priority: 85
|
|
28
|
+
data_type: email
|
|
29
|
+
|
|
30
|
+
Example usage:
|
|
31
|
+
from truthound.profiler.custom_patterns import (
|
|
32
|
+
PatternConfig,
|
|
33
|
+
load_patterns,
|
|
34
|
+
PatternConfigLoader,
|
|
35
|
+
)
|
|
36
|
+
|
|
37
|
+
# Load patterns from YAML
|
|
38
|
+
patterns = load_patterns("patterns.yaml")
|
|
39
|
+
|
|
40
|
+
# Or use the loader for more control
|
|
41
|
+
loader = PatternConfigLoader()
|
|
42
|
+
loader.load_file("patterns.yaml")
|
|
43
|
+
loader.load_directory("patterns/")
|
|
44
|
+
|
|
45
|
+
# Get all patterns
|
|
46
|
+
all_patterns = loader.get_all_patterns()
|
|
47
|
+
"""
|
|
48
|
+
|
|
49
|
+
from __future__ import annotations
|
|
50
|
+
|
|
51
|
+
import os
|
|
52
|
+
import re
|
|
53
|
+
import threading
|
|
54
|
+
import time
|
|
55
|
+
from dataclasses import dataclass, field
|
|
56
|
+
from datetime import datetime
|
|
57
|
+
from enum import Enum
|
|
58
|
+
from pathlib import Path
|
|
59
|
+
from typing import Any, Callable
|
|
60
|
+
|
|
61
|
+
from truthound.profiler.base import DataType
|
|
62
|
+
|
|
63
|
+
|
|
64
|
+
# =============================================================================
|
|
65
|
+
# Pattern Configuration Types
|
|
66
|
+
# =============================================================================
|
|
67
|
+
|
|
68
|
+
|
|
69
|
+
class PatternPriority(int, Enum):
    """Named priority levels for pattern matching.

    The class mixes in ``int``, so every member is itself an integer and can
    be compared with, or added to, ordinary numbers. Larger values mean a
    higher priority.
    """

    # Listed from the highest priority to the lowest.
    HIGHEST = 100
    HIGH = 90
    MEDIUM = 50
    LOW = 25
    LOWEST = 10
|
|
77
|
+
|
|
78
|
+
|
|
79
|
+
@dataclass
class PatternExample:
    """A sample value used to test a pattern.

    Attributes:
        value: The text to run against the pattern.
        should_match: Whether the pattern is expected to match ``value``.
        description: Optional free-form note about the example.
    """

    value: str
    should_match: bool = True  # examples are positive unless stated otherwise
    description: str = ""
|
|
86
|
+
|
|
87
|
+
|
|
88
|
+
@dataclass
class PatternConfig:
    """Configuration for a single pattern.

    Attributes:
        name: Human-readable pattern name
        pattern_id: Unique identifier for the pattern
        regex: Regular expression pattern
        priority: Matching priority (higher = checked first)
        data_type: Inferred data type when pattern matches
        min_match_ratio: Minimum ratio of values that must match
        description: Pattern description
        examples: Example values for testing. Plain strings and
            ``to_dict()``-style mappings are converted to ``PatternExample``
            objects on construction.
        tags: Tags for categorization
        enabled: Whether pattern is active
        case_sensitive: Whether regex is case-sensitive
        multiline: Whether regex uses multiline mode
        validator_fn: Optional custom validator function name
        metadata: Additional metadata
    """

    name: str
    pattern_id: str
    regex: str
    priority: int = PatternPriority.MEDIUM
    data_type: str = "string"
    min_match_ratio: float = 0.8
    description: str = ""
    examples: list[PatternExample] = field(default_factory=list)
    tags: list[str] = field(default_factory=list)
    enabled: bool = True
    case_sensitive: bool = True
    multiline: bool = False
    validator_fn: str | None = None
    metadata: dict[str, Any] = field(default_factory=dict)

    # Internal cache of the compiled regex; rebuilt in __post_init__.
    _compiled_regex: re.Pattern | None = field(default=None, repr=False)

    def __post_init__(self) -> None:
        """Normalize the examples and compile the regex.

        Raises:
            ValueError: If ``regex`` is not a valid regular expression.
        """
        # to_dict() emits examples as plain dicts; accept that shape (and bare
        # strings) here so PatternConfig(**cfg.to_dict()) yields a usable object.
        self.examples = [self._coerce_example(ex) for ex in (self.examples or [])]
        self._compile_regex()

    @staticmethod
    def _coerce_example(raw: Any) -> Any:
        """Convert a string or mapping into a ``PatternExample``.

        Any other object (including an existing ``PatternExample``) is
        returned unchanged.
        """
        if isinstance(raw, str):
            return PatternExample(value=raw)
        if isinstance(raw, dict):
            return PatternExample(
                value=raw.get("value", ""),
                should_match=raw.get("should_match", True),
                description=raw.get("description", ""),
            )
        return raw

    def _compile_regex(self) -> None:
        """Compile ``regex`` with the flags implied by the configuration.

        Raises:
            ValueError: If the expression cannot be compiled; the original
                ``re.error`` is attached as the cause.
        """
        flags = 0
        if not self.case_sensitive:
            flags |= re.IGNORECASE
        if self.multiline:
            flags |= re.MULTILINE

        try:
            self._compiled_regex = re.compile(self.regex, flags)
        except re.error as e:
            raise ValueError(f"Invalid regex for pattern '{self.name}': {e}") from e

    @property
    def compiled_regex(self) -> re.Pattern:
        """Get compiled regex pattern, compiling it first if necessary."""
        if self._compiled_regex is None:
            self._compile_regex()
        return self._compiled_regex  # type: ignore

    def matches(self, value: str) -> bool:
        """Check if value matches the pattern.

        The value is converted with ``str()`` and tested with ``re.match``,
        i.e. anchored at the start of the string only.

        Args:
            value: Value to test; ``None`` never matches.

        Returns:
            True if the pattern matches, False otherwise (including when the
            match attempt itself raises).
        """
        if value is None:
            return False
        try:
            return bool(self.compiled_regex.match(str(value)))
        except Exception:
            # Deliberately best-effort: a value that cannot be tested is
            # treated as a non-match.
            return False

    def get_data_type(self) -> DataType:
        """Get the DataType enum value.

        Returns:
            ``DataType(self.data_type)``, or ``DataType.STRING`` when the
            configured name is not a valid ``DataType`` value.
        """
        try:
            return DataType(self.data_type)
        except ValueError:
            return DataType.STRING

    def validate_examples(self) -> list[tuple[str, bool, str]]:
        """Validate all examples against the pattern.

        Returns:
            List of (value, passed, message) tuples
        """
        results = []
        for example in self.examples:
            actual = self.matches(example.value)
            passed = actual == example.should_match

            if passed:
                message = "OK"
            else:
                expected = "match" if example.should_match else "not match"
                got = "matched" if actual else "did not match"
                message = f"Expected {expected}, but {got}"

            results.append((example.value, passed, message))

        return results

    def to_dict(self) -> dict[str, Any]:
        """Convert to dictionary for serialization.

        The internal compiled regex is not included.
        """
        # Emit a plain int: a PatternPriority member would otherwise be
        # written by yaml.dump with a Python-specific tag that safe_load
        # cannot read back.
        priority = self.priority
        if isinstance(priority, int):
            priority = int(priority)

        return {
            "name": self.name,
            "pattern_id": self.pattern_id,
            "regex": self.regex,
            "priority": priority,
            "data_type": self.data_type,
            "min_match_ratio": self.min_match_ratio,
            "description": self.description,
            "examples": [
                {
                    "value": e.value,
                    "should_match": e.should_match,
                    "description": e.description,
                }
                for e in self.examples
            ],
            "tags": self.tags,
            "enabled": self.enabled,
            "case_sensitive": self.case_sensitive,
            "multiline": self.multiline,
            "validator_fn": self.validator_fn,
            "metadata": self.metadata,
        }

    @classmethod
    def from_dict(cls, data: dict[str, Any], pattern_id: str | None = None) -> "PatternConfig":
        """Create from dictionary.

        Args:
            data: Pattern definition; ``regex`` is required, every other key
                falls back to the dataclass default.
            pattern_id: Identifier to use; when omitted, ``data["pattern_id"]``
                and then ``data["name"]`` are tried before ``"unnamed"``.

        Returns:
            The constructed pattern.

        Raises:
            KeyError: If ``data`` has no ``regex`` key.
            ValueError: If the regex is invalid.
        """
        # A key that is present but empty arrives as None; treat it like a
        # missing key. Example entries that are neither strings nor mappings
        # are ignored.
        examples = [
            cls._coerce_example(ex)
            for ex in data.get("examples") or []
            if isinstance(ex, (str, dict))
        ]

        return cls(
            name=data.get("name", pattern_id or "unnamed"),
            pattern_id=pattern_id or data.get("pattern_id", data.get("name", "unnamed")),
            regex=data["regex"],
            priority=data.get("priority", PatternPriority.MEDIUM),
            data_type=data.get("data_type", "string"),
            min_match_ratio=data.get("min_match_ratio", 0.8),
            description=data.get("description", ""),
            examples=examples,
            # Copy so the pattern does not alias the caller's containers.
            tags=list(data.get("tags") or []),
            enabled=data.get("enabled", True),
            case_sensitive=data.get("case_sensitive", True),
            multiline=data.get("multiline", False),
            validator_fn=data.get("validator_fn"),
            metadata=dict(data.get("metadata") or {}),
        )
|
|
245
|
+
|
|
246
|
+
|
|
247
|
+
# =============================================================================
|
|
248
|
+
# Pattern Group Configuration
|
|
249
|
+
# =============================================================================
|
|
250
|
+
|
|
251
|
+
|
|
252
|
+
@dataclass
class PatternGroup:
    """Group of related patterns.

    Allows organizing patterns into logical categories.

    Attributes:
        name: Human-readable group name
        group_id: Unique identifier for the group
        description: Group description
        patterns: Patterns belonging to the group
        enabled: Whether the group is active; a disabled group yields no
            patterns at all
        priority_boost: Value added to the priority of every pattern
            returned by ``get_patterns``
        tags: Tags for categorization
        metadata: Additional metadata
    """

    name: str
    group_id: str
    description: str = ""
    patterns: list[PatternConfig] = field(default_factory=list)
    enabled: bool = True
    priority_boost: int = 0  # Added to all pattern priorities
    tags: list[str] = field(default_factory=list)
    metadata: dict[str, Any] = field(default_factory=dict)

    def get_patterns(self, include_disabled: bool = False) -> list[PatternConfig]:
        """Get patterns in this group.

        Args:
            include_disabled: Whether to include disabled patterns

        Returns:
            List of patterns. When ``priority_boost`` is non-zero each entry
            is a copy with the boost added to its priority; the stored
            patterns are never modified. Empty if the group is disabled.
        """
        from dataclasses import replace

        if not self.enabled:
            return []

        selected = [p for p in self.patterns if p.enabled or include_disabled]
        if self.priority_boost == 0:
            return selected

        # Copy field-by-field instead of round-tripping through to_dict():
        # the dict form stores examples as plain dicts, so a pattern rebuilt
        # from it would no longer support validate_examples().
        return [
            replace(p, priority=p.priority + self.priority_boost)
            for p in selected
        ]

    def to_dict(self) -> dict[str, Any]:
        """Convert to dictionary."""
        return {
            "name": self.name,
            "group_id": self.group_id,
            "description": self.description,
            "patterns": [p.to_dict() for p in self.patterns],
            "enabled": self.enabled,
            "priority_boost": self.priority_boost,
            "tags": self.tags,
            "metadata": self.metadata,
        }

    @classmethod
    def from_dict(cls, data: dict[str, Any], group_id: str | None = None) -> "PatternGroup":
        """Create from dictionary.

        Args:
            data: Group definition; ``patterns`` is a mapping of pattern id
                to pattern definition.
            group_id: Identifier to use; when omitted, ``data["group_id"]``
                and then ``data["name"]`` are tried before ``"unnamed"``.

        Returns:
            The constructed group.
        """
        # A key that is present but empty arrives as None; treat it like a
        # missing key.
        patterns = [
            PatternConfig.from_dict(pattern_data, pattern_id)
            for pattern_id, pattern_data in (data.get("patterns") or {}).items()
        ]

        return cls(
            name=data.get("name", group_id or "unnamed"),
            group_id=group_id or data.get("group_id", data.get("name", "unnamed")),
            description=data.get("description", ""),
            patterns=patterns,
            enabled=data.get("enabled", True),
            priority_boost=data.get("priority_boost", 0),
            tags=list(data.get("tags") or []),
            metadata=dict(data.get("metadata") or {}),
        )
|
|
325
|
+
|
|
326
|
+
|
|
327
|
+
# =============================================================================
|
|
328
|
+
# YAML Configuration Schema
|
|
329
|
+
# =============================================================================
|
|
330
|
+
|
|
331
|
+
|
|
332
|
+
@dataclass
class PatternConfigSchema:
    """In-memory form of one complete pattern configuration document.

    Attributes:
        version: Schema version string
        name: Configuration name
        description: Configuration description
        patterns: Standalone patterns keyed by pattern id
        groups: Pattern groups keyed by group id
        extends: Parent configs to inherit from
        metadata: Additional metadata
        loaded_at: Time the object was created
        source_path: Where the configuration came from
    """

    version: str = "1.0"
    name: str = ""
    description: str = ""
    patterns: dict[str, PatternConfig] = field(default_factory=dict)
    groups: dict[str, PatternGroup] = field(default_factory=dict)
    extends: list[str] = field(default_factory=list)  # Parent configs to inherit from
    metadata: dict[str, Any] = field(default_factory=dict)
    loaded_at: datetime = field(default_factory=datetime.now)
    source_path: str = ""

    def get_all_patterns(self, include_disabled: bool = False) -> list[PatternConfig]:
        """Collect standalone and grouped patterns into one ordered list.

        Args:
            include_disabled: Whether to include disabled patterns

        Returns:
            All patterns, highest priority first; patterns with equal
            priority keep their collection order (standalone, then groups).
        """
        collected = [
            candidate
            for candidate in self.patterns.values()
            if include_disabled or candidate.enabled
        ]

        # Each group decides for itself what it contributes.
        for grp in self.groups.values():
            collected += grp.get_patterns(include_disabled)

        collected.sort(key=lambda item: item.priority, reverse=True)
        return collected

    def get_pattern(self, pattern_id: str) -> PatternConfig | None:
        """Look up a pattern by id, standalone patterns first, then groups.

        Returns:
            The matching pattern, or None when no pattern has that id.
        """
        try:
            return self.patterns[pattern_id]
        except KeyError:
            pass

        grouped = (
            member
            for grp in self.groups.values()
            for member in grp.patterns
            if member.pattern_id == pattern_id
        )
        return next(grouped, None)

    def to_dict(self) -> dict[str, Any]:
        """Convert to dictionary.

        ``loaded_at`` and ``source_path`` are not part of the output.
        """
        pattern_dicts = {}
        for pid, pattern in self.patterns.items():
            pattern_dicts[pid] = pattern.to_dict()

        group_dicts = {}
        for gid, grp in self.groups.items():
            group_dicts[gid] = grp.to_dict()

        return {
            "version": self.version,
            "name": self.name,
            "description": self.description,
            "patterns": pattern_dicts,
            "groups": group_dicts,
            "extends": self.extends,
            "metadata": self.metadata,
        }
|
|
395
|
+
|
|
396
|
+
|
|
397
|
+
# =============================================================================
|
|
398
|
+
# YAML Parser
|
|
399
|
+
# =============================================================================
|
|
400
|
+
|
|
401
|
+
|
|
402
|
+
def _parse_yaml(content: str) -> dict[str, Any]:
    """Parse YAML text into a dictionary.

    PyYAML's ``safe_load`` is used when the package can be imported;
    otherwise the built-in basic parser takes over.

    Args:
        content: YAML document text.

    Returns:
        The parsed document; a falsy PyYAML result (for example an empty
        document) is returned as an empty dict.
    """
    try:
        import yaml

        loaded = yaml.safe_load(content)
    except ImportError:
        # PyYAML is not installed: fall back to the basic parser.
        return _basic_yaml_parse(content)

    return loaded or {}
|
|
413
|
+
|
|
414
|
+
|
|
415
|
+
def _basic_yaml_parse(content: str) -> dict[str, Any]:
|
|
416
|
+
"""Basic YAML parser for simple structures.
|
|
417
|
+
|
|
418
|
+
This is a fallback when PyYAML is not installed.
|
|
419
|
+
Only supports simple key-value pairs and lists.
|
|
420
|
+
"""
|
|
421
|
+
import json
|
|
422
|
+
|
|
423
|
+
# Try JSON first (YAML is a superset)
|
|
424
|
+
try:
|
|
425
|
+
return json.loads(content)
|
|
426
|
+
except json.JSONDecodeError:
|
|
427
|
+
pass
|
|
428
|
+
|
|
429
|
+
# Very basic YAML parsing
|
|
430
|
+
result: dict[str, Any] = {}
|
|
431
|
+
current_key: str | None = None
|
|
432
|
+
current_indent = 0
|
|
433
|
+
|
|
434
|
+
for line in content.split("\n"):
|
|
435
|
+
stripped = line.strip()
|
|
436
|
+
if not stripped or stripped.startswith("#"):
|
|
437
|
+
continue
|
|
438
|
+
|
|
439
|
+
indent = len(line) - len(line.lstrip())
|
|
440
|
+
|
|
441
|
+
if ":" in stripped:
|
|
442
|
+
key, _, value = stripped.partition(":")
|
|
443
|
+
key = key.strip()
|
|
444
|
+
value = value.strip()
|
|
445
|
+
|
|
446
|
+
if value:
|
|
447
|
+
# Remove quotes
|
|
448
|
+
if value.startswith('"') and value.endswith('"'):
|
|
449
|
+
value = value[1:-1]
|
|
450
|
+
elif value.startswith("'") and value.endswith("'"):
|
|
451
|
+
value = value[1:-1]
|
|
452
|
+
|
|
453
|
+
result[key] = value
|
|
454
|
+
else:
|
|
455
|
+
result[key] = {}
|
|
456
|
+
current_key = key
|
|
457
|
+
current_indent = indent
|
|
458
|
+
|
|
459
|
+
return result
|
|
460
|
+
|
|
461
|
+
|
|
462
|
+
def _dump_yaml(data: dict[str, Any]) -> str:
|
|
463
|
+
"""Dump dictionary to YAML string."""
|
|
464
|
+
try:
|
|
465
|
+
import yaml
|
|
466
|
+
return yaml.dump(data, default_flow_style=False, allow_unicode=True)
|
|
467
|
+
except ImportError:
|
|
468
|
+
import json
|
|
469
|
+
return json.dumps(data, indent=2, ensure_ascii=False)
|
|
470
|
+
|
|
471
|
+
|
|
472
|
+
# =============================================================================
|
|
473
|
+
# Configuration Loader
|
|
474
|
+
# =============================================================================
|
|
475
|
+
|
|
476
|
+
|
|
477
|
+
class PatternConfigLoader:
|
|
478
|
+
"""Loads pattern configurations from YAML files.
|
|
479
|
+
|
|
480
|
+
Supports:
|
|
481
|
+
- Single file loading
|
|
482
|
+
- Directory scanning
|
|
483
|
+
- Configuration inheritance
|
|
484
|
+
- Hot-reload for development
|
|
485
|
+
|
|
486
|
+
Example:
|
|
487
|
+
loader = PatternConfigLoader()
|
|
488
|
+
loader.load_file("patterns.yaml")
|
|
489
|
+
loader.load_directory("patterns/")
|
|
490
|
+
|
|
491
|
+
# Get all patterns
|
|
492
|
+
patterns = loader.get_all_patterns()
|
|
493
|
+
|
|
494
|
+
# Enable hot-reload
|
|
495
|
+
loader.enable_hot_reload(interval=5.0)
|
|
496
|
+
"""
|
|
497
|
+
|
|
498
|
+
def __init__(
|
|
499
|
+
self,
|
|
500
|
+
auto_validate: bool = True,
|
|
501
|
+
strict_mode: bool = False,
|
|
502
|
+
):
|
|
503
|
+
"""Initialize loader.
|
|
504
|
+
|
|
505
|
+
Args:
|
|
506
|
+
auto_validate: Validate patterns on load
|
|
507
|
+
strict_mode: Fail on any validation error
|
|
508
|
+
"""
|
|
509
|
+
self.auto_validate = auto_validate
|
|
510
|
+
self.strict_mode = strict_mode
|
|
511
|
+
|
|
512
|
+
self._configs: dict[str, PatternConfigSchema] = {}
|
|
513
|
+
self._file_mtimes: dict[str, float] = {}
|
|
514
|
+
self._hot_reload_enabled = False
|
|
515
|
+
self._hot_reload_thread: threading.Thread | None = None
|
|
516
|
+
self._lock = threading.Lock()
|
|
517
|
+
self._stop_hot_reload = threading.Event()
|
|
518
|
+
|
|
519
|
+
def load_file(self, path: str | Path) -> PatternConfigSchema:
|
|
520
|
+
"""Load patterns from a YAML file.
|
|
521
|
+
|
|
522
|
+
Args:
|
|
523
|
+
path: Path to YAML file
|
|
524
|
+
|
|
525
|
+
Returns:
|
|
526
|
+
Loaded configuration
|
|
527
|
+
"""
|
|
528
|
+
path = Path(path)
|
|
529
|
+
if not path.exists():
|
|
530
|
+
raise FileNotFoundError(f"Pattern config not found: {path}")
|
|
531
|
+
|
|
532
|
+
with open(path, "r", encoding="utf-8") as f:
|
|
533
|
+
content = f.read()
|
|
534
|
+
|
|
535
|
+
data = _parse_yaml(content)
|
|
536
|
+
config = self._parse_config(data, str(path))
|
|
537
|
+
|
|
538
|
+
if self.auto_validate:
|
|
539
|
+
errors = self.validate_config(config)
|
|
540
|
+
if errors and self.strict_mode:
|
|
541
|
+
raise ValueError(f"Pattern validation errors: {errors}")
|
|
542
|
+
|
|
543
|
+
with self._lock:
|
|
544
|
+
self._configs[str(path)] = config
|
|
545
|
+
self._file_mtimes[str(path)] = path.stat().st_mtime
|
|
546
|
+
|
|
547
|
+
return config
|
|
548
|
+
|
|
549
|
+
def load_directory(
    self,
    directory: str | Path,
    pattern: str = "*.yaml",
    recursive: bool = True,
) -> list[PatternConfigSchema]:
    """Load all pattern files from a directory.

    When ``pattern`` ends in ``.yaml`` the same pattern with a ``.yml``
    suffix is scanned as well; files from that second pass are skipped if
    they are already loaded.

    Args:
        directory: Directory to scan. A missing directory yields ``[]``.
        pattern: Glob pattern for files.
        recursive: Use ``rglob`` (descend into sub-directories) when true,
            ``glob`` otherwise.

    Returns:
        The configurations that loaded successfully.

    Raises:
        Exception: Whatever ``load_file`` raised, re-raised only when
            ``strict_mode`` is enabled. Otherwise the file is skipped and a
            warning is logged.
    """
    import logging

    directory = Path(directory)
    if not directory.exists():
        return []

    log = logging.getLogger(__name__)
    glob_method = directory.rglob if recursive else directory.glob
    configs: list[PatternConfigSchema] = []

    def load_matching(glob_pattern: str, skip_loaded: bool) -> None:
        """Load every regular file matching ``glob_pattern`` into ``configs``."""
        for file_path in glob_method(glob_pattern):
            if not file_path.is_file():
                continue
            if skip_loaded and str(file_path) in self._configs:
                continue
            try:
                configs.append(self.load_file(file_path))
            except Exception as exc:
                if self.strict_mode:
                    raise
                # Best effort: one broken file must not abort the scan.
                log.warning("Skipping pattern file %s: %s", file_path, exc)

    load_matching(pattern, skip_loaded=False)

    if pattern.endswith(".yaml"):
        # Swap only the trailing suffix; str.replace would also rewrite any
        # ".yaml" occurring earlier in the pattern (e.g. "conf.yaml.d/*.yaml").
        yml_pattern = pattern[: -len(".yaml")] + ".yml"
        load_matching(yml_pattern, skip_loaded=True)

    return configs
|
|
595
|
+
|
|
596
|
+
def load_from_string(self, content: str, name: str = "inline") -> PatternConfigSchema:
    """Load patterns from a YAML string.

    The configuration is stored under ``name``. No modification time is
    recorded for it, so the hot-reload change check never touches it.

    Args:
        content: YAML content.
        name: Key for the configuration; also used as its source path.

    Returns:
        The parsed configuration.

    Raises:
        ValueError: If validation reports errors while both
            ``auto_validate`` and ``strict_mode`` are enabled.
    """
    data = _parse_yaml(content)
    config = self._parse_config(data, name)

    # Honour the same validation switches as load_file so that inline
    # content is not exempt from auto_validate / strict_mode.
    if self.auto_validate:
        errors = self.validate_config(config)
        if errors and self.strict_mode:
            raise ValueError(f"Pattern validation errors: {errors}")

    with self._lock:
        self._configs[name] = config

    return config
|
|
613
|
+
|
|
614
|
+
def _parse_config(self, data: dict[str, Any], source: str) -> PatternConfigSchema:
    """Convert parsed YAML data into a ``PatternConfigSchema``.

    Entries under ``patterns`` and ``groups`` whose value is not a mapping
    are ignored. Keys that are missing, or present with an empty value
    (which a YAML parser reports as ``None``), fall back to their defaults.

    Args:
        data: Top-level mapping from the YAML document; ``None`` (an empty
            document) is treated as an empty mapping.
        source: Recorded as the schema's ``source_path``.

    Returns:
        The schema object built from ``data``.
    """
    data = data or {}

    # "patterns:" / "groups:" with nothing beneath them parse to None, which
    # dict.get(key, {}) would hand back unchanged; "or {}" covers that case.
    raw_patterns = data.get("patterns") or {}
    raw_groups = data.get("groups") or {}

    patterns = {
        pattern_id: PatternConfig.from_dict(pattern_data, pattern_id)
        for pattern_id, pattern_data in raw_patterns.items()
        if isinstance(pattern_data, dict)
    }
    groups = {
        group_id: PatternGroup.from_dict(group_data, group_id)
        for group_id, group_data in raw_groups.items()
        if isinstance(group_data, dict)
    }

    return PatternConfigSchema(
        version=data.get("version", "1.0"),
        name=data.get("name", ""),
        description=data.get("description", ""),
        patterns=patterns,
        groups=groups,
        extends=data.get("extends") or [],
        metadata=data.get("metadata") or {},
        source_path=source,
    )
|
|
638
|
+
|
|
639
|
+
def get_all_patterns(self, include_disabled: bool = False) -> list[PatternConfig]:
    """Return the patterns of every loaded configuration.

    When several configurations define the same ``pattern_id`` only the one
    with the strictly highest priority is kept (the first seen wins a tie).

    Args:
        include_disabled: Forwarded to each configuration's
            ``get_all_patterns``.

    Returns:
        De-duplicated patterns, highest priority first.
    """
    # Only the snapshot of the loaded configs needs the lock.
    with self._lock:
        collected = [
            candidate
            for cfg in self._configs.values()
            for candidate in cfg.get_all_patterns(include_disabled)
        ]

    best_by_id: dict[str, PatternConfig] = {}
    for candidate in collected:
        current = best_by_id.get(candidate.pattern_id)
        if current is None or candidate.priority > current.priority:
            best_by_id[candidate.pattern_id] = candidate

    ranked = list(best_by_id.values())
    ranked.sort(key=lambda candidate: candidate.priority, reverse=True)
    return ranked
|
|
661
|
+
|
|
662
|
+
def get_pattern(self, pattern_id: str) -> PatternConfig | None:
    """Look up a pattern by ID across the loaded configurations.

    Args:
        pattern_id: Pattern identifier.

    Returns:
        The first truthy result in configuration insertion order, or
        ``None`` when no configuration has the pattern.
    """
    with self._lock:
        # Lazy generator: lookups stop at the first configuration that has it.
        lookups = (cfg.get_pattern(pattern_id) for cfg in self._configs.values())
        return next((found for found in lookups if found), None)
|
|
677
|
+
|
|
678
|
+
def get_patterns_by_tag(self, tag: str) -> list[PatternConfig]:
    """Return the patterns carrying ``tag``.

    Only patterns returned by ``get_all_patterns()`` with its default
    arguments are considered; their order is preserved.

    Args:
        tag: Tag to filter by.

    Returns:
        Patterns whose ``tags`` contain ``tag``.
    """
    tagged = []
    for candidate in self.get_all_patterns():
        if tag in candidate.tags:
            tagged.append(candidate)
    return tagged
|
|
688
|
+
|
|
689
|
+
def get_patterns_by_type(self, data_type: str | DataType) -> list[PatternConfig]:
    """Return the patterns declared for a data type.

    Args:
        data_type: Either a ``DataType`` member (compared through its
            ``value``) or the raw value itself.

    Returns:
        Patterns from ``get_all_patterns()`` whose ``data_type`` equals the
        requested value, in the same order.
    """
    wanted = data_type.value if isinstance(data_type, DataType) else data_type
    return list(
        filter(lambda candidate: candidate.data_type == wanted, self.get_all_patterns())
    )
|
|
701
|
+
|
|
702
|
+
def validate_config(self, config: PatternConfigSchema) -> list[str]:
    """Collect validation problems for every pattern in ``config``.

    Disabled patterns are checked too. For each pattern the regex must
    compile, each example reported by ``validate_examples()`` must pass, and
    ``data_type`` must be accepted by ``DataType``.

    Args:
        config: Configuration to validate.

    Returns:
        Human-readable error messages; empty when nothing is wrong.
    """
    problems: list[str] = []

    for candidate in config.get_all_patterns(include_disabled=True):
        prefix = f"Pattern '{candidate.pattern_id}'"

        # 1) the regular expression must compile
        try:
            re.compile(candidate.regex)
        except re.error as exc:
            problems.append(f"{prefix}: Invalid regex: {exc}")

        # 2) every declared example must behave as stated
        problems.extend(
            f"{prefix}: Example '{value}' failed: {message}"
            for value, passed, message in candidate.validate_examples()
            if not passed
        )

        # 3) the data type must be a known DataType value
        try:
            DataType(candidate.data_type)
        except ValueError:
            problems.append(f"{prefix}: Unknown data_type '{candidate.data_type}'")

    return problems
|
|
737
|
+
|
|
738
|
+
def enable_hot_reload(self, interval: float = 5.0) -> None:
    """Start a daemon thread that periodically checks files for changes.

    Does nothing when hot reload is already enabled.

    Args:
        interval: Seconds to wait between change checks.
    """
    if not self._hot_reload_enabled:
        self._hot_reload_enabled = True
        self._stop_hot_reload.clear()

        def poll_until_stopped() -> None:
            # Event.wait returns True once the stop event is set and False
            # when the interval simply elapsed.
            while True:
                if self._stop_hot_reload.wait(interval):
                    break
                self._check_for_changes()

        watcher = threading.Thread(target=poll_until_stopped, daemon=True)
        self._hot_reload_thread = watcher
        watcher.start()
|
|
756
|
+
|
|
757
|
+
def disable_hot_reload(self) -> None:
    """Signal the watcher thread to stop and wait briefly for it.

    Does nothing when hot reload is not enabled. The join is bounded to two
    seconds, after which hot reload is marked disabled regardless.
    """
    if self._hot_reload_enabled:
        self._stop_hot_reload.set()
        watcher = self._hot_reload_thread
        if watcher:
            watcher.join(timeout=2.0)
        self._hot_reload_enabled = False
|
|
766
|
+
|
|
767
|
+
def _check_for_changes(self) -> None:
    """Reload files whose mtime advanced and forget files that vanished.

    A tracked file that can no longer be stat-ed is dropped from both the
    loaded configurations and the mtime table. A file whose current mtime is
    newer than the recorded one is reloaded through ``load_file``; if that
    fails, the previously loaded configuration is left in place.
    """
    changed: list[str] = []

    with self._lock:
        for path, recorded_mtime in list(self._file_mtimes.items()):
            try:
                current_mtime = Path(path).stat().st_mtime
            except OSError:
                # File deleted or inaccessible. pop() rather than del: a
                # KeyError here would terminate the watcher thread if the
                # two tables were ever out of step.
                self._configs.pop(path, None)
                self._file_mtimes.pop(path, None)
                continue
            if current_mtime > recorded_mtime:
                changed.append(path)

    # Reload only after the lock is released: load_file acquires the same
    # threading.Lock, which is not re-entrant.
    for path in changed:
        try:
            self.load_file(path)
        except Exception:
            pass  # Keep old config on reload failure
|
|
787
|
+
|
|
788
|
+
def clear(self) -> None:
    """Forget every loaded configuration and every recorded file mtime."""
    with self._lock:
        for store in (self._configs, self._file_mtimes):
            store.clear()
|
|
793
|
+
|
|
794
|
+
|
|
795
|
+
# =============================================================================
|
|
796
|
+
# Pattern Registry
|
|
797
|
+
# =============================================================================
|
|
798
|
+
|
|
799
|
+
|
|
800
|
+
class PatternRegistry:
    """Global registry for custom patterns.

    ``PatternRegistry()`` always returns the same instance. The registry
    combines patterns loaded from YAML (through an internal
    ``PatternConfigLoader``) with patterns registered programmatically.

    Example:
        # Register patterns
        registry = PatternRegistry()
        registry.load_file("patterns.yaml")

        # Use patterns
        for pattern in registry.get_patterns():
            if pattern.matches(value):
                print(f"Matched: {pattern.name}")
    """

    _instance: "PatternRegistry | None" = None
    _lock = threading.Lock()

    def __new__(cls) -> "PatternRegistry":
        # Check, take the class lock, then check again, so that concurrent
        # first calls create a single instance.
        if cls._instance is None:
            with cls._lock:
                if cls._instance is None:
                    cls._instance = super().__new__(cls)
                    cls._instance._initialized = False
        return cls._instance

    def __init__(self) -> None:
        # __init__ runs on every PatternRegistry() call; set state up once.
        if getattr(self, "_initialized", False):
            return

        self._loader = PatternConfigLoader()
        self._custom_patterns: dict[str, PatternConfig] = {}
        self._initialized = True

    @property
    def loader(self) -> PatternConfigLoader:
        """Access the internal loader."""
        return self._loader

    def load_file(self, path: str | Path) -> PatternConfigSchema:
        """Load patterns from a file via the internal loader."""
        return self._loader.load_file(path)

    def load_directory(
        self,
        directory: str | Path,
        pattern: str = "*.yaml",
        recursive: bool = True,
    ) -> list[PatternConfigSchema]:
        """Load patterns from a directory via the internal loader.

        Args:
            directory: Directory to scan.
            pattern: Glob pattern for files, forwarded to the loader.
            recursive: Whether to search recursively, forwarded to the loader.

        Returns:
            The configurations the loader returned.
        """
        return self._loader.load_directory(
            directory, pattern=pattern, recursive=recursive
        )

    def register(self, pattern: PatternConfig) -> None:
        """Register a pattern programmatically.

        An existing registration with the same ``pattern_id`` is replaced.

        Args:
            pattern: Pattern to register.
        """
        self._custom_patterns[pattern.pattern_id] = pattern

    def unregister(self, pattern_id: str) -> bool:
        """Remove a programmatically registered pattern.

        Patterns that came from loaded files are not affected.

        Args:
            pattern_id: Pattern to unregister.

        Returns:
            True if a pattern was removed, False if none was registered.
        """
        if pattern_id not in self._custom_patterns:
            return False
        del self._custom_patterns[pattern_id]
        return True

    def get_patterns(self, include_disabled: bool = False) -> list[PatternConfig]:
        """Get all registered patterns.

        Loaded patterns come first, then programmatic ones. For a duplicate
        ``pattern_id`` the strictly higher priority wins, so on a tie the
        loaded pattern is kept.

        Args:
            include_disabled: Whether to include disabled patterns.

        Returns:
            List of patterns sorted by priority, highest first.
        """
        all_patterns = self._loader.get_all_patterns(include_disabled)
        all_patterns.extend(
            p for p in self._custom_patterns.values() if p.enabled or include_disabled
        )

        seen: dict[str, PatternConfig] = {}
        for p in all_patterns:
            if p.pattern_id not in seen or p.priority > seen[p.pattern_id].priority:
                seen[p.pattern_id] = p

        return sorted(seen.values(), key=lambda p: p.priority, reverse=True)

    def get_pattern(self, pattern_id: str) -> PatternConfig | None:
        """Get a pattern by ID; programmatic registrations are checked first."""
        if pattern_id in self._custom_patterns:
            return self._custom_patterns[pattern_id]
        return self._loader.get_pattern(pattern_id)

    def match(self, value: str) -> list[PatternConfig]:
        """Find all patterns that match a value.

        Args:
            value: Value to match.

        Returns:
            List of matching patterns (highest priority first).
        """
        return [pattern for pattern in self.get_patterns() if pattern.matches(value)]

    def match_first(self, value: str) -> PatternConfig | None:
        """Find the first (highest priority) matching pattern.

        Args:
            value: Value to match.

        Returns:
            First matching pattern or None.
        """
        for pattern in self.get_patterns():
            if pattern.matches(value):
                return pattern
        return None

    def clear(self) -> None:
        """Clear all loaded and programmatically registered patterns."""
        self._loader.clear()
        self._custom_patterns.clear()
|
|
933
|
+
|
|
934
|
+
|
|
935
|
+
# Global registry instance
|
|
936
|
+
# Created at import time; the module-level convenience functions in this file
# (load_patterns, register_pattern, match_patterns, ...) operate on this object.
pattern_registry = PatternRegistry()
|
|
937
|
+
|
|
938
|
+
|
|
939
|
+
# =============================================================================
|
|
940
|
+
# Default Patterns
|
|
941
|
+
# =============================================================================
|
|
942
|
+
|
|
943
|
+
|
|
944
|
+
DEFAULT_PATTERNS_YAML = r"""
|
|
945
|
+
version: "1.0"
|
|
946
|
+
name: "Default Patterns"
|
|
947
|
+
description: "Built-in patterns for common data types"
|
|
948
|
+
|
|
949
|
+
patterns:
|
|
950
|
+
email:
|
|
951
|
+
name: Email Address
|
|
952
|
+
regex: "^[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\\.[a-zA-Z]{2,}$"
|
|
953
|
+
priority: 85
|
|
954
|
+
data_type: email
|
|
955
|
+
description: Standard email address format
|
|
956
|
+
examples:
|
|
957
|
+
- value: "user@example.com"
|
|
958
|
+
should_match: true
|
|
959
|
+
- value: "not-an-email"
|
|
960
|
+
should_match: false
|
|
961
|
+
|
|
962
|
+
url:
|
|
963
|
+
name: URL
|
|
964
|
+
regex: "^https?://[\\w.-]+(?:/[\\w./?%&=-]*)?$"
|
|
965
|
+
priority: 80
|
|
966
|
+
data_type: url
|
|
967
|
+
description: HTTP/HTTPS URL
|
|
968
|
+
examples:
|
|
969
|
+
- value: "https://example.com/path"
|
|
970
|
+
should_match: true
|
|
971
|
+
|
|
972
|
+
uuid:
|
|
973
|
+
name: UUID
|
|
974
|
+
regex: "^[0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{12}$"
|
|
975
|
+
priority: 90
|
|
976
|
+
data_type: uuid
|
|
977
|
+
description: UUID v4 format
|
|
978
|
+
examples:
|
|
979
|
+
- value: "550e8400-e29b-41d4-a716-446655440000"
|
|
980
|
+
should_match: true
|
|
981
|
+
|
|
982
|
+
ip_v4:
|
|
983
|
+
name: IPv4 Address
|
|
984
|
+
regex: "^(?:(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\\.){3}(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)$"
|
|
985
|
+
priority: 85
|
|
986
|
+
data_type: ip_address
|
|
987
|
+
description: IPv4 address
|
|
988
|
+
examples:
|
|
989
|
+
- value: "192.168.1.1"
|
|
990
|
+
should_match: true
|
|
991
|
+
- value: "256.1.1.1"
|
|
992
|
+
should_match: false
|
|
993
|
+
|
|
994
|
+
iso_date:
|
|
995
|
+
name: ISO Date
|
|
996
|
+
regex: "^\\d{4}-\\d{2}-\\d{2}$"
|
|
997
|
+
priority: 75
|
|
998
|
+
data_type: date
|
|
999
|
+
description: ISO 8601 date format
|
|
1000
|
+
examples:
|
|
1001
|
+
- value: "2024-12-25"
|
|
1002
|
+
should_match: true
|
|
1003
|
+
|
|
1004
|
+
iso_datetime:
|
|
1005
|
+
name: ISO DateTime
|
|
1006
|
+
regex: "^\\d{4}-\\d{2}-\\d{2}T\\d{2}:\\d{2}:\\d{2}(?:\\.\\d+)?(?:Z|[+-]\\d{2}:\\d{2})?$"
|
|
1007
|
+
priority: 80
|
|
1008
|
+
data_type: datetime
|
|
1009
|
+
description: ISO 8601 datetime format
|
|
1010
|
+
examples:
|
|
1011
|
+
- value: "2024-12-25T10:30:00Z"
|
|
1012
|
+
should_match: true
|
|
1013
|
+
|
|
1014
|
+
groups:
|
|
1015
|
+
korean:
|
|
1016
|
+
name: Korean Patterns
|
|
1017
|
+
description: Patterns for Korean data formats
|
|
1018
|
+
priority_boost: 5
|
|
1019
|
+
patterns:
|
|
1020
|
+
korean_phone:
|
|
1021
|
+
name: Korean Phone Number
|
|
1022
|
+
regex: "^01[016789]-?\\d{3,4}-?\\d{4}$"
|
|
1023
|
+
priority: 90
|
|
1024
|
+
data_type: korean_phone
|
|
1025
|
+
examples:
|
|
1026
|
+
- value: "010-1234-5678"
|
|
1027
|
+
should_match: true
|
|
1028
|
+
- value: "01012345678"
|
|
1029
|
+
should_match: true
|
|
1030
|
+
|
|
1031
|
+
korean_rrn:
|
|
1032
|
+
name: Korean RRN
|
|
1033
|
+
regex: "^\\d{6}-?[1-4]\\d{6}$"
|
|
1034
|
+
priority: 95
|
|
1035
|
+
data_type: korean_rrn
|
|
1036
|
+
description: Korean Resident Registration Number
|
|
1037
|
+
examples:
|
|
1038
|
+
- value: "900101-1234567"
|
|
1039
|
+
should_match: true
|
|
1040
|
+
|
|
1041
|
+
korean_business_number:
|
|
1042
|
+
name: Korean Business Number
|
|
1043
|
+
regex: "^\\d{3}-\\d{2}-\\d{5}$"
|
|
1044
|
+
priority: 90
|
|
1045
|
+
data_type: korean_business_number
|
|
1046
|
+
examples:
|
|
1047
|
+
- value: "123-45-67890"
|
|
1048
|
+
should_match: true
|
|
1049
|
+
"""
|
|
1050
|
+
|
|
1051
|
+
|
|
1052
|
+
def load_default_patterns() -> None:
    """Parse ``DEFAULT_PATTERNS_YAML`` into the global registry's loader.

    The configuration is stored under the name ``"defaults"``.
    """
    loader = pattern_registry.loader
    loader.load_from_string(DEFAULT_PATTERNS_YAML, "defaults")
|
|
1055
|
+
|
|
1056
|
+
|
|
1057
|
+
# =============================================================================
|
|
1058
|
+
# Convenience Functions
|
|
1059
|
+
# =============================================================================
|
|
1060
|
+
|
|
1061
|
+
|
|
1062
|
+
def load_patterns(path: str | Path) -> list[PatternConfig]:
    """Load a YAML file into the global registry.

    Args:
        path: Path to the YAML file.

    Returns:
        The patterns reported by the loaded configuration's
        ``get_all_patterns()``, i.e. those of this file only.
    """
    return pattern_registry.load_file(path).get_all_patterns()
|
|
1073
|
+
|
|
1074
|
+
|
|
1075
|
+
def load_patterns_directory(directory: str | Path) -> list[PatternConfig]:
    """Load every pattern file of a directory into the global registry.

    Args:
        directory: Directory containing YAML files.

    Returns:
        The registry's full pattern list after loading, which includes
        patterns that were registered or loaded before this call.
    """
    registry = pattern_registry
    registry.load_directory(directory)
    return registry.get_patterns()
|
|
1086
|
+
|
|
1087
|
+
|
|
1088
|
+
def register_pattern(
    pattern_id: str,
    regex: str,
    name: str | None = None,
    data_type: str = "string",
    priority: int = PatternPriority.MEDIUM,
    **kwargs: Any,
) -> PatternConfig:
    """Build a ``PatternConfig`` and register it with the global registry.

    Args:
        pattern_id: Unique pattern identifier.
        regex: Regular expression.
        name: Human-readable name; falls back to ``pattern_id`` when falsy.
        data_type: Data type associated with the pattern.
        priority: Match priority.
        **kwargs: Extra keyword arguments passed through to ``PatternConfig``.

    Returns:
        The pattern that was created and registered.
    """
    display_name = name if name else pattern_id
    created = PatternConfig(
        pattern_id=pattern_id,
        name=display_name,
        regex=regex,
        data_type=data_type,
        priority=priority,
        **kwargs,
    )
    pattern_registry.register(created)
    return created
|
|
1119
|
+
|
|
1120
|
+
|
|
1121
|
+
def match_patterns(value: str) -> list[PatternConfig]:
    """Return the patterns of the global registry that match ``value``.

    Args:
        value: Value to match.

    Returns:
        The result of ``pattern_registry.match(value)``.
    """
    matched = pattern_registry.match(value)
    return matched
|
|
1131
|
+
|
|
1132
|
+
|
|
1133
|
+
def infer_type_from_patterns(value: str) -> DataType | None:
    """Infer a data type from the first pattern that matches ``value``.

    Args:
        value: Value to analyze.

    Returns:
        ``get_data_type()`` of the first matching pattern in the global
        registry, or ``None`` when nothing matches.
    """
    best = pattern_registry.match_first(value)
    return best.get_data_type() if best else None
|
|
1146
|
+
|
|
1147
|
+
|
|
1148
|
+
def export_patterns(
    path: str | Path,
    patterns: list[PatternConfig] | None = None,
) -> None:
    """Write patterns to a YAML file.

    The document has a fixed ``version`` and ``name`` plus a ``patterns``
    mapping keyed by ``pattern_id``. It is serialised before the file is
    opened for writing.

    Args:
        path: Output file path.
        patterns: Patterns to export; ``None`` exports everything returned
            by the global registry's ``get_patterns()``.
    """
    selected = pattern_registry.get_patterns() if patterns is None else patterns

    exported = {}
    for entry in selected:
        exported[entry.pattern_id] = entry.to_dict()

    document = {
        "version": "1.0",
        "name": "Exported Patterns",
        "patterns": exported,
    }
    rendered = _dump_yaml(document)

    with open(path, "w", encoding="utf-8") as out:
        out.write(rendered)
|