tapps-agents 3.5.40__py3-none-any.whl → 3.6.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- tapps_agents/__init__.py +2 -2
- tapps_agents/agents/__init__.py +22 -22
- tapps_agents/agents/analyst/__init__.py +5 -5
- tapps_agents/agents/architect/__init__.py +5 -5
- tapps_agents/agents/architect/agent.py +1033 -1033
- tapps_agents/agents/architect/pattern_detector.py +75 -75
- tapps_agents/agents/cleanup/__init__.py +7 -7
- tapps_agents/agents/cleanup/agent.py +445 -445
- tapps_agents/agents/debugger/__init__.py +7 -7
- tapps_agents/agents/debugger/agent.py +310 -310
- tapps_agents/agents/debugger/error_analyzer.py +437 -437
- tapps_agents/agents/designer/__init__.py +5 -5
- tapps_agents/agents/designer/agent.py +786 -786
- tapps_agents/agents/designer/visual_designer.py +638 -638
- tapps_agents/agents/documenter/__init__.py +7 -7
- tapps_agents/agents/documenter/agent.py +531 -531
- tapps_agents/agents/documenter/doc_generator.py +472 -472
- tapps_agents/agents/documenter/doc_validator.py +393 -393
- tapps_agents/agents/documenter/framework_doc_updater.py +493 -493
- tapps_agents/agents/enhancer/__init__.py +7 -7
- tapps_agents/agents/evaluator/__init__.py +7 -7
- tapps_agents/agents/evaluator/agent.py +443 -443
- tapps_agents/agents/evaluator/priority_evaluator.py +641 -641
- tapps_agents/agents/evaluator/quality_analyzer.py +147 -147
- tapps_agents/agents/evaluator/report_generator.py +344 -344
- tapps_agents/agents/evaluator/usage_analyzer.py +192 -192
- tapps_agents/agents/evaluator/workflow_analyzer.py +189 -189
- tapps_agents/agents/implementer/__init__.py +7 -7
- tapps_agents/agents/implementer/agent.py +798 -798
- tapps_agents/agents/implementer/auto_fix.py +1119 -1119
- tapps_agents/agents/implementer/code_generator.py +73 -73
- tapps_agents/agents/improver/__init__.py +1 -1
- tapps_agents/agents/improver/agent.py +753 -753
- tapps_agents/agents/ops/__init__.py +1 -1
- tapps_agents/agents/ops/agent.py +619 -619
- tapps_agents/agents/ops/dependency_analyzer.py +600 -600
- tapps_agents/agents/orchestrator/__init__.py +5 -5
- tapps_agents/agents/orchestrator/agent.py +522 -522
- tapps_agents/agents/planner/__init__.py +7 -7
- tapps_agents/agents/planner/agent.py +1127 -1127
- tapps_agents/agents/reviewer/__init__.py +24 -24
- tapps_agents/agents/reviewer/agent.py +3513 -3513
- tapps_agents/agents/reviewer/aggregator.py +213 -213
- tapps_agents/agents/reviewer/batch_review.py +448 -448
- tapps_agents/agents/reviewer/cache.py +443 -443
- tapps_agents/agents/reviewer/context7_enhancer.py +630 -630
- tapps_agents/agents/reviewer/context_detector.py +203 -203
- tapps_agents/agents/reviewer/docker_compose_validator.py +158 -158
- tapps_agents/agents/reviewer/dockerfile_validator.py +176 -176
- tapps_agents/agents/reviewer/error_handling.py +126 -126
- tapps_agents/agents/reviewer/feedback_generator.py +490 -490
- tapps_agents/agents/reviewer/influxdb_validator.py +316 -316
- tapps_agents/agents/reviewer/issue_tracking.py +169 -169
- tapps_agents/agents/reviewer/library_detector.py +295 -295
- tapps_agents/agents/reviewer/library_patterns.py +268 -268
- tapps_agents/agents/reviewer/maintainability_scorer.py +593 -593
- tapps_agents/agents/reviewer/metric_strategies.py +276 -276
- tapps_agents/agents/reviewer/mqtt_validator.py +160 -160
- tapps_agents/agents/reviewer/output_enhancer.py +105 -105
- tapps_agents/agents/reviewer/pattern_detector.py +241 -241
- tapps_agents/agents/reviewer/performance_scorer.py +357 -357
- tapps_agents/agents/reviewer/phased_review.py +516 -516
- tapps_agents/agents/reviewer/progressive_review.py +435 -435
- tapps_agents/agents/reviewer/react_scorer.py +331 -331
- tapps_agents/agents/reviewer/score_constants.py +228 -228
- tapps_agents/agents/reviewer/score_validator.py +507 -507
- tapps_agents/agents/reviewer/scorer_registry.py +373 -373
- tapps_agents/agents/reviewer/scoring.py +1566 -1566
- tapps_agents/agents/reviewer/service_discovery.py +534 -534
- tapps_agents/agents/reviewer/tools/__init__.py +41 -41
- tapps_agents/agents/reviewer/tools/parallel_executor.py +581 -581
- tapps_agents/agents/reviewer/tools/ruff_grouping.py +250 -250
- tapps_agents/agents/reviewer/tools/scoped_mypy.py +284 -284
- tapps_agents/agents/reviewer/typescript_scorer.py +1142 -1142
- tapps_agents/agents/reviewer/validation.py +208 -208
- tapps_agents/agents/reviewer/websocket_validator.py +132 -132
- tapps_agents/agents/tester/__init__.py +7 -7
- tapps_agents/agents/tester/accessibility_auditor.py +309 -309
- tapps_agents/agents/tester/agent.py +1080 -1080
- tapps_agents/agents/tester/batch_generator.py +54 -54
- tapps_agents/agents/tester/context_learner.py +51 -51
- tapps_agents/agents/tester/coverage_analyzer.py +386 -386
- tapps_agents/agents/tester/coverage_test_generator.py +290 -290
- tapps_agents/agents/tester/debug_enhancer.py +238 -238
- tapps_agents/agents/tester/device_emulator.py +241 -241
- tapps_agents/agents/tester/integration_generator.py +62 -62
- tapps_agents/agents/tester/network_recorder.py +300 -300
- tapps_agents/agents/tester/performance_monitor.py +320 -320
- tapps_agents/agents/tester/test_fixer.py +316 -316
- tapps_agents/agents/tester/test_generator.py +632 -632
- tapps_agents/agents/tester/trace_manager.py +234 -234
- tapps_agents/agents/tester/visual_regression.py +291 -291
- tapps_agents/analysis/pattern_detector.py +36 -36
- tapps_agents/beads/hydration.py +213 -213
- tapps_agents/beads/parse.py +32 -32
- tapps_agents/beads/specs.py +206 -206
- tapps_agents/cli/__init__.py +9 -9
- tapps_agents/cli/__main__.py +8 -8
- tapps_agents/cli/base.py +478 -478
- tapps_agents/cli/command_classifier.py +72 -72
- tapps_agents/cli/commands/__init__.py +2 -2
- tapps_agents/cli/commands/analyst.py +173 -173
- tapps_agents/cli/commands/architect.py +109 -109
- tapps_agents/cli/commands/cleanup_agent.py +92 -92
- tapps_agents/cli/commands/common.py +126 -126
- tapps_agents/cli/commands/debugger.py +90 -90
- tapps_agents/cli/commands/designer.py +112 -112
- tapps_agents/cli/commands/documenter.py +136 -136
- tapps_agents/cli/commands/enhancer.py +110 -110
- tapps_agents/cli/commands/evaluator.py +255 -255
- tapps_agents/cli/commands/health.py +665 -665
- tapps_agents/cli/commands/implementer.py +301 -301
- tapps_agents/cli/commands/improver.py +91 -91
- tapps_agents/cli/commands/knowledge.py +111 -111
- tapps_agents/cli/commands/learning.py +172 -172
- tapps_agents/cli/commands/observability.py +283 -283
- tapps_agents/cli/commands/ops.py +135 -135
- tapps_agents/cli/commands/orchestrator.py +116 -116
- tapps_agents/cli/commands/planner.py +237 -237
- tapps_agents/cli/commands/reviewer.py +1872 -1872
- tapps_agents/cli/commands/status.py +285 -285
- tapps_agents/cli/commands/task.py +227 -219
- tapps_agents/cli/commands/tester.py +191 -191
- tapps_agents/cli/commands/top_level.py +3586 -3586
- tapps_agents/cli/feedback.py +936 -936
- tapps_agents/cli/formatters.py +608 -608
- tapps_agents/cli/help/__init__.py +7 -7
- tapps_agents/cli/help/static_help.py +425 -425
- tapps_agents/cli/network_detection.py +110 -110
- tapps_agents/cli/output_compactor.py +274 -274
- tapps_agents/cli/parsers/__init__.py +2 -2
- tapps_agents/cli/parsers/analyst.py +186 -186
- tapps_agents/cli/parsers/architect.py +167 -167
- tapps_agents/cli/parsers/cleanup_agent.py +228 -228
- tapps_agents/cli/parsers/debugger.py +116 -116
- tapps_agents/cli/parsers/designer.py +182 -182
- tapps_agents/cli/parsers/documenter.py +134 -134
- tapps_agents/cli/parsers/enhancer.py +113 -113
- tapps_agents/cli/parsers/evaluator.py +213 -213
- tapps_agents/cli/parsers/implementer.py +168 -168
- tapps_agents/cli/parsers/improver.py +132 -132
- tapps_agents/cli/parsers/ops.py +159 -159
- tapps_agents/cli/parsers/orchestrator.py +98 -98
- tapps_agents/cli/parsers/planner.py +145 -145
- tapps_agents/cli/parsers/reviewer.py +462 -462
- tapps_agents/cli/parsers/tester.py +124 -124
- tapps_agents/cli/progress_heartbeat.py +254 -254
- tapps_agents/cli/streaming_progress.py +336 -336
- tapps_agents/cli/utils/__init__.py +6 -6
- tapps_agents/cli/utils/agent_lifecycle.py +48 -48
- tapps_agents/cli/utils/error_formatter.py +82 -82
- tapps_agents/cli/utils/error_recovery.py +188 -188
- tapps_agents/cli/utils/output_handler.py +59 -59
- tapps_agents/cli/utils/prompt_enhancer.py +319 -319
- tapps_agents/cli/validators/__init__.py +9 -9
- tapps_agents/cli/validators/command_validator.py +81 -81
- tapps_agents/context7/__init__.py +112 -112
- tapps_agents/context7/agent_integration.py +869 -869
- tapps_agents/context7/analytics.py +382 -382
- tapps_agents/context7/analytics_dashboard.py +299 -299
- tapps_agents/context7/async_cache.py +681 -681
- tapps_agents/context7/backup_client.py +958 -958
- tapps_agents/context7/cache_locking.py +194 -194
- tapps_agents/context7/cache_metadata.py +214 -214
- tapps_agents/context7/cache_prewarm.py +488 -488
- tapps_agents/context7/cache_structure.py +168 -168
- tapps_agents/context7/cache_warming.py +604 -604
- tapps_agents/context7/circuit_breaker.py +376 -376
- tapps_agents/context7/cleanup.py +461 -461
- tapps_agents/context7/commands.py +858 -858
- tapps_agents/context7/credential_validation.py +276 -276
- tapps_agents/context7/cross_reference_resolver.py +168 -168
- tapps_agents/context7/cross_references.py +424 -424
- tapps_agents/context7/doc_manager.py +225 -225
- tapps_agents/context7/fuzzy_matcher.py +369 -369
- tapps_agents/context7/kb_cache.py +404 -404
- tapps_agents/context7/language_detector.py +219 -219
- tapps_agents/context7/library_detector.py +725 -725
- tapps_agents/context7/lookup.py +738 -738
- tapps_agents/context7/metadata.py +258 -258
- tapps_agents/context7/refresh_queue.py +300 -300
- tapps_agents/context7/security.py +373 -373
- tapps_agents/context7/staleness_policies.py +278 -278
- tapps_agents/context7/tiles_integration.py +47 -47
- tapps_agents/continuous_bug_fix/__init__.py +20 -20
- tapps_agents/continuous_bug_fix/bug_finder.py +306 -306
- tapps_agents/continuous_bug_fix/bug_fix_coordinator.py +177 -177
- tapps_agents/continuous_bug_fix/commit_manager.py +178 -178
- tapps_agents/continuous_bug_fix/continuous_bug_fixer.py +322 -322
- tapps_agents/continuous_bug_fix/proactive_bug_finder.py +285 -285
- tapps_agents/core/__init__.py +298 -298
- tapps_agents/core/adaptive_cache_config.py +432 -432
- tapps_agents/core/agent_base.py +647 -647
- tapps_agents/core/agent_cache.py +466 -466
- tapps_agents/core/agent_learning.py +1865 -1865
- tapps_agents/core/analytics_dashboard.py +563 -563
- tapps_agents/core/analytics_enhancements.py +597 -597
- tapps_agents/core/anonymization.py +274 -274
- tapps_agents/core/artifact_context_builder.py +293 -0
- tapps_agents/core/ast_parser.py +228 -228
- tapps_agents/core/async_file_ops.py +402 -402
- tapps_agents/core/best_practice_consultant.py +299 -299
- tapps_agents/core/brownfield_analyzer.py +299 -299
- tapps_agents/core/brownfield_review.py +541 -541
- tapps_agents/core/browser_controller.py +513 -513
- tapps_agents/core/capability_registry.py +418 -418
- tapps_agents/core/change_impact_analyzer.py +190 -190
- tapps_agents/core/checkpoint_manager.py +377 -377
- tapps_agents/core/code_generator.py +329 -329
- tapps_agents/core/code_validator.py +276 -276
- tapps_agents/core/command_registry.py +327 -327
- tapps_agents/core/config.py +33 -0
- tapps_agents/core/context_gathering/__init__.py +2 -2
- tapps_agents/core/context_gathering/repository_explorer.py +28 -28
- tapps_agents/core/context_intelligence/__init__.py +2 -2
- tapps_agents/core/context_intelligence/relevance_scorer.py +24 -24
- tapps_agents/core/context_intelligence/token_budget_manager.py +27 -27
- tapps_agents/core/context_manager.py +240 -240
- tapps_agents/core/cursor_feedback_monitor.py +146 -146
- tapps_agents/core/cursor_verification.py +290 -290
- tapps_agents/core/customization_loader.py +280 -280
- tapps_agents/core/customization_schema.py +260 -260
- tapps_agents/core/customization_template.py +238 -238
- tapps_agents/core/debug_logger.py +124 -124
- tapps_agents/core/design_validator.py +298 -298
- tapps_agents/core/diagram_generator.py +226 -226
- tapps_agents/core/docker_utils.py +232 -232
- tapps_agents/core/document_generator.py +617 -617
- tapps_agents/core/domain_detector.py +30 -30
- tapps_agents/core/error_envelope.py +454 -454
- tapps_agents/core/error_handler.py +270 -270
- tapps_agents/core/estimation_tracker.py +189 -189
- tapps_agents/core/eval_prompt_engine.py +116 -116
- tapps_agents/core/evaluation_base.py +119 -119
- tapps_agents/core/evaluation_models.py +320 -320
- tapps_agents/core/evaluation_orchestrator.py +225 -225
- tapps_agents/core/evaluators/__init__.py +7 -7
- tapps_agents/core/evaluators/architectural_evaluator.py +205 -205
- tapps_agents/core/evaluators/behavioral_evaluator.py +160 -160
- tapps_agents/core/evaluators/performance_profile_evaluator.py +160 -160
- tapps_agents/core/evaluators/security_posture_evaluator.py +148 -148
- tapps_agents/core/evaluators/spec_compliance_evaluator.py +181 -181
- tapps_agents/core/exceptions.py +107 -107
- tapps_agents/core/expert_config_generator.py +293 -293
- tapps_agents/core/export_schema.py +202 -202
- tapps_agents/core/external_feedback_models.py +102 -102
- tapps_agents/core/external_feedback_storage.py +213 -213
- tapps_agents/core/fallback_strategy.py +314 -314
- tapps_agents/core/feedback_analyzer.py +162 -162
- tapps_agents/core/feedback_collector.py +178 -178
- tapps_agents/core/git_operations.py +445 -445
- tapps_agents/core/hardware_profiler.py +151 -151
- tapps_agents/core/instructions.py +324 -324
- tapps_agents/core/io_guardrails.py +69 -69
- tapps_agents/core/issue_manifest.py +249 -249
- tapps_agents/core/issue_schema.py +139 -139
- tapps_agents/core/json_utils.py +128 -128
- tapps_agents/core/knowledge_graph.py +446 -446
- tapps_agents/core/language_detector.py +296 -296
- tapps_agents/core/learning_confidence.py +242 -242
- tapps_agents/core/learning_dashboard.py +246 -246
- tapps_agents/core/learning_decision.py +384 -384
- tapps_agents/core/learning_explainability.py +578 -578
- tapps_agents/core/learning_export.py +287 -287
- tapps_agents/core/learning_integration.py +228 -228
- tapps_agents/core/llm_behavior.py +232 -232
- tapps_agents/core/long_duration_support.py +786 -786
- tapps_agents/core/mcp_setup.py +106 -106
- tapps_agents/core/memory_integration.py +396 -396
- tapps_agents/core/meta_learning.py +666 -666
- tapps_agents/core/module_path_sanitizer.py +199 -199
- tapps_agents/core/multi_agent_orchestrator.py +382 -382
- tapps_agents/core/network_errors.py +125 -125
- tapps_agents/core/nfr_validator.py +336 -336
- tapps_agents/core/offline_mode.py +158 -158
- tapps_agents/core/output_contracts.py +300 -300
- tapps_agents/core/output_formatter.py +300 -300
- tapps_agents/core/path_normalizer.py +174 -174
- tapps_agents/core/path_validator.py +322 -322
- tapps_agents/core/pattern_library.py +250 -250
- tapps_agents/core/performance_benchmark.py +301 -301
- tapps_agents/core/performance_monitor.py +184 -184
- tapps_agents/core/playwright_mcp_controller.py +771 -771
- tapps_agents/core/policy_loader.py +135 -135
- tapps_agents/core/progress.py +166 -166
- tapps_agents/core/project_profile.py +354 -354
- tapps_agents/core/project_type_detector.py +454 -454
- tapps_agents/core/prompt_base.py +223 -223
- tapps_agents/core/prompt_learning/__init__.py +2 -2
- tapps_agents/core/prompt_learning/learning_loop.py +24 -24
- tapps_agents/core/prompt_learning/project_prompt_store.py +25 -25
- tapps_agents/core/prompt_learning/skills_prompt_analyzer.py +35 -35
- tapps_agents/core/prompt_optimization/__init__.py +6 -6
- tapps_agents/core/prompt_optimization/ab_tester.py +114 -114
- tapps_agents/core/prompt_optimization/correlation_analyzer.py +160 -160
- tapps_agents/core/prompt_optimization/progressive_refiner.py +129 -129
- tapps_agents/core/prompt_optimization/prompt_library.py +37 -37
- tapps_agents/core/requirements_evaluator.py +431 -431
- tapps_agents/core/resource_aware_executor.py +449 -449
- tapps_agents/core/resource_monitor.py +343 -343
- tapps_agents/core/resume_handler.py +298 -298
- tapps_agents/core/retry_handler.py +197 -197
- tapps_agents/core/review_checklists.py +479 -479
- tapps_agents/core/role_loader.py +201 -201
- tapps_agents/core/role_template_loader.py +201 -201
- tapps_agents/core/runtime_mode.py +60 -60
- tapps_agents/core/security_scanner.py +342 -342
- tapps_agents/core/skill_agent_registry.py +194 -194
- tapps_agents/core/skill_integration.py +208 -208
- tapps_agents/core/skill_loader.py +492 -492
- tapps_agents/core/skill_template.py +341 -341
- tapps_agents/core/skill_validator.py +478 -478
- tapps_agents/core/stack_analyzer.py +35 -35
- tapps_agents/core/startup.py +174 -174
- tapps_agents/core/storage_manager.py +397 -397
- tapps_agents/core/storage_models.py +166 -166
- tapps_agents/core/story_evaluator.py +410 -410
- tapps_agents/core/subprocess_utils.py +170 -170
- tapps_agents/core/task_duration.py +296 -296
- tapps_agents/core/task_memory.py +582 -582
- tapps_agents/core/task_state.py +226 -226
- tapps_agents/core/tech_stack_priorities.py +208 -208
- tapps_agents/core/temp_directory.py +194 -194
- tapps_agents/core/template_merger.py +600 -600
- tapps_agents/core/template_selector.py +280 -280
- tapps_agents/core/test_generator.py +286 -286
- tapps_agents/core/tiered_context.py +253 -253
- tapps_agents/core/token_monitor.py +345 -345
- tapps_agents/core/traceability.py +254 -254
- tapps_agents/core/trajectory_tracker.py +50 -50
- tapps_agents/core/unicode_safe.py +143 -143
- tapps_agents/core/unified_cache_config.py +170 -170
- tapps_agents/core/unified_state.py +324 -324
- tapps_agents/core/validate_cursor_setup.py +237 -237
- tapps_agents/core/validation_registry.py +136 -136
- tapps_agents/core/validators/__init__.py +4 -4
- tapps_agents/core/validators/python_validator.py +87 -87
- tapps_agents/core/verification_agent.py +90 -90
- tapps_agents/core/visual_feedback.py +644 -644
- tapps_agents/core/workflow_validator.py +197 -197
- tapps_agents/core/worktree.py +367 -367
- tapps_agents/docker/__init__.py +10 -10
- tapps_agents/docker/analyzer.py +186 -186
- tapps_agents/docker/debugger.py +229 -229
- tapps_agents/docker/error_patterns.py +216 -216
- tapps_agents/epic/__init__.py +22 -22
- tapps_agents/epic/beads_sync.py +115 -115
- tapps_agents/epic/markdown_sync.py +105 -105
- tapps_agents/epic/models.py +96 -96
- tapps_agents/experts/__init__.py +163 -163
- tapps_agents/experts/agent_integration.py +243 -243
- tapps_agents/experts/auto_generator.py +331 -331
- tapps_agents/experts/base_expert.py +536 -536
- tapps_agents/experts/builtin_registry.py +261 -261
- tapps_agents/experts/business_metrics.py +565 -565
- tapps_agents/experts/cache.py +266 -266
- tapps_agents/experts/confidence_breakdown.py +306 -306
- tapps_agents/experts/confidence_calculator.py +336 -336
- tapps_agents/experts/confidence_metrics.py +236 -236
- tapps_agents/experts/domain_config.py +311 -311
- tapps_agents/experts/domain_detector.py +550 -550
- tapps_agents/experts/domain_utils.py +84 -84
- tapps_agents/experts/expert_config.py +113 -113
- tapps_agents/experts/expert_engine.py +465 -465
- tapps_agents/experts/expert_registry.py +744 -744
- tapps_agents/experts/expert_synthesizer.py +70 -70
- tapps_agents/experts/governance.py +197 -197
- tapps_agents/experts/history_logger.py +312 -312
- tapps_agents/experts/knowledge/README.md +180 -180
- tapps_agents/experts/knowledge/accessibility/accessible-forms.md +331 -331
- tapps_agents/experts/knowledge/accessibility/aria-patterns.md +344 -344
- tapps_agents/experts/knowledge/accessibility/color-contrast.md +285 -285
- tapps_agents/experts/knowledge/accessibility/keyboard-navigation.md +332 -332
- tapps_agents/experts/knowledge/accessibility/screen-readers.md +282 -282
- tapps_agents/experts/knowledge/accessibility/semantic-html.md +355 -355
- tapps_agents/experts/knowledge/accessibility/testing-accessibility.md +369 -369
- tapps_agents/experts/knowledge/accessibility/wcag-2.1.md +296 -296
- tapps_agents/experts/knowledge/accessibility/wcag-2.2.md +211 -211
- tapps_agents/experts/knowledge/agent-learning/best-practices.md +715 -715
- tapps_agents/experts/knowledge/agent-learning/pattern-extraction.md +282 -282
- tapps_agents/experts/knowledge/agent-learning/prompt-optimization.md +320 -320
- tapps_agents/experts/knowledge/ai-frameworks/model-optimization.md +90 -90
- tapps_agents/experts/knowledge/ai-frameworks/openvino-patterns.md +260 -260
- tapps_agents/experts/knowledge/api-design-integration/api-gateway-patterns.md +309 -309
- tapps_agents/experts/knowledge/api-design-integration/api-security-patterns.md +521 -521
- tapps_agents/experts/knowledge/api-design-integration/api-versioning.md +421 -421
- tapps_agents/experts/knowledge/api-design-integration/async-protocol-patterns.md +61 -61
- tapps_agents/experts/knowledge/api-design-integration/contract-testing.md +221 -221
- tapps_agents/experts/knowledge/api-design-integration/external-api-integration.md +489 -489
- tapps_agents/experts/knowledge/api-design-integration/fastapi-patterns.md +360 -360
- tapps_agents/experts/knowledge/api-design-integration/fastapi-testing.md +262 -262
- tapps_agents/experts/knowledge/api-design-integration/graphql-patterns.md +582 -582
- tapps_agents/experts/knowledge/api-design-integration/grpc-best-practices.md +499 -499
- tapps_agents/experts/knowledge/api-design-integration/mqtt-patterns.md +455 -455
- tapps_agents/experts/knowledge/api-design-integration/rate-limiting.md +507 -507
- tapps_agents/experts/knowledge/api-design-integration/restful-api-design.md +618 -618
- tapps_agents/experts/knowledge/api-design-integration/websocket-patterns.md +480 -480
- tapps_agents/experts/knowledge/cloud-infrastructure/cloud-native-patterns.md +175 -175
- tapps_agents/experts/knowledge/cloud-infrastructure/container-health-checks.md +261 -261
- tapps_agents/experts/knowledge/cloud-infrastructure/containerization.md +222 -222
- tapps_agents/experts/knowledge/cloud-infrastructure/cost-optimization.md +122 -122
- tapps_agents/experts/knowledge/cloud-infrastructure/disaster-recovery.md +153 -153
- tapps_agents/experts/knowledge/cloud-infrastructure/dockerfile-patterns.md +285 -285
- tapps_agents/experts/knowledge/cloud-infrastructure/infrastructure-as-code.md +187 -187
- tapps_agents/experts/knowledge/cloud-infrastructure/kubernetes-patterns.md +253 -253
- tapps_agents/experts/knowledge/cloud-infrastructure/multi-cloud-strategies.md +155 -155
- tapps_agents/experts/knowledge/cloud-infrastructure/serverless-architecture.md +200 -200
- tapps_agents/experts/knowledge/code-quality-analysis/README.md +16 -16
- tapps_agents/experts/knowledge/code-quality-analysis/code-metrics.md +137 -137
- tapps_agents/experts/knowledge/code-quality-analysis/complexity-analysis.md +181 -181
- tapps_agents/experts/knowledge/code-quality-analysis/technical-debt-patterns.md +191 -191
- tapps_agents/experts/knowledge/data-privacy-compliance/anonymization.md +313 -313
- tapps_agents/experts/knowledge/data-privacy-compliance/ccpa.md +255 -255
- tapps_agents/experts/knowledge/data-privacy-compliance/consent-management.md +282 -282
- tapps_agents/experts/knowledge/data-privacy-compliance/data-minimization.md +275 -275
- tapps_agents/experts/knowledge/data-privacy-compliance/data-retention.md +297 -297
- tapps_agents/experts/knowledge/data-privacy-compliance/data-subject-rights.md +383 -383
- tapps_agents/experts/knowledge/data-privacy-compliance/encryption-privacy.md +285 -285
- tapps_agents/experts/knowledge/data-privacy-compliance/gdpr.md +344 -344
- tapps_agents/experts/knowledge/data-privacy-compliance/hipaa.md +385 -385
- tapps_agents/experts/knowledge/data-privacy-compliance/privacy-by-design.md +280 -280
- tapps_agents/experts/knowledge/database-data-management/acid-vs-cap.md +164 -164
- tapps_agents/experts/knowledge/database-data-management/backup-and-recovery.md +182 -182
- tapps_agents/experts/knowledge/database-data-management/data-modeling.md +172 -172
- tapps_agents/experts/knowledge/database-data-management/database-design.md +187 -187
- tapps_agents/experts/knowledge/database-data-management/flux-query-optimization.md +342 -342
- tapps_agents/experts/knowledge/database-data-management/influxdb-connection-patterns.md +432 -432
- tapps_agents/experts/knowledge/database-data-management/influxdb-patterns.md +442 -442
- tapps_agents/experts/knowledge/database-data-management/migration-strategies.md +216 -216
- tapps_agents/experts/knowledge/database-data-management/nosql-patterns.md +259 -259
- tapps_agents/experts/knowledge/database-data-management/scalability-patterns.md +184 -184
- tapps_agents/experts/knowledge/database-data-management/sql-optimization.md +175 -175
- tapps_agents/experts/knowledge/database-data-management/time-series-modeling.md +444 -444
- tapps_agents/experts/knowledge/development-workflow/README.md +16 -16
- tapps_agents/experts/knowledge/development-workflow/automation-best-practices.md +216 -216
- tapps_agents/experts/knowledge/development-workflow/build-strategies.md +198 -198
- tapps_agents/experts/knowledge/development-workflow/deployment-patterns.md +205 -205
- tapps_agents/experts/knowledge/development-workflow/git-workflows.md +205 -205
- tapps_agents/experts/knowledge/documentation-knowledge-management/README.md +16 -16
- tapps_agents/experts/knowledge/documentation-knowledge-management/api-documentation-patterns.md +231 -231
- tapps_agents/experts/knowledge/documentation-knowledge-management/documentation-standards.md +191 -191
- tapps_agents/experts/knowledge/documentation-knowledge-management/knowledge-management.md +171 -171
- tapps_agents/experts/knowledge/documentation-knowledge-management/technical-writing-guide.md +192 -192
- tapps_agents/experts/knowledge/observability-monitoring/alerting-patterns.md +461 -461
- tapps_agents/experts/knowledge/observability-monitoring/apm-tools.md +459 -459
- tapps_agents/experts/knowledge/observability-monitoring/distributed-tracing.md +367 -367
- tapps_agents/experts/knowledge/observability-monitoring/logging-strategies.md +478 -478
- tapps_agents/experts/knowledge/observability-monitoring/metrics-and-monitoring.md +510 -510
- tapps_agents/experts/knowledge/observability-monitoring/observability-best-practices.md +492 -492
- tapps_agents/experts/knowledge/observability-monitoring/open-telemetry.md +573 -573
- tapps_agents/experts/knowledge/observability-monitoring/slo-sli-sla.md +419 -419
- tapps_agents/experts/knowledge/performance/anti-patterns.md +284 -284
- tapps_agents/experts/knowledge/performance/api-performance.md +256 -256
- tapps_agents/experts/knowledge/performance/caching.md +327 -327
- tapps_agents/experts/knowledge/performance/database-performance.md +252 -252
- tapps_agents/experts/knowledge/performance/optimization-patterns.md +327 -327
- tapps_agents/experts/knowledge/performance/profiling.md +297 -297
- tapps_agents/experts/knowledge/performance/resource-management.md +293 -293
- tapps_agents/experts/knowledge/performance/scalability.md +306 -306
- tapps_agents/experts/knowledge/security/owasp-top10.md +209 -209
- tapps_agents/experts/knowledge/security/secure-coding-practices.md +207 -207
- tapps_agents/experts/knowledge/security/threat-modeling.md +220 -220
- tapps_agents/experts/knowledge/security/vulnerability-patterns.md +342 -342
- tapps_agents/experts/knowledge/software-architecture/docker-compose-patterns.md +314 -314
- tapps_agents/experts/knowledge/software-architecture/microservices-patterns.md +379 -379
- tapps_agents/experts/knowledge/software-architecture/service-communication.md +316 -316
- tapps_agents/experts/knowledge/testing/best-practices.md +310 -310
- tapps_agents/experts/knowledge/testing/coverage-analysis.md +293 -293
- tapps_agents/experts/knowledge/testing/mocking.md +256 -256
- tapps_agents/experts/knowledge/testing/test-automation.md +276 -276
- tapps_agents/experts/knowledge/testing/test-data.md +271 -271
- tapps_agents/experts/knowledge/testing/test-design-patterns.md +280 -280
- tapps_agents/experts/knowledge/testing/test-maintenance.md +236 -236
- tapps_agents/experts/knowledge/testing/test-strategies.md +311 -311
- tapps_agents/experts/knowledge/user-experience/information-architecture.md +325 -325
- tapps_agents/experts/knowledge/user-experience/interaction-design.md +363 -363
- tapps_agents/experts/knowledge/user-experience/prototyping.md +293 -293
- tapps_agents/experts/knowledge/user-experience/usability-heuristics.md +337 -337
- tapps_agents/experts/knowledge/user-experience/usability-testing.md +311 -311
- tapps_agents/experts/knowledge/user-experience/user-journeys.md +296 -296
- tapps_agents/experts/knowledge/user-experience/user-research.md +373 -373
- tapps_agents/experts/knowledge/user-experience/ux-principles.md +340 -340
- tapps_agents/experts/knowledge_freshness.py +321 -321
- tapps_agents/experts/knowledge_ingestion.py +438 -438
- tapps_agents/experts/knowledge_need_detector.py +93 -93
- tapps_agents/experts/knowledge_validator.py +382 -382
- tapps_agents/experts/observability.py +440 -440
- tapps_agents/experts/passive_notifier.py +238 -238
- tapps_agents/experts/proactive_orchestrator.py +32 -32
- tapps_agents/experts/rag_chunker.py +205 -205
- tapps_agents/experts/rag_embedder.py +152 -152
- tapps_agents/experts/rag_evaluation.py +299 -299
- tapps_agents/experts/rag_index.py +303 -303
- tapps_agents/experts/rag_metrics.py +293 -293
- tapps_agents/experts/rag_safety.py +263 -263
- tapps_agents/experts/report_generator.py +296 -296
- tapps_agents/experts/setup_wizard.py +441 -441
- tapps_agents/experts/simple_rag.py +431 -431
- tapps_agents/experts/vector_rag.py +354 -354
- tapps_agents/experts/weight_distributor.py +304 -304
- tapps_agents/health/__init__.py +24 -24
- tapps_agents/health/base.py +75 -75
- tapps_agents/health/checks/__init__.py +22 -22
- tapps_agents/health/checks/automation.py +127 -127
- tapps_agents/health/checks/context7_cache.py +210 -210
- tapps_agents/health/checks/environment.py +116 -116
- tapps_agents/health/checks/execution.py +170 -170
- tapps_agents/health/checks/knowledge_base.py +187 -187
- tapps_agents/health/checks/outcomes.py +324 -324
- tapps_agents/health/collector.py +280 -280
- tapps_agents/health/dashboard.py +137 -137
- tapps_agents/health/metrics.py +151 -151
- tapps_agents/health/orchestrator.py +271 -271
- tapps_agents/health/registry.py +166 -166
- tapps_agents/hooks/__init__.py +33 -33
- tapps_agents/hooks/config.py +140 -140
- tapps_agents/hooks/events.py +135 -135
- tapps_agents/hooks/executor.py +128 -128
- tapps_agents/hooks/manager.py +143 -143
- tapps_agents/integration/__init__.py +8 -8
- tapps_agents/integration/service_integrator.py +121 -121
- tapps_agents/integrations/__init__.py +10 -10
- tapps_agents/integrations/clawdbot.py +525 -525
- tapps_agents/integrations/memory_bridge.py +356 -356
- tapps_agents/mcp/__init__.py +18 -18
- tapps_agents/mcp/gateway.py +112 -112
- tapps_agents/mcp/servers/__init__.py +13 -13
- tapps_agents/mcp/servers/analysis.py +204 -204
- tapps_agents/mcp/servers/context7.py +198 -198
- tapps_agents/mcp/servers/filesystem.py +218 -218
- tapps_agents/mcp/servers/git.py +201 -201
- tapps_agents/mcp/tool_registry.py +115 -115
- tapps_agents/quality/__init__.py +54 -54
- tapps_agents/quality/coverage_analyzer.py +379 -379
- tapps_agents/quality/enforcement.py +82 -82
- tapps_agents/quality/gates/__init__.py +37 -37
- tapps_agents/quality/gates/approval_gate.py +255 -255
- tapps_agents/quality/gates/base.py +84 -84
- tapps_agents/quality/gates/exceptions.py +43 -43
- tapps_agents/quality/gates/policy_gate.py +195 -195
- tapps_agents/quality/gates/registry.py +239 -239
- tapps_agents/quality/gates/security_gate.py +156 -156
- tapps_agents/quality/quality_gates.py +369 -369
- tapps_agents/quality/secret_scanner.py +335 -335
- tapps_agents/session/__init__.py +19 -19
- tapps_agents/session/manager.py +256 -256
- tapps_agents/simple_mode/__init__.py +66 -66
- tapps_agents/simple_mode/agent_contracts.py +357 -357
- tapps_agents/simple_mode/beads_hooks.py +151 -151
- tapps_agents/simple_mode/code_snippet_handler.py +382 -382
- tapps_agents/simple_mode/documentation_manager.py +395 -395
- tapps_agents/simple_mode/documentation_reader.py +187 -187
- tapps_agents/simple_mode/file_inference.py +292 -292
- tapps_agents/simple_mode/framework_change_detector.py +268 -268
- tapps_agents/simple_mode/intent_parser.py +510 -510
- tapps_agents/simple_mode/learning_progression.py +358 -358
- tapps_agents/simple_mode/nl_handler.py +700 -700
- tapps_agents/simple_mode/onboarding.py +253 -253
- tapps_agents/simple_mode/orchestrators/__init__.py +38 -38
- tapps_agents/simple_mode/orchestrators/base.py +185 -185
- tapps_agents/simple_mode/orchestrators/breakdown_orchestrator.py +49 -49
- tapps_agents/simple_mode/orchestrators/brownfield_orchestrator.py +135 -135
- tapps_agents/simple_mode/orchestrators/build_orchestrator.py +2700 -2667
- tapps_agents/simple_mode/orchestrators/deliverable_checklist.py +349 -349
- tapps_agents/simple_mode/orchestrators/enhance_orchestrator.py +53 -53
- tapps_agents/simple_mode/orchestrators/epic_orchestrator.py +122 -122
- tapps_agents/simple_mode/orchestrators/explore_orchestrator.py +184 -184
- tapps_agents/simple_mode/orchestrators/fix_orchestrator.py +723 -723
- tapps_agents/simple_mode/orchestrators/plan_analysis_orchestrator.py +206 -206
- tapps_agents/simple_mode/orchestrators/pr_orchestrator.py +237 -237
- tapps_agents/simple_mode/orchestrators/refactor_orchestrator.py +222 -222
- tapps_agents/simple_mode/orchestrators/requirements_tracer.py +262 -262
- tapps_agents/simple_mode/orchestrators/resume_orchestrator.py +210 -210
- tapps_agents/simple_mode/orchestrators/review_orchestrator.py +161 -161
- tapps_agents/simple_mode/orchestrators/test_orchestrator.py +82 -82
- tapps_agents/simple_mode/output_aggregator.py +340 -340
- tapps_agents/simple_mode/result_formatters.py +598 -598
- tapps_agents/simple_mode/step_dependencies.py +382 -382
- tapps_agents/simple_mode/step_results.py +276 -276
- tapps_agents/simple_mode/streaming.py +388 -388
- tapps_agents/simple_mode/variations.py +129 -129
- tapps_agents/simple_mode/visual_feedback.py +238 -238
- tapps_agents/simple_mode/zero_config.py +274 -274
- tapps_agents/suggestions/__init__.py +8 -8
- tapps_agents/suggestions/inline_suggester.py +52 -52
- tapps_agents/templates/__init__.py +8 -8
- tapps_agents/templates/microservice_generator.py +274 -274
- tapps_agents/utils/env_validator.py +291 -291
- tapps_agents/workflow/__init__.py +171 -171
- tapps_agents/workflow/acceptance_verifier.py +132 -132
- tapps_agents/workflow/agent_handlers/__init__.py +41 -41
- tapps_agents/workflow/agent_handlers/analyst_handler.py +75 -75
- tapps_agents/workflow/agent_handlers/architect_handler.py +107 -107
- tapps_agents/workflow/agent_handlers/base.py +84 -84
- tapps_agents/workflow/agent_handlers/debugger_handler.py +100 -100
- tapps_agents/workflow/agent_handlers/designer_handler.py +110 -110
- tapps_agents/workflow/agent_handlers/documenter_handler.py +94 -94
- tapps_agents/workflow/agent_handlers/implementer_handler.py +235 -235
- tapps_agents/workflow/agent_handlers/ops_handler.py +62 -62
- tapps_agents/workflow/agent_handlers/orchestrator_handler.py +43 -43
- tapps_agents/workflow/agent_handlers/planner_handler.py +98 -98
- tapps_agents/workflow/agent_handlers/registry.py +119 -119
- tapps_agents/workflow/agent_handlers/reviewer_handler.py +119 -119
- tapps_agents/workflow/agent_handlers/tester_handler.py +69 -69
- tapps_agents/workflow/analytics_accessor.py +337 -337
- tapps_agents/workflow/analytics_alerts.py +416 -416
- tapps_agents/workflow/analytics_dashboard_cursor.py +281 -281
- tapps_agents/workflow/analytics_dual_write.py +103 -103
- tapps_agents/workflow/analytics_integration.py +119 -119
- tapps_agents/workflow/analytics_query_parser.py +278 -278
- tapps_agents/workflow/analytics_visualizer.py +259 -259
- tapps_agents/workflow/artifact_helper.py +204 -204
- tapps_agents/workflow/audit_logger.py +263 -263
- tapps_agents/workflow/auto_execution_config.py +340 -340
- tapps_agents/workflow/auto_progression.py +586 -586
- tapps_agents/workflow/branch_cleanup.py +349 -349
- tapps_agents/workflow/checkpoint.py +256 -256
- tapps_agents/workflow/checkpoint_manager.py +178 -178
- tapps_agents/workflow/code_artifact.py +179 -179
- tapps_agents/workflow/common_enums.py +96 -96
- tapps_agents/workflow/confirmation_handler.py +130 -130
- tapps_agents/workflow/context_analyzer.py +222 -222
- tapps_agents/workflow/context_artifact.py +230 -230
- tapps_agents/workflow/cursor_chat.py +94 -94
- tapps_agents/workflow/cursor_executor.py +2337 -2196
- tapps_agents/workflow/cursor_skill_helper.py +516 -516
- tapps_agents/workflow/dependency_resolver.py +244 -244
- tapps_agents/workflow/design_artifact.py +156 -156
- tapps_agents/workflow/detector.py +751 -751
- tapps_agents/workflow/direct_execution_fallback.py +301 -301
- tapps_agents/workflow/docs_artifact.py +168 -168
- tapps_agents/workflow/enforcer.py +389 -389
- tapps_agents/workflow/enhancement_artifact.py +142 -142
- tapps_agents/workflow/error_recovery.py +806 -806
- tapps_agents/workflow/event_bus.py +183 -183
- tapps_agents/workflow/event_log.py +612 -612
- tapps_agents/workflow/events.py +63 -63
- tapps_agents/workflow/exceptions.py +43 -43
- tapps_agents/workflow/execution_graph.py +498 -498
- tapps_agents/workflow/execution_plan.py +126 -126
- tapps_agents/workflow/file_utils.py +186 -186
- tapps_agents/workflow/gate_evaluator.py +182 -182
- tapps_agents/workflow/gate_integration.py +200 -200
- tapps_agents/workflow/graph_visualizer.py +130 -130
- tapps_agents/workflow/health_checker.py +206 -206
- tapps_agents/workflow/logging_helper.py +243 -243
- tapps_agents/workflow/manifest.py +582 -582
- tapps_agents/workflow/marker_writer.py +250 -250
- tapps_agents/workflow/message_formatter.py +188 -188
- tapps_agents/workflow/messaging.py +325 -325
- tapps_agents/workflow/metadata_models.py +91 -91
- tapps_agents/workflow/metrics_integration.py +226 -226
- tapps_agents/workflow/migration_utils.py +116 -116
- tapps_agents/workflow/models.py +148 -111
- tapps_agents/workflow/nlp_config.py +198 -198
- tapps_agents/workflow/nlp_error_handler.py +207 -207
- tapps_agents/workflow/nlp_executor.py +163 -163
- tapps_agents/workflow/nlp_parser.py +528 -528
- tapps_agents/workflow/observability_dashboard.py +451 -451
- tapps_agents/workflow/observer.py +170 -170
- tapps_agents/workflow/ops_artifact.py +257 -257
- tapps_agents/workflow/output_passing.py +214 -214
- tapps_agents/workflow/parallel_executor.py +463 -463
- tapps_agents/workflow/planning_artifact.py +179 -179
- tapps_agents/workflow/preset_loader.py +285 -285
- tapps_agents/workflow/preset_recommender.py +270 -270
- tapps_agents/workflow/progress_logger.py +145 -145
- tapps_agents/workflow/progress_manager.py +303 -303
- tapps_agents/workflow/progress_monitor.py +186 -186
- tapps_agents/workflow/progress_updates.py +423 -423
- tapps_agents/workflow/quality_artifact.py +158 -158
- tapps_agents/workflow/quality_loopback.py +101 -101
- tapps_agents/workflow/recommender.py +387 -387
- tapps_agents/workflow/remediation_loop.py +166 -166
- tapps_agents/workflow/result_aggregator.py +300 -300
- tapps_agents/workflow/review_artifact.py +185 -185
- tapps_agents/workflow/schema_validator.py +522 -522
- tapps_agents/workflow/session_handoff.py +178 -178
- tapps_agents/workflow/skill_invoker.py +648 -648
- tapps_agents/workflow/state_manager.py +756 -756
- tapps_agents/workflow/state_persistence_config.py +331 -331
- tapps_agents/workflow/status_monitor.py +449 -449
- tapps_agents/workflow/step_checkpoint.py +314 -314
- tapps_agents/workflow/step_details.py +201 -201
- tapps_agents/workflow/story_models.py +147 -147
- tapps_agents/workflow/streaming.py +416 -416
- tapps_agents/workflow/suggestion_engine.py +552 -552
- tapps_agents/workflow/testing_artifact.py +186 -186
- tapps_agents/workflow/timeline.py +158 -158
- tapps_agents/workflow/token_integration.py +209 -209
- tapps_agents/workflow/validation.py +217 -217
- tapps_agents/workflow/visual_feedback.py +391 -391
- tapps_agents/workflow/workflow_chain.py +95 -95
- tapps_agents/workflow/workflow_summary.py +219 -219
- tapps_agents/workflow/worktree_manager.py +724 -724
- {tapps_agents-3.5.40.dist-info → tapps_agents-3.6.0.dist-info}/METADATA +672 -672
- tapps_agents-3.6.0.dist-info/RECORD +758 -0
- {tapps_agents-3.5.40.dist-info → tapps_agents-3.6.0.dist-info}/licenses/LICENSE +22 -22
- tapps_agents/health/checks/outcomes.backup_20260204_064058.py +0 -324
- tapps_agents/health/checks/outcomes.backup_20260204_064256.py +0 -324
- tapps_agents/health/checks/outcomes.backup_20260204_064600.py +0 -324
- tapps_agents-3.5.40.dist-info/RECORD +0 -760
- {tapps_agents-3.5.40.dist-info → tapps_agents-3.6.0.dist-info}/WHEEL +0 -0
- {tapps_agents-3.5.40.dist-info → tapps_agents-3.6.0.dist-info}/entry_points.txt +0 -0
- {tapps_agents-3.5.40.dist-info → tapps_agents-3.6.0.dist-info}/top_level.txt +0 -0
|
@@ -1,492 +1,492 @@
|
|
|
1
|
-
# Observability Best Practices
|
|
2
|
-
|
|
3
|
-
## Overview
|
|
4
|
-
|
|
5
|
-
Observability is the ability to understand a system's internal state from its external outputs. The three pillars of observability are logs, metrics, and traces, which together provide comprehensive visibility into system behavior.
|
|
6
|
-
|
|
7
|
-
## Three Pillars of Observability
|
|
8
|
-
|
|
9
|
-
### 1. Logs
|
|
10
|
-
|
|
11
|
-
**Purpose:** Discrete events with timestamps
|
|
12
|
-
|
|
13
|
-
**Use For:**
|
|
14
|
-
- Debugging specific requests
|
|
15
|
-
- Audit trails
|
|
16
|
-
- Error investigation
|
|
17
|
-
- User activity tracking
|
|
18
|
-
|
|
19
|
-
**Characteristics:**
|
|
20
|
-
- High cardinality
|
|
21
|
-
- Rich context
|
|
22
|
-
- Event-oriented
|
|
23
|
-
- Text or structured
|
|
24
|
-
|
|
25
|
-
### 2. Metrics
|
|
26
|
-
|
|
27
|
-
**Purpose:** Aggregated measurements over time
|
|
28
|
-
|
|
29
|
-
**Use For:**
|
|
30
|
-
- System health monitoring
|
|
31
|
-
- Performance trends
|
|
32
|
-
- Capacity planning
|
|
33
|
-
- Alerting
|
|
34
|
-
|
|
35
|
-
**Characteristics:**
|
|
36
|
-
- Low cardinality
|
|
37
|
-
- Aggregated data
|
|
38
|
-
- Time-series
|
|
39
|
-
- Numerical
|
|
40
|
-
|
|
41
|
-
### 3. Traces
|
|
42
|
-
|
|
43
|
-
**Purpose:** Request flow through distributed systems
|
|
44
|
-
|
|
45
|
-
**Use For:**
|
|
46
|
-
- Understanding request paths
|
|
47
|
-
- Identifying bottlenecks
|
|
48
|
-
- Debugging distributed issues
|
|
49
|
-
- Service dependency mapping
|
|
50
|
-
|
|
51
|
-
**Characteristics:**
|
|
52
|
-
- Request-oriented
|
|
53
|
-
- Cross-service
|
|
54
|
-
- Hierarchical
|
|
55
|
-
- Correlation IDs
|
|
56
|
-
|
|
57
|
-
## Correlation Across Pillars
|
|
58
|
-
|
|
59
|
-
### Unified Observability
|
|
60
|
-
|
|
61
|
-
**Link logs, metrics, and traces:**
|
|
62
|
-
```python
|
|
63
|
-
import logging
|
|
64
|
-
from opentelemetry import trace
|
|
65
|
-
|
|
66
|
-
logger = logging.getLogger(__name__)
|
|
67
|
-
tracer = trace.get_tracer(__name__)
|
|
68
|
-
|
|
69
|
-
def process_request(request_id: str):
|
|
70
|
-
span = tracer.start_span("process_request")
|
|
71
|
-
span.set_attribute("request.id", request_id)
|
|
72
|
-
|
|
73
|
-
# Log includes trace context
|
|
74
|
-
logger.info("Processing request",
|
|
75
|
-
extra={
|
|
76
|
-
"request_id": request_id,
|
|
77
|
-
"trace_id": format_trace_id(span.get_span_context().trace_id),
|
|
78
|
-
"span_id": format_span_id(span.get_span_context().span_id)
|
|
79
|
-
}
|
|
80
|
-
)
|
|
81
|
-
|
|
82
|
-
# Metrics include trace context for correlation
|
|
83
|
-
request_counter.inc({
|
|
84
|
-
"request_id": request_id,
|
|
85
|
-
"trace_id": format_trace_id(span.get_span_context().trace_id)
|
|
86
|
-
})
|
|
87
|
-
|
|
88
|
-
try:
|
|
89
|
-
result = do_work()
|
|
90
|
-
span.set_status(trace.Status(trace.StatusCode.OK))
|
|
91
|
-
return result
|
|
92
|
-
except Exception as e:
|
|
93
|
-
logger.error("Request failed", exc_info=True,
|
|
94
|
-
extra={
|
|
95
|
-
"request_id": request_id,
|
|
96
|
-
"trace_id": format_trace_id(span.get_span_context().trace_id),
|
|
97
|
-
"error": str(e)
|
|
98
|
-
}
|
|
99
|
-
)
|
|
100
|
-
span.record_exception(e)
|
|
101
|
-
span.set_status(trace.Status(trace.StatusCode.ERROR))
|
|
102
|
-
raise
|
|
103
|
-
```
|
|
104
|
-
|
|
105
|
-
### Correlation IDs
|
|
106
|
-
|
|
107
|
-
**Propagate across services:**
|
|
108
|
-
```python
|
|
109
|
-
# Extract correlation ID from request
|
|
110
|
-
def extract_correlation_id(request):
|
|
111
|
-
return (
|
|
112
|
-
request.headers.get('X-Request-ID') or
|
|
113
|
-
request.headers.get('X-Correlation-ID') or
|
|
114
|
-
generate_request_id()
|
|
115
|
-
)
|
|
116
|
-
|
|
117
|
-
# Include in all observability data
|
|
118
|
-
correlation_id = extract_correlation_id(request)
|
|
119
|
-
logger.info("Processing", extra={"correlation_id": correlation_id})
|
|
120
|
-
span.set_attribute("correlation.id", correlation_id)
|
|
121
|
-
metrics.labels(correlation_id=correlation_id).inc()
|
|
122
|
-
```
|
|
123
|
-
|
|
124
|
-
## Instrumentation Strategy
|
|
125
|
-
|
|
126
|
-
### What to Instrument
|
|
127
|
-
|
|
128
|
-
**Application Level:**
|
|
129
|
-
- Request handling
|
|
130
|
-
- Business logic operations
|
|
131
|
-
- External service calls
|
|
132
|
-
- Database operations
|
|
133
|
-
- Cache operations
|
|
134
|
-
|
|
135
|
-
**Infrastructure Level:**
|
|
136
|
-
- CPU, memory, disk
|
|
137
|
-
- Network I/O
|
|
138
|
-
- Container metrics
|
|
139
|
-
- Service mesh metrics
|
|
140
|
-
|
|
141
|
-
**Business Level:**
|
|
142
|
-
- User actions
|
|
143
|
-
- Transactions
|
|
144
|
-
- Conversions
|
|
145
|
-
- Revenue events
|
|
146
|
-
|
|
147
|
-
### Instrumentation Depth
|
|
148
|
-
|
|
149
|
-
**High-Value Operations:**
|
|
150
|
-
- User-facing operations
|
|
151
|
-
- Critical business logic
|
|
152
|
-
- External dependencies
|
|
153
|
-
- Expensive computations
|
|
154
|
-
|
|
155
|
-
**Lower Priority:**
|
|
156
|
-
- Internal helper functions
|
|
157
|
-
- Low-frequency operations
|
|
158
|
-
- Non-critical paths
|
|
159
|
-
|
|
160
|
-
### Automatic vs Manual Instrumentation
|
|
161
|
-
|
|
162
|
-
**Use Automatic For:**
|
|
163
|
-
- Common frameworks (HTTP, gRPC, databases)
|
|
164
|
-
- Standard libraries
|
|
165
|
-
- Quick wins
|
|
166
|
-
- Consistency
|
|
167
|
-
|
|
168
|
-
**Use Manual For:**
|
|
169
|
-
- Business-specific logic
|
|
170
|
-
- Custom protocols
|
|
171
|
-
- Critical paths needing detail
|
|
172
|
-
- Framework gaps
|
|
173
|
-
|
|
174
|
-
## Structured Data
|
|
175
|
-
|
|
176
|
-
### Structured Logs
|
|
177
|
-
|
|
178
|
-
**Benefits:**
|
|
179
|
-
- Machine-readable
|
|
180
|
-
- Queryable
|
|
181
|
-
- Aggregatable
|
|
182
|
-
- Consistent format
|
|
183
|
-
|
|
184
|
-
**Format:**
|
|
185
|
-
```json
|
|
186
|
-
{
|
|
187
|
-
"timestamp": "2026-01-15T10:30:45.123Z",
|
|
188
|
-
"level": "INFO",
|
|
189
|
-
"service": "order-service",
|
|
190
|
-
"message": "Order processed",
|
|
191
|
-
"request_id": "abc123",
|
|
192
|
-
"trace_id": "def456",
|
|
193
|
-
"user_id": "user789",
|
|
194
|
-
"order_id": "order123",
|
|
195
|
-
"duration_ms": 245
|
|
196
|
-
}
|
|
197
|
-
```
|
|
198
|
-
|
|
199
|
-
### Semantic Conventions
|
|
200
|
-
|
|
201
|
-
**Use Standards:**
|
|
202
|
-
- OpenTelemetry semantic conventions
|
|
203
|
-
- Consistent attribute names
|
|
204
|
-
- Standard units
|
|
205
|
-
- Common labels
|
|
206
|
-
|
|
207
|
-
**Example:**
|
|
208
|
-
```python
|
|
209
|
-
# Follow OpenTelemetry conventions
|
|
210
|
-
span.set_attribute("http.method", "GET")
|
|
211
|
-
span.set_attribute("http.url", "/api/users")
|
|
212
|
-
span.set_attribute("http.status_code", 200)
|
|
213
|
-
span.set_attribute("db.system", "postgresql")
|
|
214
|
-
span.set_attribute("db.name", "users")
|
|
215
|
-
span.set_attribute("db.operation", "SELECT")
|
|
216
|
-
```
|
|
217
|
-
|
|
218
|
-
## Sampling Strategies
|
|
219
|
-
|
|
220
|
-
### When to Sample
|
|
221
|
-
|
|
222
|
-
**Always Record:**
|
|
223
|
-
- Errors and exceptions
|
|
224
|
-
- Critical business events
|
|
225
|
-
- Security events
|
|
226
|
-
- SLO violations
|
|
227
|
-
|
|
228
|
-
**Sample:**
|
|
229
|
-
- High-volume normal operations
|
|
230
|
-
- Debug-level logs
|
|
231
|
-
- Verbose traces
|
|
232
|
-
- Low-value metrics
|
|
233
|
-
|
|
234
|
-
### Sampling Approaches
|
|
235
|
-
|
|
236
|
-
**Head-Based Sampling:**
|
|
237
|
-
- Decide at trace start
|
|
238
|
-
- Consistent per trace
|
|
239
|
-
- Good for debugging
|
|
240
|
-
- Simple implementation
|
|
241
|
-
|
|
242
|
-
**Tail-Based Sampling:**
|
|
243
|
-
- Decide after trace completes
|
|
244
|
-
- Prioritize errors/slow traces
|
|
245
|
-
- Better cost optimization
|
|
246
|
-
- More complex
|
|
247
|
-
|
|
248
|
-
**Adaptive Sampling:**
|
|
249
|
-
- Adjust based on error rates
|
|
250
|
-
- Sample more errors
|
|
251
|
-
- Balance cost and visibility
|
|
252
|
-
|
|
253
|
-
**Example:**
|
|
254
|
-
```python
|
|
255
|
-
def should_sample_trace(trace_context):
|
|
256
|
-
# Always sample errors
|
|
257
|
-
if has_error(trace_context):
|
|
258
|
-
return True
|
|
259
|
-
|
|
260
|
-
# Sample slow traces
|
|
261
|
-
if trace_duration > threshold:
|
|
262
|
-
return True
|
|
263
|
-
|
|
264
|
-
# Probabilistic sampling for normal traces
|
|
265
|
-
return random.random() < sampling_rate
|
|
266
|
-
```
|
|
267
|
-
|
|
268
|
-
## Performance Considerations
|
|
269
|
-
|
|
270
|
-
### Overhead Management
|
|
271
|
-
|
|
272
|
-
**Minimize Impact:**
|
|
273
|
-
- Use async instrumentation
|
|
274
|
-
- Batch exports
|
|
275
|
-
- Sample appropriately
|
|
276
|
-
- Use efficient serialization
|
|
277
|
-
|
|
278
|
-
**Measure Overhead:**
|
|
279
|
-
```python
|
|
280
|
-
import time
|
|
281
|
-
|
|
282
|
-
overhead_start = time.time()
|
|
283
|
-
# Instrumentation code
|
|
284
|
-
overhead_end = time.time()
|
|
285
|
-
overhead_ms = (overhead_end - overhead_start) * 1000
|
|
286
|
-
|
|
287
|
-
if overhead_ms > 10: # Alert if overhead > 10ms
|
|
288
|
-
logger.warning("High instrumentation overhead", extra={"overhead_ms": overhead_ms})
|
|
289
|
-
```
|
|
290
|
-
|
|
291
|
-
### Async Operations
|
|
292
|
-
|
|
293
|
-
**Use Async Exporters:**
|
|
294
|
-
```python
|
|
295
|
-
from opentelemetry.sdk.trace.export import BatchSpanProcessor
|
|
296
|
-
from opentelemetry.exporter.otlp.proto.grpc.trace_exporter import OTLPSpanExporter
|
|
297
|
-
|
|
298
|
-
exporter = OTLPSpanExporter(endpoint="http://collector:4317")
|
|
299
|
-
processor = BatchSpanProcessor(exporter)
|
|
300
|
-
tracer_provider.add_span_processor(processor)
|
|
301
|
-
```
|
|
302
|
-
|
|
303
|
-
### Cardinality Management
|
|
304
|
-
|
|
305
|
-
**Control Label/Attribute Cardinality:**
|
|
306
|
-
```python
|
|
307
|
-
# Bad: High cardinality (unique per user)
|
|
308
|
-
metrics.labels(user_id=user_id).inc()
|
|
309
|
-
|
|
310
|
-
# Good: Bounded cardinality
|
|
311
|
-
metrics.labels(user_tier="premium").inc()
|
|
312
|
-
|
|
313
|
-
# High-cardinality data goes in logs/traces
|
|
314
|
-
logger.info("User action", extra={"user_id": user_id})
|
|
315
|
-
```
|
|
316
|
-
|
|
317
|
-
## Security and Privacy
|
|
318
|
-
|
|
319
|
-
### Sensitive Data
|
|
320
|
-
|
|
321
|
-
**Don't Log:**
|
|
322
|
-
- Passwords and tokens
|
|
323
|
-
- Credit card numbers
|
|
324
|
-
- PII (unless required)
|
|
325
|
-
- Full request/response bodies
|
|
326
|
-
|
|
327
|
-
**Redaction:**
|
|
328
|
-
```python
|
|
329
|
-
def redact_sensitive(message: str) -> str:
|
|
330
|
-
# Redact credit cards
|
|
331
|
-
message = re.sub(r'\b\d{4}[\s-]?\d{4}[\s-]?\d{4}[\s-]?\d{4}\b',
|
|
332
|
-
'[REDACTED]', message)
|
|
333
|
-
# Redact emails
|
|
334
|
-
message = re.sub(r'\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}\b',
|
|
335
|
-
'[EMAIL_REDACTED]', message)
|
|
336
|
-
return message
|
|
337
|
-
```
|
|
338
|
-
|
|
339
|
-
### Access Control
|
|
340
|
-
|
|
341
|
-
**Control Access:**
|
|
342
|
-
- Restrict log access
|
|
343
|
-
- Encrypt data in transit
|
|
344
|
-
- Encrypt data at rest
|
|
345
|
-
- Audit access to observability data
|
|
346
|
-
|
|
347
|
-
## Dashboard Design
|
|
348
|
-
|
|
349
|
-
### Effective Dashboards
|
|
350
|
-
|
|
351
|
-
**Principles:**
|
|
352
|
-
- Focus on user experience
|
|
353
|
-
- Show key metrics prominently
|
|
354
|
-
- Group related metrics
|
|
355
|
-
- Use appropriate visualizations
|
|
356
|
-
- Keep it simple
|
|
357
|
-
|
|
358
|
-
**Layout:**
|
|
359
|
-
1. **Top:** Golden signals (latency, traffic, errors, saturation)
|
|
360
|
-
2. **Middle:** Service-specific metrics
|
|
361
|
-
3. **Bottom:** Infrastructure metrics
|
|
362
|
-
|
|
363
|
-
**Visualization Types:**
|
|
364
|
-
- **Line graphs:** Trends over time
|
|
365
|
-
- **Gauges:** Current values
|
|
366
|
-
- **Tables:** Detailed breakdowns
|
|
367
|
-
- **Heatmaps:** Distribution patterns
|
|
368
|
-
|
|
369
|
-
### Dashboard Hierarchies
|
|
370
|
-
|
|
371
|
-
**Level 1: Service Overview**
|
|
372
|
-
- Overall health
|
|
373
|
-
- Key SLIs
|
|
374
|
-
- Error rates
|
|
375
|
-
- Traffic volume
|
|
376
|
-
|
|
377
|
-
**Level 2: Service Details**
|
|
378
|
-
- Per-endpoint metrics
|
|
379
|
-
- Dependency health
|
|
380
|
-
- Resource usage
|
|
381
|
-
- Error breakdown
|
|
382
|
-
|
|
383
|
-
**Level 3: Deep Dive**
|
|
384
|
-
- Individual request traces
|
|
385
|
-
- Detailed logs
|
|
386
|
-
- Profiling data
|
|
387
|
-
- Custom queries
|
|
388
|
-
|
|
389
|
-
## Best Practices Summary
|
|
390
|
-
|
|
391
|
-
### 1. Start Small
|
|
392
|
-
|
|
393
|
-
- Begin with critical paths
|
|
394
|
-
- Add instrumentation incrementally
|
|
395
|
-
- Focus on high-value areas
|
|
396
|
-
- Don't instrument everything at once
|
|
397
|
-
|
|
398
|
-
### 2. Use Standards
|
|
399
|
-
|
|
400
|
-
- OpenTelemetry for traces
|
|
401
|
-
- Prometheus for metrics
|
|
402
|
-
- Structured logging (JSON)
|
|
403
|
-
- Semantic conventions
|
|
404
|
-
|
|
405
|
-
### 3. Correlate Everything
|
|
406
|
-
|
|
407
|
-
- Include correlation IDs
|
|
408
|
-
- Link logs, metrics, traces
|
|
409
|
-
- Propagate context
|
|
410
|
-
- Use consistent identifiers
|
|
411
|
-
|
|
412
|
-
### 4. Sample Wisely
|
|
413
|
-
|
|
414
|
-
- Always record errors
|
|
415
|
-
- Sample normal operations
|
|
416
|
-
- Use adaptive sampling
|
|
417
|
-
- Balance cost and visibility
|
|
418
|
-
|
|
419
|
-
### 5. Manage Overhead
|
|
420
|
-
|
|
421
|
-
- Use async exporters
|
|
422
|
-
- Batch operations
|
|
423
|
-
- Measure impact
|
|
424
|
-
- Optimize hot paths
|
|
425
|
-
|
|
426
|
-
### 6. Control Cardinality
|
|
427
|
-
|
|
428
|
-
- Bounded label sets
|
|
429
|
-
- Avoid high-cardinality attributes
|
|
430
|
-
- Use logs for detailed data
|
|
431
|
-
- Aggregate metrics appropriately
|
|
432
|
-
|
|
433
|
-
### 7. Secure and Private
|
|
434
|
-
|
|
435
|
-
- Redact sensitive data
|
|
436
|
-
- Control access
|
|
437
|
-
- Encrypt data
|
|
438
|
-
- Audit access
|
|
439
|
-
|
|
440
|
-
### 8. Document and Educate
|
|
441
|
-
|
|
442
|
-
- Document instrumentation
|
|
443
|
-
- Share best practices
|
|
444
|
-
- Create runbooks
|
|
445
|
-
- Regular reviews
|
|
446
|
-
|
|
447
|
-
## Tools and Platforms
|
|
448
|
-
|
|
449
|
-
### Open Source Stack
|
|
450
|
-
|
|
451
|
-
**Collection:**
|
|
452
|
-
- Prometheus (metrics)
|
|
453
|
-
- Jaeger/Zipkin (traces)
|
|
454
|
-
- Fluentd/Fluent Bit (logs)
|
|
455
|
-
|
|
456
|
-
**Storage:**
|
|
457
|
-
- Prometheus (metrics)
|
|
458
|
-
- Elasticsearch (logs)
|
|
459
|
-
- Tempo (traces)
|
|
460
|
-
|
|
461
|
-
**Visualization:**
|
|
462
|
-
- Grafana (metrics and traces)
|
|
463
|
-
- Kibana (logs)
|
|
464
|
-
- Jaeger UI (traces)
|
|
465
|
-
|
|
466
|
-
### Commercial Platforms
|
|
467
|
-
|
|
468
|
-
**Full-Stack:**
|
|
469
|
-
- Datadog
|
|
470
|
-
- New Relic
|
|
471
|
-
- Dynatrace
|
|
472
|
-
- AppDynamics
|
|
473
|
-
|
|
474
|
-
**Specialized:**
|
|
475
|
-
- Honeycomb (traces)
|
|
476
|
-
- Splunk (logs)
|
|
477
|
-
- CloudWatch (AWS)
|
|
478
|
-
- Stackdriver (GCP)
|
|
479
|
-
|
|
480
|
-
## Summary
|
|
481
|
-
|
|
482
|
-
Effective observability requires:
|
|
483
|
-
|
|
484
|
-
1. **Three pillars:** Logs, metrics, and traces
|
|
485
|
-
2. **Correlation:** Link across all signals
|
|
486
|
-
3. **Standards:** Use semantic conventions
|
|
487
|
-
4. **Sampling:** Balance cost and visibility
|
|
488
|
-
5. **Performance:** Minimize overhead
|
|
489
|
-
6. **Security:** Protect sensitive data
|
|
490
|
-
7. **Documentation:** Share knowledge
|
|
491
|
-
8. **Iteration:** Continuous improvement
|
|
492
|
-
|
|
1
|
+
# Observability Best Practices
|
|
2
|
+
|
|
3
|
+
## Overview
|
|
4
|
+
|
|
5
|
+
Observability is the ability to understand a system's internal state from its external outputs. The three pillars of observability are logs, metrics, and traces, which together provide comprehensive visibility into system behavior.
|
|
6
|
+
|
|
7
|
+
## Three Pillars of Observability
|
|
8
|
+
|
|
9
|
+
### 1. Logs
|
|
10
|
+
|
|
11
|
+
**Purpose:** Discrete events with timestamps
|
|
12
|
+
|
|
13
|
+
**Use For:**
|
|
14
|
+
- Debugging specific requests
|
|
15
|
+
- Audit trails
|
|
16
|
+
- Error investigation
|
|
17
|
+
- User activity tracking
|
|
18
|
+
|
|
19
|
+
**Characteristics:**
|
|
20
|
+
- High cardinality
|
|
21
|
+
- Rich context
|
|
22
|
+
- Event-oriented
|
|
23
|
+
- Text or structured
|
|
24
|
+
|
|
25
|
+
### 2. Metrics
|
|
26
|
+
|
|
27
|
+
**Purpose:** Aggregated measurements over time
|
|
28
|
+
|
|
29
|
+
**Use For:**
|
|
30
|
+
- System health monitoring
|
|
31
|
+
- Performance trends
|
|
32
|
+
- Capacity planning
|
|
33
|
+
- Alerting
|
|
34
|
+
|
|
35
|
+
**Characteristics:**
|
|
36
|
+
- Low cardinality
|
|
37
|
+
- Aggregated data
|
|
38
|
+
- Time-series
|
|
39
|
+
- Numerical
|
|
40
|
+
|
|
41
|
+
### 3. Traces
|
|
42
|
+
|
|
43
|
+
**Purpose:** Request flow through distributed systems
|
|
44
|
+
|
|
45
|
+
**Use For:**
|
|
46
|
+
- Understanding request paths
|
|
47
|
+
- Identifying bottlenecks
|
|
48
|
+
- Debugging distributed issues
|
|
49
|
+
- Service dependency mapping
|
|
50
|
+
|
|
51
|
+
**Characteristics:**
|
|
52
|
+
- Request-oriented
|
|
53
|
+
- Cross-service
|
|
54
|
+
- Hierarchical
|
|
55
|
+
- Correlation IDs
|
|
56
|
+
|
|
57
|
+
## Correlation Across Pillars
|
|
58
|
+
|
|
59
|
+
### Unified Observability
|
|
60
|
+
|
|
61
|
+
**Link logs, metrics, and traces:**
|
|
62
|
+
```python
|
|
63
|
+
import logging
|
|
64
|
+
from opentelemetry import trace
|
|
65
|
+
|
|
66
|
+
logger = logging.getLogger(__name__)
|
|
67
|
+
tracer = trace.get_tracer(__name__)
|
|
68
|
+
|
|
69
|
+
def process_request(request_id: str):
|
|
70
|
+
span = tracer.start_span("process_request")
|
|
71
|
+
span.set_attribute("request.id", request_id)
|
|
72
|
+
|
|
73
|
+
# Log includes trace context
|
|
74
|
+
logger.info("Processing request",
|
|
75
|
+
extra={
|
|
76
|
+
"request_id": request_id,
|
|
77
|
+
"trace_id": format_trace_id(span.get_span_context().trace_id),
|
|
78
|
+
"span_id": format_span_id(span.get_span_context().span_id)
|
|
79
|
+
}
|
|
80
|
+
)
|
|
81
|
+
|
|
82
|
+
# Metrics include trace context for correlation
|
|
83
|
+
request_counter.inc({
|
|
84
|
+
"request_id": request_id,
|
|
85
|
+
"trace_id": format_trace_id(span.get_span_context().trace_id)
|
|
86
|
+
})
|
|
87
|
+
|
|
88
|
+
try:
|
|
89
|
+
result = do_work()
|
|
90
|
+
span.set_status(trace.Status(trace.StatusCode.OK))
|
|
91
|
+
return result
|
|
92
|
+
except Exception as e:
|
|
93
|
+
logger.error("Request failed", exc_info=True,
|
|
94
|
+
extra={
|
|
95
|
+
"request_id": request_id,
|
|
96
|
+
"trace_id": format_trace_id(span.get_span_context().trace_id),
|
|
97
|
+
"error": str(e)
|
|
98
|
+
}
|
|
99
|
+
)
|
|
100
|
+
span.record_exception(e)
|
|
101
|
+
span.set_status(trace.Status(trace.StatusCode.ERROR))
|
|
102
|
+
raise
|
|
103
|
+
```
|
|
104
|
+
|
|
105
|
+
### Correlation IDs
|
|
106
|
+
|
|
107
|
+
**Propagate across services:**
|
|
108
|
+
```python
|
|
109
|
+
# Extract correlation ID from request
|
|
110
|
+
def extract_correlation_id(request):
|
|
111
|
+
return (
|
|
112
|
+
request.headers.get('X-Request-ID') or
|
|
113
|
+
request.headers.get('X-Correlation-ID') or
|
|
114
|
+
generate_request_id()
|
|
115
|
+
)
|
|
116
|
+
|
|
117
|
+
# Include in all observability data
|
|
118
|
+
correlation_id = extract_correlation_id(request)
|
|
119
|
+
logger.info("Processing", extra={"correlation_id": correlation_id})
|
|
120
|
+
span.set_attribute("correlation.id", correlation_id)
|
|
121
|
+
metrics.labels(correlation_id=correlation_id).inc()
|
|
122
|
+
```
|
|
123
|
+
|
|
124
|
+
## Instrumentation Strategy
|
|
125
|
+
|
|
126
|
+
### What to Instrument
|
|
127
|
+
|
|
128
|
+
**Application Level:**
|
|
129
|
+
- Request handling
|
|
130
|
+
- Business logic operations
|
|
131
|
+
- External service calls
|
|
132
|
+
- Database operations
|
|
133
|
+
- Cache operations
|
|
134
|
+
|
|
135
|
+
**Infrastructure Level:**
|
|
136
|
+
- CPU, memory, disk
|
|
137
|
+
- Network I/O
|
|
138
|
+
- Container metrics
|
|
139
|
+
- Service mesh metrics
|
|
140
|
+
|
|
141
|
+
**Business Level:**
|
|
142
|
+
- User actions
|
|
143
|
+
- Transactions
|
|
144
|
+
- Conversions
|
|
145
|
+
- Revenue events
|
|
146
|
+
|
|
147
|
+
### Instrumentation Depth
|
|
148
|
+
|
|
149
|
+
**High-Value Operations:**
|
|
150
|
+
- User-facing operations
|
|
151
|
+
- Critical business logic
|
|
152
|
+
- External dependencies
|
|
153
|
+
- Expensive computations
|
|
154
|
+
|
|
155
|
+
**Lower Priority:**
|
|
156
|
+
- Internal helper functions
|
|
157
|
+
- Low-frequency operations
|
|
158
|
+
- Non-critical paths
|
|
159
|
+
|
|
160
|
+
### Automatic vs Manual Instrumentation
|
|
161
|
+
|
|
162
|
+
**Use Automatic For:**
|
|
163
|
+
- Common frameworks (HTTP, gRPC, databases)
|
|
164
|
+
- Standard libraries
|
|
165
|
+
- Quick wins
|
|
166
|
+
- Consistency
|
|
167
|
+
|
|
168
|
+
**Use Manual For:**
|
|
169
|
+
- Business-specific logic
|
|
170
|
+
- Custom protocols
|
|
171
|
+
- Critical paths needing detail
|
|
172
|
+
- Framework gaps
|
|
173
|
+
|
|
174
|
+
## Structured Data
|
|
175
|
+
|
|
176
|
+
### Structured Logs
|
|
177
|
+
|
|
178
|
+
**Benefits:**
|
|
179
|
+
- Machine-readable
|
|
180
|
+
- Queryable
|
|
181
|
+
- Aggregatable
|
|
182
|
+
- Consistent format
|
|
183
|
+
|
|
184
|
+
**Format:**
|
|
185
|
+
```json
|
|
186
|
+
{
|
|
187
|
+
"timestamp": "2026-01-15T10:30:45.123Z",
|
|
188
|
+
"level": "INFO",
|
|
189
|
+
"service": "order-service",
|
|
190
|
+
"message": "Order processed",
|
|
191
|
+
"request_id": "abc123",
|
|
192
|
+
"trace_id": "def456",
|
|
193
|
+
"user_id": "user789",
|
|
194
|
+
"order_id": "order123",
|
|
195
|
+
"duration_ms": 245
|
|
196
|
+
}
|
|
197
|
+
```
|
|
198
|
+
|
|
199
|
+
### Semantic Conventions
|
|
200
|
+
|
|
201
|
+
**Use Standards:**
|
|
202
|
+
- OpenTelemetry semantic conventions
|
|
203
|
+
- Consistent attribute names
|
|
204
|
+
- Standard units
|
|
205
|
+
- Common labels
|
|
206
|
+
|
|
207
|
+
**Example:**
|
|
208
|
+
```python
|
|
209
|
+
# Follow OpenTelemetry conventions
|
|
210
|
+
span.set_attribute("http.method", "GET")
|
|
211
|
+
span.set_attribute("http.url", "/api/users")
|
|
212
|
+
span.set_attribute("http.status_code", 200)
|
|
213
|
+
span.set_attribute("db.system", "postgresql")
|
|
214
|
+
span.set_attribute("db.name", "users")
|
|
215
|
+
span.set_attribute("db.operation", "SELECT")
|
|
216
|
+
```
|
|
217
|
+
|
|
218
|
+
## Sampling Strategies
|
|
219
|
+
|
|
220
|
+
### When to Sample
|
|
221
|
+
|
|
222
|
+
**Always Record:**
|
|
223
|
+
- Errors and exceptions
|
|
224
|
+
- Critical business events
|
|
225
|
+
- Security events
|
|
226
|
+
- SLO violations
|
|
227
|
+
|
|
228
|
+
**Sample:**
|
|
229
|
+
- High-volume normal operations
|
|
230
|
+
- Debug-level logs
|
|
231
|
+
- Verbose traces
|
|
232
|
+
- Low-value metrics
|
|
233
|
+
|
|
234
|
+
### Sampling Approaches
|
|
235
|
+
|
|
236
|
+
**Head-Based Sampling:**
|
|
237
|
+
- Decide at trace start
|
|
238
|
+
- Consistent per trace
|
|
239
|
+
- Good for debugging
|
|
240
|
+
- Simple implementation
|
|
241
|
+
|
|
242
|
+
**Tail-Based Sampling:**
|
|
243
|
+
- Decide after trace completes
|
|
244
|
+
- Prioritize errors/slow traces
|
|
245
|
+
- Better cost optimization
|
|
246
|
+
- More complex
|
|
247
|
+
|
|
248
|
+
**Adaptive Sampling:**
|
|
249
|
+
- Adjust based on error rates
|
|
250
|
+
- Sample more errors
|
|
251
|
+
- Balance cost and visibility
|
|
252
|
+
|
|
253
|
+
**Example:**
|
|
254
|
+
```python
|
|
255
|
+
def should_sample_trace(trace_context):
|
|
256
|
+
# Always sample errors
|
|
257
|
+
if has_error(trace_context):
|
|
258
|
+
return True
|
|
259
|
+
|
|
260
|
+
# Sample slow traces
|
|
261
|
+
if trace_duration > threshold:
|
|
262
|
+
return True
|
|
263
|
+
|
|
264
|
+
# Probabilistic sampling for normal traces
|
|
265
|
+
return random.random() < sampling_rate
|
|
266
|
+
```
|
|
267
|
+
|
|
268
|
+
## Performance Considerations
|
|
269
|
+
|
|
270
|
+
### Overhead Management
|
|
271
|
+
|
|
272
|
+
**Minimize Impact:**
|
|
273
|
+
- Use async instrumentation
|
|
274
|
+
- Batch exports
|
|
275
|
+
- Sample appropriately
|
|
276
|
+
- Use efficient serialization
|
|
277
|
+
|
|
278
|
+
**Measure Overhead:**
|
|
279
|
+
```python
|
|
280
|
+
import time
|
|
281
|
+
|
|
282
|
+
overhead_start = time.perf_counter()
|
|
283
|
+
# Instrumentation code
|
|
284
|
+
overhead_end = time.perf_counter()
|
|
285
|
+
overhead_ms = (overhead_end - overhead_start) * 1000
|
|
286
|
+
|
|
287
|
+
if overhead_ms > 10: # Alert if overhead > 10ms
|
|
288
|
+
logger.warning("High instrumentation overhead", overhead_ms=overhead_ms)
|
|
289
|
+
```
|
|
290
|
+
|
|
291
|
+
### Async Operations
|
|
292
|
+
|
|
293
|
+
**Use Async Exporters:**
|
|
294
|
+
```python
|
|
295
|
+
from opentelemetry.sdk.trace.export import BatchSpanProcessor
|
|
296
|
+
from opentelemetry.exporter.otlp.proto.grpc.trace_exporter import OTLPSpanExporter
|
|
297
|
+
|
|
298
|
+
exporter = OTLPSpanExporter(endpoint="http://collector:4317")
|
|
299
|
+
processor = BatchSpanProcessor(exporter)
|
|
300
|
+
tracer_provider.add_span_processor(processor)
|
|
301
|
+
```
|
|
302
|
+
|
|
303
|
+
### Cardinality Management
|
|
304
|
+
|
|
305
|
+
**Control Label/Attribute Cardinality:**
|
|
306
|
+
```python
|
|
307
|
+
# Bad: High cardinality (unique per user)
|
|
308
|
+
metrics.labels(user_id=user_id).inc()
|
|
309
|
+
|
|
310
|
+
# Good: Bounded cardinality
|
|
311
|
+
metrics.labels(user_tier="premium").inc()
|
|
312
|
+
|
|
313
|
+
# High-cardinality data goes in logs/traces
|
|
314
|
+
logger.info("User action", user_id=user_id)
|
|
315
|
+
```
|
|
316
|
+
|
|
317
|
+
## Security and Privacy
|
|
318
|
+
|
|
319
|
+
### Sensitive Data
|
|
320
|
+
|
|
321
|
+
**Don't Log:**
|
|
322
|
+
- Passwords and tokens
|
|
323
|
+
- Credit card numbers
|
|
324
|
+
- PII (unless required)
|
|
325
|
+
- Full request/response bodies
|
|
326
|
+
|
|
327
|
+
**Redaction:**
|
|
328
|
+
```python
|
|
329
|
+
def redact_sensitive(message: str) -> str:
|
|
330
|
+
# Redact credit cards
|
|
331
|
+
message = re.sub(r'\b\d{4}[\s-]?\d{4}[\s-]?\d{4}[\s-]?\d{4}\b',
|
|
332
|
+
'[REDACTED]', message)
|
|
333
|
+
# Redact emails
|
|
334
|
+
message = re.sub(r'\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}\b',
|
|
335
|
+
'[EMAIL_REDACTED]', message)
|
|
336
|
+
return message
|
|
337
|
+
```
|
|
338
|
+
|
|
339
|
+
### Access Control
|
|
340
|
+
|
|
341
|
+
**Control Access:**
|
|
342
|
+
- Restrict log access
|
|
343
|
+
- Encrypt data in transit
|
|
344
|
+
- Encrypt data at rest
|
|
345
|
+
- Audit access to observability data
|
|
346
|
+
|
|
347
|
+
## Dashboard Design
|
|
348
|
+
|
|
349
|
+
### Effective Dashboards
|
|
350
|
+
|
|
351
|
+
**Principles:**
|
|
352
|
+
- Focus on user experience
|
|
353
|
+
- Show key metrics prominently
|
|
354
|
+
- Group related metrics
|
|
355
|
+
- Use appropriate visualizations
|
|
356
|
+
- Keep it simple
|
|
357
|
+
|
|
358
|
+
**Layout:**
|
|
359
|
+
1. **Top:** Golden signals (latency, traffic, errors, saturation)
|
|
360
|
+
2. **Middle:** Service-specific metrics
|
|
361
|
+
3. **Bottom:** Infrastructure metrics
|
|
362
|
+
|
|
363
|
+
**Visualization Types:**
|
|
364
|
+
- **Line graphs:** Trends over time
|
|
365
|
+
- **Gauges:** Current values
|
|
366
|
+
- **Tables:** Detailed breakdowns
|
|
367
|
+
- **Heatmaps:** Distribution patterns
|
|
368
|
+
|
|
369
|
+
### Dashboard Hierarchies
|
|
370
|
+
|
|
371
|
+
**Level 1: Service Overview**
|
|
372
|
+
- Overall health
|
|
373
|
+
- Key SLIs
|
|
374
|
+
- Error rates
|
|
375
|
+
- Traffic volume
|
|
376
|
+
|
|
377
|
+
**Level 2: Service Details**
|
|
378
|
+
- Per-endpoint metrics
|
|
379
|
+
- Dependency health
|
|
380
|
+
- Resource usage
|
|
381
|
+
- Error breakdown
|
|
382
|
+
|
|
383
|
+
**Level 3: Deep Dive**
|
|
384
|
+
- Individual request traces
|
|
385
|
+
- Detailed logs
|
|
386
|
+
- Profiling data
|
|
387
|
+
- Custom queries
|
|
388
|
+
|
|
389
|
+
## Best Practices Summary
|
|
390
|
+
|
|
391
|
+
### 1. Start Small
|
|
392
|
+
|
|
393
|
+
- Begin with critical paths
|
|
394
|
+
- Add instrumentation incrementally
|
|
395
|
+
- Focus on high-value areas
|
|
396
|
+
- Don't instrument everything at once
|
|
397
|
+
|
|
398
|
+
### 2. Use Standards
|
|
399
|
+
|
|
400
|
+
- OpenTelemetry for traces
|
|
401
|
+
- Prometheus for metrics
|
|
402
|
+
- Structured logging (JSON)
|
|
403
|
+
- Semantic conventions
|
|
404
|
+
|
|
405
|
+
### 3. Correlate Everything
|
|
406
|
+
|
|
407
|
+
- Include correlation IDs
|
|
408
|
+
- Link logs, metrics, traces
|
|
409
|
+
- Propagate context
|
|
410
|
+
- Use consistent identifiers
|
|
411
|
+
|
|
412
|
+
### 4. Sample Wisely
|
|
413
|
+
|
|
414
|
+
- Always record errors
|
|
415
|
+
- Sample normal operations
|
|
416
|
+
- Use adaptive sampling
|
|
417
|
+
- Balance cost and visibility
|
|
418
|
+
|
|
419
|
+
### 5. Manage Overhead
|
|
420
|
+
|
|
421
|
+
- Use async exporters
|
|
422
|
+
- Batch operations
|
|
423
|
+
- Measure impact
|
|
424
|
+
- Optimize hot paths
|
|
425
|
+
|
|
426
|
+
### 6. Control Cardinality
|
|
427
|
+
|
|
428
|
+
- Bounded label sets
|
|
429
|
+
- Avoid high-cardinality attributes
|
|
430
|
+
- Use logs for detailed data
|
|
431
|
+
- Aggregate metrics appropriately
|
|
432
|
+
|
|
433
|
+
### 7. Secure and Private
|
|
434
|
+
|
|
435
|
+
- Redact sensitive data
|
|
436
|
+
- Control access
|
|
437
|
+
- Encrypt data
|
|
438
|
+
- Audit access
|
|
439
|
+
|
|
440
|
+
### 8. Document and Educate
|
|
441
|
+
|
|
442
|
+
- Document instrumentation
|
|
443
|
+
- Share best practices
|
|
444
|
+
- Create runbooks
|
|
445
|
+
- Regular reviews
|
|
446
|
+
|
|
447
|
+
## Tools and Platforms
|
|
448
|
+
|
|
449
|
+
### Open Source Stack
|
|
450
|
+
|
|
451
|
+
**Collection:**
|
|
452
|
+
- Prometheus (metrics)
|
|
453
|
+
- Jaeger/Zipkin (traces)
|
|
454
|
+
- Fluentd/Fluent Bit (logs)
|
|
455
|
+
|
|
456
|
+
**Storage:**
|
|
457
|
+
- Prometheus (metrics)
|
|
458
|
+
- Elasticsearch (logs)
|
|
459
|
+
- Tempo (traces)
|
|
460
|
+
|
|
461
|
+
**Visualization:**
|
|
462
|
+
- Grafana (metrics and traces)
|
|
463
|
+
- Kibana (logs)
|
|
464
|
+
- Jaeger UI (traces)
|
|
465
|
+
|
|
466
|
+
### Commercial Platforms
|
|
467
|
+
|
|
468
|
+
**Full-Stack:**
|
|
469
|
+
- Datadog
|
|
470
|
+
- New Relic
|
|
471
|
+
- Dynatrace
|
|
472
|
+
- AppDynamics
|
|
473
|
+
|
|
474
|
+
**Specialized:**
|
|
475
|
+
- Honeycomb (traces)
|
|
476
|
+
- Splunk (logs)
|
|
477
|
+
- CloudWatch (AWS)
|
|
478
|
+
- Google Cloud Operations (GCP, formerly Stackdriver)
|
|
479
|
+
|
|
480
|
+
## Summary
|
|
481
|
+
|
|
482
|
+
Effective observability requires:
|
|
483
|
+
|
|
484
|
+
1. **Three pillars:** Logs, metrics, and traces
|
|
485
|
+
2. **Correlation:** Link across all signals
|
|
486
|
+
3. **Standards:** Use semantic conventions
|
|
487
|
+
4. **Sampling:** Balance cost and visibility
|
|
488
|
+
5. **Performance:** Minimize overhead
|
|
489
|
+
6. **Security:** Protect sensitive data
|
|
490
|
+
7. **Documentation:** Share knowledge
|
|
491
|
+
8. **Iteration:** Continuous improvement
|
|
492
|
+
|