tapps-agents 3.6.0__py3-none-any.whl → 3.6.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- tapps_agents/__init__.py +2 -2
- tapps_agents/agents/__init__.py +22 -22
- tapps_agents/agents/analyst/__init__.py +5 -5
- tapps_agents/agents/architect/__init__.py +5 -5
- tapps_agents/agents/architect/agent.py +1033 -1033
- tapps_agents/agents/architect/pattern_detector.py +75 -75
- tapps_agents/agents/cleanup/__init__.py +7 -7
- tapps_agents/agents/cleanup/agent.py +445 -445
- tapps_agents/agents/debugger/__init__.py +7 -7
- tapps_agents/agents/debugger/agent.py +310 -310
- tapps_agents/agents/debugger/error_analyzer.py +437 -437
- tapps_agents/agents/designer/__init__.py +5 -5
- tapps_agents/agents/designer/agent.py +786 -786
- tapps_agents/agents/designer/visual_designer.py +638 -638
- tapps_agents/agents/documenter/__init__.py +7 -7
- tapps_agents/agents/documenter/agent.py +531 -531
- tapps_agents/agents/documenter/doc_generator.py +472 -472
- tapps_agents/agents/documenter/doc_validator.py +393 -393
- tapps_agents/agents/documenter/framework_doc_updater.py +493 -493
- tapps_agents/agents/enhancer/__init__.py +7 -7
- tapps_agents/agents/evaluator/__init__.py +7 -7
- tapps_agents/agents/evaluator/agent.py +443 -443
- tapps_agents/agents/evaluator/priority_evaluator.py +641 -641
- tapps_agents/agents/evaluator/quality_analyzer.py +147 -147
- tapps_agents/agents/evaluator/report_generator.py +344 -344
- tapps_agents/agents/evaluator/usage_analyzer.py +192 -192
- tapps_agents/agents/evaluator/workflow_analyzer.py +189 -189
- tapps_agents/agents/implementer/__init__.py +7 -7
- tapps_agents/agents/implementer/agent.py +798 -798
- tapps_agents/agents/implementer/auto_fix.py +1119 -1119
- tapps_agents/agents/implementer/code_generator.py +73 -73
- tapps_agents/agents/improver/__init__.py +1 -1
- tapps_agents/agents/improver/agent.py +753 -753
- tapps_agents/agents/ops/__init__.py +1 -1
- tapps_agents/agents/ops/agent.py +619 -619
- tapps_agents/agents/ops/dependency_analyzer.py +600 -600
- tapps_agents/agents/orchestrator/__init__.py +5 -5
- tapps_agents/agents/orchestrator/agent.py +522 -522
- tapps_agents/agents/planner/__init__.py +7 -7
- tapps_agents/agents/planner/agent.py +1127 -1127
- tapps_agents/agents/reviewer/__init__.py +24 -24
- tapps_agents/agents/reviewer/agent.py +3513 -3513
- tapps_agents/agents/reviewer/aggregator.py +213 -213
- tapps_agents/agents/reviewer/batch_review.py +448 -448
- tapps_agents/agents/reviewer/cache.py +443 -443
- tapps_agents/agents/reviewer/context7_enhancer.py +630 -630
- tapps_agents/agents/reviewer/context_detector.py +203 -203
- tapps_agents/agents/reviewer/docker_compose_validator.py +158 -158
- tapps_agents/agents/reviewer/dockerfile_validator.py +176 -176
- tapps_agents/agents/reviewer/error_handling.py +126 -126
- tapps_agents/agents/reviewer/feedback_generator.py +490 -490
- tapps_agents/agents/reviewer/influxdb_validator.py +316 -316
- tapps_agents/agents/reviewer/issue_tracking.py +169 -169
- tapps_agents/agents/reviewer/library_detector.py +295 -295
- tapps_agents/agents/reviewer/library_patterns.py +268 -268
- tapps_agents/agents/reviewer/maintainability_scorer.py +593 -593
- tapps_agents/agents/reviewer/metric_strategies.py +276 -276
- tapps_agents/agents/reviewer/mqtt_validator.py +160 -160
- tapps_agents/agents/reviewer/output_enhancer.py +105 -105
- tapps_agents/agents/reviewer/pattern_detector.py +241 -241
- tapps_agents/agents/reviewer/performance_scorer.py +357 -357
- tapps_agents/agents/reviewer/phased_review.py +516 -516
- tapps_agents/agents/reviewer/progressive_review.py +435 -435
- tapps_agents/agents/reviewer/react_scorer.py +331 -331
- tapps_agents/agents/reviewer/score_constants.py +228 -228
- tapps_agents/agents/reviewer/score_validator.py +507 -507
- tapps_agents/agents/reviewer/scorer_registry.py +373 -373
- tapps_agents/agents/reviewer/service_discovery.py +534 -534
- tapps_agents/agents/reviewer/tools/parallel_executor.py +581 -581
- tapps_agents/agents/reviewer/tools/ruff_grouping.py +250 -250
- tapps_agents/agents/reviewer/tools/scoped_mypy.py +284 -284
- tapps_agents/agents/reviewer/typescript_scorer.py +1142 -1142
- tapps_agents/agents/reviewer/validation.py +208 -208
- tapps_agents/agents/reviewer/websocket_validator.py +132 -132
- tapps_agents/agents/tester/__init__.py +7 -7
- tapps_agents/agents/tester/accessibility_auditor.py +309 -309
- tapps_agents/agents/tester/agent.py +1080 -1080
- tapps_agents/agents/tester/batch_generator.py +54 -54
- tapps_agents/agents/tester/context_learner.py +51 -51
- tapps_agents/agents/tester/coverage_analyzer.py +386 -386
- tapps_agents/agents/tester/coverage_test_generator.py +290 -290
- tapps_agents/agents/tester/debug_enhancer.py +238 -238
- tapps_agents/agents/tester/device_emulator.py +241 -241
- tapps_agents/agents/tester/integration_generator.py +62 -62
- tapps_agents/agents/tester/network_recorder.py +300 -300
- tapps_agents/agents/tester/performance_monitor.py +320 -320
- tapps_agents/agents/tester/test_fixer.py +316 -316
- tapps_agents/agents/tester/test_generator.py +632 -632
- tapps_agents/agents/tester/trace_manager.py +234 -234
- tapps_agents/agents/tester/visual_regression.py +291 -291
- tapps_agents/analysis/pattern_detector.py +36 -36
- tapps_agents/beads/hydration.py +213 -213
- tapps_agents/beads/parse.py +32 -32
- tapps_agents/beads/specs.py +206 -206
- tapps_agents/cli/__init__.py +9 -9
- tapps_agents/cli/__main__.py +8 -8
- tapps_agents/cli/base.py +478 -478
- tapps_agents/cli/command_classifier.py +72 -72
- tapps_agents/cli/commands/__init__.py +2 -2
- tapps_agents/cli/commands/analyst.py +173 -173
- tapps_agents/cli/commands/architect.py +109 -109
- tapps_agents/cli/commands/cleanup_agent.py +92 -92
- tapps_agents/cli/commands/common.py +126 -126
- tapps_agents/cli/commands/debugger.py +90 -90
- tapps_agents/cli/commands/designer.py +112 -112
- tapps_agents/cli/commands/documenter.py +136 -136
- tapps_agents/cli/commands/enhancer.py +110 -110
- tapps_agents/cli/commands/evaluator.py +255 -255
- tapps_agents/cli/commands/implementer.py +301 -301
- tapps_agents/cli/commands/improver.py +91 -91
- tapps_agents/cli/commands/knowledge.py +111 -111
- tapps_agents/cli/commands/learning.py +172 -172
- tapps_agents/cli/commands/observability.py +283 -283
- tapps_agents/cli/commands/ops.py +135 -135
- tapps_agents/cli/commands/orchestrator.py +116 -116
- tapps_agents/cli/commands/planner.py +237 -237
- tapps_agents/cli/commands/reviewer.py +1872 -1872
- tapps_agents/cli/commands/status.py +285 -285
- tapps_agents/cli/commands/task.py +227 -227
- tapps_agents/cli/commands/tester.py +191 -191
- tapps_agents/cli/feedback.py +936 -936
- tapps_agents/cli/formatters.py +608 -608
- tapps_agents/cli/help/__init__.py +7 -7
- tapps_agents/cli/help/static_help.py +425 -425
- tapps_agents/cli/network_detection.py +110 -110
- tapps_agents/cli/output_compactor.py +274 -274
- tapps_agents/cli/parsers/__init__.py +2 -2
- tapps_agents/cli/parsers/analyst.py +186 -186
- tapps_agents/cli/parsers/architect.py +167 -167
- tapps_agents/cli/parsers/cleanup_agent.py +228 -228
- tapps_agents/cli/parsers/debugger.py +116 -116
- tapps_agents/cli/parsers/designer.py +182 -182
- tapps_agents/cli/parsers/documenter.py +134 -134
- tapps_agents/cli/parsers/enhancer.py +113 -113
- tapps_agents/cli/parsers/evaluator.py +213 -213
- tapps_agents/cli/parsers/implementer.py +168 -168
- tapps_agents/cli/parsers/improver.py +132 -132
- tapps_agents/cli/parsers/ops.py +159 -159
- tapps_agents/cli/parsers/orchestrator.py +98 -98
- tapps_agents/cli/parsers/planner.py +145 -145
- tapps_agents/cli/parsers/reviewer.py +462 -462
- tapps_agents/cli/parsers/tester.py +124 -124
- tapps_agents/cli/progress_heartbeat.py +254 -254
- tapps_agents/cli/streaming_progress.py +336 -336
- tapps_agents/cli/utils/__init__.py +6 -6
- tapps_agents/cli/utils/agent_lifecycle.py +48 -48
- tapps_agents/cli/utils/error_formatter.py +82 -82
- tapps_agents/cli/utils/error_recovery.py +188 -188
- tapps_agents/cli/utils/output_handler.py +59 -59
- tapps_agents/cli/utils/prompt_enhancer.py +319 -319
- tapps_agents/cli/validators/__init__.py +9 -9
- tapps_agents/cli/validators/command_validator.py +81 -81
- tapps_agents/context7/__init__.py +112 -112
- tapps_agents/context7/agent_integration.py +869 -869
- tapps_agents/context7/analytics.py +382 -382
- tapps_agents/context7/analytics_dashboard.py +299 -299
- tapps_agents/context7/async_cache.py +681 -681
- tapps_agents/context7/backup_client.py +958 -958
- tapps_agents/context7/cache_locking.py +194 -194
- tapps_agents/context7/cache_metadata.py +214 -214
- tapps_agents/context7/cache_prewarm.py +488 -488
- tapps_agents/context7/cache_structure.py +168 -168
- tapps_agents/context7/cache_warming.py +604 -604
- tapps_agents/context7/circuit_breaker.py +376 -376
- tapps_agents/context7/cleanup.py +461 -461
- tapps_agents/context7/commands.py +858 -858
- tapps_agents/context7/credential_validation.py +276 -276
- tapps_agents/context7/cross_reference_resolver.py +168 -168
- tapps_agents/context7/cross_references.py +424 -424
- tapps_agents/context7/doc_manager.py +225 -225
- tapps_agents/context7/fuzzy_matcher.py +369 -369
- tapps_agents/context7/kb_cache.py +404 -404
- tapps_agents/context7/language_detector.py +219 -219
- tapps_agents/context7/library_detector.py +725 -725
- tapps_agents/context7/lookup.py +738 -738
- tapps_agents/context7/metadata.py +258 -258
- tapps_agents/context7/refresh_queue.py +300 -300
- tapps_agents/context7/security.py +373 -373
- tapps_agents/context7/staleness_policies.py +278 -278
- tapps_agents/context7/tiles_integration.py +47 -47
- tapps_agents/continuous_bug_fix/__init__.py +20 -20
- tapps_agents/continuous_bug_fix/bug_finder.py +306 -306
- tapps_agents/continuous_bug_fix/bug_fix_coordinator.py +177 -177
- tapps_agents/continuous_bug_fix/commit_manager.py +178 -178
- tapps_agents/continuous_bug_fix/continuous_bug_fixer.py +322 -322
- tapps_agents/continuous_bug_fix/proactive_bug_finder.py +285 -285
- tapps_agents/core/__init__.py +298 -298
- tapps_agents/core/adaptive_cache_config.py +432 -432
- tapps_agents/core/agent_base.py +647 -647
- tapps_agents/core/agent_cache.py +466 -466
- tapps_agents/core/agent_learning.py +1865 -1865
- tapps_agents/core/analytics_dashboard.py +563 -563
- tapps_agents/core/analytics_enhancements.py +597 -597
- tapps_agents/core/anonymization.py +274 -274
- tapps_agents/core/ast_parser.py +228 -228
- tapps_agents/core/async_file_ops.py +402 -402
- tapps_agents/core/best_practice_consultant.py +299 -299
- tapps_agents/core/brownfield_analyzer.py +299 -299
- tapps_agents/core/brownfield_review.py +541 -541
- tapps_agents/core/browser_controller.py +513 -513
- tapps_agents/core/capability_registry.py +418 -418
- tapps_agents/core/change_impact_analyzer.py +190 -190
- tapps_agents/core/checkpoint_manager.py +377 -377
- tapps_agents/core/code_generator.py +329 -329
- tapps_agents/core/code_validator.py +276 -276
- tapps_agents/core/command_registry.py +327 -327
- tapps_agents/core/context_gathering/__init__.py +2 -2
- tapps_agents/core/context_gathering/repository_explorer.py +28 -28
- tapps_agents/core/context_intelligence/__init__.py +2 -2
- tapps_agents/core/context_intelligence/relevance_scorer.py +24 -24
- tapps_agents/core/context_intelligence/token_budget_manager.py +27 -27
- tapps_agents/core/context_manager.py +240 -240
- tapps_agents/core/cursor_feedback_monitor.py +146 -146
- tapps_agents/core/cursor_verification.py +290 -290
- tapps_agents/core/customization_loader.py +280 -280
- tapps_agents/core/customization_schema.py +260 -260
- tapps_agents/core/customization_template.py +238 -238
- tapps_agents/core/debug_logger.py +124 -124
- tapps_agents/core/design_validator.py +298 -298
- tapps_agents/core/diagram_generator.py +226 -226
- tapps_agents/core/docker_utils.py +232 -232
- tapps_agents/core/document_generator.py +617 -617
- tapps_agents/core/domain_detector.py +30 -30
- tapps_agents/core/error_envelope.py +454 -454
- tapps_agents/core/error_handler.py +270 -270
- tapps_agents/core/estimation_tracker.py +189 -189
- tapps_agents/core/eval_prompt_engine.py +116 -116
- tapps_agents/core/evaluation_base.py +119 -119
- tapps_agents/core/evaluation_models.py +320 -320
- tapps_agents/core/evaluation_orchestrator.py +225 -225
- tapps_agents/core/evaluators/__init__.py +7 -7
- tapps_agents/core/evaluators/architectural_evaluator.py +205 -205
- tapps_agents/core/evaluators/behavioral_evaluator.py +160 -160
- tapps_agents/core/evaluators/performance_profile_evaluator.py +160 -160
- tapps_agents/core/evaluators/security_posture_evaluator.py +148 -148
- tapps_agents/core/evaluators/spec_compliance_evaluator.py +181 -181
- tapps_agents/core/exceptions.py +107 -107
- tapps_agents/core/expert_config_generator.py +293 -293
- tapps_agents/core/export_schema.py +202 -202
- tapps_agents/core/external_feedback_models.py +102 -102
- tapps_agents/core/external_feedback_storage.py +213 -213
- tapps_agents/core/fallback_strategy.py +314 -314
- tapps_agents/core/feedback_analyzer.py +162 -162
- tapps_agents/core/feedback_collector.py +178 -178
- tapps_agents/core/git_operations.py +445 -445
- tapps_agents/core/hardware_profiler.py +151 -151
- tapps_agents/core/instructions.py +324 -324
- tapps_agents/core/io_guardrails.py +69 -69
- tapps_agents/core/issue_manifest.py +249 -249
- tapps_agents/core/issue_schema.py +139 -139
- tapps_agents/core/json_utils.py +128 -128
- tapps_agents/core/knowledge_graph.py +446 -446
- tapps_agents/core/language_detector.py +296 -296
- tapps_agents/core/learning_confidence.py +242 -242
- tapps_agents/core/learning_dashboard.py +246 -246
- tapps_agents/core/learning_decision.py +384 -384
- tapps_agents/core/learning_explainability.py +578 -578
- tapps_agents/core/learning_export.py +287 -287
- tapps_agents/core/learning_integration.py +228 -228
- tapps_agents/core/llm_behavior.py +232 -232
- tapps_agents/core/long_duration_support.py +786 -786
- tapps_agents/core/mcp_setup.py +106 -106
- tapps_agents/core/memory_integration.py +396 -396
- tapps_agents/core/meta_learning.py +666 -666
- tapps_agents/core/module_path_sanitizer.py +199 -199
- tapps_agents/core/multi_agent_orchestrator.py +382 -382
- tapps_agents/core/network_errors.py +125 -125
- tapps_agents/core/nfr_validator.py +336 -336
- tapps_agents/core/offline_mode.py +158 -158
- tapps_agents/core/output_contracts.py +300 -300
- tapps_agents/core/output_formatter.py +300 -300
- tapps_agents/core/path_normalizer.py +174 -174
- tapps_agents/core/path_validator.py +322 -322
- tapps_agents/core/pattern_library.py +250 -250
- tapps_agents/core/performance_benchmark.py +301 -301
- tapps_agents/core/performance_monitor.py +184 -184
- tapps_agents/core/playwright_mcp_controller.py +771 -771
- tapps_agents/core/policy_loader.py +135 -135
- tapps_agents/core/progress.py +166 -166
- tapps_agents/core/project_profile.py +354 -354
- tapps_agents/core/project_type_detector.py +454 -454
- tapps_agents/core/prompt_base.py +223 -223
- tapps_agents/core/prompt_learning/__init__.py +2 -2
- tapps_agents/core/prompt_learning/learning_loop.py +24 -24
- tapps_agents/core/prompt_learning/project_prompt_store.py +25 -25
- tapps_agents/core/prompt_learning/skills_prompt_analyzer.py +35 -35
- tapps_agents/core/prompt_optimization/__init__.py +6 -6
- tapps_agents/core/prompt_optimization/ab_tester.py +114 -114
- tapps_agents/core/prompt_optimization/correlation_analyzer.py +160 -160
- tapps_agents/core/prompt_optimization/progressive_refiner.py +129 -129
- tapps_agents/core/prompt_optimization/prompt_library.py +37 -37
- tapps_agents/core/requirements_evaluator.py +431 -431
- tapps_agents/core/resource_aware_executor.py +449 -449
- tapps_agents/core/resource_monitor.py +343 -343
- tapps_agents/core/resume_handler.py +298 -298
- tapps_agents/core/retry_handler.py +197 -197
- tapps_agents/core/review_checklists.py +479 -479
- tapps_agents/core/role_loader.py +201 -201
- tapps_agents/core/role_template_loader.py +201 -201
- tapps_agents/core/runtime_mode.py +60 -60
- tapps_agents/core/security_scanner.py +342 -342
- tapps_agents/core/skill_agent_registry.py +194 -194
- tapps_agents/core/skill_integration.py +208 -208
- tapps_agents/core/skill_loader.py +492 -492
- tapps_agents/core/skill_template.py +341 -341
- tapps_agents/core/skill_validator.py +478 -478
- tapps_agents/core/stack_analyzer.py +35 -35
- tapps_agents/core/startup.py +174 -174
- tapps_agents/core/storage_manager.py +397 -397
- tapps_agents/core/storage_models.py +166 -166
- tapps_agents/core/story_evaluator.py +410 -410
- tapps_agents/core/subprocess_utils.py +170 -170
- tapps_agents/core/task_duration.py +296 -296
- tapps_agents/core/task_memory.py +582 -582
- tapps_agents/core/task_state.py +226 -226
- tapps_agents/core/tech_stack_priorities.py +208 -208
- tapps_agents/core/temp_directory.py +194 -194
- tapps_agents/core/template_merger.py +600 -600
- tapps_agents/core/template_selector.py +280 -280
- tapps_agents/core/test_generator.py +286 -286
- tapps_agents/core/tiered_context.py +253 -253
- tapps_agents/core/token_monitor.py +345 -345
- tapps_agents/core/traceability.py +254 -254
- tapps_agents/core/trajectory_tracker.py +50 -50
- tapps_agents/core/unicode_safe.py +143 -143
- tapps_agents/core/unified_cache_config.py +170 -170
- tapps_agents/core/unified_state.py +324 -324
- tapps_agents/core/validate_cursor_setup.py +237 -237
- tapps_agents/core/validation_registry.py +136 -136
- tapps_agents/core/validators/__init__.py +4 -4
- tapps_agents/core/validators/python_validator.py +87 -87
- tapps_agents/core/verification_agent.py +90 -90
- tapps_agents/core/visual_feedback.py +644 -644
- tapps_agents/core/workflow_validator.py +197 -197
- tapps_agents/core/worktree.py +367 -367
- tapps_agents/docker/__init__.py +10 -10
- tapps_agents/docker/analyzer.py +186 -186
- tapps_agents/docker/debugger.py +229 -229
- tapps_agents/docker/error_patterns.py +216 -216
- tapps_agents/epic/__init__.py +22 -22
- tapps_agents/epic/beads_sync.py +115 -115
- tapps_agents/epic/markdown_sync.py +105 -105
- tapps_agents/epic/models.py +96 -96
- tapps_agents/experts/__init__.py +163 -163
- tapps_agents/experts/agent_integration.py +243 -243
- tapps_agents/experts/auto_generator.py +331 -331
- tapps_agents/experts/base_expert.py +536 -536
- tapps_agents/experts/builtin_registry.py +261 -261
- tapps_agents/experts/business_metrics.py +565 -565
- tapps_agents/experts/cache.py +266 -266
- tapps_agents/experts/confidence_breakdown.py +306 -306
- tapps_agents/experts/confidence_calculator.py +336 -336
- tapps_agents/experts/confidence_metrics.py +236 -236
- tapps_agents/experts/domain_config.py +311 -311
- tapps_agents/experts/domain_detector.py +550 -550
- tapps_agents/experts/domain_utils.py +84 -84
- tapps_agents/experts/expert_config.py +113 -113
- tapps_agents/experts/expert_engine.py +465 -465
- tapps_agents/experts/expert_registry.py +744 -744
- tapps_agents/experts/expert_synthesizer.py +70 -70
- tapps_agents/experts/governance.py +197 -197
- tapps_agents/experts/history_logger.py +312 -312
- tapps_agents/experts/knowledge/README.md +180 -180
- tapps_agents/experts/knowledge/accessibility/accessible-forms.md +331 -331
- tapps_agents/experts/knowledge/accessibility/aria-patterns.md +344 -344
- tapps_agents/experts/knowledge/accessibility/color-contrast.md +285 -285
- tapps_agents/experts/knowledge/accessibility/keyboard-navigation.md +332 -332
- tapps_agents/experts/knowledge/accessibility/screen-readers.md +282 -282
- tapps_agents/experts/knowledge/accessibility/semantic-html.md +355 -355
- tapps_agents/experts/knowledge/accessibility/testing-accessibility.md +369 -369
- tapps_agents/experts/knowledge/accessibility/wcag-2.1.md +296 -296
- tapps_agents/experts/knowledge/accessibility/wcag-2.2.md +211 -211
- tapps_agents/experts/knowledge/agent-learning/best-practices.md +715 -715
- tapps_agents/experts/knowledge/agent-learning/pattern-extraction.md +282 -282
- tapps_agents/experts/knowledge/agent-learning/prompt-optimization.md +320 -320
- tapps_agents/experts/knowledge/ai-frameworks/model-optimization.md +90 -90
- tapps_agents/experts/knowledge/ai-frameworks/openvino-patterns.md +260 -260
- tapps_agents/experts/knowledge/api-design-integration/api-gateway-patterns.md +309 -309
- tapps_agents/experts/knowledge/api-design-integration/api-security-patterns.md +521 -521
- tapps_agents/experts/knowledge/api-design-integration/api-versioning.md +421 -421
- tapps_agents/experts/knowledge/api-design-integration/async-protocol-patterns.md +61 -61
- tapps_agents/experts/knowledge/api-design-integration/contract-testing.md +221 -221
- tapps_agents/experts/knowledge/api-design-integration/external-api-integration.md +489 -489
- tapps_agents/experts/knowledge/api-design-integration/fastapi-patterns.md +360 -360
- tapps_agents/experts/knowledge/api-design-integration/fastapi-testing.md +262 -262
- tapps_agents/experts/knowledge/api-design-integration/graphql-patterns.md +582 -582
- tapps_agents/experts/knowledge/api-design-integration/grpc-best-practices.md +499 -499
- tapps_agents/experts/knowledge/api-design-integration/mqtt-patterns.md +455 -455
- tapps_agents/experts/knowledge/api-design-integration/rate-limiting.md +507 -507
- tapps_agents/experts/knowledge/api-design-integration/restful-api-design.md +618 -618
- tapps_agents/experts/knowledge/api-design-integration/websocket-patterns.md +480 -480
- tapps_agents/experts/knowledge/cloud-infrastructure/cloud-native-patterns.md +175 -175
- tapps_agents/experts/knowledge/cloud-infrastructure/container-health-checks.md +261 -261
- tapps_agents/experts/knowledge/cloud-infrastructure/containerization.md +222 -222
- tapps_agents/experts/knowledge/cloud-infrastructure/cost-optimization.md +122 -122
- tapps_agents/experts/knowledge/cloud-infrastructure/disaster-recovery.md +153 -153
- tapps_agents/experts/knowledge/cloud-infrastructure/dockerfile-patterns.md +285 -285
- tapps_agents/experts/knowledge/cloud-infrastructure/infrastructure-as-code.md +187 -187
- tapps_agents/experts/knowledge/cloud-infrastructure/kubernetes-patterns.md +253 -253
- tapps_agents/experts/knowledge/cloud-infrastructure/multi-cloud-strategies.md +155 -155
- tapps_agents/experts/knowledge/cloud-infrastructure/serverless-architecture.md +200 -200
- tapps_agents/experts/knowledge/code-quality-analysis/README.md +16 -16
- tapps_agents/experts/knowledge/code-quality-analysis/code-metrics.md +137 -137
- tapps_agents/experts/knowledge/code-quality-analysis/complexity-analysis.md +181 -181
- tapps_agents/experts/knowledge/code-quality-analysis/technical-debt-patterns.md +191 -191
- tapps_agents/experts/knowledge/data-privacy-compliance/anonymization.md +313 -313
- tapps_agents/experts/knowledge/data-privacy-compliance/ccpa.md +255 -255
- tapps_agents/experts/knowledge/data-privacy-compliance/consent-management.md +282 -282
- tapps_agents/experts/knowledge/data-privacy-compliance/data-minimization.md +275 -275
- tapps_agents/experts/knowledge/data-privacy-compliance/data-retention.md +297 -297
- tapps_agents/experts/knowledge/data-privacy-compliance/data-subject-rights.md +383 -383
- tapps_agents/experts/knowledge/data-privacy-compliance/encryption-privacy.md +285 -285
- tapps_agents/experts/knowledge/data-privacy-compliance/gdpr.md +344 -344
- tapps_agents/experts/knowledge/data-privacy-compliance/hipaa.md +385 -385
- tapps_agents/experts/knowledge/data-privacy-compliance/privacy-by-design.md +280 -280
- tapps_agents/experts/knowledge/database-data-management/acid-vs-cap.md +164 -164
- tapps_agents/experts/knowledge/database-data-management/backup-and-recovery.md +182 -182
- tapps_agents/experts/knowledge/database-data-management/data-modeling.md +172 -172
- tapps_agents/experts/knowledge/database-data-management/database-design.md +187 -187
- tapps_agents/experts/knowledge/database-data-management/flux-query-optimization.md +342 -342
- tapps_agents/experts/knowledge/database-data-management/influxdb-connection-patterns.md +432 -432
- tapps_agents/experts/knowledge/database-data-management/influxdb-patterns.md +442 -442
- tapps_agents/experts/knowledge/database-data-management/migration-strategies.md +216 -216
- tapps_agents/experts/knowledge/database-data-management/nosql-patterns.md +259 -259
- tapps_agents/experts/knowledge/database-data-management/scalability-patterns.md +184 -184
- tapps_agents/experts/knowledge/database-data-management/sql-optimization.md +175 -175
- tapps_agents/experts/knowledge/database-data-management/time-series-modeling.md +444 -444
- tapps_agents/experts/knowledge/development-workflow/README.md +16 -16
- tapps_agents/experts/knowledge/development-workflow/automation-best-practices.md +216 -216
- tapps_agents/experts/knowledge/development-workflow/build-strategies.md +198 -198
- tapps_agents/experts/knowledge/development-workflow/deployment-patterns.md +205 -205
- tapps_agents/experts/knowledge/development-workflow/git-workflows.md +205 -205
- tapps_agents/experts/knowledge/documentation-knowledge-management/README.md +16 -16
- tapps_agents/experts/knowledge/documentation-knowledge-management/api-documentation-patterns.md +231 -231
- tapps_agents/experts/knowledge/documentation-knowledge-management/documentation-standards.md +191 -191
- tapps_agents/experts/knowledge/documentation-knowledge-management/knowledge-management.md +171 -171
- tapps_agents/experts/knowledge/documentation-knowledge-management/technical-writing-guide.md +192 -192
- tapps_agents/experts/knowledge/observability-monitoring/alerting-patterns.md +461 -461
- tapps_agents/experts/knowledge/observability-monitoring/apm-tools.md +459 -459
- tapps_agents/experts/knowledge/observability-monitoring/distributed-tracing.md +367 -367
- tapps_agents/experts/knowledge/observability-monitoring/logging-strategies.md +478 -478
- tapps_agents/experts/knowledge/observability-monitoring/metrics-and-monitoring.md +510 -510
- tapps_agents/experts/knowledge/observability-monitoring/observability-best-practices.md +492 -492
- tapps_agents/experts/knowledge/observability-monitoring/open-telemetry.md +573 -573
- tapps_agents/experts/knowledge/observability-monitoring/slo-sli-sla.md +419 -419
- tapps_agents/experts/knowledge/performance/anti-patterns.md +284 -284
- tapps_agents/experts/knowledge/performance/api-performance.md +256 -256
- tapps_agents/experts/knowledge/performance/caching.md +327 -327
- tapps_agents/experts/knowledge/performance/database-performance.md +252 -252
- tapps_agents/experts/knowledge/performance/optimization-patterns.md +327 -327
- tapps_agents/experts/knowledge/performance/profiling.md +297 -297
- tapps_agents/experts/knowledge/performance/resource-management.md +293 -293
- tapps_agents/experts/knowledge/performance/scalability.md +306 -306
- tapps_agents/experts/knowledge/security/owasp-top10.md +209 -209
- tapps_agents/experts/knowledge/security/secure-coding-practices.md +207 -207
- tapps_agents/experts/knowledge/security/threat-modeling.md +220 -220
- tapps_agents/experts/knowledge/security/vulnerability-patterns.md +342 -342
- tapps_agents/experts/knowledge/software-architecture/docker-compose-patterns.md +314 -314
- tapps_agents/experts/knowledge/software-architecture/microservices-patterns.md +379 -379
- tapps_agents/experts/knowledge/software-architecture/service-communication.md +316 -316
- tapps_agents/experts/knowledge/testing/best-practices.md +310 -310
- tapps_agents/experts/knowledge/testing/coverage-analysis.md +293 -293
- tapps_agents/experts/knowledge/testing/mocking.md +256 -256
- tapps_agents/experts/knowledge/testing/test-automation.md +276 -276
- tapps_agents/experts/knowledge/testing/test-data.md +271 -271
- tapps_agents/experts/knowledge/testing/test-design-patterns.md +280 -280
- tapps_agents/experts/knowledge/testing/test-maintenance.md +236 -236
- tapps_agents/experts/knowledge/testing/test-strategies.md +311 -311
- tapps_agents/experts/knowledge/user-experience/information-architecture.md +325 -325
- tapps_agents/experts/knowledge/user-experience/interaction-design.md +363 -363
- tapps_agents/experts/knowledge/user-experience/prototyping.md +293 -293
- tapps_agents/experts/knowledge/user-experience/usability-heuristics.md +337 -337
- tapps_agents/experts/knowledge/user-experience/usability-testing.md +311 -311
- tapps_agents/experts/knowledge/user-experience/user-journeys.md +296 -296
- tapps_agents/experts/knowledge/user-experience/user-research.md +373 -373
- tapps_agents/experts/knowledge/user-experience/ux-principles.md +340 -340
- tapps_agents/experts/knowledge_freshness.py +321 -321
- tapps_agents/experts/knowledge_ingestion.py +438 -438
- tapps_agents/experts/knowledge_need_detector.py +93 -93
- tapps_agents/experts/knowledge_validator.py +382 -382
- tapps_agents/experts/observability.py +440 -440
- tapps_agents/experts/passive_notifier.py +238 -238
- tapps_agents/experts/proactive_orchestrator.py +32 -32
- tapps_agents/experts/rag_chunker.py +205 -205
- tapps_agents/experts/rag_embedder.py +152 -152
- tapps_agents/experts/rag_evaluation.py +299 -299
- tapps_agents/experts/rag_index.py +303 -303
- tapps_agents/experts/rag_metrics.py +293 -293
- tapps_agents/experts/rag_safety.py +263 -263
- tapps_agents/experts/report_generator.py +296 -296
- tapps_agents/experts/setup_wizard.py +441 -441
- tapps_agents/experts/simple_rag.py +431 -431
- tapps_agents/experts/vector_rag.py +354 -354
- tapps_agents/experts/weight_distributor.py +304 -304
- tapps_agents/health/__init__.py +24 -24
- tapps_agents/health/base.py +75 -75
- tapps_agents/health/checks/__init__.py +22 -22
- tapps_agents/health/checks/automation.py +127 -127
- tapps_agents/health/checks/context7_cache.py +210 -210
- tapps_agents/health/checks/environment.py +116 -116
- tapps_agents/health/checks/execution.py +170 -170
- tapps_agents/health/checks/knowledge_base.py +187 -187
- tapps_agents/health/checks/outcomes.backup_20260204_064058.py +324 -0
- tapps_agents/health/checks/outcomes.backup_20260204_064256.py +324 -0
- tapps_agents/health/checks/outcomes.backup_20260204_064600.py +324 -0
- tapps_agents/health/checks/outcomes.py +324 -324
- tapps_agents/health/collector.py +280 -280
- tapps_agents/health/dashboard.py +137 -137
- tapps_agents/health/metrics.py +151 -151
- tapps_agents/health/registry.py +166 -166
- tapps_agents/hooks/__init__.py +33 -33
- tapps_agents/hooks/config.py +140 -140
- tapps_agents/hooks/events.py +135 -135
- tapps_agents/hooks/executor.py +128 -128
- tapps_agents/hooks/manager.py +143 -143
- tapps_agents/integration/__init__.py +8 -8
- tapps_agents/integration/service_integrator.py +121 -121
- tapps_agents/integrations/__init__.py +10 -10
- tapps_agents/integrations/clawdbot.py +525 -525
- tapps_agents/integrations/memory_bridge.py +356 -356
- tapps_agents/mcp/__init__.py +18 -18
- tapps_agents/mcp/gateway.py +112 -112
- tapps_agents/mcp/servers/__init__.py +13 -13
- tapps_agents/mcp/servers/analysis.py +204 -204
- tapps_agents/mcp/servers/context7.py +198 -198
- tapps_agents/mcp/servers/filesystem.py +218 -218
- tapps_agents/mcp/servers/git.py +201 -201
- tapps_agents/mcp/tool_registry.py +115 -115
- tapps_agents/quality/__init__.py +54 -54
- tapps_agents/quality/coverage_analyzer.py +379 -379
- tapps_agents/quality/enforcement.py +82 -82
- tapps_agents/quality/gates/__init__.py +37 -37
- tapps_agents/quality/gates/approval_gate.py +255 -255
- tapps_agents/quality/gates/base.py +84 -84
- tapps_agents/quality/gates/exceptions.py +43 -43
- tapps_agents/quality/gates/policy_gate.py +195 -195
- tapps_agents/quality/gates/registry.py +239 -239
- tapps_agents/quality/gates/security_gate.py +156 -156
- tapps_agents/quality/quality_gates.py +369 -369
- tapps_agents/quality/secret_scanner.py +335 -335
- tapps_agents/resources/__init__.py +5 -0
- tapps_agents/resources/claude/__init__.py +1 -0
- tapps_agents/resources/claude/commands/README.md +156 -0
- tapps_agents/resources/claude/commands/__init__.py +1 -0
- tapps_agents/resources/claude/commands/build-fix.md +22 -0
- tapps_agents/resources/claude/commands/build.md +77 -0
- tapps_agents/resources/claude/commands/debug.md +53 -0
- tapps_agents/resources/claude/commands/design.md +68 -0
- tapps_agents/resources/claude/commands/docs.md +53 -0
- tapps_agents/resources/claude/commands/e2e.md +22 -0
- tapps_agents/resources/claude/commands/fix.md +54 -0
- tapps_agents/resources/claude/commands/implement.md +53 -0
- tapps_agents/resources/claude/commands/improve.md +53 -0
- tapps_agents/resources/claude/commands/library-docs.md +64 -0
- tapps_agents/resources/claude/commands/lint.md +52 -0
- tapps_agents/resources/claude/commands/plan.md +65 -0
- tapps_agents/resources/claude/commands/refactor-clean.md +21 -0
- tapps_agents/resources/claude/commands/refactor.md +55 -0
- tapps_agents/resources/claude/commands/review.md +67 -0
- tapps_agents/resources/claude/commands/score.md +60 -0
- tapps_agents/resources/claude/commands/security-review.md +22 -0
- tapps_agents/resources/claude/commands/security-scan.md +54 -0
- tapps_agents/resources/claude/commands/tdd.md +24 -0
- tapps_agents/resources/claude/commands/test-coverage.md +21 -0
- tapps_agents/resources/claude/commands/test.md +54 -0
- tapps_agents/resources/claude/commands/update-codemaps.md +20 -0
- tapps_agents/resources/claude/commands/update-docs.md +21 -0
- tapps_agents/resources/claude/skills/__init__.py +1 -0
- tapps_agents/resources/claude/skills/analyst/SKILL.md +272 -0
- tapps_agents/resources/claude/skills/analyst/__init__.py +1 -0
- tapps_agents/resources/claude/skills/architect/SKILL.md +282 -0
- tapps_agents/resources/claude/skills/architect/__init__.py +1 -0
- tapps_agents/resources/claude/skills/backend-patterns/SKILL.md +30 -0
- tapps_agents/resources/claude/skills/backend-patterns/__init__.py +1 -0
- tapps_agents/resources/claude/skills/coding-standards/SKILL.md +29 -0
- tapps_agents/resources/claude/skills/coding-standards/__init__.py +1 -0
- tapps_agents/resources/claude/skills/debugger/SKILL.md +203 -0
- tapps_agents/resources/claude/skills/debugger/__init__.py +1 -0
- tapps_agents/resources/claude/skills/designer/SKILL.md +243 -0
- tapps_agents/resources/claude/skills/designer/__init__.py +1 -0
- tapps_agents/resources/claude/skills/documenter/SKILL.md +252 -0
- tapps_agents/resources/claude/skills/documenter/__init__.py +1 -0
- tapps_agents/resources/claude/skills/enhancer/SKILL.md +307 -0
- tapps_agents/resources/claude/skills/enhancer/__init__.py +1 -0
- tapps_agents/resources/claude/skills/evaluator/SKILL.md +204 -0
- tapps_agents/resources/claude/skills/evaluator/__init__.py +1 -0
- tapps_agents/resources/claude/skills/frontend-patterns/SKILL.md +29 -0
- tapps_agents/resources/claude/skills/frontend-patterns/__init__.py +1 -0
- tapps_agents/resources/claude/skills/implementer/SKILL.md +188 -0
- tapps_agents/resources/claude/skills/implementer/__init__.py +1 -0
- tapps_agents/resources/claude/skills/improver/SKILL.md +218 -0
- tapps_agents/resources/claude/skills/improver/__init__.py +1 -0
- tapps_agents/resources/claude/skills/ops/SKILL.md +281 -0
- tapps_agents/resources/claude/skills/ops/__init__.py +1 -0
- tapps_agents/resources/claude/skills/orchestrator/SKILL.md +390 -0
- tapps_agents/resources/claude/skills/orchestrator/__init__.py +1 -0
- tapps_agents/resources/claude/skills/planner/SKILL.md +254 -0
- tapps_agents/resources/claude/skills/planner/__init__.py +1 -0
- tapps_agents/resources/claude/skills/reviewer/SKILL.md +434 -0
- tapps_agents/resources/claude/skills/reviewer/__init__.py +1 -0
- tapps_agents/resources/claude/skills/security-review/SKILL.md +31 -0
- tapps_agents/resources/claude/skills/security-review/__init__.py +1 -0
- tapps_agents/resources/claude/skills/simple-mode/SKILL.md +695 -0
- tapps_agents/resources/claude/skills/simple-mode/__init__.py +1 -0
- tapps_agents/resources/claude/skills/tester/SKILL.md +219 -0
- tapps_agents/resources/claude/skills/tester/__init__.py +1 -0
- tapps_agents/resources/cursor/.cursorignore +35 -0
- tapps_agents/resources/cursor/__init__.py +1 -0
- tapps_agents/resources/cursor/commands/__init__.py +1 -0
- tapps_agents/resources/cursor/commands/build-fix.md +11 -0
- tapps_agents/resources/cursor/commands/build.md +11 -0
- tapps_agents/resources/cursor/commands/e2e.md +11 -0
- tapps_agents/resources/cursor/commands/fix.md +11 -0
- tapps_agents/resources/cursor/commands/refactor-clean.md +11 -0
- tapps_agents/resources/cursor/commands/review.md +11 -0
- tapps_agents/resources/cursor/commands/security-review.md +11 -0
- tapps_agents/resources/cursor/commands/tdd.md +11 -0
- tapps_agents/resources/cursor/commands/test-coverage.md +11 -0
- tapps_agents/resources/cursor/commands/test.md +11 -0
- tapps_agents/resources/cursor/commands/update-codemaps.md +10 -0
- tapps_agents/resources/cursor/commands/update-docs.md +11 -0
- tapps_agents/resources/cursor/rules/__init__.py +1 -0
- tapps_agents/resources/cursor/rules/agent-capabilities.mdc +687 -0
- tapps_agents/resources/cursor/rules/coding-style.mdc +31 -0
- tapps_agents/resources/cursor/rules/command-reference.mdc +2081 -0
- tapps_agents/resources/cursor/rules/cursor-mode-usage.mdc +125 -0
- tapps_agents/resources/cursor/rules/git-workflow.mdc +29 -0
- tapps_agents/resources/cursor/rules/performance.mdc +29 -0
- tapps_agents/resources/cursor/rules/project-context.mdc +163 -0
- tapps_agents/resources/cursor/rules/project-profiling.mdc +197 -0
- tapps_agents/resources/cursor/rules/quick-reference.mdc +630 -0
- tapps_agents/resources/cursor/rules/security.mdc +32 -0
- tapps_agents/resources/cursor/rules/simple-mode.mdc +500 -0
- tapps_agents/resources/cursor/rules/testing.mdc +31 -0
- tapps_agents/resources/cursor/rules/when-to-use.mdc +156 -0
- tapps_agents/resources/cursor/rules/workflow-presets.mdc +179 -0
- tapps_agents/resources/customizations/__init__.py +1 -0
- tapps_agents/resources/customizations/example-custom.yaml +83 -0
- tapps_agents/resources/hooks/__init__.py +1 -0
- tapps_agents/resources/hooks/templates/README.md +5 -0
- tapps_agents/resources/hooks/templates/__init__.py +1 -0
- tapps_agents/resources/hooks/templates/add-project-context.yaml +8 -0
- tapps_agents/resources/hooks/templates/auto-format-js.yaml +10 -0
- tapps_agents/resources/hooks/templates/auto-format-python.yaml +10 -0
- tapps_agents/resources/hooks/templates/git-commit-check.yaml +7 -0
- tapps_agents/resources/hooks/templates/notify-on-complete.yaml +8 -0
- tapps_agents/resources/hooks/templates/quality-gate.yaml +8 -0
- tapps_agents/resources/hooks/templates/security-scan-on-edit.yaml +10 -0
- tapps_agents/resources/hooks/templates/session-end-log.yaml +7 -0
- tapps_agents/resources/hooks/templates/show-beads-ready.yaml +8 -0
- tapps_agents/resources/hooks/templates/test-on-edit.yaml +10 -0
- tapps_agents/resources/hooks/templates/update-docs-on-complete.yaml +8 -0
- tapps_agents/resources/hooks/templates/user-prompt-log.yaml +7 -0
- tapps_agents/resources/scripts/__init__.py +1 -0
- tapps_agents/resources/scripts/set_bd_path.ps1 +51 -0
- tapps_agents/resources/workflows/__init__.py +1 -0
- tapps_agents/resources/workflows/presets/__init__.py +1 -0
- tapps_agents/resources/workflows/presets/brownfield-analysis.yaml +235 -0
- tapps_agents/resources/workflows/presets/fix.yaml +78 -0
- tapps_agents/resources/workflows/presets/full-sdlc.yaml +122 -0
- tapps_agents/resources/workflows/presets/quality.yaml +82 -0
- tapps_agents/resources/workflows/presets/rapid-dev.yaml +84 -0
- tapps_agents/session/__init__.py +19 -19
- tapps_agents/session/manager.py +256 -256
- tapps_agents/simple_mode/__init__.py +66 -66
- tapps_agents/simple_mode/agent_contracts.py +357 -357
- tapps_agents/simple_mode/beads_hooks.py +151 -151
- tapps_agents/simple_mode/code_snippet_handler.py +382 -382
- tapps_agents/simple_mode/documentation_manager.py +395 -395
- tapps_agents/simple_mode/documentation_reader.py +187 -187
- tapps_agents/simple_mode/file_inference.py +292 -292
- tapps_agents/simple_mode/framework_change_detector.py +268 -268
- tapps_agents/simple_mode/intent_parser.py +510 -510
- tapps_agents/simple_mode/learning_progression.py +358 -358
- tapps_agents/simple_mode/nl_handler.py +700 -700
- tapps_agents/simple_mode/onboarding.py +253 -253
- tapps_agents/simple_mode/orchestrators/__init__.py +38 -38
- tapps_agents/simple_mode/orchestrators/breakdown_orchestrator.py +49 -49
- tapps_agents/simple_mode/orchestrators/brownfield_orchestrator.py +135 -135
- tapps_agents/simple_mode/orchestrators/deliverable_checklist.py +349 -349
- tapps_agents/simple_mode/orchestrators/enhance_orchestrator.py +53 -53
- tapps_agents/simple_mode/orchestrators/epic_orchestrator.py +122 -122
- tapps_agents/simple_mode/orchestrators/explore_orchestrator.py +184 -184
- tapps_agents/simple_mode/orchestrators/plan_analysis_orchestrator.py +206 -206
- tapps_agents/simple_mode/orchestrators/pr_orchestrator.py +237 -237
- tapps_agents/simple_mode/orchestrators/refactor_orchestrator.py +222 -222
- tapps_agents/simple_mode/orchestrators/requirements_tracer.py +262 -262
- tapps_agents/simple_mode/orchestrators/resume_orchestrator.py +210 -210
- tapps_agents/simple_mode/orchestrators/review_orchestrator.py +161 -161
- tapps_agents/simple_mode/orchestrators/test_orchestrator.py +82 -82
- tapps_agents/simple_mode/output_aggregator.py +340 -340
- tapps_agents/simple_mode/result_formatters.py +598 -598
- tapps_agents/simple_mode/step_dependencies.py +382 -382
- tapps_agents/simple_mode/step_results.py +276 -276
- tapps_agents/simple_mode/streaming.py +388 -388
- tapps_agents/simple_mode/variations.py +129 -129
- tapps_agents/simple_mode/visual_feedback.py +238 -238
- tapps_agents/simple_mode/zero_config.py +274 -274
- tapps_agents/suggestions/__init__.py +8 -8
- tapps_agents/suggestions/inline_suggester.py +52 -52
- tapps_agents/templates/__init__.py +8 -8
- tapps_agents/templates/microservice_generator.py +274 -274
- tapps_agents/utils/env_validator.py +291 -291
- tapps_agents/workflow/__init__.py +171 -171
- tapps_agents/workflow/acceptance_verifier.py +132 -132
- tapps_agents/workflow/agent_handlers/__init__.py +41 -41
- tapps_agents/workflow/agent_handlers/analyst_handler.py +75 -75
- tapps_agents/workflow/agent_handlers/architect_handler.py +107 -107
- tapps_agents/workflow/agent_handlers/base.py +84 -84
- tapps_agents/workflow/agent_handlers/debugger_handler.py +100 -100
- tapps_agents/workflow/agent_handlers/designer_handler.py +110 -110
- tapps_agents/workflow/agent_handlers/documenter_handler.py +94 -94
- tapps_agents/workflow/agent_handlers/implementer_handler.py +235 -235
- tapps_agents/workflow/agent_handlers/ops_handler.py +62 -62
- tapps_agents/workflow/agent_handlers/orchestrator_handler.py +43 -43
- tapps_agents/workflow/agent_handlers/planner_handler.py +98 -98
- tapps_agents/workflow/agent_handlers/registry.py +119 -119
- tapps_agents/workflow/agent_handlers/reviewer_handler.py +119 -119
- tapps_agents/workflow/agent_handlers/tester_handler.py +69 -69
- tapps_agents/workflow/analytics_accessor.py +337 -337
- tapps_agents/workflow/analytics_alerts.py +416 -416
- tapps_agents/workflow/analytics_dashboard_cursor.py +281 -281
- tapps_agents/workflow/analytics_dual_write.py +103 -103
- tapps_agents/workflow/analytics_integration.py +119 -119
- tapps_agents/workflow/analytics_query_parser.py +278 -278
- tapps_agents/workflow/analytics_visualizer.py +259 -259
- tapps_agents/workflow/artifact_helper.py +204 -204
- tapps_agents/workflow/audit_logger.py +263 -263
- tapps_agents/workflow/auto_execution_config.py +340 -340
- tapps_agents/workflow/auto_progression.py +586 -586
- tapps_agents/workflow/branch_cleanup.py +349 -349
- tapps_agents/workflow/checkpoint.py +256 -256
- tapps_agents/workflow/checkpoint_manager.py +178 -178
- tapps_agents/workflow/code_artifact.py +179 -179
- tapps_agents/workflow/common_enums.py +96 -96
- tapps_agents/workflow/confirmation_handler.py +130 -130
- tapps_agents/workflow/context_analyzer.py +222 -222
- tapps_agents/workflow/context_artifact.py +230 -230
- tapps_agents/workflow/cursor_chat.py +94 -94
- tapps_agents/workflow/cursor_skill_helper.py +516 -516
- tapps_agents/workflow/dependency_resolver.py +244 -244
- tapps_agents/workflow/design_artifact.py +156 -156
- tapps_agents/workflow/detector.py +751 -751
- tapps_agents/workflow/direct_execution_fallback.py +301 -301
- tapps_agents/workflow/docs_artifact.py +168 -168
- tapps_agents/workflow/enforcer.py +389 -389
- tapps_agents/workflow/enhancement_artifact.py +142 -142
- tapps_agents/workflow/error_recovery.py +806 -806
- tapps_agents/workflow/event_bus.py +183 -183
- tapps_agents/workflow/event_log.py +612 -612
- tapps_agents/workflow/events.py +63 -63
- tapps_agents/workflow/exceptions.py +43 -43
- tapps_agents/workflow/execution_graph.py +498 -498
- tapps_agents/workflow/execution_plan.py +126 -126
- tapps_agents/workflow/file_utils.py +186 -186
- tapps_agents/workflow/gate_evaluator.py +182 -182
- tapps_agents/workflow/gate_integration.py +200 -200
- tapps_agents/workflow/graph_visualizer.py +130 -130
- tapps_agents/workflow/health_checker.py +206 -206
- tapps_agents/workflow/logging_helper.py +243 -243
- tapps_agents/workflow/manifest.py +582 -582
- tapps_agents/workflow/marker_writer.py +250 -250
- tapps_agents/workflow/messaging.py +325 -325
- tapps_agents/workflow/metadata_models.py +91 -91
- tapps_agents/workflow/metrics_integration.py +226 -226
- tapps_agents/workflow/migration_utils.py +116 -116
- tapps_agents/workflow/models.py +148 -148
- tapps_agents/workflow/nlp_config.py +198 -198
- tapps_agents/workflow/nlp_error_handler.py +207 -207
- tapps_agents/workflow/nlp_executor.py +163 -163
- tapps_agents/workflow/nlp_parser.py +528 -528
- tapps_agents/workflow/observability_dashboard.py +451 -451
- tapps_agents/workflow/observer.py +170 -170
- tapps_agents/workflow/ops_artifact.py +257 -257
- tapps_agents/workflow/output_passing.py +214 -214
- tapps_agents/workflow/parallel_executor.py +463 -463
- tapps_agents/workflow/planning_artifact.py +179 -179
- tapps_agents/workflow/preset_loader.py +285 -285
- tapps_agents/workflow/preset_recommender.py +270 -270
- tapps_agents/workflow/progress_logger.py +145 -145
- tapps_agents/workflow/progress_manager.py +303 -303
- tapps_agents/workflow/progress_monitor.py +186 -186
- tapps_agents/workflow/progress_updates.py +423 -423
- tapps_agents/workflow/quality_artifact.py +158 -158
- tapps_agents/workflow/quality_loopback.py +101 -101
- tapps_agents/workflow/recommender.py +387 -387
- tapps_agents/workflow/remediation_loop.py +166 -166
- tapps_agents/workflow/result_aggregator.py +300 -300
- tapps_agents/workflow/review_artifact.py +185 -185
- tapps_agents/workflow/schema_validator.py +522 -522
- tapps_agents/workflow/session_handoff.py +178 -178
- tapps_agents/workflow/skill_invoker.py +648 -648
- tapps_agents/workflow/state_manager.py +756 -756
- tapps_agents/workflow/state_persistence_config.py +331 -331
- tapps_agents/workflow/status_monitor.py +449 -449
- tapps_agents/workflow/step_checkpoint.py +314 -314
- tapps_agents/workflow/step_details.py +201 -201
- tapps_agents/workflow/story_models.py +147 -147
- tapps_agents/workflow/streaming.py +416 -416
- tapps_agents/workflow/suggestion_engine.py +552 -552
- tapps_agents/workflow/testing_artifact.py +186 -186
- tapps_agents/workflow/timeline.py +158 -158
- tapps_agents/workflow/token_integration.py +209 -209
- tapps_agents/workflow/validation.py +217 -217
- tapps_agents/workflow/visual_feedback.py +391 -391
- tapps_agents/workflow/workflow_chain.py +95 -95
- tapps_agents/workflow/workflow_summary.py +219 -219
- tapps_agents/workflow/worktree_manager.py +724 -724
- {tapps_agents-3.6.0.dist-info → tapps_agents-3.6.1.dist-info}/METADATA +672 -672
- tapps_agents-3.6.1.dist-info/RECORD +883 -0
- {tapps_agents-3.6.0.dist-info → tapps_agents-3.6.1.dist-info}/licenses/LICENSE +22 -22
- tapps_agents-3.6.0.dist-info/RECORD +0 -758
- {tapps_agents-3.6.0.dist-info → tapps_agents-3.6.1.dist-info}/WHEEL +0 -0
- {tapps_agents-3.6.0.dist-info → tapps_agents-3.6.1.dist-info}/entry_points.txt +0 -0
- {tapps_agents-3.6.0.dist-info → tapps_agents-3.6.1.dist-info}/top_level.txt +0 -0
|
@@ -1,510 +1,510 @@
|
|
|
1
|
-
# Metrics and Monitoring
|
|
2
|
-
|
|
3
|
-
## Overview
|
|
4
|
-
|
|
5
|
-
Metrics provide quantitative measurements of system behavior over time. Unlike logs and traces, which capture discrete events, metrics aggregate data points to show trends, patterns, and system health.
|
|
6
|
-
|
|
7
|
-
## Types of Metrics
|
|
8
|
-
|
|
9
|
-
### 1. Counter
|
|
10
|
-
|
|
11
|
-
**Purpose:** Count occurrences of events (monotonically increasing)
|
|
12
|
-
|
|
13
|
-
**Use Cases:**
|
|
14
|
-
- Request count
|
|
15
|
-
- Error count
|
|
16
|
-
- Total bytes processed
|
|
17
|
-
- Items created
|
|
18
|
-
|
|
19
|
-
**Example:**
|
|
20
|
-
```python
|
|
21
|
-
from prometheus_client import Counter
|
|
22
|
-
|
|
23
|
-
http_requests_total = Counter(
|
|
24
|
-
'http_requests_total',
|
|
25
|
-
'Total HTTP requests',
|
|
26
|
-
['method', 'endpoint', 'status']
|
|
27
|
-
)
|
|
28
|
-
|
|
29
|
-
http_requests_total.labels(method='GET', endpoint='/api/users', status='200').inc()
|
|
30
|
-
```
|
|
31
|
-
|
|
32
|
-
**Characteristics:**
|
|
33
|
-
- Always increments
|
|
34
|
-
- Resets on process restart
|
|
35
|
-
- Good for rate calculations
|
|
36
|
-
|
|
37
|
-
### 2. Gauge
|
|
38
|
-
|
|
39
|
-
**Purpose:** Measure a value that can go up or down
|
|
40
|
-
|
|
41
|
-
**Use Cases:**
|
|
42
|
-
- Current memory usage
|
|
43
|
-
- Active connections
|
|
44
|
-
- Queue size
|
|
45
|
-
- Temperature
|
|
46
|
-
|
|
47
|
-
**Example:**
|
|
48
|
-
```python
|
|
49
|
-
from prometheus_client import Gauge
|
|
50
|
-
|
|
51
|
-
active_connections = Gauge(
|
|
52
|
-
'active_connections',
|
|
53
|
-
'Number of active connections',
|
|
54
|
-
['service']
|
|
55
|
-
)
|
|
56
|
-
|
|
57
|
-
active_connections.labels(service='api').set(42)
|
|
58
|
-
active_connections.labels(service='api').inc() # Increase by 1
|
|
59
|
-
active_connections.labels(service='api').dec() # Decrease by 1
|
|
60
|
-
```
|
|
61
|
-
|
|
62
|
-
**Characteristics:**
|
|
63
|
-
- Can increase or decrease
|
|
64
|
-
- Represents current state
|
|
65
|
-
- Not suitable for rate calculations (for event rates, use a counter with `rate()` instead)
|
|
66
|
-
|
|
67
|
-
### 3. Histogram
|
|
68
|
-
|
|
69
|
-
**Purpose:** Measure distribution of values in buckets
|
|
70
|
-
|
|
71
|
-
**Use Cases:**
|
|
72
|
-
- Request duration
|
|
73
|
-
- Response sizes
|
|
74
|
-
- Processing times
|
|
75
|
-
- Latency percentiles
|
|
76
|
-
|
|
77
|
-
**Example:**
|
|
78
|
-
```python
|
|
79
|
-
from prometheus_client import Histogram
|
|
80
|
-
|
|
81
|
-
request_duration = Histogram(
|
|
82
|
-
'http_request_duration_seconds',
|
|
83
|
-
'HTTP request duration',
|
|
84
|
-
['method', 'endpoint'],
|
|
85
|
-
buckets=[0.1, 0.5, 1.0, 2.5, 5.0, 10.0]
|
|
86
|
-
)
|
|
87
|
-
|
|
88
|
-
with request_duration.labels(method='GET', endpoint='/api/users').time():
|
|
89
|
-
# Process request
|
|
90
|
-
pass
|
|
91
|
-
```
|
|
92
|
-
|
|
93
|
-
**Characteristics:**
|
|
94
|
-
- Pre-defined buckets
|
|
95
|
-
- Supports estimated quantiles (p50, p95, p99), computed server-side from the bucket counts (e.g., `histogram_quantile()` in PromQL)
|
|
96
|
-
- Fixed number of time series per label set (one per bucket, plus `_sum` and `_count`)
|
|
97
|
-
|
|
98
|
-
### 4. Summary
|
|
99
|
-
|
|
100
|
-
**Purpose:** Similar to histogram, but calculates quantiles on the client side
|
|
101
|
-
|
|
102
|
-
**Use Cases:**
|
|
103
|
-
- When you need exact quantiles
|
|
104
|
-
- When bucket boundaries are not known in advance
|
|
105
|
-
|
|
106
|
-
**Example:**
|
|
107
|
-
```python
|
|
108
|
-
from prometheus_client import Summary
|
|
109
|
-
|
|
110
|
-
request_size = Summary(
|
|
111
|
-
'http_request_size_bytes',
|
|
112
|
-
'HTTP request size',
|
|
113
|
-
['method']
|
|
114
|
-
)
|
|
115
|
-
|
|
116
|
-
request_size.labels(method='POST').observe(1024)
|
|
117
|
-
```
|
|
118
|
-
|
|
119
|
-
**Characteristics:**
|
|
120
|
-
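- Computes quantiles on client (note: the Python `prometheus_client` Summary currently exposes only `_count` and `_sum`, not quantiles)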
|
|
121
|
-
- More expensive than histogram
|
|
122
|
-
- Use when exact quantiles needed
|
|
123
|
-
|
|
124
|
-
## Metric Naming Conventions
|
|
125
|
-
|
|
126
|
-
### Prometheus Naming Best Practices
|
|
127
|
-
|
|
128
|
-
**Format:** `{namespace}_{name}_{unit}_{suffix}`
|
|
129
|
-
|
|
130
|
-
**Components:**
|
|
131
|
-
- `namespace`: Service or application name (e.g., `http`, `db`, `cache`)
|
|
132
|
-
- `name`: Descriptive metric name
|
|
133
|
-
- `unit`: Unit of measurement, in base units (e.g., `seconds`, `bytes`); omit for unitless counts
|
|
134
|
-
- `suffix`: Type indicator (e.g., `total`, `count`, `sum`)
|
|
135
|
-
|
|
136
|
-
**Examples:**
|
|
137
|
-
```python
|
|
138
|
-
# Good naming
|
|
139
|
-
http_requests_total
|
|
140
|
-
http_request_duration_seconds
|
|
141
|
-
cache_hits_total
|
|
142
|
-
cache_miss_rate
|
|
143
|
-
db_connection_pool_size
|
|
144
|
-
memory_usage_bytes
|
|
145
|
-
|
|
146
|
-
# Bad naming
|
|
147
|
-
requests # Too generic
|
|
148
|
-
req_time # Abbreviated, unclear unit
|
|
149
|
-
cache # Not descriptive
|
|
150
|
-
```
|
|
151
|
-
|
|
152
|
-
**Rules:**
|
|
153
|
-
- Use lowercase
|
|
154
|
-
- Separate words with underscores
|
|
155
|
-
- Use base units (seconds, bytes, not milliseconds, KB)
|
|
156
|
-
- End counters with `_total`
|
|
157
|
-
- End summaries/histograms with `_seconds` or `_bytes`
|
|
158
|
-
|
|
159
|
-
## Golden Signals
|
|
160
|
-
|
|
161
|
-
### 1. Latency
|
|
162
|
-
|
|
163
|
-
**Definition:** Time taken to serve a request
|
|
164
|
-
|
|
165
|
-
**Key Metrics:**
|
|
166
|
-
- Request duration (p50, p95, p99)
|
|
167
|
-
- Response time distribution
|
|
168
|
-
- Time to first byte (TTFB)
|
|
169
|
-
|
|
170
|
-
**Monitoring:**
|
|
171
|
-
```python
|
|
172
|
-
request_latency = Histogram(
|
|
173
|
-
'http_request_duration_seconds',
|
|
174
|
-
'Request latency',
|
|
175
|
-
['method', 'endpoint'],
|
|
176
|
-
buckets=[0.01, 0.05, 0.1, 0.5, 1.0, 2.0, 5.0]
|
|
177
|
-
)
|
|
178
|
-
|
|
179
|
-
# Alert on: p99 latency > 1 second
|
|
180
|
-
```
|
|
181
|
-
|
|
182
|
-
### 2. Traffic
|
|
183
|
-
|
|
184
|
-
**Definition:** Demand placed on the system
|
|
185
|
-
|
|
186
|
-
**Key Metrics:**
|
|
187
|
-
- Requests per second (RPS)
|
|
188
|
-
- Concurrent connections
|
|
189
|
-
- Message throughput
|
|
190
|
-
|
|
191
|
-
**Monitoring:**
|
|
192
|
-
```python
|
|
193
|
-
request_rate = Counter(
|
|
194
|
-
'http_requests_total',
|
|
195
|
-
'Total requests',
|
|
196
|
-
['method', 'endpoint', 'status']
|
|
197
|
-
)
|
|
198
|
-
|
|
199
|
-
# Alert on: Sudden drop in traffic (>50% decrease)
|
|
200
|
-
# Alert on: Traffic spike (>200% increase)
|
|
201
|
-
```
|
|
202
|
-
|
|
203
|
-
### 3. Errors
|
|
204
|
-
|
|
205
|
-
**Definition:** Rate of requests that fail
|
|
206
|
-
|
|
207
|
-
**Key Metrics:**
|
|
208
|
-
- Error rate (4xx, 5xx)
|
|
209
|
-
- Error count
|
|
210
|
-
- Exception rate
|
|
211
|
-
|
|
212
|
-
**Monitoring:**
|
|
213
|
-
```python
|
|
214
|
-
error_rate = Counter(
|
|
215
|
-
'http_errors_total',
|
|
216
|
-
'Total errors',
|
|
217
|
-
['status_code', 'error_type']
|
|
218
|
-
)
|
|
219
|
-
|
|
220
|
-
# Alert on: Error rate > 1%
|
|
221
|
-
# Alert on: 5xx errors > 0.1%
|
|
222
|
-
```
|
|
223
|
-
|
|
224
|
-
### 4. Saturation
|
|
225
|
-
|
|
226
|
-
**Definition:** How "full" the system is
|
|
227
|
-
|
|
228
|
-
**Key Metrics:**
|
|
229
|
-
- CPU utilization
|
|
230
|
-
- Memory usage
|
|
231
|
-
- Queue depth
|
|
232
|
-
- Disk I/O utilization
|
|
233
|
-
|
|
234
|
-
**Monitoring:**
|
|
235
|
-
```python
|
|
236
|
-
cpu_usage = Gauge('cpu_usage_percent', 'CPU usage percentage')
|
|
237
|
-
memory_usage = Gauge('memory_usage_bytes', 'Memory usage')
|
|
238
|
-
queue_depth = Gauge('queue_depth', 'Queue depth', ['queue_name'])
|
|
239
|
-
|
|
240
|
-
# Alert on: CPU > 80%
|
|
241
|
-
# Alert on: Memory > 90%
|
|
242
|
-
# Alert on: Queue depth > 1000
|
|
243
|
-
```
|
|
244
|
-
|
|
245
|
-
## Instrumentation Patterns
|
|
246
|
-
|
|
247
|
-
### 1. HTTP Server Metrics
|
|
248
|
-
|
|
249
|
-
```python
|
|
250
|
-
from prometheus_client import Counter, Histogram, Gauge
|
|
251
|
-
from flask import Flask, request
|
|
252
|
-
|
|
253
|
-
app = Flask(__name__)
|
|
254
|
-
|
|
255
|
-
http_requests_total = Counter(
|
|
256
|
-
'http_requests_total',
|
|
257
|
-
'Total HTTP requests',
|
|
258
|
-
['method', 'endpoint', 'status']
|
|
259
|
-
)
|
|
260
|
-
|
|
261
|
-
http_request_duration = Histogram(
|
|
262
|
-
'http_request_duration_seconds',
|
|
263
|
-
'HTTP request duration',
|
|
264
|
-
['method', 'endpoint']
|
|
265
|
-
)
|
|
266
|
-
|
|
267
|
-
active_requests = Gauge(
|
|
268
|
-
'http_active_requests',
|
|
269
|
-
'Currently active requests'
|
|
270
|
-
)
|
|
271
|
-
|
|
272
|
-
@app.before_request
|
|
273
|
-
def before_request():
|
|
274
|
-
active_requests.inc()
|
|
275
|
-
request._start_time = time.time()
|
|
276
|
-
|
|
277
|
-
@app.after_request
|
|
278
|
-
def after_request(response):
|
|
279
|
-
duration = time.time() - request._start_time
|
|
280
|
-
active_requests.dec()
|
|
281
|
-
|
|
282
|
-
http_request_duration.labels(
|
|
283
|
-
method=request.method,
|
|
284
|
-
endpoint=request.endpoint
|
|
285
|
-
).observe(duration)
|
|
286
|
-
|
|
287
|
-
http_requests_total.labels(
|
|
288
|
-
method=request.method,
|
|
289
|
-
endpoint=request.endpoint,
|
|
290
|
-
status=response.status_code
|
|
291
|
-
).inc()
|
|
292
|
-
|
|
293
|
-
return response
|
|
294
|
-
```
|
|
295
|
-
|
|
296
|
-
### 2. Database Metrics
|
|
297
|
-
|
|
298
|
-
```python
|
|
299
|
-
db_query_duration = Histogram(
|
|
300
|
-
'db_query_duration_seconds',
|
|
301
|
-
'Database query duration',
|
|
302
|
-
['query_type', 'table']
|
|
303
|
-
)
|
|
304
|
-
|
|
305
|
-
db_connections = Gauge(
|
|
306
|
-
'db_connections_active',
|
|
307
|
-
'Active database connections'
|
|
308
|
-
)
|
|
309
|
-
|
|
310
|
-
db_query_errors = Counter(
|
|
311
|
-
'db_query_errors_total',
|
|
312
|
-
'Database query errors',
|
|
313
|
-
['error_type']
|
|
314
|
-
)
|
|
315
|
-
|
|
316
|
-
def execute_query(query: str, query_type: str, table: str):
|
|
317
|
-
with db_query_duration.labels(query_type=query_type, table=table).time():
|
|
318
|
-
try:
|
|
319
|
-
db_connections.inc()
|
|
320
|
-
result = db.execute(query)
|
|
321
|
-
return result
|
|
322
|
-
except Exception as e:
|
|
323
|
-
db_query_errors.labels(error_type=type(e).__name__).inc()
|
|
324
|
-
raise
|
|
325
|
-
finally:
|
|
326
|
-
db_connections.dec()
|
|
327
|
-
```
|
|
328
|
-
|
|
329
|
-
### 3. Business Metrics
|
|
330
|
-
|
|
331
|
-
```python
|
|
332
|
-
orders_created = Counter(
|
|
333
|
-
'orders_created_total',
|
|
334
|
-
'Total orders created',
|
|
335
|
-
['product_type', 'payment_method']
|
|
336
|
-
)
|
|
337
|
-
|
|
338
|
-
order_value = Histogram(
|
|
339
|
-
'order_value_dollars',
|
|
340
|
-
'Order value distribution',
|
|
341
|
-
['product_type'],
|
|
342
|
-
buckets=[10, 50, 100, 500, 1000, 5000]
|
|
343
|
-
)
|
|
344
|
-
|
|
345
|
-
def create_order(product_type: str, payment_method: str, value: float):
|
|
346
|
-
orders_created.labels(
|
|
347
|
-
product_type=product_type,
|
|
348
|
-
payment_method=payment_method
|
|
349
|
-
).inc()
|
|
350
|
-
|
|
351
|
-
order_value.labels(product_type=product_type).observe(value)
|
|
352
|
-
```
|
|
353
|
-
|
|
354
|
-
## Monitoring Best Practices
|
|
355
|
-
|
|
356
|
-
### 1. Cardinality Management
|
|
357
|
-
|
|
358
|
-
**Problem:** Too many unique label combinations create too many time series
|
|
359
|
-
|
|
360
|
-
**Bad Example:**
|
|
361
|
-
```python
|
|
362
|
-
# Creates unique series for every user ID - BAD!
|
|
363
|
-
user_requests_total.labels(user_id=user_id).inc()
|
|
364
|
-
```
|
|
365
|
-
|
|
366
|
-
**Good Example:**
|
|
367
|
-
```python
|
|
368
|
-
# Use bounded label sets
|
|
369
|
-
user_requests_total.labels(user_tier='premium').inc()
|
|
370
|
-
```
|
|
371
|
-
|
|
372
|
-
**Guidelines:**
|
|
373
|
-
- Use bounded label values (status codes, not user IDs)
|
|
374
|
-
- Limit label cardinality (< 100 unique combinations)
|
|
375
|
-
- Use logs/traces for high-cardinality data
|
|
376
|
-
|
|
377
|
-
### 2. Aggregation Strategy
|
|
378
|
-
|
|
379
|
-
**Levels of Aggregation:**
|
|
380
|
-
- **Per-service:** Overall service health
|
|
381
|
-
- **Per-endpoint:** API endpoint performance
|
|
382
|
-
- **Per-instance:** Individual instance health
|
|
383
|
-
|
|
384
|
-
**Example:**
|
|
385
|
-
```python
|
|
386
|
-
# Service-level
|
|
387
|
-
service_requests_total = Counter('service_requests_total', ...)
|
|
388
|
-
|
|
389
|
-
# Endpoint-level
|
|
390
|
-
endpoint_requests_total = Counter(
|
|
391
|
-
'endpoint_requests_total',
|
|
392
|
-
...,
|
|
393
|
-
['endpoint']
|
|
394
|
-
)
|
|
395
|
-
```
|
|
396
|
-
|
|
397
|
-
### 3. Retention and Storage
|
|
398
|
-
|
|
399
|
-
**Time Series Database Considerations:**
|
|
400
|
-
- Retention periods (15 days, 30 days, 1 year)
|
|
401
|
-
- Downsampling for long-term storage
|
|
402
|
-
- Compression strategies
|
|
403
|
-
|
|
404
|
-
**Example Retention:**
|
|
405
|
-
- Raw metrics: 15 days
|
|
406
|
-
- 5-minute aggregates: 30 days
|
|
407
|
-
- 1-hour aggregates: 1 year
|
|
408
|
-
|
|
409
|
-
### 4. Alerting Thresholds
|
|
410
|
-
|
|
411
|
-
**SLO-Based Alerting:**
|
|
412
|
-
- Alert when SLO is at risk
|
|
413
|
-
- Use burn rate for error budgets
|
|
414
|
-
- Alert on symptoms, not causes
|
|
415
|
-
|
|
416
|
-
**Example:**
|
|
417
|
-
```yaml
|
|
418
|
-
# Alert if error rate threatens SLO
|
|
419
|
-
- alert: HighErrorRate
|
|
420
|
-
expr: |
|
|
421
|
-
sum(rate(http_errors_total[5m])) / sum(rate(http_requests_total[5m])) > 0.01
|
|
422
|
-
for: 5m
|
|
423
|
-
annotations:
|
|
424
|
-
summary: "Error rate above 1% for 5 minutes"
|
|
425
|
-
```
|
|
426
|
-
|
|
427
|
-
## Tools and Platforms
|
|
428
|
-
|
|
429
|
-
### Open Source
|
|
430
|
-
|
|
431
|
-
**Prometheus:**
|
|
432
|
-
- Time series database
|
|
433
|
-
- Pull-based metric collection
|
|
434
|
-
- Powerful query language (PromQL)
|
|
435
|
-
- Service discovery integration
|
|
436
|
-
|
|
437
|
-
**StatsD:**
|
|
438
|
-
- Simple metrics aggregation daemon
|
|
439
|
-
- Push-based (UDP)
|
|
440
|
-
- Language-agnostic
|
|
441
|
-
- Aggregates before storage
|
|
442
|
-
|
|
443
|
-
**Grafana:**
|
|
444
|
-
- Visualization and alerting platform
|
|
445
|
-
- Supports multiple data sources
|
|
446
|
-
- Rich dashboard ecosystem
|
|
447
|
-
|
|
448
|
-
### Commercial
|
|
449
|
-
|
|
450
|
-
**Datadog:**
|
|
451
|
-
- Infrastructure and application monitoring
|
|
452
|
-
- Custom metrics support
|
|
453
|
-
- AI-powered anomaly detection
|
|
454
|
-
|
|
455
|
-
**New Relic:**
|
|
456
|
-
- Full-stack observability
|
|
457
|
-
- Custom metrics
|
|
458
|
-
- Intelligent alerting
|
|
459
|
-
|
|
460
|
-
**CloudWatch (AWS):**
|
|
461
|
-
- Native AWS integration
|
|
462
|
-
- Custom metrics
|
|
463
|
-
- CloudWatch Alarms
|
|
464
|
-
|
|
465
|
-
## Metric Collection Patterns
|
|
466
|
-
|
|
467
|
-
### 1. Push vs Pull
|
|
468
|
-
|
|
469
|
-
**Pull Model (Prometheus):**
|
|
470
|
-
- Scraper polls endpoints
|
|
471
|
-
- Better for centralization
|
|
472
|
-
- Supports service discovery
|
|
473
|
-
|
|
474
|
-
**Push Model (StatsD, CloudWatch):**
|
|
475
|
-
- Services push metrics
|
|
476
|
-
- Simpler for ephemeral services
|
|
477
|
-
- Good for serverless
|
|
478
|
-
|
|
479
|
-
### 2. Export Strategies
|
|
480
|
-
|
|
481
|
-
**Direct Export:**
|
|
482
|
-
```python
|
|
483
|
-
# Export metrics directly
|
|
484
|
-
from prometheus_client import start_http_server
|
|
485
|
-
start_http_server(8000) # Exposes /metrics endpoint
|
|
486
|
-
```
|
|
487
|
-
|
|
488
|
-
**Push Gateway:**
|
|
489
|
-
```python
|
|
490
|
-
# Push to gateway (for short-lived jobs)
|
|
491
|
-
from prometheus_client import CollectorRegistry, push_to_gateway
|
|
492
|
-
|
|
493
|
-
registry = CollectorRegistry()
|
|
494
|
-
# ... register metrics ...
|
|
495
|
-
push_to_gateway('pushgateway:9091', job='batch-job', registry=registry)
|
|
496
|
-
```
|
|
497
|
-
|
|
498
|
-
## Best Practices Summary
|
|
499
|
-
|
|
500
|
-
1. **Follow naming conventions** (namespace_name_unit_suffix)
|
|
501
|
-
2. **Use appropriate metric types** (counter, gauge, histogram, summary)
|
|
502
|
-
3. **Monitor golden signals** (latency, traffic, errors, saturation)
|
|
503
|
-
4. **Manage cardinality** (avoid high-cardinality labels)
|
|
504
|
-
5. **Instrument at the right granularity** (service, endpoint, operation)
|
|
505
|
-
6. **Set appropriate retention** periods
|
|
506
|
-
7. **Create actionable alerts** based on SLOs
|
|
507
|
-
8. **Use aggregation** for long-term trends
|
|
508
|
-
9. **Combine with logs and traces** for complete picture
|
|
509
|
-
10. **Document metrics** with clear descriptions and units
|
|
510
|
-
|
|
1
|
+
# Metrics and Monitoring
|
|
2
|
+
|
|
3
|
+
## Overview
|
|
4
|
+
|
|
5
|
+
Metrics provide quantitative measurements of system behavior over time. Unlike logs and traces which capture discrete events, metrics aggregate data points to show trends, patterns, and system health.
|
|
6
|
+
|
|
7
|
+
## Types of Metrics
|
|
8
|
+
|
|
9
|
+
### 1. Counter
|
|
10
|
+
|
|
11
|
+
**Purpose:** Count occurrences of events (monotonically increasing)
|
|
12
|
+
|
|
13
|
+
**Use Cases:**
|
|
14
|
+
- Request count
|
|
15
|
+
- Error count
|
|
16
|
+
- Total bytes processed
|
|
17
|
+
- Items created
|
|
18
|
+
|
|
19
|
+
**Example:**
|
|
20
|
+
```python
|
|
21
|
+
from prometheus_client import Counter
|
|
22
|
+
|
|
23
|
+
http_requests_total = Counter(
|
|
24
|
+
'http_requests_total',
|
|
25
|
+
'Total HTTP requests',
|
|
26
|
+
['method', 'endpoint', 'status']
|
|
27
|
+
)
|
|
28
|
+
|
|
29
|
+
http_requests_total.labels(method='GET', endpoint='/api/users', status='200').inc()
|
|
30
|
+
```
|
|
31
|
+
|
|
32
|
+
**Characteristics:**
|
|
33
|
+
- Always increments
|
|
34
|
+
- Resets on process restart
|
|
35
|
+
- Good for rate calculations
|
|
36
|
+
|
|
37
|
+
### 2. Gauge
|
|
38
|
+
|
|
39
|
+
**Purpose:** Measure a value that can go up or down
|
|
40
|
+
|
|
41
|
+
**Use Cases:**
|
|
42
|
+
- Current memory usage
|
|
43
|
+
- Active connections
|
|
44
|
+
- Queue size
|
|
45
|
+
- Temperature
|
|
46
|
+
|
|
47
|
+
**Example:**
|
|
48
|
+
```python
|
|
49
|
+
from prometheus_client import Gauge
|
|
50
|
+
|
|
51
|
+
active_connections = Gauge(
|
|
52
|
+
'active_connections',
|
|
53
|
+
'Number of active connections',
|
|
54
|
+
['service']
|
|
55
|
+
)
|
|
56
|
+
|
|
57
|
+
active_connections.labels(service='api').set(42)
|
|
58
|
+
active_connections.labels(service='api').inc() # Increase by 1
|
|
59
|
+
active_connections.labels(service='api').dec() # Decrease by 1
|
|
60
|
+
```
|
|
61
|
+
|
|
62
|
+
**Characteristics:**
|
|
63
|
+
- Can increase or decrease
|
|
64
|
+
- Represents current state
|
|
65
|
+
- Not suitable for rate calculations (use a counter with `rate()` for event rates)
|
|
66
|
+
|
|
67
|
+
### 3. Histogram
|
|
68
|
+
|
|
69
|
+
**Purpose:** Measure distribution of values in buckets
|
|
70
|
+
|
|
71
|
+
**Use Cases:**
|
|
72
|
+
- Request duration
|
|
73
|
+
- Response sizes
|
|
74
|
+
- Processing times
|
|
75
|
+
- Latency percentiles
|
|
76
|
+
|
|
77
|
+
**Example:**
|
|
78
|
+
```python
|
|
79
|
+
from prometheus_client import Histogram
|
|
80
|
+
|
|
81
|
+
request_duration = Histogram(
|
|
82
|
+
'http_request_duration_seconds',
|
|
83
|
+
'HTTP request duration',
|
|
84
|
+
['method', 'endpoint'],
|
|
85
|
+
buckets=[0.1, 0.5, 1.0, 2.5, 5.0, 10.0]
|
|
86
|
+
)
|
|
87
|
+
|
|
88
|
+
with request_duration.labels(method='GET', endpoint='/api/users').time():
|
|
89
|
+
# Process request
|
|
90
|
+
pass
|
|
91
|
+
```
|
|
92
|
+
|
|
93
|
+
**Characteristics:**
|
|
94
|
+
- Pre-defined buckets
|
|
95
|
+
- Quantiles (p50, p95, p99) are estimated server-side from the bucket counts (e.g. with `histogram_quantile()`)
|
|
96
|
+
- Fixed number of data points
|
|
97
|
+
|
|
98
|
+
### 4. Summary
|
|
99
|
+
|
|
100
|
+
**Purpose:** Similar to histogram, but calculates quantiles on the client side
|
|
101
|
+
|
|
102
|
+
**Use Cases:**
|
|
103
|
+
- When you need exact quantiles
|
|
104
|
+
- When bucket boundaries are not known in advance
|
|
105
|
+
|
|
106
|
+
**Example:**
|
|
107
|
+
```python
|
|
108
|
+
from prometheus_client import Summary
|
|
109
|
+
|
|
110
|
+
request_size = Summary(
|
|
111
|
+
'http_request_size_bytes',
|
|
112
|
+
'HTTP request size',
|
|
113
|
+
['method']
|
|
114
|
+
)
|
|
115
|
+
|
|
116
|
+
request_size.labels(method='POST').observe(1024)
|
|
117
|
+
```
|
|
118
|
+
|
|
119
|
+
**Characteristics:**
|
|
120
|
+
- Computes quantiles on client (note: the Python `prometheus_client` Summary exposes only `_count` and `_sum`, not quantiles)
|
|
121
|
+
- More expensive than histogram
|
|
122
|
+
- Use when exact quantiles needed
|
|
123
|
+
|
|
124
|
+
## Metric Naming Conventions
|
|
125
|
+
|
|
126
|
+
### Prometheus Naming Best Practices
|
|
127
|
+
|
|
128
|
+
**Format:** `{namespace}_{name}_{unit}_{suffix}`
|
|
129
|
+
|
|
130
|
+
**Components:**
|
|
131
|
+
- `namespace`: Service or application name (e.g., `http`, `db`, `cache`)
|
|
132
|
+
- `name`: Descriptive metric name
|
|
133
|
+
- `unit`: Unit of measurement in base units (e.g., `seconds`, `bytes`)
|
|
134
|
+
- `suffix`: Type indicator (e.g., `total`, `count`, `sum`)
|
|
135
|
+
|
|
136
|
+
**Examples:**
|
|
137
|
+
```python
|
|
138
|
+
# Good naming
|
|
139
|
+
http_requests_total
|
|
140
|
+
http_request_duration_seconds
|
|
141
|
+
cache_hits_total
|
|
142
|
+
cache_miss_rate
|
|
143
|
+
db_connection_pool_size
|
|
144
|
+
memory_usage_bytes
|
|
145
|
+
|
|
146
|
+
# Bad naming
|
|
147
|
+
requests # Too generic
|
|
148
|
+
req_time # Abbreviated, unclear unit
|
|
149
|
+
cache # Not descriptive
|
|
150
|
+
```
|
|
151
|
+
|
|
152
|
+
**Rules:**
|
|
153
|
+
- Use lowercase
|
|
154
|
+
- Separate words with underscores
|
|
155
|
+
- Use base units (seconds, bytes, not milliseconds, KB)
|
|
156
|
+
- End counters with `_total`
|
|
157
|
+
- End summaries/histograms with `_seconds` or `_bytes`
|
|
158
|
+
|
|
159
|
+
## Golden Signals
|
|
160
|
+
|
|
161
|
+
### 1. Latency
|
|
162
|
+
|
|
163
|
+
**Definition:** Time taken to serve a request
|
|
164
|
+
|
|
165
|
+
**Key Metrics:**
|
|
166
|
+
- Request duration (p50, p95, p99)
|
|
167
|
+
- Response time distribution
|
|
168
|
+
- Time to first byte (TTFB)
|
|
169
|
+
|
|
170
|
+
**Monitoring:**
|
|
171
|
+
```python
|
|
172
|
+
request_latency = Histogram(
|
|
173
|
+
'http_request_duration_seconds',
|
|
174
|
+
'Request latency',
|
|
175
|
+
['method', 'endpoint'],
|
|
176
|
+
buckets=[0.01, 0.05, 0.1, 0.5, 1.0, 2.0, 5.0]
|
|
177
|
+
)
|
|
178
|
+
|
|
179
|
+
# Alert on: p99 latency > 1 second
|
|
180
|
+
```
|
|
181
|
+
|
|
182
|
+
### 2. Traffic
|
|
183
|
+
|
|
184
|
+
**Definition:** Demand placed on the system
|
|
185
|
+
|
|
186
|
+
**Key Metrics:**
|
|
187
|
+
- Requests per second (RPS)
|
|
188
|
+
- Concurrent connections
|
|
189
|
+
- Message throughput
|
|
190
|
+
|
|
191
|
+
**Monitoring:**
|
|
192
|
+
```python
|
|
193
|
+
request_rate = Counter(
|
|
194
|
+
'http_requests_total',
|
|
195
|
+
'Total requests',
|
|
196
|
+
['method', 'endpoint', 'status']
|
|
197
|
+
)
|
|
198
|
+
|
|
199
|
+
# Alert on: Sudden drop in traffic (>50% decrease)
|
|
200
|
+
# Alert on: Traffic spike (>200% increase)
|
|
201
|
+
```
|
|
202
|
+
|
|
203
|
+
### 3. Errors
|
|
204
|
+
|
|
205
|
+
**Definition:** Rate of requests that fail
|
|
206
|
+
|
|
207
|
+
**Key Metrics:**
|
|
208
|
+
- Error rate (4xx, 5xx)
|
|
209
|
+
- Error count
|
|
210
|
+
- Exception rate
|
|
211
|
+
|
|
212
|
+
**Monitoring:**
|
|
213
|
+
```python
|
|
214
|
+
error_rate = Counter(
|
|
215
|
+
'http_errors_total',
|
|
216
|
+
'Total errors',
|
|
217
|
+
['status_code', 'error_type']
|
|
218
|
+
)
|
|
219
|
+
|
|
220
|
+
# Alert on: Error rate > 1%
|
|
221
|
+
# Alert on: 5xx errors > 0.1%
|
|
222
|
+
```
|
|
223
|
+
|
|
224
|
+
### 4. Saturation
|
|
225
|
+
|
|
226
|
+
**Definition:** How "full" the system is
|
|
227
|
+
|
|
228
|
+
**Key Metrics:**
|
|
229
|
+
- CPU utilization
|
|
230
|
+
- Memory usage
|
|
231
|
+
- Queue depth
|
|
232
|
+
- Disk I/O utilization
|
|
233
|
+
|
|
234
|
+
**Monitoring:**
|
|
235
|
+
```python
|
|
236
|
+
cpu_usage = Gauge('cpu_usage_percent', 'CPU usage percentage')
|
|
237
|
+
memory_usage = Gauge('memory_usage_bytes', 'Memory usage')
|
|
238
|
+
queue_depth = Gauge('queue_depth', 'Queue depth', ['queue_name'])
|
|
239
|
+
|
|
240
|
+
# Alert on: CPU > 80%
|
|
241
|
+
# Alert on: Memory > 90%
|
|
242
|
+
# Alert on: Queue depth > 1000
|
|
243
|
+
```
|
|
244
|
+
|
|
245
|
+
## Instrumentation Patterns
|
|
246
|
+
|
|
247
|
+
### 1. HTTP Server Metrics
|
|
248
|
+
|
|
249
|
+
```python
|
|
250
|
+
from prometheus_client import Counter, Histogram, Gauge
|
|
251
|
+
from flask import Flask, request
|
|
252
|
+
|
|
253
|
+
app = Flask(__name__)
|
|
254
|
+
|
|
255
|
+
http_requests_total = Counter(
|
|
256
|
+
'http_requests_total',
|
|
257
|
+
'Total HTTP requests',
|
|
258
|
+
['method', 'endpoint', 'status']
|
|
259
|
+
)
|
|
260
|
+
|
|
261
|
+
http_request_duration = Histogram(
|
|
262
|
+
'http_request_duration_seconds',
|
|
263
|
+
'HTTP request duration',
|
|
264
|
+
['method', 'endpoint']
|
|
265
|
+
)
|
|
266
|
+
|
|
267
|
+
active_requests = Gauge(
|
|
268
|
+
'http_active_requests',
|
|
269
|
+
'Currently active requests'
|
|
270
|
+
)
|
|
271
|
+
|
|
272
|
+
@app.before_request
|
|
273
|
+
def before_request():
|
|
274
|
+
active_requests.inc()
|
|
275
|
+
request._start_time = time.time()
|
|
276
|
+
|
|
277
|
+
@app.after_request
|
|
278
|
+
def after_request(response):
|
|
279
|
+
duration = time.time() - request._start_time
|
|
280
|
+
active_requests.dec()
|
|
281
|
+
|
|
282
|
+
http_request_duration.labels(
|
|
283
|
+
method=request.method,
|
|
284
|
+
endpoint=request.endpoint
|
|
285
|
+
).observe(duration)
|
|
286
|
+
|
|
287
|
+
http_requests_total.labels(
|
|
288
|
+
method=request.method,
|
|
289
|
+
endpoint=request.endpoint,
|
|
290
|
+
status=response.status_code
|
|
291
|
+
).inc()
|
|
292
|
+
|
|
293
|
+
return response
|
|
294
|
+
```
|
|
295
|
+
|
|
296
|
+
### 2. Database Metrics
|
|
297
|
+
|
|
298
|
+
```python
|
|
299
|
+
db_query_duration = Histogram(
|
|
300
|
+
'db_query_duration_seconds',
|
|
301
|
+
'Database query duration',
|
|
302
|
+
['query_type', 'table']
|
|
303
|
+
)
|
|
304
|
+
|
|
305
|
+
db_connections = Gauge(
|
|
306
|
+
'db_connections_active',
|
|
307
|
+
'Active database connections'
|
|
308
|
+
)
|
|
309
|
+
|
|
310
|
+
db_query_errors = Counter(
|
|
311
|
+
'db_query_errors_total',
|
|
312
|
+
'Database query errors',
|
|
313
|
+
['error_type']
|
|
314
|
+
)
|
|
315
|
+
|
|
316
|
+
def execute_query(query: str, query_type: str, table: str):
|
|
317
|
+
with db_query_duration.labels(query_type=query_type, table=table).time():
|
|
318
|
+
try:
|
|
319
|
+
db_connections.inc()
|
|
320
|
+
result = db.execute(query)
|
|
321
|
+
return result
|
|
322
|
+
except Exception as e:
|
|
323
|
+
db_query_errors.labels(error_type=type(e).__name__).inc()
|
|
324
|
+
raise
|
|
325
|
+
finally:
|
|
326
|
+
db_connections.dec()
|
|
327
|
+
```
|
|
328
|
+
|
|
329
|
+
### 3. Business Metrics
|
|
330
|
+
|
|
331
|
+
```python
|
|
332
|
+
orders_created = Counter(
|
|
333
|
+
'orders_created_total',
|
|
334
|
+
'Total orders created',
|
|
335
|
+
['product_type', 'payment_method']
|
|
336
|
+
)
|
|
337
|
+
|
|
338
|
+
order_value = Histogram(
|
|
339
|
+
'order_value_dollars',
|
|
340
|
+
'Order value distribution',
|
|
341
|
+
['product_type'],
|
|
342
|
+
buckets=[10, 50, 100, 500, 1000, 5000]
|
|
343
|
+
)
|
|
344
|
+
|
|
345
|
+
def create_order(product_type: str, payment_method: str, value: float):
|
|
346
|
+
orders_created.labels(
|
|
347
|
+
product_type=product_type,
|
|
348
|
+
payment_method=payment_method
|
|
349
|
+
).inc()
|
|
350
|
+
|
|
351
|
+
order_value.labels(product_type=product_type).observe(value)
|
|
352
|
+
```
|
|
353
|
+
|
|
354
|
+
## Monitoring Best Practices
|
|
355
|
+
|
|
356
|
+
### 1. Cardinality Management
|
|
357
|
+
|
|
358
|
+
**Problem:** Too many unique label combinations create too many time series
|
|
359
|
+
|
|
360
|
+
**Bad Example:**
|
|
361
|
+
```python
|
|
362
|
+
# Creates unique series for every user ID - BAD!
|
|
363
|
+
user_requests_total.labels(user_id=user_id).inc()
|
|
364
|
+
```
|
|
365
|
+
|
|
366
|
+
**Good Example:**
|
|
367
|
+
```python
|
|
368
|
+
# Use bounded label sets
|
|
369
|
+
user_requests_total.labels(user_tier='premium').inc()
|
|
370
|
+
```
|
|
371
|
+
|
|
372
|
+
**Guidelines:**
|
|
373
|
+
- Use bounded label values (status codes, not user IDs)
|
|
374
|
+
- Limit label cardinality (< 100 unique combinations)
|
|
375
|
+
- Use logs/traces for high-cardinality data
|
|
376
|
+
|
|
377
|
+
### 2. Aggregation Strategy
|
|
378
|
+
|
|
379
|
+
**Levels of Aggregation:**
|
|
380
|
+
- **Per-service:** Overall service health
|
|
381
|
+
- **Per-endpoint:** API endpoint performance
|
|
382
|
+
- **Per-instance:** Individual instance health
|
|
383
|
+
|
|
384
|
+
**Example:**
|
|
385
|
+
```python
|
|
386
|
+
# Service-level
|
|
387
|
+
service_requests_total = Counter('service_requests_total', ...)
|
|
388
|
+
|
|
389
|
+
# Endpoint-level
|
|
390
|
+
endpoint_requests_total = Counter(
|
|
391
|
+
'endpoint_requests_total',
|
|
392
|
+
...,
|
|
393
|
+
['endpoint']
|
|
394
|
+
)
|
|
395
|
+
```
|
|
396
|
+
|
|
397
|
+
### 3. Retention and Storage
|
|
398
|
+
|
|
399
|
+
**Time Series Database Considerations:**
|
|
400
|
+
- Retention periods (15 days, 30 days, 1 year)
|
|
401
|
+
- Downsampling for long-term storage
|
|
402
|
+
- Compression strategies
|
|
403
|
+
|
|
404
|
+
**Example Retention:**
|
|
405
|
+
- Raw metrics: 15 days
|
|
406
|
+
- 5-minute aggregates: 30 days
|
|
407
|
+
- 1-hour aggregates: 1 year
|
|
408
|
+
|
|
409
|
+
### 4. Alerting Thresholds
|
|
410
|
+
|
|
411
|
+
**SLO-Based Alerting:**
|
|
412
|
+
- Alert when SLO is at risk
|
|
413
|
+
- Use burn rate for error budgets
|
|
414
|
+
- Alert on symptoms, not causes
|
|
415
|
+
|
|
416
|
+
**Example:**
|
|
417
|
+
```yaml
|
|
418
|
+
# Alert if error rate threatens SLO
|
|
419
|
+
- alert: HighErrorRate
|
|
420
|
+
expr: |
|
|
421
|
+
sum(rate(http_errors_total[5m])) / sum(rate(http_requests_total[5m])) > 0.01
|
|
422
|
+
for: 5m
|
|
423
|
+
annotations:
|
|
424
|
+
summary: "Error rate above 1% for 5 minutes"
|
|
425
|
+
```
|
|
426
|
+
|
|
427
|
+
## Tools and Platforms
|
|
428
|
+
|
|
429
|
+
### Open Source
|
|
430
|
+
|
|
431
|
+
**Prometheus:**
|
|
432
|
+
- Time series database
|
|
433
|
+
- Pull-based metric collection
|
|
434
|
+
- Powerful query language (PromQL)
|
|
435
|
+
- Service discovery integration
|
|
436
|
+
|
|
437
|
+
**StatsD:**
|
|
438
|
+
- Simple metrics aggregation daemon
|
|
439
|
+
- Push-based (UDP)
|
|
440
|
+
- Language-agnostic
|
|
441
|
+
- Aggregates before storage
|
|
442
|
+
|
|
443
|
+
**Grafana:**
|
|
444
|
+
- Visualization and alerting platform
|
|
445
|
+
- Supports multiple data sources
|
|
446
|
+
- Rich dashboard ecosystem
|
|
447
|
+
|
|
448
|
+
### Commercial
|
|
449
|
+
|
|
450
|
+
**Datadog:**
|
|
451
|
+
- Infrastructure and application monitoring
|
|
452
|
+
- Custom metrics support
|
|
453
|
+
- AI-powered anomaly detection
|
|
454
|
+
|
|
455
|
+
**New Relic:**
|
|
456
|
+
- Full-stack observability
|
|
457
|
+
- Custom metrics
|
|
458
|
+
- Intelligent alerting
|
|
459
|
+
|
|
460
|
+
**CloudWatch (AWS):**
|
|
461
|
+
- Native AWS integration
|
|
462
|
+
- Custom metrics
|
|
463
|
+
- CloudWatch Alarms
|
|
464
|
+
|
|
465
|
+
## Metric Collection Patterns
|
|
466
|
+
|
|
467
|
+
### 1. Push vs Pull
|
|
468
|
+
|
|
469
|
+
**Pull Model (Prometheus):**
|
|
470
|
+
- Scraper polls endpoints
|
|
471
|
+
- Better for centralization
|
|
472
|
+
- Supports service discovery
|
|
473
|
+
|
|
474
|
+
**Push Model (StatsD, CloudWatch):**
|
|
475
|
+
- Services push metrics
|
|
476
|
+
- Simpler for ephemeral services
|
|
477
|
+
- Good for serverless
|
|
478
|
+
|
|
479
|
+
### 2. Export Strategies
|
|
480
|
+
|
|
481
|
+
**Direct Export:**
|
|
482
|
+
```python
|
|
483
|
+
# Export metrics directly
|
|
484
|
+
from prometheus_client import start_http_server
|
|
485
|
+
start_http_server(8000) # Exposes /metrics endpoint
|
|
486
|
+
```
|
|
487
|
+
|
|
488
|
+
**Push Gateway:**
|
|
489
|
+
```python
|
|
490
|
+
# Push to gateway (for short-lived jobs)
|
|
491
|
+
from prometheus_client import CollectorRegistry, push_to_gateway
|
|
492
|
+
|
|
493
|
+
registry = CollectorRegistry()
|
|
494
|
+
# ... register metrics ...
|
|
495
|
+
push_to_gateway('pushgateway:9091', job='batch-job', registry=registry)
|
|
496
|
+
```
|
|
497
|
+
|
|
498
|
+
## Best Practices Summary
|
|
499
|
+
|
|
500
|
+
1. **Follow naming conventions** (namespace_name_unit_suffix)
|
|
501
|
+
2. **Use appropriate metric types** (counter, gauge, histogram, summary)
|
|
502
|
+
3. **Monitor golden signals** (latency, traffic, errors, saturation)
|
|
503
|
+
4. **Manage cardinality** (avoid high-cardinality labels)
|
|
504
|
+
5. **Instrument at the right granularity** (service, endpoint, operation)
|
|
505
|
+
6. **Set appropriate retention** periods
|
|
506
|
+
7. **Create actionable alerts** based on SLOs
|
|
507
|
+
8. **Use aggregation** for long-term trends
|
|
508
|
+
9. **Combine with logs and traces** for complete picture
|
|
509
|
+
10. **Document metrics** with clear descriptions and units
|
|
510
|
+
|