tapps-agents 3.6.0__py3-none-any.whl → 3.6.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- tapps_agents/__init__.py +2 -2
- tapps_agents/agents/__init__.py +22 -22
- tapps_agents/agents/analyst/__init__.py +5 -5
- tapps_agents/agents/architect/__init__.py +5 -5
- tapps_agents/agents/architect/agent.py +1033 -1033
- tapps_agents/agents/architect/pattern_detector.py +75 -75
- tapps_agents/agents/cleanup/__init__.py +7 -7
- tapps_agents/agents/cleanup/agent.py +445 -445
- tapps_agents/agents/debugger/__init__.py +7 -7
- tapps_agents/agents/debugger/agent.py +310 -310
- tapps_agents/agents/debugger/error_analyzer.py +437 -437
- tapps_agents/agents/designer/__init__.py +5 -5
- tapps_agents/agents/designer/agent.py +786 -786
- tapps_agents/agents/designer/visual_designer.py +638 -638
- tapps_agents/agents/documenter/__init__.py +7 -7
- tapps_agents/agents/documenter/agent.py +531 -531
- tapps_agents/agents/documenter/doc_generator.py +472 -472
- tapps_agents/agents/documenter/doc_validator.py +393 -393
- tapps_agents/agents/documenter/framework_doc_updater.py +493 -493
- tapps_agents/agents/enhancer/__init__.py +7 -7
- tapps_agents/agents/evaluator/__init__.py +7 -7
- tapps_agents/agents/evaluator/agent.py +443 -443
- tapps_agents/agents/evaluator/priority_evaluator.py +641 -641
- tapps_agents/agents/evaluator/quality_analyzer.py +147 -147
- tapps_agents/agents/evaluator/report_generator.py +344 -344
- tapps_agents/agents/evaluator/usage_analyzer.py +192 -192
- tapps_agents/agents/evaluator/workflow_analyzer.py +189 -189
- tapps_agents/agents/implementer/__init__.py +7 -7
- tapps_agents/agents/implementer/agent.py +798 -798
- tapps_agents/agents/implementer/auto_fix.py +1119 -1119
- tapps_agents/agents/implementer/code_generator.py +73 -73
- tapps_agents/agents/improver/__init__.py +1 -1
- tapps_agents/agents/improver/agent.py +753 -753
- tapps_agents/agents/ops/__init__.py +1 -1
- tapps_agents/agents/ops/agent.py +619 -619
- tapps_agents/agents/ops/dependency_analyzer.py +600 -600
- tapps_agents/agents/orchestrator/__init__.py +5 -5
- tapps_agents/agents/orchestrator/agent.py +522 -522
- tapps_agents/agents/planner/__init__.py +7 -7
- tapps_agents/agents/planner/agent.py +1127 -1127
- tapps_agents/agents/reviewer/__init__.py +24 -24
- tapps_agents/agents/reviewer/agent.py +3513 -3513
- tapps_agents/agents/reviewer/aggregator.py +213 -213
- tapps_agents/agents/reviewer/batch_review.py +448 -448
- tapps_agents/agents/reviewer/cache.py +443 -443
- tapps_agents/agents/reviewer/context7_enhancer.py +630 -630
- tapps_agents/agents/reviewer/context_detector.py +203 -203
- tapps_agents/agents/reviewer/docker_compose_validator.py +158 -158
- tapps_agents/agents/reviewer/dockerfile_validator.py +176 -176
- tapps_agents/agents/reviewer/error_handling.py +126 -126
- tapps_agents/agents/reviewer/feedback_generator.py +490 -490
- tapps_agents/agents/reviewer/influxdb_validator.py +316 -316
- tapps_agents/agents/reviewer/issue_tracking.py +169 -169
- tapps_agents/agents/reviewer/library_detector.py +295 -295
- tapps_agents/agents/reviewer/library_patterns.py +268 -268
- tapps_agents/agents/reviewer/maintainability_scorer.py +593 -593
- tapps_agents/agents/reviewer/metric_strategies.py +276 -276
- tapps_agents/agents/reviewer/mqtt_validator.py +160 -160
- tapps_agents/agents/reviewer/output_enhancer.py +105 -105
- tapps_agents/agents/reviewer/pattern_detector.py +241 -241
- tapps_agents/agents/reviewer/performance_scorer.py +357 -357
- tapps_agents/agents/reviewer/phased_review.py +516 -516
- tapps_agents/agents/reviewer/progressive_review.py +435 -435
- tapps_agents/agents/reviewer/react_scorer.py +331 -331
- tapps_agents/agents/reviewer/score_constants.py +228 -228
- tapps_agents/agents/reviewer/score_validator.py +507 -507
- tapps_agents/agents/reviewer/scorer_registry.py +373 -373
- tapps_agents/agents/reviewer/service_discovery.py +534 -534
- tapps_agents/agents/reviewer/tools/parallel_executor.py +581 -581
- tapps_agents/agents/reviewer/tools/ruff_grouping.py +250 -250
- tapps_agents/agents/reviewer/tools/scoped_mypy.py +284 -284
- tapps_agents/agents/reviewer/typescript_scorer.py +1142 -1142
- tapps_agents/agents/reviewer/validation.py +208 -208
- tapps_agents/agents/reviewer/websocket_validator.py +132 -132
- tapps_agents/agents/tester/__init__.py +7 -7
- tapps_agents/agents/tester/accessibility_auditor.py +309 -309
- tapps_agents/agents/tester/agent.py +1080 -1080
- tapps_agents/agents/tester/batch_generator.py +54 -54
- tapps_agents/agents/tester/context_learner.py +51 -51
- tapps_agents/agents/tester/coverage_analyzer.py +386 -386
- tapps_agents/agents/tester/coverage_test_generator.py +290 -290
- tapps_agents/agents/tester/debug_enhancer.py +238 -238
- tapps_agents/agents/tester/device_emulator.py +241 -241
- tapps_agents/agents/tester/integration_generator.py +62 -62
- tapps_agents/agents/tester/network_recorder.py +300 -300
- tapps_agents/agents/tester/performance_monitor.py +320 -320
- tapps_agents/agents/tester/test_fixer.py +316 -316
- tapps_agents/agents/tester/test_generator.py +632 -632
- tapps_agents/agents/tester/trace_manager.py +234 -234
- tapps_agents/agents/tester/visual_regression.py +291 -291
- tapps_agents/analysis/pattern_detector.py +36 -36
- tapps_agents/beads/hydration.py +213 -213
- tapps_agents/beads/parse.py +32 -32
- tapps_agents/beads/specs.py +206 -206
- tapps_agents/cli/__init__.py +9 -9
- tapps_agents/cli/__main__.py +8 -8
- tapps_agents/cli/base.py +478 -478
- tapps_agents/cli/command_classifier.py +72 -72
- tapps_agents/cli/commands/__init__.py +2 -2
- tapps_agents/cli/commands/analyst.py +173 -173
- tapps_agents/cli/commands/architect.py +109 -109
- tapps_agents/cli/commands/cleanup_agent.py +92 -92
- tapps_agents/cli/commands/common.py +126 -126
- tapps_agents/cli/commands/debugger.py +90 -90
- tapps_agents/cli/commands/designer.py +112 -112
- tapps_agents/cli/commands/documenter.py +136 -136
- tapps_agents/cli/commands/enhancer.py +110 -110
- tapps_agents/cli/commands/evaluator.py +255 -255
- tapps_agents/cli/commands/implementer.py +301 -301
- tapps_agents/cli/commands/improver.py +91 -91
- tapps_agents/cli/commands/knowledge.py +111 -111
- tapps_agents/cli/commands/learning.py +172 -172
- tapps_agents/cli/commands/observability.py +283 -283
- tapps_agents/cli/commands/ops.py +135 -135
- tapps_agents/cli/commands/orchestrator.py +116 -116
- tapps_agents/cli/commands/planner.py +237 -237
- tapps_agents/cli/commands/reviewer.py +1872 -1872
- tapps_agents/cli/commands/status.py +285 -285
- tapps_agents/cli/commands/task.py +227 -227
- tapps_agents/cli/commands/tester.py +191 -191
- tapps_agents/cli/feedback.py +936 -936
- tapps_agents/cli/formatters.py +608 -608
- tapps_agents/cli/help/__init__.py +7 -7
- tapps_agents/cli/help/static_help.py +425 -425
- tapps_agents/cli/network_detection.py +110 -110
- tapps_agents/cli/output_compactor.py +274 -274
- tapps_agents/cli/parsers/__init__.py +2 -2
- tapps_agents/cli/parsers/analyst.py +186 -186
- tapps_agents/cli/parsers/architect.py +167 -167
- tapps_agents/cli/parsers/cleanup_agent.py +228 -228
- tapps_agents/cli/parsers/debugger.py +116 -116
- tapps_agents/cli/parsers/designer.py +182 -182
- tapps_agents/cli/parsers/documenter.py +134 -134
- tapps_agents/cli/parsers/enhancer.py +113 -113
- tapps_agents/cli/parsers/evaluator.py +213 -213
- tapps_agents/cli/parsers/implementer.py +168 -168
- tapps_agents/cli/parsers/improver.py +132 -132
- tapps_agents/cli/parsers/ops.py +159 -159
- tapps_agents/cli/parsers/orchestrator.py +98 -98
- tapps_agents/cli/parsers/planner.py +145 -145
- tapps_agents/cli/parsers/reviewer.py +462 -462
- tapps_agents/cli/parsers/tester.py +124 -124
- tapps_agents/cli/progress_heartbeat.py +254 -254
- tapps_agents/cli/streaming_progress.py +336 -336
- tapps_agents/cli/utils/__init__.py +6 -6
- tapps_agents/cli/utils/agent_lifecycle.py +48 -48
- tapps_agents/cli/utils/error_formatter.py +82 -82
- tapps_agents/cli/utils/error_recovery.py +188 -188
- tapps_agents/cli/utils/output_handler.py +59 -59
- tapps_agents/cli/utils/prompt_enhancer.py +319 -319
- tapps_agents/cli/validators/__init__.py +9 -9
- tapps_agents/cli/validators/command_validator.py +81 -81
- tapps_agents/context7/__init__.py +112 -112
- tapps_agents/context7/agent_integration.py +869 -869
- tapps_agents/context7/analytics.py +382 -382
- tapps_agents/context7/analytics_dashboard.py +299 -299
- tapps_agents/context7/async_cache.py +681 -681
- tapps_agents/context7/backup_client.py +958 -958
- tapps_agents/context7/cache_locking.py +194 -194
- tapps_agents/context7/cache_metadata.py +214 -214
- tapps_agents/context7/cache_prewarm.py +488 -488
- tapps_agents/context7/cache_structure.py +168 -168
- tapps_agents/context7/cache_warming.py +604 -604
- tapps_agents/context7/circuit_breaker.py +376 -376
- tapps_agents/context7/cleanup.py +461 -461
- tapps_agents/context7/commands.py +858 -858
- tapps_agents/context7/credential_validation.py +276 -276
- tapps_agents/context7/cross_reference_resolver.py +168 -168
- tapps_agents/context7/cross_references.py +424 -424
- tapps_agents/context7/doc_manager.py +225 -225
- tapps_agents/context7/fuzzy_matcher.py +369 -369
- tapps_agents/context7/kb_cache.py +404 -404
- tapps_agents/context7/language_detector.py +219 -219
- tapps_agents/context7/library_detector.py +725 -725
- tapps_agents/context7/lookup.py +738 -738
- tapps_agents/context7/metadata.py +258 -258
- tapps_agents/context7/refresh_queue.py +300 -300
- tapps_agents/context7/security.py +373 -373
- tapps_agents/context7/staleness_policies.py +278 -278
- tapps_agents/context7/tiles_integration.py +47 -47
- tapps_agents/continuous_bug_fix/__init__.py +20 -20
- tapps_agents/continuous_bug_fix/bug_finder.py +306 -306
- tapps_agents/continuous_bug_fix/bug_fix_coordinator.py +177 -177
- tapps_agents/continuous_bug_fix/commit_manager.py +178 -178
- tapps_agents/continuous_bug_fix/continuous_bug_fixer.py +322 -322
- tapps_agents/continuous_bug_fix/proactive_bug_finder.py +285 -285
- tapps_agents/core/__init__.py +298 -298
- tapps_agents/core/adaptive_cache_config.py +432 -432
- tapps_agents/core/agent_base.py +647 -647
- tapps_agents/core/agent_cache.py +466 -466
- tapps_agents/core/agent_learning.py +1865 -1865
- tapps_agents/core/analytics_dashboard.py +563 -563
- tapps_agents/core/analytics_enhancements.py +597 -597
- tapps_agents/core/anonymization.py +274 -274
- tapps_agents/core/ast_parser.py +228 -228
- tapps_agents/core/async_file_ops.py +402 -402
- tapps_agents/core/best_practice_consultant.py +299 -299
- tapps_agents/core/brownfield_analyzer.py +299 -299
- tapps_agents/core/brownfield_review.py +541 -541
- tapps_agents/core/browser_controller.py +513 -513
- tapps_agents/core/capability_registry.py +418 -418
- tapps_agents/core/change_impact_analyzer.py +190 -190
- tapps_agents/core/checkpoint_manager.py +377 -377
- tapps_agents/core/code_generator.py +329 -329
- tapps_agents/core/code_validator.py +276 -276
- tapps_agents/core/command_registry.py +327 -327
- tapps_agents/core/context_gathering/__init__.py +2 -2
- tapps_agents/core/context_gathering/repository_explorer.py +28 -28
- tapps_agents/core/context_intelligence/__init__.py +2 -2
- tapps_agents/core/context_intelligence/relevance_scorer.py +24 -24
- tapps_agents/core/context_intelligence/token_budget_manager.py +27 -27
- tapps_agents/core/context_manager.py +240 -240
- tapps_agents/core/cursor_feedback_monitor.py +146 -146
- tapps_agents/core/cursor_verification.py +290 -290
- tapps_agents/core/customization_loader.py +280 -280
- tapps_agents/core/customization_schema.py +260 -260
- tapps_agents/core/customization_template.py +238 -238
- tapps_agents/core/debug_logger.py +124 -124
- tapps_agents/core/design_validator.py +298 -298
- tapps_agents/core/diagram_generator.py +226 -226
- tapps_agents/core/docker_utils.py +232 -232
- tapps_agents/core/document_generator.py +617 -617
- tapps_agents/core/domain_detector.py +30 -30
- tapps_agents/core/error_envelope.py +454 -454
- tapps_agents/core/error_handler.py +270 -270
- tapps_agents/core/estimation_tracker.py +189 -189
- tapps_agents/core/eval_prompt_engine.py +116 -116
- tapps_agents/core/evaluation_base.py +119 -119
- tapps_agents/core/evaluation_models.py +320 -320
- tapps_agents/core/evaluation_orchestrator.py +225 -225
- tapps_agents/core/evaluators/__init__.py +7 -7
- tapps_agents/core/evaluators/architectural_evaluator.py +205 -205
- tapps_agents/core/evaluators/behavioral_evaluator.py +160 -160
- tapps_agents/core/evaluators/performance_profile_evaluator.py +160 -160
- tapps_agents/core/evaluators/security_posture_evaluator.py +148 -148
- tapps_agents/core/evaluators/spec_compliance_evaluator.py +181 -181
- tapps_agents/core/exceptions.py +107 -107
- tapps_agents/core/expert_config_generator.py +293 -293
- tapps_agents/core/export_schema.py +202 -202
- tapps_agents/core/external_feedback_models.py +102 -102
- tapps_agents/core/external_feedback_storage.py +213 -213
- tapps_agents/core/fallback_strategy.py +314 -314
- tapps_agents/core/feedback_analyzer.py +162 -162
- tapps_agents/core/feedback_collector.py +178 -178
- tapps_agents/core/git_operations.py +445 -445
- tapps_agents/core/hardware_profiler.py +151 -151
- tapps_agents/core/instructions.py +324 -324
- tapps_agents/core/io_guardrails.py +69 -69
- tapps_agents/core/issue_manifest.py +249 -249
- tapps_agents/core/issue_schema.py +139 -139
- tapps_agents/core/json_utils.py +128 -128
- tapps_agents/core/knowledge_graph.py +446 -446
- tapps_agents/core/language_detector.py +296 -296
- tapps_agents/core/learning_confidence.py +242 -242
- tapps_agents/core/learning_dashboard.py +246 -246
- tapps_agents/core/learning_decision.py +384 -384
- tapps_agents/core/learning_explainability.py +578 -578
- tapps_agents/core/learning_export.py +287 -287
- tapps_agents/core/learning_integration.py +228 -228
- tapps_agents/core/llm_behavior.py +232 -232
- tapps_agents/core/long_duration_support.py +786 -786
- tapps_agents/core/mcp_setup.py +106 -106
- tapps_agents/core/memory_integration.py +396 -396
- tapps_agents/core/meta_learning.py +666 -666
- tapps_agents/core/module_path_sanitizer.py +199 -199
- tapps_agents/core/multi_agent_orchestrator.py +382 -382
- tapps_agents/core/network_errors.py +125 -125
- tapps_agents/core/nfr_validator.py +336 -336
- tapps_agents/core/offline_mode.py +158 -158
- tapps_agents/core/output_contracts.py +300 -300
- tapps_agents/core/output_formatter.py +300 -300
- tapps_agents/core/path_normalizer.py +174 -174
- tapps_agents/core/path_validator.py +322 -322
- tapps_agents/core/pattern_library.py +250 -250
- tapps_agents/core/performance_benchmark.py +301 -301
- tapps_agents/core/performance_monitor.py +184 -184
- tapps_agents/core/playwright_mcp_controller.py +771 -771
- tapps_agents/core/policy_loader.py +135 -135
- tapps_agents/core/progress.py +166 -166
- tapps_agents/core/project_profile.py +354 -354
- tapps_agents/core/project_type_detector.py +454 -454
- tapps_agents/core/prompt_base.py +223 -223
- tapps_agents/core/prompt_learning/__init__.py +2 -2
- tapps_agents/core/prompt_learning/learning_loop.py +24 -24
- tapps_agents/core/prompt_learning/project_prompt_store.py +25 -25
- tapps_agents/core/prompt_learning/skills_prompt_analyzer.py +35 -35
- tapps_agents/core/prompt_optimization/__init__.py +6 -6
- tapps_agents/core/prompt_optimization/ab_tester.py +114 -114
- tapps_agents/core/prompt_optimization/correlation_analyzer.py +160 -160
- tapps_agents/core/prompt_optimization/progressive_refiner.py +129 -129
- tapps_agents/core/prompt_optimization/prompt_library.py +37 -37
- tapps_agents/core/requirements_evaluator.py +431 -431
- tapps_agents/core/resource_aware_executor.py +449 -449
- tapps_agents/core/resource_monitor.py +343 -343
- tapps_agents/core/resume_handler.py +298 -298
- tapps_agents/core/retry_handler.py +197 -197
- tapps_agents/core/review_checklists.py +479 -479
- tapps_agents/core/role_loader.py +201 -201
- tapps_agents/core/role_template_loader.py +201 -201
- tapps_agents/core/runtime_mode.py +60 -60
- tapps_agents/core/security_scanner.py +342 -342
- tapps_agents/core/skill_agent_registry.py +194 -194
- tapps_agents/core/skill_integration.py +208 -208
- tapps_agents/core/skill_loader.py +492 -492
- tapps_agents/core/skill_template.py +341 -341
- tapps_agents/core/skill_validator.py +478 -478
- tapps_agents/core/stack_analyzer.py +35 -35
- tapps_agents/core/startup.py +174 -174
- tapps_agents/core/storage_manager.py +397 -397
- tapps_agents/core/storage_models.py +166 -166
- tapps_agents/core/story_evaluator.py +410 -410
- tapps_agents/core/subprocess_utils.py +170 -170
- tapps_agents/core/task_duration.py +296 -296
- tapps_agents/core/task_memory.py +582 -582
- tapps_agents/core/task_state.py +226 -226
- tapps_agents/core/tech_stack_priorities.py +208 -208
- tapps_agents/core/temp_directory.py +194 -194
- tapps_agents/core/template_merger.py +600 -600
- tapps_agents/core/template_selector.py +280 -280
- tapps_agents/core/test_generator.py +286 -286
- tapps_agents/core/tiered_context.py +253 -253
- tapps_agents/core/token_monitor.py +345 -345
- tapps_agents/core/traceability.py +254 -254
- tapps_agents/core/trajectory_tracker.py +50 -50
- tapps_agents/core/unicode_safe.py +143 -143
- tapps_agents/core/unified_cache_config.py +170 -170
- tapps_agents/core/unified_state.py +324 -324
- tapps_agents/core/validate_cursor_setup.py +237 -237
- tapps_agents/core/validation_registry.py +136 -136
- tapps_agents/core/validators/__init__.py +4 -4
- tapps_agents/core/validators/python_validator.py +87 -87
- tapps_agents/core/verification_agent.py +90 -90
- tapps_agents/core/visual_feedback.py +644 -644
- tapps_agents/core/workflow_validator.py +197 -197
- tapps_agents/core/worktree.py +367 -367
- tapps_agents/docker/__init__.py +10 -10
- tapps_agents/docker/analyzer.py +186 -186
- tapps_agents/docker/debugger.py +229 -229
- tapps_agents/docker/error_patterns.py +216 -216
- tapps_agents/epic/__init__.py +22 -22
- tapps_agents/epic/beads_sync.py +115 -115
- tapps_agents/epic/markdown_sync.py +105 -105
- tapps_agents/epic/models.py +96 -96
- tapps_agents/experts/__init__.py +163 -163
- tapps_agents/experts/agent_integration.py +243 -243
- tapps_agents/experts/auto_generator.py +331 -331
- tapps_agents/experts/base_expert.py +536 -536
- tapps_agents/experts/builtin_registry.py +261 -261
- tapps_agents/experts/business_metrics.py +565 -565
- tapps_agents/experts/cache.py +266 -266
- tapps_agents/experts/confidence_breakdown.py +306 -306
- tapps_agents/experts/confidence_calculator.py +336 -336
- tapps_agents/experts/confidence_metrics.py +236 -236
- tapps_agents/experts/domain_config.py +311 -311
- tapps_agents/experts/domain_detector.py +550 -550
- tapps_agents/experts/domain_utils.py +84 -84
- tapps_agents/experts/expert_config.py +113 -113
- tapps_agents/experts/expert_engine.py +465 -465
- tapps_agents/experts/expert_registry.py +744 -744
- tapps_agents/experts/expert_synthesizer.py +70 -70
- tapps_agents/experts/governance.py +197 -197
- tapps_agents/experts/history_logger.py +312 -312
- tapps_agents/experts/knowledge/README.md +180 -180
- tapps_agents/experts/knowledge/accessibility/accessible-forms.md +331 -331
- tapps_agents/experts/knowledge/accessibility/aria-patterns.md +344 -344
- tapps_agents/experts/knowledge/accessibility/color-contrast.md +285 -285
- tapps_agents/experts/knowledge/accessibility/keyboard-navigation.md +332 -332
- tapps_agents/experts/knowledge/accessibility/screen-readers.md +282 -282
- tapps_agents/experts/knowledge/accessibility/semantic-html.md +355 -355
- tapps_agents/experts/knowledge/accessibility/testing-accessibility.md +369 -369
- tapps_agents/experts/knowledge/accessibility/wcag-2.1.md +296 -296
- tapps_agents/experts/knowledge/accessibility/wcag-2.2.md +211 -211
- tapps_agents/experts/knowledge/agent-learning/best-practices.md +715 -715
- tapps_agents/experts/knowledge/agent-learning/pattern-extraction.md +282 -282
- tapps_agents/experts/knowledge/agent-learning/prompt-optimization.md +320 -320
- tapps_agents/experts/knowledge/ai-frameworks/model-optimization.md +90 -90
- tapps_agents/experts/knowledge/ai-frameworks/openvino-patterns.md +260 -260
- tapps_agents/experts/knowledge/api-design-integration/api-gateway-patterns.md +309 -309
- tapps_agents/experts/knowledge/api-design-integration/api-security-patterns.md +521 -521
- tapps_agents/experts/knowledge/api-design-integration/api-versioning.md +421 -421
- tapps_agents/experts/knowledge/api-design-integration/async-protocol-patterns.md +61 -61
- tapps_agents/experts/knowledge/api-design-integration/contract-testing.md +221 -221
- tapps_agents/experts/knowledge/api-design-integration/external-api-integration.md +489 -489
- tapps_agents/experts/knowledge/api-design-integration/fastapi-patterns.md +360 -360
- tapps_agents/experts/knowledge/api-design-integration/fastapi-testing.md +262 -262
- tapps_agents/experts/knowledge/api-design-integration/graphql-patterns.md +582 -582
- tapps_agents/experts/knowledge/api-design-integration/grpc-best-practices.md +499 -499
- tapps_agents/experts/knowledge/api-design-integration/mqtt-patterns.md +455 -455
- tapps_agents/experts/knowledge/api-design-integration/rate-limiting.md +507 -507
- tapps_agents/experts/knowledge/api-design-integration/restful-api-design.md +618 -618
- tapps_agents/experts/knowledge/api-design-integration/websocket-patterns.md +480 -480
- tapps_agents/experts/knowledge/cloud-infrastructure/cloud-native-patterns.md +175 -175
- tapps_agents/experts/knowledge/cloud-infrastructure/container-health-checks.md +261 -261
- tapps_agents/experts/knowledge/cloud-infrastructure/containerization.md +222 -222
- tapps_agents/experts/knowledge/cloud-infrastructure/cost-optimization.md +122 -122
- tapps_agents/experts/knowledge/cloud-infrastructure/disaster-recovery.md +153 -153
- tapps_agents/experts/knowledge/cloud-infrastructure/dockerfile-patterns.md +285 -285
- tapps_agents/experts/knowledge/cloud-infrastructure/infrastructure-as-code.md +187 -187
- tapps_agents/experts/knowledge/cloud-infrastructure/kubernetes-patterns.md +253 -253
- tapps_agents/experts/knowledge/cloud-infrastructure/multi-cloud-strategies.md +155 -155
- tapps_agents/experts/knowledge/cloud-infrastructure/serverless-architecture.md +200 -200
- tapps_agents/experts/knowledge/code-quality-analysis/README.md +16 -16
- tapps_agents/experts/knowledge/code-quality-analysis/code-metrics.md +137 -137
- tapps_agents/experts/knowledge/code-quality-analysis/complexity-analysis.md +181 -181
- tapps_agents/experts/knowledge/code-quality-analysis/technical-debt-patterns.md +191 -191
- tapps_agents/experts/knowledge/data-privacy-compliance/anonymization.md +313 -313
- tapps_agents/experts/knowledge/data-privacy-compliance/ccpa.md +255 -255
- tapps_agents/experts/knowledge/data-privacy-compliance/consent-management.md +282 -282
- tapps_agents/experts/knowledge/data-privacy-compliance/data-minimization.md +275 -275
- tapps_agents/experts/knowledge/data-privacy-compliance/data-retention.md +297 -297
- tapps_agents/experts/knowledge/data-privacy-compliance/data-subject-rights.md +383 -383
- tapps_agents/experts/knowledge/data-privacy-compliance/encryption-privacy.md +285 -285
- tapps_agents/experts/knowledge/data-privacy-compliance/gdpr.md +344 -344
- tapps_agents/experts/knowledge/data-privacy-compliance/hipaa.md +385 -385
- tapps_agents/experts/knowledge/data-privacy-compliance/privacy-by-design.md +280 -280
- tapps_agents/experts/knowledge/database-data-management/acid-vs-cap.md +164 -164
- tapps_agents/experts/knowledge/database-data-management/backup-and-recovery.md +182 -182
- tapps_agents/experts/knowledge/database-data-management/data-modeling.md +172 -172
- tapps_agents/experts/knowledge/database-data-management/database-design.md +187 -187
- tapps_agents/experts/knowledge/database-data-management/flux-query-optimization.md +342 -342
- tapps_agents/experts/knowledge/database-data-management/influxdb-connection-patterns.md +432 -432
- tapps_agents/experts/knowledge/database-data-management/influxdb-patterns.md +442 -442
- tapps_agents/experts/knowledge/database-data-management/migration-strategies.md +216 -216
- tapps_agents/experts/knowledge/database-data-management/nosql-patterns.md +259 -259
- tapps_agents/experts/knowledge/database-data-management/scalability-patterns.md +184 -184
- tapps_agents/experts/knowledge/database-data-management/sql-optimization.md +175 -175
- tapps_agents/experts/knowledge/database-data-management/time-series-modeling.md +444 -444
- tapps_agents/experts/knowledge/development-workflow/README.md +16 -16
- tapps_agents/experts/knowledge/development-workflow/automation-best-practices.md +216 -216
- tapps_agents/experts/knowledge/development-workflow/build-strategies.md +198 -198
- tapps_agents/experts/knowledge/development-workflow/deployment-patterns.md +205 -205
- tapps_agents/experts/knowledge/development-workflow/git-workflows.md +205 -205
- tapps_agents/experts/knowledge/documentation-knowledge-management/README.md +16 -16
- tapps_agents/experts/knowledge/documentation-knowledge-management/api-documentation-patterns.md +231 -231
- tapps_agents/experts/knowledge/documentation-knowledge-management/documentation-standards.md +191 -191
- tapps_agents/experts/knowledge/documentation-knowledge-management/knowledge-management.md +171 -171
- tapps_agents/experts/knowledge/documentation-knowledge-management/technical-writing-guide.md +192 -192
- tapps_agents/experts/knowledge/observability-monitoring/alerting-patterns.md +461 -461
- tapps_agents/experts/knowledge/observability-monitoring/apm-tools.md +459 -459
- tapps_agents/experts/knowledge/observability-monitoring/distributed-tracing.md +367 -367
- tapps_agents/experts/knowledge/observability-monitoring/logging-strategies.md +478 -478
- tapps_agents/experts/knowledge/observability-monitoring/metrics-and-monitoring.md +510 -510
- tapps_agents/experts/knowledge/observability-monitoring/observability-best-practices.md +492 -492
- tapps_agents/experts/knowledge/observability-monitoring/open-telemetry.md +573 -573
- tapps_agents/experts/knowledge/observability-monitoring/slo-sli-sla.md +419 -419
- tapps_agents/experts/knowledge/performance/anti-patterns.md +284 -284
- tapps_agents/experts/knowledge/performance/api-performance.md +256 -256
- tapps_agents/experts/knowledge/performance/caching.md +327 -327
- tapps_agents/experts/knowledge/performance/database-performance.md +252 -252
- tapps_agents/experts/knowledge/performance/optimization-patterns.md +327 -327
- tapps_agents/experts/knowledge/performance/profiling.md +297 -297
- tapps_agents/experts/knowledge/performance/resource-management.md +293 -293
- tapps_agents/experts/knowledge/performance/scalability.md +306 -306
- tapps_agents/experts/knowledge/security/owasp-top10.md +209 -209
- tapps_agents/experts/knowledge/security/secure-coding-practices.md +207 -207
- tapps_agents/experts/knowledge/security/threat-modeling.md +220 -220
- tapps_agents/experts/knowledge/security/vulnerability-patterns.md +342 -342
- tapps_agents/experts/knowledge/software-architecture/docker-compose-patterns.md +314 -314
- tapps_agents/experts/knowledge/software-architecture/microservices-patterns.md +379 -379
- tapps_agents/experts/knowledge/software-architecture/service-communication.md +316 -316
- tapps_agents/experts/knowledge/testing/best-practices.md +310 -310
- tapps_agents/experts/knowledge/testing/coverage-analysis.md +293 -293
- tapps_agents/experts/knowledge/testing/mocking.md +256 -256
- tapps_agents/experts/knowledge/testing/test-automation.md +276 -276
- tapps_agents/experts/knowledge/testing/test-data.md +271 -271
- tapps_agents/experts/knowledge/testing/test-design-patterns.md +280 -280
- tapps_agents/experts/knowledge/testing/test-maintenance.md +236 -236
- tapps_agents/experts/knowledge/testing/test-strategies.md +311 -311
- tapps_agents/experts/knowledge/user-experience/information-architecture.md +325 -325
- tapps_agents/experts/knowledge/user-experience/interaction-design.md +363 -363
- tapps_agents/experts/knowledge/user-experience/prototyping.md +293 -293
- tapps_agents/experts/knowledge/user-experience/usability-heuristics.md +337 -337
- tapps_agents/experts/knowledge/user-experience/usability-testing.md +311 -311
- tapps_agents/experts/knowledge/user-experience/user-journeys.md +296 -296
- tapps_agents/experts/knowledge/user-experience/user-research.md +373 -373
- tapps_agents/experts/knowledge/user-experience/ux-principles.md +340 -340
- tapps_agents/experts/knowledge_freshness.py +321 -321
- tapps_agents/experts/knowledge_ingestion.py +438 -438
- tapps_agents/experts/knowledge_need_detector.py +93 -93
- tapps_agents/experts/knowledge_validator.py +382 -382
- tapps_agents/experts/observability.py +440 -440
- tapps_agents/experts/passive_notifier.py +238 -238
- tapps_agents/experts/proactive_orchestrator.py +32 -32
- tapps_agents/experts/rag_chunker.py +205 -205
- tapps_agents/experts/rag_embedder.py +152 -152
- tapps_agents/experts/rag_evaluation.py +299 -299
- tapps_agents/experts/rag_index.py +303 -303
- tapps_agents/experts/rag_metrics.py +293 -293
- tapps_agents/experts/rag_safety.py +263 -263
- tapps_agents/experts/report_generator.py +296 -296
- tapps_agents/experts/setup_wizard.py +441 -441
- tapps_agents/experts/simple_rag.py +431 -431
- tapps_agents/experts/vector_rag.py +354 -354
- tapps_agents/experts/weight_distributor.py +304 -304
- tapps_agents/health/__init__.py +24 -24
- tapps_agents/health/base.py +75 -75
- tapps_agents/health/checks/__init__.py +22 -22
- tapps_agents/health/checks/automation.py +127 -127
- tapps_agents/health/checks/context7_cache.py +210 -210
- tapps_agents/health/checks/environment.py +116 -116
- tapps_agents/health/checks/execution.py +170 -170
- tapps_agents/health/checks/knowledge_base.py +187 -187
- tapps_agents/health/checks/outcomes.backup_20260204_064058.py +324 -0
- tapps_agents/health/checks/outcomes.backup_20260204_064256.py +324 -0
- tapps_agents/health/checks/outcomes.backup_20260204_064600.py +324 -0
- tapps_agents/health/checks/outcomes.py +324 -324
- tapps_agents/health/collector.py +280 -280
- tapps_agents/health/dashboard.py +137 -137
- tapps_agents/health/metrics.py +151 -151
- tapps_agents/health/registry.py +166 -166
- tapps_agents/hooks/__init__.py +33 -33
- tapps_agents/hooks/config.py +140 -140
- tapps_agents/hooks/events.py +135 -135
- tapps_agents/hooks/executor.py +128 -128
- tapps_agents/hooks/manager.py +143 -143
- tapps_agents/integration/__init__.py +8 -8
- tapps_agents/integration/service_integrator.py +121 -121
- tapps_agents/integrations/__init__.py +10 -10
- tapps_agents/integrations/clawdbot.py +525 -525
- tapps_agents/integrations/memory_bridge.py +356 -356
- tapps_agents/mcp/__init__.py +18 -18
- tapps_agents/mcp/gateway.py +112 -112
- tapps_agents/mcp/servers/__init__.py +13 -13
- tapps_agents/mcp/servers/analysis.py +204 -204
- tapps_agents/mcp/servers/context7.py +198 -198
- tapps_agents/mcp/servers/filesystem.py +218 -218
- tapps_agents/mcp/servers/git.py +201 -201
- tapps_agents/mcp/tool_registry.py +115 -115
- tapps_agents/quality/__init__.py +54 -54
- tapps_agents/quality/coverage_analyzer.py +379 -379
- tapps_agents/quality/enforcement.py +82 -82
- tapps_agents/quality/gates/__init__.py +37 -37
- tapps_agents/quality/gates/approval_gate.py +255 -255
- tapps_agents/quality/gates/base.py +84 -84
- tapps_agents/quality/gates/exceptions.py +43 -43
- tapps_agents/quality/gates/policy_gate.py +195 -195
- tapps_agents/quality/gates/registry.py +239 -239
- tapps_agents/quality/gates/security_gate.py +156 -156
- tapps_agents/quality/quality_gates.py +369 -369
- tapps_agents/quality/secret_scanner.py +335 -335
- tapps_agents/resources/__init__.py +5 -0
- tapps_agents/resources/claude/__init__.py +1 -0
- tapps_agents/resources/claude/commands/README.md +156 -0
- tapps_agents/resources/claude/commands/__init__.py +1 -0
- tapps_agents/resources/claude/commands/build-fix.md +22 -0
- tapps_agents/resources/claude/commands/build.md +77 -0
- tapps_agents/resources/claude/commands/debug.md +53 -0
- tapps_agents/resources/claude/commands/design.md +68 -0
- tapps_agents/resources/claude/commands/docs.md +53 -0
- tapps_agents/resources/claude/commands/e2e.md +22 -0
- tapps_agents/resources/claude/commands/fix.md +54 -0
- tapps_agents/resources/claude/commands/implement.md +53 -0
- tapps_agents/resources/claude/commands/improve.md +53 -0
- tapps_agents/resources/claude/commands/library-docs.md +64 -0
- tapps_agents/resources/claude/commands/lint.md +52 -0
- tapps_agents/resources/claude/commands/plan.md +65 -0
- tapps_agents/resources/claude/commands/refactor-clean.md +21 -0
- tapps_agents/resources/claude/commands/refactor.md +55 -0
- tapps_agents/resources/claude/commands/review.md +67 -0
- tapps_agents/resources/claude/commands/score.md +60 -0
- tapps_agents/resources/claude/commands/security-review.md +22 -0
- tapps_agents/resources/claude/commands/security-scan.md +54 -0
- tapps_agents/resources/claude/commands/tdd.md +24 -0
- tapps_agents/resources/claude/commands/test-coverage.md +21 -0
- tapps_agents/resources/claude/commands/test.md +54 -0
- tapps_agents/resources/claude/commands/update-codemaps.md +20 -0
- tapps_agents/resources/claude/commands/update-docs.md +21 -0
- tapps_agents/resources/claude/skills/__init__.py +1 -0
- tapps_agents/resources/claude/skills/analyst/SKILL.md +272 -0
- tapps_agents/resources/claude/skills/analyst/__init__.py +1 -0
- tapps_agents/resources/claude/skills/architect/SKILL.md +282 -0
- tapps_agents/resources/claude/skills/architect/__init__.py +1 -0
- tapps_agents/resources/claude/skills/backend-patterns/SKILL.md +30 -0
- tapps_agents/resources/claude/skills/backend-patterns/__init__.py +1 -0
- tapps_agents/resources/claude/skills/coding-standards/SKILL.md +29 -0
- tapps_agents/resources/claude/skills/coding-standards/__init__.py +1 -0
- tapps_agents/resources/claude/skills/debugger/SKILL.md +203 -0
- tapps_agents/resources/claude/skills/debugger/__init__.py +1 -0
- tapps_agents/resources/claude/skills/designer/SKILL.md +243 -0
- tapps_agents/resources/claude/skills/designer/__init__.py +1 -0
- tapps_agents/resources/claude/skills/documenter/SKILL.md +252 -0
- tapps_agents/resources/claude/skills/documenter/__init__.py +1 -0
- tapps_agents/resources/claude/skills/enhancer/SKILL.md +307 -0
- tapps_agents/resources/claude/skills/enhancer/__init__.py +1 -0
- tapps_agents/resources/claude/skills/evaluator/SKILL.md +204 -0
- tapps_agents/resources/claude/skills/evaluator/__init__.py +1 -0
- tapps_agents/resources/claude/skills/frontend-patterns/SKILL.md +29 -0
- tapps_agents/resources/claude/skills/frontend-patterns/__init__.py +1 -0
- tapps_agents/resources/claude/skills/implementer/SKILL.md +188 -0
- tapps_agents/resources/claude/skills/implementer/__init__.py +1 -0
- tapps_agents/resources/claude/skills/improver/SKILL.md +218 -0
- tapps_agents/resources/claude/skills/improver/__init__.py +1 -0
- tapps_agents/resources/claude/skills/ops/SKILL.md +281 -0
- tapps_agents/resources/claude/skills/ops/__init__.py +1 -0
- tapps_agents/resources/claude/skills/orchestrator/SKILL.md +390 -0
- tapps_agents/resources/claude/skills/orchestrator/__init__.py +1 -0
- tapps_agents/resources/claude/skills/planner/SKILL.md +254 -0
- tapps_agents/resources/claude/skills/planner/__init__.py +1 -0
- tapps_agents/resources/claude/skills/reviewer/SKILL.md +434 -0
- tapps_agents/resources/claude/skills/reviewer/__init__.py +1 -0
- tapps_agents/resources/claude/skills/security-review/SKILL.md +31 -0
- tapps_agents/resources/claude/skills/security-review/__init__.py +1 -0
- tapps_agents/resources/claude/skills/simple-mode/SKILL.md +695 -0
- tapps_agents/resources/claude/skills/simple-mode/__init__.py +1 -0
- tapps_agents/resources/claude/skills/tester/SKILL.md +219 -0
- tapps_agents/resources/claude/skills/tester/__init__.py +1 -0
- tapps_agents/resources/cursor/.cursorignore +35 -0
- tapps_agents/resources/cursor/__init__.py +1 -0
- tapps_agents/resources/cursor/commands/__init__.py +1 -0
- tapps_agents/resources/cursor/commands/build-fix.md +11 -0
- tapps_agents/resources/cursor/commands/build.md +11 -0
- tapps_agents/resources/cursor/commands/e2e.md +11 -0
- tapps_agents/resources/cursor/commands/fix.md +11 -0
- tapps_agents/resources/cursor/commands/refactor-clean.md +11 -0
- tapps_agents/resources/cursor/commands/review.md +11 -0
- tapps_agents/resources/cursor/commands/security-review.md +11 -0
- tapps_agents/resources/cursor/commands/tdd.md +11 -0
- tapps_agents/resources/cursor/commands/test-coverage.md +11 -0
- tapps_agents/resources/cursor/commands/test.md +11 -0
- tapps_agents/resources/cursor/commands/update-codemaps.md +10 -0
- tapps_agents/resources/cursor/commands/update-docs.md +11 -0
- tapps_agents/resources/cursor/rules/__init__.py +1 -0
- tapps_agents/resources/cursor/rules/agent-capabilities.mdc +687 -0
- tapps_agents/resources/cursor/rules/coding-style.mdc +31 -0
- tapps_agents/resources/cursor/rules/command-reference.mdc +2081 -0
- tapps_agents/resources/cursor/rules/cursor-mode-usage.mdc +125 -0
- tapps_agents/resources/cursor/rules/git-workflow.mdc +29 -0
- tapps_agents/resources/cursor/rules/performance.mdc +29 -0
- tapps_agents/resources/cursor/rules/project-context.mdc +163 -0
- tapps_agents/resources/cursor/rules/project-profiling.mdc +197 -0
- tapps_agents/resources/cursor/rules/quick-reference.mdc +630 -0
- tapps_agents/resources/cursor/rules/security.mdc +32 -0
- tapps_agents/resources/cursor/rules/simple-mode.mdc +500 -0
- tapps_agents/resources/cursor/rules/testing.mdc +31 -0
- tapps_agents/resources/cursor/rules/when-to-use.mdc +156 -0
- tapps_agents/resources/cursor/rules/workflow-presets.mdc +179 -0
- tapps_agents/resources/customizations/__init__.py +1 -0
- tapps_agents/resources/customizations/example-custom.yaml +83 -0
- tapps_agents/resources/hooks/__init__.py +1 -0
- tapps_agents/resources/hooks/templates/README.md +5 -0
- tapps_agents/resources/hooks/templates/__init__.py +1 -0
- tapps_agents/resources/hooks/templates/add-project-context.yaml +8 -0
- tapps_agents/resources/hooks/templates/auto-format-js.yaml +10 -0
- tapps_agents/resources/hooks/templates/auto-format-python.yaml +10 -0
- tapps_agents/resources/hooks/templates/git-commit-check.yaml +7 -0
- tapps_agents/resources/hooks/templates/notify-on-complete.yaml +8 -0
- tapps_agents/resources/hooks/templates/quality-gate.yaml +8 -0
- tapps_agents/resources/hooks/templates/security-scan-on-edit.yaml +10 -0
- tapps_agents/resources/hooks/templates/session-end-log.yaml +7 -0
- tapps_agents/resources/hooks/templates/show-beads-ready.yaml +8 -0
- tapps_agents/resources/hooks/templates/test-on-edit.yaml +10 -0
- tapps_agents/resources/hooks/templates/update-docs-on-complete.yaml +8 -0
- tapps_agents/resources/hooks/templates/user-prompt-log.yaml +7 -0
- tapps_agents/resources/scripts/__init__.py +1 -0
- tapps_agents/resources/scripts/set_bd_path.ps1 +51 -0
- tapps_agents/resources/workflows/__init__.py +1 -0
- tapps_agents/resources/workflows/presets/__init__.py +1 -0
- tapps_agents/resources/workflows/presets/brownfield-analysis.yaml +235 -0
- tapps_agents/resources/workflows/presets/fix.yaml +78 -0
- tapps_agents/resources/workflows/presets/full-sdlc.yaml +122 -0
- tapps_agents/resources/workflows/presets/quality.yaml +82 -0
- tapps_agents/resources/workflows/presets/rapid-dev.yaml +84 -0
- tapps_agents/session/__init__.py +19 -19
- tapps_agents/session/manager.py +256 -256
- tapps_agents/simple_mode/__init__.py +66 -66
- tapps_agents/simple_mode/agent_contracts.py +357 -357
- tapps_agents/simple_mode/beads_hooks.py +151 -151
- tapps_agents/simple_mode/code_snippet_handler.py +382 -382
- tapps_agents/simple_mode/documentation_manager.py +395 -395
- tapps_agents/simple_mode/documentation_reader.py +187 -187
- tapps_agents/simple_mode/file_inference.py +292 -292
- tapps_agents/simple_mode/framework_change_detector.py +268 -268
- tapps_agents/simple_mode/intent_parser.py +510 -510
- tapps_agents/simple_mode/learning_progression.py +358 -358
- tapps_agents/simple_mode/nl_handler.py +700 -700
- tapps_agents/simple_mode/onboarding.py +253 -253
- tapps_agents/simple_mode/orchestrators/__init__.py +38 -38
- tapps_agents/simple_mode/orchestrators/breakdown_orchestrator.py +49 -49
- tapps_agents/simple_mode/orchestrators/brownfield_orchestrator.py +135 -135
- tapps_agents/simple_mode/orchestrators/deliverable_checklist.py +349 -349
- tapps_agents/simple_mode/orchestrators/enhance_orchestrator.py +53 -53
- tapps_agents/simple_mode/orchestrators/epic_orchestrator.py +122 -122
- tapps_agents/simple_mode/orchestrators/explore_orchestrator.py +184 -184
- tapps_agents/simple_mode/orchestrators/plan_analysis_orchestrator.py +206 -206
- tapps_agents/simple_mode/orchestrators/pr_orchestrator.py +237 -237
- tapps_agents/simple_mode/orchestrators/refactor_orchestrator.py +222 -222
- tapps_agents/simple_mode/orchestrators/requirements_tracer.py +262 -262
- tapps_agents/simple_mode/orchestrators/resume_orchestrator.py +210 -210
- tapps_agents/simple_mode/orchestrators/review_orchestrator.py +161 -161
- tapps_agents/simple_mode/orchestrators/test_orchestrator.py +82 -82
- tapps_agents/simple_mode/output_aggregator.py +340 -340
- tapps_agents/simple_mode/result_formatters.py +598 -598
- tapps_agents/simple_mode/step_dependencies.py +382 -382
- tapps_agents/simple_mode/step_results.py +276 -276
- tapps_agents/simple_mode/streaming.py +388 -388
- tapps_agents/simple_mode/variations.py +129 -129
- tapps_agents/simple_mode/visual_feedback.py +238 -238
- tapps_agents/simple_mode/zero_config.py +274 -274
- tapps_agents/suggestions/__init__.py +8 -8
- tapps_agents/suggestions/inline_suggester.py +52 -52
- tapps_agents/templates/__init__.py +8 -8
- tapps_agents/templates/microservice_generator.py +274 -274
- tapps_agents/utils/env_validator.py +291 -291
- tapps_agents/workflow/__init__.py +171 -171
- tapps_agents/workflow/acceptance_verifier.py +132 -132
- tapps_agents/workflow/agent_handlers/__init__.py +41 -41
- tapps_agents/workflow/agent_handlers/analyst_handler.py +75 -75
- tapps_agents/workflow/agent_handlers/architect_handler.py +107 -107
- tapps_agents/workflow/agent_handlers/base.py +84 -84
- tapps_agents/workflow/agent_handlers/debugger_handler.py +100 -100
- tapps_agents/workflow/agent_handlers/designer_handler.py +110 -110
- tapps_agents/workflow/agent_handlers/documenter_handler.py +94 -94
- tapps_agents/workflow/agent_handlers/implementer_handler.py +235 -235
- tapps_agents/workflow/agent_handlers/ops_handler.py +62 -62
- tapps_agents/workflow/agent_handlers/orchestrator_handler.py +43 -43
- tapps_agents/workflow/agent_handlers/planner_handler.py +98 -98
- tapps_agents/workflow/agent_handlers/registry.py +119 -119
- tapps_agents/workflow/agent_handlers/reviewer_handler.py +119 -119
- tapps_agents/workflow/agent_handlers/tester_handler.py +69 -69
- tapps_agents/workflow/analytics_accessor.py +337 -337
- tapps_agents/workflow/analytics_alerts.py +416 -416
- tapps_agents/workflow/analytics_dashboard_cursor.py +281 -281
- tapps_agents/workflow/analytics_dual_write.py +103 -103
- tapps_agents/workflow/analytics_integration.py +119 -119
- tapps_agents/workflow/analytics_query_parser.py +278 -278
- tapps_agents/workflow/analytics_visualizer.py +259 -259
- tapps_agents/workflow/artifact_helper.py +204 -204
- tapps_agents/workflow/audit_logger.py +263 -263
- tapps_agents/workflow/auto_execution_config.py +340 -340
- tapps_agents/workflow/auto_progression.py +586 -586
- tapps_agents/workflow/branch_cleanup.py +349 -349
- tapps_agents/workflow/checkpoint.py +256 -256
- tapps_agents/workflow/checkpoint_manager.py +178 -178
- tapps_agents/workflow/code_artifact.py +179 -179
- tapps_agents/workflow/common_enums.py +96 -96
- tapps_agents/workflow/confirmation_handler.py +130 -130
- tapps_agents/workflow/context_analyzer.py +222 -222
- tapps_agents/workflow/context_artifact.py +230 -230
- tapps_agents/workflow/cursor_chat.py +94 -94
- tapps_agents/workflow/cursor_skill_helper.py +516 -516
- tapps_agents/workflow/dependency_resolver.py +244 -244
- tapps_agents/workflow/design_artifact.py +156 -156
- tapps_agents/workflow/detector.py +751 -751
- tapps_agents/workflow/direct_execution_fallback.py +301 -301
- tapps_agents/workflow/docs_artifact.py +168 -168
- tapps_agents/workflow/enforcer.py +389 -389
- tapps_agents/workflow/enhancement_artifact.py +142 -142
- tapps_agents/workflow/error_recovery.py +806 -806
- tapps_agents/workflow/event_bus.py +183 -183
- tapps_agents/workflow/event_log.py +612 -612
- tapps_agents/workflow/events.py +63 -63
- tapps_agents/workflow/exceptions.py +43 -43
- tapps_agents/workflow/execution_graph.py +498 -498
- tapps_agents/workflow/execution_plan.py +126 -126
- tapps_agents/workflow/file_utils.py +186 -186
- tapps_agents/workflow/gate_evaluator.py +182 -182
- tapps_agents/workflow/gate_integration.py +200 -200
- tapps_agents/workflow/graph_visualizer.py +130 -130
- tapps_agents/workflow/health_checker.py +206 -206
- tapps_agents/workflow/logging_helper.py +243 -243
- tapps_agents/workflow/manifest.py +582 -582
- tapps_agents/workflow/marker_writer.py +250 -250
- tapps_agents/workflow/messaging.py +325 -325
- tapps_agents/workflow/metadata_models.py +91 -91
- tapps_agents/workflow/metrics_integration.py +226 -226
- tapps_agents/workflow/migration_utils.py +116 -116
- tapps_agents/workflow/models.py +148 -148
- tapps_agents/workflow/nlp_config.py +198 -198
- tapps_agents/workflow/nlp_error_handler.py +207 -207
- tapps_agents/workflow/nlp_executor.py +163 -163
- tapps_agents/workflow/nlp_parser.py +528 -528
- tapps_agents/workflow/observability_dashboard.py +451 -451
- tapps_agents/workflow/observer.py +170 -170
- tapps_agents/workflow/ops_artifact.py +257 -257
- tapps_agents/workflow/output_passing.py +214 -214
- tapps_agents/workflow/parallel_executor.py +463 -463
- tapps_agents/workflow/planning_artifact.py +179 -179
- tapps_agents/workflow/preset_loader.py +285 -285
- tapps_agents/workflow/preset_recommender.py +270 -270
- tapps_agents/workflow/progress_logger.py +145 -145
- tapps_agents/workflow/progress_manager.py +303 -303
- tapps_agents/workflow/progress_monitor.py +186 -186
- tapps_agents/workflow/progress_updates.py +423 -423
- tapps_agents/workflow/quality_artifact.py +158 -158
- tapps_agents/workflow/quality_loopback.py +101 -101
- tapps_agents/workflow/recommender.py +387 -387
- tapps_agents/workflow/remediation_loop.py +166 -166
- tapps_agents/workflow/result_aggregator.py +300 -300
- tapps_agents/workflow/review_artifact.py +185 -185
- tapps_agents/workflow/schema_validator.py +522 -522
- tapps_agents/workflow/session_handoff.py +178 -178
- tapps_agents/workflow/skill_invoker.py +648 -648
- tapps_agents/workflow/state_manager.py +756 -756
- tapps_agents/workflow/state_persistence_config.py +331 -331
- tapps_agents/workflow/status_monitor.py +449 -449
- tapps_agents/workflow/step_checkpoint.py +314 -314
- tapps_agents/workflow/step_details.py +201 -201
- tapps_agents/workflow/story_models.py +147 -147
- tapps_agents/workflow/streaming.py +416 -416
- tapps_agents/workflow/suggestion_engine.py +552 -552
- tapps_agents/workflow/testing_artifact.py +186 -186
- tapps_agents/workflow/timeline.py +158 -158
- tapps_agents/workflow/token_integration.py +209 -209
- tapps_agents/workflow/validation.py +217 -217
- tapps_agents/workflow/visual_feedback.py +391 -391
- tapps_agents/workflow/workflow_chain.py +95 -95
- tapps_agents/workflow/workflow_summary.py +219 -219
- tapps_agents/workflow/worktree_manager.py +724 -724
- {tapps_agents-3.6.0.dist-info → tapps_agents-3.6.1.dist-info}/METADATA +672 -672
- tapps_agents-3.6.1.dist-info/RECORD +883 -0
- {tapps_agents-3.6.0.dist-info → tapps_agents-3.6.1.dist-info}/licenses/LICENSE +22 -22
- tapps_agents-3.6.0.dist-info/RECORD +0 -758
- {tapps_agents-3.6.0.dist-info → tapps_agents-3.6.1.dist-info}/WHEEL +0 -0
- {tapps_agents-3.6.0.dist-info → tapps_agents-3.6.1.dist-info}/entry_points.txt +0 -0
- {tapps_agents-3.6.0.dist-info → tapps_agents-3.6.1.dist-info}/top_level.txt +0 -0
|
@@ -1,786 +1,786 @@
|
|
|
1
|
-
"""
|
|
2
|
-
Long-Duration Support for 30+ Hour Operations
|
|
3
|
-
|
|
4
|
-
Provides durability guarantees, failure recovery, and progress tracking for
|
|
5
|
-
long-running tasks.
|
|
6
|
-
"""
|
|
7
|
-
|
|
8
|
-
from __future__ import annotations
|
|
9
|
-
|
|
10
|
-
import json
|
|
11
|
-
import logging
|
|
12
|
-
import shutil
|
|
13
|
-
import threading
|
|
14
|
-
import time
|
|
15
|
-
from dataclasses import asdict, dataclass, field
|
|
16
|
-
from datetime import UTC, datetime, timedelta
|
|
17
|
-
from enum import Enum
|
|
18
|
-
from pathlib import Path
|
|
19
|
-
from typing import Any
|
|
20
|
-
|
|
21
|
-
from .checkpoint_manager import CheckpointManager, TaskCheckpoint
|
|
22
|
-
from .hardware_profiler import HardwareProfile, HardwareProfiler
|
|
23
|
-
from .resource_aware_executor import ResourceAwareExecutor
|
|
24
|
-
from .session_manager import AgentSession, SessionManager
|
|
25
|
-
from .task_state import TaskState, TaskStateManager
|
|
26
|
-
|
|
27
|
-
logger = logging.getLogger(__name__)
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
class DurabilityLevel(Enum):
|
|
31
|
-
"""Durability guarantee level."""
|
|
32
|
-
|
|
33
|
-
BASIC = "basic" # Checkpoints every 10 minutes
|
|
34
|
-
STANDARD = "standard" # Checkpoints every 5 minutes
|
|
35
|
-
HIGH = "high" # Checkpoints every 2 minutes + artifact backup
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
@dataclass
|
|
39
|
-
class ProgressSnapshot:
|
|
40
|
-
"""Snapshot of task progress at a point in time."""
|
|
41
|
-
|
|
42
|
-
timestamp: datetime
|
|
43
|
-
progress: float # 0.0 to 1.0
|
|
44
|
-
current_step: str
|
|
45
|
-
steps_completed: int
|
|
46
|
-
total_steps: int
|
|
47
|
-
estimated_remaining_hours: float | None = None
|
|
48
|
-
metadata: dict[str, Any] = field(default_factory=dict)
|
|
49
|
-
|
|
50
|
-
def to_dict(self) -> dict[str, Any]:
|
|
51
|
-
"""Convert to dictionary."""
|
|
52
|
-
data = asdict(self)
|
|
53
|
-
data["timestamp"] = self.timestamp.isoformat()
|
|
54
|
-
return data
|
|
55
|
-
|
|
56
|
-
@classmethod
|
|
57
|
-
def from_dict(cls, data: dict[str, Any]) -> ProgressSnapshot:
|
|
58
|
-
"""Create from dictionary."""
|
|
59
|
-
data = data.copy()
|
|
60
|
-
data["timestamp"] = datetime.fromisoformat(data["timestamp"])
|
|
61
|
-
return cls(**data)
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
@dataclass
|
|
65
|
-
class FailureRecord:
|
|
66
|
-
"""Record of a failure event."""
|
|
67
|
-
|
|
68
|
-
timestamp: datetime
|
|
69
|
-
failure_type: str # "crash", "timeout", "resource_exhaustion", "error"
|
|
70
|
-
error_message: str
|
|
71
|
-
stack_trace: str | None = None
|
|
72
|
-
recovery_attempted: bool = False
|
|
73
|
-
recovery_successful: bool = False
|
|
74
|
-
checkpoint_available: bool = False
|
|
75
|
-
metadata: dict[str, Any] = field(default_factory=dict)
|
|
76
|
-
|
|
77
|
-
def to_dict(self) -> dict[str, Any]:
|
|
78
|
-
"""Convert to dictionary."""
|
|
79
|
-
data = asdict(self)
|
|
80
|
-
data["timestamp"] = self.timestamp.isoformat()
|
|
81
|
-
return data
|
|
82
|
-
|
|
83
|
-
@classmethod
|
|
84
|
-
def from_dict(cls, data: dict[str, Any]) -> FailureRecord:
|
|
85
|
-
"""Create from dictionary."""
|
|
86
|
-
data = data.copy()
|
|
87
|
-
data["timestamp"] = datetime.fromisoformat(data["timestamp"])
|
|
88
|
-
return cls(**data)
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
class ProgressTracker:
|
|
92
|
-
"""Tracks progress over long periods."""
|
|
93
|
-
|
|
94
|
-
def __init__(self, storage_dir: Path | None = None):
|
|
95
|
-
"""
|
|
96
|
-
Initialize progress tracker.
|
|
97
|
-
|
|
98
|
-
Args:
|
|
99
|
-
storage_dir: Directory to store progress snapshots
|
|
100
|
-
"""
|
|
101
|
-
self.storage_dir = (
|
|
102
|
-
Path(storage_dir) if storage_dir else Path(".tapps-agents/progress")
|
|
103
|
-
)
|
|
104
|
-
self.storage_dir.mkdir(parents=True, exist_ok=True)
|
|
105
|
-
|
|
106
|
-
self.snapshots: list[ProgressSnapshot] = []
|
|
107
|
-
self.max_snapshots = 1000 # Keep last 1000 snapshots
|
|
108
|
-
|
|
109
|
-
def record_progress(
|
|
110
|
-
self,
|
|
111
|
-
progress: float,
|
|
112
|
-
current_step: str,
|
|
113
|
-
steps_completed: int,
|
|
114
|
-
total_steps: int,
|
|
115
|
-
estimated_remaining_hours: float | None = None,
|
|
116
|
-
metadata: dict[str, Any] | None = None,
|
|
117
|
-
) -> ProgressSnapshot:
|
|
118
|
-
"""
|
|
119
|
-
Record a progress snapshot.
|
|
120
|
-
|
|
121
|
-
Args:
|
|
122
|
-
progress: Progress percentage (0.0 to 1.0)
|
|
123
|
-
current_step: Current step description
|
|
124
|
-
steps_completed: Number of steps completed
|
|
125
|
-
total_steps: Total number of steps
|
|
126
|
-
estimated_remaining_hours: Estimated hours remaining
|
|
127
|
-
metadata: Additional metadata
|
|
128
|
-
|
|
129
|
-
Returns:
|
|
130
|
-
ProgressSnapshot instance
|
|
131
|
-
"""
|
|
132
|
-
snapshot = ProgressSnapshot(
|
|
133
|
-
timestamp=datetime.now(UTC),
|
|
134
|
-
progress=progress,
|
|
135
|
-
current_step=current_step,
|
|
136
|
-
steps_completed=steps_completed,
|
|
137
|
-
total_steps=total_steps,
|
|
138
|
-
estimated_remaining_hours=estimated_remaining_hours,
|
|
139
|
-
metadata=metadata or {},
|
|
140
|
-
)
|
|
141
|
-
|
|
142
|
-
self.snapshots.append(snapshot)
|
|
143
|
-
if len(self.snapshots) > self.max_snapshots:
|
|
144
|
-
self.snapshots = self.snapshots[-self.max_snapshots :]
|
|
145
|
-
|
|
146
|
-
# Persist to disk
|
|
147
|
-
self._save_snapshot(snapshot)
|
|
148
|
-
|
|
149
|
-
return snapshot
|
|
150
|
-
|
|
151
|
-
def _save_snapshot(self, snapshot: ProgressSnapshot):
|
|
152
|
-
"""Save snapshot to disk."""
|
|
153
|
-
snapshot_file = (
|
|
154
|
-
self.storage_dir
|
|
155
|
-
/ f"snapshot_{snapshot.timestamp.strftime('%Y%m%d_%H%M%S')}.json"
|
|
156
|
-
)
|
|
157
|
-
with open(snapshot_file, "w") as f:
|
|
158
|
-
json.dump(snapshot.to_dict(), f, indent=2)
|
|
159
|
-
|
|
160
|
-
def get_latest_progress(self) -> ProgressSnapshot | None:
|
|
161
|
-
"""Get the latest progress snapshot."""
|
|
162
|
-
return self.snapshots[-1] if self.snapshots else None
|
|
163
|
-
|
|
164
|
-
def get_progress_history(
|
|
165
|
-
self, hours: float | None = None
|
|
166
|
-
) -> list[ProgressSnapshot]:
|
|
167
|
-
"""
|
|
168
|
-
Get progress history.
|
|
169
|
-
|
|
170
|
-
Args:
|
|
171
|
-
hours: Optional limit to last N hours
|
|
172
|
-
|
|
173
|
-
Returns:
|
|
174
|
-
List of progress snapshots
|
|
175
|
-
"""
|
|
176
|
-
if not hours:
|
|
177
|
-
return self.snapshots.copy()
|
|
178
|
-
|
|
179
|
-
cutoff = datetime.now(UTC) - timedelta(hours=hours)
|
|
180
|
-
return [s for s in self.snapshots if s.timestamp >= cutoff]
|
|
181
|
-
|
|
182
|
-
def calculate_velocity(self) -> float | None:
|
|
183
|
-
"""
|
|
184
|
-
Calculate progress velocity (progress per hour).
|
|
185
|
-
|
|
186
|
-
Returns:
|
|
187
|
-
Progress velocity or None if insufficient data
|
|
188
|
-
"""
|
|
189
|
-
if len(self.snapshots) < 2:
|
|
190
|
-
return None
|
|
191
|
-
|
|
192
|
-
first = self.snapshots[0]
|
|
193
|
-
last = self.snapshots[-1]
|
|
194
|
-
|
|
195
|
-
time_delta = (last.timestamp - first.timestamp).total_seconds() / 3600.0
|
|
196
|
-
if time_delta == 0:
|
|
197
|
-
return None
|
|
198
|
-
|
|
199
|
-
progress_delta = last.progress - first.progress
|
|
200
|
-
return progress_delta / time_delta
|
|
201
|
-
|
|
202
|
-
|
|
203
|
-
class DurabilityGuarantee:
|
|
204
|
-
"""Ensures task durability through frequent checkpoints and state persistence."""
|
|
205
|
-
|
|
206
|
-
def __init__(
|
|
207
|
-
self,
|
|
208
|
-
checkpoint_manager: CheckpointManager,
|
|
209
|
-
durability_level: DurabilityLevel = DurabilityLevel.STANDARD,
|
|
210
|
-
hardware_profile: HardwareProfile | None = None,
|
|
211
|
-
):
|
|
212
|
-
"""
|
|
213
|
-
Initialize durability guarantee.
|
|
214
|
-
|
|
215
|
-
Args:
|
|
216
|
-
checkpoint_manager: Checkpoint manager instance
|
|
217
|
-
durability_level: Durability level
|
|
218
|
-
hardware_profile: Hardware profile for optimization
|
|
219
|
-
"""
|
|
220
|
-
self.checkpoint_manager = checkpoint_manager
|
|
221
|
-
self.durability_level = durability_level
|
|
222
|
-
self.hardware_profile = hardware_profile or HardwareProfiler().detect_profile()
|
|
223
|
-
|
|
224
|
-
# Set checkpoint interval based on durability level and hardware
|
|
225
|
-
self.checkpoint_interval = self._get_checkpoint_interval()
|
|
226
|
-
|
|
227
|
-
self.last_checkpoint_time: datetime | None = None
|
|
228
|
-
self.checkpoint_count = 0
|
|
229
|
-
|
|
230
|
-
def _get_checkpoint_interval(self) -> float:
|
|
231
|
-
"""Checkpoint interval by durability level (workstation-like; hardware taxonomy removed)."""
|
|
232
|
-
base_intervals = {
|
|
233
|
-
DurabilityLevel.BASIC: 600.0, # 10 minutes
|
|
234
|
-
DurabilityLevel.STANDARD: 300.0, # 5 minutes
|
|
235
|
-
DurabilityLevel.HIGH: 120.0, # 2 minutes
|
|
236
|
-
}
|
|
237
|
-
return base_intervals[self.durability_level] * 1.2 # Workstation-like
|
|
238
|
-
|
|
239
|
-
def should_checkpoint(self) -> bool:
|
|
240
|
-
"""Check if a checkpoint should be created."""
|
|
241
|
-
if self.last_checkpoint_time is None:
|
|
242
|
-
return True
|
|
243
|
-
|
|
244
|
-
elapsed = (datetime.now(UTC) - self.last_checkpoint_time).total_seconds()
|
|
245
|
-
return elapsed >= self.checkpoint_interval
|
|
246
|
-
|
|
247
|
-
def create_checkpoint(
|
|
248
|
-
self,
|
|
249
|
-
task_id: str,
|
|
250
|
-
agent_id: str,
|
|
251
|
-
command: str,
|
|
252
|
-
state: str,
|
|
253
|
-
progress: float,
|
|
254
|
-
context: dict[str, Any] | None = None,
|
|
255
|
-
artifacts: list[str] | None = None,
|
|
256
|
-
) -> TaskCheckpoint:
|
|
257
|
-
"""
|
|
258
|
-
Create a checkpoint.
|
|
259
|
-
|
|
260
|
-
Args:
|
|
261
|
-
task_id: Task ID
|
|
262
|
-
agent_id: Agent ID
|
|
263
|
-
command: Command being executed
|
|
264
|
-
state: Current state (as string, will be converted to TaskState)
|
|
265
|
-
progress: Progress (0.0 to 1.0)
|
|
266
|
-
context: Agent context
|
|
267
|
-
artifacts: Generated artifacts
|
|
268
|
-
|
|
269
|
-
Returns:
|
|
270
|
-
Created checkpoint
|
|
271
|
-
"""
|
|
272
|
-
# Convert state string to TaskState and create TaskStateManager
|
|
273
|
-
try:
|
|
274
|
-
task_state = TaskState(state)
|
|
275
|
-
except ValueError:
|
|
276
|
-
# Default to RUNNING if state is not recognized
|
|
277
|
-
task_state = TaskState.RUNNING
|
|
278
|
-
|
|
279
|
-
state_manager = TaskStateManager(task_id=task_id, initial_state=task_state)
|
|
280
|
-
|
|
281
|
-
checkpoint = self.checkpoint_manager.create_checkpoint(
|
|
282
|
-
task_id=task_id,
|
|
283
|
-
agent_id=agent_id,
|
|
284
|
-
command=command,
|
|
285
|
-
state_manager=state_manager,
|
|
286
|
-
progress=progress,
|
|
287
|
-
context=context or {},
|
|
288
|
-
artifacts=artifacts or [],
|
|
289
|
-
)
|
|
290
|
-
|
|
291
|
-
self.last_checkpoint_time = datetime.now(UTC)
|
|
292
|
-
self.checkpoint_count += 1
|
|
293
|
-
|
|
294
|
-
logger.info(f"Created checkpoint {self.checkpoint_count} for task {task_id}")
|
|
295
|
-
|
|
296
|
-
return checkpoint
|
|
297
|
-
|
|
298
|
-
def backup_artifacts(self, artifacts: list[str], backup_dir: Path) -> list[Path]:
|
|
299
|
-
"""
|
|
300
|
-
Backup artifacts to a backup directory.
|
|
301
|
-
|
|
302
|
-
Args:
|
|
303
|
-
artifacts: List of artifact file paths
|
|
304
|
-
backup_dir: Backup directory
|
|
305
|
-
|
|
306
|
-
Returns:
|
|
307
|
-
List of backed up file paths
|
|
308
|
-
"""
|
|
309
|
-
if self.durability_level != DurabilityLevel.HIGH:
|
|
310
|
-
return [] # Only backup artifacts for HIGH durability
|
|
311
|
-
|
|
312
|
-
backup_dir = Path(backup_dir)
|
|
313
|
-
backup_dir.mkdir(parents=True, exist_ok=True)
|
|
314
|
-
|
|
315
|
-
backed_up = []
|
|
316
|
-
for artifact_path in artifacts:
|
|
317
|
-
artifact = Path(artifact_path)
|
|
318
|
-
if not artifact.exists():
|
|
319
|
-
continue
|
|
320
|
-
|
|
321
|
-
backup_path = backup_dir / artifact.name
|
|
322
|
-
try:
|
|
323
|
-
if artifact.is_file():
|
|
324
|
-
shutil.copy2(artifact, backup_path)
|
|
325
|
-
elif artifact.is_dir():
|
|
326
|
-
shutil.copytree(artifact, backup_path, dirs_exist_ok=True)
|
|
327
|
-
backed_up.append(backup_path)
|
|
328
|
-
except Exception as e:
|
|
329
|
-
logger.error(
|
|
330
|
-
f"Failed to backup artifact {artifact}: {e}", exc_info=True
|
|
331
|
-
)
|
|
332
|
-
|
|
333
|
-
return backed_up
|
|
334
|
-
|
|
335
|
-
|
|
336
|
-
class FailureRecovery:
|
|
337
|
-
"""Recovers from failures automatically."""
|
|
338
|
-
|
|
339
|
-
def __init__(
|
|
340
|
-
self,
|
|
341
|
-
checkpoint_manager: CheckpointManager,
|
|
342
|
-
session_manager: SessionManager | None = None,
|
|
343
|
-
):
|
|
344
|
-
"""
|
|
345
|
-
Initialize failure recovery.
|
|
346
|
-
|
|
347
|
-
Args:
|
|
348
|
-
checkpoint_manager: Checkpoint manager instance
|
|
349
|
-
session_manager: Optional session manager
|
|
350
|
-
"""
|
|
351
|
-
self.checkpoint_manager = checkpoint_manager
|
|
352
|
-
self.session_manager = session_manager
|
|
353
|
-
|
|
354
|
-
self.failure_history: list[FailureRecord] = []
|
|
355
|
-
self.max_failures = 100 # Keep last 100 failures
|
|
356
|
-
|
|
357
|
-
def record_failure(
|
|
358
|
-
self,
|
|
359
|
-
failure_type: str,
|
|
360
|
-
error_message: str,
|
|
361
|
-
stack_trace: str | None = None,
|
|
362
|
-
task_id: str | None = None,
|
|
363
|
-
metadata: dict[str, Any] | None = None,
|
|
364
|
-
) -> FailureRecord:
|
|
365
|
-
"""
|
|
366
|
-
Record a failure event.
|
|
367
|
-
|
|
368
|
-
Args:
|
|
369
|
-
failure_type: Type of failure
|
|
370
|
-
error_message: Error message
|
|
371
|
-
stack_trace: Optional stack trace
|
|
372
|
-
task_id: Optional task ID
|
|
373
|
-
metadata: Additional metadata
|
|
374
|
-
|
|
375
|
-
Returns:
|
|
376
|
-
FailureRecord instance
|
|
377
|
-
"""
|
|
378
|
-
# Check if checkpoint is available (prefer list_checkpoints for testability/mocks)
|
|
379
|
-
checkpoint_available = False
|
|
380
|
-
if task_id:
|
|
381
|
-
try:
|
|
382
|
-
listed = self.checkpoint_manager.list_checkpoints()
|
|
383
|
-
except (OSError, PermissionError) as e:
|
|
384
|
-
# File system errors when listing files
|
|
385
|
-
logger.debug(f"Failed to list files: {e}")
|
|
386
|
-
listed = []
|
|
387
|
-
|
|
388
|
-
# `list_checkpoints()` may return task_ids or TaskCheckpoint objects depending on implementation/mocks.
|
|
389
|
-
if listed:
|
|
390
|
-
first = listed[0]
|
|
391
|
-
if hasattr(first, "task_id"):
|
|
392
|
-
checkpoint_available = any(
|
|
393
|
-
getattr(cp, "task_id", None) == task_id for cp in listed
|
|
394
|
-
)
|
|
395
|
-
else:
|
|
396
|
-
checkpoint_available = task_id in listed
|
|
397
|
-
|
|
398
|
-
failure_metadata = metadata or {}
|
|
399
|
-
if task_id:
|
|
400
|
-
failure_metadata["task_id"] = task_id
|
|
401
|
-
|
|
402
|
-
failure = FailureRecord(
|
|
403
|
-
timestamp=datetime.now(UTC),
|
|
404
|
-
failure_type=failure_type,
|
|
405
|
-
error_message=error_message,
|
|
406
|
-
stack_trace=stack_trace,
|
|
407
|
-
checkpoint_available=checkpoint_available,
|
|
408
|
-
metadata=failure_metadata,
|
|
409
|
-
)
|
|
410
|
-
|
|
411
|
-
self.failure_history.append(failure)
|
|
412
|
-
if len(self.failure_history) > self.max_failures:
|
|
413
|
-
self.failure_history = self.failure_history[-self.max_failures :]
|
|
414
|
-
|
|
415
|
-
logger.warning(f"Recorded failure: {failure_type} - {error_message}")
|
|
416
|
-
|
|
417
|
-
return failure
|
|
418
|
-
|
|
419
|
-
def recover_from_checkpoint(
|
|
420
|
-
self, task_id: str, agent_id: str | None = None
|
|
421
|
-
) -> TaskCheckpoint | None:
|
|
422
|
-
"""
|
|
423
|
-
Recover from the latest checkpoint.
|
|
424
|
-
|
|
425
|
-
Args:
|
|
426
|
-
task_id: Task ID
|
|
427
|
-
agent_id: Optional agent ID
|
|
428
|
-
|
|
429
|
-
Returns:
|
|
430
|
-
Latest checkpoint or None if not available
|
|
431
|
-
"""
|
|
432
|
-
# Prefer list_checkpoints() so mocked managers can provide TaskCheckpoint objects directly.
|
|
433
|
-
try:
|
|
434
|
-
listed: list[Any] = self.checkpoint_manager.list_checkpoints()
|
|
435
|
-
except (OSError, PermissionError) as e:
|
|
436
|
-
# File system errors when listing files
|
|
437
|
-
logger.debug(f"Failed to list files: {e}")
|
|
438
|
-
listed = []
|
|
439
|
-
|
|
440
|
-
if not listed:
|
|
441
|
-
logger.warning(f"No checkpoints available for task {task_id}")
|
|
442
|
-
return None
|
|
443
|
-
|
|
444
|
-
candidates: list[TaskCheckpoint] = []
|
|
445
|
-
first = listed[0]
|
|
446
|
-
|
|
447
|
-
if hasattr(first, "task_id"):
|
|
448
|
-
# Mock/test path: list contains TaskCheckpoint objects
|
|
449
|
-
candidates = [
|
|
450
|
-
cp for cp in listed if getattr(cp, "task_id", None) == task_id
|
|
451
|
-
]
|
|
452
|
-
else:
|
|
453
|
-
# Storage path: list contains task_id strings
|
|
454
|
-
if task_id in listed:
|
|
455
|
-
cp = self.checkpoint_manager.load_checkpoint(task_id)
|
|
456
|
-
if cp:
|
|
457
|
-
candidates = [cp]
|
|
458
|
-
|
|
459
|
-
if agent_id:
|
|
460
|
-
candidates = [
|
|
461
|
-
cp for cp in candidates if getattr(cp, "agent_id", None) == agent_id
|
|
462
|
-
]
|
|
463
|
-
|
|
464
|
-
if not candidates:
|
|
465
|
-
logger.warning(f"No checkpoints available for task {task_id}")
|
|
466
|
-
return None
|
|
467
|
-
|
|
468
|
-
# Choose latest by checkpoint_time if present
|
|
469
|
-
checkpoint = max(
|
|
470
|
-
candidates,
|
|
471
|
-
key=lambda cp: getattr(
|
|
472
|
-
cp, "checkpoint_time", datetime.min.replace(tzinfo=UTC)
|
|
473
|
-
),
|
|
474
|
-
)
|
|
475
|
-
|
|
476
|
-
# Validate checkpoint
|
|
477
|
-
if not checkpoint.validate():
|
|
478
|
-
logger.error(f"Checkpoint validation failed for task {task_id}")
|
|
479
|
-
return None
|
|
480
|
-
|
|
481
|
-
logger.info(
|
|
482
|
-
f"Recovering task {task_id} from checkpoint at {checkpoint.checkpoint_time}"
|
|
483
|
-
)
|
|
484
|
-
|
|
485
|
-
# Update failure record if this is a recovery attempt
|
|
486
|
-
if self.failure_history:
|
|
487
|
-
last_failure = self.failure_history[-1]
|
|
488
|
-
last_failure.recovery_attempted = True
|
|
489
|
-
last_failure.recovery_successful = True
|
|
490
|
-
|
|
491
|
-
return checkpoint
|
|
492
|
-
|
|
493
|
-
def get_recovery_strategy(self, failure_type: str) -> str:
    """
    Describe how to recover from the given kind of failure.

    Args:
        failure_type: Type of failure ("crash", "timeout",
            "resource_exhaustion" or "error")

    Returns:
        Human-readable recovery strategy; unknown failure types get the
        same plain "restore and resume" strategy as a crash
    """
    fallback = "Restore from latest checkpoint and resume"
    known = {
        "crash": fallback,
        "timeout": "Restore from latest checkpoint and retry with extended timeout",
        "resource_exhaustion": "Restore from latest checkpoint and resume with resource limits",
        "error": "Restore from latest checkpoint and retry with error handling",
    }

    if failure_type in known:
        return known[failure_type]
    return fallback
|
|
511
|
-
|
|
512
|
-
def get_failure_history(self, task_id: str | None = None) -> list[FailureRecord]:
    """
    Return recorded failures, optionally restricted to one task.

    Args:
        task_id: Optional task ID to filter by; a falsy value (None or "")
            returns the whole history

    Returns:
        New list of failure records; the internal history list itself is
        never handed out
    """
    history = self.failure_history
    if task_id:
        # The task id is stored in each record's metadata.
        return [
            record for record in history if record.metadata.get("task_id") == task_id
        ]
    return history.copy()
|
|
527
|
-
|
|
528
|
-
|
|
529
|
-
class LongDurationManager:
    """Manages 30+ hour operations with durability guarantees.

    Combines a session manager, a DurabilityGuarantee (checkpoint timing and
    creation), a FailureRecovery (failure log and checkpoint lookup) and a
    ProgressTracker, and runs a daemon thread that periodically checkpoints
    every active task.
    """

    def __init__(
        self,
        session_manager: SessionManager,
        checkpoint_manager: CheckpointManager,
        resource_executor: ResourceAwareExecutor | None = None,
        durability_level: DurabilityLevel = DurabilityLevel.STANDARD,
        hardware_profile: HardwareProfile | None = None,
    ):
        """
        Initialize long-duration manager.

        Args:
            session_manager: Session manager instance
            checkpoint_manager: Checkpoint manager instance
            resource_executor: Optional resource-aware executor
            durability_level: Durability guarantee level
            hardware_profile: Hardware profile
        """
        self.session_manager = session_manager
        self.checkpoint_manager = checkpoint_manager
        self.resource_executor = resource_executor

        self.durability = DurabilityGuarantee(
            checkpoint_manager=checkpoint_manager,
            durability_level=durability_level,
            hardware_profile=hardware_profile,
        )

        self.failure_recovery = FailureRecovery(
            checkpoint_manager=checkpoint_manager, session_manager=session_manager
        )

        self.progress_tracker = ProgressTracker()

        # task_id -> session; written and iterated under self._lock
        self.active_tasks: dict[str, AgentSession] = {}
        self._lock = threading.Lock()

        # Background checkpoint thread
        self._checkpoint_thread: threading.Thread | None = None
        self._checkpoint_active = False
        # Set by stop() so the loop wakes up at once instead of finishing
        # its one-minute pause.
        self._stop_event = threading.Event()

    def start_long_duration_task(
        self,
        task_id: str,
        agent_id: str,
        command: str,
        initial_context: dict[str, Any] | None = None,
    ) -> AgentSession:
        """
        Start a long-duration task.

        Creates a session, registers it as active, writes an initial
        checkpoint at progress 0.0 and makes sure the background checkpoint
        thread is running.

        Args:
            task_id: Task ID
            agent_id: Agent ID
            command: Command to execute
            initial_context: Initial context; its keys are merged into the
                session metadata after "task_id" and "command"

        Returns:
            Created session
        """
        # Create session with metadata
        metadata = {"task_id": task_id, "command": command, **(initial_context or {})}
        session = self.session_manager.create_session(
            agent_id=agent_id, metadata=metadata
        )

        with self._lock:
            self.active_tasks[task_id] = session

        # Create initial checkpoint
        self.durability.create_checkpoint(
            task_id=task_id,
            agent_id=agent_id,
            command=command,
            state="running",
            progress=0.0,
            context=initial_context or {},
        )

        # Start background checkpointing
        self._start_checkpoint_thread()

        logger.info(
            f"Started long-duration task {task_id} with session {session.session_id}"
        )

        return session

    def update_progress(
        self,
        task_id: str,
        progress: float,
        current_step: str,
        steps_completed: int,
        total_steps: int,
        estimated_remaining_hours: float | None = None,
        metadata: dict[str, Any] | None = None,
    ):
        """
        Update task progress.

        Records a snapshot in the progress tracker and, when a checkpoint is
        due and the task is active, creates one. At HIGH durability the
        session's artifacts are also backed up.

        Args:
            task_id: Task ID
            progress: Progress (0.0 to 1.0)
            current_step: Current step description
            steps_completed: Steps completed
            total_steps: Total steps
            estimated_remaining_hours: Estimated hours remaining
            metadata: Additional metadata
        """
        self.progress_tracker.record_progress(
            progress=progress,
            current_step=current_step,
            steps_completed=steps_completed,
            total_steps=total_steps,
            estimated_remaining_hours=estimated_remaining_hours,
            metadata=metadata or {},
        )

        # Create checkpoint if needed
        if self.durability.should_checkpoint():
            session = self.active_tasks.get(task_id)
            if session:
                self.durability.create_checkpoint(
                    task_id=task_id,
                    agent_id=session.agent_id,
                    command=session.metadata.get("command", ""),
                    state=session.state.value,
                    progress=progress,
                    context=session.metadata,
                    artifacts=session.metadata.get("artifacts", []),
                )

                # Backup artifacts if HIGH durability
                if self.durability.durability_level == DurabilityLevel.HIGH:
                    artifacts = session.metadata.get("artifacts", [])
                    if artifacts:
                        backup_dir = Path(".tapps-agents/backups") / task_id
                        self.durability.backup_artifacts(artifacts, backup_dir)

    def handle_failure(
        self,
        task_id: str,
        failure_type: str,
        error_message: str,
        stack_trace: str | None = None,
    ) -> TaskCheckpoint | None:
        """
        Handle a task failure.

        The failure is always recorded; recovery is attempted only when the
        task has an active session.

        Args:
            task_id: Task ID
            failure_type: Type of failure
            error_message: Error message
            stack_trace: Optional stack trace

        Returns:
            Recovery checkpoint or None
        """
        # Record failure
        self.failure_recovery.record_failure(
            failure_type=failure_type,
            error_message=error_message,
            stack_trace=stack_trace,
            task_id=task_id,
        )

        # Attempt recovery
        session = self.active_tasks.get(task_id)
        if not session:
            logger.error(f"No active session found for task {task_id}")
            return None

        checkpoint = self.failure_recovery.recover_from_checkpoint(
            task_id=task_id, agent_id=session.agent_id
        )

        if checkpoint:
            logger.info(f"Recovery checkpoint available for task {task_id}")
            # Update session with recovery checkpoint
            self.session_manager.add_checkpoint(session.session_id, checkpoint)
        else:
            logger.warning(f"No recovery checkpoint available for task {task_id}")

        return checkpoint

    def get_progress(self, task_id: str) -> ProgressSnapshot | None:
        """Get latest progress for a task.

        NOTE: ``task_id`` is currently unused; the single shared tracker's
        latest snapshot is returned.
        """
        return self.progress_tracker.get_latest_progress()

    def get_progress_history(
        self, task_id: str, hours: float | None = None
    ) -> list[ProgressSnapshot]:
        """Get progress history for a task.

        NOTE: ``task_id`` is currently unused; the shared tracker's history
        is returned, optionally limited to the last ``hours`` hours.
        """
        return self.progress_tracker.get_progress_history(hours=hours)

    def _start_checkpoint_thread(self):
        """Start background checkpoint thread (no-op if already running)."""
        if self._checkpoint_active:
            return

        self._stop_event.clear()
        self._checkpoint_active = True
        self._checkpoint_thread = threading.Thread(
            target=self._checkpoint_loop, daemon=True
        )
        self._checkpoint_thread.start()

    def _checkpoint_loop(self):
        """Background checkpoint loop.

        Wakes once a minute (or immediately when stop() is called) and, if a
        checkpoint is due, checkpoints every active task.
        """
        while self._checkpoint_active:
            # Check every minute; wait() returns True as soon as stop()
            # sets the event, so shutdown does not have to sit out the pause.
            if self._stop_event.wait(60):
                break

            try:
                with self._lock:
                    # Decide once per cycle. create_checkpoint() resets the
                    # timer that should_checkpoint() reads, so asking again
                    # for each task would skip every task after the first.
                    if not self.durability.should_checkpoint():
                        continue

                    # Get latest progress (the tracker is shared by all tasks)
                    progress_snapshot = self.progress_tracker.get_latest_progress()
                    progress = (
                        progress_snapshot.progress if progress_snapshot else 0.0
                    )

                    for task_id, session in list(self.active_tasks.items()):
                        try:
                            checkpoint = self.durability.create_checkpoint(
                                task_id=task_id,
                                agent_id=session.agent_id,
                                command=session.metadata.get("command", ""),
                                state=session.state.value,
                                progress=progress,
                                context=session.metadata,
                                artifacts=session.metadata.get("artifacts", []),
                            )

                            self.session_manager.add_checkpoint(
                                session.session_id, checkpoint
                            )
                        except Exception as e:
                            # One failing task must not cost the remaining
                            # tasks their checkpoint for this cycle.
                            logger.error(
                                f"Error in checkpoint loop: {e}", exc_info=True
                            )
            except Exception as e:
                logger.error(f"Error in checkpoint loop: {e}", exc_info=True)

    def stop(self):
        """Stop the long-duration manager."""
        self._checkpoint_active = False
        # Wake the loop so the join below can finish within its timeout.
        self._stop_event.set()
        if self._checkpoint_thread:
            self._checkpoint_thread.join(timeout=5.0)

        logger.info("Long-duration manager stopped")

    def __enter__(self):
        """Context manager entry."""
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        """Context manager exit."""
        self.stop()
|
|
1
|
+
"""
|
|
2
|
+
Long-Duration Support for 30+ Hour Operations
|
|
3
|
+
|
|
4
|
+
Provides durability guarantees, failure recovery, and progress tracking for
|
|
5
|
+
long-running tasks.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
from __future__ import annotations
|
|
9
|
+
|
|
10
|
+
import json
|
|
11
|
+
import logging
|
|
12
|
+
import shutil
|
|
13
|
+
import threading
|
|
14
|
+
import time
|
|
15
|
+
from dataclasses import asdict, dataclass, field
|
|
16
|
+
from datetime import UTC, datetime, timedelta
|
|
17
|
+
from enum import Enum
|
|
18
|
+
from pathlib import Path
|
|
19
|
+
from typing import Any
|
|
20
|
+
|
|
21
|
+
from .checkpoint_manager import CheckpointManager, TaskCheckpoint
|
|
22
|
+
from .hardware_profiler import HardwareProfile, HardwareProfiler
|
|
23
|
+
from .resource_aware_executor import ResourceAwareExecutor
|
|
24
|
+
from .session_manager import AgentSession, SessionManager
|
|
25
|
+
from .task_state import TaskState, TaskStateManager
|
|
26
|
+
|
|
27
|
+
logger = logging.getLogger(__name__)
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
class DurabilityLevel(Enum):
    """How aggressively a long-running task is checkpointed."""

    # Checkpoints every 10 minutes
    BASIC = "basic"
    # Checkpoints every 5 minutes
    STANDARD = "standard"
    # Checkpoints every 2 minutes + artifact backup
    HIGH = "high"
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
@dataclass
class ProgressSnapshot:
    """Progress of a task as observed at one moment."""

    timestamp: datetime
    progress: float  # 0.0 to 1.0
    current_step: str
    steps_completed: int
    total_steps: int
    estimated_remaining_hours: float | None = None
    metadata: dict[str, Any] = field(default_factory=dict)

    def to_dict(self) -> dict[str, Any]:
        """Return the snapshot as a dict with the timestamp as an ISO-8601 string."""
        payload = asdict(self)
        payload.update(timestamp=self.timestamp.isoformat())
        return payload

    @classmethod
    def from_dict(cls, data: dict[str, Any]) -> ProgressSnapshot:
        """Build a snapshot from a ``to_dict()``-style mapping; ``data`` is not modified."""
        fields = {**data, "timestamp": datetime.fromisoformat(data["timestamp"])}
        return cls(**fields)
|
|
62
|
+
|
|
63
|
+
|
|
64
|
+
@dataclass
class FailureRecord:
    """One recorded failure and what is known about recovering from it."""

    timestamp: datetime
    failure_type: str  # "crash", "timeout", "resource_exhaustion", "error"
    error_message: str
    stack_trace: str | None = None
    recovery_attempted: bool = False
    recovery_successful: bool = False
    checkpoint_available: bool = False
    metadata: dict[str, Any] = field(default_factory=dict)

    def to_dict(self) -> dict[str, Any]:
        """Return the record as a dict with the timestamp as an ISO-8601 string."""
        payload = asdict(self)
        payload.update(timestamp=self.timestamp.isoformat())
        return payload

    @classmethod
    def from_dict(cls, data: dict[str, Any]) -> FailureRecord:
        """Build a record from a ``to_dict()``-style mapping; ``data`` is not modified."""
        fields = {**data, "timestamp": datetime.fromisoformat(data["timestamp"])}
        return cls(**fields)
|
|
89
|
+
|
|
90
|
+
|
|
91
|
+
class ProgressTracker:
|
|
92
|
+
"""Tracks progress over long periods."""
|
|
93
|
+
|
|
94
|
+
def __init__(self, storage_dir: Path | None = None):
|
|
95
|
+
"""
|
|
96
|
+
Initialize progress tracker.
|
|
97
|
+
|
|
98
|
+
Args:
|
|
99
|
+
storage_dir: Directory to store progress snapshots
|
|
100
|
+
"""
|
|
101
|
+
self.storage_dir = (
|
|
102
|
+
Path(storage_dir) if storage_dir else Path(".tapps-agents/progress")
|
|
103
|
+
)
|
|
104
|
+
self.storage_dir.mkdir(parents=True, exist_ok=True)
|
|
105
|
+
|
|
106
|
+
self.snapshots: list[ProgressSnapshot] = []
|
|
107
|
+
self.max_snapshots = 1000 # Keep last 1000 snapshots
|
|
108
|
+
|
|
109
|
+
def record_progress(
|
|
110
|
+
self,
|
|
111
|
+
progress: float,
|
|
112
|
+
current_step: str,
|
|
113
|
+
steps_completed: int,
|
|
114
|
+
total_steps: int,
|
|
115
|
+
estimated_remaining_hours: float | None = None,
|
|
116
|
+
metadata: dict[str, Any] | None = None,
|
|
117
|
+
) -> ProgressSnapshot:
|
|
118
|
+
"""
|
|
119
|
+
Record a progress snapshot.
|
|
120
|
+
|
|
121
|
+
Args:
|
|
122
|
+
progress: Progress percentage (0.0 to 1.0)
|
|
123
|
+
current_step: Current step description
|
|
124
|
+
steps_completed: Number of steps completed
|
|
125
|
+
total_steps: Total number of steps
|
|
126
|
+
estimated_remaining_hours: Estimated hours remaining
|
|
127
|
+
metadata: Additional metadata
|
|
128
|
+
|
|
129
|
+
Returns:
|
|
130
|
+
ProgressSnapshot instance
|
|
131
|
+
"""
|
|
132
|
+
snapshot = ProgressSnapshot(
|
|
133
|
+
timestamp=datetime.now(UTC),
|
|
134
|
+
progress=progress,
|
|
135
|
+
current_step=current_step,
|
|
136
|
+
steps_completed=steps_completed,
|
|
137
|
+
total_steps=total_steps,
|
|
138
|
+
estimated_remaining_hours=estimated_remaining_hours,
|
|
139
|
+
metadata=metadata or {},
|
|
140
|
+
)
|
|
141
|
+
|
|
142
|
+
self.snapshots.append(snapshot)
|
|
143
|
+
if len(self.snapshots) > self.max_snapshots:
|
|
144
|
+
self.snapshots = self.snapshots[-self.max_snapshots :]
|
|
145
|
+
|
|
146
|
+
# Persist to disk
|
|
147
|
+
self._save_snapshot(snapshot)
|
|
148
|
+
|
|
149
|
+
return snapshot
|
|
150
|
+
|
|
151
|
+
def _save_snapshot(self, snapshot: ProgressSnapshot):
|
|
152
|
+
"""Save snapshot to disk."""
|
|
153
|
+
snapshot_file = (
|
|
154
|
+
self.storage_dir
|
|
155
|
+
/ f"snapshot_{snapshot.timestamp.strftime('%Y%m%d_%H%M%S')}.json"
|
|
156
|
+
)
|
|
157
|
+
with open(snapshot_file, "w") as f:
|
|
158
|
+
json.dump(snapshot.to_dict(), f, indent=2)
|
|
159
|
+
|
|
160
|
+
def get_latest_progress(self) -> ProgressSnapshot | None:
|
|
161
|
+
"""Get the latest progress snapshot."""
|
|
162
|
+
return self.snapshots[-1] if self.snapshots else None
|
|
163
|
+
|
|
164
|
+
def get_progress_history(
|
|
165
|
+
self, hours: float | None = None
|
|
166
|
+
) -> list[ProgressSnapshot]:
|
|
167
|
+
"""
|
|
168
|
+
Get progress history.
|
|
169
|
+
|
|
170
|
+
Args:
|
|
171
|
+
hours: Optional limit to last N hours
|
|
172
|
+
|
|
173
|
+
Returns:
|
|
174
|
+
List of progress snapshots
|
|
175
|
+
"""
|
|
176
|
+
if not hours:
|
|
177
|
+
return self.snapshots.copy()
|
|
178
|
+
|
|
179
|
+
cutoff = datetime.now(UTC) - timedelta(hours=hours)
|
|
180
|
+
return [s for s in self.snapshots if s.timestamp >= cutoff]
|
|
181
|
+
|
|
182
|
+
def calculate_velocity(self) -> float | None:
|
|
183
|
+
"""
|
|
184
|
+
Calculate progress velocity (progress per hour).
|
|
185
|
+
|
|
186
|
+
Returns:
|
|
187
|
+
Progress velocity or None if insufficient data
|
|
188
|
+
"""
|
|
189
|
+
if len(self.snapshots) < 2:
|
|
190
|
+
return None
|
|
191
|
+
|
|
192
|
+
first = self.snapshots[0]
|
|
193
|
+
last = self.snapshots[-1]
|
|
194
|
+
|
|
195
|
+
time_delta = (last.timestamp - first.timestamp).total_seconds() / 3600.0
|
|
196
|
+
if time_delta == 0:
|
|
197
|
+
return None
|
|
198
|
+
|
|
199
|
+
progress_delta = last.progress - first.progress
|
|
200
|
+
return progress_delta / time_delta
|
|
201
|
+
|
|
202
|
+
|
|
203
|
+
class DurabilityGuarantee:
    """Ensures task durability through frequent checkpoints and state persistence."""

    def __init__(
        self,
        checkpoint_manager: CheckpointManager,
        durability_level: DurabilityLevel = DurabilityLevel.STANDARD,
        hardware_profile: HardwareProfile | None = None,
    ):
        """
        Initialize durability guarantee.

        Args:
            checkpoint_manager: Checkpoint manager instance
            durability_level: Durability level
            hardware_profile: Hardware profile for optimization; detected via
                HardwareProfiler when not supplied
        """
        self.checkpoint_manager = checkpoint_manager
        self.durability_level = durability_level
        self.hardware_profile = hardware_profile or HardwareProfiler().detect_profile()

        # Set checkpoint interval based on durability level and hardware
        self.checkpoint_interval = self._get_checkpoint_interval()

        # One timer and counter for this instance, shared by every task
        # checkpointed through it.
        self.last_checkpoint_time: datetime | None = None
        self.checkpoint_count = 0

    def _get_checkpoint_interval(self) -> float:
        """Checkpoint interval by durability level (workstation-like; hardware taxonomy removed)."""
        base_intervals = {
            DurabilityLevel.BASIC: 600.0,  # 10 minutes
            DurabilityLevel.STANDARD: 300.0,  # 5 minutes
            DurabilityLevel.HIGH: 120.0,  # 2 minutes
        }
        return base_intervals[self.durability_level] * 1.2  # Workstation-like

    def should_checkpoint(self) -> bool:
        """Check if a checkpoint should be created.

        True when no checkpoint has been created yet, or when at least
        ``checkpoint_interval`` seconds have passed since the last one.
        """
        if self.last_checkpoint_time is None:
            return True

        elapsed = (datetime.now(UTC) - self.last_checkpoint_time).total_seconds()
        return elapsed >= self.checkpoint_interval

    def create_checkpoint(
        self,
        task_id: str,
        agent_id: str,
        command: str,
        state: str,
        progress: float,
        context: dict[str, Any] | None = None,
        artifacts: list[str] | None = None,
    ) -> TaskCheckpoint:
        """
        Create a checkpoint.

        Also resets the timer read by ``should_checkpoint()`` and increments
        ``checkpoint_count``.

        Args:
            task_id: Task ID
            agent_id: Agent ID
            command: Command being executed
            state: Current state (as string, will be converted to TaskState)
            progress: Progress (0.0 to 1.0)
            context: Agent context
            artifacts: Generated artifacts

        Returns:
            Created checkpoint
        """
        # Convert state string to TaskState and create TaskStateManager
        try:
            task_state = TaskState(state)
        except ValueError:
            # Default to RUNNING if state is not recognized
            task_state = TaskState.RUNNING

        state_manager = TaskStateManager(task_id=task_id, initial_state=task_state)

        checkpoint = self.checkpoint_manager.create_checkpoint(
            task_id=task_id,
            agent_id=agent_id,
            command=command,
            state_manager=state_manager,
            progress=progress,
            context=context or {},
            artifacts=artifacts or [],
        )

        self.last_checkpoint_time = datetime.now(UTC)
        self.checkpoint_count += 1

        logger.info(f"Created checkpoint {self.checkpoint_count} for task {task_id}")

        return checkpoint

    def backup_artifacts(self, artifacts: list[str], backup_dir: Path) -> list[Path]:
        """
        Backup artifacts to a backup directory.

        Missing artifacts are skipped and a failed copy is logged; neither
        stops the remaining artifacts from being processed.

        Args:
            artifacts: List of artifact file paths
            backup_dir: Backup directory (created if missing)

        Returns:
            List of backed up file paths; empty unless the durability level
            is HIGH
        """
        if self.durability_level != DurabilityLevel.HIGH:
            return []  # Only backup artifacts for HIGH durability

        backup_dir = Path(backup_dir)
        backup_dir.mkdir(parents=True, exist_ok=True)

        backed_up = []
        for artifact_path in artifacts:
            artifact = Path(artifact_path)
            if not artifact.exists():
                continue

            # Artifacts are stored flat, under their base name.
            backup_path = backup_dir / artifact.name
            try:
                if artifact.is_file():
                    shutil.copy2(artifact, backup_path)
                elif artifact.is_dir():
                    shutil.copytree(artifact, backup_path, dirs_exist_ok=True)
                else:
                    # Neither a regular file nor a directory: nothing was
                    # copied, so it must not be reported as backed up.
                    logger.debug(f"Skipping unsupported artifact type: {artifact}")
                    continue
                backed_up.append(backup_path)
            except Exception as e:
                logger.error(
                    f"Failed to backup artifact {artifact}: {e}", exc_info=True
                )

        return backed_up
|
|
334
|
+
|
|
335
|
+
|
|
336
|
+
class FailureRecovery:
|
|
337
|
+
"""Recovers from failures automatically."""
|
|
338
|
+
|
|
339
|
+
def __init__(
|
|
340
|
+
self,
|
|
341
|
+
checkpoint_manager: CheckpointManager,
|
|
342
|
+
session_manager: SessionManager | None = None,
|
|
343
|
+
):
|
|
344
|
+
"""
|
|
345
|
+
Initialize failure recovery.
|
|
346
|
+
|
|
347
|
+
Args:
|
|
348
|
+
checkpoint_manager: Checkpoint manager instance
|
|
349
|
+
session_manager: Optional session manager
|
|
350
|
+
"""
|
|
351
|
+
self.checkpoint_manager = checkpoint_manager
|
|
352
|
+
self.session_manager = session_manager
|
|
353
|
+
|
|
354
|
+
self.failure_history: list[FailureRecord] = []
|
|
355
|
+
self.max_failures = 100 # Keep last 100 failures
|
|
356
|
+
|
|
357
|
+
def record_failure(
|
|
358
|
+
self,
|
|
359
|
+
failure_type: str,
|
|
360
|
+
error_message: str,
|
|
361
|
+
stack_trace: str | None = None,
|
|
362
|
+
task_id: str | None = None,
|
|
363
|
+
metadata: dict[str, Any] | None = None,
|
|
364
|
+
) -> FailureRecord:
|
|
365
|
+
"""
|
|
366
|
+
Record a failure event.
|
|
367
|
+
|
|
368
|
+
Args:
|
|
369
|
+
failure_type: Type of failure
|
|
370
|
+
error_message: Error message
|
|
371
|
+
stack_trace: Optional stack trace
|
|
372
|
+
task_id: Optional task ID
|
|
373
|
+
metadata: Additional metadata
|
|
374
|
+
|
|
375
|
+
Returns:
|
|
376
|
+
FailureRecord instance
|
|
377
|
+
"""
|
|
378
|
+
# Check if checkpoint is available (prefer list_checkpoints for testability/mocks)
|
|
379
|
+
checkpoint_available = False
|
|
380
|
+
if task_id:
|
|
381
|
+
try:
|
|
382
|
+
listed = self.checkpoint_manager.list_checkpoints()
|
|
383
|
+
except (OSError, PermissionError) as e:
|
|
384
|
+
# File system errors when listing files
|
|
385
|
+
logger.debug(f"Failed to list files: {e}")
|
|
386
|
+
listed = []
|
|
387
|
+
|
|
388
|
+
# `list_checkpoints()` may return task_ids or TaskCheckpoint objects depending on implementation/mocks.
|
|
389
|
+
if listed:
|
|
390
|
+
first = listed[0]
|
|
391
|
+
if hasattr(first, "task_id"):
|
|
392
|
+
checkpoint_available = any(
|
|
393
|
+
getattr(cp, "task_id", None) == task_id for cp in listed
|
|
394
|
+
)
|
|
395
|
+
else:
|
|
396
|
+
checkpoint_available = task_id in listed
|
|
397
|
+
|
|
398
|
+
failure_metadata = metadata or {}
|
|
399
|
+
if task_id:
|
|
400
|
+
failure_metadata["task_id"] = task_id
|
|
401
|
+
|
|
402
|
+
failure = FailureRecord(
|
|
403
|
+
timestamp=datetime.now(UTC),
|
|
404
|
+
failure_type=failure_type,
|
|
405
|
+
error_message=error_message,
|
|
406
|
+
stack_trace=stack_trace,
|
|
407
|
+
checkpoint_available=checkpoint_available,
|
|
408
|
+
metadata=failure_metadata,
|
|
409
|
+
)
|
|
410
|
+
|
|
411
|
+
self.failure_history.append(failure)
|
|
412
|
+
if len(self.failure_history) > self.max_failures:
|
|
413
|
+
self.failure_history = self.failure_history[-self.max_failures :]
|
|
414
|
+
|
|
415
|
+
logger.warning(f"Recorded failure: {failure_type} - {error_message}")
|
|
416
|
+
|
|
417
|
+
return failure
|
|
418
|
+
|
|
419
|
+
def recover_from_checkpoint(
|
|
420
|
+
self, task_id: str, agent_id: str | None = None
|
|
421
|
+
) -> TaskCheckpoint | None:
|
|
422
|
+
"""
|
|
423
|
+
Recover from the latest checkpoint.
|
|
424
|
+
|
|
425
|
+
Args:
|
|
426
|
+
task_id: Task ID
|
|
427
|
+
agent_id: Optional agent ID
|
|
428
|
+
|
|
429
|
+
Returns:
|
|
430
|
+
Latest checkpoint or None if not available
|
|
431
|
+
"""
|
|
432
|
+
# Prefer list_checkpoints() so mocked managers can provide TaskCheckpoint objects directly.
|
|
433
|
+
try:
|
|
434
|
+
listed: list[Any] = self.checkpoint_manager.list_checkpoints()
|
|
435
|
+
except (OSError, PermissionError) as e:
|
|
436
|
+
# File system errors when listing files
|
|
437
|
+
logger.debug(f"Failed to list files: {e}")
|
|
438
|
+
listed = []
|
|
439
|
+
|
|
440
|
+
if not listed:
|
|
441
|
+
logger.warning(f"No checkpoints available for task {task_id}")
|
|
442
|
+
return None
|
|
443
|
+
|
|
444
|
+
candidates: list[TaskCheckpoint] = []
|
|
445
|
+
first = listed[0]
|
|
446
|
+
|
|
447
|
+
if hasattr(first, "task_id"):
|
|
448
|
+
# Mock/test path: list contains TaskCheckpoint objects
|
|
449
|
+
candidates = [
|
|
450
|
+
cp for cp in listed if getattr(cp, "task_id", None) == task_id
|
|
451
|
+
]
|
|
452
|
+
else:
|
|
453
|
+
# Storage path: list contains task_id strings
|
|
454
|
+
if task_id in listed:
|
|
455
|
+
cp = self.checkpoint_manager.load_checkpoint(task_id)
|
|
456
|
+
if cp:
|
|
457
|
+
candidates = [cp]
|
|
458
|
+
|
|
459
|
+
if agent_id:
|
|
460
|
+
candidates = [
|
|
461
|
+
cp for cp in candidates if getattr(cp, "agent_id", None) == agent_id
|
|
462
|
+
]
|
|
463
|
+
|
|
464
|
+
if not candidates:
|
|
465
|
+
logger.warning(f"No checkpoints available for task {task_id}")
|
|
466
|
+
return None
|
|
467
|
+
|
|
468
|
+
# Choose latest by checkpoint_time if present
|
|
469
|
+
checkpoint = max(
|
|
470
|
+
candidates,
|
|
471
|
+
key=lambda cp: getattr(
|
|
472
|
+
cp, "checkpoint_time", datetime.min.replace(tzinfo=UTC)
|
|
473
|
+
),
|
|
474
|
+
)
|
|
475
|
+
|
|
476
|
+
# Validate checkpoint
|
|
477
|
+
if not checkpoint.validate():
|
|
478
|
+
logger.error(f"Checkpoint validation failed for task {task_id}")
|
|
479
|
+
return None
|
|
480
|
+
|
|
481
|
+
logger.info(
|
|
482
|
+
f"Recovering task {task_id} from checkpoint at {checkpoint.checkpoint_time}"
|
|
483
|
+
)
|
|
484
|
+
|
|
485
|
+
# Update failure record if this is a recovery attempt
|
|
486
|
+
if self.failure_history:
|
|
487
|
+
last_failure = self.failure_history[-1]
|
|
488
|
+
last_failure.recovery_attempted = True
|
|
489
|
+
last_failure.recovery_successful = True
|
|
490
|
+
|
|
491
|
+
return checkpoint
|
|
492
|
+
|
|
493
|
+
def get_recovery_strategy(self, failure_type: str) -> str:
|
|
494
|
+
"""
|
|
495
|
+
Get recovery strategy for a failure type.
|
|
496
|
+
|
|
497
|
+
Args:
|
|
498
|
+
failure_type: Type of failure
|
|
499
|
+
|
|
500
|
+
Returns:
|
|
501
|
+
Recovery strategy description
|
|
502
|
+
"""
|
|
503
|
+
strategies = {
|
|
504
|
+
"crash": "Restore from latest checkpoint and resume",
|
|
505
|
+
"timeout": "Restore from latest checkpoint and retry with extended timeout",
|
|
506
|
+
"resource_exhaustion": "Restore from latest checkpoint and resume with resource limits",
|
|
507
|
+
"error": "Restore from latest checkpoint and retry with error handling",
|
|
508
|
+
}
|
|
509
|
+
|
|
510
|
+
return strategies.get(failure_type, "Restore from latest checkpoint and resume")
|
|
511
|
+
|
|
512
|
+
def get_failure_history(self, task_id: str | None = None) -> list[FailureRecord]:
    """
    Return recorded failures, optionally restricted to one task.

    Args:
        task_id: When truthy, only records whose ``metadata["task_id"]``
            equals this value are returned. ``None`` or an empty string
            selects every record.

    Returns:
        A new list of failure records; for the unfiltered case it is a
        copy of ``self.failure_history`` rather than the list itself.
    """
    if task_id:
        selected = []
        for record in self.failure_history:
            # The task id lives in the record's metadata mapping.
            if record.metadata.get("task_id") == task_id:
                selected.append(record)
        return selected

    return self.failure_history.copy()
|
|
527
|
+
|
|
528
|
+
|
|
529
|
+
class LongDurationManager:
|
|
530
|
+
"""Manages 30+ hour operations with durability guarantees."""
|
|
531
|
+
|
|
532
|
+
def __init__(
    self,
    session_manager: SessionManager,
    checkpoint_manager: CheckpointManager,
    resource_executor: ResourceAwareExecutor | None = None,
    durability_level: DurabilityLevel = DurabilityLevel.STANDARD,
    hardware_profile: HardwareProfile | None = None,
):
    """
    Wire together the collaborators used to run long-duration tasks.

    Args:
        session_manager: Creates sessions and stores their checkpoints.
        checkpoint_manager: Shared by the durability and recovery helpers.
        resource_executor: Optional resource-aware executor; only stored.
        durability_level: Level handed to the ``DurabilityGuarantee``.
        hardware_profile: Profile handed to the ``DurabilityGuarantee``.
    """
    # Bookkeeping for the background checkpoint worker (not started yet).
    self._lock = threading.Lock()
    self._checkpoint_thread: threading.Thread | None = None
    self._checkpoint_active = False

    # Tasks registered through start_long_duration_task, keyed by task id.
    self.active_tasks: dict[str, AgentSession] = {}

    # Collaborators supplied by the caller.
    self.session_manager = session_manager
    self.checkpoint_manager = checkpoint_manager
    self.resource_executor = resource_executor

    # Helpers built on top of those collaborators.
    self.durability = DurabilityGuarantee(
        checkpoint_manager=checkpoint_manager,
        durability_level=durability_level,
        hardware_profile=hardware_profile,
    )
    self.failure_recovery = FailureRecovery(
        checkpoint_manager=checkpoint_manager,
        session_manager=session_manager,
    )
    self.progress_tracker = ProgressTracker()
|
|
572
|
+
|
|
573
|
+
def start_long_duration_task(
    self,
    task_id: str,
    agent_id: str,
    command: str,
    initial_context: dict[str, Any] | None = None,
) -> AgentSession:
    """
    Register a new long-running task and begin checkpointing it.

    A session is created for the agent, recorded in ``active_tasks``, an
    initial checkpoint at progress 0.0 is written, and the background
    checkpoint thread is started if it is not already running.

    Args:
        task_id: Identifier the task is tracked under.
        agent_id: Agent that owns the session.
        command: Command being executed.
        initial_context: Extra entries merged into the session metadata
            and passed as the first checkpoint's context. Its keys take
            precedence over the generated "task_id"/"command" entries.

    Returns:
        The session created for the task.
    """
    context = initial_context or {}

    session = self.session_manager.create_session(
        agent_id=agent_id,
        metadata={"task_id": task_id, "command": command, **context},
    )

    with self._lock:
        self.active_tasks[task_id] = session

    # Baseline checkpoint so a recovery point exists from the very start.
    self.durability.create_checkpoint(
        task_id=task_id,
        agent_id=agent_id,
        command=command,
        state="running",
        progress=0.0,
        context=context,
    )

    self._start_checkpoint_thread()

    logger.info(
        f"Started long-duration task {task_id} with session {session.session_id}"
    )
    return session
|
|
619
|
+
|
|
620
|
+
def update_progress(
    self,
    task_id: str,
    progress: float,
    current_step: str,
    steps_completed: int,
    total_steps: int,
    estimated_remaining_hours: float | None = None,
    metadata: dict[str, Any] | None = None,
):
    """
    Record a progress sample and checkpoint the task when one is due.

    The sample is always recorded. A checkpoint is only written when
    ``self.durability.should_checkpoint()`` is true and ``task_id`` is a
    registered active task; at HIGH durability the session's artifacts
    (if any) are additionally backed up under
    ``.tapps-agents/backups/<task_id>``.

    Args:
        task_id: Task ID
        progress: Progress (0.0 to 1.0)
        current_step: Current step description
        steps_completed: Steps completed
        total_steps: Total steps
        estimated_remaining_hours: Estimated hours remaining
        metadata: Additional metadata stored with the sample
    """
    self.progress_tracker.record_progress(
        progress=progress,
        current_step=current_step,
        steps_completed=steps_completed,
        total_steps=total_steps,
        estimated_remaining_hours=estimated_remaining_hours,
        metadata=metadata or {},
    )

    if not self.durability.should_checkpoint():
        return

    session = self.active_tasks.get(task_id)
    if not session:
        # Unknown task: the sample was recorded, nothing to checkpoint.
        return

    self.durability.create_checkpoint(
        task_id=task_id,
        agent_id=session.agent_id,
        command=session.metadata.get("command", ""),
        state=session.state.value,
        progress=progress,
        context=session.metadata,
        artifacts=session.metadata.get("artifacts", []),
    )

    # HIGH durability also keeps a copy of the artifacts themselves.
    if self.durability.durability_level == DurabilityLevel.HIGH:
        to_backup = session.metadata.get("artifacts", [])
        if to_backup:
            destination = Path(".tapps-agents/backups") / task_id
            self.durability.backup_artifacts(to_backup, destination)
|
|
671
|
+
|
|
672
|
+
def handle_failure(
    self,
    task_id: str,
    failure_type: str,
    error_message: str,
    stack_trace: str | None = None,
) -> TaskCheckpoint | None:
    """
    Record a task failure and look for a checkpoint to recover from.

    The failure is always recorded. Recovery is only attempted when the
    task is registered in ``active_tasks``; a checkpoint that is found is
    also attached to the task's session.

    Args:
        task_id: Task ID
        failure_type: Type of failure
        error_message: Error message
        stack_trace: Optional stack trace

    Returns:
        The recovery checkpoint, or None when the task is unknown or no
        checkpoint could be recovered.
    """
    self.failure_recovery.record_failure(
        failure_type=failure_type,
        error_message=error_message,
        stack_trace=stack_trace,
        task_id=task_id,
    )

    session = self.active_tasks.get(task_id)
    if not session:
        logger.error(f"No active session found for task {task_id}")
        return None

    checkpoint = self.failure_recovery.recover_from_checkpoint(
        task_id=task_id, agent_id=session.agent_id
    )

    if not checkpoint:
        logger.warning(f"No recovery checkpoint available for task {task_id}")
        return checkpoint

    logger.info(f"Recovery checkpoint available for task {task_id}")
    # Attach the recovery point to the session it belongs to.
    self.session_manager.add_checkpoint(session.session_id, checkpoint)
    return checkpoint
|
|
717
|
+
|
|
718
|
+
def get_progress(self, task_id: str) -> ProgressSnapshot | None:
    """
    Return the most recent progress snapshot held by the tracker.

    ``task_id`` is accepted but not used: the result is whatever the
    shared progress tracker reports as its latest snapshot.
    """
    latest = self.progress_tracker.get_latest_progress()
    return latest
|
|
721
|
+
|
|
722
|
+
def get_progress_history(
    self, task_id: str, hours: float | None = None
) -> list[ProgressSnapshot]:
    """
    Return the tracker's progress history, forwarding ``hours`` as given.

    ``task_id`` is accepted but not used; ``hours`` is passed straight
    through to the progress tracker.
    """
    tracker = self.progress_tracker
    return tracker.get_progress_history(hours=hours)
|
|
727
|
+
|
|
728
|
+
def _start_checkpoint_thread(self):
    """
    Make sure one background checkpoint thread is running.

    The decision is based on whether the existing worker thread is still
    alive rather than on the ``_checkpoint_active`` flag alone:

    * ``stop()`` clears the flag but only waits 5 seconds for the worker,
      so the worker can outlive a stop request. Starting another thread
      in that window would leave two loops running once the flag is set
      again, so a live worker is re-armed and reused instead.
    * If the flag is still set but the worker has died, a fresh worker is
      started instead of silently leaving the tasks without checkpoints.

    NOTE(review): this method takes no lock, so a worker that is on the
    verge of exiting when it is re-armed can still slip away; callers
    that need a hard guarantee should serialise start/stop themselves.
    """
    worker = self._checkpoint_thread
    if worker is not None and worker.is_alive():
        # Re-arm the running loop; never spawn a duplicate.
        self._checkpoint_active = True
        return

    self._checkpoint_active = True
    self._checkpoint_thread = threading.Thread(
        target=self._checkpoint_loop, daemon=True
    )
    self._checkpoint_thread.start()
|
|
738
|
+
|
|
739
|
+
def _checkpoint_loop(self, check_interval: float = 60.0, poll_interval: float = 0.5):
    """
    Periodically checkpoint the active tasks until the manager is stopped.

    Runs on the background thread. Each cycle waits ``check_interval``
    seconds and then, for every registered task for which
    ``self.durability.should_checkpoint()`` is true, creates a checkpoint
    from the session's current state plus the tracker's latest progress
    (0.0 when no progress was recorded) and attaches it to the session.

    The wait is sliced into short sleeps so that clearing
    ``_checkpoint_active`` is noticed within ``poll_interval`` seconds;
    a single long sleep would outlast the 5 second join in ``stop()``.
    No checkpoint pass is run once a stop has been requested.

    Args:
        check_interval: Seconds between checkpoint passes.
        poll_interval: Longest single sleep while waiting, i.e. the
            worst-case delay before a stop request is honoured.
    """
    while self._checkpoint_active:
        try:
            # Interruptible wait for the next pass.
            wake_at = time.monotonic() + check_interval
            while self._checkpoint_active:
                remaining = wake_at - time.monotonic()
                if remaining <= 0:
                    break
                time.sleep(max(0.0, min(poll_interval, remaining)))

            if not self._checkpoint_active:
                break

            # Copy the task table under the lock, then work on the copy so
            # registering a new task is not blocked while checkpoints are
            # being created.
            with self._lock:
                tasks = list(self.active_tasks.items())

            for task_id, session in tasks:
                if not self.durability.should_checkpoint():
                    continue

                snapshot = self.progress_tracker.get_latest_progress()
                progress = snapshot.progress if snapshot else 0.0

                checkpoint = self.durability.create_checkpoint(
                    task_id=task_id,
                    agent_id=session.agent_id,
                    command=session.metadata.get("command", ""),
                    state=session.state.value,
                    progress=progress,
                    context=session.metadata,
                    artifacts=session.metadata.get("artifacts", []),
                )

                self.session_manager.add_checkpoint(
                    session.session_id, checkpoint
                )
        except Exception as e:
            # Keep the worker alive: a failed pass must not end checkpointing.
            logger.error(f"Error in checkpoint loop: {e}", exc_info=True)
|
|
771
|
+
|
|
772
|
+
def stop(self):
    """
    Ask the background checkpoint thread to finish and wait briefly for it.

    Clears the ``_checkpoint_active`` flag and, if a worker thread was
    ever created, joins it for at most 5 seconds before logging that the
    manager has stopped.
    """
    self._checkpoint_active = False

    worker = self._checkpoint_thread
    if worker:
        # Bounded wait: never hang shutdown on the worker.
        worker.join(timeout=5.0)

    logger.info("Long-duration manager stopped")
|
|
779
|
+
|
|
780
|
+
def __enter__(self):
    """Enter the ``with`` block; the manager itself is the bound value."""
    return self
|
|
783
|
+
|
|
784
|
+
def __exit__(self, exc_type, exc_val, exc_tb):
    """
    Leave the ``with`` block by stopping the manager.

    The exception details are ignored and nothing truthy is returned, so
    an exception raised inside the block keeps propagating.
    """
    self.stop()
    return None
|