tapps-agents 3.5.39__py3-none-any.whl → 3.5.40__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (70)
  1. tapps_agents/__init__.py +2 -2
  2. tapps_agents/agents/enhancer/agent.py +2728 -2728
  3. tapps_agents/agents/implementer/agent.py +35 -13
  4. tapps_agents/agents/reviewer/agent.py +43 -10
  5. tapps_agents/agents/reviewer/scoring.py +59 -68
  6. tapps_agents/agents/reviewer/tools/__init__.py +24 -0
  7. tapps_agents/agents/reviewer/tools/ruff_grouping.py +250 -0
  8. tapps_agents/agents/reviewer/tools/scoped_mypy.py +284 -0
  9. tapps_agents/beads/__init__.py +11 -0
  10. tapps_agents/beads/hydration.py +213 -0
  11. tapps_agents/beads/specs.py +206 -0
  12. tapps_agents/cli/commands/health.py +19 -3
  13. tapps_agents/cli/commands/simple_mode.py +842 -676
  14. tapps_agents/cli/commands/task.py +219 -0
  15. tapps_agents/cli/commands/top_level.py +13 -0
  16. tapps_agents/cli/main.py +658 -651
  17. tapps_agents/cli/parsers/top_level.py +1978 -1881
  18. tapps_agents/core/config.py +1622 -1622
  19. tapps_agents/core/init_project.py +3012 -2897
  20. tapps_agents/epic/markdown_sync.py +105 -0
  21. tapps_agents/epic/orchestrator.py +1 -2
  22. tapps_agents/epic/parser.py +427 -423
  23. tapps_agents/experts/adaptive_domain_detector.py +0 -2
  24. tapps_agents/experts/knowledge/api-design-integration/api-security-patterns.md +15 -15
  25. tapps_agents/experts/knowledge/api-design-integration/external-api-integration.md +19 -44
  26. tapps_agents/health/checks/outcomes.backup_20260204_064058.py +324 -0
  27. tapps_agents/health/checks/outcomes.backup_20260204_064256.py +324 -0
  28. tapps_agents/health/checks/outcomes.backup_20260204_064600.py +324 -0
  29. tapps_agents/health/checks/outcomes.py +134 -46
  30. tapps_agents/health/orchestrator.py +12 -4
  31. tapps_agents/hooks/__init__.py +33 -0
  32. tapps_agents/hooks/config.py +140 -0
  33. tapps_agents/hooks/events.py +135 -0
  34. tapps_agents/hooks/executor.py +128 -0
  35. tapps_agents/hooks/manager.py +143 -0
  36. tapps_agents/session/__init__.py +19 -0
  37. tapps_agents/session/manager.py +256 -0
  38. tapps_agents/simple_mode/code_snippet_handler.py +382 -0
  39. tapps_agents/simple_mode/intent_parser.py +29 -4
  40. tapps_agents/simple_mode/orchestrators/base.py +185 -59
  41. tapps_agents/simple_mode/orchestrators/build_orchestrator.py +2667 -2642
  42. tapps_agents/simple_mode/orchestrators/fix_orchestrator.py +2 -2
  43. tapps_agents/simple_mode/workflow_suggester.py +37 -3
  44. tapps_agents/workflow/agent_handlers/implementer_handler.py +18 -3
  45. tapps_agents/workflow/cursor_executor.py +2196 -2118
  46. tapps_agents/workflow/direct_execution_fallback.py +16 -3
  47. tapps_agents/workflow/message_formatter.py +2 -1
  48. tapps_agents/workflow/parallel_executor.py +43 -4
  49. tapps_agents/workflow/parser.py +375 -357
  50. tapps_agents/workflow/rules_generator.py +337 -337
  51. tapps_agents/workflow/skill_invoker.py +9 -3
  52. {tapps_agents-3.5.39.dist-info → tapps_agents-3.5.40.dist-info}/METADATA +5 -1
  53. {tapps_agents-3.5.39.dist-info → tapps_agents-3.5.40.dist-info}/RECORD +57 -53
  54. tapps_agents/agents/analyst/SKILL.md +0 -85
  55. tapps_agents/agents/architect/SKILL.md +0 -80
  56. tapps_agents/agents/debugger/SKILL.md +0 -66
  57. tapps_agents/agents/designer/SKILL.md +0 -78
  58. tapps_agents/agents/documenter/SKILL.md +0 -95
  59. tapps_agents/agents/enhancer/SKILL.md +0 -189
  60. tapps_agents/agents/implementer/SKILL.md +0 -117
  61. tapps_agents/agents/improver/SKILL.md +0 -55
  62. tapps_agents/agents/ops/SKILL.md +0 -64
  63. tapps_agents/agents/orchestrator/SKILL.md +0 -238
  64. tapps_agents/agents/planner/story_template.md +0 -37
  65. tapps_agents/agents/reviewer/templates/quality-dashboard.html.j2 +0 -150
  66. tapps_agents/agents/tester/SKILL.md +0 -71
  67. {tapps_agents-3.5.39.dist-info → tapps_agents-3.5.40.dist-info}/WHEEL +0 -0
  68. {tapps_agents-3.5.39.dist-info → tapps_agents-3.5.40.dist-info}/entry_points.txt +0 -0
  69. {tapps_agents-3.5.39.dist-info → tapps_agents-3.5.40.dist-info}/licenses/LICENSE +0 -0
  70. {tapps_agents-3.5.39.dist-info → tapps_agents-3.5.40.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,324 @@
+"""
+Outcome Health Check.
+
+Checks quality trends and improvement metrics.
+"""
+
+from __future__ import annotations
+
+import json
+from datetime import datetime, timedelta
+from pathlib import Path
+
+from ...workflow.analytics_dashboard_cursor import CursorAnalyticsAccessor
+from ...workflow.review_artifact import ReviewArtifact
+from ..base import HealthCheck, HealthCheckResult
+
+
+class OutcomeHealthCheck(HealthCheck):
+    """Health check for quality trends and outcomes."""
+
+    def __init__(self, project_root: Path | None = None, reports_dir: Path | None = None):
+        """
+        Initialize outcome health check.
+
+        Args:
+            project_root: Project root directory
+            reports_dir: Reports directory (defaults to .tapps-agents/reports)
+        """
+        super().__init__(name="outcomes", dependencies=["environment", "execution"])
+        self.project_root = project_root or Path.cwd()
+        self.reports_dir = reports_dir or (self.project_root / ".tapps-agents" / "reports")
+        self.accessor = CursorAnalyticsAccessor()
+
+    def _compute_outcomes_from_execution_metrics(self, days: int = 30) -> dict:
+        """
+        Compute outcomes from execution metrics when review artifacts don't exist.
+
+        Args:
+            days: Number of days to look back for metrics
+
+        Returns:
+            Dictionary with review_executions_count, success_rate, and gate_pass_rate
+        """
+        try:
+            from datetime import UTC
+            from ...workflow.execution_metrics import ExecutionMetricsCollector
+            import logging
+
+            collector = ExecutionMetricsCollector(project_root=self.project_root)
+
+            # Get metrics with reasonable limit (5000 max for ~30 days of heavy usage)
+            MAX_METRICS_TO_SCAN = 5000
+            all_metrics = collector.get_metrics(limit=MAX_METRICS_TO_SCAN)
+
+            # Log warning if we hit the limit
+            if len(all_metrics) >= MAX_METRICS_TO_SCAN:
+                logging.getLogger(__name__).warning(
+                    "Hit metrics scan limit (%d); results may be incomplete",
+                    MAX_METRICS_TO_SCAN
+                )
+
+            # Filter for review executions within the last N days (timezone-aware)
+            cutoff_date = datetime.now(UTC) - timedelta(days=days)
+            review_metrics = []
+            for m in all_metrics:
+                # Parse timestamp and ensure timezone-aware comparison
+                try:
+                    ts = datetime.fromisoformat(m.started_at.replace("Z", "+00:00"))
+                    # Convert naive datetime to UTC if needed
+                    if ts.tzinfo is None:
+                        from datetime import UTC
+                        ts = ts.replace(tzinfo=UTC)
+
+                    if ts >= cutoff_date:
+                        if m.command == "review" or (m.skill and "reviewer" in (m.skill or "").lower()):
+                            review_metrics.append(m)
+                except (ValueError, AttributeError):
+                    # Skip metrics with invalid timestamps
+                    continue
+
+            if not review_metrics:
+                return {
+                    "review_executions_count": 0,
+                    "success_rate": 0.0,
+                    "gate_pass_rate": None,
+                }
+
+            total = len(review_metrics)
+            success_count = sum(1 for m in review_metrics if m.status == "success")
+            success_rate = (success_count / total * 100) if total > 0 else 0.0
+
+            # Calculate gate pass rate (only for metrics that have gate_pass field)
+            gate_pass_metrics = [m for m in review_metrics if m.gate_pass is not None]
+            if gate_pass_metrics:
+                gate_pass_count = sum(1 for m in gate_pass_metrics if m.gate_pass is True)
+                gate_pass_rate = (gate_pass_count / len(gate_pass_metrics) * 100)
+            else:
+                gate_pass_rate = None
+
+            return {
+                "review_executions_count": total,
+                "success_rate": success_rate,
+                "gate_pass_rate": gate_pass_rate,
+            }
+
+        except Exception as e:
+            # If fallback fails, log and return empty result
+            import logging
+            logging.getLogger(__name__).debug(
+                "Failed to compute outcomes from execution metrics: %s", e
+            )
+            return {
+                "review_executions_count": 0,
+                "success_rate": 0.0,
+                "gate_pass_rate": None,
+            }
+
+    def run(self) -> HealthCheckResult:
+        """
+        Run outcome health check.
+
+        Returns:
+            HealthCheckResult with outcome trends
+        """
+        try:
+            # Get analytics data for trends
+            dashboard_data = self.accessor.get_dashboard_data()
+            agents_data = dashboard_data.get("agents", [])
+            workflows_data = dashboard_data.get("workflows", [])
+
+            # Look for review artifacts in reports directory
+            review_artifacts = []
+            if self.reports_dir.exists():
+                for artifact_file in self.reports_dir.rglob("review_*.json"):
+                    try:
+                        with open(artifact_file, encoding="utf-8") as f:
+                            data = json.load(f)
+                        artifact = ReviewArtifact.from_dict(data)
+                        if artifact.overall_score is not None:
+                            review_artifacts.append(artifact)
+                    except Exception:
+                        continue
+
+            # Calculate trends from review artifacts
+            score_trend = "unknown"
+            avg_score = 0.0
+            score_change = 0.0
+
+            if review_artifacts:
+                # Sort by timestamp
+                review_artifacts.sort(key=lambda a: a.timestamp)
+
+                # Get recent artifacts (last 30 days)
+                thirty_days_ago = datetime.now() - timedelta(days=30)
+                recent_artifacts = [
+                    a
+                    for a in review_artifacts
+                    if datetime.fromisoformat(a.timestamp.replace("Z", "+00:00")) >= thirty_days_ago
+                ]
+
+                if recent_artifacts:
+                    scores = [a.overall_score for a in recent_artifacts if a.overall_score is not None]
+                    if scores:
+                        avg_score = sum(scores) / len(scores)
+
+                        # Calculate trend (compare first half to second half)
+                        if len(scores) >= 4:
+                            first_half = scores[: len(scores) // 2]
+                            second_half = scores[len(scores) // 2 :]
+                            first_avg = sum(first_half) / len(first_half)
+                            second_avg = sum(second_half) / len(second_half)
+                            score_change = second_avg - first_avg
+
+                            if score_change > 5.0:
+                                score_trend = "improving"
+                            elif score_change < -5.0:
+                                score_trend = "degrading"
+                            else:
+                                score_trend = "stable"
+
+            # Count quality improvement workflows
+            quality_workflows = [
+                w
+                for w in workflows_data
+                if "quality" in w.get("workflow_name", "").lower()
+                or "improve" in w.get("workflow_name", "").lower()
+            ]
+            improvement_cycles = len(quality_workflows)
+
+            # Calculate health score
+            score = 100.0
+            issues = []
+            remediation = []
+
+            # Check if we have any data; if not, try fallback to execution metrics (review steps)
+            if not review_artifacts and not agents_data:
+                # Fallback: derive outcomes from execution metrics (review steps, gate_pass)
+                import logging
+                fallback_data = self._compute_outcomes_from_execution_metrics(days=30)
+
+                if fallback_data["review_executions_count"] > 0:
+                    total = fallback_data["review_executions_count"]
+                    success_rate = fallback_data["success_rate"]
+                    gate_pass_rate = fallback_data["gate_pass_rate"]
+
+                    # Calculate score: 60 base + 10 if success_rate ≥80% + 5 if gate_pass_rate ≥70%
+                    fallback_score = 60.0
+                    if success_rate >= 80.0:
+                        fallback_score += 10.0
+                    if gate_pass_rate is not None and gate_pass_rate >= 70.0:
+                        fallback_score += 5.0
+
+                    # Build message
+                    gate_msg = f"{gate_pass_rate:.0f}% passed gate" if gate_pass_rate is not None else "no gate data"
+                    message = (
+                        f"Outcomes derived from execution metrics: {total} review steps, "
+                        f"{gate_msg}"
+                    )
+
+                    logging.getLogger(__name__).info(
+                        "Outcomes fallback activated: %d review executions processed", total
+                    )
+
+                    return HealthCheckResult(
+                        name=self.name,
+                        status="degraded",
+                        score=fallback_score,
+                        message=message,
+                        details={
+                            "average_score": 0.0,
+                            "score_trend": "unknown",
+                            "score_change": 0.0,
+                            "review_artifacts_count": 0,
+                            "improvement_cycles": 0,
+                            "reports_dir": str(self.reports_dir),
+                            "fallback_used": True,
+                            "fallback_source": "execution_metrics",
+                            "review_executions_count": total,
+                            "success_rate": success_rate,
+                            "gate_pass_rate": gate_pass_rate,
+                            "issues": [],
+                        },
+                        remediation=[
+                            "Run reviewer agent or quality workflows to generate review artifacts"
+                        ],
+                    )
+
+                score = 50.0
+                issues.append("No quality metrics available")
+                remediation.append("Run reviewer agent or quality workflows to generate metrics")
+            else:
+                # Check score trend
+                if score_trend == "degrading":
+                    score -= 20.0
+                    issues.append(f"Quality scores declining: {score_change:.1f} point change")
+                    remediation.append("Investigate recent code changes causing quality decline")
+                elif score_trend == "improving":
+                    # Bonus for improvement
+                    score = min(100.0, score + 5.0)
+
+                # Check average score
+                if avg_score > 0:
+                    if avg_score < 60.0:
+                        score -= 30.0
+                        issues.append(f"Low average quality score: {avg_score:.1f}/100")
+                        remediation.append("Run quality improvement workflows")
+                    elif avg_score < 75.0:
+                        score -= 15.0
+                        issues.append(f"Moderate quality score: {avg_score:.1f}/100")
+
+                # Check improvement activity
+                if improvement_cycles == 0:
+                    score -= 10.0
+                    issues.append("No quality improvement workflows run")
+                    remediation.append("Run quality workflows to improve code quality")
+
+            # Determine status
+            if score >= 85.0:
+                status = "healthy"
+            elif score >= 70.0:
+                status = "degraded"
+            else:
+                status = "unhealthy"
+
+            # Build message
+            message_parts = []
+            if avg_score > 0:
+                message_parts.append(f"Avg score: {avg_score:.1f}")
+            if score_trend != "unknown":
+                message_parts.append(f"Trend: {score_trend}")
+            if improvement_cycles > 0:
+                message_parts.append(f"Improvements: {improvement_cycles}")
+            if not message_parts:
+                message = "No outcome data available"
+            else:
+                message = " | ".join(message_parts)
+
+            return HealthCheckResult(
+                name=self.name,
+                status=status,
+                score=max(0.0, score),
+                message=message,
+                details={
+                    "average_score": avg_score,
+                    "score_trend": score_trend,
+                    "score_change": score_change,
+                    "review_artifacts_count": len(review_artifacts),
+                    "improvement_cycles": improvement_cycles,
+                    "reports_dir": str(self.reports_dir),
+                    "issues": issues,
+                },
+                remediation=remediation if remediation else None,
+            )
+
+        except Exception as e:
+            return HealthCheckResult(
+                name=self.name,
+                status="unhealthy",
+                score=0.0,
+                message=f"Outcome check failed: {e}",
+                details={"error": str(e), "reports_dir": str(self.reports_dir)},
+                remediation=["Check reports directory and analytics access"],
+            )
+
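For orientation, here is a minimal sketch of how the new outcomes check might be exercised on its own. It assumes the installed package is importable and that HealthCheckResult exposes its constructor arguments (status, score, message) as attributes; the snippet is illustrative and is not part of the published wheel.

# Hypothetical usage sketch; not part of the 3.5.40 wheel contents.
from pathlib import Path

from tapps_agents.health.checks.outcomes import OutcomeHealthCheck

# reports_dir defaults to <project_root>/.tapps-agents/reports, as in __init__ above.
check = OutcomeHealthCheck(project_root=Path.cwd())
result = check.run()

# Assumes HealthCheckResult exposes these fields as attributes (e.g. a dataclass).
print(result.status, result.score)
print(result.message)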