tapps-agents 3.5.39-py3-none-any.whl → 3.5.40-py3-none-any.whl
This diff shows the content of publicly available package versions released to a supported registry. It is provided for informational purposes only and reflects the changes between the two versions as they appear in that public registry.
- tapps_agents/__init__.py +2 -2
- tapps_agents/agents/enhancer/agent.py +2728 -2728
- tapps_agents/agents/implementer/agent.py +35 -13
- tapps_agents/agents/reviewer/agent.py +43 -10
- tapps_agents/agents/reviewer/scoring.py +59 -68
- tapps_agents/agents/reviewer/tools/__init__.py +24 -0
- tapps_agents/agents/reviewer/tools/ruff_grouping.py +250 -0
- tapps_agents/agents/reviewer/tools/scoped_mypy.py +284 -0
- tapps_agents/beads/__init__.py +11 -0
- tapps_agents/beads/hydration.py +213 -0
- tapps_agents/beads/specs.py +206 -0
- tapps_agents/cli/commands/health.py +19 -3
- tapps_agents/cli/commands/simple_mode.py +842 -676
- tapps_agents/cli/commands/task.py +219 -0
- tapps_agents/cli/commands/top_level.py +13 -0
- tapps_agents/cli/main.py +658 -651
- tapps_agents/cli/parsers/top_level.py +1978 -1881
- tapps_agents/core/config.py +1622 -1622
- tapps_agents/core/init_project.py +3012 -2897
- tapps_agents/epic/markdown_sync.py +105 -0
- tapps_agents/epic/orchestrator.py +1 -2
- tapps_agents/epic/parser.py +427 -423
- tapps_agents/experts/adaptive_domain_detector.py +0 -2
- tapps_agents/experts/knowledge/api-design-integration/api-security-patterns.md +15 -15
- tapps_agents/experts/knowledge/api-design-integration/external-api-integration.md +19 -44
- tapps_agents/health/checks/outcomes.backup_20260204_064058.py +324 -0
- tapps_agents/health/checks/outcomes.backup_20260204_064256.py +324 -0
- tapps_agents/health/checks/outcomes.backup_20260204_064600.py +324 -0
- tapps_agents/health/checks/outcomes.py +134 -46
- tapps_agents/health/orchestrator.py +12 -4
- tapps_agents/hooks/__init__.py +33 -0
- tapps_agents/hooks/config.py +140 -0
- tapps_agents/hooks/events.py +135 -0
- tapps_agents/hooks/executor.py +128 -0
- tapps_agents/hooks/manager.py +143 -0
- tapps_agents/session/__init__.py +19 -0
- tapps_agents/session/manager.py +256 -0
- tapps_agents/simple_mode/code_snippet_handler.py +382 -0
- tapps_agents/simple_mode/intent_parser.py +29 -4
- tapps_agents/simple_mode/orchestrators/base.py +185 -59
- tapps_agents/simple_mode/orchestrators/build_orchestrator.py +2667 -2642
- tapps_agents/simple_mode/orchestrators/fix_orchestrator.py +2 -2
- tapps_agents/simple_mode/workflow_suggester.py +37 -3
- tapps_agents/workflow/agent_handlers/implementer_handler.py +18 -3
- tapps_agents/workflow/cursor_executor.py +2196 -2118
- tapps_agents/workflow/direct_execution_fallback.py +16 -3
- tapps_agents/workflow/message_formatter.py +2 -1
- tapps_agents/workflow/parallel_executor.py +43 -4
- tapps_agents/workflow/parser.py +375 -357
- tapps_agents/workflow/rules_generator.py +337 -337
- tapps_agents/workflow/skill_invoker.py +9 -3
- {tapps_agents-3.5.39.dist-info → tapps_agents-3.5.40.dist-info}/METADATA +5 -1
- {tapps_agents-3.5.39.dist-info → tapps_agents-3.5.40.dist-info}/RECORD +57 -53
- tapps_agents/agents/analyst/SKILL.md +0 -85
- tapps_agents/agents/architect/SKILL.md +0 -80
- tapps_agents/agents/debugger/SKILL.md +0 -66
- tapps_agents/agents/designer/SKILL.md +0 -78
- tapps_agents/agents/documenter/SKILL.md +0 -95
- tapps_agents/agents/enhancer/SKILL.md +0 -189
- tapps_agents/agents/implementer/SKILL.md +0 -117
- tapps_agents/agents/improver/SKILL.md +0 -55
- tapps_agents/agents/ops/SKILL.md +0 -64
- tapps_agents/agents/orchestrator/SKILL.md +0 -238
- tapps_agents/agents/planner/story_template.md +0 -37
- tapps_agents/agents/reviewer/templates/quality-dashboard.html.j2 +0 -150
- tapps_agents/agents/tester/SKILL.md +0 -71
- {tapps_agents-3.5.39.dist-info → tapps_agents-3.5.40.dist-info}/WHEEL +0 -0
- {tapps_agents-3.5.39.dist-info → tapps_agents-3.5.40.dist-info}/entry_points.txt +0 -0
- {tapps_agents-3.5.39.dist-info → tapps_agents-3.5.40.dist-info}/licenses/LICENSE +0 -0
- {tapps_agents-3.5.39.dist-info → tapps_agents-3.5.40.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,324 @@
"""
Outcome Health Check.

Checks quality trends and improvement metrics.
"""

from __future__ import annotations

import json
from datetime import datetime, timedelta
from pathlib import Path

from ...workflow.analytics_dashboard_cursor import CursorAnalyticsAccessor
from ...workflow.review_artifact import ReviewArtifact
from ..base import HealthCheck, HealthCheckResult


class OutcomeHealthCheck(HealthCheck):
    """Health check for quality trends and outcomes."""

    def __init__(self, project_root: Path | None = None, reports_dir: Path | None = None):
        """
        Initialize outcome health check.

        Args:
            project_root: Project root directory
            reports_dir: Reports directory (defaults to .tapps-agents/reports)
        """
        super().__init__(name="outcomes", dependencies=["environment", "execution"])
        self.project_root = project_root or Path.cwd()
        self.reports_dir = reports_dir or (self.project_root / ".tapps-agents" / "reports")
        self.accessor = CursorAnalyticsAccessor()

    def _compute_outcomes_from_execution_metrics(self, days: int = 30) -> dict:
        """
        Compute outcomes from execution metrics when review artifacts don't exist.

        Args:
            days: Number of days to look back for metrics

        Returns:
            Dictionary with review_executions_count, success_rate, and gate_pass_rate
        """
        try:
            from datetime import UTC
            from ...workflow.execution_metrics import ExecutionMetricsCollector
            import logging

            collector = ExecutionMetricsCollector(project_root=self.project_root)

            # Get metrics with reasonable limit (5000 max for ~30 days of heavy usage)
            MAX_METRICS_TO_SCAN = 5000
            all_metrics = collector.get_metrics(limit=MAX_METRICS_TO_SCAN)

            # Log warning if we hit the limit
            if len(all_metrics) >= MAX_METRICS_TO_SCAN:
                logging.getLogger(__name__).warning(
                    "Hit metrics scan limit (%d); results may be incomplete",
                    MAX_METRICS_TO_SCAN
                )

            # Filter for review executions within the last N days (timezone-aware)
            cutoff_date = datetime.now(UTC) - timedelta(days=days)
            review_metrics = []
            for m in all_metrics:
                # Parse timestamp and ensure timezone-aware comparison
                try:
                    ts = datetime.fromisoformat(m.started_at.replace("Z", "+00:00"))
                    # Convert naive datetime to UTC if needed
                    if ts.tzinfo is None:
                        from datetime import UTC
                        ts = ts.replace(tzinfo=UTC)

                    if ts >= cutoff_date:
                        if m.command == "review" or (m.skill and "reviewer" in (m.skill or "").lower()):
                            review_metrics.append(m)
                except (ValueError, AttributeError):
                    # Skip metrics with invalid timestamps
                    continue

            if not review_metrics:
                return {
                    "review_executions_count": 0,
                    "success_rate": 0.0,
                    "gate_pass_rate": None,
                }

            total = len(review_metrics)
            success_count = sum(1 for m in review_metrics if m.status == "success")
            success_rate = (success_count / total * 100) if total > 0 else 0.0

            # Calculate gate pass rate (only for metrics that have gate_pass field)
            gate_pass_metrics = [m for m in review_metrics if m.gate_pass is not None]
            if gate_pass_metrics:
                gate_pass_count = sum(1 for m in gate_pass_metrics if m.gate_pass is True)
                gate_pass_rate = (gate_pass_count / len(gate_pass_metrics) * 100)
            else:
                gate_pass_rate = None

            return {
                "review_executions_count": total,
                "success_rate": success_rate,
                "gate_pass_rate": gate_pass_rate,
            }

        except Exception as e:
            # If fallback fails, log and return empty result
            import logging
            logging.getLogger(__name__).debug(
                "Failed to compute outcomes from execution metrics: %s", e
            )
            return {
                "review_executions_count": 0,
                "success_rate": 0.0,
                "gate_pass_rate": None,
            }

    def run(self) -> HealthCheckResult:
        """
        Run outcome health check.

        Returns:
            HealthCheckResult with outcome trends
        """
        try:
            # Get analytics data for trends
            dashboard_data = self.accessor.get_dashboard_data()
            agents_data = dashboard_data.get("agents", [])
            workflows_data = dashboard_data.get("workflows", [])

            # Look for review artifacts in reports directory
            review_artifacts = []
            if self.reports_dir.exists():
                for artifact_file in self.reports_dir.rglob("review_*.json"):
                    try:
                        with open(artifact_file, encoding="utf-8") as f:
                            data = json.load(f)
                        artifact = ReviewArtifact.from_dict(data)
                        if artifact.overall_score is not None:
                            review_artifacts.append(artifact)
                    except Exception:
                        continue

            # Calculate trends from review artifacts
            score_trend = "unknown"
            avg_score = 0.0
            score_change = 0.0

            if review_artifacts:
                # Sort by timestamp
                review_artifacts.sort(key=lambda a: a.timestamp)

                # Get recent artifacts (last 30 days)
                thirty_days_ago = datetime.now() - timedelta(days=30)
                recent_artifacts = [
                    a
                    for a in review_artifacts
                    if datetime.fromisoformat(a.timestamp.replace("Z", "+00:00")) >= thirty_days_ago
                ]

                if recent_artifacts:
                    scores = [a.overall_score for a in recent_artifacts if a.overall_score is not None]
                    if scores:
                        avg_score = sum(scores) / len(scores)

                        # Calculate trend (compare first half to second half)
                        if len(scores) >= 4:
                            first_half = scores[: len(scores) // 2]
                            second_half = scores[len(scores) // 2 :]
                            first_avg = sum(first_half) / len(first_half)
                            second_avg = sum(second_half) / len(second_half)
                            score_change = second_avg - first_avg

                            if score_change > 5.0:
                                score_trend = "improving"
                            elif score_change < -5.0:
                                score_trend = "degrading"
                            else:
                                score_trend = "stable"

            # Count quality improvement workflows
            quality_workflows = [
                w
                for w in workflows_data
                if "quality" in w.get("workflow_name", "").lower()
                or "improve" in w.get("workflow_name", "").lower()
            ]
            improvement_cycles = len(quality_workflows)

            # Calculate health score
            score = 100.0
            issues = []
            remediation = []

            # Check if we have any data; if not, try fallback to execution metrics (review steps)
            if not review_artifacts and not agents_data:
                # Fallback: derive outcomes from execution metrics (review steps, gate_pass)
                import logging
                fallback_data = self._compute_outcomes_from_execution_metrics(days=30)

                if fallback_data["review_executions_count"] > 0:
                    total = fallback_data["review_executions_count"]
                    success_rate = fallback_data["success_rate"]
                    gate_pass_rate = fallback_data["gate_pass_rate"]

                    # Calculate score: 60 base + 10 if success_rate ≥80% + 5 if gate_pass_rate ≥70%
                    fallback_score = 60.0
                    if success_rate >= 80.0:
                        fallback_score += 10.0
                    if gate_pass_rate is not None and gate_pass_rate >= 70.0:
                        fallback_score += 5.0

                    # Build message
                    gate_msg = f"{gate_pass_rate:.0f}% passed gate" if gate_pass_rate is not None else "no gate data"
                    message = (
                        f"Outcomes derived from execution metrics: {total} review steps, "
                        f"{gate_msg}"
                    )

                    logging.getLogger(__name__).info(
                        "Outcomes fallback activated: %d review executions processed", total
                    )

                    return HealthCheckResult(
                        name=self.name,
                        status="degraded",
                        score=fallback_score,
                        message=message,
                        details={
                            "average_score": 0.0,
                            "score_trend": "unknown",
                            "score_change": 0.0,
                            "review_artifacts_count": 0,
                            "improvement_cycles": 0,
                            "reports_dir": str(self.reports_dir),
                            "fallback_used": True,
                            "fallback_source": "execution_metrics",
                            "review_executions_count": total,
                            "success_rate": success_rate,
                            "gate_pass_rate": gate_pass_rate,
                            "issues": [],
                        },
                        remediation=[
                            "Run reviewer agent or quality workflows to generate review artifacts"
                        ],
                    )

                score = 50.0
                issues.append("No quality metrics available")
                remediation.append("Run reviewer agent or quality workflows to generate metrics")
            else:
                # Check score trend
                if score_trend == "degrading":
                    score -= 20.0
                    issues.append(f"Quality scores declining: {score_change:.1f} point change")
                    remediation.append("Investigate recent code changes causing quality decline")
                elif score_trend == "improving":
                    # Bonus for improvement
                    score = min(100.0, score + 5.0)

                # Check average score
                if avg_score > 0:
                    if avg_score < 60.0:
                        score -= 30.0
                        issues.append(f"Low average quality score: {avg_score:.1f}/100")
                        remediation.append("Run quality improvement workflows")
                    elif avg_score < 75.0:
                        score -= 15.0
                        issues.append(f"Moderate quality score: {avg_score:.1f}/100")

                # Check improvement activity
                if improvement_cycles == 0:
                    score -= 10.0
                    issues.append("No quality improvement workflows run")
                    remediation.append("Run quality workflows to improve code quality")

            # Determine status
            if score >= 85.0:
                status = "healthy"
            elif score >= 70.0:
                status = "degraded"
            else:
                status = "unhealthy"

            # Build message
            message_parts = []
            if avg_score > 0:
                message_parts.append(f"Avg score: {avg_score:.1f}")
            if score_trend != "unknown":
                message_parts.append(f"Trend: {score_trend}")
            if improvement_cycles > 0:
                message_parts.append(f"Improvements: {improvement_cycles}")
            if not message_parts:
                message = "No outcome data available"
            else:
                message = " | ".join(message_parts)

            return HealthCheckResult(
                name=self.name,
                status=status,
                score=max(0.0, score),
                message=message,
                details={
                    "average_score": avg_score,
                    "score_trend": score_trend,
                    "score_change": score_change,
                    "review_artifacts_count": len(review_artifacts),
                    "improvement_cycles": improvement_cycles,
                    "reports_dir": str(self.reports_dir),
                    "issues": issues,
                },
                remediation=remediation if remediation else None,
            )

        except Exception as e:
            return HealthCheckResult(
                name=self.name,
                status="unhealthy",
                score=0.0,
                message=f"Outcome check failed: {e}",
                details={"error": str(e), "reports_dir": str(self.reports_dir)},
                remediation=["Check reports directory and analytics access"],
            )
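For orientation, a minimal usage sketch of the new check follows. It relies only on the constructor and run() signature visible in the hunk above; the attribute access on HealthCheckResult (status, score, message, details) mirrors the keyword arguments passed there but is otherwise an assumption, as is invoking the check directly from a script rather than through the health orchestrator.

# Hypothetical usage sketch; not taken from the package documentation.
from pathlib import Path

from tapps_agents.health.checks.outcomes import OutcomeHealthCheck

# reports_dir defaults to <project_root>/.tapps-agents/reports
check = OutcomeHealthCheck(project_root=Path("."))
result = check.run()

print(result.status)   # "healthy", "degraded", or "unhealthy"
print(result.score)    # 0.0-100.0 health score
print(result.message)  # e.g. "Avg score: 82.3 | Trend: stable"

# When no review artifacts or agent analytics exist, the check falls back to
# execution metrics and flags that in the details payload.
if result.details.get("fallback_used"):
    print(result.details["review_executions_count"], result.details["gate_pass_rate"])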