up-cli 0.1.1__py3-none-any.whl → 0.5.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (55) hide show
  1. up/__init__.py +1 -1
  2. up/ai_cli.py +229 -0
  3. up/cli.py +75 -4
  4. up/commands/agent.py +521 -0
  5. up/commands/bisect.py +343 -0
  6. up/commands/branch.py +350 -0
  7. up/commands/dashboard.py +248 -0
  8. up/commands/init.py +195 -6
  9. up/commands/learn.py +1741 -0
  10. up/commands/memory.py +545 -0
  11. up/commands/new.py +108 -10
  12. up/commands/provenance.py +267 -0
  13. up/commands/review.py +239 -0
  14. up/commands/start.py +1124 -0
  15. up/commands/status.py +360 -0
  16. up/commands/summarize.py +122 -0
  17. up/commands/sync.py +317 -0
  18. up/commands/vibe.py +304 -0
  19. up/context.py +421 -0
  20. up/core/__init__.py +69 -0
  21. up/core/checkpoint.py +479 -0
  22. up/core/provenance.py +364 -0
  23. up/core/state.py +678 -0
  24. up/events.py +512 -0
  25. up/git/__init__.py +37 -0
  26. up/git/utils.py +270 -0
  27. up/git/worktree.py +331 -0
  28. up/learn/__init__.py +155 -0
  29. up/learn/analyzer.py +227 -0
  30. up/learn/plan.py +374 -0
  31. up/learn/research.py +511 -0
  32. up/learn/utils.py +117 -0
  33. up/memory.py +1096 -0
  34. up/parallel.py +551 -0
  35. up/summarizer.py +407 -0
  36. up/templates/__init__.py +70 -2
  37. up/templates/config/__init__.py +502 -20
  38. up/templates/docs/SKILL.md +28 -0
  39. up/templates/docs/__init__.py +341 -0
  40. up/templates/docs/standards/HEADERS.md +24 -0
  41. up/templates/docs/standards/STRUCTURE.md +18 -0
  42. up/templates/docs/standards/TEMPLATES.md +19 -0
  43. up/templates/learn/__init__.py +567 -14
  44. up/templates/loop/__init__.py +546 -27
  45. up/templates/mcp/__init__.py +474 -0
  46. up/templates/projects/__init__.py +786 -0
  47. up/ui/__init__.py +14 -0
  48. up/ui/loop_display.py +650 -0
  49. up/ui/theme.py +137 -0
  50. up_cli-0.5.0.dist-info/METADATA +519 -0
  51. up_cli-0.5.0.dist-info/RECORD +55 -0
  52. up_cli-0.1.1.dist-info/METADATA +0 -186
  53. up_cli-0.1.1.dist-info/RECORD +0 -14
  54. {up_cli-0.1.1.dist-info → up_cli-0.5.0.dist-info}/WHEEL +0 -0
  55. {up_cli-0.1.1.dist-info → up_cli-0.5.0.dist-info}/entry_points.txt +0 -0
up/core/state.py ADDED
@@ -0,0 +1,678 @@
1
+ """Unified state management for up-cli.
2
+
3
+ This module provides a single source of truth for all up-cli state,
4
+ consolidating previously fragmented state files:
5
+ - .loop_state.json → state.loop
6
+ - .claude/context_budget.json → state.context
7
+ - .parallel_state.json → state.parallel
8
+ - .worktrees/*/.agent_state.json → state.agents
9
+
10
+ All state is now stored in .up/state.json
11
+ Configuration is stored in .up/config.json
12
+ """
13
+
14
+ import json
15
+ from dataclasses import dataclass, field, asdict
16
+ from datetime import datetime
17
+ from pathlib import Path
18
+ from typing import Optional, Dict, List, Any
19
+
20
+
21
+ # =============================================================================
22
+ # Configuration Data Class
23
+ # =============================================================================
24
+
25
@dataclass
class UpConfig:
    """Tunable settings for up-cli, persisted in .up/config.json.

    Every field has a default, so a missing or partial config file still
    produces a fully populated object.
    """
    # --- Doom loop detection ---
    # Consecutive task failures that count as a doom loop.
    doom_loop_threshold: int = 3

    # --- Circuit breaker ---
    # Minutes an OPEN breaker waits before it may move to HALF_OPEN.
    circuit_breaker_cooldown_minutes: int = 5
    # Failures after which a breaker switches to OPEN.
    circuit_breaker_failure_threshold: int = 3

    # --- Checkpoints ---
    # Upper bound on the number of checkpoint IDs retained.
    checkpoint_retention_count: int = 50

    # --- Context budget ---
    context_budget_tokens: int = 100_000
    # Thresholds are fractions of the budget (0.8 == 80%).
    context_warning_threshold: float = 0.8
    context_critical_threshold: float = 0.9

    # --- AI execution ---
    # 600 seconds == 10 minutes.
    default_ai_timeout_seconds: int = 600

    # --- Parallel execution ---
    default_parallel_workers: int = 3

    def to_dict(self) -> dict:
        """Return every setting as a plain dictionary."""
        return asdict(self)

    @classmethod
    def from_dict(cls, data: dict) -> "UpConfig":
        """Build a config from *data*, dropping keys that are not fields.

        Args:
            data: Mapping of setting names to values; unknown names are
                ignored and absent names keep their defaults.
        """
        known = cls.__dataclass_fields__
        recognised = {}
        for name, value in data.items():
            if name in known:
                recognised[name] = value
        return cls(**recognised)
61
+
62
+
63
+ # =============================================================================
64
+ # State Data Classes
65
+ # =============================================================================
66
+
67
@dataclass
class CircuitBreakerState:
    """Failure tracker for one named operation.

    The ``state`` field holds one of "CLOSED", "OPEN" or "HALF_OPEN".
    """
    failures: int = 0
    state: str = "CLOSED"  # CLOSED, OPEN, HALF_OPEN
    last_failure: Optional[str] = None  # ISO timestamp of the latest failure
    opened_at: Optional[str] = None  # ISO timestamp of the latest switch to OPEN
    # Thresholds; these defaults are normally overwritten from UpConfig.
    failure_threshold: int = 3
    cooldown_minutes: int = 5

    def record_failure(self):
        """Count one failure and open the circuit once the threshold is hit."""
        self.failures += 1
        self.last_failure = datetime.now().isoformat()
        if self.failures < self.failure_threshold:
            return
        self.state = "OPEN"
        self.opened_at = datetime.now().isoformat()

    def record_success(self):
        """Count one success.

        A success while HALF_OPEN closes the circuit and clears the failure
        count; a success while CLOSED reduces the count by one (never below
        zero). In any other state nothing changes.
        """
        if self.state == "HALF_OPEN":
            self.state = "CLOSED"
            self.failures = 0
            return
        if self.state == "CLOSED":
            self.failures = max(0, self.failures - 1)

    def is_open(self) -> bool:
        """Return True when the circuit is currently OPEN."""
        return self.state == "OPEN"

    def try_reset(self) -> bool:
        """Move from OPEN to HALF_OPEN if the cooldown has elapsed.

        Intended to be called before attempting an operation.

        Returns:
            True if the state was changed to HALF_OPEN, False otherwise.
        """
        if not (self.state == "OPEN" and self.opened_at):
            return False

        elapsed = datetime.now() - datetime.fromisoformat(self.opened_at)
        if elapsed.total_seconds() <= self.cooldown_minutes * 60:
            return False
        self.state = "HALF_OPEN"
        return True

    def can_execute(self) -> bool:
        """Report whether an operation may be attempted right now.

        CLOSED and HALF_OPEN allow execution immediately. Otherwise a reset
        is attempted first, and the answer is whether the circuit is still
        OPEN afterwards.
        """
        if self.state in ("CLOSED", "HALF_OPEN"):
            return True
        self.try_reset()
        return self.state != "OPEN"
129
+
130
+
131
@dataclass
class LoopState:
    """Progress record for the product loop."""
    iteration: int = 0
    # One of: IDLE, OBSERVE, CHECKPOINT, EXECUTE, VERIFY, COMMIT
    phase: str = "IDLE"
    current_task: Optional[str] = None
    tasks_completed: List[str] = field(default_factory=list)
    tasks_failed: List[str] = field(default_factory=list)
    last_checkpoint: Optional[str] = None
    started_at: Optional[str] = None
    interrupted_at: Optional[str] = None

    # Doom loop detection: running count of back-to-back failures and the
    # limit it is compared against (default here, overridden by config).
    consecutive_failures: int = 0
    doom_loop_threshold: int = 3
146
+
147
+
148
@dataclass
class ContextState:
    """Token accounting for the context window."""
    budget: int = 100_000
    total_tokens: int = 0
    # Thresholds are fractions of ``budget`` compared against the used ratio.
    warning_threshold: float = 0.8
    critical_threshold: float = 0.9
    # ISO timestamp taken when the instance is created.
    session_start: str = field(default_factory=lambda: datetime.now().isoformat())
    entries: List[Dict[str, Any]] = field(default_factory=list)

    @property
    def usage_percent(self) -> float:
        """Tokens used as a percentage of the budget (0 if budget is not positive)."""
        if self.budget > 0:
            return (self.total_tokens / self.budget) * 100
        return 0

    @property
    def remaining_tokens(self) -> int:
        """Tokens left in the budget, clamped at zero."""
        return max(0, self.budget - self.total_tokens)

    @property
    def status(self) -> str:
        """Classify current usage as "OK", "WARNING" or "CRITICAL"."""
        if self.budget > 0:
            fraction = self.total_tokens / self.budget
        else:
            fraction = 0
        # Check the stricter limit first so it wins when both are exceeded.
        if fraction >= self.critical_threshold:
            return "CRITICAL"
        if fraction >= self.warning_threshold:
            return "WARNING"
        return "OK"
177
+
178
+
179
@dataclass
class AgentState:
    """Record describing one agent and its worktree.

    ``task_id`` is the only required field.
    """
    task_id: str
    task_title: str = ""
    branch: str = ""
    worktree_path: str = ""
    # One of: created, executing, verifying, passed, failed, merged
    status: str = "created"
    phase: str = "INIT"
    # ISO timestamp taken when the instance is created.
    started_at: str = field(default_factory=lambda: datetime.now().isoformat())
    completed_at: Optional[str] = None
    commits: int = 0
    error: Optional[str] = None
192
+
193
+
194
@dataclass
class ParallelState:
    """Bookkeeping for parallel execution."""
    active: bool = False
    max_workers: int = 3
    current_batch: int = 0
    # task_ids of the agents taking part.
    agents: List[str] = field(default_factory=list)
201
+
202
+
203
@dataclass
class MetricsState:
    """Running counters for tasks, time, rollbacks and checkpoints."""
    total_tasks: int = 0
    completed_tasks: int = 0
    failed_tasks: int = 0
    total_time_seconds: float = 0
    total_rollbacks: int = 0
    total_checkpoints: int = 0

    @property
    def success_rate(self) -> float:
        """Completed tasks divided by completed plus failed (0 when none finished)."""
        finished = self.completed_tasks + self.failed_tasks
        if finished > 0:
            return self.completed_tasks / finished
        return 0
218
+
219
+
220
@dataclass
class UnifiedState:
    """Unified state for all up-cli operations.

    This is the single source of truth, stored in .up/state.json
    """
    version: str = "2.0"

    # Core states
    loop: LoopState = field(default_factory=LoopState)
    context: ContextState = field(default_factory=ContextState)
    parallel: ParallelState = field(default_factory=ParallelState)
    metrics: MetricsState = field(default_factory=MetricsState)

    # Circuit breakers (keyed by operation name)
    circuit_breakers: Dict[str, CircuitBreakerState] = field(default_factory=dict)

    # Agent states (keyed by task_id)
    agents: Dict[str, AgentState] = field(default_factory=dict)

    # Checkpoints (list of checkpoint IDs)
    checkpoints: List[str] = field(default_factory=list)

    # Timestamps (ISO format)
    created_at: str = field(default_factory=lambda: datetime.now().isoformat())
    updated_at: str = field(default_factory=lambda: datetime.now().isoformat())

    def get_circuit_breaker(self, name: str, config: Optional["UpConfig"] = None) -> CircuitBreakerState:
        """Get or create a circuit breaker by name.

        Args:
            name: Circuit breaker identifier
            config: Optional config whose thresholds are copied onto a newly
                created breaker (an existing breaker is returned untouched)

        Returns:
            The breaker stored under *name*.
        """
        if name not in self.circuit_breakers:
            cb = CircuitBreakerState()
            if config:
                cb.failure_threshold = config.circuit_breaker_failure_threshold
                cb.cooldown_minutes = config.circuit_breaker_cooldown_minutes
            self.circuit_breakers[name] = cb
        return self.circuit_breakers[name]

    def to_dict(self) -> dict:
        """Convert to a dictionary of plain values for JSON serialization."""
        return {
            "version": self.version,
            "loop": asdict(self.loop),
            "context": asdict(self.context),
            "parallel": asdict(self.parallel),
            "metrics": asdict(self.metrics),
            "circuit_breakers": {
                k: asdict(v) for k, v in self.circuit_breakers.items()
            },
            "agents": {
                k: asdict(v) for k, v in self.agents.items()
            },
            "checkpoints": self.checkpoints,
            "created_at": self.created_at,
            "updated_at": self.updated_at,
        }

    @classmethod
    def from_dict(cls, data: dict) -> "UnifiedState":
        """Create a state object from a dictionary such as ``to_dict`` emits.

        Sections missing from *data* keep their defaults. Within every
        section, keys that are not fields of the target dataclass are
        ignored, so a file containing extra keys still loads.

        Args:
            data: Parsed contents of the state file.

        Returns:
            The populated state.
        """
        def known_fields(dc_type, raw: dict) -> dict:
            # Keep only the keys the dataclass constructor accepts.
            allowed = dc_type.__dataclass_fields__
            return {k: v for k, v in raw.items() if k in allowed}

        state = cls()
        state.version = data.get("version", "2.0")

        # Load loop state
        if "loop" in data:
            state.loop = LoopState(**known_fields(LoopState, data["loop"]))

        # Load context state
        if "context" in data:
            state.context = ContextState(**known_fields(ContextState, data["context"]))

        # Load parallel state
        if "parallel" in data:
            state.parallel = ParallelState(**known_fields(ParallelState, data["parallel"]))

        # Load metrics state
        if "metrics" in data:
            state.metrics = MetricsState(**known_fields(MetricsState, data["metrics"]))

        # Load circuit breakers. These are filtered like every other section:
        # previously an unknown key raised TypeError from the constructor.
        if "circuit_breakers" in data:
            for name, cb_data in data["circuit_breakers"].items():
                state.circuit_breakers[name] = CircuitBreakerState(
                    **known_fields(CircuitBreakerState, cb_data)
                )

        # Load agents
        if "agents" in data:
            for task_id, agent_data in data["agents"].items():
                state.agents[task_id] = AgentState(**known_fields(AgentState, agent_data))

        state.checkpoints = data.get("checkpoints", [])
        state.created_at = data.get("created_at", datetime.now().isoformat())
        state.updated_at = data.get("updated_at", datetime.now().isoformat())

        return state
337
+
338
+
339
+ # =============================================================================
340
+ # State Manager
341
+ # =============================================================================
342
+
343
class StateManager:
    """Manages unified state for up-cli.

    Provides:
    - Load/save to .up/state.json
    - Configuration via .up/config.json
    - Migration from old state files
    - Writes that go through a temp file and refresh ``updated_at``
    """

    STATE_DIR = ".up"
    STATE_FILE = "state.json"
    CONFIG_FILE = "config.json"

    # Old state file locations for migration
    OLD_LOOP_STATE = ".loop_state.json"
    OLD_CONTEXT_STATE = ".claude/context_budget.json"
    OLD_PARALLEL_STATE = ".parallel_state.json"

    def __init__(self, workspace: Optional[Path] = None):
        """Initialize state manager.

        Nothing is read from disk here; state and config are loaded lazily
        on first access.

        Args:
            workspace: Project root directory (defaults to cwd)
        """
        self.workspace = workspace or Path.cwd()
        self.state_dir = self.workspace / self.STATE_DIR
        self.state_file = self.state_dir / self.STATE_FILE
        self.config_file = self.state_dir / self.CONFIG_FILE
        self._state: Optional[UnifiedState] = None
        self._config: Optional[UpConfig] = None

    @property
    def config(self) -> UpConfig:
        """Get configuration, loading if necessary."""
        if self._config is None:
            self._config = self._load_config()
        return self._config

    def _load_config(self) -> UpConfig:
        """Load configuration from file, falling back to defaults.

        Defaults are returned when the file is absent, is not valid JSON,
        or does not contain a JSON object.
        """
        if self.config_file.exists():
            try:
                data = json.loads(self.config_file.read_text())
                return UpConfig.from_dict(data)
            except (json.JSONDecodeError, TypeError, AttributeError):
                # AttributeError: valid JSON that is not an object
                # (e.g. a list) has no .items() for from_dict to iterate.
                pass
        return UpConfig()

    def save_config(self) -> None:
        """Save configuration to file (no-op if no config has been loaded)."""
        if self._config is None:
            return
        self.state_dir.mkdir(parents=True, exist_ok=True)
        self.config_file.write_text(json.dumps(self._config.to_dict(), indent=2))

    def update_config(self, **kwargs) -> None:
        """Update configuration values, persist them and re-apply to state.

        Args:
            **kwargs: Setting names and their new values. Names that are not
                attributes of the config object are silently ignored.
        """
        for key, value in kwargs.items():
            if hasattr(self.config, key):
                # The value argument was missing here, which made every
                # call raise TypeError.
                setattr(self.config, key, value)
        self.save_config()
        # Apply new config to state
        self._apply_config_to_state()

    @property
    def state(self) -> UnifiedState:
        """Get current state, loading if necessary."""
        if self._state is None:
            self._state = self.load()
        return self._state

    def _apply_config_to_state(self) -> None:
        """Copy configuration values onto the loaded state objects.

        Does nothing when no state has been loaded yet.
        """
        if self._state is None:
            return

        cfg = self.config

        # Apply doom loop threshold
        self._state.loop.doom_loop_threshold = cfg.doom_loop_threshold

        # Apply context budget config
        self._state.context.budget = cfg.context_budget_tokens
        self._state.context.warning_threshold = cfg.context_warning_threshold
        self._state.context.critical_threshold = cfg.context_critical_threshold

        # Apply parallel config
        self._state.parallel.max_workers = cfg.default_parallel_workers

        # Apply circuit breaker config to all breakers
        for cb in self._state.circuit_breakers.values():
            cb.failure_threshold = cfg.circuit_breaker_failure_threshold
            cb.cooldown_minutes = cfg.circuit_breaker_cooldown_minutes

    def load(self) -> UnifiedState:
        """Load state from file, migrating old files if needed.

        If the unified state file is missing or cannot be parsed into a
        state object, the old per-feature files are migrated instead. In
        both cases the configuration is applied before returning.
        """
        # Try loading new unified state
        if self.state_file.exists():
            try:
                data = json.loads(self.state_file.read_text())
                self._state = UnifiedState.from_dict(data)
                # Apply configuration
                self._apply_config_to_state()
                return self._state
            except (json.JSONDecodeError, TypeError, KeyError, AttributeError):
                # Corrupted state (including JSON that is not an object,
                # which surfaces as AttributeError): fall through and
                # start fresh, still attempting migration.
                pass

        # No unified state, try migration
        self._state = self._migrate_old_states()
        # Apply configuration
        self._apply_config_to_state()
        return self._state

    def save(self) -> None:
        """Save current state to file (no-op if no state is loaded)."""
        if self._state is None:
            return

        # Update timestamp
        self._state.updated_at = datetime.now().isoformat()

        # Ensure directory exists
        self.state_dir.mkdir(parents=True, exist_ok=True)

        # Write to a temp file, then move it over the real one. replace()
        # overwrites an existing target on every platform, whereas rename()
        # raises FileExistsError on Windows once state.json exists.
        temp_file = self.state_file.with_suffix(".tmp")
        temp_file.write_text(json.dumps(self._state.to_dict(), indent=2))
        temp_file.replace(self.state_file)

    def reset(self) -> UnifiedState:
        """Reset to a fresh default state and write it to disk."""
        self._state = UnifiedState()
        self.save()
        return self._state

    def _migrate_old_states(self) -> UnifiedState:
        """Migrate from old fragmented state files.

        Each old file that exists and parses is folded into a new
        UnifiedState. If at least one file was migrated, the result is saved
        and the migrated files are renamed with a ".migrated" suffix.

        Returns:
            The new state (all defaults when nothing could be migrated).
        """
        state = UnifiedState()
        migrated: List[Path] = []
        # AttributeError covers old files holding JSON that is not an object.
        unreadable = (json.JSONDecodeError, KeyError, AttributeError)

        # Migrate old loop state
        old_loop = self.workspace / self.OLD_LOOP_STATE
        if old_loop.exists():
            try:
                data = json.loads(old_loop.read_text())
                state.loop = LoopState(
                    iteration=data.get("iteration", 0),
                    phase=data.get("phase", "IDLE"),
                    current_task=data.get("current_task"),
                    tasks_completed=data.get("tasks_completed", []),
                    last_checkpoint=data.get("last_checkpoint"),
                    started_at=data.get("started_at"),
                    interrupted_at=data.get("interrupted_at"),
                )
                # Migrate circuit breakers (only failures and state are kept)
                if "circuit_breaker" in data:
                    for name, cb_data in data["circuit_breaker"].items():
                        if isinstance(cb_data, dict):
                            state.circuit_breakers[name] = CircuitBreakerState(
                                failures=cb_data.get("failures", 0),
                                state=cb_data.get("state", "CLOSED"),
                            )
                migrated.append(old_loop)
            except unreadable:
                pass

        # Migrate old context budget
        old_context = self.workspace / self.OLD_CONTEXT_STATE
        if old_context.exists():
            try:
                data = json.loads(old_context.read_text())
                state.context = ContextState(
                    budget=data.get("budget", 100_000),
                    total_tokens=data.get("total_tokens", 0),
                    warning_threshold=data.get("warning_threshold", 0.8),
                    critical_threshold=data.get("critical_threshold", 0.9),
                    session_start=data.get("session_start", datetime.now().isoformat()),
                    entries=data.get("entries", []),
                )
                migrated.append(old_context)
            except unreadable:
                pass

        # Migrate old parallel state (old key names differ from the new fields)
        old_parallel = self.workspace / self.OLD_PARALLEL_STATE
        if old_parallel.exists():
            try:
                data = json.loads(old_parallel.read_text())
                state.parallel = ParallelState(
                    active=data.get("mode") == "parallel",
                    max_workers=data.get("parallel_limit", 3),
                    current_batch=data.get("iteration", 0),
                    agents=data.get("active_worktrees", []),
                )
                migrated.append(old_parallel)
            except unreadable:
                pass

        if migrated:
            state.version = "2.0"
            # Save migrated state
            self._state = state
            self.save()

            # Rename old files rather than deleting them, for safety
            for old_path in migrated:
                if old_path.exists():
                    backup = old_path.with_suffix(old_path.suffix + ".migrated")
                    try:
                        old_path.rename(backup)
                    except OSError:
                        pass  # Can't rename, leave it

        self._state = state
        return state

    # =========================================================================
    # Convenience Methods
    # =========================================================================

    def update_loop(self, **kwargs) -> None:
        """Update loop state fields and save.

        Names that are not attributes of the loop state are ignored.
        """
        for key, value in kwargs.items():
            if hasattr(self.state.loop, key):
                setattr(self.state.loop, key, value)
        self.save()

    def update_context(self, **kwargs) -> None:
        """Update context state fields and save.

        Names that are not attributes of the context state are ignored.
        """
        for key, value in kwargs.items():
            if hasattr(self.state.context, key):
                setattr(self.state.context, key, value)
        self.save()

    def record_task_complete(self, task_id: str) -> None:
        """Record a task completion and save.

        The task ID is added to the completed list once; the completed
        counter is incremented on every call.
        """
        if task_id not in self.state.loop.tasks_completed:
            self.state.loop.tasks_completed.append(task_id)
        self.state.metrics.completed_tasks += 1
        self.state.loop.consecutive_failures = 0  # Reset doom loop counter
        self.save()

    def record_task_failed(self, task_id: str) -> None:
        """Record a task failure and save.

        The task ID is added to the failed list once; the failed counter and
        the consecutive-failure counter are incremented on every call.
        """
        if task_id not in self.state.loop.tasks_failed:
            self.state.loop.tasks_failed.append(task_id)
        self.state.metrics.failed_tasks += 1
        self.state.loop.consecutive_failures += 1
        self.save()

    def check_doom_loop(self) -> tuple[bool, str]:
        """Check if we're in a doom loop.

        Uses configurable threshold from .up/config.json

        Returns:
            Tuple of (is_doom_loop, message). The message is empty when
            there is nothing to report.
        """
        failures = self.state.loop.consecutive_failures
        threshold = self.config.doom_loop_threshold

        if failures >= threshold:
            return True, (
                f"⚠️ DOOM LOOP DETECTED: {failures} consecutive failures. "
                f"Consider running 'up reset' instead of continuing."
            )
        # Require at least one failure: with a threshold of 1 the bare
        # comparison would warn about "0 consecutive failures".
        if failures > 0 and failures >= threshold - 1:
            return False, (
                f"⚡ Warning: {failures} consecutive failures. "
                f"One more failure will trigger doom loop detection."
            )
        return False, ""

    def get_circuit_breaker(self, name: str) -> CircuitBreakerState:
        """Get or create a circuit breaker with config applied."""
        return self.state.get_circuit_breaker(name, self.config)

    def add_agent(self, agent: AgentState) -> None:
        """Add (or overwrite) an agent in state and save."""
        self.state.agents[agent.task_id] = agent
        if agent.task_id not in self.state.parallel.agents:
            self.state.parallel.agents.append(agent.task_id)
        self.save()

    def remove_agent(self, task_id: str) -> None:
        """Remove an agent from state and save; unknown IDs are ignored."""
        if task_id in self.state.agents:
            del self.state.agents[task_id]
        if task_id in self.state.parallel.agents:
            self.state.parallel.agents.remove(task_id)
        self.save()

    def add_checkpoint(self, checkpoint_id: str) -> None:
        """Record a checkpoint, trim the list to the retention limit and save."""
        self.state.checkpoints.append(checkpoint_id)
        self.state.loop.last_checkpoint = checkpoint_id
        self.state.metrics.total_checkpoints += 1
        # Keep only configured number of checkpoints (the most recent ones)
        retention = self.config.checkpoint_retention_count
        if len(self.state.checkpoints) > retention:
            self.state.checkpoints = self.state.checkpoints[-retention:]
        self.save()

    def record_rollback(self) -> None:
        """Record a rollback and save."""
        self.state.metrics.total_rollbacks += 1
        self.save()
654
+
655
+
656
+ # =============================================================================
657
+ # Module-level convenience functions
658
+ # =============================================================================
659
+
660
# Shared manager instance, created on first use by get_state_manager().
_default_manager: Optional[StateManager] = None


def get_state_manager(workspace: Optional[Path] = None) -> StateManager:
    """Return the shared state manager, creating or replacing it as needed.

    A new manager is built when none exists yet, or when *workspace* is
    given and differs from the workspace of the current one.
    """
    global _default_manager
    needs_new = _default_manager is None or (
        workspace and _default_manager.workspace != workspace
    )
    if needs_new:
        _default_manager = StateManager(workspace)
    return _default_manager
669
+
670
+
671
def get_state(workspace: Optional[Path] = None) -> UnifiedState:
    """Return the unified state held by the shared state manager."""
    manager = get_state_manager(workspace)
    return manager.state
674
+
675
+
676
def save_state(workspace: Optional[Path] = None) -> None:
    """Ask the shared state manager to save its current state."""
    manager = get_state_manager(workspace)
    manager.save()