specsmith 0.2.3.dev51__py3-none-any.whl → 0.2.3.dev55__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
specsmith/__init__.py CHANGED
@@ -8,4 +8,4 @@ from importlib.metadata import version as _pkg_version
8
8
  try:
9
9
  __version__: str = _pkg_version("specsmith")
10
10
  except PackageNotFoundError: # running from source without install
11
- __version__ = "0.2.2"
11
+ __version__ = "0.2.3"
specsmith/cli.py CHANGED
@@ -1501,6 +1501,168 @@ def credits_budget(
1501
1501
  console.print(f" Enabled: {budget.enabled}")
1502
1502
 
1503
1503
 
1504
@credits.group(name="limits")
def credits_limits() -> None:
    """Manage persisted per-model RPM/TPM profiles."""
    # Container group only: the subcommands attach themselves through the
    # ``@credits_limits.command`` decorators, and click renders the docstring
    # above as the group's help text, so it is kept verbatim.
1507
+
1508
+
1509
@credits_limits.command(name="list")
@click.option("--project-dir", type=click.Path(exists=True), default=".")
def credits_limits_list(project_dir: str) -> None:
    """List configured local model rate-limit profiles."""
    from specsmith.rate_limits import load_rate_limit_profiles

    project_root = Path(project_dir).resolve()
    # Order by provider, then model, so repeated runs print in a stable order.
    ordered = sorted(
        load_rate_limit_profiles(project_root),
        key=lambda entry: (entry.provider, entry.model),
    )
    if not ordered:
        console.print("[yellow]No model rate-limit profiles configured.[/yellow]")
        return

    for entry in ordered:
        summary = (
            " "
            f"{entry.provider}/{entry.model} "
            f"RPM={entry.rpm_limit} TPM={entry.tpm_limit} "
            f"target={entry.utilization_target:.2f} "
            f"concurrency={entry.concurrency_cap}"
        )
        console.print(summary)
1532
+
1533
+
1534
@credits_limits.command(name="set")
@click.option("--project-dir", type=click.Path(exists=True), default=".")
@click.option("--provider", required=True, help="Provider key, such as openai or anthropic.")
@click.option("--model", required=True, help="Model key, such as gpt-5.4.")
@click.option("--rpm", "rpm_limit", type=int, required=True, help="Requests per minute limit.")
@click.option("--tpm", "tpm_limit", type=int, required=True, help="Tokens per minute limit.")
@click.option("--target", "utilization_target", type=float, default=0.7, show_default=True)
@click.option("--concurrency", "concurrency_cap", type=int, default=1, show_default=True)
def credits_limits_set(
    project_dir: str,
    provider: str,
    model: str,
    rpm_limit: int,
    tpm_limit: int,
    utilization_target: float,
    concurrency_cap: int,
) -> None:
    """Create or replace a local model rate-limit profile."""
    from specsmith.rate_limits import (
        ModelRateLimitProfile,
        load_rate_limit_profiles,
        save_rate_limit_profiles,
    )

    root = Path(project_dir).resolve()
    try:
        updated_profile = ModelRateLimitProfile(
            provider=provider,
            model=model,
            rpm_limit=rpm_limit,
            tpm_limit=tpm_limit,
            utilization_target=utilization_target,
            concurrency_cap=concurrency_cap,
            source="override",
        )
    except ValueError as exc:
        # The profile validates its limits on construction (non-positive
        # RPM/TPM/concurrency, target outside (0, 1]). Report that as a normal
        # CLI usage error instead of letting a traceback reach the user.
        raise click.UsageError(str(exc)) from exc

    # Key the existing profiles so a profile with the same provider/model is
    # replaced in place rather than duplicated.
    profiles = {profile.key: profile for profile in load_rate_limit_profiles(root)}
    profiles[updated_profile.key] = updated_profile
    save_rate_limit_profiles(root, list(profiles.values()))
    console.print(
        "[green]✓[/green] "
        f"Saved {updated_profile.provider}/{updated_profile.model} "
        f"(RPM={updated_profile.rpm_limit}, TPM={updated_profile.tpm_limit}, "
        f"target={updated_profile.utilization_target:.2f}, "
        f"concurrency={updated_profile.concurrency_cap})"
    )
1578
+
1579
+
1580
@credits_limits.command(name="status")
@click.option("--project-dir", type=click.Path(exists=True), default=".")
@click.option("--provider", required=True, help="Provider key, such as openai or anthropic.")
@click.option("--model", required=True, help="Model key, such as gpt-5.4.")
def credits_limits_status(project_dir: str, provider: str, model: str) -> None:
    """Show rolling-window snapshot for a model (RPM, TPM, concurrency, moving averages)."""
    from specsmith.rate_limits import (
        BUILTIN_PROFILES,
        load_rate_limit_profiles,
        load_rate_limit_scheduler,
    )

    project_root = Path(project_dir).resolve()
    # Built-in defaults are the base layer; locally saved profiles override them.
    known_profiles = load_rate_limit_profiles(project_root, defaults=BUILTIN_PROFILES)
    scheduler = load_rate_limit_scheduler(project_root, known_profiles)

    try:
        snap = scheduler.snapshot(provider, model)
    except KeyError:
        # The scheduler raises KeyError when no exact or wildcard profile matches.
        console.print(
            f"[red]No profile found for {provider}/{model}.[/red] "
            "Use 'specsmith credits limits set' to configure one."
        )
        raise SystemExit(1) from None

    report_lines = (
        f"[bold]{snap.provider}/{snap.model}[/bold]",
        (
            f" RPM: {snap.rolling_request_count} / {snap.effective_rpm_limit} "
            f"(limit {snap.rpm_limit}, target {snap.effective_rpm_limit})"
        ),
        (
            f" TPM: {snap.rolling_token_count:,} / {snap.effective_tpm_limit:,} "
            f"(limit {snap.tpm_limit:,})"
        ),
        f" Utilization: RPM {snap.request_utilization:.1%} TPM {snap.token_utilization:.1%}",
        (
            f" Concurrency: {snap.in_flight} in-flight / {snap.current_concurrency_cap} cap "
            f"(base {snap.base_concurrency_cap})"
        ),
        (
            f" Moving avg: {snap.moving_average_requests:.1f} req/window "
            f"{snap.moving_average_tokens:,.0f} tok/window"
        ),
    )
    for report_line in report_lines:
        console.print(report_line)
1625
+
1626
+
1627
@credits_limits.command(name="defaults")
@click.option("--project-dir", type=click.Path(exists=True), default=".")
@click.option(
    "--install",
    is_flag=True,
    default=False,
    help="Merge built-in defaults into the local project config (existing overrides preserved).",
)
def credits_limits_defaults(project_dir: str, install: bool) -> None:
    """List (or install) built-in RPM/TPM profiles for common provider/model paths."""
    from specsmith.rate_limits import (
        BUILTIN_PROFILES,
        load_rate_limit_profiles,
        save_rate_limit_profiles,
    )

    console.print("[bold]Built-in model rate-limit profiles[/bold]")
    console.print("[dim](conservative defaults — local overrides take precedence)[/dim]\n")
    for builtin in BUILTIN_PROFILES:
        row = (
            f" {builtin.provider}/{builtin.model:25s} "
            f"RPM={builtin.rpm_limit:<6} TPM={builtin.tpm_limit:>12,} "
            f"target={builtin.utilization_target:.2f}"
        )
        console.print(row)

    if not install:
        return

    project_root = Path(project_dir).resolve()
    local_by_key = {p.key: p for p in load_rate_limit_profiles(project_root)}
    builtin_by_key = {p.key: p for p in BUILTIN_PROFILES}
    # Later entries win, so locally saved profiles replace built-ins that
    # share their provider/model key.
    combined = {**builtin_by_key, **local_by_key}
    save_rate_limit_profiles(project_root, list(combined.values()))
    # Every local key is in ``combined``; the difference is the number of
    # built-ins that had no local counterpart.
    added = len(combined) - len(local_by_key)
    console.print(
        f"\n[green]\u2713[/green] Installed {added} new default(s) to "
        ".specsmith/model-rate-limits.json (existing profiles preserved)."
    )
1664
+
1665
+
1504
1666
  main.add_command(credits)
1505
1667
 
1506
1668
 
@@ -0,0 +1,784 @@
1
+ # SPDX-License-Identifier: MIT
2
+ # Copyright (c) 2026 BitConcepts, LLC. All rights reserved.
3
+ """Runtime model rate-limit profiles, pacing, and retry helpers."""
4
+
5
+ from __future__ import annotations
6
+
7
+ import json
8
+ import math
9
+ import random
10
+ import re
11
+ import time
12
+ from collections import deque
13
+ from collections.abc import Callable
14
+ from dataclasses import asdict, dataclass, field
15
+ from pathlib import Path
16
+
17
# On-disk layout: both files live under ``<project root>/.specsmith``.
_STATE_DIR = ".specsmith"
_PROFILE_FILE = "model-rate-limits.json"
_RUNTIME_STATE_FILE = "model-rate-limit-state.json"

# Scheduler tuning defaults.
_ROLLING_WINDOW_SECONDS = 60.0
_CONCURRENCY_POLL_SECONDS = 0.1
_MOVING_AVERAGE_ALPHA = 0.25
_RETRY_JITTER_RATIO = 0.2

# Shared "<number><unit>" tail of every wait hint, exposing the named groups
# ``value`` and ``unit``.
_WAIT_DURATION_SUFFIX = r"(?P<value>\d+(?:\.\d+)?)\s*(?P<unit>ms|s|sec|secs|seconds)"

# Case-insensitive patterns for wait hints embedded in provider error text.
_WAIT_PATTERNS = tuple(
    re.compile(lead_in + _WAIT_DURATION_SUFFIX, flags=re.IGNORECASE)
    for lead_in in (
        r"please\s+try\s+again\s+in\s+",
        r"retry\s+(?:after|in)\s+",
    )
)
35
+
36
+
37
@dataclass(slots=True)
class ModelRateLimitProfile:
    """Requests-per-minute and tokens-per-minute limits for one provider/model.

    ``provider`` and ``model`` are stripped and lower-cased on construction.
    Either may be ``"*"``, which ``matches`` treats as a wildcard.

    Raises:
        ValueError: if ``rpm_limit``, ``tpm_limit`` or ``concurrency_cap`` is
            not greater than zero, or ``utilization_target`` is outside (0, 1].
    """

    provider: str
    model: str
    rpm_limit: int
    tpm_limit: int
    utilization_target: float = 0.7
    concurrency_cap: int = 1
    source: str = "local"

    def __post_init__(self) -> None:
        self.provider = _normalize_key_part(self.provider)
        self.model = _normalize_key_part(self.model)
        for limit_name in ("rpm_limit", "tpm_limit"):
            if getattr(self, limit_name) <= 0:
                raise ValueError(f"{limit_name} must be greater than zero")
        # Keep the chained comparison: it also rejects NaN.
        if not (0 < self.utilization_target <= 1):
            raise ValueError("utilization_target must be greater than 0 and at most 1")
        if self.concurrency_cap <= 0:
            raise ValueError("concurrency_cap must be greater than zero")

    @property
    def key(self) -> str:
        """Return the normalized ``provider::model`` lookup key."""
        return _profile_key(self.provider, self.model)

    @property
    def effective_rpm_limit(self) -> int:
        """Return the request budget scaled by the utilization target (min 1)."""
        scaled = int(math.floor(self.rpm_limit * self.utilization_target))
        return scaled if scaled > 1 else 1

    @property
    def effective_tpm_limit(self) -> int:
        """Return the token budget scaled by the utilization target (min 1)."""
        scaled = int(math.floor(self.tpm_limit * self.utilization_target))
        return scaled if scaled > 1 else 1

    def matches(self, provider: str, model: str) -> bool:
        """Return True when this profile covers *provider*/*model*.

        Both arguments are normalized before comparison; a profile field of
        ``"*"`` matches any value.
        """
        wanted_provider = _normalize_key_part(provider)
        wanted_model = _normalize_key_part(model)
        if self.provider not in ("*", wanted_provider):
            return False
        return self.model in ("*", wanted_model)
83
+
84
+
85
+ @dataclass(slots=True)
86
+ class RateLimitReservation:
87
+ """A pre-dispatch budget reservation for a single request."""
88
+
89
+ reservation_id: str
90
+ provider: str
91
+ model: str
92
+ estimated_input_tokens: int
93
+ max_output_tokens: int
94
+ estimated_total_tokens: int
95
+ acquired_at: float
96
+ waited_seconds: float = 0.0
97
+
98
+
99
+ @dataclass(slots=True)
100
+ class RateLimitSnapshot:
101
+ """Current rolling-window and moving-average state for a model."""
102
+
103
+ provider: str
104
+ model: str
105
+ rpm_limit: int
106
+ tpm_limit: int
107
+ effective_rpm_limit: int
108
+ effective_tpm_limit: int
109
+ rolling_request_count: int
110
+ rolling_token_count: int
111
+ moving_average_requests: float
112
+ moving_average_tokens: float
113
+ request_utilization: float
114
+ token_utilization: float
115
+ base_concurrency_cap: int
116
+ current_concurrency_cap: int
117
+ in_flight: int
118
+
119
+
120
+ @dataclass(slots=True)
121
+ class RateLimitErrorDetails:
122
+ """Normalized provider rate-limit classification."""
123
+
124
+ is_rate_limit: bool
125
+ message: str
126
+ status_code: int | None = None
127
+ retry_after_seconds: float | None = None
128
+
129
+
130
+ @dataclass(slots=True)
131
+ class _TokenEvent:
132
+ timestamp: float
133
+ tokens: int
134
+ reservation_id: str
135
+
136
+
137
+ @dataclass(slots=True)
138
+ class _ModelRuntimeState:
139
+ request_timestamps: deque[float] = field(default_factory=deque)
140
+ token_events: deque[_TokenEvent] = field(default_factory=deque)
141
+ active_reservations: set[str] = field(default_factory=set)
142
+ in_flight: int = 0
143
+ current_concurrency_cap: int = 0
144
+ success_streak: int = 0
145
+ moving_average_requests: float = 0.0
146
+ moving_average_tokens: float = 0.0
147
+
148
+
149
def _normalize_key_part(value: str) -> str:
    """Return *value* with surrounding whitespace removed and lower-cased."""
    trimmed = value.strip()
    return trimmed.lower()
151
+
152
+
153
def _profile_key(provider: str, model: str) -> str:
    """Return the ``provider::model`` lookup key built from normalized parts."""
    return "::".join(_normalize_key_part(part) for part in (provider, model))
155
+
156
+
157
def _get_profile_path(root: Path) -> Path:
    """Return the profile file path under *root*, creating its directory."""
    state_dir = root / _STATE_DIR
    # Create the directory here so callers can write the file straight away.
    state_dir.mkdir(parents=True, exist_ok=True)
    return state_dir / _PROFILE_FILE
161
+
162
+
163
def _get_runtime_state_path(root: Path) -> Path:
    """Return the runtime-state file path under *root*, creating its directory."""
    state_dir = root / _STATE_DIR
    # Create the directory here so callers can write the file straight away.
    state_dir.mkdir(parents=True, exist_ok=True)
    return state_dir / _RUNTIME_STATE_FILE
167
+
168
+
169
+ # ---------------------------------------------------------------------------
170
+ # Built-in provider/model defaults
171
+ # ---------------------------------------------------------------------------
172
+ # These are conservative starting points. Account and tier limits vary.
173
+ # Local overrides (saved in .specsmith/model-rate-limits.json) always take
174
+ # precedence over these defaults — see load_rate_limit_profiles().
175
+
176
+
177
def _default(  # noqa: PLR0913
    provider: str,
    model: str,
    rpm: int,
    tpm: int,
    utilization_target: float = 0.7,
    concurrency_cap: int = 1,
) -> ModelRateLimitProfile:
    """Build a profile tagged ``source="default"`` for the built-in table."""
    settings = {
        "provider": provider,
        "model": model,
        "rpm_limit": rpm,
        "tpm_limit": tpm,
        "utilization_target": utilization_target,
        "concurrency_cap": concurrency_cap,
        "source": "default",
    }
    return ModelRateLimitProfile(**settings)
194
+
195
+
196
# (provider, model, rpm, tpm) rows; every row uses _default()'s utilization
# target and concurrency cap.
_BUILTIN_LIMITS: tuple[tuple[str, str, int, int], ...] = (
    # --- OpenAI ---
    ("openai", "gpt-4o", 500, 30_000_000),
    ("openai", "gpt-4o-mini", 500, 200_000_000),
    ("openai", "gpt-4-turbo", 500, 800_000),
    ("openai", "gpt-3.5-turbo", 3500, 90_000),
    ("openai", "o1", 500, 30_000_000),
    ("openai", "o1-mini", 1000, 200_000_000),
    ("openai", "o3-mini", 1000, 200_000_000),
    # gpt-5.4 — org quota from issue #59 example: 500K TPM
    ("openai", "gpt-5.4", 60, 500_000),
    # Wildcard fallback for unknown OpenAI models
    ("openai", "*", 500, 500_000),
    # --- Anthropic ---
    ("anthropic", "claude-opus-4", 2000, 40_000_000),
    ("anthropic", "claude-sonnet-4", 2000, 40_000_000),
    ("anthropic", "claude-haiku-3-5", 2000, 200_000_000),
    ("anthropic", "claude-3-5-sonnet", 2000, 40_000_000),
    ("anthropic", "claude-3-5-haiku", 2000, 200_000_000),
    ("anthropic", "claude-3-opus", 2000, 40_000_000),
    # Wildcard fallback for unknown Anthropic models
    ("anthropic", "*", 2000, 40_000_000),
    # --- Google ---
    ("google", "gemini-1.5-pro", 360, 4_000_000),
    ("google", "gemini-1.5-flash", 1000, 4_000_000),
    ("google", "gemini-2.0-flash", 1000, 4_000_000),
    ("google", "gemini-2.5-pro", 360, 4_000_000),
    # Wildcard fallback for unknown Google models
    ("google", "*", 360, 4_000_000),
)

BUILTIN_PROFILES: list[ModelRateLimitProfile] = [
    _default(provider, model, rpm=rpm, tpm=tpm) for provider, model, rpm, tpm in _BUILTIN_LIMITS
]
"""Conservative built-in RPM/TPM profiles for common provider/model paths.

Use ``specsmith credits limits defaults`` to inspect these values or install
them into a project's local override file.
"""
231
+
232
+
233
def save_rate_limit_profiles(root: Path, profiles: list[ModelRateLimitProfile]) -> None:
    """Write *profiles* to the project's profile file as a JSON array.

    The file is overwritten in full; each profile is stored as a dict of its
    dataclass fields.
    """
    target = _get_profile_path(root)
    serialized = json.dumps([asdict(entry) for entry in profiles], indent=2)
    target.write_text(serialized, encoding="utf-8")
240
+
241
+
242
def load_rate_limit_profiles(
    root: Path,
    *,
    defaults: list[ModelRateLimitProfile] | None = None,
) -> list[ModelRateLimitProfile]:
    """Load local profiles, overriding any provided defaults by provider/model key.

    Loading is best-effort. A missing, undecodable or structurally unexpected
    profile file yields just the defaults, and an individual entry that cannot
    be turned into a valid profile is skipped without discarding the others.

    Args:
        root: Project root containing the ``.specsmith`` directory.
        defaults: Optional base profiles; a local profile with the same key
            replaces the default.

    Returns:
        The merged profiles: defaults first (possibly replaced), then any
        additional local profiles in file order.
    """
    merged = {profile.key: profile for profile in defaults or []}
    path = _get_profile_path(root)
    if not path.exists():
        return list(merged.values())

    try:
        raw_profiles = json.loads(path.read_text(encoding="utf-8"))
    except (json.JSONDecodeError, UnicodeDecodeError):
        return list(merged.values())

    # The file is written as a JSON array; any other root (object, string,
    # number) is treated like a corrupt file instead of being iterated.
    if not isinstance(raw_profiles, list):
        return list(merged.values())

    for item in raw_profiles:
        if not isinstance(item, dict):
            continue
        try:
            profile = ModelRateLimitProfile(**item)
        except (TypeError, ValueError, AttributeError):
            # TypeError: unknown/missing fields or non-numeric limits.
            # ValueError: limits rejected by the profile's own validation.
            # AttributeError: provider/model is not a string.
            continue
        merged[profile.key] = profile
    return list(merged.values())
262
+
263
+
264
def classify_rate_limit_error(error: object) -> RateLimitErrorDetails:
    """Summarize *error* and decide whether it is a rate-limit failure.

    An error counts as a rate limit when its status code is 429, its error
    code is ``rate_limit_exceeded`` or ``rate_limited``, or its message
    contains "rate limit" or "too many requests" (case-insensitive). The retry
    hint is taken from the headers first and from the message text otherwise.
    """
    status_code = _extract_status_code(error)
    message = _extract_message(error)
    headers = _extract_headers(error)

    retry_after = _parse_retry_after_headers(headers)
    if retry_after is None:
        retry_after = _parse_retry_after_message(message)

    lowered = message.lower()
    code = _extract_error_code(error)
    mentions_rate_limit = any(
        phrase in lowered for phrase in ("rate limit", "too many requests")
    )
    looks_rate_limited = bool(
        status_code == 429
        or code in {"rate_limit_exceeded", "rate_limited"}
        or mentions_rate_limit
    )

    return RateLimitErrorDetails(
        is_rate_limit=looks_rate_limited,
        message=message,
        status_code=status_code,
        retry_after_seconds=retry_after,
    )
289
+
290
+
291
def compute_retry_delay(
    error: object,
    attempt: int,
    *,
    base_delay_seconds: float = 1.0,
    max_delay_seconds: float = 60.0,
    random_fn: Callable[[], float] | None = None,
) -> float:
    """Return how long to wait before retrying after *error*.

    A wait parsed from the error itself is returned unchanged. Otherwise the
    delay is ``base_delay_seconds`` doubled for each attempt after the first,
    capped at ``max_delay_seconds``, plus a jitter of up to
    ``_RETRY_JITTER_RATIO`` of that capped value.

    Args:
        error: The failure to inspect for a provider-supplied wait.
        attempt: Attempt number; values below 1 are treated as 1.
        base_delay_seconds: Backoff for the first attempt.
        max_delay_seconds: Upper bound on the backoff before jitter.
        random_fn: Source of the jitter fraction; defaults to ``random.random``.
    """
    prescribed = classify_rate_limit_error(error).retry_after_seconds
    if prescribed is not None:
        return prescribed

    draw_jitter = random_fn or random.random
    doublings = max(attempt - 1, 0)
    capped = min(max_delay_seconds, base_delay_seconds * (2**doublings))
    jitter = capped * _RETRY_JITTER_RATIO * draw_jitter()
    return float(capped + jitter)
307
+
308
+
309
+ class RateLimitScheduler:
310
+ """Rolling-window model scheduler with pacing and adaptive concurrency."""
311
+
312
+ def __init__(
313
+ self,
314
+ profiles: list[ModelRateLimitProfile],
315
+ *,
316
+ clock: Callable[[], float] | None = None,
317
+ sleep_fn: Callable[[float], None] | None = None,
318
+ random_fn: Callable[[], float] | None = None,
319
+ window_seconds: float = _ROLLING_WINDOW_SECONDS,
320
+ restore_after_successes: int = 3,
321
+ concurrency_poll_seconds: float = _CONCURRENCY_POLL_SECONDS,
322
+ ) -> None:
323
+ self._profiles = {profile.key: profile for profile in profiles}
324
+ self._states: dict[str, _ModelRuntimeState] = {}
325
+ self._reservations: dict[str, RateLimitReservation] = {}
326
+ self._clock = clock or time.monotonic
327
+ self._sleep_fn = sleep_fn or time.sleep
328
+ self._random_fn = random_fn or random.random
329
+ self._window_seconds = window_seconds
330
+ self._restore_after_successes = restore_after_successes
331
+ self._concurrency_poll_seconds = concurrency_poll_seconds
332
+ self._reservation_counter = 0
333
+
334
+ def upsert_profiles(self, profiles: list[ModelRateLimitProfile]) -> None:
335
+ """Add or replace profiles by provider/model key."""
336
+ for profile in profiles:
337
+ self._profiles[profile.key] = profile
338
+
339
+ def acquire(
340
+ self,
341
+ provider: str,
342
+ model: str,
343
+ *,
344
+ estimated_input_tokens: int,
345
+ max_output_tokens: int,
346
+ ) -> RateLimitReservation:
347
+ """Block until RPM/TPM budget and concurrency are available."""
348
+ profile = self._resolve_profile(provider, model)
349
+ state = self._get_state(profile)
350
+ waited_seconds = 0.0
351
+ estimated_total_tokens = max(0, estimated_input_tokens) + max(0, max_output_tokens)
352
+
353
+ while True:
354
+ now = self._clock()
355
+ self._purge_expired(state, now)
356
+ wait_seconds = self._compute_wait_seconds(state, profile, estimated_total_tokens, now)
357
+ if wait_seconds <= 0:
358
+ break
359
+ self._sleep_fn(wait_seconds)
360
+ waited_seconds += wait_seconds
361
+
362
+ now = self._clock()
363
+ reservation = RateLimitReservation(
364
+ reservation_id=self._next_reservation_id(),
365
+ provider=profile.provider,
366
+ model=profile.model,
367
+ estimated_input_tokens=max(0, estimated_input_tokens),
368
+ max_output_tokens=max(0, max_output_tokens),
369
+ estimated_total_tokens=estimated_total_tokens,
370
+ acquired_at=now,
371
+ waited_seconds=waited_seconds,
372
+ )
373
+ state.request_timestamps.append(now)
374
+ state.token_events.append(
375
+ _TokenEvent(
376
+ timestamp=now,
377
+ tokens=estimated_total_tokens,
378
+ reservation_id=reservation.reservation_id,
379
+ )
380
+ )
381
+ state.active_reservations.add(reservation.reservation_id)
382
+ state.in_flight += 1
383
+ self._reservations[reservation.reservation_id] = reservation
384
+ self._update_moving_averages(state)
385
+ return reservation
386
+
387
+ def record_success(
388
+ self,
389
+ reservation: RateLimitReservation | str,
390
+ *,
391
+ actual_input_tokens: int | None = None,
392
+ actual_output_tokens: int | None = None,
393
+ ) -> RateLimitSnapshot:
394
+ """Finalize a successful request and optionally reconcile token estimate."""
395
+ reservation = self._coerce_reservation(reservation)
396
+ profile = self._resolve_profile(reservation.provider, reservation.model)
397
+ state = self._get_state(profile)
398
+ actual_total_tokens = reservation.estimated_total_tokens
399
+ if actual_input_tokens is not None or actual_output_tokens is not None:
400
+ actual_total_tokens = max(0, actual_input_tokens or 0) + max(
401
+ 0, actual_output_tokens or 0
402
+ )
403
+ self._update_token_event(state, reservation.reservation_id, actual_total_tokens)
404
+
405
+ self._release_reservation(state, reservation.reservation_id)
406
+ if state.current_concurrency_cap < profile.concurrency_cap:
407
+ state.success_streak += 1
408
+ if state.success_streak >= self._restore_after_successes:
409
+ state.current_concurrency_cap += 1
410
+ state.success_streak = 0
411
+ self._update_moving_averages(state)
412
+ _ = actual_total_tokens
413
+ return self.snapshot(profile.provider, profile.model)
414
+
415
+ def record_rate_limit(
416
+ self,
417
+ reservation: RateLimitReservation | str,
418
+ error: object,
419
+ *,
420
+ attempt: int,
421
+ ) -> float:
422
+ """Apply concurrency reduction and return the delay before retry."""
423
+ reservation = self._coerce_reservation(reservation)
424
+ profile = self._resolve_profile(reservation.provider, reservation.model)
425
+ state = self._get_state(profile)
426
+ self._release_reservation(state, reservation.reservation_id)
427
+ state.success_streak = 0
428
+ state.current_concurrency_cap = max(1, math.ceil(state.current_concurrency_cap / 2))
429
+ self._update_moving_averages(state)
430
+ return compute_retry_delay(error, attempt, random_fn=self._random_fn)
431
+
432
+ def get_reservation(self, reservation_id: str) -> RateLimitReservation:
433
+ """Get an active reservation by identifier."""
434
+ if reservation_id not in self._reservations:
435
+ raise KeyError(f"Unknown reservation id: {reservation_id}")
436
+ return self._reservations[reservation_id]
437
+
438
+ def snapshot(self, provider: str, model: str) -> RateLimitSnapshot:
439
+ """Get rolling-window and concurrency state for a provider/model."""
440
+ profile = self._resolve_profile(provider, model)
441
+ state = self._get_state(profile)
442
+ self._purge_expired(state, self._clock())
443
+ rolling_requests = len(state.request_timestamps)
444
+ rolling_tokens = sum(event.tokens for event in state.token_events)
445
+ self._update_moving_averages(state)
446
+ return RateLimitSnapshot(
447
+ provider=profile.provider,
448
+ model=profile.model,
449
+ rpm_limit=profile.rpm_limit,
450
+ tpm_limit=profile.tpm_limit,
451
+ effective_rpm_limit=profile.effective_rpm_limit,
452
+ effective_tpm_limit=profile.effective_tpm_limit,
453
+ rolling_request_count=rolling_requests,
454
+ rolling_token_count=rolling_tokens,
455
+ moving_average_requests=state.moving_average_requests,
456
+ moving_average_tokens=state.moving_average_tokens,
457
+ request_utilization=rolling_requests / profile.effective_rpm_limit,
458
+ token_utilization=rolling_tokens / profile.effective_tpm_limit,
459
+ base_concurrency_cap=profile.concurrency_cap,
460
+ current_concurrency_cap=state.current_concurrency_cap,
461
+ in_flight=state.in_flight,
462
+ )
463
+
464
+ def _resolve_profile(self, provider: str, model: str) -> ModelRateLimitProfile:
465
+ provider = _normalize_key_part(provider)
466
+ model = _normalize_key_part(model)
467
+ exact_key = _profile_key(provider, model)
468
+ if exact_key in self._profiles:
469
+ return self._profiles[exact_key]
470
+
471
+ for wildcard_key in (
472
+ _profile_key(provider, "*"),
473
+ _profile_key("*", model),
474
+ _profile_key("*", "*"),
475
+ ):
476
+ if wildcard_key in self._profiles:
477
+ return self._profiles[wildcard_key]
478
+
479
+ raise KeyError(f"No rate-limit profile configured for {provider}/{model}")
480
+
481
+ def _get_state(self, profile: ModelRateLimitProfile) -> _ModelRuntimeState:
482
+ if profile.key not in self._states:
483
+ self._states[profile.key] = _ModelRuntimeState(
484
+ current_concurrency_cap=profile.concurrency_cap
485
+ )
486
+ return self._states[profile.key]
487
+
488
+ def _compute_wait_seconds(
489
+ self,
490
+ state: _ModelRuntimeState,
491
+ profile: ModelRateLimitProfile,
492
+ estimated_total_tokens: int,
493
+ now: float,
494
+ ) -> float:
495
+ request_wait = 0.0
496
+ if len(state.request_timestamps) + 1 > profile.effective_rpm_limit:
497
+ oldest_request = state.request_timestamps[0]
498
+ request_wait = max(0.0, oldest_request + self._window_seconds - now)
499
+
500
+ token_wait = self._compute_token_wait_seconds(
501
+ state=state,
502
+ token_limit=profile.effective_tpm_limit,
503
+ estimated_total_tokens=estimated_total_tokens,
504
+ now=now,
505
+ )
506
+
507
+ concurrency_wait = (
508
+ self._concurrency_poll_seconds
509
+ if state.in_flight >= state.current_concurrency_cap
510
+ else 0.0
511
+ )
512
+ return max(request_wait, token_wait, concurrency_wait)
513
+
514
+ def _compute_token_wait_seconds(
515
+ self,
516
+ *,
517
+ state: _ModelRuntimeState,
518
+ token_limit: int,
519
+ estimated_total_tokens: int,
520
+ now: float,
521
+ ) -> float:
522
+ current_tokens = sum(event.tokens for event in state.token_events)
523
+ if current_tokens + estimated_total_tokens <= token_limit:
524
+ return 0.0
525
+
526
+ tokens_to_free = current_tokens + estimated_total_tokens - token_limit
527
+ freed_tokens = 0
528
+ wait_seconds = 0.0
529
+ for event in state.token_events:
530
+ freed_tokens += event.tokens
531
+ wait_seconds = max(0.0, event.timestamp + self._window_seconds - now)
532
+ if freed_tokens >= tokens_to_free:
533
+ return wait_seconds
534
+ return self._window_seconds
535
+
536
+ def _purge_expired(self, state: _ModelRuntimeState, now: float) -> None:
537
+ cutoff = now - self._window_seconds
538
+ while state.request_timestamps and state.request_timestamps[0] <= cutoff:
539
+ state.request_timestamps.popleft()
540
+ while state.token_events and state.token_events[0].timestamp <= cutoff:
541
+ state.token_events.popleft()
542
+
543
+ def _release_reservation(self, state: _ModelRuntimeState, reservation_id: str) -> None:
544
+ if reservation_id in state.active_reservations:
545
+ state.active_reservations.remove(reservation_id)
546
+ state.in_flight = max(0, state.in_flight - 1)
547
+ self._reservations.pop(reservation_id, None)
548
+
549
+ def _update_token_event(
550
+ self,
551
+ state: _ModelRuntimeState,
552
+ reservation_id: str,
553
+ actual_total_tokens: int,
554
+ ) -> None:
555
+ for event in state.token_events:
556
+ if event.reservation_id == reservation_id:
557
+ event.tokens = actual_total_tokens
558
+ break
559
+
560
+ def _update_moving_averages(self, state: _ModelRuntimeState) -> None:
561
+ current_requests = float(len(state.request_timestamps))
562
+ current_tokens = float(sum(event.tokens for event in state.token_events))
563
+ state.moving_average_requests = (_MOVING_AVERAGE_ALPHA * current_requests) + (
564
+ (1 - _MOVING_AVERAGE_ALPHA) * state.moving_average_requests
565
+ )
566
+ state.moving_average_tokens = (_MOVING_AVERAGE_ALPHA * current_tokens) + (
567
+ (1 - _MOVING_AVERAGE_ALPHA) * state.moving_average_tokens
568
+ )
569
+
570
+ def _next_reservation_id(self) -> str:
571
+ self._reservation_counter += 1
572
+ return f"reservation-{self._reservation_counter}"
573
+
574
+ def export_state(self) -> dict[str, object]:
575
+ """Serialize the scheduler state for persistence."""
576
+ now = self._clock()
577
+ states: dict[str, object] = {}
578
+ for key, state in self._states.items():
579
+ self._purge_expired(state, now)
580
+ self._update_moving_averages(state)
581
+ states[key] = {
582
+ "request_timestamps": list(state.request_timestamps),
583
+ "token_events": [
584
+ {
585
+ "timestamp": event.timestamp,
586
+ "tokens": event.tokens,
587
+ "reservation_id": event.reservation_id,
588
+ }
589
+ for event in state.token_events
590
+ ],
591
+ "active_reservations": sorted(state.active_reservations),
592
+ "in_flight": state.in_flight,
593
+ "current_concurrency_cap": state.current_concurrency_cap,
594
+ "success_streak": state.success_streak,
595
+ "moving_average_requests": state.moving_average_requests,
596
+ "moving_average_tokens": state.moving_average_tokens,
597
+ }
598
+
599
+ return {
600
+ "states": states,
601
+ "reservations": {
602
+ reservation_id: asdict(reservation)
603
+ for reservation_id, reservation in self._reservations.items()
604
+ },
605
+ "reservation_counter": self._reservation_counter,
606
+ }
607
+
608
def import_state(self, data: dict[str, object]) -> None:
    """Replace the scheduler's runtime state with a previously exported snapshot.

    Three optional keys are read from ``data``: ``"states"``, ``"reservations"``
    and ``"reservation_counter"``. Existing per-model states and reservations
    are discarded before the snapshot is applied. Containers or entries that
    are not dicts are skipped, and a non-integer counter leaves the current
    counter untouched.
    """
    self._states = {}
    saved_states = data.get("states", {})
    if isinstance(saved_states, dict):
        for model_key, snapshot in saved_states.items():
            if not isinstance(snapshot, dict):
                continue
            # Token events are rebuilt first; entries that are not dicts are dropped.
            events: deque[_TokenEvent] = deque(
                _TokenEvent(
                    timestamp=float(item["timestamp"]),
                    tokens=int(item["tokens"]),
                    reservation_id=str(item["reservation_id"]),
                )
                for item in snapshot.get("token_events", [])
                if isinstance(item, dict)
            )
            timestamps = deque(float(stamp) for stamp in snapshot.get("request_timestamps", []))
            reserved = {str(rid) for rid in snapshot.get("active_reservations", [])}
            self._states[str(model_key)] = _ModelRuntimeState(
                request_timestamps=timestamps,
                token_events=events,
                active_reservations=reserved,
                in_flight=int(snapshot.get("in_flight", 0)),
                current_concurrency_cap=int(snapshot.get("current_concurrency_cap", 0)),
                success_streak=int(snapshot.get("success_streak", 0)),
                moving_average_requests=float(snapshot.get("moving_average_requests", 0.0)),
                moving_average_tokens=float(snapshot.get("moving_average_tokens", 0.0)),
            )

    self._reservations = {}
    saved_reservations = data.get("reservations", {})
    if isinstance(saved_reservations, dict):
        for rid, fields in saved_reservations.items():
            if isinstance(fields, dict):
                # Each saved reservation dict is expanded into constructor keywords.
                self._reservations[str(rid)] = RateLimitReservation(**fields)

    counter = data.get("reservation_counter", 0)
    if isinstance(counter, int):
        self._reservation_counter = counter
655
+
656
def _coerce_reservation(self, reservation: RateLimitReservation | str) -> RateLimitReservation:
    """Return a reservation object for either a reservation or a reservation id.

    A ``RateLimitReservation`` is returned as-is; any other value is passed to
    ``self.get_reservation`` for lookup.
    """
    if not isinstance(reservation, RateLimitReservation):
        return self.get_reservation(reservation)
    return reservation
660
+
661
+
662
+ def _extract_status_code(error: object) -> int | None:
663
+ status_code = getattr(error, "status_code", None)
664
+ if isinstance(status_code, int):
665
+ return status_code
666
+
667
+ response = getattr(error, "response", None)
668
+ response_status = getattr(response, "status_code", None)
669
+ if isinstance(response_status, int):
670
+ return response_status
671
+ return None
672
+
673
+
674
+ def _extract_headers(error: object) -> dict[str, str]:
675
+ response = getattr(error, "response", None)
676
+ headers = getattr(response, "headers", None)
677
+ if isinstance(headers, dict):
678
+ return {str(k).lower(): str(v) for k, v in headers.items()}
679
+ if isinstance(error, dict):
680
+ raw_headers = error.get("headers")
681
+ if isinstance(raw_headers, dict):
682
+ return {str(k).lower(): str(v) for k, v in raw_headers.items()}
683
+ return {}
684
+
685
+
686
+ def _extract_message(error: object) -> str:
687
+ if isinstance(error, str):
688
+ return error
689
+ if isinstance(error, dict):
690
+ parts: list[str] = []
691
+ for key in ("message", "detail", "error"):
692
+ value = error.get(key)
693
+ if isinstance(value, str):
694
+ parts.append(value)
695
+ if parts:
696
+ return " ".join(parts)
697
+ return json.dumps(error)
698
+ message = getattr(error, "message", None)
699
+ if isinstance(message, str) and message:
700
+ return message
701
+ args = getattr(error, "args", ())
702
+ if args:
703
+ return " ".join(str(arg) for arg in args if arg)
704
+ return str(error)
705
+
706
+
707
+ def _extract_error_code(error: object) -> str | None:
708
+ if isinstance(error, dict):
709
+ code = error.get("code")
710
+ if isinstance(code, str):
711
+ return code.lower()
712
+ code = getattr(error, "code", None)
713
+ if isinstance(code, str):
714
+ return code.lower()
715
+ return None
716
+
717
+
718
+ def _parse_retry_after_headers(headers: dict[str, str]) -> float | None:
719
+ for key in ("retry-after", "retry-after-ms", "x-ratelimit-reset-after"):
720
+ value = headers.get(key)
721
+ if value is None:
722
+ continue
723
+ try:
724
+ numeric = float(value)
725
+ except ValueError:
726
+ continue
727
+ if key == "retry-after-ms":
728
+ return numeric / 1000.0
729
+ return numeric
730
+ return None
731
+
732
+
733
def _parse_retry_after_message(message: str) -> float | None:
    """Return the wait time in seconds found in ``message``, or ``None``.

    The patterns in ``_WAIT_PATTERNS`` are tried in order and the first one
    that matches decides the result. Its ``value`` group is read as a float;
    a ``unit`` of ``ms`` (case-insensitive) is converted from milliseconds,
    any other unit is returned as-is.
    """
    attempts = (pattern.search(message) for pattern in _WAIT_PATTERNS)
    found = next((hit for hit in attempts if hit), None)
    if found is None:
        return None
    amount = float(found.group("value"))
    if found.group("unit").lower() == "ms":
        return amount / 1000.0
    return amount
744
+
745
+
746
def load_rate_limit_scheduler(
    root: Path,
    profiles: list[ModelRateLimitProfile],
    *,
    clock: Callable[[], float] | None = None,
    sleep_fn: Callable[[float], None] | None = None,
    random_fn: Callable[[], float] | None = None,
    window_seconds: float = _ROLLING_WINDOW_SECONDS,
    restore_after_successes: int = 3,
    concurrency_poll_seconds: float = _CONCURRENCY_POLL_SECONDS,
) -> RateLimitScheduler:
    """Build a scheduler and hydrate any saved runtime state.

    The keyword arguments are forwarded unchanged to ``RateLimitScheduler``.
    Loading saved state is best-effort: if the state file is missing,
    unreadable, not valid UTF-8/JSON, not a JSON object, or cannot be
    imported, a scheduler without restored state is returned instead of
    raising.
    """

    def _new_scheduler() -> RateLimitScheduler:
        """Create a scheduler from the caller's arguments with no restored state."""
        return RateLimitScheduler(
            profiles,
            clock=clock,
            sleep_fn=sleep_fn,
            random_fn=random_fn,
            window_seconds=window_seconds,
            restore_after_successes=restore_after_successes,
            concurrency_poll_seconds=concurrency_poll_seconds,
        )

    scheduler = _new_scheduler()
    path = _get_runtime_state_path(root)
    try:
        # Reading directly (instead of checking exists() first) avoids a race
        # with a concurrent delete. OSError covers a missing or unreadable
        # file; ValueError covers both JSONDecodeError and UnicodeDecodeError.
        raw_state = json.loads(path.read_text(encoding="utf-8"))
    except (OSError, ValueError):
        return scheduler

    if not isinstance(raw_state, dict):
        return scheduler

    try:
        scheduler.import_state(raw_state)
    except (KeyError, TypeError, ValueError):
        # Well-formed JSON with an unexpected shape: import_state may already
        # have replaced part of the scheduler's state, so discard it and start
        # clean rather than return a half-hydrated scheduler.
        return _new_scheduler()
    return scheduler
779
+
780
+
781
def save_rate_limit_scheduler(root: Path, scheduler: RateLimitScheduler) -> None:
    """Write the scheduler's exported runtime state to the project's state file.

    The state is serialised as JSON with two-space indentation and written as
    UTF-8 to the path returned by ``_get_runtime_state_path(root)``.
    """
    target = _get_runtime_state_path(root)
    snapshot = scheduler.export_state()
    serialized = json.dumps(snapshot, indent=2)
    target.write_text(serialized, encoding="utf-8")
@@ -34,6 +34,18 @@ All agent-invoked commands MUST have a timeout. No command may run indefinitely.
34
34
  ### H10 — No hardcoded versions
35
35
  Version strings MUST NOT be hardcoded in documentation, tests, or source code outside of `pyproject.toml`. Use `importlib.metadata.version()` at runtime. Use `{{ "{{ version }}" }}` placeholders in documentation resolved at build time.
36
36
 
37
+ ### H11 — No unbounded loops or blocking I/O without a deadline
38
+ Every loop or blocking wait in agent-written scripts and automation MUST have:
39
+
40
+ - An explicit deadline or iteration cap (e.g. a `deadline` timestamp, a `max_attempts` counter, or a `timeout` parameter).
41
+ - A fallback exit path that executes when the deadline is reached.
42
+ - A diagnostic message emitted if the timeout fires (self-diagnosing failures).
43
+
44
+ Examples of violating patterns: `while True:` / `while ($true)` / `for (;;)` with no deadline guard; serial-port or I/O polling loops with no deadline; `sleep` inside a loop with no termination condition. `specsmith validate` runs a heuristic scan for these patterns over script files (`.sh`, `.bash`, `.ps1`, `.cmd`) in the project root and under `scripts/`.
45
+
46
+ ### H12 — Windows multi-step automation via .cmd files
47
+ On Windows, multi-step or heavily-quoted automation sequences MUST be written to a temporary `.cmd` file and executed from there. Do NOT emit these as inline shell invocations or as `.ps1` files unless there is a concrete PowerShell-only requirement. Inline multi-line quoting on Windows is fragile and causes avoidable hangs.
48
+
37
49
  ---
38
50
 
39
51
  ## Stop Conditions
specsmith/validator.py CHANGED
@@ -39,6 +39,30 @@ class ValidationReport:
39
39
 
40
40
  _REQ_PATTERN = re.compile(r"\b(REQ-[A-Z]+-\d+)\b")
41
41
 
42
+ # Infinite-loop patterns (Python, PowerShell, shell)
43
+ _INFINITE_LOOP_PATTERNS = (
44
+ re.compile(r"while\s+True\s*:"), # Python
45
+ re.compile(r"while\s*\(\s*\$true\s*\)", re.IGNORECASE), # PowerShell
46
+ re.compile(r"while\s+true\s*[;{]", re.IGNORECASE), # bash/sh
47
+ re.compile(r"while\s+:\s*[;{\n]"), # bash/sh `while :`
48
+ re.compile(r"for\s*\(\s*;;\s*\)"), # C-style for(;;)
49
+ )
50
+
51
+ # Deadline/timeout guard keywords — presence anywhere in the file suppresses the warning
52
+ _DEADLINE_GUARD_PATTERNS = (
53
+ re.compile(r"deadline", re.IGNORECASE),
54
+ re.compile(r"timeout", re.IGNORECASE),
55
+ re.compile(r"max_iter", re.IGNORECASE),
56
+ re.compile(r"max_attempt", re.IGNORECASE),
57
+ re.compile(r"time\.monotonic\("),
58
+ re.compile(r"time\.time\("),
59
+ re.compile(r"Get-Date", re.IGNORECASE),
60
+ re.compile(r"\$SECONDS"),
61
+ )
62
+
63
+ # Script file extensions to scan (exclude general source dirs to avoid false positives)
64
+ _SCRIPT_EXTENSIONS = {".sh", ".cmd", ".ps1", ".bash"}
65
+
42
66
 
43
67
  def _check_scaffold_yml(root: Path) -> list[ValidationResult]:
44
68
  """Check that scaffold.yml exists and is valid YAML."""
@@ -208,6 +232,74 @@ def _check_architecture_reqs(root: Path) -> list[ValidationResult]:
208
232
  return results
209
233
 
210
234
 
235
def _check_blocking_loops(root: Path) -> list[ValidationResult]:
    """Scan script files for unbounded loops without a deadline/timeout guard.

    Checks .sh, .cmd, .ps1, .bash files directly in the project root and
    anywhere under scripts/. A file is flagged only when it contains an
    infinite-loop pattern AND lacks any recognised deadline/timeout guard
    anywhere in the file body. This is a heuristic check.

    Returns an empty list when there are no script files, one failing result
    per flagged file, or a single passing result when nothing is flagged.
    Files that cannot be read are skipped.
    """

    def _is_script(path: Path) -> bool:
        """Return True for regular files with one of the scanned extensions."""
        return path.is_file() and path.suffix.lower() in _SCRIPT_EXTENSIONS

    # iterdir()/rglob() yield entries in arbitrary order; sorting each group
    # keeps the report stable across runs and platforms (root-level first).
    candidates: list[Path] = sorted(p for p in root.iterdir() if _is_script(p))
    scripts_dir = root / "scripts"
    if scripts_dir.is_dir():
        candidates.extend(sorted(p for p in scripts_dir.rglob("*") if _is_script(p)))

    if not candidates:
        return []

    flagged: list[str] = []
    for script_path in candidates:
        try:
            text = script_path.read_text(encoding="utf-8", errors="replace")
        except OSError:
            continue

        if not any(p.search(text) for p in _INFINITE_LOOP_PATTERNS):
            continue
        # Any guard keyword anywhere in the file suppresses the warning.
        if any(p.search(text) for p in _DEADLINE_GUARD_PATTERNS):
            continue

        try:
            rel = script_path.relative_to(root)
        except ValueError:
            rel = script_path
        flagged.append(str(rel))

    if not flagged:
        return [
            ValidationResult(
                name="blocking-loops",
                passed=True,
                message=f"{len(candidates)} script file(s) checked — no unbounded loops found",
            )
        ]

    return [
        ValidationResult(
            name=f"blocking-loop:{name}",
            passed=False,
            message=(
                f"{name}: unbounded loop detected without a deadline/timeout guard "
                "(H11 violation). Add an explicit deadline, iteration cap, and "
                "fallback exit path."
            ),
        )
        for name in flagged
    ]
301
+
302
+
211
303
  def run_validate(root: Path) -> ValidationReport:
212
304
  """Run all validation checks and return a report."""
213
305
  report = ValidationReport()
@@ -215,4 +307,5 @@ def run_validate(root: Path) -> ValidationReport:
215
307
  report.results.extend(_check_agents_md_refs(root))
216
308
  report.results.extend(_check_req_ids_unique(root))
217
309
  report.results.extend(_check_architecture_reqs(root))
310
+ report.results.extend(_check_blocking_loops(root))
218
311
  return report
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: specsmith
3
- Version: 0.2.3.dev51
3
+ Version: 0.2.3.dev55
4
4
  Summary: Forge governed project scaffolds from the Agentic AI Development Workflow Specification.
5
5
  Author: BitConcepts
6
6
  License: MIT
@@ -129,7 +129,7 @@ Each type gets: tool-aware CI (correct lint/test/security/build tools), domain-s
129
129
  | `import` | Adopt an existing project (merge mode) |
130
130
  | `audit` | Drift detection and health checks (`--fix` to auto-repair) |
131
131
  | `architect` | Interactive architecture generation |
132
- | `validate` | Governance file consistency checks |
132
+ | `validate` | Governance consistency + H11 blocking-loop detection |
133
133
  | `compress` | Archive old ledger entries |
134
134
  | `upgrade` | Update governance to new spec version |
135
135
  | `status` | CI/PR/alert status from VCS platform |
@@ -137,7 +137,13 @@ Each type gets: tool-aware CI (correct lint/test/security/build tools), domain-s
137
137
  | `export` | Compliance report with REQ↔TEST coverage |
138
138
  | `doctor` | Check if verification tools are installed |
139
139
  | `self-update` | Update specsmith (channel-aware) |
140
- | `credits` | AI credit tracking, analysis, and budgets |
140
+ | `credits` | AI credit tracking, analysis, budgets, and rate-limit pacing |
141
+ | `exec` / `ps` / `abort` | Tracked process execution with PID tracking and timeout |
142
+ | `commit` / `push` / `sync` | Governance-aware VCS operations |
143
+ | `branch` / `pr` | Strategy-aware branching and PR creation |
144
+ | `ledger` | Structured ledger add/list/stats |
145
+ | `req` | Requirements list/add/trace/gaps/orphans |
146
+ | `session-end` | End-of-session checklist |
141
147
 
142
148
  ## 7 Agent Integrations
143
149
 
@@ -147,9 +153,36 @@ AGENTS.md (cross-platform standard), Warp/Oz, Claude Code, GitHub Copilot, Curso
147
153
 
148
154
  GitHub Actions, GitLab CI, Bitbucket Pipelines — all with tool-aware CI generated from the verification tool registry. Dependabot/Renovate configured per language ecosystem.
149
155
 
156
+ ## Governance Rules (H1–H12)
157
+
158
+ specsmith-governed projects enforce 12 hard rules. Two were added in v0.2.3 for agentic workflows:
159
+
160
+ - **H11** — Every loop or blocking wait in agent-written scripts must have a deadline, a fallback exit, and a diagnostic message on timeout. `specsmith validate` runs a heuristic scan that flags unbounded loops in `.sh`, `.bash`, `.ps1`, and `.cmd` scripts.
161
+ - **H12** — On Windows, multi-step automation goes into a temporary `.cmd` file, not inline shell invocations or `.ps1` files (unless there is a concrete PowerShell-only requirement).
162
+
163
+ See [Governance Model](https://specsmith.readthedocs.io/en/stable/governance/) for the full rule set.
164
+
165
+ ## Proactive Rate Limit Pacing
166
+
167
+ specsmith ships a rolling-window scheduler that paces AI provider requests before dispatch:
168
+
169
+ - Built-in RPM/TPM profiles for OpenAI, Anthropic, and Google models (including wildcard fallbacks)
170
+ - Pre-dispatch budget check: sleeps until the 60-second window refills instead of overshooting
171
+ - Parses OpenAI-style `"Please try again in 10.793s"` messages and obeys them
172
+ - Adaptive concurrency: halved after a 429, gradually restored after consecutive successes
173
+ - Local overrides always take precedence over built-in defaults
174
+
175
+ ```bash
176
+ specsmith credits limits defaults # list built-in profiles
177
+ specsmith credits limits defaults --install # merge into project config
178
+ specsmith credits limits status --provider openai --model gpt-5.4
179
+ ```
180
+
181
+ See [Rate Limit Pacing](https://specsmith.readthedocs.io/en/stable/rate-limits/) for full details.
182
+
150
183
  ## Documentation
151
184
 
152
- **[specsmith.readthedocs.io](https://specsmith.readthedocs.io)** — Full user manual with tutorials, command reference, project type details, tool registry, governance model, troubleshooting.
185
+ **[specsmith.readthedocs.io](https://specsmith.readthedocs.io)** — Full user manual with tutorials, command reference, project type details, tool registry, governance model, rate-limit pacing, troubleshooting.
153
186
 
154
187
  ## Links
155
188
 
@@ -1,8 +1,8 @@
1
- specsmith/__init__.py,sha256=qoRmTGyIq0Tiw4ejj0FTwdtAVEyGeZ6LfNCqWbhWjdU,402
1
+ specsmith/__init__.py,sha256=VI4gLAUb2VPV4nYai2qNB2ougBPMBOutVULWUVZ_Z-g,402
2
2
  specsmith/__main__.py,sha256=OEU30g2x5ATD1bzoo7iwwj6tST_41sgGnwPIK5dF6ao,189
3
3
  specsmith/architect.py,sha256=7lOy8PXCYMAmt6TmH25oMTvPTWke_ZC2ekjNjcuD9Zg,5439
4
4
  specsmith/auditor.py,sha256=ENpDtxI3BMsEgiY8NWi-O_0y1wvVVfoT9xUYlWzgNvw,20346
5
- specsmith/cli.py,sha256=eMOdEAvtdHvCuIj8-dwXKLH-wXWodLtJhqlX6_BbAXo,60215
5
+ specsmith/cli.py,sha256=9-kglxR5AP7HMKFZMpV1gN-wkVb5mtBN8YdRJAbWMy0,66578
6
6
  specsmith/compressor.py,sha256=IqMF6WZMbqyA0KLQmBxRyM6xZF0g34-9lQxWcYz3tFs,4448
7
7
  specsmith/config.py,sha256=TCQ5LC9guFFLhLO028l5vp_dfpyfpgvbNFVTvWDEnds,9784
8
8
  specsmith/credit_analyzer.py,sha256=InobNXNzgM6dafYopKPJ7Vmod0M6wjv-TsXbxi36SbY,7052
@@ -14,6 +14,7 @@ specsmith/exporter.py,sha256=HKkSOA9JZ9vzb5498pFKxjcigv0yu6_T69yKy0FX4EQ,5780
14
14
  specsmith/importer.py,sha256=njw6Gi0gPhzpb28czH2V2nEPRIQKIe-TVZvaY73QAzI,40784
15
15
  specsmith/ledger.py,sha256=Ge6npHZo09rcoflbbvYU3EO1WN2JiZ6wJ7FktFxmq5I,2473
16
16
  specsmith/plugins.py,sha256=QE31Lpex1nIQ_ZO967j_m3YOfExWYRxbzJGiyFA_v9M,2099
17
+ specsmith/rate_limits.py,sha256=1ebGpMFCFvGNl6O-9YpfW3BM8_3pgGzvxNNjacgBB8I,29406
17
18
  specsmith/releaser.py,sha256=wULrJlm9T2rQVctR0xtCt5-6BPlM2FO2zNZg2mszP0I,3569
18
19
  specsmith/requirements.py,sha256=YidhhfkQGxWAXIg8PDW3j2pRXaqERCZU5Cn_TaYVaGY,4044
19
20
  specsmith/scaffolder.py,sha256=gk-3U-2iOsHSAPwUDtFX_QuY9AO-T-9GjLTGwvKUTuw,15017
@@ -21,7 +22,7 @@ specsmith/session.py,sha256=Q3yN6ldDfcEOWUiMntvgpMdc9dB6yxUe52Vbb2Fd50Y,4481
21
22
  specsmith/tools.py,sha256=922vA1vqZLf8JwgMvd_qDnS3u8jDTXCUuLLSN-k7u_g,13205
22
23
  specsmith/updater.py,sha256=egv6QmYswjb8bvlYTNG71SKCHf507_9haHR1VJXCtjc,5235
23
24
  specsmith/upgrader.py,sha256=Hvjowhg2leGHRQMdF0enHwvJhh8I45UFAX64YkL3jLM,12790
24
- specsmith/validator.py,sha256=JHwDbdETQwPlV50kyk4h89VekOSL6XNRjCbUiB5-daE,6344
25
+ specsmith/validator.py,sha256=rZ2klbBVgO_RZ8d1Ff0t4tfPQ92B8ncdBHavciIgIAQ,9791
25
26
  specsmith/vcs_commands.py,sha256=xDJb6FsREPQ8waF1rq5_KMxc8P1Gh9YrIUSjCitMnAU,10245
26
27
  specsmith/commands/__init__.py,sha256=NYHgaRqsMP_H_D6D7ETrlKQTUt-QHBHfr_kD0q8b0VI,132
27
28
  specsmith/integrations/__init__.py,sha256=K2Rfb1xhrqHQUqhpw4v2DJuoIvZJ3W0QTKsTCRTQjQ0,1627
@@ -58,7 +59,7 @@ specsmith/templates/go/main.go.j2,sha256=CbjkeC4vx8H5uqIS9x0-2DpWN7Id0YJw52KTbjX
58
59
  specsmith/templates/governance/context-budget.md.j2,sha256=jqrNcufKRD3D6bpqRULzgnD5ORsZuRVzprB4g_YOX5M,2451
59
60
  specsmith/templates/governance/drift-metrics.md.j2,sha256=XianRFmioDLzKWRkfDZjnf5dhGkIXzNsNBusCq6XTBc,1557
60
61
  specsmith/templates/governance/roles.md.j2,sha256=hTdeTgvv7voTvTyQgLZ08Ildwgy21Xl0p0Oo0bDhNdA,1075
61
- specsmith/templates/governance/rules.md.j2,sha256=e9735eU5HOWi3P_OUZhMT0mYcMb9vylE_Lu2yIq3gTk,2115
62
+ specsmith/templates/governance/rules.md.j2,sha256=H-hC6Pp8hg0HBZjiDHM4MW5XSwVcdO5zEUJgXQOQi5o,3196
62
63
  specsmith/templates/governance/verification.md.j2,sha256=imuPhZ1Uh_dqYXsViUSfQ3gMg0KkR8313f67CV_Z734,1665
63
64
  specsmith/templates/governance/workflow.md.j2,sha256=Tt3QdPvRFS7F5FZv__8bfU4a_jEYSCRCZSQOsa6fK-A,2358
64
65
  specsmith/templates/js/package.json.j2,sha256=_bZLBzRPItYGFGgVTQZ8jG7zXsIlKn0DoSM43oHpwLE,1095
@@ -79,9 +80,9 @@ specsmith/vcs/base.py,sha256=IzyC3xtO8npaNbGZoAzgQyc9iSKleJFnp4GA0otTJfU,4098
79
80
  specsmith/vcs/bitbucket.py,sha256=U5cGqpybuPngjpu5GDM8aTZPm9bvZf1099OSBwU_Lro,5014
80
81
  specsmith/vcs/github.py,sha256=OVTLs9egmM_Smrxm4-Lo525Pi59x0PDMOOwqNNiM7lA,11995
81
82
  specsmith/vcs/gitlab.py,sha256=fDYYoDBic0KBnD_QWYf_K3GmdZOEartLXhzNXf4D4-0,5136
82
- specsmith-0.2.3.dev51.dist-info/licenses/LICENSE,sha256=jCLkf20ZMVU47ykbfIxw-5eukhnaeIsxSflw07hVNlY,1074
83
- specsmith-0.2.3.dev51.dist-info/METADATA,sha256=DhyozEDdUdnetivvrvrdJEndv3kCsSayzmE0kYlYylY,6837
84
- specsmith-0.2.3.dev51.dist-info/WHEEL,sha256=aeYiig01lYGDzBgS8HxWXOg3uV61G9ijOsup-k9o1sk,91
85
- specsmith-0.2.3.dev51.dist-info/entry_points.txt,sha256=X_zb8_KvBONFGgkQYKIbW1UtWd3Ck7E11l3SyHO7omA,49
86
- specsmith-0.2.3.dev51.dist-info/top_level.txt,sha256=UsM7ZABbv3N5s5-tcyJSIE1m_G4p-ezZGK4zgJ3x1Hc,10
87
- specsmith-0.2.3.dev51.dist-info/RECORD,,
83
+ specsmith-0.2.3.dev55.dist-info/licenses/LICENSE,sha256=jCLkf20ZMVU47ykbfIxw-5eukhnaeIsxSflw07hVNlY,1074
84
+ specsmith-0.2.3.dev55.dist-info/METADATA,sha256=b9od7EuHnV9HSujw9OvyyyJgcn89Zvbmp7aixvFWEMU,8675
85
+ specsmith-0.2.3.dev55.dist-info/WHEEL,sha256=aeYiig01lYGDzBgS8HxWXOg3uV61G9ijOsup-k9o1sk,91
86
+ specsmith-0.2.3.dev55.dist-info/entry_points.txt,sha256=X_zb8_KvBONFGgkQYKIbW1UtWd3Ck7E11l3SyHO7omA,49
87
+ specsmith-0.2.3.dev55.dist-info/top_level.txt,sha256=UsM7ZABbv3N5s5-tcyJSIE1m_G4p-ezZGK4zgJ3x1Hc,10
88
+ specsmith-0.2.3.dev55.dist-info/RECORD,,