tokenmaxxing 0.1.6__tar.gz → 0.2.1__tar.gz

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: tokenmaxxing
3
- Version: 0.1.6
3
+ Version: 0.2.1
4
4
  Summary: Menu bar app showing your live Claude Code session and weekly usage as a colored progress bar.
5
5
  Project-URL: Homepage, https://github.com/alvations/tokenmaxxing
6
6
  Project-URL: Repository, https://github.com/alvations/tokenmaxxing
@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
4
4
 
5
5
  [project]
6
6
  name = "tokenmaxxing"
7
- version = "0.1.6"
7
+ version = "0.2.1"
8
8
  description = "Menu bar app showing your live Claude Code session and weekly usage as a colored progress bar."
9
9
  readme = "README.md"
10
10
  requires-python = ">=3.9"
@@ -1,6 +1,6 @@
1
1
  """tokenmaxxing — menu bar app for Claude Code session and weekly usage."""
2
2
 
3
- __version__ = "0.1.6"
3
+ __version__ = "0.2.1"
4
4
 
5
5
  from tokenmaxxing.app import main
6
6
 
@@ -50,6 +50,20 @@ except ImportError: # pragma: no cover
50
50
 
51
51
  USAGE_URL = "https://api.anthropic.com/api/oauth/usage"
52
52
  OAUTH_API_VERSION = "oauth-2025-04-20"
53
+
54
+ # The /api/oauth/usage endpoint is aggressively rate-limited (per-IP cooldown of
55
+ # ~1h that resets on every probe). It's also not how the official Claude Code
56
+ # client reads its rate limits — it reads them from the
57
+ # anthropic-ratelimit-unified-* response headers on regular inference calls.
58
+ # Make a 1-token /v1/messages call and parse those headers — same data, no
59
+ # /usage throttle.
60
+ MESSAGES_URL = "https://api.anthropic.com/v1/messages"
61
+ ANTHROPIC_VERSION = "2023-06-01"
62
+ PROBE_MODEL = "claude-haiku-4-5-20251001" # cheapest current model
63
+ HEADER_5H_UTIL = "anthropic-ratelimit-unified-5h-utilization"
64
+ HEADER_5H_RESET = "anthropic-ratelimit-unified-5h-reset"
65
+ HEADER_7D_UTIL = "anthropic-ratelimit-unified-7d-utilization"
66
+ HEADER_7D_RESET = "anthropic-ratelimit-unified-7d-reset"
53
67
  KEYCHAIN_SERVICE = "Claude Code-credentials"
54
68
  CREDENTIALS_FILE = Path.home() / ".claude" / ".credentials.json"
55
69
  DASHBOARD_URL = "https://claude.ai/settings/usage"
@@ -60,7 +74,12 @@ APP_NAME = "Claude"
60
74
  LOADING_TEXT = "loading…"
61
75
  TIMER_INTERVAL = 1
62
76
 
63
- REFRESH_SECONDS = 300 # 5 min: OAuth usage endpoint has aggressive undocumented rate limits
77
+ REFRESH_SECONDS = 1800 # 30 min: /api/oauth/usage tolerates ~hourly polling but trips
78
+ # at ~5 min cadence with a sticky multi-hour cooldown. 30 min keeps
79
+ # us well below the throttle threshold; on-demand menu opens still
80
+ # refresh instantly.
81
+ SUSTAINED_429_THRESHOLD = 3 # after this many consecutive 429s, fall back to /v1/messages
82
+ # headers (which cost ~10 tokens/poll) until /usage recovers
64
83
  STALE_AFTER_SECONDS = 600 # 10 min: only mark cached data "(stale)" past this age — one
65
84
  # missed poll cycle (~5 min at the old 300 s cadence; REFRESH_SECONDS is now 1800) shouldn't trigger the warning
66
85
  REFRESH_MAX_BACKOFF = 1800 # Max backoff: 30 minutes
@@ -460,23 +479,46 @@ def get_access_token() -> Optional[str]:
460
479
  return oauth_data.get("accessToken") if oauth_data else None
461
480
 
462
481
 
463
- def fetch_usage(oauth_data: Optional[dict] = None):
482
+ def _epoch_to_iso(epoch: float) -> str:
483
+ """Convert unix-epoch seconds to ISO-8601 with Z suffix (the format the
484
+ /api/oauth/usage payload uses)."""
485
+ return datetime.fromtimestamp(epoch, tz=timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")
486
+
487
+
488
+ def fetch_usage(oauth_data: Optional[dict] = None, allow_messages_fallback: bool = False):
464
489
  """Returns (payload_dict, error_str, is_rate_limited, retry_after_seconds).
465
490
 
466
- `retry_after_seconds` is the int parsed from the server's Retry-After
467
- header on 429s (or None if absent / non-429). Lets the worker schedule
468
- the next poll exactly when the rate-limit window expires instead of
469
- guessing via exponential backoff.
491
+ Primary path: /api/oauth/usage — token-free, same endpoint the official
492
+ Claude Code client uses (see claude-code submodule services/api/usage.ts).
493
+ Honors the server's Retry-After on 429 so we never poll inside the
494
+ cooldown window.
495
+
496
+ Fallback: if the caller passes allow_messages_fallback=True (the worker
497
+ flips this on after SUSTAINED_429_THRESHOLD consecutive 429s), we make
498
+ one 1-token /v1/messages probe and parse the anthropic-ratelimit-unified-*
499
+ response headers — same data, but costs ~10 tokens per call. Used only to
500
+ rescue the menu from a stuck cooldown; the primary path resumes as soon
501
+ as /usage stops 429ing.
470
502
  """
471
- # Prefer a fresh token lifted from a running `claude` process — the
472
- # keychain token can't refresh in-place for Pro/Max users and will 401
473
- # once it expires. Fall back to the keychain only if no claude session
474
- # is alive.
475
503
  token = _lift_claude_env_token()
476
504
  if not token and oauth_data:
477
505
  token = oauth_data.get("accessToken")
478
506
  if not token:
479
507
  return None, "no Claude Code token (start a claude session)", False, None
508
+ payload, err, is_rate_limited, retry_after = _fetch_usage_oauth(token)
509
+ if payload is not None:
510
+ return payload, None, False, None
511
+ if is_rate_limited and allow_messages_fallback:
512
+ msg_payload, msg_err, msg_is_rate, msg_retry = _fetch_usage_messages(token)
513
+ if msg_payload is not None:
514
+ return msg_payload, None, False, None
515
+ # Fallback also failed — surface the /usage 429 since that's the path
516
+ # we want to recover, not the messages probe failure.
517
+ return None, err, is_rate_limited, retry_after
518
+
519
+
520
+ def _fetch_usage_oauth(token: str):
521
+ """Hit /api/oauth/usage. Zero token cost. Throttled if polled too often."""
480
522
  req = urllib.request.Request(
481
523
  USAGE_URL,
482
524
  headers={
@@ -505,6 +547,72 @@ def fetch_usage(oauth_data: Optional[dict] = None):
505
547
  return None, str(e), False, None
506
548
 
507
549
 
550
+ def _fetch_usage_messages(token: str):
551
+ """Paid fallback: 1-token /v1/messages probe; read rate-limit headers."""
552
+ body = json.dumps({
553
+ "model": PROBE_MODEL,
554
+ "max_tokens": 1,
555
+ "messages": [{"role": "user", "content": "."}],
556
+ }).encode("utf-8")
557
+ req = urllib.request.Request(
558
+ MESSAGES_URL,
559
+ data=body,
560
+ method="POST",
561
+ headers={
562
+ "Authorization": f"Bearer {token}",
563
+ "anthropic-version": ANTHROPIC_VERSION,
564
+ "anthropic-beta": OAUTH_API_VERSION,
565
+ "content-type": "application/json",
566
+ "User-Agent": "claude-limit-app/1.0",
567
+ },
568
+ )
569
+ try:
570
+ with urllib.request.urlopen(req, timeout=HTTP_TIMEOUT) as resp:
571
+ return _payload_from_ratelimit_headers(resp.headers), None, False, None
572
+ except urllib.error.HTTPError as e:
573
+ payload = _payload_from_ratelimit_headers(e.headers)
574
+ if payload:
575
+ return payload, None, False, None
576
+ if e.code == HTTP_STATUS_UNAUTHORIZED:
577
+ return None, "auth expired (start a claude session)", False, None
578
+ return None, f"HTTP {e.code} on /v1/messages fallback", False, None
579
+ except urllib.error.URLError as e:
580
+ return None, f"net: {e.reason}", False, None
581
+ except Exception as e: # noqa: BLE001
582
+ return None, str(e), False, None
583
+
584
+
585
+ def _payload_from_ratelimit_headers(headers) -> Optional[dict]:
586
+ """Parse anthropic-ratelimit-unified-* response headers into the
587
+ /api/oauth/usage payload shape the rest of the app expects.
588
+
589
+ Returns None if neither five-hour nor seven-day data is present (e.g. on
590
+ a non-OAuth request or a free-tier user without subscription limits)."""
591
+ def _f(name):
592
+ v = headers.get(name)
593
+ try:
594
+ return float(v) if v is not None else None
595
+ except (TypeError, ValueError):
596
+ return None
597
+
598
+ out = {}
599
+ util_5h, reset_5h = _f(HEADER_5H_UTIL), _f(HEADER_5H_RESET)
600
+ if util_5h is not None:
601
+ view = {"utilization": util_5h * 100.0} # headers are 0..1; payload is 0..100
602
+ if reset_5h is not None:
603
+ view["resets_at"] = _epoch_to_iso(reset_5h)
604
+ out["five_hour"] = view
605
+ util_7d, reset_7d = _f(HEADER_7D_UTIL), _f(HEADER_7D_RESET)
606
+ if util_7d is not None:
607
+ view = {"utilization": util_7d * 100.0}
608
+ if reset_7d is not None:
609
+ view["resets_at"] = _epoch_to_iso(reset_7d)
610
+ out["seven_day"] = view
611
+ # seven_day_sonnet has no equivalent header — leave absent so the menu
612
+ # row renders as "—" rather than misleading stale data.
613
+ return out or None
614
+
615
+
508
616
  def bar(pct: Optional[float], width: int = BAR_WIDTH) -> str:
509
617
  """Solid Unicode progress bar."""
510
618
  if pct is None:
@@ -692,7 +800,14 @@ class ClaudeMonitorApp(rumps.App):
692
800
  continue
693
801
 
694
802
  oauth_data = _get_oauth_data()
695
- payload, err, is_rate_limited, retry_after = fetch_usage(oauth_data)
803
+ # After SUSTAINED_429_THRESHOLD consecutive /api/oauth/usage 429s,
804
+ # unstick the menu by falling back to the paid /v1/messages
805
+ # header path. We don't enable the fallback by default because it
806
+ # costs ~10 tokens/poll — we only pay that when the free endpoint
807
+ # is genuinely stuck in a multi-hour cooldown.
808
+ allow_fallback = consecutive_failures >= SUSTAINED_429_THRESHOLD
809
+ payload, err, is_rate_limited, retry_after = fetch_usage(
810
+ oauth_data, allow_messages_fallback=allow_fallback)
696
811
  # Drop any Refresh-now clicks that arrived during the poll — the
697
812
  # in-flight poll already satisfies them, and re-polling immediately
698
813
  # can trip the OAuth endpoint's rate limit.
File without changes
File without changes
File without changes