tokenjam 0.2.2__tar.gz → 0.2.3__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (215) hide show
  1. {tokenjam-0.2.2 → tokenjam-0.2.3}/CLAUDE.md +10 -1
  2. {tokenjam-0.2.2 → tokenjam-0.2.3}/PKG-INFO +98 -23
  3. {tokenjam-0.2.2 → tokenjam-0.2.3}/README.md +97 -22
  4. tokenjam-0.2.3/docs/screenshots/tj-alerts.png +0 -0
  5. tokenjam-0.2.3/docs/screenshots/tj-budget.png +0 -0
  6. tokenjam-0.2.3/docs/screenshots/tj-cost.png +0 -0
  7. tokenjam-0.2.3/docs/screenshots/tj-status.png +0 -0
  8. tokenjam-0.2.3/docs/screenshots/tj-traces.png +0 -0
  9. {tokenjam-0.2.2 → tokenjam-0.2.3}/pyproject.toml +1 -1
  10. {tokenjam-0.2.2 → tokenjam-0.2.3}/sdk-ts/package-lock.json +2 -2
  11. {tokenjam-0.2.2 → tokenjam-0.2.3}/sdk-ts/package.json +1 -1
  12. {tokenjam-0.2.2 → tokenjam-0.2.3}/tests/integration/test_cli.py +78 -0
  13. {tokenjam-0.2.2 → tokenjam-0.2.3}/tests/manual-new-release-tests.md +30 -47
  14. {tokenjam-0.2.2 → tokenjam-0.2.3}/tests/manual-pre-release-testing.md +27 -2
  15. tokenjam-0.2.3/tests/unit/test_backfill.py +179 -0
  16. tokenjam-0.2.3/tests/unit/test_optimize.py +211 -0
  17. tokenjam-0.2.3/tokenjam/cli/cmd_backfill.py +110 -0
  18. {tokenjam-0.2.2 → tokenjam-0.2.3}/tokenjam/cli/cmd_onboard.py +45 -1
  19. tokenjam-0.2.3/tokenjam/cli/cmd_optimize.py +232 -0
  20. {tokenjam-0.2.2 → tokenjam-0.2.3}/tokenjam/cli/main.py +4 -0
  21. tokenjam-0.2.3/tokenjam/core/backfill.py +454 -0
  22. {tokenjam-0.2.2 → tokenjam-0.2.3}/tokenjam/core/config.py +41 -0
  23. tokenjam-0.2.3/tokenjam/core/optimize.py +570 -0
  24. {tokenjam-0.2.2 → tokenjam-0.2.3}/tokenjam/core/pricing.py +20 -2
  25. {tokenjam-0.2.2 → tokenjam-0.2.3}/tokenjam/mcp/server.py +65 -0
  26. {tokenjam-0.2.2 → tokenjam-0.2.3}/tokenjam/pricing/models.toml +12 -0
  27. {tokenjam-0.2.2 → tokenjam-0.2.3}/.github/CODEOWNERS +0 -0
  28. {tokenjam-0.2.2 → tokenjam-0.2.3}/.github/ISSUE_TEMPLATE/bug_report.md +0 -0
  29. {tokenjam-0.2.2 → tokenjam-0.2.3}/.github/ISSUE_TEMPLATE/feature_request.md +0 -0
  30. {tokenjam-0.2.2 → tokenjam-0.2.3}/.github/ISSUE_TEMPLATE/integration_request.md +0 -0
  31. {tokenjam-0.2.2 → tokenjam-0.2.3}/.github/pull_request_template.md +0 -0
  32. {tokenjam-0.2.2 → tokenjam-0.2.3}/.github/workflows/ci.yml +0 -0
  33. {tokenjam-0.2.2 → tokenjam-0.2.3}/.github/workflows/publish-npm.yml +0 -0
  34. {tokenjam-0.2.2 → tokenjam-0.2.3}/.github/workflows/publish-pypi.yml +0 -0
  35. {tokenjam-0.2.2 → tokenjam-0.2.3}/.gitignore +0 -0
  36. {tokenjam-0.2.2 → tokenjam-0.2.3}/AGENTS.md +0 -0
  37. {tokenjam-0.2.2 → tokenjam-0.2.3}/CHANGELOG.md +0 -0
  38. {tokenjam-0.2.2 → tokenjam-0.2.3}/CONTRIBUTING.md +0 -0
  39. {tokenjam-0.2.2 → tokenjam-0.2.3}/LICENSE +0 -0
  40. {tokenjam-0.2.2 → tokenjam-0.2.3}/Makefile +0 -0
  41. {tokenjam-0.2.2 → tokenjam-0.2.3}/SECURITY.md +0 -0
  42. {tokenjam-0.2.2 → tokenjam-0.2.3}/docs/alerts.md +0 -0
  43. {tokenjam-0.2.2 → tokenjam-0.2.3}/docs/architecture.md +0 -0
  44. {tokenjam-0.2.2 → tokenjam-0.2.3}/docs/claude-code-integration.md +0 -0
  45. {tokenjam-0.2.2 → tokenjam-0.2.3}/docs/cli-reference.md +0 -0
  46. {tokenjam-0.2.2 → tokenjam-0.2.3}/docs/configuration.md +0 -0
  47. {tokenjam-0.2.2 → tokenjam-0.2.3}/docs/export.md +0 -0
  48. {tokenjam-0.2.2 → tokenjam-0.2.3}/docs/framework-support.md +0 -0
  49. {tokenjam-0.2.2 → tokenjam-0.2.3}/docs/nemoclaw-integration.md +0 -0
  50. {tokenjam-0.2.2 → tokenjam-0.2.3}/docs/openclaw.md +0 -0
  51. {tokenjam-0.2.2 → tokenjam-0.2.3}/examples/README.md +0 -0
  52. {tokenjam-0.2.2 → tokenjam-0.2.3}/examples/alerts_and_drift/_shared.py +0 -0
  53. {tokenjam-0.2.2 → tokenjam-0.2.3}/examples/alerts_and_drift/budget_breach_demo.py +0 -0
  54. {tokenjam-0.2.2 → tokenjam-0.2.3}/examples/alerts_and_drift/drift_demo.py +0 -0
  55. {tokenjam-0.2.2 → tokenjam-0.2.3}/examples/alerts_and_drift/sensitive_actions_demo.py +0 -0
  56. {tokenjam-0.2.2 → tokenjam-0.2.3}/examples/multi/rag_pipeline.py +0 -0
  57. {tokenjam-0.2.2 → tokenjam-0.2.3}/examples/multi/research_team.py +0 -0
  58. {tokenjam-0.2.2 → tokenjam-0.2.3}/examples/multi/router_agent.py +0 -0
  59. {tokenjam-0.2.2 → tokenjam-0.2.3}/examples/multi/sample_docs/agent_patterns.txt +0 -0
  60. {tokenjam-0.2.2 → tokenjam-0.2.3}/examples/multi/sample_docs/cost_management.txt +0 -0
  61. {tokenjam-0.2.2 → tokenjam-0.2.3}/examples/multi/sample_docs/observability.txt +0 -0
  62. {tokenjam-0.2.2 → tokenjam-0.2.3}/examples/multi/sample_docs/safety.txt +0 -0
  63. {tokenjam-0.2.2 → tokenjam-0.2.3}/examples/openclaw/README.md +0 -0
  64. {tokenjam-0.2.2 → tokenjam-0.2.3}/examples/single_framework/autogen_agent.py +0 -0
  65. {tokenjam-0.2.2 → tokenjam-0.2.3}/examples/single_framework/crewai_agent.py +0 -0
  66. {tokenjam-0.2.2 → tokenjam-0.2.3}/examples/single_framework/langchain_agent.py +0 -0
  67. {tokenjam-0.2.2 → tokenjam-0.2.3}/examples/single_framework/langgraph_agent.py +0 -0
  68. {tokenjam-0.2.2 → tokenjam-0.2.3}/examples/single_framework/llamaindex_agent.py +0 -0
  69. {tokenjam-0.2.2 → tokenjam-0.2.3}/examples/single_provider/anthropic_agent.py +0 -0
  70. {tokenjam-0.2.2 → tokenjam-0.2.3}/examples/single_provider/bedrock_agent.py +0 -0
  71. {tokenjam-0.2.2 → tokenjam-0.2.3}/examples/single_provider/gemini_agent.py +0 -0
  72. {tokenjam-0.2.2 → tokenjam-0.2.3}/examples/single_provider/litellm_agent.py +0 -0
  73. {tokenjam-0.2.2 → tokenjam-0.2.3}/examples/single_provider/openai_agent.py +0 -0
  74. {tokenjam-0.2.2 → tokenjam-0.2.3}/examples/single_provider/openai_agents_sdk_agent.py +0 -0
  75. {tokenjam-0.2.2 → tokenjam-0.2.3}/incidents/hallucination-drift/BLOG.md +0 -0
  76. {tokenjam-0.2.2 → tokenjam-0.2.3}/incidents/hallucination-drift/README.md +0 -0
  77. {tokenjam-0.2.2 → tokenjam-0.2.3}/incidents/hallucination-drift/scenario.py +0 -0
  78. {tokenjam-0.2.2 → tokenjam-0.2.3}/incidents/retry-loop/BLOG.md +0 -0
  79. {tokenjam-0.2.2 → tokenjam-0.2.3}/incidents/retry-loop/README.md +0 -0
  80. {tokenjam-0.2.2 → tokenjam-0.2.3}/incidents/retry-loop/scenario.py +0 -0
  81. {tokenjam-0.2.2 → tokenjam-0.2.3}/incidents/surprise-cost/BLOG.md +0 -0
  82. {tokenjam-0.2.2 → tokenjam-0.2.3}/incidents/surprise-cost/README.md +0 -0
  83. {tokenjam-0.2.2 → tokenjam-0.2.3}/incidents/surprise-cost/scenario.py +0 -0
  84. {tokenjam-0.2.2 → tokenjam-0.2.3}/pricing/models.toml +0 -0
  85. {tokenjam-0.2.2 → tokenjam-0.2.3}/sdk-ts/README.md +0 -0
  86. {tokenjam-0.2.2 → tokenjam-0.2.3}/sdk-ts/src/client.test.ts +0 -0
  87. {tokenjam-0.2.2 → tokenjam-0.2.3}/sdk-ts/src/client.ts +0 -0
  88. {tokenjam-0.2.2 → tokenjam-0.2.3}/sdk-ts/src/index.ts +0 -0
  89. {tokenjam-0.2.2 → tokenjam-0.2.3}/sdk-ts/src/semconv.test.ts +0 -0
  90. {tokenjam-0.2.2 → tokenjam-0.2.3}/sdk-ts/src/semconv.ts +0 -0
  91. {tokenjam-0.2.2 → tokenjam-0.2.3}/sdk-ts/src/span-builder.test.ts +0 -0
  92. {tokenjam-0.2.2 → tokenjam-0.2.3}/sdk-ts/src/span-builder.ts +0 -0
  93. {tokenjam-0.2.2 → tokenjam-0.2.3}/sdk-ts/src/types.ts +0 -0
  94. {tokenjam-0.2.2 → tokenjam-0.2.3}/sdk-ts/tsconfig.json +0 -0
  95. {tokenjam-0.2.2 → tokenjam-0.2.3}/tests/__init__.py +0 -0
  96. {tokenjam-0.2.2 → tokenjam-0.2.3}/tests/agents/__init__.py +0 -0
  97. {tokenjam-0.2.2 → tokenjam-0.2.3}/tests/agents/email_agent_budget_breach.py +0 -0
  98. {tokenjam-0.2.2 → tokenjam-0.2.3}/tests/agents/email_agent_drift.py +0 -0
  99. {tokenjam-0.2.2 → tokenjam-0.2.3}/tests/agents/email_agent_loop.py +0 -0
  100. {tokenjam-0.2.2 → tokenjam-0.2.3}/tests/agents/email_agent_normal.py +0 -0
  101. {tokenjam-0.2.2 → tokenjam-0.2.3}/tests/agents/mock_llm.py +0 -0
  102. {tokenjam-0.2.2 → tokenjam-0.2.3}/tests/agents/test_mock_scenarios.py +0 -0
  103. {tokenjam-0.2.2 → tokenjam-0.2.3}/tests/conftest.py +0 -0
  104. {tokenjam-0.2.2 → tokenjam-0.2.3}/tests/e2e/__init__.py +0 -0
  105. {tokenjam-0.2.2 → tokenjam-0.2.3}/tests/e2e/conftest.py +0 -0
  106. {tokenjam-0.2.2 → tokenjam-0.2.3}/tests/e2e/test_real_llm.py +0 -0
  107. {tokenjam-0.2.2 → tokenjam-0.2.3}/tests/factories.py +0 -0
  108. {tokenjam-0.2.2 → tokenjam-0.2.3}/tests/integration/__init__.py +0 -0
  109. {tokenjam-0.2.2 → tokenjam-0.2.3}/tests/integration/test_api.py +0 -0
  110. {tokenjam-0.2.2 → tokenjam-0.2.3}/tests/integration/test_db.py +0 -0
  111. {tokenjam-0.2.2 → tokenjam-0.2.3}/tests/integration/test_demos.py +0 -0
  112. {tokenjam-0.2.2 → tokenjam-0.2.3}/tests/integration/test_full_pipeline.py +0 -0
  113. {tokenjam-0.2.2 → tokenjam-0.2.3}/tests/integration/test_logs_api.py +0 -0
  114. {tokenjam-0.2.2 → tokenjam-0.2.3}/tests/synthetic/__init__.py +0 -0
  115. {tokenjam-0.2.2 → tokenjam-0.2.3}/tests/synthetic/test_alert_rules.py +0 -0
  116. {tokenjam-0.2.2 → tokenjam-0.2.3}/tests/synthetic/test_cost_tracking.py +0 -0
  117. {tokenjam-0.2.2 → tokenjam-0.2.3}/tests/synthetic/test_drift_detection.py +0 -0
  118. {tokenjam-0.2.2 → tokenjam-0.2.3}/tests/synthetic/test_ingest.py +0 -0
  119. {tokenjam-0.2.2 → tokenjam-0.2.3}/tests/synthetic/test_schema_validation.py +0 -0
  120. {tokenjam-0.2.2 → tokenjam-0.2.3}/tests/toy_agent/toy_agent.py +0 -0
  121. {tokenjam-0.2.2 → tokenjam-0.2.3}/tests/unit/__init__.py +0 -0
  122. {tokenjam-0.2.2 → tokenjam-0.2.3}/tests/unit/test_alerts.py +0 -0
  123. {tokenjam-0.2.2 → tokenjam-0.2.3}/tests/unit/test_cmd_stop.py +0 -0
  124. {tokenjam-0.2.2 → tokenjam-0.2.3}/tests/unit/test_config.py +0 -0
  125. {tokenjam-0.2.2 → tokenjam-0.2.3}/tests/unit/test_cost.py +0 -0
  126. {tokenjam-0.2.2 → tokenjam-0.2.3}/tests/unit/test_demo_env.py +0 -0
  127. {tokenjam-0.2.2 → tokenjam-0.2.3}/tests/unit/test_demo_scenarios.py +0 -0
  128. {tokenjam-0.2.2 → tokenjam-0.2.3}/tests/unit/test_drift.py +0 -0
  129. {tokenjam-0.2.2 → tokenjam-0.2.3}/tests/unit/test_formatting.py +0 -0
  130. {tokenjam-0.2.2 → tokenjam-0.2.3}/tests/unit/test_litellm_client.py +0 -0
  131. {tokenjam-0.2.2 → tokenjam-0.2.3}/tests/unit/test_litellm_integration.py +0 -0
  132. {tokenjam-0.2.2 → tokenjam-0.2.3}/tests/unit/test_logs_converter.py +0 -0
  133. {tokenjam-0.2.2 → tokenjam-0.2.3}/tests/unit/test_mcp_server.py +0 -0
  134. {tokenjam-0.2.2 → tokenjam-0.2.3}/tests/unit/test_models.py +0 -0
  135. {tokenjam-0.2.2 → tokenjam-0.2.3}/tests/unit/test_onboard_codex.py +0 -0
  136. {tokenjam-0.2.2 → tokenjam-0.2.3}/tests/unit/test_onboard_daemon.py +0 -0
  137. {tokenjam-0.2.2 → tokenjam-0.2.3}/tests/unit/test_openclaw_ingest.py +0 -0
  138. {tokenjam-0.2.2 → tokenjam-0.2.3}/tests/unit/test_spans_stats_repair.py +0 -0
  139. {tokenjam-0.2.2 → tokenjam-0.2.3}/tests/unit/test_time_parse.py +0 -0
  140. {tokenjam-0.2.2 → tokenjam-0.2.3}/tokenjam/__init__.py +0 -0
  141. {tokenjam-0.2.2 → tokenjam-0.2.3}/tokenjam/api/__init__.py +0 -0
  142. {tokenjam-0.2.2 → tokenjam-0.2.3}/tokenjam/api/app.py +0 -0
  143. {tokenjam-0.2.2 → tokenjam-0.2.3}/tokenjam/api/deps.py +0 -0
  144. {tokenjam-0.2.2 → tokenjam-0.2.3}/tokenjam/api/middleware.py +0 -0
  145. {tokenjam-0.2.2 → tokenjam-0.2.3}/tokenjam/api/routes/__init__.py +0 -0
  146. {tokenjam-0.2.2 → tokenjam-0.2.3}/tokenjam/api/routes/agents.py +0 -0
  147. {tokenjam-0.2.2 → tokenjam-0.2.3}/tokenjam/api/routes/alerts.py +0 -0
  148. {tokenjam-0.2.2 → tokenjam-0.2.3}/tokenjam/api/routes/budget.py +0 -0
  149. {tokenjam-0.2.2 → tokenjam-0.2.3}/tokenjam/api/routes/cost.py +0 -0
  150. {tokenjam-0.2.2 → tokenjam-0.2.3}/tokenjam/api/routes/drift.py +0 -0
  151. {tokenjam-0.2.2 → tokenjam-0.2.3}/tokenjam/api/routes/logs.py +0 -0
  152. {tokenjam-0.2.2 → tokenjam-0.2.3}/tokenjam/api/routes/metrics.py +0 -0
  153. {tokenjam-0.2.2 → tokenjam-0.2.3}/tokenjam/api/routes/otlp.py +0 -0
  154. {tokenjam-0.2.2 → tokenjam-0.2.3}/tokenjam/api/routes/spans.py +0 -0
  155. {tokenjam-0.2.2 → tokenjam-0.2.3}/tokenjam/api/routes/status.py +0 -0
  156. {tokenjam-0.2.2 → tokenjam-0.2.3}/tokenjam/api/routes/tools.py +0 -0
  157. {tokenjam-0.2.2 → tokenjam-0.2.3}/tokenjam/api/routes/traces.py +0 -0
  158. {tokenjam-0.2.2 → tokenjam-0.2.3}/tokenjam/cli/__init__.py +0 -0
  159. {tokenjam-0.2.2 → tokenjam-0.2.3}/tokenjam/cli/cmd_alerts.py +0 -0
  160. {tokenjam-0.2.2 → tokenjam-0.2.3}/tokenjam/cli/cmd_budget.py +0 -0
  161. {tokenjam-0.2.2 → tokenjam-0.2.3}/tokenjam/cli/cmd_cost.py +0 -0
  162. {tokenjam-0.2.2 → tokenjam-0.2.3}/tokenjam/cli/cmd_demo.py +0 -0
  163. {tokenjam-0.2.2 → tokenjam-0.2.3}/tokenjam/cli/cmd_doctor.py +0 -0
  164. {tokenjam-0.2.2 → tokenjam-0.2.3}/tokenjam/cli/cmd_drift.py +0 -0
  165. {tokenjam-0.2.2 → tokenjam-0.2.3}/tokenjam/cli/cmd_export.py +0 -0
  166. {tokenjam-0.2.2 → tokenjam-0.2.3}/tokenjam/cli/cmd_mcp.py +0 -0
  167. {tokenjam-0.2.2 → tokenjam-0.2.3}/tokenjam/cli/cmd_serve.py +0 -0
  168. {tokenjam-0.2.2 → tokenjam-0.2.3}/tokenjam/cli/cmd_status.py +0 -0
  169. {tokenjam-0.2.2 → tokenjam-0.2.3}/tokenjam/cli/cmd_stop.py +0 -0
  170. {tokenjam-0.2.2 → tokenjam-0.2.3}/tokenjam/cli/cmd_tools.py +0 -0
  171. {tokenjam-0.2.2 → tokenjam-0.2.3}/tokenjam/cli/cmd_traces.py +0 -0
  172. {tokenjam-0.2.2 → tokenjam-0.2.3}/tokenjam/cli/cmd_uninstall.py +0 -0
  173. {tokenjam-0.2.2 → tokenjam-0.2.3}/tokenjam/core/__init__.py +0 -0
  174. {tokenjam-0.2.2 → tokenjam-0.2.3}/tokenjam/core/alerts.py +0 -0
  175. {tokenjam-0.2.2 → tokenjam-0.2.3}/tokenjam/core/api_backend.py +0 -0
  176. {tokenjam-0.2.2 → tokenjam-0.2.3}/tokenjam/core/cost.py +0 -0
  177. {tokenjam-0.2.2 → tokenjam-0.2.3}/tokenjam/core/db.py +0 -0
  178. {tokenjam-0.2.2 → tokenjam-0.2.3}/tokenjam/core/drift.py +0 -0
  179. {tokenjam-0.2.2 → tokenjam-0.2.3}/tokenjam/core/ingest.py +0 -0
  180. {tokenjam-0.2.2 → tokenjam-0.2.3}/tokenjam/core/models.py +0 -0
  181. {tokenjam-0.2.2 → tokenjam-0.2.3}/tokenjam/core/retention.py +0 -0
  182. {tokenjam-0.2.2 → tokenjam-0.2.3}/tokenjam/core/schema_validator.py +0 -0
  183. {tokenjam-0.2.2 → tokenjam-0.2.3}/tokenjam/demo/__init__.py +0 -0
  184. {tokenjam-0.2.2 → tokenjam-0.2.3}/tokenjam/demo/env.py +0 -0
  185. {tokenjam-0.2.2 → tokenjam-0.2.3}/tokenjam/mcp/__init__.py +0 -0
  186. {tokenjam-0.2.2 → tokenjam-0.2.3}/tokenjam/otel/__init__.py +0 -0
  187. {tokenjam-0.2.2 → tokenjam-0.2.3}/tokenjam/otel/exporters.py +0 -0
  188. {tokenjam-0.2.2 → tokenjam-0.2.3}/tokenjam/otel/provider.py +0 -0
  189. {tokenjam-0.2.2 → tokenjam-0.2.3}/tokenjam/otel/semconv.py +0 -0
  190. {tokenjam-0.2.2 → tokenjam-0.2.3}/tokenjam/py.typed +0 -0
  191. {tokenjam-0.2.2 → tokenjam-0.2.3}/tokenjam/sdk/__init__.py +0 -0
  192. {tokenjam-0.2.2 → tokenjam-0.2.3}/tokenjam/sdk/agent.py +0 -0
  193. {tokenjam-0.2.2 → tokenjam-0.2.3}/tokenjam/sdk/bootstrap.py +0 -0
  194. {tokenjam-0.2.2 → tokenjam-0.2.3}/tokenjam/sdk/client.py +0 -0
  195. {tokenjam-0.2.2 → tokenjam-0.2.3}/tokenjam/sdk/http_exporter.py +0 -0
  196. {tokenjam-0.2.2 → tokenjam-0.2.3}/tokenjam/sdk/integrations/__init__.py +0 -0
  197. {tokenjam-0.2.2 → tokenjam-0.2.3}/tokenjam/sdk/integrations/anthropic.py +0 -0
  198. {tokenjam-0.2.2 → tokenjam-0.2.3}/tokenjam/sdk/integrations/autogen.py +0 -0
  199. {tokenjam-0.2.2 → tokenjam-0.2.3}/tokenjam/sdk/integrations/base.py +0 -0
  200. {tokenjam-0.2.2 → tokenjam-0.2.3}/tokenjam/sdk/integrations/bedrock.py +0 -0
  201. {tokenjam-0.2.2 → tokenjam-0.2.3}/tokenjam/sdk/integrations/crewai.py +0 -0
  202. {tokenjam-0.2.2 → tokenjam-0.2.3}/tokenjam/sdk/integrations/gemini.py +0 -0
  203. {tokenjam-0.2.2 → tokenjam-0.2.3}/tokenjam/sdk/integrations/langchain.py +0 -0
  204. {tokenjam-0.2.2 → tokenjam-0.2.3}/tokenjam/sdk/integrations/langgraph.py +0 -0
  205. {tokenjam-0.2.2 → tokenjam-0.2.3}/tokenjam/sdk/integrations/litellm.py +0 -0
  206. {tokenjam-0.2.2 → tokenjam-0.2.3}/tokenjam/sdk/integrations/llamaindex.py +0 -0
  207. {tokenjam-0.2.2 → tokenjam-0.2.3}/tokenjam/sdk/integrations/nemoclaw.py +0 -0
  208. {tokenjam-0.2.2 → tokenjam-0.2.3}/tokenjam/sdk/integrations/openai.py +0 -0
  209. {tokenjam-0.2.2 → tokenjam-0.2.3}/tokenjam/sdk/integrations/openai_agents_sdk.py +0 -0
  210. {tokenjam-0.2.2 → tokenjam-0.2.3}/tokenjam/sdk/transport.py +0 -0
  211. {tokenjam-0.2.2 → tokenjam-0.2.3}/tokenjam/ui/index.html +0 -0
  212. {tokenjam-0.2.2 → tokenjam-0.2.3}/tokenjam/utils/__init__.py +0 -0
  213. {tokenjam-0.2.2 → tokenjam-0.2.3}/tokenjam/utils/formatting.py +0 -0
  214. {tokenjam-0.2.2 → tokenjam-0.2.3}/tokenjam/utils/ids.py +0 -0
  215. {tokenjam-0.2.2 → tokenjam-0.2.3}/tokenjam/utils/time_parse.py +0 -0
@@ -92,6 +92,8 @@ Post-ingest hooks run synchronously after each span is written to DB:
92
92
  - **`tokenjam/core/cost.py`**: `calculate_cost()` (pure function, rounds to 8dp) + `CostEngine` (post-ingest hook that updates `spans.cost_usd` and `sessions.total_cost_usd` via `db.conn` — see db.py note). Pricing loaded from `pricing/models.toml`.
93
93
  - **`tokenjam/core/alerts.py`**: `AlertEngine` with 13 alert types, `CooldownTracker` (in-memory, per agent+type, resets on restart), `AlertDispatcher` routing to 6 channel types (stdout, file, ntfy, webhook, Discord, Telegram). `AlertEngine.fire()` is the external entry point for other modules (SchemaValidator, DriftDetector) to fire alerts. Suppressed alerts are still persisted to DB but not dispatched to channels. Hardcoded thresholds: retry loop fires at 4+ identical tool calls in last 6 spans; failure rate fires at >20% errors in last 20 spans (checked every 5th error); session duration default 3600s. Stdout and file channels always include full detail regardless of `include_captured_content` config.
94
94
  - **`tokenjam/core/drift.py`**: `DriftDetector` — Z-score based behavioral drift detection, fires at session end.
95
+ - **`tokenjam/core/optimize.py`**: Two analyzers used by `tj optimize` and the `get_optimize_report` MCP tool. `analyze_model_downgrade()` flags sessions whose structural shape (input < 5K tokens AND output < 500 tokens AND tool_calls ≤ 5) matches a class of work where a cheaper alternative model is worth reviewing — never claims quality equivalence. `MODEL_DOWNGRADE_CAVEAT` is in the dataclass default so it cannot be removed accidentally. `project_budget()` projects current cycle spend against a `[budget.<provider>]` ceiling; only fires when budget > 0. Both functions operate on `db.conn` directly.
96
+ - **`tokenjam/core/backfill.py`**: Parses Claude Code on-disk session JSONL files into `NormalizedSpan`s. Cost is recomputed from `pricing/models.toml` because the on-disk format has no `cost_usd`. The parser tolerates the dated `claude-<family>-<ver>-YYYYMMDD` model-name suffixes Anthropic ships (handled by `get_rates()` in `core/pricing.py`, which strips the trailing 8-digit date suffix when no exact pricing match exists). Idempotency relies on deterministic span IDs derived from `(session_id, message uuid)` / `(session_id, tool_use id)`.
95
97
  - **`tokenjam/core/schema_validator.py`**: Validates tool outputs against declared or genson-inferred JSON Schema. Only fires on `gen_ai.tool.call` spans with `gen_ai.tool.output` in attributes. Schema priority: 1) declared file from agent config `output_schema`, 2) inferred schema from `DriftBaseline.output_schema_inferred`. Caches schemas in-memory per agent.
96
98
  - **`tokenjam/core/models.py`**: All domain dataclasses — `NormalizedSpan`, `SessionRecord`, `Alert`, `DriftBaseline`, filter types, etc.
97
99
  - **`tokenjam/core/config.py`**: `TjConfig` dataclass tree, TOML loading/writing, config file discovery.
@@ -129,6 +131,8 @@ Post-ingest hooks run synchronously after each span is written to DB:
129
131
  | `tj mcp` | `cmd_mcp.py` | Start the stdio MCP server for Claude Code integration |
130
132
  | `tj uninstall` | `cmd_uninstall.py` | Remove all TokenJam data, config, and daemon |
131
133
  | `tj doctor` | `cmd_doctor.py` | Health checks (config, DB, secrets, webhooks, drift readiness, schema-vs-capture consistency). Exit 0 = ok, 1 = warnings, 2 = errors |
134
+ | `tj optimize` | `cmd_optimize.py` | Two analyzers: model-downgrade candidates + per-provider budget projection. `--since 30d`, `--only model\|budget`, `--budget <provider>`, `--budget-usd <amount>`. JSON output supported. Opens the live DB read-only so it works alongside a running `tj serve`. |
135
+ | `tj backfill claude-code` | `cmd_backfill.py` | Parse `~/.claude/projects/*.jsonl` and ingest historical sessions. Idempotent — deterministic span IDs (SHA-256 of `session_id + uuid`) mean re-runs skip already-ingested rows. Auto-invoked at the end of `tj onboard --claude-code`. Future agent log formats (Codex, etc.) plug in as additional subcommands. |
132
136
 
133
137
  All commands support `--json` for machine-readable output. Commands that query alerts use exit code 1 if active (unacknowledged, unsuppressed) alerts exist.
134
138
 
@@ -167,12 +171,17 @@ When a span has a `conversation_id` matching an existing session, it's attribute
167
171
  11. **OTel TracerProvider is global and set-once** — `trace.set_tracer_provider()` only works once per process. In tests, set the provider once at module level (not per-test in a fixture) and clear spans between tests. Use a custom `_CollectingExporter(SpanExporter)` since `InMemorySpanExporter` is not available in the installed OTel version. See `tests/agents/test_mock_scenarios.py` for the SDK test pattern and `tests/integration/test_full_pipeline.py` for the pipeline pattern.
168
172
  12. **New SDK integrations must call `ensure_initialised()`** — every `patch_*()` convenience function must call `from tokenjam.sdk.bootstrap import ensure_initialised; ensure_initialised()` before installing hooks. This lazily bootstraps the TracerProvider + IngestPipeline on first use.
169
173
  13. **PyPI package name is `tokenjam`, not `ocw`** — `pip install tokenjam` is the correct install command. The CLI command is `tj` and the Python package directory is `tokenjam/`. The published package name on PyPI is `tokenjam`. Never write `pip install ocw` in docs, examples, or comments.
170
- 14. **Version bump on release** — both `pyproject.toml` (`version = "X.Y.Z"`) and `sdk-ts/package.json` (`"version": "X.Y.Z"`) must be bumped to the new version before creating a GitHub release. The publish workflows (`publish-pypi.yml`, `publish-npm.yml`) trigger on `release published` events and will fail with 403 if the version already exists on PyPI/npm.
174
+ 14. **`tj optimize` output must never claim quality equivalence** — the model-downgrade finding flags structural candidates only. Every user-visible string says "looks like" / "candidate" / "review before switching" — never "safe to downgrade" or "would have worked." The `MODEL_DOWNGRADE_CAVEAT` constant lives on `DowngradeFinding` as a dataclass default so it can't be removed by accident; it must also appear in human-readable CLI output. Equivalent honesty applies to future optimize analyzers (cache-opportunity, prompt-bloat).
175
+ 15. **Version bump on release** — both `pyproject.toml` (`version = "X.Y.Z"`) and `sdk-ts/package.json` (`"version": "X.Y.Z"`) must be bumped to the new version before creating a GitHub release. The publish workflows (`publish-pypi.yml`, `publish-npm.yml`) trigger on `release published` events and will fail with 403 if the version already exists on PyPI/npm.
171
176
 
172
177
  ## Config
173
178
 
174
179
  Config is TOML, discovered at: `tj.toml` -> `.tj/config.toml` -> `~/.config/tj/config.toml`. Override with `--config` or `TJ_CONFIG` env var. Full config hierarchy is in `tokenjam/core/config.py` (`TjConfig` dataclass).
175
180
 
181
+ Two distinct budget concepts coexist — do not conflate:
182
+ - **`[defaults.budget]` / `[agents.<id>.budget]`** (`daily_usd`, `session_usd`) — per-agent alert thresholds checked on every span by `AlertEngine`.
183
+ - **`[budget.<provider>]`** (`usd`, `cycle_start_day`, `applies_to_services`) — periodic monthly ceilings used only by `tj optimize` projections. Read-only at projection time; no alerts fire from these. `tj onboard --claude-code` writes a default `[budget.anthropic] usd = 200` if no provider budget is configured. The analyzer scopes spend by `provider` column and (optionally) by `agent_id IN applies_to_services`.
184
+
176
185
  `tj onboard --claude-code` and `tj onboard --codex` always write to the **global** config (`~/.config/tj/config.toml`) regardless of cwd. This is intentional: each coding-agent integration reads one ingest secret from a single global location (`~/.claude/settings.json` or `~/.codex/config.toml`), and per-project configs would rotate that secret on every onboard, breaking auth for previously onboarded projects. Onboarded Claude Code project paths are tracked in `~/.config/tj/projects.json` for clean uninstall. Codex onboarding is fully project-agnostic — Codex hardcodes `service.name=codex_exec` in its binary, so there is one Codex agent ID for all projects.
177
186
 
178
187
  ## Daemon (launchd / systemd)
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: tokenjam
3
- Version: 0.2.2
3
+ Version: 0.2.3
4
4
  Summary: TokenJam — local-first OTel-native observability for Autonomous AI agents
5
5
  Project-URL: Homepage, https://opencla.watch
6
6
  Project-URL: Repository, https://github.com/Metabuilder-Labs/openclawwatch
@@ -85,6 +85,46 @@ Your agent sends emails, writes files, calls APIs, and spends your money — all
85
85
 
86
86
  ## What you get
87
87
 
88
+ **Cost optimization for Claude Code — out of the box.** Run `tj onboard --claude-code` and TokenJam reads your existing Claude Code session logs (up to 30 days, whatever your local retention has kept) so you can run `tj optimize` immediately:
89
+
90
+ ```
91
+ $ tj optimize --agent claude-code-myproj
92
+ Analyzing 39 sessions, 1.8M tokens, $160.3500 spend (last 30d,
93
+ claude-code-myproj)…
94
+
95
+ ① Model downgrade: 13% of sessions match a smaller-model candidate shape
96
+ • 5 of 39 sessions matched structural heuristics
97
+ • Would have cost ~$0.0140 on the smaller model vs $2.2500 actual (in
98
+ window)
99
+ • Projected savings if pattern holds: $2.2400/mo
100
+ • Pattern: claude-opus-4-7 → claude-haiku-4-5
101
+
102
+ Examples:
103
+ 2cce7903.. 2 tool calls 0.8s $0.4500 (claude-opus-4-7)
104
+ e292ccbe.. 2 tool calls 0.8s $0.4500 (claude-opus-4-7)
105
+ d59cb502.. 2 tool calls 0.8s $0.4500 (claude-opus-4-7)
106
+
107
+ ! Candidate-flagging heuristic, not a quality judgment. Review the
108
+ example sessions before changing models.
109
+
110
+ ② Budget projection (anthropic, $200.0000/cycle): comfortably within budget
111
+ Run rate $160.3500/mo — 19% of cycle budget unused.
112
+ ```
113
+
114
+ Two analyzers reading the same spans you'd otherwise pay LangSmith to host: structural model-downgrade candidate flagging (never claims quality equivalence — surfaces examples to review) and per-provider monthly budget projection. Works with **any** agent already sending TokenJam data, not just Claude Code.
115
+
116
+ Try a tighter budget to see the over-budget renderer:
117
+
118
+ ```
119
+ $ tj optimize --budget anthropic --budget-usd 50
120
+ ② Budget projection (anthropic, $50.0000/cycle): projected to exceed cycle
121
+ budget
122
+ • Monthly run rate: $160.3500 (3.2× the budget)
123
+ • At current pace, budget exhausted on 2026-05-15 (0.0 day(s) from now)
124
+ • Days remaining in cycle: 16
125
+ • Projected cycle total: $162.8700, overage: $112.8700
126
+ ```
127
+
88
128
  **Real-time cost tracking.** Every LLM call is priced as it happens — by agent, model, session, and tool. Budget alerts fire before you hit the limit, not after.
89
129
 
90
130
  **Safety alerts.** Configure any tool call as a sensitive action (`send_email`, `delete_file`, `submit_form`) and get notified instantly via ntfy, Discord, Telegram, webhook, or stdout.
@@ -108,10 +148,11 @@ For **Claude Code**, **Codex**, and any agent that already emits OpenTelemetry.
108
148
  ```bash
109
149
  pip install "tokenjam[mcp]"
110
150
  tj onboard --claude-code # or: tj onboard --codex
111
- # Restart your coding agent
151
+ tj optimize # see cost-saving candidates + budget projection
152
+ # Restart your coding agent for live telemetry
112
153
  ```
113
154
 
114
- Every session, API call, tool use, and error is now a tracked span with cost and alert evaluation. The MCP server gives your coding agent 13 tools to query its own telemetry mid-session — just ask "how much have I spent today?" or "are there any active alerts?"
155
+ `tj onboard --claude-code` auto-backfills your existing session logs from `~/.claude/projects/` so `tj optimize` works on the first run — no waiting for new data to accumulate. The MCP server gives your coding agent 14 tools to query its own telemetry mid-session — just ask "how much have I spent today?" or "where could I save money?"
115
156
 
116
157
  [Full Claude Code & Codex setup →](#claude-code--coding-agents)
117
158
 
@@ -194,9 +235,6 @@ export OTEL_EXPORTER_OTLP_ENDPOINT=http://127.0.0.1:7391
194
235
 
195
236
  ```
196
237
  tj status
197
- ```
198
-
199
- ```
200
238
  ● my-email-agent completed (2m 14s)
201
239
 
202
240
  Cost today: $0.0340 / $5.0000 limit
@@ -207,17 +245,17 @@ tj status
207
245
  send_email called (sensitive action: critical)
208
246
  ```
209
247
 
210
- https://github.com/user-attachments/assets/b94d13f6-1432-40d4-b093-6958d74f0e65
211
-
212
248
  ```bash
213
- tj status # current state, cost, active alerts
214
- tj traces # full span history with waterfall view
215
- tj cost --since 7d # cost breakdown by agent, model, day
216
- tj alerts # everything that fired while you were away
217
- tj budget # view and set daily/session cost limits
218
- tj drift # behavioral drift Z-scores vs baseline
219
- tj tools # tool call history with error rates
220
- tj serve # start the web UI + REST API
249
+ tj status # current state, cost, active alerts
250
+ tj traces # full span history with waterfall view
251
+ tj cost --since 7d # cost breakdown by agent, model, day
252
+ tj optimize # cost-saving candidates + budget projection
253
+ tj backfill claude-code # ingest historical sessions from ~/.claude/projects/
254
+ tj alerts # everything that fired while you were away
255
+ tj budget # view and set daily/session cost limits
256
+ tj drift # behavioral drift Z-scores vs baseline
257
+ tj tools # tool call history with error rates
258
+ tj serve # start the web UI + REST API
221
259
  ```
222
260
 
223
261
  ---
@@ -226,8 +264,6 @@ tj serve # start the web UI + REST API
226
264
 
227
265
  `tj serve` starts a local dashboard at `http://127.0.0.1:7391/`.
228
266
 
229
- https://github.com/user-attachments/assets/ff09caec-3487-4542-8628-d62b7d92591f
230
-
231
267
  - **Status** — agent overview with cost, tokens, tool calls, and active alerts
232
268
  - **Traces** — trace list with span waterfall visualization
233
269
  - **Cost** — breakdown by agent, model, day, or tool
@@ -237,6 +273,24 @@ https://github.com/user-attachments/assets/ff09caec-3487-4542-8628-d62b7d92591f
237
273
 
238
274
  No signup, no cloud — runs entirely on your machine.
239
275
 
276
+ ### Screenshots
277
+
278
+ <table>
279
+ <tr>
280
+ <td width="50%"><strong>Status</strong> — agent overview with cost, tokens, tool calls, and active alerts.<br><br><img src="docs/screenshots/tj-status.png" alt="tj status page" /></td>
281
+ <td width="50%"><strong>Traces</strong> — recent traces with cost, duration, and span count. Click a row for the waterfall view.<br><br><img src="docs/screenshots/tj-traces.png" alt="tj traces page" /></td>
282
+ </tr>
283
+ <tr>
284
+ <td width="50%"><strong>Cost</strong> — spend broken down by day, agent, model, or tool.<br><br><img src="docs/screenshots/tj-cost.png" alt="tj cost page" /></td>
285
+ <td width="50%"><strong>Alerts</strong> — full alert history with severity filter and inline detail expansion.<br><br><img src="docs/screenshots/tj-alerts.png" alt="tj alerts page" /></td>
286
+ </tr>
287
+ <tr>
288
+ <td colspan="2"><strong>Budget</strong> — view and edit daily/per-session cost limits per agent, with recent budget alerts inline.<br><br><img src="docs/screenshots/tj-budget.png" alt="tj budget page" /></td>
289
+ </tr>
290
+ </table>
291
+
292
+
293
+
240
294
  ---
241
295
 
242
296
  ## tj vs LangSmith vs Langfuse
@@ -248,6 +302,7 @@ LangSmith and Langfuse are excellent for tracing LLM API calls and running evals
248
302
  | Signup required | ❌ | ✅ | ✅ | ✅ |
249
303
  | Data leaves your machine | ❌ | ✅ | cloud only | ✅ |
250
304
  | Real-time sensitive action alerts | ✅ | ❌ | ❌ | ❌ |
305
+ | Model-downgrade cost recommendations | ✅ | ❌ | ❌ | ❌ |
251
306
  | Behavioral drift detection | ✅ | ❌ | ❌ | ❌ |
252
307
  | Local-first, no cloud required | ✅ | ❌ | self-host only | ❌ |
253
308
  | OTel GenAI SemConv native | ✅ | partial | partial | partial |
@@ -261,13 +316,13 @@ LangSmith and Langfuse are excellent for tracing LLM API calls and running evals
261
316
 
262
317
  ### Claude Code
263
318
 
264
- Monitor every Claude Code session — costs, tool calls, API requests, errors — with two commands:
319
+ Monitor every Claude Code session and get cost-optimization recommendations from your existing usage in three commands:
265
320
 
266
321
  ```bash
267
322
  pip install "tokenjam[mcp]"
268
- tj onboard --claude-code
269
- # Restart Claude Code, then:
270
- tj status --agent claude-code-<project>
323
+ tj onboard --claude-code # auto-backfills your existing session logs
324
+ tj optimize # cost-saving candidates + budget projection
325
+ # Then restart Claude Code so live telemetry starts flowing
271
326
  ```
272
327
 
273
328
  `tj onboard --claude-code` does everything in one shot:
@@ -277,9 +332,28 @@ tj status --agent claude-code-<project>
277
332
  - Registers the MCP server globally (`claude mcp add --scope user tj -- tj mcp`)
278
333
  - Installs a background daemon (launchd on macOS, systemd on Linux)
279
334
  - Adds Docker harness-compatible OTLP env vars to `~/.zshrc`
335
+ - **Reads your existing `~/.claude/projects/*.jsonl` session logs** and ingests them into the local DB so `tj optimize` returns real numbers on first run (idempotent — safe to re-run)
336
+ - Writes a sensible default `[budget.anthropic] usd = 200` for the budget projector to project against — edit `~/.config/tj/config.toml` to change
280
337
 
281
338
  **Claude Code must be restarted** after running `tj onboard --claude-code`.
282
339
 
340
+ #### `tj optimize` — what you actually get
341
+
342
+ Two analyzers run over the spans TokenJam has captured. The output is read-only recommendations — `tj optimize` never changes how your agent runs.
343
+
344
+ **① Model-downgrade candidates.** Flags sessions whose structural shape (short input, short output, few tool calls) matches a class of work where a cheaper model in the same provider family is worth reviewing. Never asserts the cheaper model *would have produced the same answer* — only that the shape is worth a look. Real examples are surfaced so you can spot-check before changing models.
345
+
346
+ **② Budget projection.** Per-provider monthly projection against any `[budget.<provider>]` ceiling you've configured. Scopes spend by provider — an Anthropic budget excludes OpenAI spend. Shows exhaustion date, projected overage, and what the run rate would drop to if you acted on the downgrade candidates.
347
+
348
+ ```bash
349
+ tj optimize # both analyzers, last 30 days
350
+ tj optimize --only budget # just the projection
351
+ tj optimize --budget anthropic --budget-usd 50 # test a different ceiling
352
+ tj optimize --json # machine-readable for piping
353
+ ```
354
+
355
+ Works alongside a running `tj serve` (read-only fallback). Also exposed as the `get_optimize_report` MCP tool — your coding agent can ask itself "where could I save money?" mid-session.
356
+
283
357
  **Adding more projects** — run once per project directory:
284
358
 
285
359
  ```bash
@@ -292,10 +366,11 @@ Each project gets its own agent ID (`claude-code-<repo-name>`), all sharing one
292
366
 
293
367
  ### MCP server
294
368
 
295
- The MCP server gives Claude Code direct access to your observability data inside the session. 13 tools available after restart:
369
+ The MCP server gives Claude Code direct access to your observability data inside the session. 14 tools available after restart:
296
370
 
297
371
  | Tool | What it does |
298
372
  |---|---|
373
+ | `get_optimize_report` | Cost-saving candidates and budget projection — fires for either question (e.g. "where could I save money?" / "will I exceed my budget?") |
299
374
  | `get_status` | Current agent state — tokens, cost, active alerts |
300
375
  | `get_budget_headroom` | Budget limit vs spend |
301
376
  | `list_active_sessions` | All running sessions across agents |
@@ -29,6 +29,46 @@ Your agent sends emails, writes files, calls APIs, and spends your money — all
29
29
 
30
30
  ## What you get
31
31
 
32
+ **Cost optimization for Claude Code — out of the box.** Run `tj onboard --claude-code` and TokenJam reads your existing Claude Code session logs (up to 30 days, whatever your local retention has kept) so you can run `tj optimize` immediately:
33
+
34
+ ```
35
+ $ tj optimize --agent claude-code-myproj
36
+ Analyzing 39 sessions, 1.8M tokens, $160.3500 spend (last 30d,
37
+ claude-code-myproj)…
38
+
39
+ ① Model downgrade: 13% of sessions match a smaller-model candidate shape
40
+ • 5 of 39 sessions matched structural heuristics
41
+ • Would have cost ~$0.0140 on the smaller model vs $2.2500 actual (in
42
+ window)
43
+ • Projected savings if pattern holds: $2.2400/mo
44
+ • Pattern: claude-opus-4-7 → claude-haiku-4-5
45
+
46
+ Examples:
47
+ 2cce7903.. 2 tool calls 0.8s $0.4500 (claude-opus-4-7)
48
+ e292ccbe.. 2 tool calls 0.8s $0.4500 (claude-opus-4-7)
49
+ d59cb502.. 2 tool calls 0.8s $0.4500 (claude-opus-4-7)
50
+
51
+ ! Candidate-flagging heuristic, not a quality judgment. Review the
52
+ example sessions before changing models.
53
+
54
+ ② Budget projection (anthropic, $200.0000/cycle): comfortably within budget
55
+ Run rate $160.3500/mo — 19% of cycle budget unused.
56
+ ```
57
+
58
+ Two analyzers reading the same spans you'd otherwise pay LangSmith to host: structural model-downgrade candidate flagging (never claims quality equivalence — surfaces examples to review) and per-provider monthly budget projection. Works with **any** agent already sending TokenJam data, not just Claude Code.
59
+
60
+ Try a tighter budget to see the over-budget renderer:
61
+
62
+ ```
63
+ $ tj optimize --budget anthropic --budget-usd 50
64
+ ② Budget projection (anthropic, $50.0000/cycle): projected to exceed cycle
65
+ budget
66
+ • Monthly run rate: $160.3500 (3.2× the budget)
67
+ • At current pace, budget exhausted on 2026-05-15 (0.0 day(s) from now)
68
+ • Days remaining in cycle: 16
69
+ • Projected cycle total: $162.8700, overage: $112.8700
70
+ ```
71
+
32
72
  **Real-time cost tracking.** Every LLM call is priced as it happens — by agent, model, session, and tool. Budget alerts fire before you hit the limit, not after.
33
73
 
34
74
  **Safety alerts.** Configure any tool call as a sensitive action (`send_email`, `delete_file`, `submit_form`) and get notified instantly via ntfy, Discord, Telegram, webhook, or stdout.
@@ -52,10 +92,11 @@ For **Claude Code**, **Codex**, and any agent that already emits OpenTelemetry.
52
92
  ```bash
53
93
  pip install "tokenjam[mcp]"
54
94
  tj onboard --claude-code # or: tj onboard --codex
55
- # Restart your coding agent
95
+ tj optimize # see cost-saving candidates + budget projection
96
+ # Restart your coding agent for live telemetry
56
97
  ```
57
98
 
58
- Every session, API call, tool use, and error is now a tracked span with cost and alert evaluation. The MCP server gives your coding agent 13 tools to query its own telemetry mid-session — just ask "how much have I spent today?" or "are there any active alerts?"
99
+ `tj onboard --claude-code` auto-backfills your existing session logs from `~/.claude/projects/` so `tj optimize` works on the first run — no waiting for new data to accumulate. The MCP server gives your coding agent 14 tools to query its own telemetry mid-session — just ask "how much have I spent today?" or "where could I save money?"
59
100
 
60
101
  [Full Claude Code & Codex setup →](#claude-code--coding-agents)
61
102
 
@@ -138,9 +179,6 @@ export OTEL_EXPORTER_OTLP_ENDPOINT=http://127.0.0.1:7391
138
179
 
139
180
  ```
140
181
  tj status
141
- ```
142
-
143
- ```
144
182
  ● my-email-agent completed (2m 14s)
145
183
 
146
184
  Cost today: $0.0340 / $5.0000 limit
@@ -151,17 +189,17 @@ tj status
151
189
  send_email called (sensitive action: critical)
152
190
  ```
153
191
 
154
- https://github.com/user-attachments/assets/b94d13f6-1432-40d4-b093-6958d74f0e65
155
-
156
192
  ```bash
157
- tj status # current state, cost, active alerts
158
- tj traces # full span history with waterfall view
159
- tj cost --since 7d # cost breakdown by agent, model, day
160
- tj alerts # everything that fired while you were away
161
- tj budget # view and set daily/session cost limits
162
- tj drift # behavioral drift Z-scores vs baseline
163
- tj tools # tool call history with error rates
164
- tj serve # start the web UI + REST API
193
+ tj status # current state, cost, active alerts
194
+ tj traces # full span history with waterfall view
195
+ tj cost --since 7d # cost breakdown by agent, model, day
196
+ tj optimize # cost-saving candidates + budget projection
197
+ tj backfill claude-code # ingest historical sessions from ~/.claude/projects/
198
+ tj alerts # everything that fired while you were away
199
+ tj budget # view and set daily/session cost limits
200
+ tj drift # behavioral drift Z-scores vs baseline
201
+ tj tools # tool call history with error rates
202
+ tj serve # start the web UI + REST API
165
203
  ```
166
204
 
167
205
  ---
@@ -170,8 +208,6 @@ tj serve # start the web UI + REST API
170
208
 
171
209
  `tj serve` starts a local dashboard at `http://127.0.0.1:7391/`.
172
210
 
173
- https://github.com/user-attachments/assets/ff09caec-3487-4542-8628-d62b7d92591f
174
-
175
211
  - **Status** — agent overview with cost, tokens, tool calls, and active alerts
176
212
  - **Traces** — trace list with span waterfall visualization
177
213
  - **Cost** — breakdown by agent, model, day, or tool
@@ -181,6 +217,24 @@ https://github.com/user-attachments/assets/ff09caec-3487-4542-8628-d62b7d92591f
181
217
 
182
218
  No signup, no cloud — runs entirely on your machine.
183
219
 
220
+ ### Screenshots
221
+
222
+ <table>
223
+ <tr>
224
+ <td width="50%"><strong>Status</strong> — agent overview with cost, tokens, tool calls, and active alerts.<br><br><img src="docs/screenshots/tj-status.png" alt="tj status page" /></td>
225
+ <td width="50%"><strong>Traces</strong> — recent traces with cost, duration, and span count. Click a row for the waterfall view.<br><br><img src="docs/screenshots/tj-traces.png" alt="tj traces page" /></td>
226
+ </tr>
227
+ <tr>
228
+ <td width="50%"><strong>Cost</strong> — spend broken down by day, agent, model, or tool.<br><br><img src="docs/screenshots/tj-cost.png" alt="tj cost page" /></td>
229
+ <td width="50%"><strong>Alerts</strong> — full alert history with severity filter and inline detail expansion.<br><br><img src="docs/screenshots/tj-alerts.png" alt="tj alerts page" /></td>
230
+ </tr>
231
+ <tr>
232
+ <td colspan="2"><strong>Budget</strong> — view and edit daily/per-session cost limits per agent, with recent budget alerts inline.<br><br><img src="docs/screenshots/tj-budget.png" alt="tj budget page" /></td>
233
+ </tr>
234
+ </table>
235
+
236
+
237
+
184
238
  ---
185
239
 
186
240
  ## tj vs LangSmith vs Langfuse
@@ -192,6 +246,7 @@ LangSmith and Langfuse are excellent for tracing LLM API calls and running evals
192
246
  | Signup required | ❌ | ✅ | ✅ | ✅ |
193
247
  | Data leaves your machine | ❌ | ✅ | cloud only | ✅ |
194
248
  | Real-time sensitive action alerts | ✅ | ❌ | ❌ | ❌ |
249
+ | Model-downgrade cost recommendations | ✅ | ❌ | ❌ | ❌ |
195
250
  | Behavioral drift detection | ✅ | ❌ | ❌ | ❌ |
196
251
  | Local-first, no cloud required | ✅ | ❌ | self-host only | ❌ |
197
252
  | OTel GenAI SemConv native | ✅ | partial | partial | partial |
@@ -205,13 +260,13 @@ LangSmith and Langfuse are excellent for tracing LLM API calls and running evals
205
260
 
206
261
  ### Claude Code
207
262
 
208
- Monitor every Claude Code session — costs, tool calls, API requests, errors — with two commands:
263
+ Monitor every Claude Code session and get cost-optimization recommendations from your existing usage in three commands:
209
264
 
210
265
  ```bash
211
266
  pip install "tokenjam[mcp]"
212
- tj onboard --claude-code
213
- # Restart Claude Code, then:
214
- tj status --agent claude-code-<project>
267
+ tj onboard --claude-code # auto-backfills your existing session logs
268
+ tj optimize # cost-saving candidates + budget projection
269
+ # Then restart Claude Code so live telemetry starts flowing
215
270
  ```
216
271
 
217
272
  `tj onboard --claude-code` does everything in one shot:
@@ -221,9 +276,28 @@ tj status --agent claude-code-<project>
221
276
  - Registers the MCP server globally (`claude mcp add --scope user tj -- tj mcp`)
222
277
  - Installs a background daemon (launchd on macOS, systemd on Linux)
223
278
  - Adds Docker harness-compatible OTLP env vars to `~/.zshrc`
279
+ - **Reads your existing `~/.claude/projects/*.jsonl` session logs** and ingests them into the local DB so `tj optimize` returns real numbers on first run (idempotent — safe to re-run)
280
+ - Writes a sensible default `[budget.anthropic] usd = 200` for the budget projector to project against — edit `~/.config/tj/config.toml` to change
224
281
 
225
282
  **Claude Code must be restarted** after running `tj onboard --claude-code`.
226
283
 
284
+ #### `tj optimize` — what you actually get
285
+
286
+ Two analyzers run over the spans TokenJam has captured. The output is read-only recommendations — `tj optimize` never changes how your agent runs.
287
+
288
+ **① Model-downgrade candidates.** Flags sessions whose structural shape (short input, short output, few tool calls) matches a class of work where a cheaper model in the same provider family is worth reviewing. Never asserts the cheaper model *would have produced the same answer* — only that the shape is worth a look. Real examples are surfaced so you can spot-check before changing models.
289
+
290
+ **② Budget projection.** Per-provider monthly projection against any `[budget.<provider>]` ceiling you've configured. Scopes spend by provider — an Anthropic budget excludes OpenAI spend. Shows exhaustion date, projected overage, and what the run rate would drop to if you acted on the downgrade candidates.
291
+
292
+ ```bash
293
+ tj optimize # both analyzers, last 30 days
294
+ tj optimize --only budget # just the projection
295
+ tj optimize --budget anthropic --budget-usd 50 # test a different ceiling
296
+ tj optimize --json # machine-readable for piping
297
+ ```
298
+
299
+ Works alongside a running `tj serve` (read-only fallback). Also exposed as the `get_optimize_report` MCP tool — your coding agent can ask itself "where could I save money?" mid-session.
300
+
227
301
  **Adding more projects** — run once per project directory:
228
302
 
229
303
  ```bash
@@ -236,10 +310,11 @@ Each project gets its own agent ID (`claude-code-<repo-name>`), all sharing one
236
310
 
237
311
  ### MCP server
238
312
 
239
- The MCP server gives Claude Code direct access to your observability data inside the session. 13 tools available after restart:
313
+ The MCP server gives Claude Code direct access to your observability data inside the session. 14 tools available after restart:
240
314
 
241
315
  | Tool | What it does |
242
316
  |---|---|
317
+ | `get_optimize_report` | Cost-saving candidates and budget projection — fires for either question (e.g. "where could I save money?" / "will I exceed my budget?") |
243
318
  | `get_status` | Current agent state — tokens, cost, active alerts |
244
319
  | `get_budget_headroom` | Budget limit vs spend |
245
320
  | `list_active_sessions` | All running sessions across agents |
@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
4
4
 
5
5
  [project]
6
6
  name = "tokenjam"
7
- version = "0.2.2"
7
+ version = "0.2.3"
8
8
  description = "TokenJam — local-first OTel-native observability for Autonomous AI agents"
9
9
  readme = "README.md"
10
10
  requires-python = ">=3.10"
@@ -1,12 +1,12 @@
1
1
  {
2
2
  "name": "@tokenjam/sdk",
3
- "version": "0.2.2",
3
+ "version": "0.2.3",
4
4
  "lockfileVersion": 3,
5
5
  "requires": true,
6
6
  "packages": {
7
7
  "": {
8
8
  "name": "@tokenjam/sdk",
9
- "version": "0.2.2",
9
+ "version": "0.2.3",
10
10
  "license": "MIT",
11
11
  "devDependencies": {
12
12
  "@types/node": "^25.5.0",
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@tokenjam/sdk",
3
- "version": "0.2.2",
3
+ "version": "0.2.3",
4
4
  "description": "TypeScript SDK for TokenJam — local-first observability for AI agents",
5
5
  "main": "dist/index.js",
6
6
  "types": "dist/index.d.ts",
@@ -579,6 +579,84 @@ def test_budget_set_agent_writes_config(runner, db, config, tmp_path):
579
579
  assert saved_config.agents["test-agent"].budget.session_usd == 0.25
580
580
 
581
581
 
582
+ def test_optimize_empty_db_outputs_friendly_message(runner, db, config):
583
+ result = _invoke(runner, db, config, ["optimize"])
584
+ assert result.exit_code == 0
585
+ assert "No usage data found" in result.output
586
+
587
+
588
+ def test_optimize_flags_downgrade_candidate(runner, db, config):
589
+ """A small Opus session in the window should appear as a candidate."""
590
+ from datetime import timedelta
591
+ from tests.factories import make_llm_span
592
+ from tokenjam.utils.time_parse import utcnow
593
+
594
+ start = utcnow() - timedelta(days=2)
595
+ span = make_llm_span(
596
+ agent_id="test-agent",
597
+ model="claude-opus-4-7",
598
+ provider="anthropic",
599
+ input_tokens=1000,
600
+ output_tokens=200,
601
+ cost_usd=0.030,
602
+ session_id="s-opus",
603
+ start_time=start,
604
+ )
605
+ db.insert_span(span)
606
+
607
+ result = _invoke(runner, db, config, ["optimize"])
608
+ assert result.exit_code == 0
609
+ assert "Model downgrade" in result.output
610
+ # Mandatory caveat must appear in human output
611
+ assert "Candidate-flagging heuristic" in result.output
612
+
613
+
614
+ def test_optimize_json_output_includes_caveat(runner, db, config):
615
+ from datetime import timedelta
616
+ from tests.factories import make_llm_span
617
+ from tokenjam.utils.time_parse import utcnow
618
+
619
+ span = make_llm_span(
620
+ agent_id="test-agent", model="claude-opus-4-7", provider="anthropic",
621
+ input_tokens=1000, output_tokens=200, cost_usd=0.030,
622
+ session_id="s", start_time=utcnow() - timedelta(days=1),
623
+ )
624
+ db.insert_span(span)
625
+
626
+ result = _invoke(runner, db, config, ["optimize", "--json"])
627
+ assert result.exit_code == 0
628
+ data = json.loads(result.output)
629
+ assert data["downgrade"] is not None
630
+ assert "Candidate-flagging heuristic" in data["downgrade"]["caveat"]
631
+
632
+
633
+ def test_optimize_budget_projection_from_config(runner, db):
634
+ """Budget configured via [budget.anthropic] should surface a projection."""
635
+ from datetime import timedelta
636
+ from tests.factories import make_llm_span
637
+ from tokenjam.core.config import ProviderBudget
638
+ from tokenjam.utils.time_parse import utcnow
639
+
640
+ cfg = TjConfig(
641
+ version="1",
642
+ agents={"test-agent": AgentConfig(budget=BudgetConfig(daily_usd=5.0))},
643
+ budgets={"anthropic": ProviderBudget(usd=10.0, cycle_start_day=1)},
644
+ )
645
+ # Insert spend that exceeds the small budget
646
+ for i in range(5):
647
+ span = make_llm_span(
648
+ agent_id="test-agent", model="claude-opus-4-7", provider="anthropic",
649
+ input_tokens=10_000, output_tokens=1_000, cost_usd=20.0,
650
+ session_id=f"s{i}", start_time=utcnow() - timedelta(days=1),
651
+ )
652
+ db.insert_span(span)
653
+
654
+ result = _invoke(runner, db, cfg, ["optimize", "--only", "budget"])
655
+ assert result.exit_code == 0
656
+ assert "Budget projection" in result.output
657
+ assert "anthropic" in result.output
658
+
659
+
582
660
  def test_budget_set_negative_daily_rejected(runner, db, config, tmp_path):
583
661
  """tj budget --daily -5 should error, not silently clear the limit."""
584
662
  config_file = tmp_path / "config.toml"