tokenjam 0.2.2__tar.gz → 0.2.3__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {tokenjam-0.2.2 → tokenjam-0.2.3}/CLAUDE.md +10 -1
- {tokenjam-0.2.2 → tokenjam-0.2.3}/PKG-INFO +98 -23
- {tokenjam-0.2.2 → tokenjam-0.2.3}/README.md +97 -22
- tokenjam-0.2.3/docs/screenshots/tj-alerts.png +0 -0
- tokenjam-0.2.3/docs/screenshots/tj-budget.png +0 -0
- tokenjam-0.2.3/docs/screenshots/tj-cost.png +0 -0
- tokenjam-0.2.3/docs/screenshots/tj-status.png +0 -0
- tokenjam-0.2.3/docs/screenshots/tj-traces.png +0 -0
- {tokenjam-0.2.2 → tokenjam-0.2.3}/pyproject.toml +1 -1
- {tokenjam-0.2.2 → tokenjam-0.2.3}/sdk-ts/package-lock.json +2 -2
- {tokenjam-0.2.2 → tokenjam-0.2.3}/sdk-ts/package.json +1 -1
- {tokenjam-0.2.2 → tokenjam-0.2.3}/tests/integration/test_cli.py +78 -0
- {tokenjam-0.2.2 → tokenjam-0.2.3}/tests/manual-new-release-tests.md +30 -47
- {tokenjam-0.2.2 → tokenjam-0.2.3}/tests/manual-pre-release-testing.md +27 -2
- tokenjam-0.2.3/tests/unit/test_backfill.py +179 -0
- tokenjam-0.2.3/tests/unit/test_optimize.py +211 -0
- tokenjam-0.2.3/tokenjam/cli/cmd_backfill.py +110 -0
- {tokenjam-0.2.2 → tokenjam-0.2.3}/tokenjam/cli/cmd_onboard.py +45 -1
- tokenjam-0.2.3/tokenjam/cli/cmd_optimize.py +232 -0
- {tokenjam-0.2.2 → tokenjam-0.2.3}/tokenjam/cli/main.py +4 -0
- tokenjam-0.2.3/tokenjam/core/backfill.py +454 -0
- {tokenjam-0.2.2 → tokenjam-0.2.3}/tokenjam/core/config.py +41 -0
- tokenjam-0.2.3/tokenjam/core/optimize.py +570 -0
- {tokenjam-0.2.2 → tokenjam-0.2.3}/tokenjam/core/pricing.py +20 -2
- {tokenjam-0.2.2 → tokenjam-0.2.3}/tokenjam/mcp/server.py +65 -0
- {tokenjam-0.2.2 → tokenjam-0.2.3}/tokenjam/pricing/models.toml +12 -0
- {tokenjam-0.2.2 → tokenjam-0.2.3}/.github/CODEOWNERS +0 -0
- {tokenjam-0.2.2 → tokenjam-0.2.3}/.github/ISSUE_TEMPLATE/bug_report.md +0 -0
- {tokenjam-0.2.2 → tokenjam-0.2.3}/.github/ISSUE_TEMPLATE/feature_request.md +0 -0
- {tokenjam-0.2.2 → tokenjam-0.2.3}/.github/ISSUE_TEMPLATE/integration_request.md +0 -0
- {tokenjam-0.2.2 → tokenjam-0.2.3}/.github/pull_request_template.md +0 -0
- {tokenjam-0.2.2 → tokenjam-0.2.3}/.github/workflows/ci.yml +0 -0
- {tokenjam-0.2.2 → tokenjam-0.2.3}/.github/workflows/publish-npm.yml +0 -0
- {tokenjam-0.2.2 → tokenjam-0.2.3}/.github/workflows/publish-pypi.yml +0 -0
- {tokenjam-0.2.2 → tokenjam-0.2.3}/.gitignore +0 -0
- {tokenjam-0.2.2 → tokenjam-0.2.3}/AGENTS.md +0 -0
- {tokenjam-0.2.2 → tokenjam-0.2.3}/CHANGELOG.md +0 -0
- {tokenjam-0.2.2 → tokenjam-0.2.3}/CONTRIBUTING.md +0 -0
- {tokenjam-0.2.2 → tokenjam-0.2.3}/LICENSE +0 -0
- {tokenjam-0.2.2 → tokenjam-0.2.3}/Makefile +0 -0
- {tokenjam-0.2.2 → tokenjam-0.2.3}/SECURITY.md +0 -0
- {tokenjam-0.2.2 → tokenjam-0.2.3}/docs/alerts.md +0 -0
- {tokenjam-0.2.2 → tokenjam-0.2.3}/docs/architecture.md +0 -0
- {tokenjam-0.2.2 → tokenjam-0.2.3}/docs/claude-code-integration.md +0 -0
- {tokenjam-0.2.2 → tokenjam-0.2.3}/docs/cli-reference.md +0 -0
- {tokenjam-0.2.2 → tokenjam-0.2.3}/docs/configuration.md +0 -0
- {tokenjam-0.2.2 → tokenjam-0.2.3}/docs/export.md +0 -0
- {tokenjam-0.2.2 → tokenjam-0.2.3}/docs/framework-support.md +0 -0
- {tokenjam-0.2.2 → tokenjam-0.2.3}/docs/nemoclaw-integration.md +0 -0
- {tokenjam-0.2.2 → tokenjam-0.2.3}/docs/openclaw.md +0 -0
- {tokenjam-0.2.2 → tokenjam-0.2.3}/examples/README.md +0 -0
- {tokenjam-0.2.2 → tokenjam-0.2.3}/examples/alerts_and_drift/_shared.py +0 -0
- {tokenjam-0.2.2 → tokenjam-0.2.3}/examples/alerts_and_drift/budget_breach_demo.py +0 -0
- {tokenjam-0.2.2 → tokenjam-0.2.3}/examples/alerts_and_drift/drift_demo.py +0 -0
- {tokenjam-0.2.2 → tokenjam-0.2.3}/examples/alerts_and_drift/sensitive_actions_demo.py +0 -0
- {tokenjam-0.2.2 → tokenjam-0.2.3}/examples/multi/rag_pipeline.py +0 -0
- {tokenjam-0.2.2 → tokenjam-0.2.3}/examples/multi/research_team.py +0 -0
- {tokenjam-0.2.2 → tokenjam-0.2.3}/examples/multi/router_agent.py +0 -0
- {tokenjam-0.2.2 → tokenjam-0.2.3}/examples/multi/sample_docs/agent_patterns.txt +0 -0
- {tokenjam-0.2.2 → tokenjam-0.2.3}/examples/multi/sample_docs/cost_management.txt +0 -0
- {tokenjam-0.2.2 → tokenjam-0.2.3}/examples/multi/sample_docs/observability.txt +0 -0
- {tokenjam-0.2.2 → tokenjam-0.2.3}/examples/multi/sample_docs/safety.txt +0 -0
- {tokenjam-0.2.2 → tokenjam-0.2.3}/examples/openclaw/README.md +0 -0
- {tokenjam-0.2.2 → tokenjam-0.2.3}/examples/single_framework/autogen_agent.py +0 -0
- {tokenjam-0.2.2 → tokenjam-0.2.3}/examples/single_framework/crewai_agent.py +0 -0
- {tokenjam-0.2.2 → tokenjam-0.2.3}/examples/single_framework/langchain_agent.py +0 -0
- {tokenjam-0.2.2 → tokenjam-0.2.3}/examples/single_framework/langgraph_agent.py +0 -0
- {tokenjam-0.2.2 → tokenjam-0.2.3}/examples/single_framework/llamaindex_agent.py +0 -0
- {tokenjam-0.2.2 → tokenjam-0.2.3}/examples/single_provider/anthropic_agent.py +0 -0
- {tokenjam-0.2.2 → tokenjam-0.2.3}/examples/single_provider/bedrock_agent.py +0 -0
- {tokenjam-0.2.2 → tokenjam-0.2.3}/examples/single_provider/gemini_agent.py +0 -0
- {tokenjam-0.2.2 → tokenjam-0.2.3}/examples/single_provider/litellm_agent.py +0 -0
- {tokenjam-0.2.2 → tokenjam-0.2.3}/examples/single_provider/openai_agent.py +0 -0
- {tokenjam-0.2.2 → tokenjam-0.2.3}/examples/single_provider/openai_agents_sdk_agent.py +0 -0
- {tokenjam-0.2.2 → tokenjam-0.2.3}/incidents/hallucination-drift/BLOG.md +0 -0
- {tokenjam-0.2.2 → tokenjam-0.2.3}/incidents/hallucination-drift/README.md +0 -0
- {tokenjam-0.2.2 → tokenjam-0.2.3}/incidents/hallucination-drift/scenario.py +0 -0
- {tokenjam-0.2.2 → tokenjam-0.2.3}/incidents/retry-loop/BLOG.md +0 -0
- {tokenjam-0.2.2 → tokenjam-0.2.3}/incidents/retry-loop/README.md +0 -0
- {tokenjam-0.2.2 → tokenjam-0.2.3}/incidents/retry-loop/scenario.py +0 -0
- {tokenjam-0.2.2 → tokenjam-0.2.3}/incidents/surprise-cost/BLOG.md +0 -0
- {tokenjam-0.2.2 → tokenjam-0.2.3}/incidents/surprise-cost/README.md +0 -0
- {tokenjam-0.2.2 → tokenjam-0.2.3}/incidents/surprise-cost/scenario.py +0 -0
- {tokenjam-0.2.2 → tokenjam-0.2.3}/pricing/models.toml +0 -0
- {tokenjam-0.2.2 → tokenjam-0.2.3}/sdk-ts/README.md +0 -0
- {tokenjam-0.2.2 → tokenjam-0.2.3}/sdk-ts/src/client.test.ts +0 -0
- {tokenjam-0.2.2 → tokenjam-0.2.3}/sdk-ts/src/client.ts +0 -0
- {tokenjam-0.2.2 → tokenjam-0.2.3}/sdk-ts/src/index.ts +0 -0
- {tokenjam-0.2.2 → tokenjam-0.2.3}/sdk-ts/src/semconv.test.ts +0 -0
- {tokenjam-0.2.2 → tokenjam-0.2.3}/sdk-ts/src/semconv.ts +0 -0
- {tokenjam-0.2.2 → tokenjam-0.2.3}/sdk-ts/src/span-builder.test.ts +0 -0
- {tokenjam-0.2.2 → tokenjam-0.2.3}/sdk-ts/src/span-builder.ts +0 -0
- {tokenjam-0.2.2 → tokenjam-0.2.3}/sdk-ts/src/types.ts +0 -0
- {tokenjam-0.2.2 → tokenjam-0.2.3}/sdk-ts/tsconfig.json +0 -0
- {tokenjam-0.2.2 → tokenjam-0.2.3}/tests/__init__.py +0 -0
- {tokenjam-0.2.2 → tokenjam-0.2.3}/tests/agents/__init__.py +0 -0
- {tokenjam-0.2.2 → tokenjam-0.2.3}/tests/agents/email_agent_budget_breach.py +0 -0
- {tokenjam-0.2.2 → tokenjam-0.2.3}/tests/agents/email_agent_drift.py +0 -0
- {tokenjam-0.2.2 → tokenjam-0.2.3}/tests/agents/email_agent_loop.py +0 -0
- {tokenjam-0.2.2 → tokenjam-0.2.3}/tests/agents/email_agent_normal.py +0 -0
- {tokenjam-0.2.2 → tokenjam-0.2.3}/tests/agents/mock_llm.py +0 -0
- {tokenjam-0.2.2 → tokenjam-0.2.3}/tests/agents/test_mock_scenarios.py +0 -0
- {tokenjam-0.2.2 → tokenjam-0.2.3}/tests/conftest.py +0 -0
- {tokenjam-0.2.2 → tokenjam-0.2.3}/tests/e2e/__init__.py +0 -0
- {tokenjam-0.2.2 → tokenjam-0.2.3}/tests/e2e/conftest.py +0 -0
- {tokenjam-0.2.2 → tokenjam-0.2.3}/tests/e2e/test_real_llm.py +0 -0
- {tokenjam-0.2.2 → tokenjam-0.2.3}/tests/factories.py +0 -0
- {tokenjam-0.2.2 → tokenjam-0.2.3}/tests/integration/__init__.py +0 -0
- {tokenjam-0.2.2 → tokenjam-0.2.3}/tests/integration/test_api.py +0 -0
- {tokenjam-0.2.2 → tokenjam-0.2.3}/tests/integration/test_db.py +0 -0
- {tokenjam-0.2.2 → tokenjam-0.2.3}/tests/integration/test_demos.py +0 -0
- {tokenjam-0.2.2 → tokenjam-0.2.3}/tests/integration/test_full_pipeline.py +0 -0
- {tokenjam-0.2.2 → tokenjam-0.2.3}/tests/integration/test_logs_api.py +0 -0
- {tokenjam-0.2.2 → tokenjam-0.2.3}/tests/synthetic/__init__.py +0 -0
- {tokenjam-0.2.2 → tokenjam-0.2.3}/tests/synthetic/test_alert_rules.py +0 -0
- {tokenjam-0.2.2 → tokenjam-0.2.3}/tests/synthetic/test_cost_tracking.py +0 -0
- {tokenjam-0.2.2 → tokenjam-0.2.3}/tests/synthetic/test_drift_detection.py +0 -0
- {tokenjam-0.2.2 → tokenjam-0.2.3}/tests/synthetic/test_ingest.py +0 -0
- {tokenjam-0.2.2 → tokenjam-0.2.3}/tests/synthetic/test_schema_validation.py +0 -0
- {tokenjam-0.2.2 → tokenjam-0.2.3}/tests/toy_agent/toy_agent.py +0 -0
- {tokenjam-0.2.2 → tokenjam-0.2.3}/tests/unit/__init__.py +0 -0
- {tokenjam-0.2.2 → tokenjam-0.2.3}/tests/unit/test_alerts.py +0 -0
- {tokenjam-0.2.2 → tokenjam-0.2.3}/tests/unit/test_cmd_stop.py +0 -0
- {tokenjam-0.2.2 → tokenjam-0.2.3}/tests/unit/test_config.py +0 -0
- {tokenjam-0.2.2 → tokenjam-0.2.3}/tests/unit/test_cost.py +0 -0
- {tokenjam-0.2.2 → tokenjam-0.2.3}/tests/unit/test_demo_env.py +0 -0
- {tokenjam-0.2.2 → tokenjam-0.2.3}/tests/unit/test_demo_scenarios.py +0 -0
- {tokenjam-0.2.2 → tokenjam-0.2.3}/tests/unit/test_drift.py +0 -0
- {tokenjam-0.2.2 → tokenjam-0.2.3}/tests/unit/test_formatting.py +0 -0
- {tokenjam-0.2.2 → tokenjam-0.2.3}/tests/unit/test_litellm_client.py +0 -0
- {tokenjam-0.2.2 → tokenjam-0.2.3}/tests/unit/test_litellm_integration.py +0 -0
- {tokenjam-0.2.2 → tokenjam-0.2.3}/tests/unit/test_logs_converter.py +0 -0
- {tokenjam-0.2.2 → tokenjam-0.2.3}/tests/unit/test_mcp_server.py +0 -0
- {tokenjam-0.2.2 → tokenjam-0.2.3}/tests/unit/test_models.py +0 -0
- {tokenjam-0.2.2 → tokenjam-0.2.3}/tests/unit/test_onboard_codex.py +0 -0
- {tokenjam-0.2.2 → tokenjam-0.2.3}/tests/unit/test_onboard_daemon.py +0 -0
- {tokenjam-0.2.2 → tokenjam-0.2.3}/tests/unit/test_openclaw_ingest.py +0 -0
- {tokenjam-0.2.2 → tokenjam-0.2.3}/tests/unit/test_spans_stats_repair.py +0 -0
- {tokenjam-0.2.2 → tokenjam-0.2.3}/tests/unit/test_time_parse.py +0 -0
- {tokenjam-0.2.2 → tokenjam-0.2.3}/tokenjam/__init__.py +0 -0
- {tokenjam-0.2.2 → tokenjam-0.2.3}/tokenjam/api/__init__.py +0 -0
- {tokenjam-0.2.2 → tokenjam-0.2.3}/tokenjam/api/app.py +0 -0
- {tokenjam-0.2.2 → tokenjam-0.2.3}/tokenjam/api/deps.py +0 -0
- {tokenjam-0.2.2 → tokenjam-0.2.3}/tokenjam/api/middleware.py +0 -0
- {tokenjam-0.2.2 → tokenjam-0.2.3}/tokenjam/api/routes/__init__.py +0 -0
- {tokenjam-0.2.2 → tokenjam-0.2.3}/tokenjam/api/routes/agents.py +0 -0
- {tokenjam-0.2.2 → tokenjam-0.2.3}/tokenjam/api/routes/alerts.py +0 -0
- {tokenjam-0.2.2 → tokenjam-0.2.3}/tokenjam/api/routes/budget.py +0 -0
- {tokenjam-0.2.2 → tokenjam-0.2.3}/tokenjam/api/routes/cost.py +0 -0
- {tokenjam-0.2.2 → tokenjam-0.2.3}/tokenjam/api/routes/drift.py +0 -0
- {tokenjam-0.2.2 → tokenjam-0.2.3}/tokenjam/api/routes/logs.py +0 -0
- {tokenjam-0.2.2 → tokenjam-0.2.3}/tokenjam/api/routes/metrics.py +0 -0
- {tokenjam-0.2.2 → tokenjam-0.2.3}/tokenjam/api/routes/otlp.py +0 -0
- {tokenjam-0.2.2 → tokenjam-0.2.3}/tokenjam/api/routes/spans.py +0 -0
- {tokenjam-0.2.2 → tokenjam-0.2.3}/tokenjam/api/routes/status.py +0 -0
- {tokenjam-0.2.2 → tokenjam-0.2.3}/tokenjam/api/routes/tools.py +0 -0
- {tokenjam-0.2.2 → tokenjam-0.2.3}/tokenjam/api/routes/traces.py +0 -0
- {tokenjam-0.2.2 → tokenjam-0.2.3}/tokenjam/cli/__init__.py +0 -0
- {tokenjam-0.2.2 → tokenjam-0.2.3}/tokenjam/cli/cmd_alerts.py +0 -0
- {tokenjam-0.2.2 → tokenjam-0.2.3}/tokenjam/cli/cmd_budget.py +0 -0
- {tokenjam-0.2.2 → tokenjam-0.2.3}/tokenjam/cli/cmd_cost.py +0 -0
- {tokenjam-0.2.2 → tokenjam-0.2.3}/tokenjam/cli/cmd_demo.py +0 -0
- {tokenjam-0.2.2 → tokenjam-0.2.3}/tokenjam/cli/cmd_doctor.py +0 -0
- {tokenjam-0.2.2 → tokenjam-0.2.3}/tokenjam/cli/cmd_drift.py +0 -0
- {tokenjam-0.2.2 → tokenjam-0.2.3}/tokenjam/cli/cmd_export.py +0 -0
- {tokenjam-0.2.2 → tokenjam-0.2.3}/tokenjam/cli/cmd_mcp.py +0 -0
- {tokenjam-0.2.2 → tokenjam-0.2.3}/tokenjam/cli/cmd_serve.py +0 -0
- {tokenjam-0.2.2 → tokenjam-0.2.3}/tokenjam/cli/cmd_status.py +0 -0
- {tokenjam-0.2.2 → tokenjam-0.2.3}/tokenjam/cli/cmd_stop.py +0 -0
- {tokenjam-0.2.2 → tokenjam-0.2.3}/tokenjam/cli/cmd_tools.py +0 -0
- {tokenjam-0.2.2 → tokenjam-0.2.3}/tokenjam/cli/cmd_traces.py +0 -0
- {tokenjam-0.2.2 → tokenjam-0.2.3}/tokenjam/cli/cmd_uninstall.py +0 -0
- {tokenjam-0.2.2 → tokenjam-0.2.3}/tokenjam/core/__init__.py +0 -0
- {tokenjam-0.2.2 → tokenjam-0.2.3}/tokenjam/core/alerts.py +0 -0
- {tokenjam-0.2.2 → tokenjam-0.2.3}/tokenjam/core/api_backend.py +0 -0
- {tokenjam-0.2.2 → tokenjam-0.2.3}/tokenjam/core/cost.py +0 -0
- {tokenjam-0.2.2 → tokenjam-0.2.3}/tokenjam/core/db.py +0 -0
- {tokenjam-0.2.2 → tokenjam-0.2.3}/tokenjam/core/drift.py +0 -0
- {tokenjam-0.2.2 → tokenjam-0.2.3}/tokenjam/core/ingest.py +0 -0
- {tokenjam-0.2.2 → tokenjam-0.2.3}/tokenjam/core/models.py +0 -0
- {tokenjam-0.2.2 → tokenjam-0.2.3}/tokenjam/core/retention.py +0 -0
- {tokenjam-0.2.2 → tokenjam-0.2.3}/tokenjam/core/schema_validator.py +0 -0
- {tokenjam-0.2.2 → tokenjam-0.2.3}/tokenjam/demo/__init__.py +0 -0
- {tokenjam-0.2.2 → tokenjam-0.2.3}/tokenjam/demo/env.py +0 -0
- {tokenjam-0.2.2 → tokenjam-0.2.3}/tokenjam/mcp/__init__.py +0 -0
- {tokenjam-0.2.2 → tokenjam-0.2.3}/tokenjam/otel/__init__.py +0 -0
- {tokenjam-0.2.2 → tokenjam-0.2.3}/tokenjam/otel/exporters.py +0 -0
- {tokenjam-0.2.2 → tokenjam-0.2.3}/tokenjam/otel/provider.py +0 -0
- {tokenjam-0.2.2 → tokenjam-0.2.3}/tokenjam/otel/semconv.py +0 -0
- {tokenjam-0.2.2 → tokenjam-0.2.3}/tokenjam/py.typed +0 -0
- {tokenjam-0.2.2 → tokenjam-0.2.3}/tokenjam/sdk/__init__.py +0 -0
- {tokenjam-0.2.2 → tokenjam-0.2.3}/tokenjam/sdk/agent.py +0 -0
- {tokenjam-0.2.2 → tokenjam-0.2.3}/tokenjam/sdk/bootstrap.py +0 -0
- {tokenjam-0.2.2 → tokenjam-0.2.3}/tokenjam/sdk/client.py +0 -0
- {tokenjam-0.2.2 → tokenjam-0.2.3}/tokenjam/sdk/http_exporter.py +0 -0
- {tokenjam-0.2.2 → tokenjam-0.2.3}/tokenjam/sdk/integrations/__init__.py +0 -0
- {tokenjam-0.2.2 → tokenjam-0.2.3}/tokenjam/sdk/integrations/anthropic.py +0 -0
- {tokenjam-0.2.2 → tokenjam-0.2.3}/tokenjam/sdk/integrations/autogen.py +0 -0
- {tokenjam-0.2.2 → tokenjam-0.2.3}/tokenjam/sdk/integrations/base.py +0 -0
- {tokenjam-0.2.2 → tokenjam-0.2.3}/tokenjam/sdk/integrations/bedrock.py +0 -0
- {tokenjam-0.2.2 → tokenjam-0.2.3}/tokenjam/sdk/integrations/crewai.py +0 -0
- {tokenjam-0.2.2 → tokenjam-0.2.3}/tokenjam/sdk/integrations/gemini.py +0 -0
- {tokenjam-0.2.2 → tokenjam-0.2.3}/tokenjam/sdk/integrations/langchain.py +0 -0
- {tokenjam-0.2.2 → tokenjam-0.2.3}/tokenjam/sdk/integrations/langgraph.py +0 -0
- {tokenjam-0.2.2 → tokenjam-0.2.3}/tokenjam/sdk/integrations/litellm.py +0 -0
- {tokenjam-0.2.2 → tokenjam-0.2.3}/tokenjam/sdk/integrations/llamaindex.py +0 -0
- {tokenjam-0.2.2 → tokenjam-0.2.3}/tokenjam/sdk/integrations/nemoclaw.py +0 -0
- {tokenjam-0.2.2 → tokenjam-0.2.3}/tokenjam/sdk/integrations/openai.py +0 -0
- {tokenjam-0.2.2 → tokenjam-0.2.3}/tokenjam/sdk/integrations/openai_agents_sdk.py +0 -0
- {tokenjam-0.2.2 → tokenjam-0.2.3}/tokenjam/sdk/transport.py +0 -0
- {tokenjam-0.2.2 → tokenjam-0.2.3}/tokenjam/ui/index.html +0 -0
- {tokenjam-0.2.2 → tokenjam-0.2.3}/tokenjam/utils/__init__.py +0 -0
- {tokenjam-0.2.2 → tokenjam-0.2.3}/tokenjam/utils/formatting.py +0 -0
- {tokenjam-0.2.2 → tokenjam-0.2.3}/tokenjam/utils/ids.py +0 -0
- {tokenjam-0.2.2 → tokenjam-0.2.3}/tokenjam/utils/time_parse.py +0 -0
|
@@ -92,6 +92,8 @@ Post-ingest hooks run synchronously after each span is written to DB:
|
|
|
92
92
|
- **`tokenjam/core/cost.py`**: `calculate_cost()` (pure function, rounds to 8dp) + `CostEngine` (post-ingest hook that updates `spans.cost_usd` and `sessions.total_cost_usd` via `db.conn` — see db.py note). Pricing loaded from `pricing/models.toml`.
|
|
93
93
|
- **`tokenjam/core/alerts.py`**: `AlertEngine` with 13 alert types, `CooldownTracker` (in-memory, per agent+type, resets on restart), `AlertDispatcher` routing to 6 channel types (stdout, file, ntfy, webhook, Discord, Telegram). `AlertEngine.fire()` is the external entry point for other modules (SchemaValidator, DriftDetector) to fire alerts. Suppressed alerts are still persisted to DB but not dispatched to channels. Hardcoded thresholds: retry loop fires at 4+ identical tool calls in last 6 spans; failure rate fires at >20% errors in last 20 spans (checked every 5th error); session duration default 3600s. Stdout and file channels always include full detail regardless of `include_captured_content` config.
|
|
94
94
|
- **`tokenjam/core/drift.py`**: `DriftDetector` — Z-score based behavioral drift detection, fires at session end.
|
|
95
|
+
- **`tokenjam/core/optimize.py`**: Two analyzers used by `tj optimize` and the `get_optimize_report` MCP tool. `analyze_model_downgrade()` flags sessions whose structural shape (input < 5K tokens AND output < 500 tokens AND tool_calls ≤ 5) matches a class of work where a cheaper alternative model is worth reviewing — never claims quality equivalence. `MODEL_DOWNGRADE_CAVEAT` is in the dataclass default so it cannot be removed accidentally. `project_budget()` projects current cycle spend against a `[budget.<provider>]` ceiling; only fires when budget > 0. Both functions operate on `db.conn` directly.
|
|
96
|
+
- **`tokenjam/core/backfill.py`**: Parses Claude Code on-disk session JSONL files into `NormalizedSpan`s. Cost is recomputed from `pricing/models.toml` because the on-disk format has no `cost_usd`. The parser tolerates the dated `claude-<family>-<ver>-YYYYMMDD` model-name suffixes Anthropic ships (handled by `get_rates()` in `core/pricing.py`, which strips the trailing 8-digit date suffix when no exact pricing match exists). Idempotency relies on deterministic span IDs derived from `(session_id, message uuid)` / `(session_id, tool_use id)`.
|
|
95
97
|
- **`tokenjam/core/schema_validator.py`**: Validates tool outputs against declared or genson-inferred JSON Schema. Only fires on `gen_ai.tool.call` spans with `gen_ai.tool.output` in attributes. Schema priority: 1) declared file from agent config `output_schema`, 2) inferred schema from `DriftBaseline.output_schema_inferred`. Caches schemas in-memory per agent.
|
|
96
98
|
- **`tokenjam/core/models.py`**: All domain dataclasses — `NormalizedSpan`, `SessionRecord`, `Alert`, `DriftBaseline`, filter types, etc.
|
|
97
99
|
- **`tokenjam/core/config.py`**: `TjConfig` dataclass tree, TOML loading/writing, config file discovery.
|
|
@@ -129,6 +131,8 @@ Post-ingest hooks run synchronously after each span is written to DB:
|
|
|
129
131
|
| `tj mcp` | `cmd_mcp.py` | Start the stdio MCP server for Claude Code integration |
|
|
130
132
|
| `tj uninstall` | `cmd_uninstall.py` | Remove all TokenJam data, config, and daemon |
|
|
131
133
|
| `tj doctor` | `cmd_doctor.py` | Health checks (config, DB, secrets, webhooks, drift readiness, schema-vs-capture consistency). Exit 0 = ok, 1 = warnings, 2 = errors |
|
|
134
|
+
| `tj optimize` | `cmd_optimize.py` | Two analyzers: model-downgrade candidates + per-provider budget projection. `--since 30d`, `--only model\|budget`, `--budget <provider>`, `--budget-usd <amount>`. JSON output supported. Opens the live DB read-only so it works alongside a running `tj serve`. |
|
|
135
|
+
| `tj backfill claude-code` | `cmd_backfill.py` | Parse `~/.claude/projects/*.jsonl` and ingest historical sessions. Idempotent — deterministic span IDs (SHA-256 of `session_id + uuid`) mean re-runs skip already-ingested rows. Auto-invoked at the end of `tj onboard --claude-code`. Future agent log formats (Codex, etc.) plug in as additional subcommands. |
|
|
132
136
|
|
|
133
137
|
All commands support `--json` for machine-readable output. Commands that query alerts use exit code 1 if active (unacknowledged, unsuppressed) alerts exist.
|
|
134
138
|
|
|
@@ -167,12 +171,17 @@ When a span has a `conversation_id` matching an existing session, it's attribute
|
|
|
167
171
|
11. **OTel TracerProvider is global and set-once** — `trace.set_tracer_provider()` only works once per process. In tests, set the provider once at module level (not per-test in a fixture) and clear spans between tests. Use a custom `_CollectingExporter(SpanExporter)` since `InMemorySpanExporter` is not available in the installed OTel version. See `tests/agents/test_mock_scenarios.py` for the SDK test pattern and `tests/integration/test_full_pipeline.py` for the pipeline pattern.
|
|
168
172
|
12. **New SDK integrations must call `ensure_initialised()`** — every `patch_*()` convenience function must call `from tokenjam.sdk.bootstrap import ensure_initialised; ensure_initialised()` before installing hooks. This lazily bootstraps the TracerProvider + IngestPipeline on first use.
|
|
169
173
|
13. **PyPI package name is `tokenjam`, not `ocw`** — `pip install tokenjam` is the correct install command. The CLI command is `tj` and the Python package directory is `tokenjam/`. The published package name on PyPI is `tokenjam`. Never write `pip install ocw` in docs, examples, or comments.
|
|
170
|
-
14.
|
|
174
|
+
14. **`tj optimize` output must never claim quality equivalence** — the model-downgrade finding flags structural candidates only. Every user-visible string says "looks like" / "candidate" / "review before switching" — never "safe to downgrade" or "would have worked." The `MODEL_DOWNGRADE_CAVEAT` constant lives on `DowngradeFinding` as a dataclass default so it can't be removed by accident; it must also appear in human-readable CLI output. Equivalent honesty applies to future optimize analyzers (cache-opportunity, prompt-bloat).
|
|
175
|
+
15. **Version bump on release** — both `pyproject.toml` (`version = "X.Y.Z"`) and `sdk-ts/package.json` (`"version": "X.Y.Z"`) must be bumped to the new version before creating a GitHub release. The publish workflows (`publish-pypi.yml`, `publish-npm.yml`) trigger on `release published` events and will fail with 403 if the version already exists on PyPI/npm.
|
|
171
176
|
|
|
172
177
|
## Config
|
|
173
178
|
|
|
174
179
|
Config is TOML, discovered at: `tj.toml` -> `.tj/config.toml` -> `~/.config/tj/config.toml`. Override with `--config` or `TJ_CONFIG` env var. Full config hierarchy is in `tokenjam/core/config.py` (`TjConfig` dataclass).
|
|
175
180
|
|
|
181
|
+
Two distinct budget concepts coexist — do not conflate:
|
|
182
|
+
- **`[defaults.budget]` / `[agents.<id>.budget]`** (`daily_usd`, `session_usd`) — per-agent alert thresholds checked on every span by `AlertEngine`.
|
|
183
|
+
- **`[budget.<provider>]`** (`usd`, `cycle_start_day`, `applies_to_services`) — periodic monthly ceilings used only by `tj optimize` projections. Read-only at projection time; no alerts fire from these. `tj onboard --claude-code` writes a default `[budget.anthropic] usd = 200` if no provider budget is configured. The analyzer scopes spend by `provider` column and (optionally) by `agent_id IN applies_to_services`.
|
|
184
|
+
|
|
176
185
|
`tj onboard --claude-code` and `tj onboard --codex` always write to the **global** config (`~/.config/tj/config.toml`) regardless of cwd. This is intentional: each coding-agent integration reads one ingest secret from a single global location (`~/.claude/settings.json` or `~/.codex/config.toml`), and per-project configs would rotate that secret on every onboard, breaking auth for previously onboarded projects. Onboarded Claude Code project paths are tracked in `~/.config/tj/projects.json` for clean uninstall. Codex onboarding is fully project-agnostic — Codex hardcodes `service.name=codex_exec` in its binary, so there is one Codex agent ID for all projects.
|
|
177
186
|
|
|
178
187
|
## Daemon (launchd / systemd)
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: tokenjam
|
|
3
|
-
Version: 0.2.2
|
|
3
|
+
Version: 0.2.3
|
|
4
4
|
Summary: TokenJam — local-first OTel-native observability for Autonomous AI agents
|
|
5
5
|
Project-URL: Homepage, https://opencla.watch
|
|
6
6
|
Project-URL: Repository, https://github.com/Metabuilder-Labs/openclawwatch
|
|
@@ -85,6 +85,46 @@ Your agent sends emails, writes files, calls APIs, and spends your money — all
|
|
|
85
85
|
|
|
86
86
|
## What you get
|
|
87
87
|
|
|
88
|
+
**Cost optimization for Claude Code — out of the box.** Run `tj onboard --claude-code` and TokenJam reads your existing Claude Code session logs (up to 30 days, whatever your local retention has kept) so you can run `tj optimize` immediately:
|
|
89
|
+
|
|
90
|
+
```
|
|
91
|
+
$ tj optimize --agent claude-code-myproj
|
|
92
|
+
Analyzing 39 sessions, 1.8M tokens, $160.3500 spend (last 30d,
|
|
93
|
+
claude-code-myproj)…
|
|
94
|
+
|
|
95
|
+
① Model downgrade: 13% of sessions match a smaller-model candidate shape
|
|
96
|
+
• 5 of 39 sessions matched structural heuristics
|
|
97
|
+
• Would have cost ~$0.0140 on the smaller model vs $2.2500 actual (in
|
|
98
|
+
window)
|
|
99
|
+
• Projected savings if pattern holds: $2.2400/mo
|
|
100
|
+
• Pattern: claude-opus-4-7 → claude-haiku-4-5
|
|
101
|
+
|
|
102
|
+
Examples:
|
|
103
|
+
2cce7903.. 2 tool calls 0.8s $0.4500 (claude-opus-4-7)
|
|
104
|
+
e292ccbe.. 2 tool calls 0.8s $0.4500 (claude-opus-4-7)
|
|
105
|
+
d59cb502.. 2 tool calls 0.8s $0.4500 (claude-opus-4-7)
|
|
106
|
+
|
|
107
|
+
! Candidate-flagging heuristic, not a quality judgment. Review the
|
|
108
|
+
example sessions before changing models.
|
|
109
|
+
|
|
110
|
+
② Budget projection (anthropic, $200.0000/cycle): comfortably within budget
|
|
111
|
+
Run rate $160.3500/mo — 19% of cycle budget unused.
|
|
112
|
+
```
|
|
113
|
+
|
|
114
|
+
Two analyzers reading the same spans you'd otherwise pay LangSmith to host: structural model-downgrade candidate flagging (never claims quality equivalence — surfaces examples to review) and per-provider monthly budget projection. Works with **any** agent already sending TokenJam data, not just Claude Code.
|
|
115
|
+
|
|
116
|
+
Try a tighter budget to see the over-budget renderer:
|
|
117
|
+
|
|
118
|
+
```
|
|
119
|
+
$ tj optimize --budget anthropic --budget-usd 50
|
|
120
|
+
② Budget projection (anthropic, $50.0000/cycle): projected to exceed cycle
|
|
121
|
+
budget
|
|
122
|
+
• Monthly run rate: $160.3500 (3.2× the budget)
|
|
123
|
+
• At current pace, budget exhausted on 2026-05-15 (0.0 day(s) from now)
|
|
124
|
+
• Days remaining in cycle: 16
|
|
125
|
+
• Projected cycle total: $162.8700, overage: $112.8700
|
|
126
|
+
```
|
|
127
|
+
|
|
88
128
|
**Real-time cost tracking.** Every LLM call is priced as it happens — by agent, model, session, and tool. Budget alerts fire before you hit the limit, not after.
|
|
89
129
|
|
|
90
130
|
**Safety alerts.** Configure any tool call as a sensitive action (`send_email`, `delete_file`, `submit_form`) and get notified instantly via ntfy, Discord, Telegram, webhook, or stdout.
|
|
@@ -108,10 +148,11 @@ For **Claude Code**, **Codex**, and any agent that already emits OpenTelemetry.
|
|
|
108
148
|
```bash
|
|
109
149
|
pip install "tokenjam[mcp]"
|
|
110
150
|
tj onboard --claude-code # or: tj onboard --codex
|
|
111
|
-
#
|
|
151
|
+
tj optimize # see cost-saving candidates + budget projection
|
|
152
|
+
# Restart your coding agent for live telemetry
|
|
112
153
|
```
|
|
113
154
|
|
|
114
|
-
|
|
155
|
+
`tj onboard --claude-code` auto-backfills your existing session logs from `~/.claude/projects/` so `tj optimize` works on the first run — no waiting for new data to accumulate. The MCP server gives your coding agent 14 tools to query its own telemetry mid-session — just ask "how much have I spent today?" or "where could I save money?"
|
|
115
156
|
|
|
116
157
|
[Full Claude Code & Codex setup →](#claude-code--coding-agents)
|
|
117
158
|
|
|
@@ -194,9 +235,6 @@ export OTEL_EXPORTER_OTLP_ENDPOINT=http://127.0.0.1:7391
|
|
|
194
235
|
|
|
195
236
|
```
|
|
196
237
|
tj status
|
|
197
|
-
```
|
|
198
|
-
|
|
199
|
-
```
|
|
200
238
|
● my-email-agent completed (2m 14s)
|
|
201
239
|
|
|
202
240
|
Cost today: $0.0340 / $5.0000 limit
|
|
@@ -207,17 +245,17 @@ tj status
|
|
|
207
245
|
send_email called (sensitive action: critical)
|
|
208
246
|
```
|
|
209
247
|
|
|
210
|
-
https://github.com/user-attachments/assets/b94d13f6-1432-40d4-b093-6958d74f0e65
|
|
211
|
-
|
|
212
248
|
```bash
|
|
213
|
-
tj status
|
|
214
|
-
tj traces
|
|
215
|
-
tj cost --since 7d
|
|
216
|
-
tj
|
|
217
|
-
tj
|
|
218
|
-
tj
|
|
219
|
-
tj
|
|
220
|
-
tj
|
|
249
|
+
tj status # current state, cost, active alerts
|
|
250
|
+
tj traces # full span history with waterfall view
|
|
251
|
+
tj cost --since 7d # cost breakdown by agent, model, day
|
|
252
|
+
tj optimize # cost-saving candidates + budget projection
|
|
253
|
+
tj backfill claude-code # ingest historical sessions from ~/.claude/projects/
|
|
254
|
+
tj alerts # everything that fired while you were away
|
|
255
|
+
tj budget # view and set daily/session cost limits
|
|
256
|
+
tj drift # behavioral drift Z-scores vs baseline
|
|
257
|
+
tj tools # tool call history with error rates
|
|
258
|
+
tj serve # start the web UI + REST API
|
|
221
259
|
```
|
|
222
260
|
|
|
223
261
|
---
|
|
@@ -226,8 +264,6 @@ tj serve # start the web UI + REST API
|
|
|
226
264
|
|
|
227
265
|
`tj serve` starts a local dashboard at `http://127.0.0.1:7391/`.
|
|
228
266
|
|
|
229
|
-
https://github.com/user-attachments/assets/ff09caec-3487-4542-8628-d62b7d92591f
|
|
230
|
-
|
|
231
267
|
- **Status** — agent overview with cost, tokens, tool calls, and active alerts
|
|
232
268
|
- **Traces** — trace list with span waterfall visualization
|
|
233
269
|
- **Cost** — breakdown by agent, model, day, or tool
|
|
@@ -237,6 +273,24 @@ https://github.com/user-attachments/assets/ff09caec-3487-4542-8628-d62b7d92591f
|
|
|
237
273
|
|
|
238
274
|
No signup, no cloud — runs entirely on your machine.
|
|
239
275
|
|
|
276
|
+
### Screenshots
|
|
277
|
+
|
|
278
|
+
<table>
|
|
279
|
+
<tr>
|
|
280
|
+
<td width="50%"><strong>Status</strong> — agent overview with cost, tokens, tool calls, and active alerts.<br><br><img src="docs/screenshots/tj-status.png" alt="tj status page" /></td>
|
|
281
|
+
<td width="50%"><strong>Traces</strong> — recent traces with cost, duration, and span count. Click a row for the waterfall view.<br><br><img src="docs/screenshots/tj-traces.png" alt="tj traces page" /></td>
|
|
282
|
+
</tr>
|
|
283
|
+
<tr>
|
|
284
|
+
<td width="50%"><strong>Cost</strong> — spend broken down by day, agent, model, or tool.<br><br><img src="docs/screenshots/tj-cost.png" alt="tj cost page" /></td>
|
|
285
|
+
<td width="50%"><strong>Alerts</strong> — full alert history with severity filter and inline detail expansion.<br><br><img src="docs/screenshots/tj-alerts.png" alt="tj alerts page" /></td>
|
|
286
|
+
</tr>
|
|
287
|
+
<tr>
|
|
288
|
+
<td colspan="2"><strong>Budget</strong> — view and edit daily/per-session cost limits per agent, with recent budget alerts inline.<br><br><img src="docs/screenshots/tj-budget.png" alt="tj budget page" /></td>
|
|
289
|
+
</tr>
|
|
290
|
+
</table>
|
|
291
|
+
|
|
292
|
+
|
|
293
|
+
|
|
240
294
|
---
|
|
241
295
|
|
|
242
296
|
## tj vs LangSmith vs Langfuse
|
|
@@ -248,6 +302,7 @@ LangSmith and Langfuse are excellent for tracing LLM API calls and running evals
|
|
|
248
302
|
| Signup required | ❌ | ✅ | ✅ | ✅ |
|
|
249
303
|
| Data leaves your machine | ❌ | ✅ | cloud only | ✅ |
|
|
250
304
|
| Real-time sensitive action alerts | ✅ | ❌ | ❌ | ❌ |
|
|
305
|
+
| Model-downgrade cost recommendations | ✅ | ❌ | ❌ | ❌ |
|
|
251
306
|
| Behavioral drift detection | ✅ | ❌ | ❌ | ❌ |
|
|
252
307
|
| Local-first, no cloud required | ✅ | ❌ | self-host only | ❌ |
|
|
253
308
|
| OTel GenAI SemConv native | ✅ | partial | partial | partial |
|
|
@@ -261,13 +316,13 @@ LangSmith and Langfuse are excellent for tracing LLM API calls and running evals
|
|
|
261
316
|
|
|
262
317
|
### Claude Code
|
|
263
318
|
|
|
264
|
-
Monitor every Claude Code session
|
|
319
|
+
Monitor every Claude Code session and get cost-optimization recommendations from your existing usage in three commands:
|
|
265
320
|
|
|
266
321
|
```bash
|
|
267
322
|
pip install "tokenjam[mcp]"
|
|
268
|
-
tj onboard --claude-code
|
|
269
|
-
#
|
|
270
|
-
|
|
323
|
+
tj onboard --claude-code # auto-backfills your existing session logs
|
|
324
|
+
tj optimize # cost-saving candidates + budget projection
|
|
325
|
+
# Then restart Claude Code so live telemetry starts flowing
|
|
271
326
|
```
|
|
272
327
|
|
|
273
328
|
`tj onboard --claude-code` does everything in one shot:
|
|
@@ -277,9 +332,28 @@ tj status --agent claude-code-<project>
|
|
|
277
332
|
- Registers the MCP server globally (`claude mcp add --scope user tj -- tj mcp`)
|
|
278
333
|
- Installs a background daemon (launchd on macOS, systemd on Linux)
|
|
279
334
|
- Adds Docker harness-compatible OTLP env vars to `~/.zshrc`
|
|
335
|
+
- **Reads your existing `~/.claude/projects/*.jsonl` session logs** and ingests them into the local DB so `tj optimize` returns real numbers on first run (idempotent — safe to re-run)
|
|
336
|
+
- Writes a sensible default `[budget.anthropic] usd = 200` ceiling for the budget projection to measure against — edit `~/.config/tj/config.toml` to change it
|
|
280
337
|
|
|
281
338
|
**Claude Code must be restarted** after running `tj onboard --claude-code`.
|
|
282
339
|
|
|
340
|
+
#### `tj optimize` — what you actually get
|
|
341
|
+
|
|
342
|
+
Two analyzers run over the spans TokenJam has captured. The output is read-only recommendations — `tj optimize` never changes how your agent runs.
|
|
343
|
+
|
|
344
|
+
**① Model-downgrade candidates.** Flags sessions whose structural shape (short input, short output, few tool calls) matches a class of work where a cheaper model in the same provider family is worth reviewing. Never asserts the cheaper model *would have produced the same answer* — only that the shape is worth a look. Real examples are surfaced so you can spot-check before changing models.
|
|
345
|
+
|
|
346
|
+
**② Budget projection.** Per-provider monthly projection against any `[budget.<provider>]` ceiling you've configured. Scopes spend by provider — an Anthropic budget excludes OpenAI spend. Shows exhaustion date, projected overage, and what the run rate would drop to if you acted on the downgrade candidates.
|
|
347
|
+
|
|
348
|
+
```bash
|
|
349
|
+
tj optimize # both analyzers, last 30 days
|
|
350
|
+
tj optimize --only budget # just the projection
|
|
351
|
+
tj optimize --budget anthropic --budget-usd 50 # test a different ceiling
|
|
352
|
+
tj optimize --json # machine-readable for piping
|
|
353
|
+
```
|
|
354
|
+
|
|
355
|
+
Works alongside a running `tj serve` (read-only fallback). Also exposed as the `get_optimize_report` MCP tool — your coding agent can ask itself "where could I save money?" mid-session.
|
|
356
|
+
|
|
283
357
|
**Adding more projects** — run once per project directory:
|
|
284
358
|
|
|
285
359
|
```bash
|
|
@@ -292,10 +366,11 @@ Each project gets its own agent ID (`claude-code-<repo-name>`), all sharing one
|
|
|
292
366
|
|
|
293
367
|
### MCP server
|
|
294
368
|
|
|
295
|
-
The MCP server gives Claude Code direct access to your observability data inside the session.
|
|
369
|
+
The MCP server gives Claude Code direct access to your observability data inside the session. 14 tools available after restart:
|
|
296
370
|
|
|
297
371
|
| Tool | What it does |
|
|
298
372
|
|---|---|
|
|
373
|
+
| `get_optimize_report` | Cost-saving candidates and budget projection — fires for either question (e.g. "where could I save money?" / "will I exceed my budget?") |
|
|
299
374
|
| `get_status` | Current agent state — tokens, cost, active alerts |
|
|
300
375
|
| `get_budget_headroom` | Budget limit vs spend |
|
|
301
376
|
| `list_active_sessions` | All running sessions across agents |
|
|
@@ -29,6 +29,46 @@ Your agent sends emails, writes files, calls APIs, and spends your money — all
|
|
|
29
29
|
|
|
30
30
|
## What you get
|
|
31
31
|
|
|
32
|
+
**Cost optimization for Claude Code — out of the box.** Run `tj onboard --claude-code` and TokenJam reads your existing Claude Code session logs (up to 30 days, or however much your local retention has kept) so you can run `tj optimize` immediately:
|
|
33
|
+
|
|
34
|
+
```
|
|
35
|
+
$ tj optimize --agent claude-code-myproj
|
|
36
|
+
Analyzing 39 sessions, 1.8M tokens, $160.3500 spend (last 30d,
|
|
37
|
+
claude-code-myproj)…
|
|
38
|
+
|
|
39
|
+
① Model downgrade: 13% of sessions match a smaller-model candidate shape
|
|
40
|
+
• 5 of 39 sessions matched structural heuristics
|
|
41
|
+
• Would have cost ~$0.0140 on the smaller model vs $2.2500 actual (in
|
|
42
|
+
window)
|
|
43
|
+
• Projected savings if pattern holds: $2.2400/mo
|
|
44
|
+
• Pattern: claude-opus-4-7 → claude-haiku-4-5
|
|
45
|
+
|
|
46
|
+
Examples:
|
|
47
|
+
2cce7903.. 2 tool calls 0.8s $0.4500 (claude-opus-4-7)
|
|
48
|
+
e292ccbe.. 2 tool calls 0.8s $0.4500 (claude-opus-4-7)
|
|
49
|
+
d59cb502.. 2 tool calls 0.8s $0.4500 (claude-opus-4-7)
|
|
50
|
+
|
|
51
|
+
! Candidate-flagging heuristic, not a quality judgment. Review the
|
|
52
|
+
example sessions before changing models.
|
|
53
|
+
|
|
54
|
+
② Budget projection (anthropic, $200.0000/cycle): comfortably within budget
|
|
55
|
+
Run rate $160.3500/mo — 19% of cycle budget unused.
|
|
56
|
+
```
|
|
57
|
+
|
|
58
|
+
Two analyzers read the same spans you'd otherwise pay LangSmith to host: structural model-downgrade candidate flagging (which never claims quality equivalence — it surfaces examples to review) and per-provider monthly budget projection. Both work with **any** agent already sending TokenJam data, not just Claude Code.
|
|
59
|
+
|
|
60
|
+
Try a tighter budget to see the over-budget renderer:
|
|
61
|
+
|
|
62
|
+
```
|
|
63
|
+
$ tj optimize --budget anthropic --budget-usd 50
|
|
64
|
+
② Budget projection (anthropic, $50.0000/cycle): projected to exceed cycle
|
|
65
|
+
budget
|
|
66
|
+
• Monthly run rate: $160.3500 (3.2× the budget)
|
|
67
|
+
• At current pace, budget exhausted on 2026-05-15 (0.0 day(s) from now)
|
|
68
|
+
• Days remaining in cycle: 16
|
|
69
|
+
• Projected cycle total: $162.8700, overage: $112.8700
|
|
70
|
+
```
|
|
71
|
+
|
|
32
72
|
**Real-time cost tracking.** Every LLM call is priced as it happens — by agent, model, session, and tool. Budget alerts fire before you hit the limit, not after.
|
|
33
73
|
|
|
34
74
|
**Safety alerts.** Configure any tool call as a sensitive action (`send_email`, `delete_file`, `submit_form`) and get notified instantly via ntfy, Discord, Telegram, webhook, or stdout.
|
|
@@ -52,10 +92,11 @@ For **Claude Code**, **Codex**, and any agent that already emits OpenTelemetry.
|
|
|
52
92
|
```bash
|
|
53
93
|
pip install "tokenjam[mcp]"
|
|
54
94
|
tj onboard --claude-code # or: tj onboard --codex
|
|
55
|
-
#
|
|
95
|
+
tj optimize # see cost-saving candidates + budget projection
|
|
96
|
+
# Restart your coding agent for live telemetry
|
|
56
97
|
```
|
|
57
98
|
|
|
58
|
-
|
|
99
|
+
`tj onboard --claude-code` auto-backfills your existing session logs from `~/.claude/projects/` so `tj optimize` works on the first run — no waiting for new data to accumulate. The MCP server gives your coding agent 14 tools to query its own telemetry mid-session — just ask "how much have I spent today?" or "where could I save money?"
|
|
59
100
|
|
|
60
101
|
[Full Claude Code & Codex setup →](#claude-code--coding-agents)
|
|
61
102
|
|
|
@@ -138,9 +179,6 @@ export OTEL_EXPORTER_OTLP_ENDPOINT=http://127.0.0.1:7391
|
|
|
138
179
|
|
|
139
180
|
```
|
|
140
181
|
tj status
|
|
141
|
-
```
|
|
142
|
-
|
|
143
|
-
```
|
|
144
182
|
● my-email-agent completed (2m 14s)
|
|
145
183
|
|
|
146
184
|
Cost today: $0.0340 / $5.0000 limit
|
|
@@ -151,17 +189,17 @@ tj status
|
|
|
151
189
|
send_email called (sensitive action: critical)
|
|
152
190
|
```
|
|
153
191
|
|
|
154
|
-
https://github.com/user-attachments/assets/b94d13f6-1432-40d4-b093-6958d74f0e65
|
|
155
|
-
|
|
156
192
|
```bash
|
|
157
|
-
tj status
|
|
158
|
-
tj traces
|
|
159
|
-
tj cost --since 7d
|
|
160
|
-
tj
|
|
161
|
-
tj
|
|
162
|
-
tj
|
|
163
|
-
tj
|
|
164
|
-
tj
|
|
193
|
+
tj status # current state, cost, active alerts
|
|
194
|
+
tj traces # full span history with waterfall view
|
|
195
|
+
tj cost --since 7d # cost breakdown by agent, model, day
|
|
196
|
+
tj optimize # cost-saving candidates + budget projection
|
|
197
|
+
tj backfill claude-code # ingest historical sessions from ~/.claude/projects/
|
|
198
|
+
tj alerts # everything that fired while you were away
|
|
199
|
+
tj budget # view and set daily/session cost limits
|
|
200
|
+
tj drift # behavioral drift Z-scores vs baseline
|
|
201
|
+
tj tools # tool call history with error rates
|
|
202
|
+
tj serve # start the web UI + REST API
|
|
165
203
|
```
|
|
166
204
|
|
|
167
205
|
---
|
|
@@ -170,8 +208,6 @@ tj serve # start the web UI + REST API
|
|
|
170
208
|
|
|
171
209
|
`tj serve` starts a local dashboard at `http://127.0.0.1:7391/`.
|
|
172
210
|
|
|
173
|
-
https://github.com/user-attachments/assets/ff09caec-3487-4542-8628-d62b7d92591f
|
|
174
|
-
|
|
175
211
|
- **Status** — agent overview with cost, tokens, tool calls, and active alerts
|
|
176
212
|
- **Traces** — trace list with span waterfall visualization
|
|
177
213
|
- **Cost** — breakdown by agent, model, day, or tool
|
|
@@ -181,6 +217,24 @@ https://github.com/user-attachments/assets/ff09caec-3487-4542-8628-d62b7d92591f
|
|
|
181
217
|
|
|
182
218
|
No signup, no cloud — runs entirely on your machine.
|
|
183
219
|
|
|
220
|
+
### Screenshots
|
|
221
|
+
|
|
222
|
+
<table>
|
|
223
|
+
<tr>
|
|
224
|
+
<td width="50%"><strong>Status</strong> — agent overview with cost, tokens, tool calls, and active alerts.<br><br><img src="docs/screenshots/tj-status.png" alt="tj status page" /></td>
|
|
225
|
+
<td width="50%"><strong>Traces</strong> — recent traces with cost, duration, and span count. Click a row for the waterfall view.<br><br><img src="docs/screenshots/tj-traces.png" alt="tj traces page" /></td>
|
|
226
|
+
</tr>
|
|
227
|
+
<tr>
|
|
228
|
+
<td width="50%"><strong>Cost</strong> — spend broken down by day, agent, model, or tool.<br><br><img src="docs/screenshots/tj-cost.png" alt="tj cost page" /></td>
|
|
229
|
+
<td width="50%"><strong>Alerts</strong> — full alert history with severity filter and inline detail expansion.<br><br><img src="docs/screenshots/tj-alerts.png" alt="tj alerts page" /></td>
|
|
230
|
+
</tr>
|
|
231
|
+
<tr>
|
|
232
|
+
<td colspan="2"><strong>Budget</strong> — view and edit daily/per-session cost limits per agent, with recent budget alerts inline.<br><br><img src="docs/screenshots/tj-budget.png" alt="tj budget page" /></td>
|
|
233
|
+
</tr>
|
|
234
|
+
</table>
|
|
235
|
+
|
|
236
|
+
|
|
237
|
+
|
|
184
238
|
---
|
|
185
239
|
|
|
186
240
|
## tj vs LangSmith vs Langfuse
|
|
@@ -192,6 +246,7 @@ LangSmith and Langfuse are excellent for tracing LLM API calls and running evals
|
|
|
192
246
|
| Signup required | ❌ | ✅ | ✅ | ✅ |
|
|
193
247
|
| Data leaves your machine | ❌ | ✅ | cloud only | ✅ |
|
|
194
248
|
| Real-time sensitive action alerts | ✅ | ❌ | ❌ | ❌ |
|
|
249
|
+
| Model-downgrade cost recommendations | ✅ | ❌ | ❌ | ❌ |
|
|
195
250
|
| Behavioral drift detection | ✅ | ❌ | ❌ | ❌ |
|
|
196
251
|
| Local-first, no cloud required | ✅ | ❌ | self-host only | ❌ |
|
|
197
252
|
| OTel GenAI SemConv native | ✅ | partial | partial | partial |
|
|
@@ -205,13 +260,13 @@ LangSmith and Langfuse are excellent for tracing LLM API calls and running evals
|
|
|
205
260
|
|
|
206
261
|
### Claude Code
|
|
207
262
|
|
|
208
|
-
Monitor every Claude Code session
|
|
263
|
+
Monitor every Claude Code session and get cost-optimization recommendations from your existing usage in three commands:
|
|
209
264
|
|
|
210
265
|
```bash
|
|
211
266
|
pip install "tokenjam[mcp]"
|
|
212
|
-
tj onboard --claude-code
|
|
213
|
-
#
|
|
214
|
-
|
|
267
|
+
tj onboard --claude-code # auto-backfills your existing session logs
|
|
268
|
+
tj optimize # cost-saving candidates + budget projection
|
|
269
|
+
# Then restart Claude Code so live telemetry starts flowing
|
|
215
270
|
```
|
|
216
271
|
|
|
217
272
|
`tj onboard --claude-code` does everything in one shot:
|
|
@@ -221,9 +276,28 @@ tj status --agent claude-code-<project>
|
|
|
221
276
|
- Registers the MCP server globally (`claude mcp add --scope user tj -- tj mcp`)
|
|
222
277
|
- Installs a background daemon (launchd on macOS, systemd on Linux)
|
|
223
278
|
- Adds Docker harness-compatible OTLP env vars to `~/.zshrc`
|
|
279
|
+
- **Reads your existing `~/.claude/projects/*.jsonl` session logs** and ingests them into the local DB so `tj optimize` returns real numbers on first run (idempotent — safe to re-run)
|
|
280
|
+
- Writes a sensible default `[budget.anthropic] usd = 200` ceiling for the budget projection to measure against — edit `~/.config/tj/config.toml` to change it
|
|
224
281
|
|
|
225
282
|
**Claude Code must be restarted** after running `tj onboard --claude-code`.
|
|
226
283
|
|
|
284
|
+
#### `tj optimize` — what you actually get
|
|
285
|
+
|
|
286
|
+
Two analyzers run over the spans TokenJam has captured. The output is read-only recommendations — `tj optimize` never changes how your agent runs.
|
|
287
|
+
|
|
288
|
+
**① Model-downgrade candidates.** Flags sessions whose structural shape (short input, short output, few tool calls) matches a class of work where a cheaper model in the same provider family is worth reviewing. Never asserts the cheaper model *would have produced the same answer* — only that the shape is worth a look. Real examples are surfaced so you can spot-check before changing models.
|
|
289
|
+
|
|
290
|
+
**② Budget projection.** Per-provider monthly projection against any `[budget.<provider>]` ceiling you've configured. Scopes spend by provider — an Anthropic budget excludes OpenAI spend. Shows exhaustion date, projected overage, and what the run rate would drop to if you acted on the downgrade candidates.
|
|
291
|
+
|
|
292
|
+
```bash
|
|
293
|
+
tj optimize # both analyzers, last 30 days
|
|
294
|
+
tj optimize --only budget # just the projection
|
|
295
|
+
tj optimize --budget anthropic --budget-usd 50 # test a different ceiling
|
|
296
|
+
tj optimize --json # machine-readable for piping
|
|
297
|
+
```
|
|
298
|
+
|
|
299
|
+
Works alongside a running `tj serve` (read-only fallback). Also exposed as the `get_optimize_report` MCP tool — your coding agent can ask itself "where could I save money?" mid-session.
|
|
300
|
+
|
|
227
301
|
**Adding more projects** — run once per project directory:
|
|
228
302
|
|
|
229
303
|
```bash
|
|
@@ -236,10 +310,11 @@ Each project gets its own agent ID (`claude-code-<repo-name>`), all sharing one
|
|
|
236
310
|
|
|
237
311
|
### MCP server
|
|
238
312
|
|
|
239
|
-
The MCP server gives Claude Code direct access to your observability data inside the session.
|
|
313
|
+
The MCP server gives Claude Code direct access to your observability data inside the session. 14 tools available after restart:
|
|
240
314
|
|
|
241
315
|
| Tool | What it does |
|
|
242
316
|
|---|---|
|
|
317
|
+
| `get_optimize_report` | Cost-saving candidates and budget projection — fires for either question (e.g. "where could I save money?" / "will I exceed my budget?") |
|
|
243
318
|
| `get_status` | Current agent state — tokens, cost, active alerts |
|
|
244
319
|
| `get_budget_headroom` | Budget limit vs spend |
|
|
245
320
|
| `list_active_sessions` | All running sessions across agents |
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
@@ -1,12 +1,12 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@tokenjam/sdk",
|
|
3
|
-
"version": "0.2.
|
|
3
|
+
"version": "0.2.3",
|
|
4
4
|
"lockfileVersion": 3,
|
|
5
5
|
"requires": true,
|
|
6
6
|
"packages": {
|
|
7
7
|
"": {
|
|
8
8
|
"name": "@tokenjam/sdk",
|
|
9
|
-
"version": "0.2.
|
|
9
|
+
"version": "0.2.3",
|
|
10
10
|
"license": "MIT",
|
|
11
11
|
"devDependencies": {
|
|
12
12
|
"@types/node": "^25.5.0",
|
|
@@ -579,6 +579,84 @@ def test_budget_set_agent_writes_config(runner, db, config, tmp_path):
|
|
|
579
579
|
assert saved_config.agents["test-agent"].budget.session_usd == 0.25
|
|
580
580
|
|
|
581
581
|
|
|
582
|
+
def test_optimize_empty_db_outputs_friendly_message(runner, db, config):
|
|
583
|
+
result = _invoke(runner, db, config, ["optimize"])
|
|
584
|
+
assert result.exit_code == 0
|
|
585
|
+
assert "No usage data found" in result.output
|
|
586
|
+
|
|
587
|
+
|
|
588
|
+
def test_optimize_flags_downgrade_candidate(runner, db, config):
|
|
589
|
+
"""A small Opus session in the window should appear as a candidate."""
|
|
590
|
+
from datetime import timedelta
|
|
591
|
+
from tests.factories import make_llm_span
|
|
592
|
+
from tokenjam.utils.time_parse import utcnow
|
|
593
|
+
|
|
594
|
+
start = utcnow() - timedelta(days=2)
|
|
595
|
+
span = make_llm_span(
|
|
596
|
+
agent_id="test-agent",
|
|
597
|
+
model="claude-opus-4-7",
|
|
598
|
+
provider="anthropic",
|
|
599
|
+
input_tokens=1000,
|
|
600
|
+
output_tokens=200,
|
|
601
|
+
cost_usd=0.030,
|
|
602
|
+
session_id="s-opus",
|
|
603
|
+
start_time=start,
|
|
604
|
+
)
|
|
605
|
+
db.insert_span(span)
|
|
606
|
+
|
|
607
|
+
result = _invoke(runner, db, config, ["optimize"])
|
|
608
|
+
assert result.exit_code == 0
|
|
609
|
+
assert "Model downgrade" in result.output
|
|
610
|
+
# Mandatory caveat must appear in human output
|
|
611
|
+
assert "Candidate-flagging heuristic" in result.output
|
|
612
|
+
|
|
613
|
+
|
|
614
|
+
def test_optimize_json_output_includes_caveat(runner, db, config):
|
|
615
|
+
from datetime import timedelta
|
|
616
|
+
from tests.factories import make_llm_span
|
|
617
|
+
from tokenjam.utils.time_parse import utcnow
|
|
618
|
+
|
|
619
|
+
span = make_llm_span(
|
|
620
|
+
agent_id="test-agent", model="claude-opus-4-7", provider="anthropic",
|
|
621
|
+
input_tokens=1000, output_tokens=200, cost_usd=0.030,
|
|
622
|
+
session_id="s", start_time=utcnow() - timedelta(days=1),
|
|
623
|
+
)
|
|
624
|
+
db.insert_span(span)
|
|
625
|
+
|
|
626
|
+
result = _invoke(runner, db, config, ["optimize", "--json"])
|
|
627
|
+
assert result.exit_code == 0
|
|
628
|
+
data = json.loads(result.output)
|
|
629
|
+
assert data["downgrade"] is not None
|
|
630
|
+
assert "Candidate-flagging heuristic" in data["downgrade"]["caveat"]
|
|
631
|
+
|
|
632
|
+
|
|
633
|
+
def test_optimize_budget_projection_from_config(runner, db):
|
|
634
|
+
"""Budget configured via [budget.anthropic] should surface a projection."""
|
|
635
|
+
from datetime import timedelta
|
|
636
|
+
from tests.factories import make_llm_span
|
|
637
|
+
from tokenjam.core.config import ProviderBudget
|
|
638
|
+
from tokenjam.utils.time_parse import utcnow
|
|
639
|
+
|
|
640
|
+
cfg = TjConfig(
|
|
641
|
+
version="1",
|
|
642
|
+
agents={"test-agent": AgentConfig(budget=BudgetConfig(daily_usd=5.0))},
|
|
643
|
+
budgets={"anthropic": ProviderBudget(usd=10.0, cycle_start_day=1)},
|
|
644
|
+
)
|
|
645
|
+
# Insert spend that exceeds the small budget
|
|
646
|
+
for i in range(5):
|
|
647
|
+
span = make_llm_span(
|
|
648
|
+
agent_id="test-agent", model="claude-opus-4-7", provider="anthropic",
|
|
649
|
+
input_tokens=10_000, output_tokens=1_000, cost_usd=20.0,
|
|
650
|
+
session_id=f"s{i}", start_time=utcnow() - timedelta(days=1),
|
|
651
|
+
)
|
|
652
|
+
db.insert_span(span)
|
|
653
|
+
|
|
654
|
+
result = _invoke(runner, db, cfg, ["optimize", "--only", "budget"])
|
|
655
|
+
assert result.exit_code == 0
|
|
656
|
+
assert "Budget projection" in result.output
|
|
657
|
+
assert "anthropic" in result.output
|
|
658
|
+
|
|
659
|
+
|
|
582
660
|
def test_budget_set_negative_daily_rejected(runner, db, config, tmp_path):
|
|
583
661
|
"""tj budget --daily -5 should error, not silently clear the limit."""
|
|
584
662
|
config_file = tmp_path / "config.toml"
|