tokenmizer 0.2.4__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (108) hide show
  1. tokenmizer-0.2.4/.claude-plugin/marketplace.json +31 -0
  2. tokenmizer-0.2.4/.claude-plugin/plugin.json +39 -0
  3. tokenmizer-0.2.4/.claude-plugin/skills/analyze/SKILL.md +58 -0
  4. tokenmizer-0.2.4/.claude-plugin/skills/checkpoint/SKILL.md +56 -0
  5. tokenmizer-0.2.4/.claude-plugin/skills/resume/SKILL.md +49 -0
  6. tokenmizer-0.2.4/.claude-plugin/skills/stats/SKILL.md +57 -0
  7. tokenmizer-0.2.4/.github/ISSUE_TEMPLATE/bug_report.md +24 -0
  8. tokenmizer-0.2.4/.github/ISSUE_TEMPLATE/extraction_miss.md +19 -0
  9. tokenmizer-0.2.4/.github/PULL_REQUEST_TEMPLATE.md +20 -0
  10. tokenmizer-0.2.4/.github/workflows/ci.yml +101 -0
  11. tokenmizer-0.2.4/.github/workflows/release.yml +57 -0
  12. tokenmizer-0.2.4/.gitignore +15 -0
  13. tokenmizer-0.2.4/.mcp.json +12 -0
  14. tokenmizer-0.2.4/CHANGELOG.md +280 -0
  15. tokenmizer-0.2.4/CONTRIBUTING.md +159 -0
  16. tokenmizer-0.2.4/Dockerfile +51 -0
  17. tokenmizer-0.2.4/LICENSE +21 -0
  18. tokenmizer-0.2.4/PKG-INFO +529 -0
  19. tokenmizer-0.2.4/README.md +454 -0
  20. tokenmizer-0.2.4/SECURITY.md +166 -0
  21. tokenmizer-0.2.4/TESTING.md +83 -0
  22. tokenmizer-0.2.4/USAGE.md +526 -0
  23. tokenmizer-0.2.4/benchmarks/__init__.py +0 -0
  24. tokenmizer-0.2.4/benchmarks/checkpoint_accuracy/__init__.py +0 -0
  25. tokenmizer-0.2.4/benchmarks/checkpoint_accuracy/runner.py +213 -0
  26. tokenmizer-0.2.4/benchmarks/checkpoint_accuracy/runner_v2.py +289 -0
  27. tokenmizer-0.2.4/benchmarks/checkpoint_accuracy/runner_v3.py +250 -0
  28. tokenmizer-0.2.4/benchmarks/graph_retrieval/__init__.py +0 -0
  29. tokenmizer-0.2.4/benchmarks/graph_retrieval/runner.py +98 -0
  30. tokenmizer-0.2.4/benchmarks/latency/__init__.py +0 -0
  31. tokenmizer-0.2.4/benchmarks/latency/runner.py +97 -0
  32. tokenmizer-0.2.4/benchmarks/resume_quality/__init__.py +0 -0
  33. tokenmizer-0.2.4/docker-compose.yml +43 -0
  34. tokenmizer-0.2.4/docs/assets/architecture.svg +169 -0
  35. tokenmizer-0.2.4/docs/assets/logo.svg +246 -0
  36. tokenmizer-0.2.4/examples/basic_usage.py +156 -0
  37. tokenmizer-0.2.4/pyproject.toml +113 -0
  38. tokenmizer-0.2.4/scripts/install.sh +260 -0
  39. tokenmizer-0.2.4/scripts/run_stdlib_tests.py +398 -0
  40. tokenmizer-0.2.4/scripts/setup.sh +89 -0
  41. tokenmizer-0.2.4/scripts/static_audit.py +126 -0
  42. tokenmizer-0.2.4/tests/__init__.py +0 -0
  43. tokenmizer-0.2.4/tests/chaos/__init__.py +0 -0
  44. tokenmizer-0.2.4/tests/chaos/test_recovery.py +154 -0
  45. tokenmizer-0.2.4/tests/conftest.py +14 -0
  46. tokenmizer-0.2.4/tests/integration/__init__.py +0 -0
  47. tokenmizer-0.2.4/tests/integration/test_api_endpoint.py +159 -0
  48. tokenmizer-0.2.4/tests/integration/test_checkpoint.py +142 -0
  49. tokenmizer-0.2.4/tests/memory_accuracy/__init__.py +0 -0
  50. tokenmizer-0.2.4/tests/memory_accuracy/test_retention.py +160 -0
  51. tokenmizer-0.2.4/tests/unit/__init__.py +0 -0
  52. tokenmizer-0.2.4/tests/unit/test_cache.py +102 -0
  53. tokenmizer-0.2.4/tests/unit/test_compression_correctness.py +151 -0
  54. tokenmizer-0.2.4/tests/unit/test_decision_cache_async.py +274 -0
  55. tokenmizer-0.2.4/tests/unit/test_file_intelligence.py +330 -0
  56. tokenmizer-0.2.4/tests/unit/test_graph.py +182 -0
  57. tokenmizer-0.2.4/tests/unit/test_graph_persistence.py +200 -0
  58. tokenmizer-0.2.4/tests/unit/test_hybrid_extractor.py +148 -0
  59. tokenmizer-0.2.4/tests/unit/test_rate_limiter.py +48 -0
  60. tokenmizer-0.2.4/tests/unit/test_security.py +294 -0
  61. tokenmizer-0.2.4/tests/unit/test_tokenizer.py +48 -0
  62. tokenmizer-0.2.4/tests/unit/test_validator.py +186 -0
  63. tokenmizer-0.2.4/tokenmizer/__init__.py +21 -0
  64. tokenmizer-0.2.4/tokenmizer/agents/__init__.py +0 -0
  65. tokenmizer-0.2.4/tokenmizer/analytics/__init__.py +0 -0
  66. tokenmizer-0.2.4/tokenmizer/analytics/engine.py +188 -0
  67. tokenmizer-0.2.4/tokenmizer/api/__init__.py +0 -0
  68. tokenmizer-0.2.4/tokenmizer/api/app.py +958 -0
  69. tokenmizer-0.2.4/tokenmizer/api/rate_limiter.py +110 -0
  70. tokenmizer-0.2.4/tokenmizer/checkpoints/__init__.py +0 -0
  71. tokenmizer-0.2.4/tokenmizer/checkpoints/manager.py +383 -0
  72. tokenmizer-0.2.4/tokenmizer/cli.py +153 -0
  73. tokenmizer-0.2.4/tokenmizer/compression/__init__.py +0 -0
  74. tokenmizer-0.2.4/tokenmizer/compression/engine.py +669 -0
  75. tokenmizer-0.2.4/tokenmizer/compression/output_trimmer.py +95 -0
  76. tokenmizer-0.2.4/tokenmizer/compression/window.py +104 -0
  77. tokenmizer-0.2.4/tokenmizer/config/__init__.py +0 -0
  78. tokenmizer-0.2.4/tokenmizer/config/settings.py +170 -0
  79. tokenmizer-0.2.4/tokenmizer/core/__init__.py +0 -0
  80. tokenmizer-0.2.4/tokenmizer/core/dto.py +196 -0
  81. tokenmizer-0.2.4/tokenmizer/core/errors.py +35 -0
  82. tokenmizer-0.2.4/tokenmizer/core/tokenizer.py +96 -0
  83. tokenmizer-0.2.4/tokenmizer/dashboard/__init__.py +0 -0
  84. tokenmizer-0.2.4/tokenmizer/dashboard/page.py +267 -0
  85. tokenmizer-0.2.4/tokenmizer/filters/__init__.py +0 -0
  86. tokenmizer-0.2.4/tokenmizer/filters/file_intelligence.py +960 -0
  87. tokenmizer-0.2.4/tokenmizer/graph_memory/__init__.py +0 -0
  88. tokenmizer-0.2.4/tokenmizer/graph_memory/decision_tracker.py +225 -0
  89. tokenmizer-0.2.4/tokenmizer/graph_memory/graph.py +1287 -0
  90. tokenmizer-0.2.4/tokenmizer/graph_memory/helpers.py +121 -0
  91. tokenmizer-0.2.4/tokenmizer/graph_memory/hybrid_extractor.py +703 -0
  92. tokenmizer-0.2.4/tokenmizer/graph_memory/types.py +134 -0
  93. tokenmizer-0.2.4/tokenmizer/graph_memory/validator.py +304 -0
  94. tokenmizer-0.2.4/tokenmizer/graph_memory/visualization.py +228 -0
  95. tokenmizer-0.2.4/tokenmizer/mcp/__init__.py +0 -0
  96. tokenmizer-0.2.4/tokenmizer/mcp/server.py +368 -0
  97. tokenmizer-0.2.4/tokenmizer/providers/__init__.py +0 -0
  98. tokenmizer-0.2.4/tokenmizer/providers/providers.py +456 -0
  99. tokenmizer-0.2.4/tokenmizer/security/__init__.py +0 -0
  100. tokenmizer-0.2.4/tokenmizer/security/auth.py +95 -0
  101. tokenmizer-0.2.4/tokenmizer/security/middleware.py +138 -0
  102. tokenmizer-0.2.4/tokenmizer/security/redaction.py +126 -0
  103. tokenmizer-0.2.4/tokenmizer/semantic_cache/__init__.py +0 -0
  104. tokenmizer-0.2.4/tokenmizer/semantic_cache/cache.py +383 -0
  105. tokenmizer-0.2.4/tokenmizer/state/__init__.py +0 -0
  106. tokenmizer-0.2.4/tokenmizer/state/backend.py +137 -0
  107. tokenmizer-0.2.4/tokenmizer/storage/__init__.py +56 -0
  108. tokenmizer-0.2.4/tokenmizer.yaml +70 -0
@@ -0,0 +1,31 @@
1
+ {
2
+ "name": "tokenmizer",
3
+ "version": "0.2.4",
4
+ "description": "Never lose your AI context again. Graph-backed memory, session checkpointing, and file intelligence for any LLM.",
5
+ "homepage": "https://github.com/Shweta-Mishra-ai/tokenmizer",
6
+ "repository": "https://github.com/Shweta-Mishra-ai/tokenmizer",
7
+ "author": {
8
+ "name": "Shweta Mishra",
9
+ "url": "https://github.com/Shweta-Mishra-ai"
10
+ },
11
+ "license": "MIT",
12
+ "keywords": [
13
+ "memory",
14
+ "checkpoint",
15
+ "resume",
16
+ "context",
17
+ "graph",
18
+ "token-optimization",
19
+ "mcp",
20
+ "anthropic",
21
+ "openai",
22
+ "llm"
23
+ ],
24
+ "plugins": [
25
+ {
26
+ "name": "tokenmizer",
27
+ "path": ".",
28
+ "description": "Graph memory + checkpoints + file intelligence for any LLM session"
29
+ }
30
+ ]
31
+ }
@@ -0,0 +1,39 @@
1
+ {
2
+ "name": "tokenmizer",
3
+ "version": "0.2.4",
4
+ "description": "Never lose your AI context again. Graph-backed memory, session checkpointing, and file intelligence for Claude Code and any LLM.",
5
+ "author": {
6
+ "name": "Shweta Mishra",
7
+ "url": "https://github.com/Shweta-Mishra-ai"
8
+ },
9
+ "homepage": "https://github.com/Shweta-Mishra-ai/tokenmizer",
10
+ "repository": "https://github.com/Shweta-Mishra-ai/tokenmizer",
11
+ "license": "MIT",
12
+ "keywords": [
13
+ "memory",
14
+ "checkpoint",
15
+ "resume",
16
+ "context",
17
+ "token-optimization",
18
+ "graph",
19
+ "mcp"
20
+ ],
21
+ "skills": [
22
+ "./skills/checkpoint",
23
+ "./skills/resume",
24
+ "./skills/analyze",
25
+ "./skills/stats"
26
+ ],
27
+ "mcpServers": {
28
+ "tokenmizer": {
29
+ "command": "python3",
30
+ "args": [
31
+ "-m",
32
+ "tokenmizer.mcp.server"
33
+ ],
34
+ "env": {
35
+ "TOKENMIZER_URL": "http://localhost:8000"
36
+ }
37
+ }
38
+ }
39
+ }
@@ -0,0 +1,58 @@
1
+ ---
2
+ name: analyze
3
+ description: Analyze a large file (CSV, Excel, PDF, JSON, code) and return a token-efficient summary. Instead of reading thousands of rows or pages, get schema + statistics + sample in under 500 tokens. Use when user mentions a file path, asks to analyze data, pastes many rows, or references a CSV/Excel/PDF/JSON file.
4
+ ---
5
+
6
+ Analyze a file using TokenMizer's file intelligence layer.
7
+
8
+ ## IMPORTANT rule
9
+
10
+ **Never ask the user to paste the file content.** Always call TokenMizer to analyze it from the path. Pasting a 50,000-row CSV = 400,000 tokens. TokenMizer reduces it to ~450 tokens.
11
+
12
+ ## What to do
13
+
14
+ Parse $ARGUMENTS:
15
+ - First word = file path
16
+ - Remaining words = query (what user wants to know)
17
+
18
+ ```bash
19
+ FILE_PATH=$(echo "$ARGUMENTS" | awk '{print $1}')
20
+ QUERY=$(echo "$ARGUMENTS" | cut -s -d' ' -f2-)
21
+
22
+ FILE_PATH="$FILE_PATH" QUERY="$QUERY" python3 -c "
23
+ import os; from tokenmizer.filters.file_intelligence import FileIntelligence
24
+ fi = FileIntelligence()
25
+ result = fi.process(
26
+ open(os.environ['FILE_PATH'], 'rb').read(),
27
+ os.environ['FILE_PATH'].split('/')[-1],
28
+ token_budget=600,
29
+ query=os.environ['QUERY']
30
+ )
31
+ print(f'File: {result.file_type} | {result.original_tokens:,} → {result.extracted_tokens} tokens ({result.savings_pct:.0f}% saved)')
32
+ print()
33
+ print(result.content)
34
+ "
35
+ ```
36
+
37
+ ## Token savings by file type
38
+
39
+ | Type | Typical savings |
40
+ |---|---|
41
+ | CSV (50k rows) | 99.9% |
42
+ | PDF (200 pages) | 98.8% |
43
+ | Excel (10 sheets) | 99.7% |
44
+ | JSON (1k items) | 95% |
45
+ | Code (large file) | 60-80% |
46
+
47
+ ## If TokenMizer not installed
48
+
49
+ ```bash
50
+ pip install "tokenmizer[anthropic]"
51
+ ```
52
+
53
+ ## Examples of $ARGUMENTS
54
+
55
+ - `/data/sales.csv` → analyze with no specific query
56
+ - `/data/sales.csv which regions are underperforming` → targeted analysis
57
+ - `/reports/Q1.pdf key findings and risks` → relevant page extraction
58
+ - `/data/users.xlsx find inactive accounts` → per-sheet analysis
@@ -0,0 +1,56 @@
1
+ ---
2
+ name: checkpoint
3
+ description: Save the current session to TokenMizer graph memory. Creates a persistent checkpoint with all tasks, decisions, files, and errors — resumable in any future session. Use when user says "save", "checkpoint", "remember this", "I'm done for today", or session is getting long.
4
+ ---
5
+
6
+ Save the current session to TokenMizer graph memory.
7
+
8
+ ## What to do
9
+
10
+ 1. Ask the user for a session ID if not provided (suggest a slug based on what you're working on, e.g. "auth-service", "data-pipeline", "my-project")
11
+ 2. Call the TokenMizer checkpoint API:
12
+
13
+ ```bash
14
+ curl -s -X POST "http://localhost:8000/api/checkpoint?session_id=$ARGUMENTS" \
15
+ -H "Content-Type: application/json"
16
+ ```
17
+
18
+ 3. Show the user what was saved:
19
+ - Checkpoint ID
20
+ - Number of nodes in graph
21
+ - Resume token count
22
+ - The resume context block
23
+
24
+ ## If TokenMizer is not running
25
+
26
+ Tell the user to start it first:
27
+ ```bash
28
+ tokenmizer serve
29
+ # or
30
+ python3 -m tokenmizer.api.app
31
+ ```
32
+
33
+ ## Session ID rules
34
+ - Use lowercase slugs: `auth-service` not `Auth Service`
35
+ - Keep it short and meaningful
36
+ - Same ID across sessions for the same project
37
+
38
+ ## Example output to show user
39
+
40
+ ```
41
+ ✅ Session 'auth-service' saved
42
+
43
+ Checkpoint: ckpt_a3f9b2
44
+ Nodes: 14 (6 tasks, 4 decisions, 3 files, 1 error)
45
+ Resume size: 247 tokens
46
+
47
+ Resume context:
48
+ Goal: Build FastAPI auth service with JWT
49
+ Done: Project setup | User model | Login endpoint | Fix 422
50
+ In progress: Refresh token rotation
51
+ Decided: PostgreSQL | bcrypt | Redis for tokens
52
+ Files: api/auth.py, api/models.py, config.py
53
+ Continue: Implement token refresh endpoint
54
+ ```
55
+
56
+ If $ARGUMENTS is empty, ask: "What should I call this session? (e.g. my-project)"
@@ -0,0 +1,49 @@
1
+ ---
2
+ name: resume
3
+ description: Load a previous session from TokenMizer graph memory. Returns a compact context block (100-600 tokens) covering goals, completed work, decisions, open tasks, and files. Inject this as system context to continue exactly where you left off. Use when user says "resume", "continue from last time", "load my project", "what did we do on X", or starts a session on a known project.
4
+ ---
5
+
6
+ Load a previous session from TokenMizer graph memory.
7
+
8
+ ## What to do
9
+
10
+ 1. Get the session ID from $ARGUMENTS (or ask the user if not provided)
11
+ 2. Determine the level from $ARGUMENTS:
12
+ - `critical` = ~100 tokens, only open blockers + key decisions
13
+ - `standard` = ~300 tokens, normal resume (default)
14
+ - `full` = ~600 tokens, everything including env, schemas, endpoints
15
+
16
+ 3. Call the TokenMizer resume API:
17
+
18
+ ```bash
19
+ SESSION_ID=$(echo "$ARGUMENTS" | awk '{print $1}')
20
+ LEVEL=$(echo "$ARGUMENTS" | awk '{print $2}')
21
+ LEVEL=${LEVEL:-standard}
22
+
23
+ curl -s "http://localhost:8000/api/resume/${SESSION_ID}?level=${LEVEL}"
24
+ ```
25
+
26
+ 4. Inject the returned `resume_context` as a system message at the top of the conversation — NOT as a user message.
27
+
28
+ 5. Tell the user: "Loaded [X] tokens of context for '[session-id]'. Continuing from: [next_action]"
29
+
30
+ ## Format for system injection
31
+
32
+ ```
33
+ [TokenMizer — session: {session_id}]
34
+ {resume_context}
35
+ [End of session context — continue from here]
36
+ ```
37
+
38
+ ## If no checkpoint found
39
+
40
+ Tell the user: "No checkpoint found for '[session-id]'. Either the session hasn't been checkpointed yet, or TokenMizer isn't running."
41
+
42
+ Suggest: `tokenmizer checkpoint {session-id}` to save the current session first.
43
+
44
+ ## Examples of $ARGUMENTS
45
+
46
+ - `auth-service` → load standard resume for auth-service
47
+ - `auth-service full` → load full 600-token resume
48
+ - `auth-service critical` → load critical 100-token resume
49
+ - (empty) → ask user which project to resume
@@ -0,0 +1,57 @@
1
+ ---
2
+ name: stats
3
+ description: Show TokenMizer token savings stats — how many tokens saved today, this week, cache hit rate, and which layers are saving the most. Use when user asks about token usage, costs, savings, or "how much have we saved".
4
+ ---
5
+
6
+ Show TokenMizer token savings analytics.
7
+
8
+ ## What to do
9
+
10
+ ```bash
11
+ curl -s http://localhost:8000/api/stats
12
+ ```
13
+
14
+ Then also fetch cache stats:
15
+ ```bash
16
+ curl -s http://localhost:8000/api/cache/stats
17
+ ```
18
+
19
+ ## Format the output clearly
20
+
21
+ Show:
22
+ - Tokens saved today and this week
23
+ - Cost saved in USD
24
+ - Cache hit rate
25
+ - Which layer saved the most (compression / cache / windowing / file extraction)
26
+ - Suggestions if savings are low
27
+
28
+ ## Example output
29
+
30
+ ```
31
+ TokenMizer Stats
32
+ ─────────────────────────
33
+ Today: 12,450 tokens saved (34%) — $0.0373
34
+ Week: 87,200 tokens saved (31%) — $0.2616
35
+
36
+ By layer:
37
+ File extraction: 45,000 tokens
38
+ Semantic cache: 28,000 tokens
39
+ Smart window: 9,200 tokens
40
+ Compression: 3,800 tokens
41
+ Output trim: 1,200 tokens
42
+
43
+ Cache: 847 entries | 68% hit rate | 8% full
44
+ ─────────────────────────
45
+ Dashboard: http://localhost:8000
46
+ ```
47
+
48
+ ## If nothing saved yet
49
+
50
+ Tell user to start using TokenMizer as their proxy:
51
+ ```python
52
+ from openai import OpenAI
53
+ client = OpenAI(
54
+ api_key="your-key",
55
+ base_url="http://localhost:8000/v1"
56
+ )
57
+ ```
@@ -0,0 +1,24 @@
1
+ ---
2
+ name: Bug Report
3
+ about: Something is broken
4
+ title: '[BUG] '
5
+ labels: bug
6
+ ---
7
+
8
+ **Python version:**
9
+ **Provider:**
10
+ **OS:**
11
+
12
+ **What happened:**
13
+
14
+ **Expected:**
15
+
16
+ **Reproduction:**
17
+ ```python
18
+ # minimal code
19
+ ```
20
+
21
+ **Logs:**
22
+ ```
23
+ paste relevant logs
24
+ ```
@@ -0,0 +1,19 @@
1
+ ---
2
+ name: Extraction Miss
3
+ about: Graph memory missed a task, decision, or file from a real session
4
+ title: '[EXTRACTION] '
5
+ labels: extraction-quality
6
+ ---
7
+
8
+ **Message where extraction should have happened:**
9
+ ```
10
+ [assistant]: The login endpoint is now working...
11
+ ```
12
+
13
+ **What should have been extracted:**
14
+ - Node type: task / decision / file
15
+ - Expected label: "..."
16
+
17
+ **What was actually extracted (if anything):**
18
+
19
+ **Session type:** coding / research / writing / other
@@ -0,0 +1,20 @@
1
+ ## What this PR does
2
+
3
+ <!-- Brief description -->
4
+
5
+ ## Type
6
+ - [ ] Bug fix
7
+ - [ ] Extraction improvement (graph_memory/)
8
+ - [ ] New provider
9
+ - [ ] Performance
10
+ - [ ] Documentation
11
+
12
+ ## Tests
13
+ - [ ] `pytest tests/ -v` passes
14
+ - [ ] `ruff check tokenmizer/` clean
15
+ - [ ] Memory accuracy test added/updated (if extraction change)
16
+
17
+ ## Checklist
18
+ - [ ] No raw dicts crossing layer boundaries (use DTOs)
19
+ - [ ] No `os.getenv()` outside `config/settings.py`
20
+ - [ ] External imports are lazy (inside functions, with try/except ImportError)
@@ -0,0 +1,101 @@
1
+ name: CI
2
+
3
+ on:
4
+ push:
5
+ branches: [main, develop]
6
+ pull_request:
7
+ branches: [main]
8
+
9
+ jobs:
10
+ test:
11
+ runs-on: ubuntu-latest
12
+ strategy:
13
+ matrix:
14
+ python-version: ["3.10", "3.11", "3.12"]
15
+
16
+ steps:
17
+ - uses: actions/checkout@v4
18
+
19
+ - name: Set up Python ${{ matrix.python-version }}
20
+ uses: actions/setup-python@v5
21
+ with:
22
+ python-version: ${{ matrix.python-version }}
23
+
24
+ - name: Cache pip
25
+ uses: actions/cache@v4
26
+ with:
27
+ path: ~/.cache/pip
28
+ key: ${{ runner.os }}-pip-${{ hashFiles('pyproject.toml') }}
29
+ restore-keys: |
30
+ ${{ runner.os }}-pip-
31
+
32
+ - name: Install dependencies
33
+ run: |
34
+ python -m pip install --upgrade pip
35
+ pip install -e ".[dev]"
36
+
37
+ - name: Lint (blocking)
38
+ run: ruff check tokenmizer/ tests/
39
+
40
+ # Coverage threshold lives in pyproject.toml [tool.coverage.report]
41
+ # fail_under — single source of truth, do NOT override it here.
42
+ - name: Run tests with coverage
43
+ run: |
44
+ pytest tests/ \
45
+ --cov=tokenmizer \
46
+ --cov-report=term-missing \
47
+ --cov-report=xml \
48
+ -v
49
+
50
+ - name: Upload coverage
51
+ uses: codecov/codecov-action@v4
52
+ if: matrix.python-version == '3.12'
53
+ with:
54
+ file: ./coverage.xml
55
+ continue-on-error: true
56
+
57
+ benchmark:
58
+ runs-on: ubuntu-latest
59
+ needs: test
60
+ if: github.ref == 'refs/heads/main'
61
+
62
+ steps:
63
+ - uses: actions/checkout@v4
64
+
65
+ - name: Set up Python
66
+ uses: actions/setup-python@v5
67
+ with:
68
+ python-version: "3.12"
69
+
70
+ - name: Install
71
+ run: pip install -e ".[dev]"
72
+
73
+ - name: Run checkpoint accuracy benchmark
74
+ run: python -m benchmarks.checkpoint_accuracy.runner
75
+
76
+ - name: Upload benchmark results
77
+ uses: actions/upload-artifact@v4
78
+ with:
79
+ name: benchmark-results
80
+ path: benchmark_results.json
81
+
82
+ docker:
83
+ runs-on: ubuntu-latest
84
+ needs: test
85
+ if: github.ref == 'refs/heads/main'
86
+
87
+ steps:
88
+ - uses: actions/checkout@v4
89
+
90
+ - name: Build Docker image
91
+ run: docker build -t tokenmizer:latest .
92
+
93
+ - name: Test Docker health
94
+ run: |
95
+ docker run -d --name tm-test \
96
+ -e TOKENMIZER_ANTHROPIC_API_KEY=test \
97
+ -p 8001:8000 \
98
+ tokenmizer:latest
99
+ sleep 8
100
+ curl -f http://localhost:8001/health
101
+ docker stop tm-test
@@ -0,0 +1,57 @@
1
+ name: Release to PyPI
2
+
3
+ # Publishes via PyPI Trusted Publishing (OIDC) — NO API token stored anywhere.
4
+ # One-time setup on pypi.org (must be done by the project owner):
5
+ # pypi.org → Your account → Publishing → Add a new pending publisher:
6
+ # PyPI project name: tokenmizer
7
+ # Owner: Shweta-Mishra-ai
8
+ # Repository: tokenmizer
9
+ # Workflow name: release.yml
10
+ # Environment: pypi
11
+ # Then create a GitHub Release (tag v0.2.4 etc.) and this workflow publishes.
12
+
13
+ on:
14
+ release:
15
+ types: [published]
16
+
17
+ jobs:
18
+ build:
19
+ runs-on: ubuntu-latest
20
+ steps:
21
+ - uses: actions/checkout@v4
22
+
23
+ - uses: actions/setup-python@v5
24
+ with:
25
+ python-version: "3.12"
26
+
27
+ - name: Run tests first — never publish a broken build
28
+ run: |
29
+ pip install -e ".[dev]"
30
+ pytest tests/ -q
31
+ ruff check tokenmizer/ tests/
32
+
33
+ - name: Build sdist + wheel
34
+ run: |
35
+ pip install build twine
36
+ python -m build
37
+ twine check dist/*
38
+
39
+ - uses: actions/upload-artifact@v4
40
+ with:
41
+ name: dist
42
+ path: dist/
43
+
44
+ publish:
45
+ needs: build
46
+ runs-on: ubuntu-latest
47
+ environment: pypi
48
+ permissions:
49
+ id-token: write # required for Trusted Publishing — no secrets used
50
+ steps:
51
+ - uses: actions/download-artifact@v4
52
+ with:
53
+ name: dist
54
+ path: dist/
55
+
56
+ - name: Publish to PyPI
57
+ uses: pypa/gh-action-pypi-publish@release/v1
@@ -0,0 +1,15 @@
1
+ __pycache__/
2
+ *.py[cod]
3
+ *.egg-info/
4
+ dist/
5
+ build/
6
+ .venv/
7
+ venv/
8
+ .pytest_cache/
9
+ .coverage
10
+ benchmark_v*_results.json
11
+ # checkpoints/ # removed: conflicts with tokenmizer/checkpoints/ module
12
+ *.db
13
+ *.db-wal
14
+ *.db-shm
15
+ .DS_Store
@@ -0,0 +1,12 @@
1
+ {
2
+ "mcpServers": {
3
+ "tokenmizer": {
4
+ "command": "python3",
5
+ "args": ["-m", "tokenmizer.mcp.server"],
6
+ "env": {
7
+ "TOKENMIZER_URL": "http://localhost:8000",
8
+ "TOKENMIZER_API_KEY": ""
9
+ }
10
+ }
11
+ }
12
+ }