tokenmizer 0.2.4__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- tokenmizer-0.2.4/.claude-plugin/marketplace.json +31 -0
- tokenmizer-0.2.4/.claude-plugin/plugin.json +39 -0
- tokenmizer-0.2.4/.claude-plugin/skills/analyze/SKILL.md +58 -0
- tokenmizer-0.2.4/.claude-plugin/skills/checkpoint/SKILL.md +56 -0
- tokenmizer-0.2.4/.claude-plugin/skills/resume/SKILL.md +49 -0
- tokenmizer-0.2.4/.claude-plugin/skills/stats/SKILL.md +57 -0
- tokenmizer-0.2.4/.github/ISSUE_TEMPLATE/bug_report.md +24 -0
- tokenmizer-0.2.4/.github/ISSUE_TEMPLATE/extraction_miss.md +19 -0
- tokenmizer-0.2.4/.github/PULL_REQUEST_TEMPLATE.md +20 -0
- tokenmizer-0.2.4/.github/workflows/ci.yml +101 -0
- tokenmizer-0.2.4/.github/workflows/release.yml +57 -0
- tokenmizer-0.2.4/.gitignore +15 -0
- tokenmizer-0.2.4/.mcp.json +12 -0
- tokenmizer-0.2.4/CHANGELOG.md +280 -0
- tokenmizer-0.2.4/CONTRIBUTING.md +159 -0
- tokenmizer-0.2.4/Dockerfile +51 -0
- tokenmizer-0.2.4/LICENSE +21 -0
- tokenmizer-0.2.4/PKG-INFO +529 -0
- tokenmizer-0.2.4/README.md +454 -0
- tokenmizer-0.2.4/SECURITY.md +166 -0
- tokenmizer-0.2.4/TESTING.md +83 -0
- tokenmizer-0.2.4/USAGE.md +526 -0
- tokenmizer-0.2.4/benchmarks/__init__.py +0 -0
- tokenmizer-0.2.4/benchmarks/checkpoint_accuracy/__init__.py +0 -0
- tokenmizer-0.2.4/benchmarks/checkpoint_accuracy/runner.py +213 -0
- tokenmizer-0.2.4/benchmarks/checkpoint_accuracy/runner_v2.py +289 -0
- tokenmizer-0.2.4/benchmarks/checkpoint_accuracy/runner_v3.py +250 -0
- tokenmizer-0.2.4/benchmarks/graph_retrieval/__init__.py +0 -0
- tokenmizer-0.2.4/benchmarks/graph_retrieval/runner.py +98 -0
- tokenmizer-0.2.4/benchmarks/latency/__init__.py +0 -0
- tokenmizer-0.2.4/benchmarks/latency/runner.py +97 -0
- tokenmizer-0.2.4/benchmarks/resume_quality/__init__.py +0 -0
- tokenmizer-0.2.4/docker-compose.yml +43 -0
- tokenmizer-0.2.4/docs/assets/architecture.svg +169 -0
- tokenmizer-0.2.4/docs/assets/logo.svg +246 -0
- tokenmizer-0.2.4/examples/basic_usage.py +156 -0
- tokenmizer-0.2.4/pyproject.toml +113 -0
- tokenmizer-0.2.4/scripts/install.sh +260 -0
- tokenmizer-0.2.4/scripts/run_stdlib_tests.py +398 -0
- tokenmizer-0.2.4/scripts/setup.sh +89 -0
- tokenmizer-0.2.4/scripts/static_audit.py +126 -0
- tokenmizer-0.2.4/tests/__init__.py +0 -0
- tokenmizer-0.2.4/tests/chaos/__init__.py +0 -0
- tokenmizer-0.2.4/tests/chaos/test_recovery.py +154 -0
- tokenmizer-0.2.4/tests/conftest.py +14 -0
- tokenmizer-0.2.4/tests/integration/__init__.py +0 -0
- tokenmizer-0.2.4/tests/integration/test_api_endpoint.py +159 -0
- tokenmizer-0.2.4/tests/integration/test_checkpoint.py +142 -0
- tokenmizer-0.2.4/tests/memory_accuracy/__init__.py +0 -0
- tokenmizer-0.2.4/tests/memory_accuracy/test_retention.py +160 -0
- tokenmizer-0.2.4/tests/unit/__init__.py +0 -0
- tokenmizer-0.2.4/tests/unit/test_cache.py +102 -0
- tokenmizer-0.2.4/tests/unit/test_compression_correctness.py +151 -0
- tokenmizer-0.2.4/tests/unit/test_decision_cache_async.py +274 -0
- tokenmizer-0.2.4/tests/unit/test_file_intelligence.py +330 -0
- tokenmizer-0.2.4/tests/unit/test_graph.py +182 -0
- tokenmizer-0.2.4/tests/unit/test_graph_persistence.py +200 -0
- tokenmizer-0.2.4/tests/unit/test_hybrid_extractor.py +148 -0
- tokenmizer-0.2.4/tests/unit/test_rate_limiter.py +48 -0
- tokenmizer-0.2.4/tests/unit/test_security.py +294 -0
- tokenmizer-0.2.4/tests/unit/test_tokenizer.py +48 -0
- tokenmizer-0.2.4/tests/unit/test_validator.py +186 -0
- tokenmizer-0.2.4/tokenmizer/__init__.py +21 -0
- tokenmizer-0.2.4/tokenmizer/agents/__init__.py +0 -0
- tokenmizer-0.2.4/tokenmizer/analytics/__init__.py +0 -0
- tokenmizer-0.2.4/tokenmizer/analytics/engine.py +188 -0
- tokenmizer-0.2.4/tokenmizer/api/__init__.py +0 -0
- tokenmizer-0.2.4/tokenmizer/api/app.py +958 -0
- tokenmizer-0.2.4/tokenmizer/api/rate_limiter.py +110 -0
- tokenmizer-0.2.4/tokenmizer/checkpoints/__init__.py +0 -0
- tokenmizer-0.2.4/tokenmizer/checkpoints/manager.py +383 -0
- tokenmizer-0.2.4/tokenmizer/cli.py +153 -0
- tokenmizer-0.2.4/tokenmizer/compression/__init__.py +0 -0
- tokenmizer-0.2.4/tokenmizer/compression/engine.py +669 -0
- tokenmizer-0.2.4/tokenmizer/compression/output_trimmer.py +95 -0
- tokenmizer-0.2.4/tokenmizer/compression/window.py +104 -0
- tokenmizer-0.2.4/tokenmizer/config/__init__.py +0 -0
- tokenmizer-0.2.4/tokenmizer/config/settings.py +170 -0
- tokenmizer-0.2.4/tokenmizer/core/__init__.py +0 -0
- tokenmizer-0.2.4/tokenmizer/core/dto.py +196 -0
- tokenmizer-0.2.4/tokenmizer/core/errors.py +35 -0
- tokenmizer-0.2.4/tokenmizer/core/tokenizer.py +96 -0
- tokenmizer-0.2.4/tokenmizer/dashboard/__init__.py +0 -0
- tokenmizer-0.2.4/tokenmizer/dashboard/page.py +267 -0
- tokenmizer-0.2.4/tokenmizer/filters/__init__.py +0 -0
- tokenmizer-0.2.4/tokenmizer/filters/file_intelligence.py +960 -0
- tokenmizer-0.2.4/tokenmizer/graph_memory/__init__.py +0 -0
- tokenmizer-0.2.4/tokenmizer/graph_memory/decision_tracker.py +225 -0
- tokenmizer-0.2.4/tokenmizer/graph_memory/graph.py +1287 -0
- tokenmizer-0.2.4/tokenmizer/graph_memory/helpers.py +121 -0
- tokenmizer-0.2.4/tokenmizer/graph_memory/hybrid_extractor.py +703 -0
- tokenmizer-0.2.4/tokenmizer/graph_memory/types.py +134 -0
- tokenmizer-0.2.4/tokenmizer/graph_memory/validator.py +304 -0
- tokenmizer-0.2.4/tokenmizer/graph_memory/visualization.py +228 -0
- tokenmizer-0.2.4/tokenmizer/mcp/__init__.py +0 -0
- tokenmizer-0.2.4/tokenmizer/mcp/server.py +368 -0
- tokenmizer-0.2.4/tokenmizer/providers/__init__.py +0 -0
- tokenmizer-0.2.4/tokenmizer/providers/providers.py +456 -0
- tokenmizer-0.2.4/tokenmizer/security/__init__.py +0 -0
- tokenmizer-0.2.4/tokenmizer/security/auth.py +95 -0
- tokenmizer-0.2.4/tokenmizer/security/middleware.py +138 -0
- tokenmizer-0.2.4/tokenmizer/security/redaction.py +126 -0
- tokenmizer-0.2.4/tokenmizer/semantic_cache/__init__.py +0 -0
- tokenmizer-0.2.4/tokenmizer/semantic_cache/cache.py +383 -0
- tokenmizer-0.2.4/tokenmizer/state/__init__.py +0 -0
- tokenmizer-0.2.4/tokenmizer/state/backend.py +137 -0
- tokenmizer-0.2.4/tokenmizer/storage/__init__.py +56 -0
- tokenmizer-0.2.4/tokenmizer.yaml +70 -0
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
{
|
|
2
|
+
"name": "tokenmizer",
|
|
3
|
+
"version": "0.2.3",
|
|
4
|
+
"description": "Never lose your AI context again. Graph-backed memory, session checkpointing, and file intelligence for any LLM.",
|
|
5
|
+
"homepage": "https://github.com/Shweta-Mishra-ai/tokenmizer",
|
|
6
|
+
"repository": "https://github.com/Shweta-Mishra-ai/tokenmizer",
|
|
7
|
+
"author": {
|
|
8
|
+
"name": "Shweta Mishra",
|
|
9
|
+
"url": "https://github.com/Shweta-Mishra-ai"
|
|
10
|
+
},
|
|
11
|
+
"license": "MIT",
|
|
12
|
+
"keywords": [
|
|
13
|
+
"memory",
|
|
14
|
+
"checkpoint",
|
|
15
|
+
"resume",
|
|
16
|
+
"context",
|
|
17
|
+
"graph",
|
|
18
|
+
"token-optimization",
|
|
19
|
+
"mcp",
|
|
20
|
+
"anthropic",
|
|
21
|
+
"openai",
|
|
22
|
+
"llm"
|
|
23
|
+
],
|
|
24
|
+
"plugins": [
|
|
25
|
+
{
|
|
26
|
+
"name": "tokenmizer",
|
|
27
|
+
"path": ".",
|
|
28
|
+
"description": "Graph memory + checkpoints + file intelligence for any LLM session"
|
|
29
|
+
}
|
|
30
|
+
]
|
|
31
|
+
}
|
|
@@ -0,0 +1,39 @@
|
|
|
1
|
+
{
|
|
2
|
+
"name": "tokenmizer",
|
|
3
|
+
"version": "0.2.3",
|
|
4
|
+
"description": "Never lose your AI context again. Graph-backed memory, session checkpointing, and file intelligence for Claude Code and any LLM.",
|
|
5
|
+
"author": {
|
|
6
|
+
"name": "Shweta Mishra",
|
|
7
|
+
"url": "https://github.com/Shweta-Mishra-ai"
|
|
8
|
+
},
|
|
9
|
+
"homepage": "https://github.com/Shweta-Mishra-ai/tokenmizer",
|
|
10
|
+
"repository": "https://github.com/Shweta-Mishra-ai/tokenmizer",
|
|
11
|
+
"license": "MIT",
|
|
12
|
+
"keywords": [
|
|
13
|
+
"memory",
|
|
14
|
+
"checkpoint",
|
|
15
|
+
"resume",
|
|
16
|
+
"context",
|
|
17
|
+
"token-optimization",
|
|
18
|
+
"graph",
|
|
19
|
+
"mcp"
|
|
20
|
+
],
|
|
21
|
+
"skills": [
|
|
22
|
+
"./skills/checkpoint",
|
|
23
|
+
"./skills/resume",
|
|
24
|
+
"./skills/analyze",
|
|
25
|
+
"./skills/stats"
|
|
26
|
+
],
|
|
27
|
+
"mcpServers": {
|
|
28
|
+
"tokenmizer": {
|
|
29
|
+
"command": "python3",
|
|
30
|
+
"args": [
|
|
31
|
+
"-m",
|
|
32
|
+
"tokenmizer.mcp.server"
|
|
33
|
+
],
|
|
34
|
+
"env": {
|
|
35
|
+
"TOKENMIZER_URL": "http://localhost:8000"
|
|
36
|
+
}
|
|
37
|
+
}
|
|
38
|
+
}
|
|
39
|
+
}
|
|
@@ -0,0 +1,58 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: analyze
|
|
3
|
+
description: Analyze a large file (CSV, Excel, PDF, JSON, code) and return a token-efficient summary. Instead of reading thousands of rows or pages, get schema + statistics + sample in under 500 tokens. Use when user mentions a file path, asks to analyze data, pastes many rows, or references a CSV/Excel/PDF/JSON file.
|
|
4
|
+
---
|
|
5
|
+
|
|
6
|
+
Analyze a file using TokenMizer's file intelligence layer.
|
|
7
|
+
|
|
8
|
+
## IMPORTANT rule
|
|
9
|
+
|
|
10
|
+
**Never ask the user to paste the file content.** Always call TokenMizer to analyze it from the path. Pasting a 50,000-row CSV costs roughly 400,000 tokens; TokenMizer reduces it to ~450 tokens.
|
|
11
|
+
|
|
12
|
+
## What to do
|
|
13
|
+
|
|
14
|
+
Parse $ARGUMENTS:
|
|
15
|
+
- First word = file path
|
|
16
|
+
- Remaining words = query (what the user wants to know)
|
|
17
|
+
|
|
18
|
+
```bash
|
|
19
|
+
FILE_PATH=$(echo "$ARGUMENTS" | awk '{print $1}')
|
|
20
|
+
QUERY=$(echo "$ARGUMENTS" | cut -d' ' -f2-)
|
|
21
|
+
|
|
22
|
+
python3 -c "
|
|
23
|
+
from tokenmizer.filters.file_intelligence import FileIntelligence
|
|
24
|
+
fi = FileIntelligence()
|
|
25
|
+
result = fi.process(
|
|
26
|
+
open('${FILE_PATH}', 'rb').read(),
|
|
27
|
+
'${FILE_PATH}'.split('/')[-1],
|
|
28
|
+
token_budget=600,
|
|
29
|
+
query='${QUERY}'
|
|
30
|
+
)
|
|
31
|
+
print(f'File: {result.file_type} | {result.original_tokens:,} → {result.extracted_tokens} tokens ({result.savings_pct:.0f}% saved)')
|
|
32
|
+
print()
|
|
33
|
+
print(result.content)
|
|
34
|
+
"
|
|
35
|
+
```
|
|
36
|
+
|
|
37
|
+
## Token savings by file type
|
|
38
|
+
|
|
39
|
+
| Type | Typical savings |
|
|
40
|
+
|---|---|
|
|
41
|
+
| CSV (50k rows) | 99.9% |
|
|
42
|
+
| PDF (200 pages) | 98.8% |
|
|
43
|
+
| Excel (10 sheets) | 99.7% |
|
|
44
|
+
| JSON (1k items) | 95% |
|
|
45
|
+
| Code (large file) | 60-80% |
|
|
46
|
+
|
|
47
|
+
## If TokenMizer is not installed
|
|
48
|
+
|
|
49
|
+
```bash
|
|
50
|
+
pip install "tokenmizer[anthropic]"
|
|
51
|
+
```
|
|
52
|
+
|
|
53
|
+
## Examples of $ARGUMENTS
|
|
54
|
+
|
|
55
|
+
- `/data/sales.csv` → analyze with no specific query
|
|
56
|
+
- `/data/sales.csv which regions are underperforming` → targeted analysis
|
|
57
|
+
- `/reports/Q1.pdf key findings and risks` → relevant page extraction
|
|
58
|
+
- `/data/users.xlsx find inactive accounts` → per-sheet analysis
|
|
@@ -0,0 +1,56 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: checkpoint
|
|
3
|
+
description: Save the current session to TokenMizer graph memory. Creates a persistent checkpoint with all tasks, decisions, files, and errors — resumable in any future session. Use when user says "save", "checkpoint", "remember this", "I'm done for today", or session is getting long.
|
|
4
|
+
---
|
|
5
|
+
|
|
6
|
+
Save the current session to TokenMizer graph memory.
|
|
7
|
+
|
|
8
|
+
## What to do
|
|
9
|
+
|
|
10
|
+
1. Ask the user for a session ID if not provided (suggest a slug based on what you're working on, e.g. "auth-service", "data-pipeline", "my-project")
|
|
11
|
+
2. Call the TokenMizer checkpoint API:
|
|
12
|
+
|
|
13
|
+
```bash
|
|
14
|
+
curl -s -X POST "http://localhost:8000/api/checkpoint?session_id=$ARGUMENTS" \
|
|
15
|
+
-H "Content-Type: application/json"
|
|
16
|
+
```
|
|
17
|
+
|
|
18
|
+
3. Show the user what was saved:
|
|
19
|
+
- Checkpoint ID
|
|
20
|
+
- Number of nodes in graph
|
|
21
|
+
- Resume token count
|
|
22
|
+
- The resume context block
|
|
23
|
+
|
|
24
|
+
## If TokenMizer is not running
|
|
25
|
+
|
|
26
|
+
Tell the user to start it first:
|
|
27
|
+
```bash
|
|
28
|
+
tokenmizer serve
|
|
29
|
+
# or
|
|
30
|
+
python3 -m tokenmizer.api.app
|
|
31
|
+
```
|
|
32
|
+
|
|
33
|
+
## Session ID rules
|
|
34
|
+
- Use lowercase slugs: `auth-service` not `Auth Service`
|
|
35
|
+
- Keep it short and meaningful
|
|
36
|
+
- Same ID across sessions for the same project
|
|
37
|
+
|
|
38
|
+
## Example output to show user
|
|
39
|
+
|
|
40
|
+
```
|
|
41
|
+
✅ Session 'auth-service' saved
|
|
42
|
+
|
|
43
|
+
Checkpoint: ckpt_a3f9b2
|
|
44
|
+
Nodes: 14 (6 tasks, 4 decisions, 3 files, 1 error)
|
|
45
|
+
Resume size: 247 tokens
|
|
46
|
+
|
|
47
|
+
Resume context:
|
|
48
|
+
Goal: Build FastAPI auth service with JWT
|
|
49
|
+
Done: Project setup | User model | Login endpoint | Fix 422
|
|
50
|
+
In progress: Refresh token rotation
|
|
51
|
+
Decided: PostgreSQL | bcrypt | Redis for tokens
|
|
52
|
+
Files: api/auth.py, api/models.py, config.py
|
|
53
|
+
Continue: Implement token refresh endpoint
|
|
54
|
+
```
|
|
55
|
+
|
|
56
|
+
If $ARGUMENTS is empty, ask: "What should I call this session? (e.g. my-project)"
|
|
@@ -0,0 +1,49 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: resume
|
|
3
|
+
description: Load a previous session from TokenMizer graph memory. Returns a compact context block (100-600 tokens) covering goals, completed work, decisions, open tasks, and files. Inject this as system context to continue exactly where you left off. Use when user says "resume", "continue from last time", "load my project", "what did we do on X", or starts a session on a known project.
|
|
4
|
+
---
|
|
5
|
+
|
|
6
|
+
Load a previous session from TokenMizer graph memory.
|
|
7
|
+
|
|
8
|
+
## What to do
|
|
9
|
+
|
|
10
|
+
1. Get the session ID from $ARGUMENTS (or ask the user if not provided)
|
|
11
|
+
2. Determine the level from $ARGUMENTS:
|
|
12
|
+
- `critical` = ~100 tokens, only open blockers + key decisions
|
|
13
|
+
- `standard` = ~300 tokens, normal resume (default)
|
|
14
|
+
- `full` = ~600 tokens, everything including env, schemas, endpoints
|
|
15
|
+
|
|
16
|
+
3. Call the TokenMizer resume API:
|
|
17
|
+
|
|
18
|
+
```bash
|
|
19
|
+
SESSION_ID=$(echo "$ARGUMENTS" | awk '{print $1}')
|
|
20
|
+
LEVEL=$(echo "$ARGUMENTS" | awk '{print $2}')
|
|
21
|
+
LEVEL=${LEVEL:-standard}
|
|
22
|
+
|
|
23
|
+
curl -s "http://localhost:8000/api/resume/${SESSION_ID}?level=${LEVEL}"
|
|
24
|
+
```
|
|
25
|
+
|
|
26
|
+
4. Inject the returned `resume_context` as a system message at the top of the conversation — NOT as a user message.
|
|
27
|
+
|
|
28
|
+
5. Tell the user: "Loaded [X] tokens of context for '[session-id]'. Continuing from: [next_action]"
|
|
29
|
+
|
|
30
|
+
## Format for system injection
|
|
31
|
+
|
|
32
|
+
```
|
|
33
|
+
[TokenMizer — session: {session_id}]
|
|
34
|
+
{resume_context}
|
|
35
|
+
[End of session context — continue from here]
|
|
36
|
+
```
|
|
37
|
+
|
|
38
|
+
## If no checkpoint found
|
|
39
|
+
|
|
40
|
+
Tell the user: "No checkpoint found for '[session-id]'. Either the session hasn't been checkpointed yet, or TokenMizer isn't running."
|
|
41
|
+
|
|
42
|
+
Suggest: `tokenmizer checkpoint {session-id}` to save the current session first.
|
|
43
|
+
|
|
44
|
+
## Examples of $ARGUMENTS
|
|
45
|
+
|
|
46
|
+
- `auth-service` → load standard resume for auth-service
|
|
47
|
+
- `auth-service full` → load full 600-token resume
|
|
48
|
+
- `auth-service critical` → load critical 100-token resume
|
|
49
|
+
- (empty) → ask user which project to resume
|
|
@@ -0,0 +1,57 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: stats
|
|
3
|
+
description: Show TokenMizer token savings stats — how many tokens saved today, this week, cache hit rate, and which layers are saving the most. Use when user asks about token usage, costs, savings, or "how much have we saved".
|
|
4
|
+
---
|
|
5
|
+
|
|
6
|
+
Show TokenMizer token savings analytics.
|
|
7
|
+
|
|
8
|
+
## What to do
|
|
9
|
+
|
|
10
|
+
```bash
|
|
11
|
+
curl -s http://localhost:8000/api/stats
|
|
12
|
+
```
|
|
13
|
+
|
|
14
|
+
Then also fetch cache stats:
|
|
15
|
+
```bash
|
|
16
|
+
curl -s http://localhost:8000/api/cache/stats
|
|
17
|
+
```
|
|
18
|
+
|
|
19
|
+
## Format the output clearly
|
|
20
|
+
|
|
21
|
+
Show:
|
|
22
|
+
- Tokens saved today and this week
|
|
23
|
+
- Cost saved in USD
|
|
24
|
+
- Cache hit rate
|
|
25
|
+
- Which layer saved the most (compression / cache / windowing / file extraction)
|
|
26
|
+
- Suggestions if savings are low
|
|
27
|
+
|
|
28
|
+
## Example output
|
|
29
|
+
|
|
30
|
+
```
|
|
31
|
+
TokenMizer Stats
|
|
32
|
+
─────────────────────────
|
|
33
|
+
Today: 12,450 tokens saved (34%) — $0.0373
|
|
34
|
+
Week: 87,200 tokens saved (31%) — $0.2616
|
|
35
|
+
|
|
36
|
+
By layer:
|
|
37
|
+
File extraction: 45,000 tokens
|
|
38
|
+
Semantic cache: 28,000 tokens
|
|
39
|
+
Smart window: 9,200 tokens
|
|
40
|
+
Compression: 3,800 tokens
|
|
41
|
+
Output trim: 1,200 tokens
|
|
42
|
+
|
|
43
|
+
Cache: 847 entries | 68% hit rate | 8% full
|
|
44
|
+
─────────────────────────
|
|
45
|
+
Dashboard: http://localhost:8000
|
|
46
|
+
```
|
|
47
|
+
|
|
48
|
+
## If nothing saved yet
|
|
49
|
+
|
|
50
|
+
Tell the user to start using TokenMizer as their proxy:
|
|
51
|
+
```python
|
|
52
|
+
from openai import OpenAI
|
|
53
|
+
client = OpenAI(
|
|
54
|
+
api_key="your-key",
|
|
55
|
+
base_url="http://localhost:8000/v1"
|
|
56
|
+
)
|
|
57
|
+
```
|
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: Bug Report
|
|
3
|
+
about: Something is broken
|
|
4
|
+
title: '[BUG] '
|
|
5
|
+
labels: bug
|
|
6
|
+
---
|
|
7
|
+
|
|
8
|
+
**Python version:**
|
|
9
|
+
**Provider:**
|
|
10
|
+
**OS:**
|
|
11
|
+
|
|
12
|
+
**What happened:**
|
|
13
|
+
|
|
14
|
+
**Expected:**
|
|
15
|
+
|
|
16
|
+
**Reproduction:**
|
|
17
|
+
```python
|
|
18
|
+
# minimal code
|
|
19
|
+
```
|
|
20
|
+
|
|
21
|
+
**Logs:**
|
|
22
|
+
```
|
|
23
|
+
paste relevant logs
|
|
24
|
+
```
|
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: Extraction Miss
|
|
3
|
+
about: Graph memory missed a task, decision, or file from a real session
|
|
4
|
+
title: '[EXTRACTION] '
|
|
5
|
+
labels: extraction-quality
|
|
6
|
+
---
|
|
7
|
+
|
|
8
|
+
**Message where extraction should have happened:**
|
|
9
|
+
```
|
|
10
|
+
[assistant]: The login endpoint is now working...
|
|
11
|
+
```
|
|
12
|
+
|
|
13
|
+
**What should have been extracted:**
|
|
14
|
+
- Node type: task / decision / file
|
|
15
|
+
- Expected label: "..."
|
|
16
|
+
|
|
17
|
+
**What was actually extracted (if anything):**
|
|
18
|
+
|
|
19
|
+
**Session type:** coding / research / writing / other
|
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
## What this PR does
|
|
2
|
+
|
|
3
|
+
<!-- Brief description -->
|
|
4
|
+
|
|
5
|
+
## Type
|
|
6
|
+
- [ ] Bug fix
|
|
7
|
+
- [ ] Extraction improvement (graph_memory/)
|
|
8
|
+
- [ ] New provider
|
|
9
|
+
- [ ] Performance
|
|
10
|
+
- [ ] Documentation
|
|
11
|
+
|
|
12
|
+
## Tests
|
|
13
|
+
- [ ] `pytest tests/ -v` passes
|
|
14
|
+
- [ ] `ruff check tokenmizer/` clean
|
|
15
|
+
- [ ] Memory accuracy test added/updated (if extraction change)
|
|
16
|
+
|
|
17
|
+
## Checklist
|
|
18
|
+
- [ ] No raw dicts crossing layer boundaries (use DTOs)
|
|
19
|
+
- [ ] No `os.getenv()` outside `config/settings.py`
|
|
20
|
+
- [ ] External imports are lazy (inside functions, with try/except ImportError)
|
|
@@ -0,0 +1,101 @@
|
|
|
1
|
+
name: CI
|
|
2
|
+
|
|
3
|
+
on:
|
|
4
|
+
push:
|
|
5
|
+
branches: [main, develop]
|
|
6
|
+
pull_request:
|
|
7
|
+
branches: [main]
|
|
8
|
+
|
|
9
|
+
jobs:
|
|
10
|
+
test:
|
|
11
|
+
runs-on: ubuntu-latest
|
|
12
|
+
strategy:
|
|
13
|
+
matrix:
|
|
14
|
+
python-version: ["3.10", "3.11", "3.12"]
|
|
15
|
+
|
|
16
|
+
steps:
|
|
17
|
+
- uses: actions/checkout@v4
|
|
18
|
+
|
|
19
|
+
- name: Set up Python ${{ matrix.python-version }}
|
|
20
|
+
uses: actions/setup-python@v5
|
|
21
|
+
with:
|
|
22
|
+
python-version: ${{ matrix.python-version }}
|
|
23
|
+
|
|
24
|
+
- name: Cache pip
|
|
25
|
+
uses: actions/cache@v4
|
|
26
|
+
with:
|
|
27
|
+
path: ~/.cache/pip
|
|
28
|
+
key: ${{ runner.os }}-pip-${{ hashFiles('pyproject.toml') }}
|
|
29
|
+
restore-keys: |
|
|
30
|
+
${{ runner.os }}-pip-
|
|
31
|
+
|
|
32
|
+
- name: Install dependencies
|
|
33
|
+
run: |
|
|
34
|
+
python -m pip install --upgrade pip
|
|
35
|
+
pip install -e ".[dev]"
|
|
36
|
+
|
|
37
|
+
- name: Lint (blocking)
|
|
38
|
+
run: ruff check tokenmizer/ tests/
|
|
39
|
+
|
|
40
|
+
# Coverage threshold lives in pyproject.toml [tool.coverage.report]
|
|
41
|
+
# fail_under — single source of truth, do NOT override it here.
|
|
42
|
+
- name: Run tests with coverage
|
|
43
|
+
run: |
|
|
44
|
+
pytest tests/ \
|
|
45
|
+
--cov=tokenmizer \
|
|
46
|
+
--cov-report=term-missing \
|
|
47
|
+
--cov-report=xml \
|
|
48
|
+
-v
|
|
49
|
+
|
|
50
|
+
- name: Upload coverage
|
|
51
|
+
uses: codecov/codecov-action@v4
|
|
52
|
+
if: matrix.python-version == '3.12'
|
|
53
|
+
with:
|
|
54
|
+
file: ./coverage.xml
|
|
55
|
+
continue-on-error: true
|
|
56
|
+
|
|
57
|
+
benchmark:
|
|
58
|
+
runs-on: ubuntu-latest
|
|
59
|
+
needs: test
|
|
60
|
+
if: github.ref == 'refs/heads/main'
|
|
61
|
+
|
|
62
|
+
steps:
|
|
63
|
+
- uses: actions/checkout@v4
|
|
64
|
+
|
|
65
|
+
- name: Set up Python
|
|
66
|
+
uses: actions/setup-python@v5
|
|
67
|
+
with:
|
|
68
|
+
python-version: "3.12"
|
|
69
|
+
|
|
70
|
+
- name: Install
|
|
71
|
+
run: pip install -e ".[dev]"
|
|
72
|
+
|
|
73
|
+
- name: Run checkpoint accuracy benchmark
|
|
74
|
+
run: python -m benchmarks.checkpoint_accuracy.runner
|
|
75
|
+
|
|
76
|
+
- name: Upload benchmark results
|
|
77
|
+
uses: actions/upload-artifact@v4
|
|
78
|
+
with:
|
|
79
|
+
name: benchmark-results
|
|
80
|
+
path: benchmark_results.json
|
|
81
|
+
|
|
82
|
+
docker:
|
|
83
|
+
runs-on: ubuntu-latest
|
|
84
|
+
needs: test
|
|
85
|
+
if: github.ref == 'refs/heads/main'
|
|
86
|
+
|
|
87
|
+
steps:
|
|
88
|
+
- uses: actions/checkout@v4
|
|
89
|
+
|
|
90
|
+
- name: Build Docker image
|
|
91
|
+
run: docker build -t tokenmizer:latest .
|
|
92
|
+
|
|
93
|
+
- name: Test Docker health
|
|
94
|
+
run: |
|
|
95
|
+
docker run -d --name tm-test \
|
|
96
|
+
-e TOKENMIZER_ANTHROPIC_API_KEY=test \
|
|
97
|
+
-p 8001:8000 \
|
|
98
|
+
tokenmizer:latest
|
|
99
|
+
sleep 8
|
|
100
|
+
curl -f http://localhost:8001/health
|
|
101
|
+
docker stop tm-test
|
|
@@ -0,0 +1,57 @@
|
|
|
1
|
+
name: Release to PyPI
|
|
2
|
+
|
|
3
|
+
# Publishes via PyPI Trusted Publishing (OIDC) — NO API token stored anywhere.
|
|
4
|
+
# One-time setup on pypi.org (must be done by the project owner):
|
|
5
|
+
# pypi.org → Your account → Publishing → Add a new pending publisher:
|
|
6
|
+
# PyPI project name: tokenmizer
|
|
7
|
+
# Owner: Shweta-Mishra-ai
|
|
8
|
+
# Repository: tokenmizer
|
|
9
|
+
# Workflow name: release.yml
|
|
10
|
+
# Environment: pypi
|
|
11
|
+
# Then create a GitHub Release (e.g. tag v0.2.4) and this workflow publishes it to PyPI.
|
|
12
|
+
|
|
13
|
+
on:
|
|
14
|
+
release:
|
|
15
|
+
types: [published]
|
|
16
|
+
|
|
17
|
+
jobs:
|
|
18
|
+
build:
|
|
19
|
+
runs-on: ubuntu-latest
|
|
20
|
+
steps:
|
|
21
|
+
- uses: actions/checkout@v4
|
|
22
|
+
|
|
23
|
+
- uses: actions/setup-python@v5
|
|
24
|
+
with:
|
|
25
|
+
python-version: "3.12"
|
|
26
|
+
|
|
27
|
+
- name: Run tests first — never publish a broken build
|
|
28
|
+
run: |
|
|
29
|
+
pip install -e ".[dev]"
|
|
30
|
+
pytest tests/ -q
|
|
31
|
+
ruff check tokenmizer/ tests/
|
|
32
|
+
|
|
33
|
+
- name: Build sdist + wheel
|
|
34
|
+
run: |
|
|
35
|
+
pip install build twine
|
|
36
|
+
python -m build
|
|
37
|
+
twine check dist/*
|
|
38
|
+
|
|
39
|
+
- uses: actions/upload-artifact@v4
|
|
40
|
+
with:
|
|
41
|
+
name: dist
|
|
42
|
+
path: dist/
|
|
43
|
+
|
|
44
|
+
publish:
|
|
45
|
+
needs: build
|
|
46
|
+
runs-on: ubuntu-latest
|
|
47
|
+
environment: pypi
|
|
48
|
+
permissions:
|
|
49
|
+
id-token: write # required for Trusted Publishing — no secrets used
|
|
50
|
+
steps:
|
|
51
|
+
- uses: actions/download-artifact@v4
|
|
52
|
+
with:
|
|
53
|
+
name: dist
|
|
54
|
+
path: dist/
|
|
55
|
+
|
|
56
|
+
- name: Publish to PyPI
|
|
57
|
+
uses: pypa/gh-action-pypi-publish@release/v1
|