tokenkeeper 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (62) hide show
  1. tokenkeeper-0.1.0/.github/ISSUE_TEMPLATE/bug_report.yml +87 -0
  2. tokenkeeper-0.1.0/.github/ISSUE_TEMPLATE/feature_request.yml +36 -0
  3. tokenkeeper-0.1.0/.github/pull_request_template.md +11 -0
  4. tokenkeeper-0.1.0/.github/workflows/ci.yml +47 -0
  5. tokenkeeper-0.1.0/.github/workflows/publish.yml +57 -0
  6. tokenkeeper-0.1.0/.gitignore +47 -0
  7. tokenkeeper-0.1.0/.mcp.json +11 -0
  8. tokenkeeper-0.1.0/.mcp.json.example +11 -0
  9. tokenkeeper-0.1.0/.python-version +1 -0
  10. tokenkeeper-0.1.0/CHANGELOG.md +27 -0
  11. tokenkeeper-0.1.0/CONTRIBUTING.md +84 -0
  12. tokenkeeper-0.1.0/IMPLEMENTATION-GUIDE.md +393 -0
  13. tokenkeeper-0.1.0/LICENSE +21 -0
  14. tokenkeeper-0.1.0/PKG-INFO +268 -0
  15. tokenkeeper-0.1.0/QUICKSTART.md +354 -0
  16. tokenkeeper-0.1.0/README.md +235 -0
  17. tokenkeeper-0.1.0/main.py +16 -0
  18. tokenkeeper-0.1.0/pyproject.toml +66 -0
  19. tokenkeeper-0.1.0/scripts/__init__.py +1 -0
  20. tokenkeeper-0.1.0/scripts/health_check.py +303 -0
  21. tokenkeeper-0.1.0/scripts/validate_chromadb.py +372 -0
  22. tokenkeeper-0.1.0/scripts/validate_ollama.py +311 -0
  23. tokenkeeper-0.1.0/scripts/validate_timeout.py +329 -0
  24. tokenkeeper-0.1.0/src/tokenkeeper/__init__.py +3 -0
  25. tokenkeeper-0.1.0/src/tokenkeeper/__main__.py +12 -0
  26. tokenkeeper-0.1.0/src/tokenkeeper/bm25_index.py +203 -0
  27. tokenkeeper-0.1.0/src/tokenkeeper/config.py +252 -0
  28. tokenkeeper-0.1.0/src/tokenkeeper/discovery.py +174 -0
  29. tokenkeeper-0.1.0/src/tokenkeeper/embeddings.py +376 -0
  30. tokenkeeper-0.1.0/src/tokenkeeper/health.py +240 -0
  31. tokenkeeper-0.1.0/src/tokenkeeper/indexer.py +575 -0
  32. tokenkeeper-0.1.0/src/tokenkeeper/ingestion.py +1317 -0
  33. tokenkeeper-0.1.0/src/tokenkeeper/search.py +395 -0
  34. tokenkeeper-0.1.0/src/tokenkeeper/server.py +723 -0
  35. tokenkeeper-0.1.0/src/tokenkeeper/storage.py +291 -0
  36. tokenkeeper-0.1.0/src/tokenkeeper/types.py +87 -0
  37. tokenkeeper-0.1.0/src/tokenkeeper/watcher.py +264 -0
  38. tokenkeeper-0.1.0/tests/__init__.py +1 -0
  39. tokenkeeper-0.1.0/tests/test_acceptance.py +980 -0
  40. tokenkeeper-0.1.0/tests/test_agent_comparison.py +888 -0
  41. tokenkeeper-0.1.0/tests/test_bm25_index.py +198 -0
  42. tokenkeeper-0.1.0/tests/test_chromadb.py +278 -0
  43. tokenkeeper-0.1.0/tests/test_config.py +337 -0
  44. tokenkeeper-0.1.0/tests/test_discovery.py +487 -0
  45. tokenkeeper-0.1.0/tests/test_embeddings.py +409 -0
  46. tokenkeeper-0.1.0/tests/test_health.py +202 -0
  47. tokenkeeper-0.1.0/tests/test_health_check.py +281 -0
  48. tokenkeeper-0.1.0/tests/test_indexer.py +797 -0
  49. tokenkeeper-0.1.0/tests/test_ingestion.py +928 -0
  50. tokenkeeper-0.1.0/tests/test_integration_embedding.py +281 -0
  51. tokenkeeper-0.1.0/tests/test_integration_ingestion.py +278 -0
  52. tokenkeeper-0.1.0/tests/test_integration_mcp.py +664 -0
  53. tokenkeeper-0.1.0/tests/test_integration_search.py +449 -0
  54. tokenkeeper-0.1.0/tests/test_ollama.py +161 -0
  55. tokenkeeper-0.1.0/tests/test_practical_token_savings.py +536 -0
  56. tokenkeeper-0.1.0/tests/test_search.py +661 -0
  57. tokenkeeper-0.1.0/tests/test_server.py +417 -0
  58. tokenkeeper-0.1.0/tests/test_storage.py +292 -0
  59. tokenkeeper-0.1.0/tests/test_timeout.py +125 -0
  60. tokenkeeper-0.1.0/tests/test_token_benchmarks.py +1357 -0
  61. tokenkeeper-0.1.0/tests/test_watcher.py +389 -0
  62. tokenkeeper-0.1.0/uv.lock +2927 -0
@@ -0,0 +1,87 @@
1
+ name: Bug Report
2
+ description: Report a bug or unexpected behavior
3
+ labels: [bug]
4
+ body:
5
+ - type: textarea
6
+ id: description
7
+ attributes:
8
+ label: Description
9
+ description: What happened?
10
+ placeholder: A clear description of the bug.
11
+ validations:
12
+ required: true
13
+
14
+ - type: textarea
15
+ id: steps-to-reproduce
16
+ attributes:
17
+ label: Steps to Reproduce
18
+ description: How can we reproduce this?
19
+ placeholder: |
20
+ 1. Install tokenkeeper
21
+ 2. Run `tokenkeeper`
22
+ 3. ...
23
+ validations:
24
+ required: true
25
+
26
+ - type: textarea
27
+ id: expected-behavior
28
+ attributes:
29
+ label: Expected Behavior
30
+ description: What should have happened?
31
+ validations:
32
+ required: true
33
+
34
+ - type: textarea
35
+ id: actual-behavior
36
+ attributes:
37
+ label: Actual Behavior
38
+ description: What actually happened?
39
+ placeholder: Include error messages or stack traces if available.
40
+ validations:
41
+ required: true
42
+
43
+ - type: dropdown
44
+ id: os
45
+ attributes:
46
+ label: Operating System
47
+ options:
48
+ - Windows
49
+ - macOS
50
+ - Linux
51
+ validations:
52
+ required: true
53
+
54
+ - type: input
55
+ id: python-version
56
+ attributes:
57
+ label: Python Version
58
+ description: Output of `python --version`
59
+ placeholder: "3.12.0"
60
+ validations:
61
+ required: true
62
+
63
+ - type: input
64
+ id: tokenkeeper-version
65
+ attributes:
66
+ label: tokenkeeper Version
67
+ description: The `Version` line from the output of `pip show tokenkeeper`
68
+ placeholder: "0.1.0"
69
+ validations:
70
+ required: true
71
+
72
+ - type: input
73
+ id: ollama-version
74
+ attributes:
75
+ label: Ollama Version
76
+ description: Output of `ollama --version` (if applicable)
77
+ placeholder: "0.6.2"
78
+ validations:
79
+ required: false
80
+
81
+ - type: textarea
82
+ id: additional-context
83
+ attributes:
84
+ label: Additional Context
85
+ description: Logs, screenshots, configuration, or anything else that might help.
86
+ validations:
87
+ required: false
@@ -0,0 +1,36 @@
1
+ name: Feature Request
2
+ description: Suggest a new feature or enhancement
3
+ labels: [enhancement]
4
+ body:
5
+ - type: textarea
6
+ id: use-case
7
+ attributes:
8
+ label: Use Case
9
+ description: What problem does this solve? Describe the situation where this feature would be useful.
10
+ placeholder: I want to be able to ...
11
+ validations:
12
+ required: true
13
+
14
+ - type: textarea
15
+ id: proposed-solution
16
+ attributes:
17
+ label: Proposed Solution
18
+ description: How should it work? Describe your ideal implementation.
19
+ validations:
20
+ required: true
21
+
22
+ - type: textarea
23
+ id: alternatives-considered
24
+ attributes:
25
+ label: Alternatives Considered
26
+ description: Other approaches you have thought about and why they are less ideal.
27
+ validations:
28
+ required: false
29
+
30
+ - type: textarea
31
+ id: additional-context
32
+ attributes:
33
+ label: Additional Context
34
+ description: Mockups, examples, links, or anything else that helps illustrate the request.
35
+ validations:
36
+ required: false
@@ -0,0 +1,11 @@
1
+ ## Summary
2
+
3
+ <!-- What does this PR do and why? -->
4
+
5
+ ## Checklist
6
+
7
+ - [ ] Tests pass (`uv run pytest tests/ -m "not ollama"`)
8
+ - [ ] New tests added for new functionality
9
+ - [ ] Documentation updated (if applicable)
10
+ - [ ] CHANGELOG.md updated under "Unreleased"
11
+ - [ ] No breaking changes (or documented in PR description)
@@ -0,0 +1,47 @@
1
+ name: CI
2
+
3
+ on:
4
+ push:
5
+ branches: [master]
6
+ pull_request:
7
+ branches: [master]
8
+
9
+ permissions:
10
+ contents: read
11
+
12
+ jobs:
13
+ test:
14
+ runs-on: ${{ matrix.os }}
15
+ strategy:
16
+ fail-fast: false
17
+ matrix:
18
+ os: [ubuntu-latest, windows-latest]
19
+ steps:
20
+ - uses: actions/checkout@v4
21
+
22
+ - name: Install uv
23
+ uses: astral-sh/setup-uv@v7
24
+ with:
25
+ enable-cache: true
26
+
27
+ - name: Install Python
28
+ run: uv python install
29
+
30
+ - name: Install dependencies
31
+ run: uv sync --locked --dev
32
+
33
+ - name: Run tests
34
+ if: matrix.os != 'ubuntu-latest'
35
+ run: uv run pytest tests/ -m "not ollama" -v --tb=short
36
+
37
+ - name: Run tests with coverage
38
+ if: matrix.os == 'ubuntu-latest'
39
+ run: uv run pytest tests/ -m "not ollama" -v --tb=short --cov=src/tokenkeeper --cov-report=xml --cov-report=term-missing
40
+
41
+ - name: Upload coverage to Codecov
42
+ if: matrix.os == 'ubuntu-latest'
43
+ uses: codecov/codecov-action@v5
44
+ with:
45
+ files: ./coverage.xml
46
+ token: ${{ secrets.CODECOV_TOKEN }}
47
+ fail_ci_if_error: false
@@ -0,0 +1,57 @@
1
+ name: Publish to PyPI
2
+
3
+ on:
4
+ push:
5
+ tags:
6
+ - 'v*'
7
+
8
+ permissions:
9
+ contents: read
10
+ id-token: write
11
+
12
+ jobs:
13
+ build:
14
+ runs-on: ubuntu-latest
15
+ steps:
16
+ - uses: actions/checkout@v4
17
+
18
+ - name: Install uv
19
+ uses: astral-sh/setup-uv@v7
20
+ with:
21
+ enable-cache: true
22
+
23
+ - name: Install Python
24
+ run: uv python install
25
+
26
+ - name: Install dependencies
27
+ run: uv sync --locked --dev
28
+
29
+ - name: Run tests
30
+ run: uv run pytest tests/ -m "not ollama" -v --tb=short
31
+
32
+ - name: Build package
33
+ run: uv build
34
+
35
+ - name: Upload build artifacts
36
+ uses: actions/upload-artifact@v4
37
+ with:
38
+ name: dist
39
+ path: dist/
40
+
41
+ publish:
42
+ needs: build
43
+ runs-on: ubuntu-latest
44
+ environment: pypi
45
+ permissions:
46
+ id-token: write
47
+ steps:
48
+ - name: Download build artifacts
49
+ uses: actions/download-artifact@v4
50
+ with:
51
+ name: dist
52
+ path: dist/
53
+
54
+ - name: Publish to PyPI
55
+ uses: pypa/gh-action-pypi-publish@release/v1
56
+ with:
57
+ packages-dir: dist/
@@ -0,0 +1,47 @@
1
+ # Virtual environment
2
+ .venv/
3
+
4
+ # Python
5
+ __pycache__/
6
+ *.py[cod]
7
+ *$py.class
8
+ *.egg-info/
9
+ dist/
10
+ build/
11
+
12
+ # IDE
13
+ .vscode/
14
+ .idea/
15
+
16
+ # OS
17
+ .DS_Store
18
+ Thumbs.db
19
+
20
+ # Environment
21
+ .env
22
+ .env.local
23
+
24
+ # RAG data directory (per-project, generated at runtime)
25
+ .rag/
26
+
27
+ # Windows artifacts
28
+ NUL
29
+
30
+ # Acceptance test cache (cloned repos)
31
+ .acceptance-cache/
32
+
33
+ # Internal planning docs (not for distribution)
34
+ .planning/
35
+
36
+ # Test artifacts
37
+ _seg_test/
38
+ .pytest_cache/
39
+ .coverage
40
+ coverage.xml
41
+ htmlcov/
42
+
43
+ # Claude Code project settings (personal paths)
44
+ .claude/
45
+
46
+ # Old git history backup
47
+ .git.bak/
@@ -0,0 +1,11 @@
1
+ {
2
+ "mcpServers": {
3
+ "tokenkeeper": {
4
+ "command": "/path/to/TokenKeeper/.venv/bin/python",
5
+ "args": ["-m", "tokenkeeper"],
6
+ "env": {
7
+ "TOKENKEEPER_PROJECT": "${workspaceFolder}"
8
+ }
9
+ }
10
+ }
11
+ }
@@ -0,0 +1,11 @@
1
+ {
2
+ "mcpServers": {
3
+ "tokenkeeper": {
4
+ "command": "/path/to/TokenKeeper/.venv/bin/python",
5
+ "args": ["-m", "tokenkeeper"],
6
+ "env": {
7
+ "TOKENKEEPER_PROJECT": "${workspaceFolder}"
8
+ }
9
+ }
10
+ }
11
+ }
@@ -0,0 +1 @@
1
+ 3.12
@@ -0,0 +1,27 @@
1
+ # Changelog
2
+
3
+ All notable changes to this project will be documented in this file.
4
+
5
+ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/),
6
+ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
7
+
8
+ ## [Unreleased]
9
+
10
+ ## [0.1.0] - 2026-02-20
11
+
12
+ ### Added
13
+
14
+ - Initial release of TokenKeeper (tokenkeeper)
15
+ - Hybrid search: semantic (vector) + keyword (BM25) with Reciprocal Rank Fusion
16
+ - Local-first architecture: Ollama embeddings, ChromaDB persistent storage
17
+ - FastMCP server with 4 tools: search_knowledge, indexing_status, reindex_documents, get_index_stats
18
+ - File watcher for automatic reindexing on changes
19
+ - Per-project isolation via .rag/ directory
20
+ - Content modes: docs, code, both
21
+ - Heading-aware markdown chunking with structure protection
22
+ - Language-aware code chunking (Python AST, TypeScript/JavaScript regex)
23
+ - Google Gemini embedding provider (optional cloud alternative)
24
+ - 445 tests passing
25
+
26
+ [Unreleased]: https://github.com/admin-sosys/TokenKeeper/compare/v0.1.0...HEAD
27
+ [0.1.0]: https://github.com/admin-sosys/TokenKeeper/releases/tag/v0.1.0
@@ -0,0 +1,84 @@
1
+ # Contributing to TokenKeeper
2
+
3
+ Thank you for your interest in contributing to TokenKeeper! This guide will help you get started.
4
+
5
+ ## Development Setup
6
+
7
+ ### Prerequisites
8
+
9
+ - **Python 3.10 through 3.13**
10
+ - **[Ollama](https://ollama.com/)** installed and running (for embedding tests)
11
+ - **[uv](https://docs.astral.sh/uv/)** package manager
12
+
13
+ ### Clone and Install
14
+
15
+ ```bash
16
+ git clone https://github.com/admin-sosys/TokenKeeper.git
17
+ cd TokenKeeper
18
+ uv sync --dev
19
+ ```
20
+
21
+ ### Pull the Embedding Model
22
+
23
+ ```bash
24
+ ollama pull nomic-embed-text
25
+ ```
26
+
27
+ This downloads the `nomic-embed-text` model used for local embeddings (768 dimensions, runs on CPU).
28
+
29
+ ## Running Tests
30
+
31
+ Run the full test suite (tests marked `ollama` require Ollama to be running with the embedding model pulled):
32
+
33
+ ```bash
34
+ uv run pytest tests/ -v
35
+ ```
36
+
37
+ Run tests **without Ollama** (useful if Ollama is not installed):
38
+
39
+ ```bash
40
+ uv run pytest tests/ -m "not ollama" -v
41
+ ```
42
+
43
+ Run tests with **coverage reporting**:
44
+
45
+ ```bash
46
+ uv run pytest tests/ -m "not ollama" --cov=src/tokenkeeper --cov-report=term-missing
47
+ ```
48
+
49
+ ## Code Style
50
+
51
+ - **Follow existing patterns** in the codebase. When in doubt, look at how similar code is structured nearby.
52
+ - **Type hints** are required for all public functions and methods.
53
+ - **Pydantic** models are used for configuration and data validation. Use them for new config or structured data types.
54
+ - Keep functions focused and small. Prefer composition over inheritance.
55
+
56
+ ## Security
57
+
58
+ **Never commit secrets, tokens, API keys, or credentials.** This repository has GitHub secret scanning and push protection enabled — pushes containing detected secrets will be blocked automatically.
59
+
60
+ - Store all secrets in `.env` (which is gitignored)
61
+ - Use `${{ secrets.NAME }}` for GitHub Actions workflows
62
+ - If you accidentally commit a secret, revoke it immediately and notify the maintainers
63
+
64
+ ## Pull Request Process
65
+
66
+ 1. **Fork** the repository and create a feature branch from `master`.
67
+ 2. **Write tests** for any new functionality. We aim for 80%+ test coverage.
68
+ 3. **Ensure all tests pass** before submitting:
69
+ ```bash
70
+ uv run pytest tests/ -m "not ollama" -v
71
+ ```
72
+ 4. **Update `CHANGELOG.md`** under the "Unreleased" section with a brief description of your changes.
73
+ 5. **Submit a pull request** with a clear description of what you changed and why.
74
+
75
+ ## Reporting Issues
76
+
77
+ We use GitHub Issues to track bugs and feature requests.
78
+
79
+ - **Found a bug?** Use the [Bug Report](https://github.com/admin-sosys/TokenKeeper/issues/new?template=bug_report.yml) template. Include steps to reproduce, expected vs. actual behavior, and your environment details.
80
+ - **Have an idea?** Use the [Feature Request](https://github.com/admin-sosys/TokenKeeper/issues/new?template=feature_request.yml) template. Describe the use case, your proposed solution, and any alternatives you considered.
81
+
82
+ ## License
83
+
84
+ By contributing, you agree that your contributions will be licensed under the [MIT License](LICENSE).