tokenkeeper 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- tokenkeeper-0.1.0/.github/ISSUE_TEMPLATE/bug_report.yml +87 -0
- tokenkeeper-0.1.0/.github/ISSUE_TEMPLATE/feature_request.yml +36 -0
- tokenkeeper-0.1.0/.github/pull_request_template.md +11 -0
- tokenkeeper-0.1.0/.github/workflows/ci.yml +47 -0
- tokenkeeper-0.1.0/.github/workflows/publish.yml +57 -0
- tokenkeeper-0.1.0/.gitignore +47 -0
- tokenkeeper-0.1.0/.mcp.json +11 -0
- tokenkeeper-0.1.0/.mcp.json.example +11 -0
- tokenkeeper-0.1.0/.python-version +1 -0
- tokenkeeper-0.1.0/CHANGELOG.md +27 -0
- tokenkeeper-0.1.0/CONTRIBUTING.md +84 -0
- tokenkeeper-0.1.0/IMPLEMENTATION-GUIDE.md +393 -0
- tokenkeeper-0.1.0/LICENSE +21 -0
- tokenkeeper-0.1.0/PKG-INFO +268 -0
- tokenkeeper-0.1.0/QUICKSTART.md +354 -0
- tokenkeeper-0.1.0/README.md +235 -0
- tokenkeeper-0.1.0/main.py +16 -0
- tokenkeeper-0.1.0/pyproject.toml +66 -0
- tokenkeeper-0.1.0/scripts/__init__.py +1 -0
- tokenkeeper-0.1.0/scripts/health_check.py +303 -0
- tokenkeeper-0.1.0/scripts/validate_chromadb.py +372 -0
- tokenkeeper-0.1.0/scripts/validate_ollama.py +311 -0
- tokenkeeper-0.1.0/scripts/validate_timeout.py +329 -0
- tokenkeeper-0.1.0/src/tokenkeeper/__init__.py +3 -0
- tokenkeeper-0.1.0/src/tokenkeeper/__main__.py +12 -0
- tokenkeeper-0.1.0/src/tokenkeeper/bm25_index.py +203 -0
- tokenkeeper-0.1.0/src/tokenkeeper/config.py +252 -0
- tokenkeeper-0.1.0/src/tokenkeeper/discovery.py +174 -0
- tokenkeeper-0.1.0/src/tokenkeeper/embeddings.py +376 -0
- tokenkeeper-0.1.0/src/tokenkeeper/health.py +240 -0
- tokenkeeper-0.1.0/src/tokenkeeper/indexer.py +575 -0
- tokenkeeper-0.1.0/src/tokenkeeper/ingestion.py +1317 -0
- tokenkeeper-0.1.0/src/tokenkeeper/search.py +395 -0
- tokenkeeper-0.1.0/src/tokenkeeper/server.py +723 -0
- tokenkeeper-0.1.0/src/tokenkeeper/storage.py +291 -0
- tokenkeeper-0.1.0/src/tokenkeeper/types.py +87 -0
- tokenkeeper-0.1.0/src/tokenkeeper/watcher.py +264 -0
- tokenkeeper-0.1.0/tests/__init__.py +1 -0
- tokenkeeper-0.1.0/tests/test_acceptance.py +980 -0
- tokenkeeper-0.1.0/tests/test_agent_comparison.py +888 -0
- tokenkeeper-0.1.0/tests/test_bm25_index.py +198 -0
- tokenkeeper-0.1.0/tests/test_chromadb.py +278 -0
- tokenkeeper-0.1.0/tests/test_config.py +337 -0
- tokenkeeper-0.1.0/tests/test_discovery.py +487 -0
- tokenkeeper-0.1.0/tests/test_embeddings.py +409 -0
- tokenkeeper-0.1.0/tests/test_health.py +202 -0
- tokenkeeper-0.1.0/tests/test_health_check.py +281 -0
- tokenkeeper-0.1.0/tests/test_indexer.py +797 -0
- tokenkeeper-0.1.0/tests/test_ingestion.py +928 -0
- tokenkeeper-0.1.0/tests/test_integration_embedding.py +281 -0
- tokenkeeper-0.1.0/tests/test_integration_ingestion.py +278 -0
- tokenkeeper-0.1.0/tests/test_integration_mcp.py +664 -0
- tokenkeeper-0.1.0/tests/test_integration_search.py +449 -0
- tokenkeeper-0.1.0/tests/test_ollama.py +161 -0
- tokenkeeper-0.1.0/tests/test_practical_token_savings.py +536 -0
- tokenkeeper-0.1.0/tests/test_search.py +661 -0
- tokenkeeper-0.1.0/tests/test_server.py +417 -0
- tokenkeeper-0.1.0/tests/test_storage.py +292 -0
- tokenkeeper-0.1.0/tests/test_timeout.py +125 -0
- tokenkeeper-0.1.0/tests/test_token_benchmarks.py +1357 -0
- tokenkeeper-0.1.0/tests/test_watcher.py +389 -0
- tokenkeeper-0.1.0/uv.lock +2927 -0
|
@@ -0,0 +1,87 @@
|
|
|
1
|
+
name: Bug Report
|
|
2
|
+
description: Report a bug or unexpected behavior
|
|
3
|
+
labels: [bug]
|
|
4
|
+
body:
|
|
5
|
+
- type: textarea
|
|
6
|
+
id: description
|
|
7
|
+
attributes:
|
|
8
|
+
label: Description
|
|
9
|
+
description: What happened?
|
|
10
|
+
placeholder: A clear description of the bug.
|
|
11
|
+
validations:
|
|
12
|
+
required: true
|
|
13
|
+
|
|
14
|
+
- type: textarea
|
|
15
|
+
id: steps-to-reproduce
|
|
16
|
+
attributes:
|
|
17
|
+
label: Steps to Reproduce
|
|
18
|
+
description: How can we reproduce this?
|
|
19
|
+
placeholder: |
|
|
20
|
+
1. Install tokenkeeper
|
|
21
|
+
2. Run `tokenkeeper`
|
|
22
|
+
3. ...
|
|
23
|
+
validations:
|
|
24
|
+
required: true
|
|
25
|
+
|
|
26
|
+
- type: textarea
|
|
27
|
+
id: expected-behavior
|
|
28
|
+
attributes:
|
|
29
|
+
label: Expected Behavior
|
|
30
|
+
description: What should have happened?
|
|
31
|
+
validations:
|
|
32
|
+
required: true
|
|
33
|
+
|
|
34
|
+
- type: textarea
|
|
35
|
+
id: actual-behavior
|
|
36
|
+
attributes:
|
|
37
|
+
label: Actual Behavior
|
|
38
|
+
description: What actually happened?
|
|
39
|
+
placeholder: Include error messages or stack traces if available.
|
|
40
|
+
validations:
|
|
41
|
+
required: true
|
|
42
|
+
|
|
43
|
+
- type: dropdown
|
|
44
|
+
id: os
|
|
45
|
+
attributes:
|
|
46
|
+
label: Operating System
|
|
47
|
+
options:
|
|
48
|
+
- Windows
|
|
49
|
+
- macOS
|
|
50
|
+
- Linux
|
|
51
|
+
validations:
|
|
52
|
+
required: true
|
|
53
|
+
|
|
54
|
+
- type: input
|
|
55
|
+
id: python-version
|
|
56
|
+
attributes:
|
|
57
|
+
label: Python Version
|
|
58
|
+
description: Output of `python --version`
|
|
59
|
+
placeholder: "3.12.0"
|
|
60
|
+
validations:
|
|
61
|
+
required: true
|
|
62
|
+
|
|
63
|
+
- type: input
|
|
64
|
+
id: tokenkeeper-version
|
|
65
|
+
attributes:
|
|
66
|
+
label: tokenkeeper Version
|
|
67
|
+
description: Output of `pip show tokenkeeper` (the `Version` line)
|
|
68
|
+
placeholder: "0.1.0"
|
|
69
|
+
validations:
|
|
70
|
+
required: true
|
|
71
|
+
|
|
72
|
+
- type: input
|
|
73
|
+
id: ollama-version
|
|
74
|
+
attributes:
|
|
75
|
+
label: Ollama Version
|
|
76
|
+
description: Output of `ollama --version` (if applicable)
|
|
77
|
+
placeholder: "0.6.2"
|
|
78
|
+
validations:
|
|
79
|
+
required: false
|
|
80
|
+
|
|
81
|
+
- type: textarea
|
|
82
|
+
id: additional-context
|
|
83
|
+
attributes:
|
|
84
|
+
label: Additional Context
|
|
85
|
+
description: Logs, screenshots, configuration, or anything else that might help.
|
|
86
|
+
validations:
|
|
87
|
+
required: false
|
|
@@ -0,0 +1,36 @@
|
|
|
1
|
+
name: Feature Request
|
|
2
|
+
description: Suggest a new feature or enhancement
|
|
3
|
+
labels: [enhancement]
|
|
4
|
+
body:
|
|
5
|
+
- type: textarea
|
|
6
|
+
id: use-case
|
|
7
|
+
attributes:
|
|
8
|
+
label: Use Case
|
|
9
|
+
description: What problem does this solve? Describe the situation where this feature would be useful.
|
|
10
|
+
placeholder: I want to be able to ...
|
|
11
|
+
validations:
|
|
12
|
+
required: true
|
|
13
|
+
|
|
14
|
+
- type: textarea
|
|
15
|
+
id: proposed-solution
|
|
16
|
+
attributes:
|
|
17
|
+
label: Proposed Solution
|
|
18
|
+
description: How should it work? Describe your ideal implementation.
|
|
19
|
+
validations:
|
|
20
|
+
required: true
|
|
21
|
+
|
|
22
|
+
- type: textarea
|
|
23
|
+
id: alternatives-considered
|
|
24
|
+
attributes:
|
|
25
|
+
label: Alternatives Considered
|
|
26
|
+
description: Other approaches you have thought about and why they are less ideal.
|
|
27
|
+
validations:
|
|
28
|
+
required: false
|
|
29
|
+
|
|
30
|
+
- type: textarea
|
|
31
|
+
id: additional-context
|
|
32
|
+
attributes:
|
|
33
|
+
label: Additional Context
|
|
34
|
+
description: Mockups, examples, links, or anything else that helps illustrate the request.
|
|
35
|
+
validations:
|
|
36
|
+
required: false
|
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
## Summary
|
|
2
|
+
|
|
3
|
+
<!-- What does this PR do and why? -->
|
|
4
|
+
|
|
5
|
+
## Checklist
|
|
6
|
+
|
|
7
|
+
- [ ] Tests pass (`uv run pytest tests/ -m "not ollama"`)
|
|
8
|
+
- [ ] New tests added for new functionality
|
|
9
|
+
- [ ] Documentation updated (if applicable)
|
|
10
|
+
- [ ] CHANGELOG.md updated under "Unreleased"
|
|
11
|
+
- [ ] No breaking changes (or documented in PR description)
|
|
@@ -0,0 +1,47 @@
|
|
|
1
|
+
name: CI
|
|
2
|
+
|
|
3
|
+
on:
|
|
4
|
+
push:
|
|
5
|
+
branches: [master]
|
|
6
|
+
pull_request:
|
|
7
|
+
branches: [master]
|
|
8
|
+
|
|
9
|
+
permissions:
|
|
10
|
+
contents: read
|
|
11
|
+
|
|
12
|
+
jobs:
|
|
13
|
+
test:
|
|
14
|
+
runs-on: ${{ matrix.os }}
|
|
15
|
+
strategy:
|
|
16
|
+
fail-fast: false
|
|
17
|
+
matrix:
|
|
18
|
+
os: [ubuntu-latest, windows-latest]
|
|
19
|
+
steps:
|
|
20
|
+
- uses: actions/checkout@v4
|
|
21
|
+
|
|
22
|
+
- name: Install uv
|
|
23
|
+
uses: astral-sh/setup-uv@v7
|
|
24
|
+
with:
|
|
25
|
+
enable-cache: true
|
|
26
|
+
|
|
27
|
+
- name: Install Python
|
|
28
|
+
run: uv python install
|
|
29
|
+
|
|
30
|
+
- name: Install dependencies
|
|
31
|
+
run: uv sync --locked --dev
|
|
32
|
+
|
|
33
|
+
- name: Run tests
|
|
34
|
+
if: matrix.os != 'ubuntu-latest'
|
|
35
|
+
run: uv run pytest tests/ -m "not ollama" -v --tb=short
|
|
36
|
+
|
|
37
|
+
- name: Run tests with coverage
|
|
38
|
+
if: matrix.os == 'ubuntu-latest'
|
|
39
|
+
run: uv run pytest tests/ -m "not ollama" -v --tb=short --cov=src/tokenkeeper --cov-report=xml --cov-report=term-missing
|
|
40
|
+
|
|
41
|
+
- name: Upload coverage to Codecov
|
|
42
|
+
if: matrix.os == 'ubuntu-latest'
|
|
43
|
+
uses: codecov/codecov-action@v5
|
|
44
|
+
with:
|
|
45
|
+
files: ./coverage.xml
|
|
46
|
+
token: ${{ secrets.CODECOV_TOKEN }}
|
|
47
|
+
fail_ci_if_error: false
|
|
@@ -0,0 +1,57 @@
|
|
|
1
|
+
name: Publish to PyPI
|
|
2
|
+
|
|
3
|
+
on:
|
|
4
|
+
push:
|
|
5
|
+
tags:
|
|
6
|
+
- 'v*'
|
|
7
|
+
|
|
8
|
+
permissions:
|
|
9
|
+
contents: read
|
|
10
|
+
id-token: write
|
|
11
|
+
|
|
12
|
+
jobs:
|
|
13
|
+
build:
|
|
14
|
+
runs-on: ubuntu-latest
|
|
15
|
+
steps:
|
|
16
|
+
- uses: actions/checkout@v4
|
|
17
|
+
|
|
18
|
+
- name: Install uv
|
|
19
|
+
uses: astral-sh/setup-uv@v7
|
|
20
|
+
with:
|
|
21
|
+
enable-cache: true
|
|
22
|
+
|
|
23
|
+
- name: Install Python
|
|
24
|
+
run: uv python install
|
|
25
|
+
|
|
26
|
+
- name: Install dependencies
|
|
27
|
+
run: uv sync --locked --dev
|
|
28
|
+
|
|
29
|
+
- name: Run tests
|
|
30
|
+
run: uv run pytest tests/ -m "not ollama" -v --tb=short
|
|
31
|
+
|
|
32
|
+
- name: Build package
|
|
33
|
+
run: uv build
|
|
34
|
+
|
|
35
|
+
- name: Upload build artifacts
|
|
36
|
+
uses: actions/upload-artifact@v4
|
|
37
|
+
with:
|
|
38
|
+
name: dist
|
|
39
|
+
path: dist/
|
|
40
|
+
|
|
41
|
+
publish:
|
|
42
|
+
needs: build
|
|
43
|
+
runs-on: ubuntu-latest
|
|
44
|
+
environment: pypi
|
|
45
|
+
permissions:
|
|
46
|
+
id-token: write
|
|
47
|
+
steps:
|
|
48
|
+
- name: Download build artifacts
|
|
49
|
+
uses: actions/download-artifact@v4
|
|
50
|
+
with:
|
|
51
|
+
name: dist
|
|
52
|
+
path: dist/
|
|
53
|
+
|
|
54
|
+
- name: Publish to PyPI
|
|
55
|
+
uses: pypa/gh-action-pypi-publish@release/v1
|
|
56
|
+
with:
|
|
57
|
+
packages-dir: dist/
|
|
@@ -0,0 +1,47 @@
|
|
|
1
|
+
# Virtual environment
|
|
2
|
+
.venv/
|
|
3
|
+
|
|
4
|
+
# Python
|
|
5
|
+
__pycache__/
|
|
6
|
+
*.py[cod]
|
|
7
|
+
*$py.class
|
|
8
|
+
*.egg-info/
|
|
9
|
+
dist/
|
|
10
|
+
build/
|
|
11
|
+
|
|
12
|
+
# IDE
|
|
13
|
+
.vscode/
|
|
14
|
+
.idea/
|
|
15
|
+
|
|
16
|
+
# OS
|
|
17
|
+
.DS_Store
|
|
18
|
+
Thumbs.db
|
|
19
|
+
|
|
20
|
+
# Environment
|
|
21
|
+
.env
|
|
22
|
+
.env.local
|
|
23
|
+
|
|
24
|
+
# RAG data directory (per-project, generated at runtime)
|
|
25
|
+
.rag/
|
|
26
|
+
|
|
27
|
+
# Windows artifacts
|
|
28
|
+
NUL
|
|
29
|
+
|
|
30
|
+
# Acceptance test cache (cloned repos)
|
|
31
|
+
.acceptance-cache/
|
|
32
|
+
|
|
33
|
+
# Internal planning docs (not for distribution)
|
|
34
|
+
.planning/
|
|
35
|
+
|
|
36
|
+
# Test artifacts
|
|
37
|
+
_seg_test/
|
|
38
|
+
.pytest_cache/
|
|
39
|
+
.coverage
|
|
40
|
+
coverage.xml
|
|
41
|
+
htmlcov/
|
|
42
|
+
|
|
43
|
+
# Claude Code project settings (personal paths)
|
|
44
|
+
.claude/
|
|
45
|
+
|
|
46
|
+
# Old git history backup
|
|
47
|
+
.git.bak/
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
3.12
|
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
# Changelog
|
|
2
|
+
|
|
3
|
+
All notable changes to this project will be documented in this file.
|
|
4
|
+
|
|
5
|
+
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/),
|
|
6
|
+
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
|
|
7
|
+
|
|
8
|
+
## [Unreleased]
|
|
9
|
+
|
|
10
|
+
## [0.1.0] - 2026-02-20
|
|
11
|
+
|
|
12
|
+
### Added
|
|
13
|
+
|
|
14
|
+
- Initial release of TokenKeeper (tokenkeeper)
|
|
15
|
+
- Hybrid search: semantic (vector) + keyword (BM25) with Reciprocal Rank Fusion
|
|
16
|
+
- Local-first architecture: Ollama embeddings, ChromaDB persistent storage
|
|
17
|
+
- FastMCP server with 4 tools: search_knowledge, indexing_status, reindex_documents, get_index_stats
|
|
18
|
+
- File watcher for automatic reindexing on changes
|
|
19
|
+
- Per-project isolation via .rag/ directory
|
|
20
|
+
- Content modes: docs, code, both
|
|
21
|
+
- Heading-aware markdown chunking with structure protection
|
|
22
|
+
- Language-aware code chunking (Python AST, TypeScript/JavaScript regex)
|
|
23
|
+
- Google Gemini embedding provider (optional cloud alternative)
|
|
24
|
+
- 445 tests passing
|
|
25
|
+
|
|
26
|
+
[Unreleased]: https://github.com/admin-sosys/TokenKeeper/compare/v0.1.0...HEAD
|
|
27
|
+
[0.1.0]: https://github.com/admin-sosys/TokenKeeper/releases/tag/v0.1.0
|
|
@@ -0,0 +1,84 @@
|
|
|
1
|
+
# Contributing to TokenKeeper
|
|
2
|
+
|
|
3
|
+
Thank you for your interest in contributing to TokenKeeper! This guide will help you get started.
|
|
4
|
+
|
|
5
|
+
## Development Setup
|
|
6
|
+
|
|
7
|
+
### Prerequisites
|
|
8
|
+
|
|
9
|
+
- **Python 3.10+** (up to 3.13)
|
|
10
|
+
- **[Ollama](https://ollama.com/)** installed and running (for embedding tests)
|
|
11
|
+
- **[uv](https://docs.astral.sh/uv/)** package manager
|
|
12
|
+
|
|
13
|
+
### Clone and Install
|
|
14
|
+
|
|
15
|
+
```bash
|
|
16
|
+
git clone https://github.com/admin-sosys/TokenKeeper.git
|
|
17
|
+
cd TokenKeeper
|
|
18
|
+
uv sync --dev
|
|
19
|
+
```
|
|
20
|
+
|
|
21
|
+
### Pull the Embedding Model
|
|
22
|
+
|
|
23
|
+
```bash
|
|
24
|
+
ollama pull nomic-embed-text
|
|
25
|
+
```
|
|
26
|
+
|
|
27
|
+
This downloads the `nomic-embed-text` model used for local embeddings (768 dimensions, runs on CPU).
|
|
28
|
+
|
|
29
|
+
## Running Tests
|
|
30
|
+
|
|
31
|
+
Run the full test suite (this includes the tests that need a running Ollama instance):
|
|
32
|
+
|
|
33
|
+
```bash
|
|
34
|
+
uv run pytest tests/ -v
|
|
35
|
+
```
|
|
36
|
+
|
|
37
|
+
Run tests **without Ollama** (useful if Ollama is not installed):
|
|
38
|
+
|
|
39
|
+
```bash
|
|
40
|
+
uv run pytest tests/ -m "not ollama" -v
|
|
41
|
+
```
|
|
42
|
+
|
|
43
|
+
Run tests with **coverage reporting**:
|
|
44
|
+
|
|
45
|
+
```bash
|
|
46
|
+
uv run pytest tests/ -m "not ollama" --cov=src/tokenkeeper --cov-report=term-missing
|
|
47
|
+
```
|
|
48
|
+
|
|
49
|
+
## Code Style
|
|
50
|
+
|
|
51
|
+
- **Follow existing patterns** in the codebase. When in doubt, look at how similar code is structured nearby.
|
|
52
|
+
- **Type hints** are required for all public functions and methods.
|
|
53
|
+
- **Pydantic** models are used for configuration and data validation. Use them for new config or structured data types.
|
|
54
|
+
- Keep functions focused and small. Prefer composition over inheritance.
|
|
55
|
+
|
|
56
|
+
## Security
|
|
57
|
+
|
|
58
|
+
**Never commit secrets, tokens, API keys, or credentials.** This repository has GitHub secret scanning and push protection enabled — pushes containing detected secrets will be blocked automatically.
|
|
59
|
+
|
|
60
|
+
- Store all secrets in `.env` (which is gitignored)
|
|
61
|
+
- Use `${{ secrets.NAME }}` for GitHub Actions workflows
|
|
62
|
+
- If you accidentally commit a secret, revoke it immediately and notify the maintainers
|
|
63
|
+
|
|
64
|
+
## Pull Request Process
|
|
65
|
+
|
|
66
|
+
1. **Fork** the repository and create a feature branch from `master`.
|
|
67
|
+
2. **Write tests** for any new functionality. We aim for 80%+ test coverage.
|
|
68
|
+
3. **Ensure all tests pass** before submitting:
|
|
69
|
+
```bash
|
|
70
|
+
uv run pytest tests/ -m "not ollama" -v
|
|
71
|
+
```
|
|
72
|
+
4. **Update `CHANGELOG.md`** under the "Unreleased" section with a brief description of your changes.
|
|
73
|
+
5. **Submit a pull request** with a clear description of what you changed and why.
|
|
74
|
+
|
|
75
|
+
## Reporting Issues
|
|
76
|
+
|
|
77
|
+
We use GitHub Issues to track bugs and feature requests.
|
|
78
|
+
|
|
79
|
+
- **Found a bug?** Use the [Bug Report](https://github.com/admin-sosys/TokenKeeper/issues/new?template=bug_report.yml) template. Include steps to reproduce, expected vs. actual behavior, and your environment details.
|
|
80
|
+
- **Have an idea?** Use the [Feature Request](https://github.com/admin-sosys/TokenKeeper/issues/new?template=feature_request.yml) template. Describe the use case, your proposed solution, and any alternatives you considered.
|
|
81
|
+
|
|
82
|
+
## License
|
|
83
|
+
|
|
84
|
+
By contributing, you agree that your contributions will be licensed under the [MIT License](LICENSE).
|