yacodebase-mcp 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- yacodebase_mcp-0.1.0/.github/workflows/ci.yml +37 -0
- yacodebase_mcp-0.1.0/.github/workflows/publish.yml +41 -0
- yacodebase_mcp-0.1.0/.gitignore +6 -0
- yacodebase_mcp-0.1.0/AGENTS.md +1 -0
- yacodebase_mcp-0.1.0/CLAUDE.md +77 -0
- yacodebase_mcp-0.1.0/LICENSE +21 -0
- yacodebase_mcp-0.1.0/PKG-INFO +179 -0
- yacodebase_mcp-0.1.0/README.md +145 -0
- yacodebase_mcp-0.1.0/docs/superpowers/plans/2026-05-20-ast-chunking.md +613 -0
- yacodebase_mcp-0.1.0/docs/superpowers/plans/2026-05-20-codebase-search-mcp.md +1324 -0
- yacodebase_mcp-0.1.0/docs/superpowers/plans/2026-05-20-global-embedding-config.md +962 -0
- yacodebase_mcp-0.1.0/docs/superpowers/specs/2026-05-20-ast-chunking-design.md +152 -0
- yacodebase_mcp-0.1.0/docs/superpowers/specs/2026-05-20-codebase-search-mcp-design.md +200 -0
- yacodebase_mcp-0.1.0/docs/superpowers/specs/2026-05-20-global-embedding-config-design.md +165 -0
- yacodebase_mcp-0.1.0/pyproject.toml +64 -0
- yacodebase_mcp-0.1.0/src/codebase_mcp/__init__.py +0 -0
- yacodebase_mcp-0.1.0/src/codebase_mcp/ast_chunker.py +119 -0
- yacodebase_mcp-0.1.0/src/codebase_mcp/cli.py +168 -0
- yacodebase_mcp-0.1.0/src/codebase_mcp/indexer.py +156 -0
- yacodebase_mcp-0.1.0/src/codebase_mcp/searcher.py +84 -0
- yacodebase_mcp-0.1.0/src/codebase_mcp/server.py +30 -0
- yacodebase_mcp-0.1.0/src/codebase_mcp/settings.py +57 -0
- yacodebase_mcp-0.1.0/src/codebase_mcp/store.py +78 -0
- yacodebase_mcp-0.1.0/tests/__init__.py +0 -0
- yacodebase_mcp-0.1.0/tests/conftest.py +20 -0
- yacodebase_mcp-0.1.0/tests/test_ast_chunker.py +218 -0
- yacodebase_mcp-0.1.0/tests/test_cli.py +195 -0
- yacodebase_mcp-0.1.0/tests/test_indexer.py +179 -0
- yacodebase_mcp-0.1.0/tests/test_integration.py +73 -0
- yacodebase_mcp-0.1.0/tests/test_searcher.py +143 -0
- yacodebase_mcp-0.1.0/tests/test_settings.py +67 -0
- yacodebase_mcp-0.1.0/tests/test_store.py +74 -0
- yacodebase_mcp-0.1.0/uv.lock +2033 -0
|
@@ -0,0 +1,37 @@
|
|
|
1
|
+
name: CI
|
|
2
|
+
|
|
3
|
+
on:
|
|
4
|
+
push:
|
|
5
|
+
branches: [main]
|
|
6
|
+
pull_request:
|
|
7
|
+
branches: [main]
|
|
8
|
+
|
|
9
|
+
jobs:
|
|
10
|
+
test:
|
|
11
|
+
runs-on: ubuntu-latest
|
|
12
|
+
strategy:
|
|
13
|
+
matrix:
|
|
14
|
+
python-version: ["3.11", "3.12"]
|
|
15
|
+
|
|
16
|
+
steps:
|
|
17
|
+
- uses: actions/checkout@v4
|
|
18
|
+
|
|
19
|
+
- name: Install uv
|
|
20
|
+
uses: astral-sh/setup-uv@v5
|
|
21
|
+
with:
|
|
22
|
+
enable-cache: true
|
|
23
|
+
|
|
24
|
+
- name: Set up Python ${{ matrix.python-version }}
|
|
25
|
+
run: uv python install ${{ matrix.python-version }}
|
|
26
|
+
|
|
27
|
+
- name: Install dependencies
|
|
28
|
+
run: uv sync --dev
|
|
29
|
+
|
|
30
|
+
- name: Lint
|
|
31
|
+
run: uv run ruff check .
|
|
32
|
+
|
|
33
|
+
- name: Format check
|
|
34
|
+
run: uv run ruff format --check .
|
|
35
|
+
|
|
36
|
+
- name: Run tests
|
|
37
|
+
run: uv run pytest -v
|
|
@@ -0,0 +1,41 @@
|
|
|
1
|
+
name: Publish to PyPI
|
|
2
|
+
|
|
3
|
+
on:
|
|
4
|
+
release:
|
|
5
|
+
types: [published]
|
|
6
|
+
|
|
7
|
+
jobs:
|
|
8
|
+
build:
|
|
9
|
+
runs-on: ubuntu-latest
|
|
10
|
+
steps:
|
|
11
|
+
- uses: actions/checkout@v4
|
|
12
|
+
|
|
13
|
+
- name: Install uv
|
|
14
|
+
uses: astral-sh/setup-uv@v5
|
|
15
|
+
with:
|
|
16
|
+
enable-cache: true
|
|
17
|
+
|
|
18
|
+
- name: Build package
|
|
19
|
+
run: uv build
|
|
20
|
+
|
|
21
|
+
- name: Upload dist
|
|
22
|
+
uses: actions/upload-artifact@v4
|
|
23
|
+
with:
|
|
24
|
+
name: dist
|
|
25
|
+
path: dist/
|
|
26
|
+
|
|
27
|
+
publish:
|
|
28
|
+
needs: build
|
|
29
|
+
runs-on: ubuntu-latest
|
|
30
|
+
environment: pypi
|
|
31
|
+
permissions:
|
|
32
|
+
id-token: write # required for Trusted Publishing (OIDC)
|
|
33
|
+
steps:
|
|
34
|
+
- name: Download dist
|
|
35
|
+
uses: actions/download-artifact@v4
|
|
36
|
+
with:
|
|
37
|
+
name: dist
|
|
38
|
+
path: dist/
|
|
39
|
+
|
|
40
|
+
- name: Publish to PyPI
|
|
41
|
+
uses: pypa/gh-action-pypi-publish@release/v1
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
CLAUDE.md
|
|
@@ -0,0 +1,77 @@
|
|
|
1
|
+
# codebase-mcp — dev guide
|
|
2
|
+
|
|
3
|
+
## Setup
|
|
4
|
+
|
|
5
|
+
```bash
|
|
6
|
+
uv sync
|
|
7
|
+
```
|
|
8
|
+
|
|
9
|
+
Run all commands either with the `.venv/bin/` prefix or through `uv run`.
|
|
10
|
+
|
|
11
|
+
## Run tests
|
|
12
|
+
|
|
13
|
+
```bash
|
|
14
|
+
.venv/bin/pytest
|
|
15
|
+
.venv/bin/pytest -v
|
|
16
|
+
.venv/bin/pytest tests/test_ast_chunker.py -v # specific module
|
|
17
|
+
```
|
|
18
|
+
|
|
19
|
+
## Lint / format
|
|
20
|
+
|
|
21
|
+
```bash
|
|
22
|
+
.venv/bin/ruff check src tests
|
|
23
|
+
.venv/bin/ruff format src tests
|
|
24
|
+
```
|
|
25
|
+
|
|
26
|
+
Line length: 100. Rules: E, F, I (pycodestyle, pyflakes, isort).
|
|
27
|
+
|
|
28
|
+
## Project structure
|
|
29
|
+
|
|
30
|
+
```
|
|
31
|
+
src/codebase_mcp/
|
|
32
|
+
cli.py # Click CLI: index, reindex, list, remove, serve, config *
|
|
33
|
+
server.py # FastMCP server — exposes search_codebase + list_indexed_repos
|
|
34
|
+
indexer.py # File walking, chunking, embedding, Qdrant upsert
|
|
35
|
+
ast_chunker.py # tree-sitter AST chunking (function/method boundaries)
|
|
36
|
+
searcher.py # Query embedding + Qdrant search + result formatting
|
|
37
|
+
store.py # Qdrant client, config.json r/w, repo metadata
|
|
38
|
+
settings.py # settings.json r/w (embedding model, vector size, api_key, api_base)
|
|
39
|
+
tests/
|
|
40
|
+
test_ast_chunker.py
|
|
41
|
+
test_cli.py
|
|
42
|
+
test_indexer.py
|
|
43
|
+
test_integration.py
|
|
44
|
+
test_searcher.py
|
|
45
|
+
test_settings.py
|
|
46
|
+
test_store.py
|
|
47
|
+
```
|
|
48
|
+
|
|
49
|
+
## Key design decisions
|
|
50
|
+
|
|
51
|
+
**AST chunking → line fallback**: `indexer.chunk_file` tries `ast_chunker.chunk_file_ast` first. If the language is unsupported or tree-sitter fails, it falls back to a 100-line sliding window (with a 20-line overlap). This ensures semantic boundaries for supported languages without breaking on unknown file types.
|
|
52
|
+
|
|
53
|
+
**In-process Qdrant**: No external service needed. `store.get_client()` returns a `QdrantClient` pointing at `~/.codebase-mcp/qdrant/`. One collection per repo, named by `repo_id` (hash of abs path).
|
|
54
|
+
|
|
55
|
+
**OpenAI-compatible embeddings**: `indexer` and `searcher` both instantiate `openai.OpenAI(api_key=..., base_url=...)` from settings. Any OpenAI-compatible provider works by setting `api_base`.
|
|
56
|
+
|
|
57
|
+
**Vector size mismatch detection**: `searcher.search` reads the actual vector dimension from Qdrant and skips repos where it doesn't match the current model's `vector_size`. It emits a warning prompting a reindex.
|
|
58
|
+
|
|
59
|
+
**MAX_CHUNK_CHARS = 16000**: Both `ast_chunker` and `indexer` truncate chunk text at 16k chars (~8000 tokens at 2 chars/token for dense code, which keeps chunks within an 8192-token embedding input limit). The limit is also applied post-collection in `indexer.index_repo` as a final safety net.
|
|
60
|
+
|
|
61
|
+
**Config precedence**: `settings.json` fields override env vars. `api_key=None` in settings → falls back to `OPENAI_API_KEY` env var (handled by OpenAI SDK). `api_base=None` → uses OpenAI default.
|
|
62
|
+
|
|
63
|
+
## Adding a new language
|
|
64
|
+
|
|
65
|
+
1. Add tree-sitter grammar to `pyproject.toml` dependencies.
|
|
66
|
+
2. Add entry to `ast_chunker.EXT_TO_LANG` mapping extension → language name.
|
|
67
|
+
3. Add entry to `ast_chunker.SEMANTIC_NODES` mapping language → relevant node type set.
|
|
68
|
+
4. Add parser instantiation branch in `ast_chunker._get_parser`.
|
|
69
|
+
5. Add extension to `indexer.INDEXED_EXTENSIONS`.
|
|
70
|
+
|
|
71
|
+
## Data dir
|
|
72
|
+
|
|
73
|
+
`~/.codebase-mcp/` — created on first use by `store._data_dir()`.
|
|
74
|
+
|
|
75
|
+
- `config.json` — repo registry: `{abs_path: {repo_id, chunk_count, last_indexed}}`
|
|
76
|
+
- `settings.json` — persistent settings (only non-null, non-default values written)
|
|
77
|
+
- `qdrant/` — Qdrant on-disk storage
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2024 gzamboni
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
|
@@ -0,0 +1,179 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: yacodebase-mcp
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: CLI + MCP server that indexes local codebases into Qdrant using OpenAI-compatible embeddings, then exposes semantic search to Claude Code and other MCP clients
|
|
5
|
+
Project-URL: Homepage, https://github.com/gzamboni/codebase-mcp
|
|
6
|
+
Project-URL: Repository, https://github.com/gzamboni/codebase-mcp
|
|
7
|
+
Project-URL: Issues, https://github.com/gzamboni/codebase-mcp/issues
|
|
8
|
+
Author-email: gzamboni <gzamboni@gmail.com>
|
|
9
|
+
License: MIT
|
|
10
|
+
License-File: LICENSE
|
|
11
|
+
Keywords: claude,codebase,embeddings,mcp,qdrant,semantic-search
|
|
12
|
+
Classifier: Development Status :: 3 - Alpha
|
|
13
|
+
Classifier: Intended Audience :: Developers
|
|
14
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
15
|
+
Classifier: Programming Language :: Python :: 3
|
|
16
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
17
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
18
|
+
Classifier: Topic :: Software Development :: Libraries :: Python Modules
|
|
19
|
+
Requires-Python: >=3.11
|
|
20
|
+
Requires-Dist: click>=8.0
|
|
21
|
+
Requires-Dist: fastmcp>=2.0
|
|
22
|
+
Requires-Dist: openai>=1.0
|
|
23
|
+
Requires-Dist: qdrant-client>=1.9
|
|
24
|
+
Requires-Dist: rich>=13.0
|
|
25
|
+
Requires-Dist: tree-sitter-go>=0.23
|
|
26
|
+
Requires-Dist: tree-sitter-hcl>=0.23
|
|
27
|
+
Requires-Dist: tree-sitter-java>=0.23
|
|
28
|
+
Requires-Dist: tree-sitter-javascript>=0.23
|
|
29
|
+
Requires-Dist: tree-sitter-python>=0.23
|
|
30
|
+
Requires-Dist: tree-sitter-rust>=0.23
|
|
31
|
+
Requires-Dist: tree-sitter-typescript>=0.23
|
|
32
|
+
Requires-Dist: tree-sitter>=0.23
|
|
33
|
+
Description-Content-Type: text/markdown
|
|
34
|
+
|
|
35
|
+
# codebase-mcp
|
|
36
|
+
|
|
37
|
+
Vector search MCP server for codebases. Index repos locally with AST-aware chunking; let Claude (or any MCP client) search them via semantic similarity.
|
|
38
|
+
|
|
39
|
+
## How it works
|
|
40
|
+
|
|
41
|
+
1. **Index** — walks repo files, chunks them using tree-sitter AST (function/method boundaries) with a line-based fallback, embeds the chunks via an OpenAI-compatible API, and stores them in an in-process Qdrant instance.
|
|
42
|
+
2. **Serve** — exposes two MCP tools (`search_codebase`, `list_indexed_repos`) over stdio.
|
|
43
|
+
3. **Search** — embeds the query, retrieves top-8 chunks across all indexed repos (or a specific one), returns ranked results with file path and line numbers.
|
|
44
|
+
|
|
45
|
+
## Install
|
|
46
|
+
|
|
47
|
+
```bash
|
|
48
|
+
uv tool install /path/to/codebase-mcp
|
|
49
|
+
```
|
|
50
|
+
|
|
51
|
+
Or for development:
|
|
52
|
+
|
|
53
|
+
```bash
|
|
54
|
+
uv sync
|
|
55
|
+
```
|
|
56
|
+
|
|
57
|
+
## CLI
|
|
58
|
+
|
|
59
|
+
```bash
|
|
60
|
+
# Index a repo (fails if already indexed)
|
|
61
|
+
codebase-mcp index ~/Code/myproject
|
|
62
|
+
|
|
63
|
+
# Re-index after changes (replaces existing index)
|
|
64
|
+
codebase-mcp reindex ~/Code/myproject
|
|
65
|
+
|
|
66
|
+
# List indexed repos with chunk counts
|
|
67
|
+
codebase-mcp list
|
|
68
|
+
|
|
69
|
+
# Remove a repo from the index
|
|
70
|
+
codebase-mcp remove ~/Code/myproject
|
|
71
|
+
|
|
72
|
+
# Start MCP server (stdio, used by Claude Code)
|
|
73
|
+
codebase-mcp serve
|
|
74
|
+
```
|
|
75
|
+
|
|
76
|
+
### Config commands
|
|
77
|
+
|
|
78
|
+
```bash
|
|
79
|
+
# Show current settings
|
|
80
|
+
codebase-mcp config list
|
|
81
|
+
|
|
82
|
+
# Set embedding model (known models auto-resolve vector size)
|
|
83
|
+
codebase-mcp config set embedding-model text-embedding-3-large
|
|
84
|
+
codebase-mcp config set embedding-model my-custom-model --vector-size 768
|
|
85
|
+
|
|
86
|
+
# Set API credentials
|
|
87
|
+
codebase-mcp config set api-key sk-...
|
|
88
|
+
codebase-mcp config set api-base https://my-provider.com/v1
|
|
89
|
+
|
|
90
|
+
# Revert a setting to default / env var fallback
|
|
91
|
+
codebase-mcp config unset embedding-model
|
|
92
|
+
codebase-mcp config unset api-key
|
|
93
|
+
codebase-mcp config unset api-base
|
|
94
|
+
```
|
|
95
|
+
|
|
96
|
+
**Known models** (vector size auto-detected):
|
|
97
|
+
|
|
98
|
+
| Model | Vector size |
|
|
99
|
+
|---|---|
|
|
100
|
+
| `text-embedding-3-small` | 1536 |
|
|
101
|
+
| `text-embedding-3-large` | 3072 |
|
|
102
|
+
| `text-embedding-ada-002` | 1536 |
|
|
103
|
+
|
|
104
|
+
Default: `text-embedding-3-small`.
|
|
105
|
+
|
|
106
|
+
## Claude Code config
|
|
107
|
+
|
|
108
|
+
Add to `~/.claude/settings.json`:
|
|
109
|
+
|
|
110
|
+
```json
|
|
111
|
+
{
|
|
112
|
+
"mcpServers": {
|
|
113
|
+
"codebase-search": {
|
|
114
|
+
"command": "codebase-mcp",
|
|
115
|
+
"args": ["serve"],
|
|
116
|
+
"env": { "OPENAI_API_KEY": "sk-..." }
|
|
117
|
+
}
|
|
118
|
+
}
|
|
119
|
+
}
|
|
120
|
+
```
|
|
121
|
+
|
|
122
|
+
The API key can also be set via `codebase-mcp config set api-key sk-...` (persisted in `~/.codebase-mcp/settings.json`), which takes precedence over the env var.
|
|
123
|
+
|
|
124
|
+
## MCP tools
|
|
125
|
+
|
|
126
|
+
### `search_codebase`
|
|
127
|
+
|
|
128
|
+
Search indexed repos for relevant code and docs.
|
|
129
|
+
|
|
130
|
+
| Parameter | Type | Description |
|
|
131
|
+
|---|---|---|
|
|
132
|
+
| `query` | string | Natural language description of what to find |
|
|
133
|
+
| `repo_path` | string (optional) | Absolute path to a specific repo; omit to search all |
|
|
134
|
+
|
|
135
|
+
Returns top-8 results ranked by similarity, each with file path, line range, score, and code block.
|
|
136
|
+
|
|
137
|
+
### `list_indexed_repos`
|
|
138
|
+
|
|
139
|
+
List all indexed repos with chunk count and last indexed timestamp. No parameters.
|
|
140
|
+
|
|
141
|
+
## Supported languages (AST chunking)
|
|
142
|
+
|
|
143
|
+
| Language | Extensions | Chunk boundary |
|
|
144
|
+
|---|---|---|
|
|
145
|
+
| Python | `.py` | `function_definition`, `decorated_definition` |
|
|
146
|
+
| TypeScript | `.ts` | `function_declaration`, `method_definition`, `arrow_function` |
|
|
147
|
+
| TSX | `.tsx` | same as TypeScript |
|
|
148
|
+
| JavaScript | `.js`, `.jsx` | `function_declaration`, `method_definition`, `arrow_function` |
|
|
149
|
+
| Go | `.go` | `function_declaration`, `method_declaration` |
|
|
150
|
+
| Rust | `.rs` | `function_item` |
|
|
151
|
+
| Java | `.java` | `method_declaration`, `constructor_declaration` |
|
|
152
|
+
| HCL/Terraform | `.tf` | `block` |
|
|
153
|
+
|
|
154
|
+
Files without AST support (`.md`, `.yaml`, `.toml`, `.json`, `.rb`, `.cpp`, `.c`, `.h`) fall back to a 100-line sliding window with a 20-line overlap.
|
|
155
|
+
|
|
156
|
+
## Data storage
|
|
157
|
+
|
|
158
|
+
All data lives in `~/.codebase-mcp/`:
|
|
159
|
+
|
|
160
|
+
```
|
|
161
|
+
~/.codebase-mcp/
|
|
162
|
+
config.json # indexed repo metadata (paths, repo_ids, chunk counts, timestamps)
|
|
163
|
+
settings.json # embedding model, vector size, api_key, api_base
|
|
164
|
+
qdrant/ # Qdrant in-process storage (one collection per repo)
|
|
165
|
+
```
|
|
166
|
+
|
|
167
|
+
Each repo gets a stable `repo_id` derived from its absolute path (used as the Qdrant collection name). Reindexing replaces the collection in place.
|
|
168
|
+
|
|
169
|
+
## OpenAI-compatible providers
|
|
170
|
+
|
|
171
|
+
Set `api-base` to use any OpenAI-compatible embedding API (e.g. Ollama, vLLM, Azure):
|
|
172
|
+
|
|
173
|
+
```bash
|
|
174
|
+
codebase-mcp config set api-base http://localhost:11434/v1
|
|
175
|
+
codebase-mcp config set api-key ollama
|
|
176
|
+
codebase-mcp config set embedding-model nomic-embed-text --vector-size 768
|
|
177
|
+
```
|
|
178
|
+
|
|
179
|
+
After changing the model, reindex all repos (vector dimensions must match).
|
|
@@ -0,0 +1,145 @@
|
|
|
1
|
+
# codebase-mcp
|
|
2
|
+
|
|
3
|
+
Vector search MCP server for codebases. Index repos locally with AST-aware chunking; let Claude (or any MCP client) search them via semantic similarity.
|
|
4
|
+
|
|
5
|
+
## How it works
|
|
6
|
+
|
|
7
|
+
1. **Index** — walks repo files, chunks them using tree-sitter AST (function/method boundaries) with a line-based fallback, embeds the chunks via an OpenAI-compatible API, and stores them in an in-process Qdrant instance.
|
|
8
|
+
2. **Serve** — exposes two MCP tools (`search_codebase`, `list_indexed_repos`) over stdio.
|
|
9
|
+
3. **Search** — embeds the query, retrieves top-8 chunks across all indexed repos (or a specific one), returns ranked results with file path and line numbers.
|
|
10
|
+
|
|
11
|
+
## Install
|
|
12
|
+
|
|
13
|
+
```bash
|
|
14
|
+
uv tool install /path/to/codebase-mcp
|
|
15
|
+
```
|
|
16
|
+
|
|
17
|
+
Or for development:
|
|
18
|
+
|
|
19
|
+
```bash
|
|
20
|
+
uv sync
|
|
21
|
+
```
|
|
22
|
+
|
|
23
|
+
## CLI
|
|
24
|
+
|
|
25
|
+
```bash
|
|
26
|
+
# Index a repo (fails if already indexed)
|
|
27
|
+
codebase-mcp index ~/Code/myproject
|
|
28
|
+
|
|
29
|
+
# Re-index after changes (replaces existing index)
|
|
30
|
+
codebase-mcp reindex ~/Code/myproject
|
|
31
|
+
|
|
32
|
+
# List indexed repos with chunk counts
|
|
33
|
+
codebase-mcp list
|
|
34
|
+
|
|
35
|
+
# Remove a repo from the index
|
|
36
|
+
codebase-mcp remove ~/Code/myproject
|
|
37
|
+
|
|
38
|
+
# Start MCP server (stdio, used by Claude Code)
|
|
39
|
+
codebase-mcp serve
|
|
40
|
+
```
|
|
41
|
+
|
|
42
|
+
### Config commands
|
|
43
|
+
|
|
44
|
+
```bash
|
|
45
|
+
# Show current settings
|
|
46
|
+
codebase-mcp config list
|
|
47
|
+
|
|
48
|
+
# Set embedding model (known models auto-resolve vector size)
|
|
49
|
+
codebase-mcp config set embedding-model text-embedding-3-large
|
|
50
|
+
codebase-mcp config set embedding-model my-custom-model --vector-size 768
|
|
51
|
+
|
|
52
|
+
# Set API credentials
|
|
53
|
+
codebase-mcp config set api-key sk-...
|
|
54
|
+
codebase-mcp config set api-base https://my-provider.com/v1
|
|
55
|
+
|
|
56
|
+
# Revert a setting to default / env var fallback
|
|
57
|
+
codebase-mcp config unset embedding-model
|
|
58
|
+
codebase-mcp config unset api-key
|
|
59
|
+
codebase-mcp config unset api-base
|
|
60
|
+
```
|
|
61
|
+
|
|
62
|
+
**Known models** (vector size auto-detected):
|
|
63
|
+
|
|
64
|
+
| Model | Vector size |
|
|
65
|
+
|---|---|
|
|
66
|
+
| `text-embedding-3-small` | 1536 |
|
|
67
|
+
| `text-embedding-3-large` | 3072 |
|
|
68
|
+
| `text-embedding-ada-002` | 1536 |
|
|
69
|
+
|
|
70
|
+
Default: `text-embedding-3-small`.
|
|
71
|
+
|
|
72
|
+
## Claude Code config
|
|
73
|
+
|
|
74
|
+
Add to `~/.claude/settings.json`:
|
|
75
|
+
|
|
76
|
+
```json
|
|
77
|
+
{
|
|
78
|
+
"mcpServers": {
|
|
79
|
+
"codebase-search": {
|
|
80
|
+
"command": "codebase-mcp",
|
|
81
|
+
"args": ["serve"],
|
|
82
|
+
"env": { "OPENAI_API_KEY": "sk-..." }
|
|
83
|
+
}
|
|
84
|
+
}
|
|
85
|
+
}
|
|
86
|
+
```
|
|
87
|
+
|
|
88
|
+
The API key can also be set via `codebase-mcp config set api-key sk-...` (persisted in `~/.codebase-mcp/settings.json`), which takes precedence over the env var.
|
|
89
|
+
|
|
90
|
+
## MCP tools
|
|
91
|
+
|
|
92
|
+
### `search_codebase`
|
|
93
|
+
|
|
94
|
+
Search indexed repos for relevant code and docs.
|
|
95
|
+
|
|
96
|
+
| Parameter | Type | Description |
|
|
97
|
+
|---|---|---|
|
|
98
|
+
| `query` | string | Natural language description of what to find |
|
|
99
|
+
| `repo_path` | string (optional) | Absolute path to a specific repo; omit to search all |
|
|
100
|
+
|
|
101
|
+
Returns top-8 results ranked by similarity, each with file path, line range, score, and code block.
|
|
102
|
+
|
|
103
|
+
### `list_indexed_repos`
|
|
104
|
+
|
|
105
|
+
List all indexed repos with chunk count and last indexed timestamp. No parameters.
|
|
106
|
+
|
|
107
|
+
## Supported languages (AST chunking)
|
|
108
|
+
|
|
109
|
+
| Language | Extensions | Chunk boundary |
|
|
110
|
+
|---|---|---|
|
|
111
|
+
| Python | `.py` | `function_definition`, `decorated_definition` |
|
|
112
|
+
| TypeScript | `.ts` | `function_declaration`, `method_definition`, `arrow_function` |
|
|
113
|
+
| TSX | `.tsx` | same as TypeScript |
|
|
114
|
+
| JavaScript | `.js`, `.jsx` | `function_declaration`, `method_definition`, `arrow_function` |
|
|
115
|
+
| Go | `.go` | `function_declaration`, `method_declaration` |
|
|
116
|
+
| Rust | `.rs` | `function_item` |
|
|
117
|
+
| Java | `.java` | `method_declaration`, `constructor_declaration` |
|
|
118
|
+
| HCL/Terraform | `.tf` | `block` |
|
|
119
|
+
|
|
120
|
+
Files without AST support (`.md`, `.yaml`, `.toml`, `.json`, `.rb`, `.cpp`, `.c`, `.h`) fall back to a 100-line sliding window with a 20-line overlap.
|
|
121
|
+
|
|
122
|
+
## Data storage
|
|
123
|
+
|
|
124
|
+
All data lives in `~/.codebase-mcp/`:
|
|
125
|
+
|
|
126
|
+
```
|
|
127
|
+
~/.codebase-mcp/
|
|
128
|
+
config.json # indexed repo metadata (paths, repo_ids, chunk counts, timestamps)
|
|
129
|
+
settings.json # embedding model, vector size, api_key, api_base
|
|
130
|
+
qdrant/ # Qdrant in-process storage (one collection per repo)
|
|
131
|
+
```
|
|
132
|
+
|
|
133
|
+
Each repo gets a stable `repo_id` derived from its absolute path (used as the Qdrant collection name). Reindexing replaces the collection in place.
|
|
134
|
+
|
|
135
|
+
## OpenAI-compatible providers
|
|
136
|
+
|
|
137
|
+
Set `api-base` to use any OpenAI-compatible embedding API (e.g. Ollama, vLLM, Azure):
|
|
138
|
+
|
|
139
|
+
```bash
|
|
140
|
+
codebase-mcp config set api-base http://localhost:11434/v1
|
|
141
|
+
codebase-mcp config set api-key ollama
|
|
142
|
+
codebase-mcp config set embedding-model nomic-embed-text --vector-size 768
|
|
143
|
+
```
|
|
144
|
+
|
|
145
|
+
After changing the model, reindex all repos (vector dimensions must match).
|