source-mcp 0.1.3b1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- source_mcp-0.1.3b1/.env.example +15 -0
- source_mcp-0.1.3b1/.gitignore +77 -0
- source_mcp-0.1.3b1/GEMINI.md +62 -0
- source_mcp-0.1.3b1/LICENSE +21 -0
- source_mcp-0.1.3b1/PKG-INFO +128 -0
- source_mcp-0.1.3b1/README.md +110 -0
- source_mcp-0.1.3b1/pyproject.toml +38 -0
- source_mcp-0.1.3b1/src/__init__.py +0 -0
- source_mcp-0.1.3b1/src/config.py +26 -0
- source_mcp-0.1.3b1/src/main.py +163 -0
- source_mcp-0.1.3b1/src/services/__init__.py +0 -0
- source_mcp-0.1.3b1/src/services/file_filter.py +181 -0
- source_mcp-0.1.3b1/src/services/indexer.py +597 -0
- source_mcp-0.1.3b1/src/services/monitor.py +105 -0
- source_mcp-0.1.3b1/src/web/__init__.py +0 -0
- source_mcp-0.1.3b1/src/web/app.py +125 -0
- source_mcp-0.1.3b1/src/web/templates/index.html +381 -0
- source_mcp-0.1.3b1/tests/scripts/generate_test_data.py +124 -0
- source_mcp-0.1.3b1/tests/test_api.py +80 -0
- source_mcp-0.1.3b1/tests/test_chunker.py +63 -0
- source_mcp-0.1.3b1/tests/test_e2e_real.py +188 -0
- source_mcp-0.1.3b1/tests/test_file_filter.py +109 -0
- source_mcp-0.1.3b1/tests/test_indexer.py +140 -0
- source_mcp-0.1.3b1/tests/test_integration.py +211 -0
- source_mcp-0.1.3b1/uv.lock +2269 -0
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
# RMCP Environment Configuration
|
|
2
|
+
# Copy this to .env and fill in your values
|
|
3
|
+
|
|
4
|
+
# Provider: "fastembed" or "openai"
|
|
5
|
+
EMBEDDING_PROVIDER=openai
|
|
6
|
+
|
|
7
|
+
# OpenAI API Key (Required for openai provider)
|
|
8
|
+
OPENAI_API_KEY=your_key_here
|
|
9
|
+
|
|
10
|
+
# Optional: Override default models
|
|
11
|
+
# EMBEDDING_MODEL=text-embedding-3-small
|
|
12
|
+
# EMBEDDING_MODEL=sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2
|
|
13
|
+
|
|
14
|
+
# Storage Path
|
|
15
|
+
ZVEC_PATH=./zvec_db
|
|
@@ -0,0 +1,77 @@
|
|
|
1
|
+
# General
|
|
2
|
+
.DS_Store
|
|
3
|
+
.DS_Store?
|
|
4
|
+
._*
|
|
5
|
+
.Spotlight-V100
|
|
6
|
+
.Trashes
|
|
7
|
+
ehthumbs.db
|
|
8
|
+
Thumbs.db
|
|
9
|
+
*.log
|
|
10
|
+
|
|
11
|
+
# Python-generated files
|
|
12
|
+
__pycache__/
|
|
13
|
+
*.py[cod]
|
|
14
|
+
*$py.class
|
|
15
|
+
*.so
|
|
16
|
+
.Python
|
|
17
|
+
build/
|
|
18
|
+
develop-eggs/
|
|
19
|
+
dist/
|
|
20
|
+
downloads/
|
|
21
|
+
eggs/
|
|
22
|
+
.eggs/
|
|
23
|
+
lib/
|
|
24
|
+
lib64/
|
|
25
|
+
parts/
|
|
26
|
+
sdist/
|
|
27
|
+
var/
|
|
28
|
+
wheels/
|
|
29
|
+
share/python-wheels/
|
|
30
|
+
*.egg-info/
|
|
31
|
+
.installed.cfg
|
|
32
|
+
*.egg
|
|
33
|
+
MANIFEST
|
|
34
|
+
|
|
35
|
+
# Virtual environments
|
|
36
|
+
.venv/
|
|
37
|
+
env/
|
|
38
|
+
venv/
|
|
39
|
+
ENV/
|
|
40
|
+
env.bak/
|
|
41
|
+
venv.bak/
|
|
42
|
+
.python-version
|
|
43
|
+
|
|
44
|
+
# Environment variables
|
|
45
|
+
.env
|
|
46
|
+
.env.*
|
|
47
|
+
!.env.example
|
|
48
|
+
|
|
49
|
+
# IDEs and Editors
|
|
50
|
+
.idea/
|
|
51
|
+
.vscode/
|
|
52
|
+
*.swp
|
|
53
|
+
*.swo
|
|
54
|
+
*~
|
|
55
|
+
*.sublime-project
|
|
56
|
+
*.sublime-workspace
|
|
57
|
+
|
|
58
|
+
# Testing, Linting and Coverage
|
|
59
|
+
.pytest_cache/
|
|
60
|
+
.mypy_cache/
|
|
61
|
+
.ruff_cache/
|
|
62
|
+
.hypothesis/
|
|
63
|
+
.tox/
|
|
64
|
+
.coverage
|
|
65
|
+
.coverage.*
|
|
66
|
+
htmlcov/
|
|
67
|
+
coverage.xml
|
|
68
|
+
nosetests.xml
|
|
69
|
+
|
|
70
|
+
# Jupyter Notebooks
|
|
71
|
+
.ipynb_checkpoints
|
|
72
|
+
|
|
73
|
+
# Project Specific Data (Local State)
|
|
74
|
+
zvec_db/
|
|
75
|
+
.rmcp/
|
|
76
|
+
.rmcp_manifest.json
|
|
77
|
+
artifacts/
|
|
@@ -0,0 +1,62 @@
|
|
|
1
|
+
# Source-MCP (formerly RAG-MCP) - Gemini Instructions & Guide
|
|
2
|
+
|
|
3
|
+
This file contains useful instructions and summaries of changes made during our sessions.
|
|
4
|
+
|
|
5
|
+
## 🚀 How to Run
|
|
6
|
+
|
|
7
|
+
### Manual Run (Terminal)
|
|
8
|
+
|
|
9
|
+
Ensure you have `uv` installed.
|
|
10
|
+
|
|
11
|
+
```bash
|
|
12
|
+
# Set required environment variables
|
|
13
|
+
export EMBEDDING_PROVIDER="openai" # or "fastembed"
|
|
14
|
+
export OPENAI_API_KEY="sk-..." # required for openai
|
|
15
|
+
export ZVEC_PATH="./zvec_db" # optional override
|
|
16
|
+
|
|
17
|
+
uv run python -m src.main --path .
|
|
18
|
+
```
|
|
19
|
+
|
|
20
|
+
### Environment Variables (`.env`)
|
|
21
|
+
|
|
22
|
+
You can create a `.env` file in the root directory:
|
|
23
|
+
|
|
24
|
+
```bash
|
|
25
|
+
EMBEDDING_PROVIDER=openai
|
|
26
|
+
OPENAI_API_KEY=your_key
|
|
27
|
+
ZVEC_PATH=./zvec_db
|
|
28
|
+
```
|
|
29
|
+
|
|
30
|
+
## 🛠 Features
|
|
31
|
+
|
|
32
|
+
### 1. OpenAI & FastEmbed Support
|
|
33
|
+
|
|
34
|
+
- **OpenAI**: Defaults to `text-embedding-3-small` (1536 dims).
|
|
35
|
+
- **FastEmbed**: Defaults to `sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2` (384 dims, multilingual).
|
|
36
|
+
- **Auto-Migration**: If you change providers/models and the vector dimension changes, the service detects it via `meta.json` in the DB folder and **automatically recreates** the index.
|
|
37
|
+
|
|
38
|
+
### 2. Incremental Indexing
|
|
39
|
+
|
|
40
|
+
- Uses `.source-mcp_manifest.json` to store file fingerprints (mtime + size).
|
|
41
|
+
- Only indexes new or modified files on startup.
|
|
42
|
+
|
|
43
|
+
### 3. Web Dashboard (Port 8000)
|
|
44
|
+
|
|
45
|
+
- **Live Logs**: Includes an **Auto-scroll toggle** (click the pulse indicator).
|
|
46
|
+
- **Reindex Base**: A red button to force-wipe the DB and manifest for a fresh full scan.
|
|
47
|
+
- **Search Debug**: Special endpoint `/api/search/debug?q=...` to see raw scores.
|
|
48
|
+
|
|
49
|
+
## 🧪 Testing
|
|
50
|
+
|
|
51
|
+
```bash
|
|
52
|
+
uv run python -m pytest tests/ -v
|
|
53
|
+
```
|
|
54
|
+
|
|
55
|
+
## 📝 Configuration
|
|
56
|
+
|
|
57
|
+
- `src/config.py`: Contains default settings.
|
|
58
|
+
- `mcp_config.json`: Use this to configure Source-MCP as an MCP server for Gemini/Claude.
|
|
59
|
+
|
|
60
|
+
## Communication Language
|
|
61
|
+
|
|
62
|
+
- ALL interactions, explanations, and commit messages MUST be in Russian unless explicitly requested otherwise.
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 Source-MCP
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
|
@@ -0,0 +1,128 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: source-mcp
|
|
3
|
+
Version: 0.1.3b1
|
|
4
|
+
Summary: A Model Context Protocol (MCP) server for semantic search and Retrieval-Augmented Generation (RAG) over local codebases and documents.
|
|
5
|
+
License-File: LICENSE
|
|
6
|
+
Requires-Python: >=3.10
|
|
7
|
+
Requires-Dist: fastapi>=0.129.0
|
|
8
|
+
Requires-Dist: fastembed>=0.7.4
|
|
9
|
+
Requires-Dist: jinja2>=3.1.6
|
|
10
|
+
Requires-Dist: mcp[cli]>=1.26.0
|
|
11
|
+
Requires-Dist: openai>=2.21.0
|
|
12
|
+
Requires-Dist: pathspec>=1.0.4
|
|
13
|
+
Requires-Dist: python-multipart>=0.0.22
|
|
14
|
+
Requires-Dist: uvicorn>=0.41.0
|
|
15
|
+
Requires-Dist: watchdog>=6.0.0
|
|
16
|
+
Requires-Dist: zvec>=0.2.0
|
|
17
|
+
Description-Content-Type: text/markdown
|
|
18
|
+
|
|
19
|
+
<div align="center">
|
|
20
|
+
<h1>🔍 Source-MCP</h1>
|
|
21
|
+
<p><strong>A Model Context Protocol (MCP) server for semantic search and Retrieval-Augmented Generation (RAG) over local codebases and documents.</strong></p>
|
|
22
|
+
</div>
|
|
23
|
+
|
|
24
|
+
---
|
|
25
|
+
|
|
26
|
+
## 📖 Overview
|
|
27
|
+
|
|
28
|
+
**Source-MCP** leverages the [Model Context Protocol](https://modelcontextprotocol.io) to provide AI assistants (like Claude, Gemini, and others) with direct access to local files through semantic search.
|
|
29
|
+
|
|
30
|
+
Instead of manually copy-pasting code or documentation into your prompts, Source-MCP automatically indexes your local repository, generates vector embeddings, and enables the AI to semantically search and retrieve only the most relevant files.
|
|
31
|
+
|
|
32
|
+
## ✨ Key Features
|
|
33
|
+
|
|
34
|
+
- **Dual Embedding Support:**
|
|
35
|
+
- **OpenAI:** Uses robust `text-embedding-3-small` (1536 dimensions) for high-quality enterprise embeddings.
|
|
36
|
+
- **FastEmbed (Local):** Uses `sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2` (384 dims). Runs entirely locally, no API keys required, and supports multilingual queries.
|
|
37
|
+
- **Smart Incremental Indexing:** Uses file fingerprints (modified time + size) to only index new or modified files, ensuring lightning-fast startup times.
|
|
38
|
+
- **Auto-Migration:** Automatically detects embedding dimension changes (e.g., switching from OpenAI to FastEmbed) and safely recreates the vector index.
|
|
39
|
+
- **Web Dashboard (Port 8000):**
|
|
40
|
+
- **Live Logs:** View real-time indexing and search activity with auto-scroll.
|
|
41
|
+
- **Reindex Base:** Force-wipe the vector DB and manifest for a completely fresh full scan.
|
|
42
|
+
|
|
43
|
+
- **Search Debugging:** Special endpoint (`/api/search/debug?q=...`) to test raw semantic search scores.
|
|
44
|
+
|
|
45
|
+
## 🤔 Why local embeddings and `zvec`?
|
|
46
|
+
|
|
47
|
+
We use [**zvec**](https://github.com/alibaba/zvec), a lightweight, high-performance vector database maintained by Alibaba. `zvec` is embedded directly into the Python process, eliminating the need to set up or run external vector servers (like Pinecone, Milvus, or Qdrant). Combined with `FastEmbed`, this allows Source-MCP to build the entire semantic search pipeline **fully offline**, quickly, and entirely on your local machine.
|
|
48
|
+
|
|
49
|
+
## 🚀 Installation & Setup
|
|
50
|
+
|
|
51
|
+
1. **Prerequisites:** Ensure you have Python 3.10+ and [`uv`](https://github.com/astral-sh/uv) installed.
|
|
52
|
+
2. **Clone the repository:**
|
|
53
|
+
|
|
54
|
+
```bash
|
|
55
|
+
git clone https://github.com/AlexShimmy/source-mcp.git
|
|
56
|
+
cd source-mcp
|
|
57
|
+
```
|
|
58
|
+
|
|
59
|
+
3. **Install Dependencies:**
|
|
60
|
+
|
|
61
|
+
```bash
|
|
62
|
+
# uv will automatically handle virtual environment creation and dependencies
|
|
63
|
+
uv sync
|
|
64
|
+
```
|
|
65
|
+
|
|
66
|
+
## ⚙️ Configuration
|
|
67
|
+
|
|
68
|
+
Create a `.env` file in the root directory (you can copy `.env.example` if available).
|
|
69
|
+
|
|
70
|
+
```bash
|
|
71
|
+
# Choose your provider: "openai" or "fastembed"
|
|
72
|
+
EMBEDDING_PROVIDER=openai
|
|
73
|
+
|
|
74
|
+
# Required ONLY if using OpenAI
|
|
75
|
+
|
|
76
|
+
OPENAI_API_KEY=sk-your-openai-api-key
|
|
77
|
+
|
|
78
|
+
# Optional: Path to store the vector database (Defaults to `.source-mcp/zvec_db` in the index dir)
|
|
79
|
+
ZVEC_PATH=./zvec_db
|
|
80
|
+
|
|
81
|
+
# Optional: Which directory to index (Defaults to current directory)
|
|
82
|
+
SOURCE_MCP_INDEX_DIR=/path/to/your/project
|
|
83
|
+
|
|
84
|
+
# Optional: Port for the Web Dashboard (Defaults to 8000)
|
|
85
|
+
WEB_PORT=8000
|
|
86
|
+
```
|
|
87
|
+
|
|
88
|
+
## 🖱️ Usage
|
|
89
|
+
|
|
90
|
+
### Running Manually (Terminal & Dashboard)
|
|
91
|
+
|
|
92
|
+
To start the MCP server manually and access the web dashboard:
|
|
93
|
+
|
|
94
|
+
```bash
|
|
95
|
+
uv run python -m src.main --path .
|
|
96
|
+
```
|
|
97
|
+
|
|
98
|
+
- The **MCP protocol** will listen on `stdio`.
|
|
99
|
+
- The **Web Dashboard** will be available at [http://localhost:8000](http://localhost:8000).
|
|
100
|
+
|
|
101
|
+
### 🔌 MCP Configuration
|
|
102
|
+
|
|
103
|
+
The config is the same for all clients (Claude Desktop, Cursor, VS Code / Cline, etc.):
|
|
104
|
+
|
|
105
|
+
```json
|
|
106
|
+
{
|
|
107
|
+
"mcpServers": {
|
|
108
|
+
"source-mcp": {
|
|
109
|
+
"command": "uv",
|
|
110
|
+
"args": ["--directory", "/absolute/path/to/source-mcp", "run", "python", "-m", "src.main"]
|
|
111
|
+
}
|
|
112
|
+
}
|
|
113
|
+
}
|
|
114
|
+
```
|
|
115
|
+
|
|
116
|
+
All other settings (such as `SOURCE_MCP_INDEX_DIR`, `EMBEDDING_PROVIDER` or `OPENAI_API_KEY`) should be configured via the `.env` file in the root directory of Source-MCP.
|
|
117
|
+
|
|
118
|
+
## 🧪 Testing
|
|
119
|
+
|
|
120
|
+
The project uses `pytest` for unit and end-to-end tests. To run the test suite:
|
|
121
|
+
|
|
122
|
+
```bash
|
|
123
|
+
uv run python -m pytest tests/ -v
|
|
124
|
+
```
|
|
125
|
+
|
|
126
|
+
## 📜 License
|
|
127
|
+
|
|
128
|
+
This project is licensed under the MIT License - see the LICENSE file for details.
|
|
@@ -0,0 +1,110 @@
|
|
|
1
|
+
<div align="center">
|
|
2
|
+
<h1>🔍 Source-MCP</h1>
|
|
3
|
+
<p><strong>A Model Context Protocol (MCP) server for semantic search and Retrieval-Augmented Generation (RAG) over local codebases and documents.</strong></p>
|
|
4
|
+
</div>
|
|
5
|
+
|
|
6
|
+
---
|
|
7
|
+
|
|
8
|
+
## 📖 Overview
|
|
9
|
+
|
|
10
|
+
**Source-MCP** leverages the [Model Context Protocol](https://modelcontextprotocol.io) to provide AI assistants (like Claude, Gemini, and others) with direct access to local files through semantic search.
|
|
11
|
+
|
|
12
|
+
Instead of manually copy-pasting code or documentation into your prompts, Source-MCP automatically indexes your local repository, generates vector embeddings, and enables the AI to semantically search and retrieve only the most relevant files.
|
|
13
|
+
|
|
14
|
+
## ✨ Key Features
|
|
15
|
+
|
|
16
|
+
- **Dual Embedding Support:**
|
|
17
|
+
- **OpenAI:** Uses robust `text-embedding-3-small` (1536 dimensions) for high-quality enterprise embeddings.
|
|
18
|
+
- **FastEmbed (Local):** Uses `sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2` (384 dims). Runs entirely locally, no API keys required, and supports multilingual queries.
|
|
19
|
+
- **Smart Incremental Indexing:** Uses file fingerprints (modified time + size) to only index new or modified files, ensuring lightning-fast startup times.
|
|
20
|
+
- **Auto-Migration:** Automatically detects embedding dimension changes (e.g., switching from OpenAI to FastEmbed) and safely recreates the vector index.
|
|
21
|
+
- **Web Dashboard (Port 8000):**
|
|
22
|
+
- **Live Logs:** View real-time indexing and search activity with auto-scroll.
|
|
23
|
+
- **Reindex Base:** Force-wipe the vector DB and manifest for a completely fresh full scan.
|
|
24
|
+
|
|
25
|
+
- **Search Debugging:** Special endpoint (`/api/search/debug?q=...`) to test raw semantic search scores.
|
|
26
|
+
|
|
27
|
+
## 🤔 Why local embeddings and `zvec`?
|
|
28
|
+
|
|
29
|
+
We use [**zvec**](https://github.com/alibaba/zvec), a lightweight, high-performance vector database maintained by Alibaba. `zvec` is embedded directly into the Python process, eliminating the need to set up or run external vector servers (like Pinecone, Milvus, or Qdrant). Combined with `FastEmbed`, this allows Source-MCP to build the entire semantic search pipeline **fully offline**, quickly, and entirely on your local machine.
|
|
30
|
+
|
|
31
|
+
## 🚀 Installation & Setup
|
|
32
|
+
|
|
33
|
+
1. **Prerequisites:** Ensure you have Python 3.10+ and [`uv`](https://github.com/astral-sh/uv) installed.
|
|
34
|
+
2. **Clone the repository:**
|
|
35
|
+
|
|
36
|
+
```bash
|
|
37
|
+
git clone https://github.com/AlexShimmy/source-mcp.git
|
|
38
|
+
cd source-mcp
|
|
39
|
+
```
|
|
40
|
+
|
|
41
|
+
3. **Install Dependencies:**
|
|
42
|
+
|
|
43
|
+
```bash
|
|
44
|
+
# uv will automatically handle virtual environment creation and dependencies
|
|
45
|
+
uv sync
|
|
46
|
+
```
|
|
47
|
+
|
|
48
|
+
## ⚙️ Configuration
|
|
49
|
+
|
|
50
|
+
Create a `.env` file in the root directory (you can copy `.env.example` if available).
|
|
51
|
+
|
|
52
|
+
```bash
|
|
53
|
+
# Choose your provider: "openai" or "fastembed"
|
|
54
|
+
EMBEDDING_PROVIDER=openai
|
|
55
|
+
|
|
56
|
+
# Required ONLY if using OpenAI
|
|
57
|
+
|
|
58
|
+
OPENAI_API_KEY=sk-your-openai-api-key
|
|
59
|
+
|
|
60
|
+
# Optional: Path to store the vector database (Defaults to `.source-mcp/zvec_db` in the index dir)
|
|
61
|
+
ZVEC_PATH=./zvec_db
|
|
62
|
+
|
|
63
|
+
# Optional: Which directory to index (Defaults to current directory)
|
|
64
|
+
SOURCE_MCP_INDEX_DIR=/path/to/your/project
|
|
65
|
+
|
|
66
|
+
# Optional: Port for the Web Dashboard (Defaults to 8000)
|
|
67
|
+
WEB_PORT=8000
|
|
68
|
+
```
|
|
69
|
+
|
|
70
|
+
## 🖱️ Usage
|
|
71
|
+
|
|
72
|
+
### Running Manually (Terminal & Dashboard)
|
|
73
|
+
|
|
74
|
+
To start the MCP server manually and access the web dashboard:
|
|
75
|
+
|
|
76
|
+
```bash
|
|
77
|
+
uv run python -m src.main --path .
|
|
78
|
+
```
|
|
79
|
+
|
|
80
|
+
- The **MCP protocol** will listen on `stdio`.
|
|
81
|
+
- The **Web Dashboard** will be available at [http://localhost:8000](http://localhost:8000).
|
|
82
|
+
|
|
83
|
+
### 🔌 MCP Configuration
|
|
84
|
+
|
|
85
|
+
The config is the same for all clients (Claude Desktop, Cursor, VS Code / Cline, etc.):
|
|
86
|
+
|
|
87
|
+
```json
|
|
88
|
+
{
|
|
89
|
+
"mcpServers": {
|
|
90
|
+
"source-mcp": {
|
|
91
|
+
"command": "uv",
|
|
92
|
+
"args": ["--directory", "/absolute/path/to/source-mcp", "run", "python", "-m", "src.main"]
|
|
93
|
+
}
|
|
94
|
+
}
|
|
95
|
+
}
|
|
96
|
+
```
|
|
97
|
+
|
|
98
|
+
All other settings (such as `SOURCE_MCP_INDEX_DIR`, `EMBEDDING_PROVIDER` or `OPENAI_API_KEY`) should be configured via the `.env` file in the root directory of Source-MCP.
|
|
99
|
+
|
|
100
|
+
## 🧪 Testing
|
|
101
|
+
|
|
102
|
+
The project uses `pytest` for unit and end-to-end tests. To run the test suite:
|
|
103
|
+
|
|
104
|
+
```bash
|
|
105
|
+
uv run python -m pytest tests/ -v
|
|
106
|
+
```
|
|
107
|
+
|
|
108
|
+
## 📜 License
|
|
109
|
+
|
|
110
|
+
This project is licensed under the MIT License - see the LICENSE file for details.
|
|
@@ -0,0 +1,38 @@
|
|
|
1
|
+
[project]
|
|
2
|
+
name = "source-mcp"
|
|
3
|
+
version = "0.1.3b1"
|
|
4
|
+
description = "A Model Context Protocol (MCP) server for semantic search and Retrieval-Augmented Generation (RAG) over local codebases and documents."
|
|
5
|
+
readme = "README.md"
|
|
6
|
+
requires-python = ">=3.10"
|
|
7
|
+
dependencies = [
|
|
8
|
+
"fastapi>=0.129.0",
|
|
9
|
+
"fastembed>=0.7.4",
|
|
10
|
+
"jinja2>=3.1.6",
|
|
11
|
+
"mcp[cli]>=1.26.0",
|
|
12
|
+
"openai>=2.21.0",
|
|
13
|
+
"pathspec>=1.0.4",
|
|
14
|
+
"python-multipart>=0.0.22",
|
|
15
|
+
"uvicorn>=0.41.0",
|
|
16
|
+
"watchdog>=6.0.0",
|
|
17
|
+
"zvec>=0.2.0",
|
|
18
|
+
]
|
|
19
|
+
|
|
20
|
+
[build-system]
|
|
21
|
+
requires = ["hatchling"]
|
|
22
|
+
build-backend = "hatchling.build"
|
|
23
|
+
|
|
24
|
+
[tool.hatch.build.targets.wheel]
|
|
25
|
+
packages = ["src"]
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
[project.scripts]
|
|
30
|
+
source-mcp = "src.main:main"
|
|
31
|
+
|
|
32
|
+
[dependency-groups]
|
|
33
|
+
dev = [
|
|
34
|
+
"httpx>=0.28.1",
|
|
35
|
+
"pytest>=9.0.2",
|
|
36
|
+
"pytest-asyncio>=1.3.0",
|
|
37
|
+
"pytest-cov>=7.0.0",
|
|
38
|
+
]
|
|
File without changes
|
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
from pathlib import Path
|
|
2
|
+
from pydantic_settings import BaseSettings, SettingsConfigDict
|
|
3
|
+
|
|
4
|
+
|
|
5
|
+
import os
|
|
6
|
+
|
|
7
|
+
class Settings(BaseSettings):
    """Application settings, loaded from the environment / a `.env` file.

    Values declared here are defaults only: `src.main.main()` overwrites
    several fields at startup (docs_path, zvec_path, embedding_provider,
    embedding_model, openai_api_key, web_port) from CLI args and env vars.
    """

    # Read `.env` from the current working directory; silently ignore
    # environment variables this model does not declare.
    model_config = SettingsConfigDict(env_file=".env", extra="ignore")

    # Directory to index; SOURCE_MCP_INDEX_DIR overrides, else current dir.
    docs_path: str = os.getenv("SOURCE_MCP_INDEX_DIR", ".")
    # Filesystem location of the zvec vector database.
    zvec_path: str = "./zvec_db"

    # Embedding settings
    embedding_provider: str = "fastembed"  # "fastembed" or "openai"
    # FastEmbed model (384 dims)
    # embedding_model: str = "sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2"
    # OpenAI model (1536 dims for text-embedding-3-small)
    # None means "let the selected provider pick its default model".
    embedding_model: str | None = None
    # Only required when embedding_provider == "openai".
    openai_api_key: str | None = None

    # Web Dashboard settings
    web_port: int = 8000
    host: str = "127.0.0.1"


# Module-level singleton imported by the rest of the application.
settings = Settings()
|
|
@@ -0,0 +1,163 @@
|
|
|
1
|
+
import sys
|
|
2
|
+
import os
|
|
3
|
+
import argparse
|
|
4
|
+
import threading
|
|
5
|
+
import webbrowser
|
|
6
|
+
from pathlib import Path
|
|
7
|
+
|
|
8
|
+
import uvicorn
|
|
9
|
+
from dotenv import load_dotenv
|
|
10
|
+
from mcp.server.fastmcp import FastMCP
|
|
11
|
+
|
|
12
|
+
from .config import settings
|
|
13
|
+
from .services.indexer import indexer
|
|
14
|
+
from .services.monitor import logger, monitor
|
|
15
|
+
from .web.app import app as web_app
|
|
16
|
+
|
|
17
|
+
# ── MCP Server ──────────────────────────────────────────────
# FastMCP server instance; tools are registered on it via the
# @mcp.tool() decorators below and served over stdio in main().
mcp = FastMCP("Source-MCP Local RAG Server")
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
@mcp.tool()
async def search_knowledge_base(query: str, limit: int = 5) -> str:
    """
    Search for relevant context in the indexed documents.

    Args:
        query: The question or topic to search for.
        limit: Maximum number of text chunks to return.
    """
    logger.info(f"Received search query: {query}")
    chunks = indexer.query(query, limit)
    if not chunks:
        return "No relevant information found in the local knowledge base."
    # Separate chunks with a visible horizontal rule so the client can
    # distinguish individual results.
    joined = "\n\n---\n\n".join(chunks)
    return f"Found {len(chunks)} relevant chunks:\n\n{joined}"
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
@mcp.tool()
async def get_index_stats() -> str:
    """Get current statistics about the vector index."""
    return str(indexer.get_stats())
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
# ── Background services ────────────────────────────────────
|
|
48
|
+
def start_background_services():
    """Start the file watcher and launch the initial index pass in a daemon thread."""
    logger.info("Starting background services...")
    indexer.start_watching()
    index_thread = threading.Thread(target=indexer.index_directory, daemon=True)
    index_thread.start()
|
|
52
|
+
|
|
53
|
+
|
|
54
|
+
def run_dashboard():
    """Run the FastAPI dashboard in a separate thread."""
    logger.info(f"Starting Dashboard at http://{settings.host}:{settings.web_port}")
    # log_level="error" keeps uvicorn's access/info chatter out of the logs.
    dashboard = uvicorn.Server(
        uvicorn.Config(
            web_app,
            host=settings.host,
            port=settings.web_port,
            log_level="error",
        )
    )
    dashboard.run()
|
|
65
|
+
|
|
66
|
+
|
|
67
|
+
# ── CLI entry-point ─────────────────────────────────────────
|
|
68
|
+
def _resolve_project_path(cli_path: str | None) -> Path:
    """Pick the directory to index: --path > SOURCE_MCP_INDEX_DIR > current dir."""
    if cli_path:
        return Path(cli_path).resolve()
    env_dir = os.getenv("SOURCE_MCP_INDEX_DIR")
    if env_dir:
        return Path(env_dir).resolve()
    return Path(".").resolve()


def _apply_env_overrides() -> None:
    """Copy recognized environment variables into the already-built settings object.

    pydantic settings are initialized at import time, so values loaded later
    from the project-local `.env` must be applied manually.
    """
    if provider := os.getenv("EMBEDDING_PROVIDER"):
        settings.embedding_provider = provider
    if model := os.getenv("EMBEDDING_MODEL"):
        settings.embedding_model = model
    if api_key := os.getenv("OPENAI_API_KEY"):
        settings.openai_api_key = api_key
    if port := os.getenv("WEB_PORT"):
        try:
            settings.web_port = int(port)
        except ValueError:
            # A malformed WEB_PORT should not abort startup; keep the default.
            logger.error(f"Invalid WEB_PORT value {port!r}; keeping {settings.web_port}")


def main():
    """CLI entry point: resolve config, start dashboard and indexer, run the MCP server.

    Blocks on the MCP stdio transport until interrupted; exits with status 1
    if the indexer fails to initialize or the MCP server raises.
    """
    parser = argparse.ArgumentParser(description="Source-MCP Server")
    parser.add_argument("--path", type=str, help="Path to the documents directory")
    parser.add_argument("--embed-model", type=str, help="HuggingFace embedding model name")
    parser.add_argument("--web-port", type=int, help="Port for the Web Dashboard")
    parser.add_argument("--no-browser", action="store_true", help="Don't auto-open browser")

    # parse_known_args: tolerate extra flags an MCP client may append.
    args, _unknown = parser.parse_known_args()

    # Determine project root
    project_path = _resolve_project_path(args.path)

    # Load environment from project root (override existing env vars)
    env_path = project_path / ".env"
    if env_path.exists():
        logger.info(f"Loading environment from {env_path}")
        load_dotenv(env_path, override=True)

    settings.docs_path = str(project_path)

    # Use project-local storage for the index.
    settings.zvec_path = str(project_path / ".source-mcp" / "zvec_db")

    # Env vars override defaults...
    _apply_env_overrides()

    # ...and CLI flags override env vars.
    if args.embed_model:
        settings.embedding_model = args.embed_model
    if args.web_port:
        settings.web_port = args.web_port

    logger.info(f"Project path: {settings.docs_path}")
    logger.info(f"Index path: {settings.zvec_path}")

    # Ensure directories exist
    Path(settings.docs_path).mkdir(parents=True, exist_ok=True)
    Path(settings.zvec_path).mkdir(parents=True, exist_ok=True)

    # Initialize indexer (delayed to avoid side-effects on import)
    try:
        indexer.configure()
        indexer.initialize()
    except Exception as e:
        logger.error(f"Failed to initialize indexer: {e}")
        sys.exit(1)

    # Start dashboard
    threading.Thread(target=run_dashboard, daemon=True).start()

    # Auto-open browser (with delay for server startup)
    if not args.no_browser:
        def open_browser():
            import time
            # Give uvicorn a moment to bind before pointing the browser at it.
            time.sleep(1.5)
            url = f"http://{settings.host}:{settings.web_port}"
            logger.info(f"Opening dashboard in browser: {url}")
            try:
                webbrowser.open(url)
            except Exception:
                # Best-effort convenience only; a headless host is fine.
                pass

        threading.Thread(target=open_browser, daemon=True).start()

    # Start indexer & watcher
    start_background_services()

    # Run MCP server (blocks)
    try:
        mcp.run(transport="stdio")
    except KeyboardInterrupt:
        logger.info("Server stopping...")
    except Exception as e:
        logger.error(f"MCP Server error: {e}")
        sys.exit(1)
    finally:
        logger.info("Stopping indexer watcher...")
        indexer.stop_watching()
|
160
|
+
|
|
161
|
+
|
|
162
|
+
# Allow running the module directly: `python -m src.main`.
if __name__ == "__main__":
    main()
|
|
File without changes
|