wet-mcp 1.0.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- wet_mcp-1.0.0/.gitignore +53 -0
- wet_mcp-1.0.0/LICENSE +21 -0
- wet_mcp-1.0.0/PKG-INFO +182 -0
- wet_mcp-1.0.0/README.md +152 -0
- wet_mcp-1.0.0/pyproject.toml +89 -0
- wet_mcp-1.0.0/src/wet_mcp/__init__.py +6 -0
- wet_mcp-1.0.0/src/wet_mcp/__main__.py +6 -0
- wet_mcp-1.0.0/src/wet_mcp/config.py +32 -0
- wet_mcp-1.0.0/src/wet_mcp/docker_manager.py +146 -0
- wet_mcp-1.0.0/src/wet_mcp/docs/__init__.py +1 -0
- wet_mcp-1.0.0/src/wet_mcp/docs/help.md +55 -0
- wet_mcp-1.0.0/src/wet_mcp/docs/media.md +58 -0
- wet_mcp-1.0.0/src/wet_mcp/docs/web.md +73 -0
- wet_mcp-1.0.0/src/wet_mcp/searxng_settings.yml +30 -0
- wet_mcp-1.0.0/src/wet_mcp/server.py +172 -0
- wet_mcp-1.0.0/src/wet_mcp/setup.py +94 -0
- wet_mcp-1.0.0/src/wet_mcp/sources/__init__.py +1 -0
- wet_mcp-1.0.0/src/wet_mcp/sources/crawler.py +296 -0
- wet_mcp-1.0.0/src/wet_mcp/sources/searxng.py +72 -0
wet_mcp-1.0.0/.gitignore
ADDED
|
@@ -0,0 +1,53 @@
|
|
|
1
|
+
# Python
|
|
2
|
+
__pycache__/
|
|
3
|
+
*.py[cod]
|
|
4
|
+
*$py.class
|
|
5
|
+
*.so
|
|
6
|
+
.Python
|
|
7
|
+
build/
|
|
8
|
+
develop-eggs/
|
|
9
|
+
dist/
|
|
10
|
+
downloads/
|
|
11
|
+
eggs/
|
|
12
|
+
.eggs/
|
|
13
|
+
lib/
|
|
14
|
+
lib64/
|
|
15
|
+
parts/
|
|
16
|
+
sdist/
|
|
17
|
+
var/
|
|
18
|
+
wheels/
|
|
19
|
+
*.egg-info/
|
|
20
|
+
.installed.cfg
|
|
21
|
+
*.egg
|
|
22
|
+
|
|
23
|
+
# Virtual environments
|
|
24
|
+
.venv/
|
|
25
|
+
venv/
|
|
26
|
+
ENV/
|
|
27
|
+
|
|
28
|
+
# IDE
|
|
29
|
+
.idea/
|
|
30
|
+
*.swp
|
|
31
|
+
*.swo
|
|
32
|
+
.cursor/
|
|
33
|
+
|
|
34
|
+
# Testing
|
|
35
|
+
.pytest_cache/
|
|
36
|
+
.coverage
|
|
37
|
+
htmlcov/
|
|
38
|
+
.tox/
|
|
39
|
+
.nox/
|
|
40
|
+
|
|
41
|
+
# Misc
|
|
42
|
+
.DS_Store
|
|
43
|
+
*.log
|
|
44
|
+
.env
|
|
45
|
+
.env.local
|
|
46
|
+
*.bak
|
|
47
|
+
|
|
48
|
+
# Ruff
|
|
49
|
+
.ruff_cache/
|
|
50
|
+
|
|
51
|
+
# Build artifacts
|
|
52
|
+
*.whl
|
|
53
|
+
node_modules/
|
wet_mcp-1.0.0/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 n24q02m
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
wet_mcp-1.0.0/PKG-INFO
ADDED
|
@@ -0,0 +1,182 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: wet-mcp
|
|
3
|
+
Version: 1.0.0
|
|
4
|
+
Summary: Open-source MCP Server thay thế Tavily - Web search, extract, crawl với SearXNG
|
|
5
|
+
Project-URL: Homepage, https://github.com/n24q02m/wet-mcp
|
|
6
|
+
Project-URL: Repository, https://github.com/n24q02m/wet-mcp.git
|
|
7
|
+
Project-URL: Issues, https://github.com/n24q02m/wet-mcp/issues
|
|
8
|
+
Author-email: n24q02m <quangminh2422004@gmail.com>
|
|
9
|
+
License: MIT
|
|
10
|
+
License-File: LICENSE
|
|
11
|
+
Keywords: crawl4ai,mcp,searxng,tavily-alternative,web-scraping
|
|
12
|
+
Classifier: Development Status :: 3 - Alpha
|
|
13
|
+
Classifier: Environment :: Console
|
|
14
|
+
Classifier: Intended Audience :: Developers
|
|
15
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
16
|
+
Classifier: Operating System :: OS Independent
|
|
17
|
+
Classifier: Programming Language :: Python :: 3
|
|
18
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
19
|
+
Classifier: Topic :: Internet :: WWW/HTTP
|
|
20
|
+
Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
|
|
21
|
+
Requires-Python: ==3.13.*
|
|
22
|
+
Requires-Dist: crawl4ai>=0.8.0
|
|
23
|
+
Requires-Dist: httpx>=0.27.0
|
|
24
|
+
Requires-Dist: loguru>=0.7.0
|
|
25
|
+
Requires-Dist: mcp[cli]>=1.0.0
|
|
26
|
+
Requires-Dist: pydantic-settings>=2.0.0
|
|
27
|
+
Requires-Dist: pydantic>=2.0.0
|
|
28
|
+
Requires-Dist: python-on-whales>=0.73.0
|
|
29
|
+
Description-Content-Type: text/markdown
|
|
30
|
+
|
|
31
|
+
# WET - Web ExTract MCP Server
|
|
32
|
+
|
|
33
|
+
[![PyPI version](https://badge.fury.io/py/wet-mcp.svg)](https://badge.fury.io/py/wet-mcp)
|
|
34
|
+
[![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT)
|
|
35
|
+
|
|
36
|
+
> **Open-source MCP Server thay thế Tavily cho web scraping & multimodal extraction**
|
|
37
|
+
|
|
38
|
+
Zero-install experience: chỉ cần `uvx wet-mcp` - tự động setup và quản lý SearXNG container.
|
|
39
|
+
|
|
40
|
+
## Features
|
|
41
|
+
|
|
42
|
+
| Feature | Description |
|
|
43
|
+
|:--------|:------------|
|
|
44
|
+
| **Web Search** | Tìm kiếm qua SearXNG (metasearch: Google, Bing, DuckDuckGo, Brave) |
|
|
45
|
+
| **Content Extract** | Trích xuất nội dung sạch (Markdown/Text/HTML) |
|
|
46
|
+
| **Deep Crawl** | Đi qua nhiều trang con từ URL gốc với depth control |
|
|
47
|
+
| **Site Map** | Khám phá cấu trúc URL của website |
|
|
48
|
+
| **Media** | List và download images, videos, audio files |
|
|
49
|
+
| **Anti-bot** | Stealth mode bypass Cloudflare, Medium, LinkedIn, Twitter |
|
|
50
|
+
|
|
51
|
+
## Quick Start
|
|
52
|
+
|
|
53
|
+
### Prerequisites
|
|
54
|
+
|
|
55
|
+
- Docker daemon running (for SearXNG)
|
|
56
|
+
- Python 3.13 (hoặc dùng uvx)
|
|
57
|
+
|
|
58
|
+
### MCP Client Configuration
|
|
59
|
+
|
|
60
|
+
**Claude Desktop / Cursor / Windsurf / Antigravity:**
|
|
61
|
+
|
|
62
|
+
```json
|
|
63
|
+
{
|
|
64
|
+
"mcpServers": {
|
|
65
|
+
"wet": {
|
|
66
|
+
"command": "uvx",
|
|
67
|
+
"args": ["wet-mcp"]
|
|
68
|
+
}
|
|
69
|
+
}
|
|
70
|
+
}
|
|
71
|
+
```
|
|
72
|
+
|
|
73
|
+
**Đó là tất cả!** Khi MCP client gọi wet-mcp lần đầu:
|
|
74
|
+
1. Tự động install Playwright chromium
|
|
75
|
+
2. Tự động pull SearXNG Docker image
|
|
76
|
+
3. Start `wet-searxng` container
|
|
77
|
+
4. Chạy MCP server
|
|
78
|
+
|
|
79
|
+
### Without uvx
|
|
80
|
+
|
|
81
|
+
```bash
|
|
82
|
+
pip install wet-mcp
|
|
83
|
+
wet-mcp
|
|
84
|
+
```
|
|
85
|
+
|
|
86
|
+
## Tools
|
|
87
|
+
|
|
88
|
+
| Tool | Actions | Description |
|
|
89
|
+
|:-----|:--------|:------------|
|
|
90
|
+
| `web` | search, extract, crawl, map | Web operations |
|
|
91
|
+
| `media` | list, download | Media discovery & download |
|
|
92
|
+
| `help` | - | Full documentation |
|
|
93
|
+
|
|
94
|
+
### Examples
|
|
95
|
+
|
|
96
|
+
```python
|
|
97
|
+
# Search
|
|
98
|
+
{"action": "search", "query": "python web scraping", "max_results": 10}
|
|
99
|
+
|
|
100
|
+
# Extract content
|
|
101
|
+
{"action": "extract", "urls": ["https://example.com"]}
|
|
102
|
+
|
|
103
|
+
# Crawl with depth
|
|
104
|
+
{"action": "crawl", "urls": ["https://docs.python.org"], "depth": 2}
|
|
105
|
+
|
|
106
|
+
# Map site structure
|
|
107
|
+
{"action": "map", "urls": ["https://example.com"]}
|
|
108
|
+
|
|
109
|
+
# List media
|
|
110
|
+
{"action": "list", "url": "https://github.com/python/cpython"}
|
|
111
|
+
|
|
112
|
+
# Download media
|
|
113
|
+
{"action": "download", "media_urls": ["https://example.com/image.png"]}
|
|
114
|
+
```
|
|
115
|
+
|
|
116
|
+
## Tech Stack
|
|
117
|
+
|
|
118
|
+
| Component | Technology |
|
|
119
|
+
|:----------|:-----------|
|
|
120
|
+
| Language | Python 3.13 |
|
|
121
|
+
| MCP Framework | FastMCP |
|
|
122
|
+
| Web Search | SearXNG (auto-managed Docker) |
|
|
123
|
+
| Web Crawling | Crawl4AI |
|
|
124
|
+
| Docker Management | python-on-whales |
|
|
125
|
+
|
|
126
|
+
## How It Works
|
|
127
|
+
|
|
128
|
+
```
|
|
129
|
+
┌─────────────────────────────────────────────────────────┐
|
|
130
|
+
│ MCP Client │
|
|
131
|
+
│ (Claude, Cursor, Windsurf) │
|
|
132
|
+
└─────────────────────┬───────────────────────────────────┘
|
|
133
|
+
│ MCP Protocol
|
|
134
|
+
▼
|
|
135
|
+
┌─────────────────────────────────────────────────────────┐
|
|
136
|
+
│ WET MCP Server │
|
|
137
|
+
│ ┌──────────┐ ┌──────────┐ ┌──────────────────────┐ │
|
|
138
|
+
│ │ web │ │ media │ │ help │ │
|
|
139
|
+
│ │ (search, │ │ (list, │ │ (full documentation)│ │
|
|
140
|
+
│ │ extract, │ │ download)│ └──────────────────────┘ │
|
|
141
|
+
│ │ crawl, │ └────┬─────┘ │
|
|
142
|
+
│ │ map) │ │ │
|
|
143
|
+
│ └────┬─────┘ │ │
|
|
144
|
+
│ │ │ │
|
|
145
|
+
│ ▼ ▼ │
|
|
146
|
+
│ ┌──────────┐ ┌──────────┐ │
|
|
147
|
+
│ │ SearXNG │ │ Crawl4AI │ │
|
|
148
|
+
│ │ (Docker) │ │(Playwright)│ │
|
|
149
|
+
│ └──────────┘ └──────────┘ │
|
|
150
|
+
└─────────────────────────────────────────────────────────┘
|
|
151
|
+
```
|
|
152
|
+
|
|
153
|
+
## Configuration
|
|
154
|
+
|
|
155
|
+
Environment variables:
|
|
156
|
+
|
|
157
|
+
| Variable | Default | Description |
|
|
158
|
+
|:---------|:--------|:------------|
|
|
159
|
+
| `WET_AUTO_DOCKER` | `true` | Auto-manage SearXNG container |
|
|
160
|
+
| `WET_SEARXNG_PORT` | `8080` | SearXNG container port |
|
|
161
|
+
| `SEARXNG_URL` | `http://localhost:8080` | External SearXNG URL (used when `WET_AUTO_DOCKER` is `false` or Docker is unavailable) |
|
|
162
|
+
| `LOG_LEVEL` | `INFO` | Logging level |
|
|
163
|
+
|
|
164
|
+
## Container Management
|
|
165
|
+
|
|
166
|
+
```bash
|
|
167
|
+
# View SearXNG logs
|
|
168
|
+
docker logs wet-searxng
|
|
169
|
+
|
|
170
|
+
# Stop SearXNG
|
|
171
|
+
docker stop wet-searxng
|
|
172
|
+
|
|
173
|
+
# Remove container (will be recreated on next run)
|
|
174
|
+
docker rm wet-searxng
|
|
175
|
+
|
|
176
|
+
# Reset auto-setup (forces re-install Playwright)
|
|
177
|
+
rm ~/.wet-mcp/.setup-complete
|
|
178
|
+
```
|
|
179
|
+
|
|
180
|
+
## License
|
|
181
|
+
|
|
182
|
+
MIT License
|
wet_mcp-1.0.0/README.md
ADDED
|
@@ -0,0 +1,152 @@
|
|
|
1
|
+
# WET - Web ExTract MCP Server
|
|
2
|
+
|
|
3
|
+
[![PyPI version](https://badge.fury.io/py/wet-mcp.svg)](https://badge.fury.io/py/wet-mcp)
|
|
4
|
+
[![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT)
|
|
5
|
+
|
|
6
|
+
> **Open-source MCP Server thay thế Tavily cho web scraping & multimodal extraction**
|
|
7
|
+
|
|
8
|
+
Zero-install experience: chỉ cần `uvx wet-mcp` - tự động setup và quản lý SearXNG container.
|
|
9
|
+
|
|
10
|
+
## Features
|
|
11
|
+
|
|
12
|
+
| Feature | Description |
|
|
13
|
+
|:--------|:------------|
|
|
14
|
+
| **Web Search** | Tìm kiếm qua SearXNG (metasearch: Google, Bing, DuckDuckGo, Brave) |
|
|
15
|
+
| **Content Extract** | Trích xuất nội dung sạch (Markdown/Text/HTML) |
|
|
16
|
+
| **Deep Crawl** | Đi qua nhiều trang con từ URL gốc với depth control |
|
|
17
|
+
| **Site Map** | Khám phá cấu trúc URL của website |
|
|
18
|
+
| **Media** | List và download images, videos, audio files |
|
|
19
|
+
| **Anti-bot** | Stealth mode bypass Cloudflare, Medium, LinkedIn, Twitter |
|
|
20
|
+
|
|
21
|
+
## Quick Start
|
|
22
|
+
|
|
23
|
+
### Prerequisites
|
|
24
|
+
|
|
25
|
+
- Docker daemon running (for SearXNG)
|
|
26
|
+
- Python 3.13 (hoặc dùng uvx)
|
|
27
|
+
|
|
28
|
+
### MCP Client Configuration
|
|
29
|
+
|
|
30
|
+
**Claude Desktop / Cursor / Windsurf / Antigravity:**
|
|
31
|
+
|
|
32
|
+
```json
|
|
33
|
+
{
|
|
34
|
+
"mcpServers": {
|
|
35
|
+
"wet": {
|
|
36
|
+
"command": "uvx",
|
|
37
|
+
"args": ["wet-mcp"]
|
|
38
|
+
}
|
|
39
|
+
}
|
|
40
|
+
}
|
|
41
|
+
```
|
|
42
|
+
|
|
43
|
+
**Đó là tất cả!** Khi MCP client gọi wet-mcp lần đầu:
|
|
44
|
+
1. Tự động install Playwright chromium
|
|
45
|
+
2. Tự động pull SearXNG Docker image
|
|
46
|
+
3. Start `wet-searxng` container
|
|
47
|
+
4. Chạy MCP server
|
|
48
|
+
|
|
49
|
+
### Without uvx
|
|
50
|
+
|
|
51
|
+
```bash
|
|
52
|
+
pip install wet-mcp
|
|
53
|
+
wet-mcp
|
|
54
|
+
```
|
|
55
|
+
|
|
56
|
+
## Tools
|
|
57
|
+
|
|
58
|
+
| Tool | Actions | Description |
|
|
59
|
+
|:-----|:--------|:------------|
|
|
60
|
+
| `web` | search, extract, crawl, map | Web operations |
|
|
61
|
+
| `media` | list, download | Media discovery & download |
|
|
62
|
+
| `help` | - | Full documentation |
|
|
63
|
+
|
|
64
|
+
### Examples
|
|
65
|
+
|
|
66
|
+
```python
|
|
67
|
+
# Search
|
|
68
|
+
{"action": "search", "query": "python web scraping", "max_results": 10}
|
|
69
|
+
|
|
70
|
+
# Extract content
|
|
71
|
+
{"action": "extract", "urls": ["https://example.com"]}
|
|
72
|
+
|
|
73
|
+
# Crawl with depth
|
|
74
|
+
{"action": "crawl", "urls": ["https://docs.python.org"], "depth": 2}
|
|
75
|
+
|
|
76
|
+
# Map site structure
|
|
77
|
+
{"action": "map", "urls": ["https://example.com"]}
|
|
78
|
+
|
|
79
|
+
# List media
|
|
80
|
+
{"action": "list", "url": "https://github.com/python/cpython"}
|
|
81
|
+
|
|
82
|
+
# Download media
|
|
83
|
+
{"action": "download", "media_urls": ["https://example.com/image.png"]}
|
|
84
|
+
```
|
|
85
|
+
|
|
86
|
+
## Tech Stack
|
|
87
|
+
|
|
88
|
+
| Component | Technology |
|
|
89
|
+
|:----------|:-----------|
|
|
90
|
+
| Language | Python 3.13 |
|
|
91
|
+
| MCP Framework | FastMCP |
|
|
92
|
+
| Web Search | SearXNG (auto-managed Docker) |
|
|
93
|
+
| Web Crawling | Crawl4AI |
|
|
94
|
+
| Docker Management | python-on-whales |
|
|
95
|
+
|
|
96
|
+
## How It Works
|
|
97
|
+
|
|
98
|
+
```
|
|
99
|
+
┌─────────────────────────────────────────────────────────┐
|
|
100
|
+
│ MCP Client │
|
|
101
|
+
│ (Claude, Cursor, Windsurf) │
|
|
102
|
+
└─────────────────────┬───────────────────────────────────┘
|
|
103
|
+
│ MCP Protocol
|
|
104
|
+
▼
|
|
105
|
+
┌─────────────────────────────────────────────────────────┐
|
|
106
|
+
│ WET MCP Server │
|
|
107
|
+
│ ┌──────────┐ ┌──────────┐ ┌──────────────────────┐ │
|
|
108
|
+
│ │ web │ │ media │ │ help │ │
|
|
109
|
+
│ │ (search, │ │ (list, │ │ (full documentation)│ │
|
|
110
|
+
│ │ extract, │ │ download)│ └──────────────────────┘ │
|
|
111
|
+
│ │ crawl, │ └────┬─────┘ │
|
|
112
|
+
│ │ map) │ │ │
|
|
113
|
+
│ └────┬─────┘ │ │
|
|
114
|
+
│ │ │ │
|
|
115
|
+
│ ▼ ▼ │
|
|
116
|
+
│ ┌──────────┐ ┌──────────┐ │
|
|
117
|
+
│ │ SearXNG │ │ Crawl4AI │ │
|
|
118
|
+
│ │ (Docker) │ │(Playwright)│ │
|
|
119
|
+
│ └──────────┘ └──────────┘ │
|
|
120
|
+
└─────────────────────────────────────────────────────────┘
|
|
121
|
+
```
|
|
122
|
+
|
|
123
|
+
## Configuration
|
|
124
|
+
|
|
125
|
+
Environment variables:
|
|
126
|
+
|
|
127
|
+
| Variable | Default | Description |
|
|
128
|
+
|:---------|:--------|:------------|
|
|
129
|
+
| `WET_AUTO_DOCKER` | `true` | Auto-manage SearXNG container |
|
|
130
|
+
| `WET_SEARXNG_PORT` | `8080` | SearXNG container port |
|
|
131
|
+
| `SEARXNG_URL` | `http://localhost:8080` | External SearXNG URL (used when `WET_AUTO_DOCKER` is `false` or Docker is unavailable) |
|
|
132
|
+
| `LOG_LEVEL` | `INFO` | Logging level |
|
|
133
|
+
|
|
134
|
+
## Container Management
|
|
135
|
+
|
|
136
|
+
```bash
|
|
137
|
+
# View SearXNG logs
|
|
138
|
+
docker logs wet-searxng
|
|
139
|
+
|
|
140
|
+
# Stop SearXNG
|
|
141
|
+
docker stop wet-searxng
|
|
142
|
+
|
|
143
|
+
# Remove container (will be recreated on next run)
|
|
144
|
+
docker rm wet-searxng
|
|
145
|
+
|
|
146
|
+
# Reset auto-setup (forces re-install Playwright)
|
|
147
|
+
rm ~/.wet-mcp/.setup-complete
|
|
148
|
+
```
|
|
149
|
+
|
|
150
|
+
## License
|
|
151
|
+
|
|
152
|
+
MIT License
|
|
@@ -0,0 +1,89 @@
|
|
|
1
|
+
[project]
|
|
2
|
+
name = "wet-mcp"
|
|
3
|
+
version = "1.0.0"
|
|
4
|
+
description = "Open-source MCP Server thay thế Tavily - Web search, extract, crawl với SearXNG"
|
|
5
|
+
readme = "README.md"
|
|
6
|
+
license = { text = "MIT" }
|
|
7
|
+
authors = [{ name = "n24q02m", email = "quangminh2422004@gmail.com" }]
|
|
8
|
+
keywords = ["mcp", "web-scraping", "searxng", "crawl4ai", "tavily-alternative"]
|
|
9
|
+
classifiers = [
|
|
10
|
+
"Development Status :: 3 - Alpha",
|
|
11
|
+
"Environment :: Console",
|
|
12
|
+
"Intended Audience :: Developers",
|
|
13
|
+
"License :: OSI Approved :: MIT License",
|
|
14
|
+
"Operating System :: OS Independent",
|
|
15
|
+
"Programming Language :: Python :: 3",
|
|
16
|
+
"Programming Language :: Python :: 3.13",
|
|
17
|
+
"Topic :: Internet :: WWW/HTTP",
|
|
18
|
+
"Topic :: Scientific/Engineering :: Artificial Intelligence",
|
|
19
|
+
]
|
|
20
|
+
requires-python = "==3.13.*"
|
|
21
|
+
dependencies = [
|
|
22
|
+
# MCP Server
|
|
23
|
+
"mcp[cli]>=1.0.0",
|
|
24
|
+
# Docker Management
|
|
25
|
+
"python-on-whales>=0.73.0",
|
|
26
|
+
# Web Crawling
|
|
27
|
+
"crawl4ai>=0.8.0",
|
|
28
|
+
# HTTP Client
|
|
29
|
+
"httpx>=0.27.0",
|
|
30
|
+
# Config
|
|
31
|
+
"pydantic>=2.0.0",
|
|
32
|
+
"pydantic-settings>=2.0.0",
|
|
33
|
+
# Logging
|
|
34
|
+
"loguru>=0.7.0",
|
|
35
|
+
]
|
|
36
|
+
|
|
37
|
+
[dependency-groups]
|
|
38
|
+
dev = [
|
|
39
|
+
"pytest>=8.0.0",
|
|
40
|
+
"pytest-asyncio>=0.25.0",
|
|
41
|
+
"ruff>=0.8.0",
|
|
42
|
+
"ty>=0.0.1a10",
|
|
43
|
+
"pre-commit>=4.0.0",
|
|
44
|
+
]
|
|
45
|
+
|
|
46
|
+
[project.scripts]
|
|
47
|
+
wet-mcp = "wet_mcp:main"
|
|
48
|
+
|
|
49
|
+
[project.urls]
|
|
50
|
+
Homepage = "https://github.com/n24q02m/wet-mcp"
|
|
51
|
+
Repository = "https://github.com/n24q02m/wet-mcp.git"
|
|
52
|
+
Issues = "https://github.com/n24q02m/wet-mcp/issues"
|
|
53
|
+
|
|
54
|
+
[build-system]
|
|
55
|
+
requires = ["hatchling"]
|
|
56
|
+
build-backend = "hatchling.build"
|
|
57
|
+
|
|
58
|
+
[tool.hatch.build.targets.wheel]
|
|
59
|
+
packages = ["src/wet_mcp"]
|
|
60
|
+
|
|
61
|
+
[tool.hatch.build.targets.wheel.sources]
|
|
62
|
+
"src" = ""
|
|
63
|
+
|
|
64
|
+
[tool.hatch.build.targets.sdist]
|
|
65
|
+
include = [
|
|
66
|
+
"src/wet_mcp/**/*.py",
|
|
67
|
+
"src/wet_mcp/**/*.yml",
|
|
68
|
+
"src/wet_mcp/**/*.md",
|
|
69
|
+
]
|
|
70
|
+
|
|
71
|
+
[tool.pytest.ini_options]
|
|
72
|
+
asyncio_mode = "auto"
|
|
73
|
+
asyncio_default_fixture_loop_scope = "function"
|
|
74
|
+
testpaths = ["tests"]
|
|
75
|
+
python_files = ["test_*.py"]
|
|
76
|
+
|
|
77
|
+
[tool.ty]
|
|
78
|
+
rules = { unresolved-import = "ignore", unresolved-attribute = "ignore", possibly-missing-attribute = "ignore" }
|
|
79
|
+
|
|
80
|
+
[tool.ruff]
|
|
81
|
+
line-length = 88
|
|
82
|
+
target-version = "py313"
|
|
83
|
+
|
|
84
|
+
[tool.ruff.lint]
|
|
85
|
+
select = ["E", "F", "W", "I", "UP", "B", "C4"]
|
|
86
|
+
ignore = ["E501"]
|
|
87
|
+
|
|
88
|
+
[tool.ruff.format]
|
|
89
|
+
quote-style = "double"
|
|
@@ -0,0 +1,32 @@
|
|
|
1
|
+
"""Configuration settings for WET MCP Server."""
|
|
2
|
+
|
|
3
|
+
from pydantic_settings import BaseSettings
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
class Settings(BaseSettings):
    """Runtime configuration for the WET MCP server.

    Every field below carries a default. ``model_config`` sets an empty
    environment prefix and disables case sensitivity, so each field is
    looked up in the environment under its own name.
    """

    # --- SearXNG endpoint used for search requests ---
    searxng_url: str = "http://localhost:8080"
    searxng_timeout: int = 30

    # --- Crawler (browser) behaviour ---
    crawler_headless: bool = True
    crawler_timeout: int = 60

    # --- Auto-managed SearXNG Docker container ---
    wet_auto_docker: bool = True
    wet_container_name: str = "wet-searxng"
    wet_searxng_image: str = "searxng/searxng:latest"
    wet_searxng_port: int = 8080

    # --- Where downloaded media files are stored ---
    download_dir: str = "~/.wet-mcp/downloads"

    # --- Log verbosity ---
    log_level: str = "INFO"

    model_config = {
        "env_prefix": "",
        "case_sensitive": False,
    }
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
settings = Settings()
|
|
@@ -0,0 +1,146 @@
|
|
|
1
|
+
"""Docker container management for SearXNG."""
|
|
2
|
+
|
|
3
|
+
import socket
|
|
4
|
+
from importlib.resources import files
|
|
5
|
+
from pathlib import Path
|
|
6
|
+
|
|
7
|
+
from loguru import logger
|
|
8
|
+
|
|
9
|
+
from wet_mcp.config import settings
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
def _find_available_port(start_port: int, max_tries: int = 10) -> int:
|
|
13
|
+
"""Find an available port starting from start_port."""
|
|
14
|
+
for offset in range(max_tries):
|
|
15
|
+
port = start_port + offset
|
|
16
|
+
try:
|
|
17
|
+
with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
|
|
18
|
+
s.bind(("localhost", port))
|
|
19
|
+
return port
|
|
20
|
+
except OSError:
|
|
21
|
+
continue
|
|
22
|
+
# Fallback to original port
|
|
23
|
+
return start_port
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
def _get_settings_path() -> Path:
    """Return the path of the SearXNG settings file to mount into Docker.

    Ensures ``~/.wet-mcp/`` exists and, when ``searxng_settings.yml`` is not
    already present there, copies the copy bundled with the ``wet_mcp``
    package into it. An existing file is left untouched.

    Returns:
        Path to ``~/.wet-mcp/searxng_settings.yml``.
    """
    config_dir = Path.home() / ".wet-mcp"
    config_dir.mkdir(parents=True, exist_ok=True)

    settings_file = config_dir / "searxng_settings.yml"

    # Copy bundled settings if not exists
    if not settings_file.exists():
        bundled = files("wet_mcp").joinpath("searxng_settings.yml")
        # Read and write explicitly as UTF-8 so the copy does not depend on
        # the platform's default locale encoding.
        settings_file.write_text(
            bundled.read_text(encoding="utf-8"),
            encoding="utf-8",
        )
        logger.debug(f"Copied SearXNG settings to: {settings_file}")

    return settings_file
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
def _published_host_port(container, default: int) -> int:
    """Return the host port published for the container's ``8080/tcp``.

    Falls back to ``default`` when the inspected container reports no
    mapping for ``8080/tcp``.
    """
    ports = container.network_settings.ports
    if ports and "8080/tcp" in ports and ports["8080/tcp"]:
        return int(ports["8080/tcp"][0].get("HostPort", default))
    return default


def ensure_searxng() -> str:
    """Start the SearXNG container if it is not running and return its URL.

    Behaviour, in order:

    - If ``settings.wet_auto_docker`` is false, or ``python_on_whales``
      cannot be imported, return ``settings.searxng_url`` unchanged.
    - If the container already exists, start it when stopped and return a
      URL built from the host port it actually publishes.
    - Otherwise pick a port with ``_find_available_port``, create the
      container with the settings file mounted read-only, and return its URL.
    - On any Docker or unexpected error, log it and fall back to
      ``settings.searxng_url``.

    Returns:
        Base URL of the SearXNG instance to use.
    """
    if not settings.wet_auto_docker:
        logger.info("Auto Docker disabled, using external SearXNG")
        return settings.searxng_url

    try:
        from python_on_whales import DockerException, docker
    except ImportError:
        logger.warning("python-on-whales not installed, using external SearXNG")
        return settings.searxng_url

    container_name = settings.wet_container_name
    image = settings.wet_searxng_image
    preferred_port = settings.wet_searxng_port

    try:
        if docker.container.exists(container_name):
            container = docker.container.inspect(container_name)
            if container.state.running:
                logger.debug(f"SearXNG container already running: {container_name}")
            else:
                logger.info(f"Starting stopped container: {container_name}")
                docker.container.start(container_name)
                # Inspect again after the start: the container may have been
                # created on a fallback port, so the preferred port must not
                # be assumed for a restarted container.
                container = docker.container.inspect(container_name)
            port = _published_host_port(container, preferred_port)
        else:
            # Find available port to avoid conflicts
            port = _find_available_port(preferred_port)
            if port != preferred_port:
                logger.info(f"Port {preferred_port} in use, using {port}")

            # Get settings file path
            settings_path = _get_settings_path()

            logger.info(f"Starting SearXNG container: {container_name}")
            docker.run(
                image,
                name=container_name,
                detach=True,
                publish=[(port, 8080)],
                volumes=[(str(settings_path), "/etc/searxng/settings.yml", "ro")],
                envs={
                    # NOTE(review): fixed, publicly known secret value;
                    # confirm this is acceptable for a local-only instance.
                    "SEARXNG_SECRET": "wet-internal",
                },
            )
            logger.info(f"SearXNG container started on port {port}")

        return f"http://localhost:{port}"

    except DockerException as e:
        logger.warning(f"Docker not available: {e}")
        logger.warning("Falling back to external SearXNG URL")
        return settings.searxng_url
    except Exception as e:
        # Top-level boundary: never let container management crash startup.
        logger.error(f"Failed to start SearXNG: {e}")
        return settings.searxng_url
|
|
115
|
+
|
|
116
|
+
|
|
117
|
+
def stop_searxng() -> None:
    """Stop the managed SearXNG container, if it exists.

    Does nothing when ``settings.wet_auto_docker`` is off. Any failure
    (including a missing ``python_on_whales``) is logged at debug level and
    otherwise ignored.
    """
    if settings.wet_auto_docker:
        try:
            from python_on_whales import docker

            name = settings.wet_container_name
            if not docker.container.exists(name):
                return
            logger.info(f"Stopping container: {name}")
            docker.container.stop(name)
        except Exception as exc:
            # Best effort: stopping must never raise to the caller.
            logger.debug(f"Failed to stop container: {exc}")
|
|
131
|
+
|
|
132
|
+
|
|
133
|
+
def remove_searxng() -> None:
    """Force-remove the managed SearXNG container, if it exists.

    Does nothing when ``settings.wet_auto_docker`` is off. Any failure
    (including a missing ``python_on_whales``) is logged at debug level and
    otherwise ignored.
    """
    if settings.wet_auto_docker:
        try:
            from python_on_whales import docker

            name = settings.wet_container_name
            if not docker.container.exists(name):
                return
            logger.info(f"Removing container: {name}")
            docker.container.remove(name, force=True)
        except Exception as exc:
            # Best effort: removal must never raise to the caller.
            logger.debug(f"Failed to remove container: {exc}")
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
"""Docs package for WET MCP Server."""
|