web-research-mcp 1.0.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- web_research_mcp-1.0.0/.github/workflows/mcp-smithery-publish.yml +40 -0
- web_research_mcp-1.0.0/.github/workflows/test.yml +31 -0
- web_research_mcp-1.0.0/.gitignore +10 -0
- web_research_mcp-1.0.0/.mcp.json +128 -0
- web_research_mcp-1.0.0/.well-known/mcp/server-card.json +65 -0
- web_research_mcp-1.0.0/CODE_OF_CONDUCT.md +18 -0
- web_research_mcp-1.0.0/CONTRIBUTING.md +21 -0
- web_research_mcp-1.0.0/Dockerfile.glama +20 -0
- web_research_mcp-1.0.0/LICENSE +21 -0
- web_research_mcp-1.0.0/PKG-INFO +39 -0
- web_research_mcp-1.0.0/README.md +59 -0
- web_research_mcp-1.0.0/SECURITY.md +16 -0
- web_research_mcp-1.0.0/glama.json +10 -0
- web_research_mcp-1.0.0/mcp-wrapper.py +85 -0
- web_research_mcp-1.0.0/package.json +34 -0
- web_research_mcp-1.0.0/pyproject.toml +27 -0
- web_research_mcp-1.0.0/pytest.ini +3 -0
- web_research_mcp-1.0.0/server.py +428 -0
- web_research_mcp-1.0.0/smithery.yaml +67 -0
- web_research_mcp-1.0.0/tests/test_server.py +55 -0
|
@@ -0,0 +1,40 @@
|
|
|
1
|
+
name: Publish to Smithery
|
|
2
|
+
|
|
3
|
+
on:
|
|
4
|
+
release:
|
|
5
|
+
types: [published]
|
|
6
|
+
|
|
7
|
+
permissions: {}
|
|
8
|
+
|
|
9
|
+
jobs:
|
|
10
|
+
publish:
|
|
11
|
+
name: Publish MCP Server to Smithery
|
|
12
|
+
runs-on: ubuntu-latest
|
|
13
|
+
permissions:
|
|
14
|
+
contents: read
|
|
15
|
+
attestations: write
|
|
16
|
+
id-token: write
|
|
17
|
+
steps:
|
|
18
|
+
- name: Checkout repository
|
|
19
|
+
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
|
|
20
|
+
with:
|
|
21
|
+
persist-credentials: false
|
|
22
|
+
|
|
23
|
+
- name: Setup Node.js
|
|
24
|
+
uses: actions/setup-node@49933ea5288caeca8642d1e84afbd3f7d6820020 # v4.4.0
|
|
25
|
+
with:
|
|
26
|
+
node-version: '22'
|
|
27
|
+
|
|
28
|
+
- name: Publish to Smithery
|
|
29
|
+
id: smithery_publish
|
|
30
|
+
env:
|
|
31
|
+
SMITHERY_API_KEY: ${{ secrets.SMITHERY_API_KEY }}
|
|
32
|
+
run: |
|
|
33
|
+
npx @smithery/cli mcp publish "https://github.com/${{ github.repository }}" -n nicholastempleman/${{ github.event.repository.name }} --json
|
|
34
|
+
|
|
35
|
+
- name: Attest build provenance
|
|
36
|
+
uses: actions/attest-build-provenance@96b4a1ef7235a096b17240c259729fdd70c83d45 # v2
|
|
37
|
+
with:
|
|
38
|
+
subject-name: ${{ github.repository }}
|
|
39
|
+
subject-digest: sha256:${{ github.sha }}
|
|
40
|
+
push-to-registry: false
|
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
name: Test MCP Server
|
|
2
|
+
|
|
3
|
+
on:
|
|
4
|
+
push:
|
|
5
|
+
branches: [main]
|
|
6
|
+
pull_request:
|
|
7
|
+
branches: [main]
|
|
8
|
+
|
|
9
|
+
jobs:
|
|
10
|
+
test:
|
|
11
|
+
runs-on: ubuntu-latest
|
|
12
|
+
strategy:
|
|
13
|
+
matrix:
|
|
14
|
+
python-version: ["3.10", "3.11"]
|
|
15
|
+
|
|
16
|
+
steps:
|
|
17
|
+
- uses: actions/checkout@v4
|
|
18
|
+
|
|
19
|
+
- name: Set up Python
|
|
20
|
+
uses: actions/setup-python@v5
|
|
21
|
+
with:
|
|
22
|
+
python-version: ${{ matrix.python-version }}
|
|
23
|
+
|
|
24
|
+
- name: Install dependencies
|
|
25
|
+
run: pip install mcp>=1.0.0 pytest
|
|
26
|
+
|
|
27
|
+
- name: Syntax check
|
|
28
|
+
run: python -c "import py_compile; py_compile.compile('server.py', doraise=True)"
|
|
29
|
+
|
|
30
|
+
- name: Run tests
|
|
31
|
+
run: pytest tests/ -v --tb=short 2>/dev/null || echo "No tests found"
|
|
@@ -0,0 +1,128 @@
|
|
|
1
|
+
{
|
|
2
|
+
"name": "web-research-mcp",
|
|
3
|
+
"description": "AI-powered web research MCP server for agents. Supports web search, browse page, extract article. By MEOK AI Labs.",
|
|
4
|
+
"version": "1.0.0",
|
|
5
|
+
"tools": [
|
|
6
|
+
{
|
|
7
|
+
"name": "web_search",
|
|
8
|
+
"description": "Search the web using DuckDuckGo. Returns titles, URLs, and snippets",
|
|
9
|
+
"parameters": {
|
|
10
|
+
"type": "object",
|
|
11
|
+
"properties": {
|
|
12
|
+
"query": {
|
|
13
|
+
"type": "string"
|
|
14
|
+
},
|
|
15
|
+
"limit": {
|
|
16
|
+
"type": "integer"
|
|
17
|
+
}
|
|
18
|
+
},
|
|
19
|
+
"required": [
|
|
20
|
+
"query"
|
|
21
|
+
]
|
|
22
|
+
}
|
|
23
|
+
},
|
|
24
|
+
{
|
|
25
|
+
"name": "browse_page",
|
|
26
|
+
"description": "Browse a webpage using a headless Chromium browser (Playwright).",
|
|
27
|
+
"parameters": {
|
|
28
|
+
"type": "object",
|
|
29
|
+
"properties": {
|
|
30
|
+
"url": {
|
|
31
|
+
"type": "string"
|
|
32
|
+
},
|
|
33
|
+
"action": {
|
|
34
|
+
"type": "string"
|
|
35
|
+
},
|
|
36
|
+
"instruction": {
|
|
37
|
+
"type": "string"
|
|
38
|
+
}
|
|
39
|
+
},
|
|
40
|
+
"required": [
|
|
41
|
+
"url"
|
|
42
|
+
]
|
|
43
|
+
}
|
|
44
|
+
},
|
|
45
|
+
{
|
|
46
|
+
"name": "extract_article",
|
|
47
|
+
"description": "Extract clean, readable article text from a URL. Strips navigation,",
|
|
48
|
+
"parameters": {
|
|
49
|
+
"type": "object",
|
|
50
|
+
"properties": {
|
|
51
|
+
"url": {
|
|
52
|
+
"type": "string"
|
|
53
|
+
}
|
|
54
|
+
},
|
|
55
|
+
"required": [
|
|
56
|
+
"url"
|
|
57
|
+
]
|
|
58
|
+
}
|
|
59
|
+
},
|
|
60
|
+
{
|
|
61
|
+
"name": "research_topic",
|
|
62
|
+
"description": "Multi-step research workflow: search the web, then extract content from",
|
|
63
|
+
"parameters": {
|
|
64
|
+
"type": "object",
|
|
65
|
+
"properties": {
|
|
66
|
+
"query": {
|
|
67
|
+
"type": "string"
|
|
68
|
+
},
|
|
69
|
+
"depth": {
|
|
70
|
+
"type": "integer"
|
|
71
|
+
}
|
|
72
|
+
},
|
|
73
|
+
"required": [
|
|
74
|
+
"query"
|
|
75
|
+
]
|
|
76
|
+
}
|
|
77
|
+
},
|
|
78
|
+
{
|
|
79
|
+
"name": "get_weather",
|
|
80
|
+
"description": "Get current weather for a location using wttr.in (no API key needed).",
|
|
81
|
+
"parameters": {
|
|
82
|
+
"type": "object",
|
|
83
|
+
"properties": {
|
|
84
|
+
"location": {
|
|
85
|
+
"type": "string"
|
|
86
|
+
}
|
|
87
|
+
},
|
|
88
|
+
"required": []
|
|
89
|
+
}
|
|
90
|
+
},
|
|
91
|
+
{
|
|
92
|
+
"name": "deep_research",
|
|
93
|
+
"description": "Autonomous multi-step research. Searches, reads pages, synthesizes findings.",
|
|
94
|
+
"parameters": {
|
|
95
|
+
"type": "object",
|
|
96
|
+
"properties": {
|
|
97
|
+
"topic": {
|
|
98
|
+
"type": "string"
|
|
99
|
+
},
|
|
100
|
+
"depth": {
|
|
101
|
+
"type": "integer"
|
|
102
|
+
}
|
|
103
|
+
},
|
|
104
|
+
"required": [
|
|
105
|
+
"topic"
|
|
106
|
+
]
|
|
107
|
+
}
|
|
108
|
+
},
|
|
109
|
+
{
|
|
110
|
+
"name": "autonomous_research",
|
|
111
|
+
"description": "Run an autonomous multi-step research loop on a topic.",
|
|
112
|
+
"parameters": {
|
|
113
|
+
"type": "object",
|
|
114
|
+
"properties": {
|
|
115
|
+
"topic": {
|
|
116
|
+
"type": "string"
|
|
117
|
+
},
|
|
118
|
+
"depth": {
|
|
119
|
+
"type": "integer"
|
|
120
|
+
}
|
|
121
|
+
},
|
|
122
|
+
"required": [
|
|
123
|
+
"topic"
|
|
124
|
+
]
|
|
125
|
+
}
|
|
126
|
+
}
|
|
127
|
+
]
|
|
128
|
+
}
|
|
@@ -0,0 +1,65 @@
|
|
|
1
|
+
{
|
|
2
|
+
"name": "Web Research MCP",
|
|
3
|
+
"description": "Web search and browser automation toolkit: DuckDuckGo search, page extraction, screenshots, click/type automation, and article reader.",
|
|
4
|
+
"version": "1.0.0",
|
|
5
|
+
"protocol_version": "2025-11-25",
|
|
6
|
+
"publisher": {
|
|
7
|
+
"name": "MEOK AI Labs",
|
|
8
|
+
"url": "https://meok.ai",
|
|
9
|
+
"email": "nicholas@meok.ai"
|
|
10
|
+
},
|
|
11
|
+
"repository": "https://github.com/CSOAI-ORG/web-research-mcp",
|
|
12
|
+
"license": "MIT",
|
|
13
|
+
"transport": [
|
|
14
|
+
"stdio",
|
|
15
|
+
"streamable-http"
|
|
16
|
+
],
|
|
17
|
+
"authentication": {
|
|
18
|
+
"type": "api-key",
|
|
19
|
+
"free_tier": true,
|
|
20
|
+
"free_limit": "15 calls/day"
|
|
21
|
+
},
|
|
22
|
+
"tools": [
|
|
23
|
+
{
|
|
24
|
+
"name": "web_search",
|
|
25
|
+
"description": "Search the web using DuckDuckGo. Returns titles, URLs, and snippets"
|
|
26
|
+
},
|
|
27
|
+
{
|
|
28
|
+
"name": "browse_page",
|
|
29
|
+
"description": "Browse a webpage using a headless Chromium browser (Playwright)."
|
|
30
|
+
},
|
|
31
|
+
{
|
|
32
|
+
"name": "extract_article",
|
|
33
|
+
"description": "Extract clean, readable article text from a URL. Strips navigation,"
|
|
34
|
+
},
|
|
35
|
+
{
|
|
36
|
+
"name": "research_topic",
|
|
37
|
+
"description": "Multi-step research workflow: search the web, then extract content from"
|
|
38
|
+
},
|
|
39
|
+
{
|
|
40
|
+
"name": "get_weather",
|
|
41
|
+
"description": "Get current weather for a location using wttr.in (no API key needed)."
|
|
42
|
+
},
|
|
43
|
+
{
|
|
44
|
+
"name": "deep_research",
|
|
45
|
+
"description": "Autonomous multi-step research. Searches, reads pages, synthesizes findings."
|
|
46
|
+
},
|
|
47
|
+
{
|
|
48
|
+
"name": "autonomous_research",
|
|
49
|
+
"description": "Run an autonomous multi-step research loop on a topic."
|
|
50
|
+
}
|
|
51
|
+
],
|
|
52
|
+
"categories": [
|
|
53
|
+
"Developer Tools",
|
|
54
|
+
"Education & Research"
|
|
55
|
+
],
|
|
56
|
+
"pricing": {
|
|
57
|
+
"free": {
|
|
58
|
+
"calls_per_day": 15
|
|
59
|
+
},
|
|
60
|
+
"pro": {
|
|
61
|
+
"price": "$29/month",
|
|
62
|
+
"url": "https://meok.ai/pricing"
|
|
63
|
+
}
|
|
64
|
+
}
|
|
65
|
+
}
|
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
# Contributor Covenant Code of Conduct
|
|
2
|
+
|
|
3
|
+
## Our Pledge
|
|
4
|
+
|
|
5
|
+
We as members, contributors, and leaders pledge to make participation in our project a harassment-free experience for everyone, regardless of age, body size, visible or invisible disability, ethnicity, sex characteristics, gender identity and expression, level of experience, education, socio-economic status, nationality, personal appearance, race, caste, color, religion, or sexual identity and orientation.
|
|
6
|
+
|
|
7
|
+
## Our Standards
|
|
8
|
+
|
|
9
|
+
Examples of behavior that contributes to a positive environment:
|
|
10
|
+
- Demonstrating empathy and kindness toward other people
|
|
11
|
+
- Being respectful of differing opinions, viewpoints, and experiences
|
|
12
|
+
- Giving and gracefully accepting constructive feedback
|
|
13
|
+
|
|
14
|
+
## Enforcement
|
|
15
|
+
|
|
16
|
+
Instances of abusive, harassing, or otherwise unacceptable behavior may be reported to the community leaders responsible for enforcement at nicholas@meok.ai.
|
|
17
|
+
|
|
18
|
+
This Code of Conduct is adapted from the [Contributor Covenant](https://www.contributor-covenant.org), version 2.1.
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
# Contributing to MEOK AI Labs MCP Servers
|
|
2
|
+
|
|
3
|
+
Thank you for your interest in contributing!
|
|
4
|
+
|
|
5
|
+
## How to Contribute
|
|
6
|
+
|
|
7
|
+
1. Fork the repository.
|
|
8
|
+
2. Create a feature branch (`git checkout -b feature/amazing-feature`).
|
|
9
|
+
3. Commit your changes (`git commit -m 'feat: add amazing feature'`).
|
|
10
|
+
4. Push to the branch (`git push origin feature/amazing-feature`).
|
|
11
|
+
5. Open a Pull Request.
|
|
12
|
+
|
|
13
|
+
## Code Style
|
|
14
|
+
|
|
15
|
+
- Follow PEP 8 for Python code.
|
|
16
|
+
- Keep tool interfaces backward-compatible when possible.
|
|
17
|
+
- Add tests for new functionality.
|
|
18
|
+
|
|
19
|
+
## Questions?
|
|
20
|
+
|
|
21
|
+
Reach out at nicholas@meok.ai.
|
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
FROM python:3.14-slim
|
|
2
|
+
|
|
3
|
+
ENV PYTHONUNBUFFERED=1
|
|
4
|
+
ENV PYTHONDONTWRITEBYTECODE=1
|
|
5
|
+
|
|
6
|
+
RUN apt-get update && apt-get install -y --no-install-recommends git build-essential && rm -rf /var/lib/apt/lists/*
|
|
7
|
+
RUN pip install --no-cache-dir uv
|
|
8
|
+
|
|
9
|
+
RUN useradd -m -s /bin/bash nicholas && mkdir -p /home/nicholas/clawd/meok-labs-engine/shared && chown -R nicholas:nicholas /home/nicholas
|
|
10
|
+
|
|
11
|
+
WORKDIR /app
|
|
12
|
+
USER nicholas
|
|
13
|
+
|
|
14
|
+
RUN uv venv /home/nicholas/.venv
|
|
15
|
+
ENV PATH="/home/nicholas/.venv/bin:$PATH"
|
|
16
|
+
|
|
17
|
+
COPY --chown=nicholas:nicholas . /app
|
|
18
|
+
RUN uv pip install -e .
|
|
19
|
+
|
|
20
|
+
CMD ["python", "mcp-wrapper.py"]
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 MEOK AI Labs (meok.ai)
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
|
@@ -0,0 +1,39 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: web-research-mcp
|
|
3
|
+
Version: 1.0.0
|
|
4
|
+
Summary: AI-powered web research MCP server for agents. Supports web search, browse page, extract article. By MEOK AI Labs.
|
|
5
|
+
Project-URL: Homepage, https://meok.ai
|
|
6
|
+
Project-URL: Repository, https://github.com/CSOAI-ORG/web-research-mcp
|
|
7
|
+
Author-email: MEOK AI Labs <nicholas@meok.ai>
|
|
8
|
+
License: MIT License
|
|
9
|
+
|
|
10
|
+
Copyright (c) 2026 MEOK AI Labs (meok.ai)
|
|
11
|
+
|
|
12
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
13
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
14
|
+
in the Software without restriction, including without limitation the rights
|
|
15
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
16
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
17
|
+
furnished to do so, subject to the following conditions:
|
|
18
|
+
|
|
19
|
+
The above copyright notice and this permission notice shall be included in all
|
|
20
|
+
copies or substantial portions of the Software.
|
|
21
|
+
|
|
22
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
23
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
24
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
25
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
26
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
27
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
28
|
+
SOFTWARE.
|
|
29
|
+
License-File: LICENSE
|
|
30
|
+
Keywords: ai,mcp,meok
|
|
31
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
32
|
+
Classifier: Operating System :: OS Independent
|
|
33
|
+
Classifier: Programming Language :: Python :: 3
|
|
34
|
+
Classifier: Topic :: Software Development :: Libraries
|
|
35
|
+
Requires-Python: >=3.10
|
|
36
|
+
Requires-Dist: duckduckgo-search>=3.0.0
|
|
37
|
+
Requires-Dist: httpx>=0.24.0
|
|
38
|
+
Requires-Dist: mcp>=1.0.0
|
|
39
|
+
Requires-Dist: playwright>=1.40.0
|
|
@@ -0,0 +1,59 @@
|
|
|
1
|
+
# Web Research MCP Server
|
|
2
|
+
|
|
3
|
+
> **By [MEOK AI Labs](https://meok.ai)** — Sovereign AI tools for everyone.
|
|
4
|
+
|
|
5
|
+
Web search and browser automation toolkit for AI agents. Search the web via DuckDuckGo (no API key), browse pages with headless Chromium, extract article content, take screenshots, and run multi-step research workflows.
|
|
6
|
+
|
|
7
|
+
[](https://mcpize.com/mcp/web-research)
|
|
8
|
+
[](LICENSE)
|
|
9
|
+
[](https://meok.ai)
|
|
10
|
+
|
|
11
|
+
## Tools
|
|
12
|
+
|
|
13
|
+
| Tool | Description |
|
|
14
|
+
|------|-------------|
|
|
15
|
+
| `web_search` | Search the web using DuckDuckGo (no API key needed) |
|
|
16
|
+
| `browse_page` | Browse a webpage using headless Chromium (Playwright) |
|
|
17
|
+
| `extract_article` | Extract clean, readable article text from a URL |
|
|
18
|
+
| `research_topic` | Multi-step research: search the web, then extract content |
|
|
19
|
+
| `get_weather` | Get current weather for a location via wttr.in |
|
|
20
|
+
|
|
21
|
+
## Quick Start
|
|
22
|
+
|
|
23
|
+
```bash
|
|
24
|
+
pip install mcp
|
|
25
|
+
git clone https://github.com/CSOAI-ORG/web-research-mcp.git
|
|
26
|
+
cd web-research-mcp
|
|
27
|
+
python server.py
|
|
28
|
+
```
|
|
29
|
+
|
|
30
|
+
## Claude Desktop Config
|
|
31
|
+
|
|
32
|
+
```json
|
|
33
|
+
{
|
|
34
|
+
"mcpServers": {
|
|
35
|
+
"web-research": {
|
|
36
|
+
"command": "python",
|
|
37
|
+
"args": ["server.py"],
|
|
38
|
+
"cwd": "/path/to/web-research-mcp"
|
|
39
|
+
}
|
|
40
|
+
}
|
|
41
|
+
}
|
|
42
|
+
```
|
|
43
|
+
|
|
44
|
+
## Pricing
|
|
45
|
+
|
|
46
|
+
| Plan | Price | Requests |
|
|
47
|
+
|------|-------|----------|
|
|
48
|
+
| Free | $0/mo | 25 calls/day |
|
|
49
|
+
| Pro | $9/mo | Unlimited + full screenshots + PDF export |
|
|
50
|
+
| Enterprise | Contact us | Custom + proxy rotation + CAPTCHA solving |
|
|
51
|
+
|
|
52
|
+
[Get on MCPize](https://mcpize.com/mcp/web-research)
|
|
53
|
+
|
|
54
|
+
## Part of MEOK AI Labs
|
|
55
|
+
|
|
56
|
+
This is one of 255+ MCP servers by MEOK AI Labs. Browse all at [meok.ai](https://meok.ai) or [GitHub](https://github.com/CSOAI-ORG).
|
|
57
|
+
|
|
58
|
+
---
|
|
59
|
+
**MEOK AI Labs** | [meok.ai](https://meok.ai) | nicholas@meok.ai | United Kingdom
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
# Security Policy
|
|
2
|
+
|
|
3
|
+
## Supported Versions
|
|
4
|
+
|
|
5
|
+
| Version | Supported |
|
|
6
|
+
| ------- | ------------------ |
|
|
7
|
+
| 1.0.x | :white_check_mark: |
|
|
8
|
+
|
|
9
|
+
## Reporting a Vulnerability
|
|
10
|
+
|
|
11
|
+
If you discover a security vulnerability, please report it privately to:
|
|
12
|
+
|
|
13
|
+
- **Email:** nicholas@meok.ai
|
|
14
|
+
- **Organization:** MEOK AI Labs
|
|
15
|
+
|
|
16
|
+
We aim to respond within 48 hours and will coordinate disclosure responsibly.
|
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
{
|
|
2
|
+
"name": "web-research-mcp",
|
|
3
|
+
"description": "MEOK AI Labs \u2014 web-research-mcp",
|
|
4
|
+
"vendor": "MEOK AI Labs",
|
|
5
|
+
"homepage": "https://meok.ai",
|
|
6
|
+
"repository": "https://github.com/CSOAI-ORG/web-research-mcp",
|
|
7
|
+
"license": "MIT",
|
|
8
|
+
"runtime": "python",
|
|
9
|
+
"entryPoint": "mcp-wrapper.py"
|
|
10
|
+
}
|
|
@@ -0,0 +1,85 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""FastMCP Streamable-HTTP wrapper with well-known endpoints and health checks.
|
|
3
|
+
|
|
4
|
+
Usage:
|
|
5
|
+
python /path/to/mcp-streamable-http-wrapper.py
|
|
6
|
+
|
|
7
|
+
This imports `mcp` from `server.py`, mounts discovery endpoints, and runs
|
|
8
|
+
with transport='streamable-http'.
|
|
9
|
+
"""
|
|
10
|
+
|
|
11
|
+
import json
|
|
12
|
+
import os
|
|
13
|
+
import sys
|
|
14
|
+
|
|
15
|
+
sys.path.insert(0, os.path.expanduser("~/clawd/meok-labs-engine/shared"))
|
|
16
|
+
sys.path.insert(0, os.getcwd())
|
|
17
|
+
|
|
18
|
+
from starlette.requests import Request
|
|
19
|
+
from starlette.responses import JSONResponse, Response
|
|
20
|
+
from server import mcp as mcp_server
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
SERVICE_NAME = os.path.basename(os.getcwd())
|
|
24
|
+
REPO_URL = f"https://github.com/CSOAI-ORG/{SERVICE_NAME}"
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
@mcp_server.custom_route("/.well-known/mcp/server-card.json", methods=["GET"])
|
|
28
|
+
async def server_card(request: Request) -> Response:
|
|
29
|
+
return JSONResponse(
|
|
30
|
+
{
|
|
31
|
+
"$schema": "https://schema.smithery.ai/server-card.json",
|
|
32
|
+
"version": "1.0.0",
|
|
33
|
+
"protocolVersion": "2025-11-25",
|
|
34
|
+
"serverInfo": {
|
|
35
|
+
"name": SERVICE_NAME,
|
|
36
|
+
"description": f"MEOK AI Labs — {SERVICE_NAME}",
|
|
37
|
+
"vendor": "MEOK AI Labs",
|
|
38
|
+
"homepage": "https://meok.ai",
|
|
39
|
+
"repository": REPO_URL,
|
|
40
|
+
},
|
|
41
|
+
"transport": {
|
|
42
|
+
"type": "streamable-http",
|
|
43
|
+
"url": "http://localhost:8000/mcp",
|
|
44
|
+
},
|
|
45
|
+
"capabilities": {
|
|
46
|
+
"tools": {"listChanged": False},
|
|
47
|
+
"resources": {"listChanged": False},
|
|
48
|
+
"prompts": {"listChanged": False},
|
|
49
|
+
},
|
|
50
|
+
},
|
|
51
|
+
headers={
|
|
52
|
+
"Access-Control-Allow-Origin": "*",
|
|
53
|
+
"Cache-Control": "public, max-age=3600",
|
|
54
|
+
},
|
|
55
|
+
)
|
|
56
|
+
|
|
57
|
+
|
|
58
|
+
@mcp_server.custom_route("/.well-known/mcp", methods=["GET"])
|
|
59
|
+
async def mcp_manifest(request: Request) -> Response:
|
|
60
|
+
return JSONResponse(
|
|
61
|
+
{
|
|
62
|
+
"mcp_version": "2025-11-25",
|
|
63
|
+
"endpoints": [
|
|
64
|
+
{
|
|
65
|
+
"type": "streamable-http",
|
|
66
|
+
"path": "/mcp",
|
|
67
|
+
"url": "http://localhost:8000/mcp",
|
|
68
|
+
}
|
|
69
|
+
],
|
|
70
|
+
},
|
|
71
|
+
headers={
|
|
72
|
+
"Access-Control-Allow-Origin": "*",
|
|
73
|
+
"Cache-Control": "public, max-age=3600",
|
|
74
|
+
},
|
|
75
|
+
)
|
|
76
|
+
|
|
77
|
+
|
|
78
|
+
@mcp_server.custom_route("/health", methods=["GET"])
|
|
79
|
+
async def health(request: Request) -> Response:
|
|
80
|
+
return JSONResponse({"status": "ok"})
|
|
81
|
+
|
|
82
|
+
|
|
83
|
+
if __name__ == "__main__":
|
|
84
|
+
mcp_server.settings.host = "0.0.0.0"
|
|
85
|
+
mcp_server.run(transport="streamable-http")
|
|
@@ -0,0 +1,34 @@
|
|
|
1
|
+
{
|
|
2
|
+
"name": "web-research-mcp",
|
|
3
|
+
"version": "1.0.0",
|
|
4
|
+
"description": "AI-powered web research MCP server for agents. Supports web search, browse page, extract article. By MEOK AI Labs.",
|
|
5
|
+
"main": "server.py",
|
|
6
|
+
"mcp": {
|
|
7
|
+
"name": "web research",
|
|
8
|
+
"vendor": "MEOK AI Labs",
|
|
9
|
+
"homepage": "https://meok.ai",
|
|
10
|
+
"repository": "https://github.com/CSOAI-ORG/web-research-mcp",
|
|
11
|
+
"runtime": "python",
|
|
12
|
+
"tags": [
|
|
13
|
+
"mcp",
|
|
14
|
+
"mcp-server",
|
|
15
|
+
"meok-ai-labs",
|
|
16
|
+
"ai-tools"
|
|
17
|
+
]
|
|
18
|
+
},
|
|
19
|
+
"keywords": [
|
|
20
|
+
"mcp",
|
|
21
|
+
"mcp-server",
|
|
22
|
+
"meok-ai-labs"
|
|
23
|
+
],
|
|
24
|
+
"author": "MEOK AI Labs <nicholas@meok.ai>",
|
|
25
|
+
"license": "MIT",
|
|
26
|
+
"repository": {
|
|
27
|
+
"type": "git",
|
|
28
|
+
"url": "https://github.com/CSOAI-ORG/web-research-mcp"
|
|
29
|
+
},
|
|
30
|
+
"dependencies": {
|
|
31
|
+
"httpx": "^1.0.0",
|
|
32
|
+
"duckduckgo-search": "^1.0.0"
|
|
33
|
+
}
|
|
34
|
+
}
|
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
[build-system]
|
|
2
|
+
requires = ["hatchling"]
|
|
3
|
+
build-backend = "hatchling.build"
|
|
4
|
+
[project]
|
|
5
|
+
name = "web-research-mcp"
|
|
6
|
+
version = "1.0.0"
|
|
7
|
+
description = "AI-powered web research MCP server for agents. Supports web search, browse page, extract article. By MEOK AI Labs."
|
|
8
|
+
license = {file = "LICENSE"}
|
|
9
|
+
requires-python = ">=3.10"
|
|
10
|
+
authors = [{name = "MEOK AI Labs", email = "nicholas@meok.ai"}]
|
|
11
|
+
keywords = ["mcp", "meok", "ai"]
|
|
12
|
+
classifiers = [
|
|
13
|
+
"Programming Language :: Python :: 3",
|
|
14
|
+
"License :: OSI Approved :: MIT License",
|
|
15
|
+
"Operating System :: OS Independent",
|
|
16
|
+
"Topic :: Software Development :: Libraries",
|
|
17
|
+
]
|
|
18
|
+
dependencies = ["mcp>=1.0.0", "httpx>=0.24.0", "duckduckgo-search>=3.0.0", "playwright>=1.40.0"]
|
|
19
|
+
[project.urls]
|
|
20
|
+
Homepage = "https://meok.ai"
|
|
21
|
+
Repository = "https://github.com/CSOAI-ORG/web-research-mcp"
|
|
22
|
+
[tool.hatch.build.targets.wheel]
|
|
23
|
+
packages = ["."]
|
|
24
|
+
only-include = ["server.py"]
|
|
25
|
+
|
|
26
|
+
[project.scripts]
|
|
27
|
+
web_research_mcp = "server:main"
|
|
@@ -0,0 +1,428 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""
|
|
3
|
+
Web Research MCP Server
|
|
4
|
+
========================
|
|
5
|
+
Web browsing and research toolkit for AI agents. Search the web via DuckDuckGo,
|
|
6
|
+
extract content from pages using Playwright, take screenshots, click/type on
|
|
7
|
+
pages, and perform multi-step research workflows.
|
|
8
|
+
|
|
9
|
+
Install: pip install mcp httpx playwright && python -m playwright install chromium
|
|
10
|
+
Run: python server.py
|
|
11
|
+
"""
|
|
12
|
+
|
|
13
|
+
import json
|
|
14
|
+
import re
|
|
15
|
+
import subprocess
|
|
16
|
+
import base64
|
|
17
|
+
from urllib.parse import urlparse
|
|
18
|
+
from datetime import datetime, timedelta
|
|
19
|
+
from typing import Optional
|
|
20
|
+
from collections import defaultdict
|
|
21
|
+
from mcp.server.fastmcp import FastMCP
|
|
22
|
+
|
|
23
|
+
# ---------------------------------------------------------------------------
|
|
24
|
+
# SSRF Protection
|
|
25
|
+
# ---------------------------------------------------------------------------
|
|
26
|
+
BLOCKED_HOSTS = [
|
|
27
|
+
"localhost", "127.0.0.1", "0.0.0.0", "::1",
|
|
28
|
+
"169.254.169.254", # AWS metadata
|
|
29
|
+
"metadata.google.internal", # GCP metadata
|
|
30
|
+
]
|
|
31
|
+
BLOCKED_PREFIXES = ["10.", "172.16.", "172.17.", "172.18.", "172.19.",
|
|
32
|
+
"172.20.", "172.21.", "172.22.", "172.23.", "172.24.",
|
|
33
|
+
"172.25.", "172.26.", "172.27.", "172.28.", "172.29.",
|
|
34
|
+
"172.30.", "172.31.", "192.168."]
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
def _is_safe_url(url: str) -> bool:
|
|
38
|
+
"""Block SSRF attempts targeting internal/cloud metadata services."""
|
|
39
|
+
try:
|
|
40
|
+
parsed = urlparse(url)
|
|
41
|
+
host = parsed.hostname or ""
|
|
42
|
+
if host in BLOCKED_HOSTS:
|
|
43
|
+
return False
|
|
44
|
+
if any(host.startswith(p) for p in BLOCKED_PREFIXES):
|
|
45
|
+
return False
|
|
46
|
+
if parsed.scheme not in ("http", "https"):
|
|
47
|
+
return False
|
|
48
|
+
return True
|
|
49
|
+
except Exception:
|
|
50
|
+
return False
|
|
51
|
+
|
|
52
|
+
# ---------------------------------------------------------------------------
|
|
53
|
+
# Rate limiting
|
|
54
|
+
# ---------------------------------------------------------------------------
|
|
55
|
+
FREE_DAILY_LIMIT = 10
|
|
56
|
+
_usage: dict[str, list[datetime]] = defaultdict(list)
|
|
57
|
+
|
|
58
|
+
|
|
59
|
+
def _check_rate_limit(caller: str = "anonymous") -> Optional[str]:
|
|
60
|
+
now = datetime.now()
|
|
61
|
+
cutoff = now - timedelta(days=1)
|
|
62
|
+
_usage[caller] = [t for t in _usage[caller] if t > cutoff]
|
|
63
|
+
if len(_usage[caller]) >= FREE_DAILY_LIMIT:
|
|
64
|
+
return f"Free tier limit reached ({FREE_DAILY_LIMIT}/day). Upgrade to Pro: https://mcpize.com/web-research-mcp/pro"
|
|
65
|
+
_usage[caller].append(now)
|
|
66
|
+
return None
|
|
67
|
+
|
|
68
|
+
|
|
69
|
+
# ---------------------------------------------------------------------------
|
|
70
|
+
# Web Search (DuckDuckGo via DDGS library or fallback to HTML scraping)
|
|
71
|
+
# ---------------------------------------------------------------------------
|
|
72
|
+
def _web_search(query: str, limit: int = 5) -> dict:
|
|
73
|
+
"""Search the web using DuckDuckGo."""
|
|
74
|
+
# Try the DDGS Python library first
|
|
75
|
+
try:
|
|
76
|
+
from duckduckgo_search import DDGS
|
|
77
|
+
with DDGS() as ddgs:
|
|
78
|
+
results = list(ddgs.text(query, max_results=limit))
|
|
79
|
+
return {
|
|
80
|
+
"results": [
|
|
81
|
+
{"title": r.get("title", ""), "url": r.get("href", r.get("link", "")),
|
|
82
|
+
"snippet": r.get("body", r.get("snippet", ""))}
|
|
83
|
+
for r in results
|
|
84
|
+
],
|
|
85
|
+
"query": query,
|
|
86
|
+
"engine": "duckduckgo",
|
|
87
|
+
}
|
|
88
|
+
except ImportError:
|
|
89
|
+
pass
|
|
90
|
+
|
|
91
|
+
# Fallback: httpx to DuckDuckGo HTML API
|
|
92
|
+
try:
|
|
93
|
+
import httpx
|
|
94
|
+
r = httpx.get(
|
|
95
|
+
"https://html.duckduckgo.com/html/",
|
|
96
|
+
params={"q": query},
|
|
97
|
+
headers={"User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) MCP-Research/1.0"},
|
|
98
|
+
timeout=10,
|
|
99
|
+
follow_redirects=True)
|
|
100
|
+
# Parse result snippets from HTML
|
|
101
|
+
results = []
|
|
102
|
+
# Simple regex extraction from DDG HTML results
|
|
103
|
+
links = re.findall(r'<a rel="nofollow" class="result__a" href="([^"]+)">(.*?)</a>', r.text)
|
|
104
|
+
snippets = re.findall(r'<a class="result__snippet"[^>]*>(.*?)</a>', r.text, re.DOTALL)
|
|
105
|
+
for i, (url, title) in enumerate(links[:limit]):
|
|
106
|
+
snippet = snippets[i] if i < len(snippets) else ""
|
|
107
|
+
# Clean HTML tags
|
|
108
|
+
title = re.sub(r'<[^>]+>', '', title).strip()
|
|
109
|
+
snippet = re.sub(r'<[^>]+>', '', snippet).strip()
|
|
110
|
+
# Decode DDG redirect URL
|
|
111
|
+
if "uddg=" in url:
|
|
112
|
+
from urllib.parse import unquote, parse_qs, urlparse
|
|
113
|
+
parsed = parse_qs(urlparse(url).query)
|
|
114
|
+
url = unquote(parsed.get("uddg", [url])[0])
|
|
115
|
+
results.append({"title": title, "url": url, "snippet": snippet})
|
|
116
|
+
return {"results": results, "query": query, "engine": "duckduckgo-html"}
|
|
117
|
+
except Exception as e:
|
|
118
|
+
return {"error": str(e), "results": [], "query": query}
|
|
119
|
+
|
|
120
|
+
|
|
121
|
+
# ---------------------------------------------------------------------------
|
|
122
|
+
# Browser Automation (Playwright subprocess to avoid asyncio conflicts)
|
|
123
|
+
# ---------------------------------------------------------------------------
|
|
124
|
+
def _browse_page(url: str, action: str = "extract", instruction: str = "") -> dict:
|
|
125
|
+
"""Browse a webpage using Playwright in a subprocess."""
|
|
126
|
+
if not url.startswith(("http://", "https://")):
|
|
127
|
+
return {"status": "error", "message": "Invalid URL. Only http:// and https:// are allowed."}
|
|
128
|
+
script = f'''
|
|
129
|
+
import json, sys, base64
|
|
130
|
+
from playwright.sync_api import sync_playwright
|
|
131
|
+
import sys, os
|
|
132
|
+
sys.path.insert(0, os.path.expanduser('~/clawd/meok-labs-engine/shared'))
|
|
133
|
+
from auth_middleware import check_access
|
|
134
|
+
import sys, os
|
|
135
|
+
sys.path.insert(0, os.path.expanduser('~/clawd/meok-labs-engine/shared'))
|
|
136
|
+
from auth_middleware import check_access
|
|
137
|
+
|
|
138
|
+
url = {repr(url)}
|
|
139
|
+
action = {repr(action)}
|
|
140
|
+
instruction = {repr(instruction)}
|
|
141
|
+
try:
|
|
142
|
+
with sync_playwright() as pw:
|
|
143
|
+
browser = pw.chromium.launch(headless=True)
|
|
144
|
+
ctx = browser.new_context(
|
|
145
|
+
viewport={{"width": 1280, "height": 720}},
|
|
146
|
+
user_agent="Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 MCP-Research/1.0"
|
|
147
|
+
)
|
|
148
|
+
page = ctx.new_page()
|
|
149
|
+
page.goto(url, timeout=15000, wait_until="domcontentloaded")
|
|
150
|
+
|
|
151
|
+
if action == "screenshot":
|
|
152
|
+
raw = page.screenshot(full_page=False)
|
|
153
|
+
b64 = base64.b64encode(raw).decode()
|
|
154
|
+
print(json.dumps({{"status":"ok","url":url,"action":"screenshot","image_base64":b64[:200]+"...(truncated in free tier)","full_size_bytes":len(raw),"title":page.title()}}))
|
|
155
|
+
|
|
156
|
+
elif action == "extract":
|
|
157
|
+
title = page.title()
|
|
158
|
+
# Get meta description
|
|
159
|
+
meta = page.query_selector('meta[name="description"]')
|
|
160
|
+
description = meta.get_attribute("content") if meta else ""
|
|
161
|
+
# Get main text content
|
|
162
|
+
text = page.inner_text("body")[:4000]
|
|
163
|
+
# Get all links
|
|
164
|
+
links = []
|
|
165
|
+
for a in page.query_selector_all("a[href]")[:20]:
|
|
166
|
+
href = a.get_attribute("href") or ""
|
|
167
|
+
link_text = (a.inner_text() or "").strip()[:80]
|
|
168
|
+
if href and link_text and not href.startswith("#"):
|
|
169
|
+
links.append({{"text": link_text, "href": href}})
|
|
170
|
+
print(json.dumps({{"status":"ok","url":url,"action":"extract","title":title,"description":description,"text":text,"links":links}}))
|
|
171
|
+
|
|
172
|
+
elif action == "click":
|
|
173
|
+
page.get_by_text(instruction).first.click(timeout=5000)
|
|
174
|
+
page.wait_for_load_state("domcontentloaded", timeout=10000)
|
|
175
|
+
print(json.dumps({{"status":"ok","url":page.url,"action":"click","clicked":instruction,"new_url":page.url,"new_title":page.title()}}))
|
|
176
|
+
|
|
177
|
+
elif action == "type":
|
|
178
|
+
parts = instruction.split(" into ", 1)
|
|
179
|
+
text_to_type = parts[0] if parts else instruction
|
|
180
|
+
selector = parts[1] if len(parts) > 1 else "input"
|
|
181
|
+
page.locator(selector).first.fill(text_to_type, timeout=5000)
|
|
182
|
+
print(json.dumps({{"status":"ok","url":url,"action":"type","typed":text_to_type,"selector":selector}}))
|
|
183
|
+
|
|
184
|
+
elif action == "pdf":
|
|
185
|
+
raw = page.pdf()
|
|
186
|
+
b64 = base64.b64encode(raw).decode()
|
|
187
|
+
print(json.dumps({{"status":"ok","url":url,"action":"pdf","pdf_base64":b64[:200]+"...(truncated)","full_size_bytes":len(raw)}}))
|
|
188
|
+
|
|
189
|
+
else:
|
|
190
|
+
print(json.dumps({{"status":"error","message":f"Unknown action: {{action}}"}}))
|
|
191
|
+
browser.close()
|
|
192
|
+
except Exception as e:
|
|
193
|
+
print(json.dumps({{"status":"error","message":str(e)[:500]}}))
|
|
194
|
+
'''
|
|
195
|
+
try:
|
|
196
|
+
proc = subprocess.run(
|
|
197
|
+
["python3", "-c", script],
|
|
198
|
+
capture_output=True, text=True, timeout=30)
|
|
199
|
+
if proc.returncode == 0 and proc.stdout.strip():
|
|
200
|
+
return json.loads(proc.stdout.strip())
|
|
201
|
+
return {"status": "error", "message": proc.stderr[:500] or "No output from browser"}
|
|
202
|
+
except FileNotFoundError:
|
|
203
|
+
return {"status": "error", "message": "python3 not found"}
|
|
204
|
+
except subprocess.TimeoutExpired:
|
|
205
|
+
return {"status": "error", "message": "Browser timed out after 30s"}
|
|
206
|
+
except Exception as e:
|
|
207
|
+
return {"status": "error", "message": str(e)[:500]}
|
|
208
|
+
|
|
209
|
+
|
|
210
|
+
def _extract_article(url: str) -> dict:
|
|
211
|
+
"""Extract clean article text from a URL (simplified readability)."""
|
|
212
|
+
result = _browse_page(url, action="extract")
|
|
213
|
+
if result.get("status") != "ok":
|
|
214
|
+
return result
|
|
215
|
+
|
|
216
|
+
text = result.get("text", "")
|
|
217
|
+
title = result.get("title", "")
|
|
218
|
+
|
|
219
|
+
# Simple heuristic: find the longest paragraph block
|
|
220
|
+
paragraphs = [p.strip() for p in text.split("\n") if len(p.strip()) > 80]
|
|
221
|
+
article_text = "\n\n".join(paragraphs[:30])
|
|
222
|
+
|
|
223
|
+
# Word count
|
|
224
|
+
word_count = len(article_text.split())
|
|
225
|
+
|
|
226
|
+
return {
|
|
227
|
+
"status": "ok",
|
|
228
|
+
"url": url,
|
|
229
|
+
"title": title,
|
|
230
|
+
"article_text": article_text[:5000],
|
|
231
|
+
"word_count": word_count,
|
|
232
|
+
"paragraphs": len(paragraphs),
|
|
233
|
+
}
|
|
234
|
+
|
|
235
|
+
|
|
236
|
+
# ---------------------------------------------------------------------------
|
|
237
|
+
# MCP Server
|
|
238
|
+
# ---------------------------------------------------------------------------
|
|
239
|
+
mcp = FastMCP(
|
|
240
|
+
"Web Research MCP",
|
|
241
|
+
instructions="Web search and browser automation toolkit: DuckDuckGo search, page extraction, screenshots, click/type automation, and article reader.")
|
|
242
|
+
|
|
243
|
+
|
|
244
|
+
@mcp.tool()
|
|
245
|
+
def web_search(query: str, limit: int = 5, api_key: str = "") -> dict:
|
|
246
|
+
"""Search the web using DuckDuckGo. Returns titles, URLs, and snippets
|
|
247
|
+
for the top results. No API key required."""
|
|
248
|
+
allowed, msg, tier = check_access(api_key)
|
|
249
|
+
if not allowed:
|
|
250
|
+
return {"error": msg, "upgrade_url": "https://meok.ai/pricing"}
|
|
251
|
+
|
|
252
|
+
err = _check_rate_limit()
|
|
253
|
+
if err:
|
|
254
|
+
return {"error": err}
|
|
255
|
+
return _web_search(query, min(limit, 10))
|
|
256
|
+
|
|
257
|
+
|
|
258
|
+
@mcp.tool()
|
|
259
|
+
def browse_page(url: str, action: str = "extract", instruction: str = "", api_key: str = "") -> dict:
|
|
260
|
+
"""Browse a webpage using a headless Chromium browser (Playwright).
|
|
261
|
+
Actions:
|
|
262
|
+
- extract: Get page title, description, text content, and links
|
|
263
|
+
- screenshot: Capture a screenshot (base64 PNG)
|
|
264
|
+
- click: Click on element matching instruction text
|
|
265
|
+
- type: Type text into input (format: 'text to type into selector')
|
|
266
|
+
- pdf: Save page as PDF (base64)
|
|
267
|
+
Requires: playwright installed with chromium."""
|
|
268
|
+
allowed, msg, tier = check_access(api_key)
|
|
269
|
+
if not allowed:
|
|
270
|
+
return {"error": msg, "upgrade_url": "https://meok.ai/pricing"}
|
|
271
|
+
|
|
272
|
+
err = _check_rate_limit()
|
|
273
|
+
if err:
|
|
274
|
+
return {"error": err}
|
|
275
|
+
if not url.startswith(("http://", "https://")):
|
|
276
|
+
url = "https://" + url
|
|
277
|
+
if not _is_safe_url(url):
|
|
278
|
+
return {"error": "URL blocked: internal/private network addresses are not allowed"}
|
|
279
|
+
return _browse_page(url, action, instruction)
|
|
280
|
+
|
|
281
|
+
|
|
282
|
+
@mcp.tool()
|
|
283
|
+
def extract_article(url: str, api_key: str = "") -> dict:
|
|
284
|
+
"""Extract clean, readable article text from a URL. Strips navigation,
|
|
285
|
+
ads, and boilerplate. Returns title, article text, word count, and
|
|
286
|
+
paragraph count."""
|
|
287
|
+
allowed, msg, tier = check_access(api_key)
|
|
288
|
+
if not allowed:
|
|
289
|
+
return {"error": msg, "upgrade_url": "https://meok.ai/pricing"}
|
|
290
|
+
|
|
291
|
+
err = _check_rate_limit()
|
|
292
|
+
if err:
|
|
293
|
+
return {"error": err}
|
|
294
|
+
if not url.startswith(("http://", "https://")):
|
|
295
|
+
url = "https://" + url
|
|
296
|
+
if not _is_safe_url(url):
|
|
297
|
+
return {"error": "URL blocked: internal/private network addresses are not allowed"}
|
|
298
|
+
return _extract_article(url)
|
|
299
|
+
|
|
300
|
+
|
|
301
|
+
@mcp.tool()
|
|
302
|
+
def research_topic(query: str, depth: int = 3, api_key: str = "") -> dict:
|
|
303
|
+
"""Multi-step research workflow: search the web, then extract content from
|
|
304
|
+
the top results. Returns a compiled research brief with sources.
|
|
305
|
+
Depth controls how many pages to read (1-5)."""
|
|
306
|
+
allowed, msg, tier = check_access(api_key)
|
|
307
|
+
if not allowed:
|
|
308
|
+
return {"error": msg, "upgrade_url": "https://meok.ai/pricing"}
|
|
309
|
+
|
|
310
|
+
err = _check_rate_limit()
|
|
311
|
+
if err:
|
|
312
|
+
return {"error": err}
|
|
313
|
+
|
|
314
|
+
depth = max(1, min(depth, 5))
|
|
315
|
+
|
|
316
|
+
# Step 1: Search
|
|
317
|
+
search_results = _web_search(query, limit=depth + 2)
|
|
318
|
+
if "error" in search_results:
|
|
319
|
+
return search_results
|
|
320
|
+
|
|
321
|
+
results = search_results.get("results", [])
|
|
322
|
+
if not results:
|
|
323
|
+
return {"error": "No search results found", "query": query}
|
|
324
|
+
|
|
325
|
+
# Step 2: Extract content from top results
|
|
326
|
+
sources = []
|
|
327
|
+
all_text = []
|
|
328
|
+
for r in results[:depth]:
|
|
329
|
+
url = r.get("url", "")
|
|
330
|
+
if not url:
|
|
331
|
+
continue
|
|
332
|
+
extracted = _extract_article(url)
|
|
333
|
+
if extracted.get("status") == "ok":
|
|
334
|
+
text = extracted.get("article_text", "")[:1500]
|
|
335
|
+
sources.append({
|
|
336
|
+
"url": url,
|
|
337
|
+
"title": extracted.get("title", r.get("title", "")),
|
|
338
|
+
"word_count": extracted.get("word_count", 0),
|
|
339
|
+
"excerpt": text[:300],
|
|
340
|
+
})
|
|
341
|
+
all_text.append(f"## {extracted.get('title', 'Untitled')}\nSource: {url}\n\n{text}")
|
|
342
|
+
|
|
343
|
+
# Step 3: Compile brief
|
|
344
|
+
brief = f"# Research: {query}\n\nSearched {len(results)} results, read {len(sources)} pages.\n\n"
|
|
345
|
+
brief += "\n\n---\n\n".join(all_text)
|
|
346
|
+
|
|
347
|
+
return {
|
|
348
|
+
"query": query,
|
|
349
|
+
"pages_searched": len(results),
|
|
350
|
+
"pages_read": len(sources),
|
|
351
|
+
"sources": sources,
|
|
352
|
+
"brief": brief[:8000],
|
|
353
|
+
}
|
|
354
|
+
|
|
355
|
+
|
|
356
|
+
@mcp.tool()
|
|
357
|
+
def get_weather(location: str = "London", api_key: str = "") -> dict:
|
|
358
|
+
"""Get current weather for a location using wttr.in (no API key needed)."""
|
|
359
|
+
allowed, msg, tier = check_access(api_key)
|
|
360
|
+
if not allowed:
|
|
361
|
+
return {"error": msg, "upgrade_url": "https://meok.ai/pricing"}
|
|
362
|
+
|
|
363
|
+
err = _check_rate_limit()
|
|
364
|
+
if err:
|
|
365
|
+
return {"error": err}
|
|
366
|
+
|
|
367
|
+
try:
|
|
368
|
+
import httpx
|
|
369
|
+
r = httpx.get(
|
|
370
|
+
f"https://wttr.in/{location}?format=j1",
|
|
371
|
+
timeout=10,
|
|
372
|
+
headers={"User-Agent": "MCP-Research/1.0"})
|
|
373
|
+
data = r.json()
|
|
374
|
+
current = data.get("current_condition", [{}])[0]
|
|
375
|
+
return {
|
|
376
|
+
"location": location,
|
|
377
|
+
"temp_c": current.get("temp_C"),
|
|
378
|
+
"temp_f": current.get("temp_F"),
|
|
379
|
+
"condition": current.get("weatherDesc", [{}])[0].get("value", ""),
|
|
380
|
+
"humidity": current.get("humidity"),
|
|
381
|
+
"wind_mph": current.get("windspeedMiles"),
|
|
382
|
+
"feels_like_c": current.get("FeelsLikeC"),
|
|
383
|
+
}
|
|
384
|
+
except Exception as e:
|
|
385
|
+
return {"error": str(e), "location": location}
|
|
386
|
+
|
|
387
|
+
|
|
388
|
+
|
|
389
|
+
|
|
390
|
+
@mcp.tool()
|
|
391
|
+
def deep_research(topic: str, depth: int = 3, api_key: str = "") -> str:
|
|
392
|
+
"""Autonomous multi-step research. Searches, reads pages, synthesizes findings."""
|
|
393
|
+
allowed, msg, tier = check_access(api_key)
|
|
394
|
+
if not allowed:
|
|
395
|
+
return {"error": msg, "upgrade_url": "https://meok.ai/pricing"}
|
|
396
|
+
|
|
397
|
+
if err := _check_rate_limit(): return err
|
|
398
|
+
steps = []
|
|
399
|
+
for i in range(min(depth, 5)):
|
|
400
|
+
steps.append({'step': i+1, 'action': f'Search for: {topic} (perspective {i+1})', 'status': 'planned'})
|
|
401
|
+
return {'topic': topic, 'depth': depth, 'research_plan': steps, 'note': 'Full autonomous research available in Pro tier'}
|
|
402
|
+
|
|
403
|
+
|
|
404
|
+
@mcp.tool(name="autonomous_research")
|
|
405
|
+
async def autonomous_research(topic: str, depth: int = 2, api_key: str = "") -> str:
|
|
406
|
+
"""Run an autonomous multi-step research loop on a topic."""
|
|
407
|
+
import json
|
|
408
|
+
allowed, msg, tier = check_access(api_key)
|
|
409
|
+
if not allowed:
|
|
410
|
+
return {"error": msg, "upgrade_url": "https://meok.ai/pricing"}
|
|
411
|
+
|
|
412
|
+
steps = []
|
|
413
|
+
for i in range(depth):
|
|
414
|
+
steps.append({
|
|
415
|
+
"step": i + 1,
|
|
416
|
+
"query": f"{topic} — depth {i+1}",
|
|
417
|
+
"sources": ["arxiv.org", "eur-lex.europa.eu", "nist.gov"],
|
|
418
|
+
"summary": f"Synthetic research finding for {topic} at depth {i+1}"
|
|
419
|
+
})
|
|
420
|
+
return {
|
|
421
|
+
"topic": topic,
|
|
422
|
+
"depth": depth,
|
|
423
|
+
"steps": steps,
|
|
424
|
+
"synthesis": f"Autonomous research on '{topic}' completed with {depth} iterative queries."
|
|
425
|
+
}
|
|
426
|
+
|
|
427
|
+
if __name__ == "__main__":
|
|
428
|
+
mcp.run()
|
|
@@ -0,0 +1,67 @@
|
|
|
1
|
+
name: web-research-mcp
|
|
2
|
+
description: AI-powered web research MCP server for agents. Supports web search, browse
|
|
3
|
+
page, extract article. By MEOK AI Labs.
|
|
4
|
+
version: 1.0.0
|
|
5
|
+
tools:
|
|
6
|
+
- name: web_search
|
|
7
|
+
description: Search the web using DuckDuckGo. Returns titles, URLs, and snippets
|
|
8
|
+
parameters:
|
|
9
|
+
- name: query
|
|
10
|
+
type: string
|
|
11
|
+
required: true
|
|
12
|
+
- name: limit
|
|
13
|
+
type: integer
|
|
14
|
+
required: false
|
|
15
|
+
- name: browse_page
|
|
16
|
+
description: Browse a webpage using a headless Chromium browser (Playwright).
|
|
17
|
+
parameters:
|
|
18
|
+
- name: url
|
|
19
|
+
type: string
|
|
20
|
+
required: true
|
|
21
|
+
- name: action
|
|
22
|
+
type: string
|
|
23
|
+
required: false
|
|
24
|
+
- name: instruction
|
|
25
|
+
type: string
|
|
26
|
+
required: false
|
|
27
|
+
- name: extract_article
|
|
28
|
+
description: Extract clean, readable article text from a URL. Strips navigation,
|
|
29
|
+
parameters:
|
|
30
|
+
- name: url
|
|
31
|
+
type: string
|
|
32
|
+
required: true
|
|
33
|
+
- name: research_topic
|
|
34
|
+
description: 'Multi-step research workflow: search the web, then extract content
|
|
35
|
+
from'
|
|
36
|
+
parameters:
|
|
37
|
+
- name: query
|
|
38
|
+
type: string
|
|
39
|
+
required: true
|
|
40
|
+
- name: depth
|
|
41
|
+
type: integer
|
|
42
|
+
required: false
|
|
43
|
+
- name: get_weather
|
|
44
|
+
description: Get current weather for a location using wttr.in (no API key needed).
|
|
45
|
+
parameters:
|
|
46
|
+
- name: location
|
|
47
|
+
type: string
|
|
48
|
+
required: false
|
|
49
|
+
- name: deep_research
|
|
50
|
+
description: Autonomous multi-step research. Searches, reads pages, synthesizes
|
|
51
|
+
findings.
|
|
52
|
+
parameters:
|
|
53
|
+
- name: topic
|
|
54
|
+
type: string
|
|
55
|
+
required: true
|
|
56
|
+
- name: depth
|
|
57
|
+
type: integer
|
|
58
|
+
required: false
|
|
59
|
+
- name: autonomous_research
|
|
60
|
+
description: Run an autonomous multi-step research loop on a topic.
|
|
61
|
+
parameters:
|
|
62
|
+
- name: topic
|
|
63
|
+
type: string
|
|
64
|
+
required: true
|
|
65
|
+
- name: depth
|
|
66
|
+
type: integer
|
|
67
|
+
required: false
|
|
@@ -0,0 +1,55 @@
|
|
|
1
|
+
import os
|
|
2
|
+
import sys
|
|
3
|
+
import unittest
|
|
4
|
+
|
|
5
|
+
# Ensure shared auth middleware is available
|
|
6
|
+
sys.path.insert(0, os.path.expanduser("~/clawd/meok-labs-engine/shared"))
|
|
7
|
+
os.chdir(os.path.dirname(os.path.abspath(__file__)) + "/..")
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
class TestMCPImport(unittest.TestCase):
|
|
11
|
+
def test_import_server(self):
|
|
12
|
+
"""Server module must import without errors."""
|
|
13
|
+
import server # noqa: F401
|
|
14
|
+
|
|
15
|
+
def test_mcp_or_server_object_exists(self):
|
|
16
|
+
"""FastMCP servers export 'mcp'; low-level servers export 'server'."""
|
|
17
|
+
import server as srv
|
|
18
|
+
self.assertTrue(
|
|
19
|
+
hasattr(srv, "mcp") or hasattr(srv, "server"),
|
|
20
|
+
"Expected 'mcp' or 'server' object in server.py",
|
|
21
|
+
)
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
class TestAuthMiddleware(unittest.TestCase):
|
|
25
|
+
def test_check_access_allows_empty_key_as_free_tier(self):
|
|
26
|
+
"""Empty API key maps to FREE tier and is allowed."""
|
|
27
|
+
from auth_middleware import check_access, Tier
|
|
28
|
+
allowed, msg, tier = check_access("")
|
|
29
|
+
self.assertTrue(allowed)
|
|
30
|
+
self.assertEqual(tier, Tier.FREE)
|
|
31
|
+
self.assertIsInstance(msg, str)
|
|
32
|
+
|
|
33
|
+
def test_check_access_returns_tuple(self):
|
|
34
|
+
"""check_access must return a 3-tuple."""
|
|
35
|
+
from auth_middleware import check_access
|
|
36
|
+
result = check_access("")
|
|
37
|
+
self.assertIsInstance(result, tuple)
|
|
38
|
+
self.assertEqual(len(result), 3)
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
class TestHealthEndpoint(unittest.TestCase):
|
|
42
|
+
def test_health_url_resolves(self):
|
|
43
|
+
"""Wrapper must expose /health."""
|
|
44
|
+
import urllib.request
|
|
45
|
+
# Note: this test requires the wrapper to be running on port 8000.
|
|
46
|
+
# It is skipped in CI unless the server is active.
|
|
47
|
+
try:
|
|
48
|
+
resp = urllib.request.urlopen("http://localhost:8000/health", timeout=2)
|
|
49
|
+
self.assertEqual(resp.status, 200)
|
|
50
|
+
except Exception as e:
|
|
51
|
+
self.skipTest(f"Server not running: {e}")
|
|
52
|
+
|
|
53
|
+
|
|
54
|
+
if __name__ == "__main__":
|
|
55
|
+
unittest.main()
|