testmcpy 0.2.2__tar.gz → 0.2.4__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {testmcpy-0.2.2/testmcpy.egg-info → testmcpy-0.2.4}/PKG-INFO +29 -11
- {testmcpy-0.2.2 → testmcpy-0.2.4}/README.md +24 -6
- {testmcpy-0.2.2 → testmcpy-0.2.4}/pyproject.toml +5 -5
- {testmcpy-0.2.2 → testmcpy-0.2.4}/testmcpy/cli.py +197 -3
- {testmcpy-0.2.2 → testmcpy-0.2.4}/testmcpy/config.py +36 -25
- {testmcpy-0.2.2 → testmcpy-0.2.4}/testmcpy/evals/base_evaluators.py +122 -0
- testmcpy-0.2.4/testmcpy/formatters/__init__.py +46 -0
- testmcpy-0.2.4/testmcpy/formatters/base.py +232 -0
- testmcpy-0.2.4/testmcpy/formatters/curl.py +87 -0
- testmcpy-0.2.4/testmcpy/formatters/graphql.py +136 -0
- testmcpy-0.2.4/testmcpy/formatters/javascript_client.py +115 -0
- testmcpy-0.2.4/testmcpy/formatters/json_yaml.py +61 -0
- testmcpy-0.2.4/testmcpy/formatters/protobuf.py +136 -0
- testmcpy-0.2.4/testmcpy/formatters/python.py +157 -0
- testmcpy-0.2.4/testmcpy/formatters/python_client.py +143 -0
- testmcpy-0.2.4/testmcpy/formatters/thrift.py +117 -0
- testmcpy-0.2.4/testmcpy/formatters/typescript.py +144 -0
- testmcpy-0.2.4/testmcpy/formatters/typescript_client.py +140 -0
- {testmcpy-0.2.2 → testmcpy-0.2.4}/testmcpy/mcp_profiles.py +143 -12
- testmcpy-0.2.4/testmcpy/server/api.py +2803 -0
- {testmcpy-0.2.2 → testmcpy-0.2.4}/testmcpy/src/llm_integration.py +30 -11
- {testmcpy-0.2.2 → testmcpy-0.2.4}/testmcpy/src/mcp_client.py +179 -19
- {testmcpy-0.2.2 → testmcpy-0.2.4}/testmcpy/src/test_runner.py +12 -1
- testmcpy-0.2.4/testmcpy/ui/dist/assets/index-45O3yN-_.css +1 -0
- testmcpy-0.2.4/testmcpy/ui/dist/assets/index-rflbClOY.js +458 -0
- testmcpy-0.2.4/testmcpy/ui/dist/index.html +14 -0
- testmcpy-0.2.4/testmcpy/ui/src/App.jsx +309 -0
- testmcpy-0.2.4/testmcpy/ui/src/components/MCPProfileSelector.jsx +171 -0
- testmcpy-0.2.4/testmcpy/ui/src/components/OptimizeDocsModal.jsx +396 -0
- testmcpy-0.2.4/testmcpy/ui/src/components/SchemaCodeViewer.jsx +205 -0
- testmcpy-0.2.4/testmcpy/ui/src/components/TestGenerationModal.jsx +357 -0
- {testmcpy-0.2.2 → testmcpy-0.2.4}/testmcpy/ui/src/components/TestResultPanel.jsx +44 -20
- {testmcpy-0.2.2 → testmcpy-0.2.4}/testmcpy/ui/src/index.css +16 -0
- {testmcpy-0.2.2 → testmcpy-0.2.4}/testmcpy/ui/src/pages/ChatInterface.jsx +153 -8
- {testmcpy-0.2.2 → testmcpy-0.2.4}/testmcpy/ui/src/pages/Configuration.jsx +20 -13
- testmcpy-0.2.4/testmcpy/ui/src/pages/MCPExplorer.jsx +922 -0
- testmcpy-0.2.4/testmcpy/ui/src/pages/MCPProfiles.jsx +1190 -0
- {testmcpy-0.2.2 → testmcpy-0.2.4}/testmcpy/ui/src/pages/TestManager.jsx +124 -20
- testmcpy-0.2.4/testmcpy/ui/src/utils/__tests__/formatConverters.test.js +170 -0
- testmcpy-0.2.4/testmcpy/ui/src/utils/formatConverters.js +807 -0
- {testmcpy-0.2.2 → testmcpy-0.2.4/testmcpy.egg-info}/PKG-INFO +29 -11
- {testmcpy-0.2.2 → testmcpy-0.2.4}/testmcpy.egg-info/SOURCES.txt +24 -0
- {testmcpy-0.2.2 → testmcpy-0.2.4}/testmcpy.egg-info/requires.txt +4 -4
- testmcpy-0.2.4/tests/test_api_optimize_docs.py +264 -0
- testmcpy-0.2.4/tests/test_mcp_client_auth.py +216 -0
- testmcpy-0.2.2/testmcpy/server/api.py +0 -681
- testmcpy-0.2.2/testmcpy/ui/src/App.jsx +0 -148
- testmcpy-0.2.2/testmcpy/ui/src/pages/MCPExplorer.jsx +0 -278
- {testmcpy-0.2.2 → testmcpy-0.2.4}/LICENSE +0 -0
- {testmcpy-0.2.2 → testmcpy-0.2.4}/MANIFEST.in +0 -0
- {testmcpy-0.2.2 → testmcpy-0.2.4}/NOTICE +0 -0
- {testmcpy-0.2.2 → testmcpy-0.2.4}/setup.cfg +0 -0
- {testmcpy-0.2.2 → testmcpy-0.2.4}/testmcpy/__init__.py +0 -0
- {testmcpy-0.2.2 → testmcpy-0.2.4}/testmcpy/evals/__init__.py +0 -0
- {testmcpy-0.2.2 → testmcpy-0.2.4}/testmcpy/research/claude_sdk_detailed_exploration.py +0 -0
- {testmcpy-0.2.2 → testmcpy-0.2.4}/testmcpy/research/claude_sdk_poc.py +0 -0
- {testmcpy-0.2.2 → testmcpy-0.2.4}/testmcpy/research/claude_sdk_working_poc.py +0 -0
- {testmcpy-0.2.2 → testmcpy-0.2.4}/testmcpy/research/test_ollama_tools.py +0 -0
- {testmcpy-0.2.2 → testmcpy-0.2.4}/testmcpy/server/__init__.py +0 -0
- {testmcpy-0.2.2 → testmcpy-0.2.4}/testmcpy/server/websocket.py +0 -0
- {testmcpy-0.2.2 → testmcpy-0.2.4}/testmcpy/src/__init__.py +0 -0
- {testmcpy-0.2.2 → testmcpy-0.2.4}/testmcpy/ui/README.md +0 -0
- {testmcpy-0.2.2 → testmcpy-0.2.4}/testmcpy/ui/index.html +0 -0
- {testmcpy-0.2.2 → testmcpy-0.2.4}/testmcpy/ui/package-lock.json +0 -0
- {testmcpy-0.2.2 → testmcpy-0.2.4}/testmcpy/ui/package.json +0 -0
- {testmcpy-0.2.2 → testmcpy-0.2.4}/testmcpy/ui/postcss.config.js +0 -0
- {testmcpy-0.2.2 → testmcpy-0.2.4}/testmcpy/ui/src/components/ParameterCard.jsx +0 -0
- {testmcpy-0.2.2 → testmcpy-0.2.4}/testmcpy/ui/src/components/TestStatusIndicator.jsx +0 -0
- {testmcpy-0.2.2 → testmcpy-0.2.4}/testmcpy/ui/src/components/TypeBadge.jsx +0 -0
- {testmcpy-0.2.2 → testmcpy-0.2.4}/testmcpy/ui/src/main.jsx +0 -0
- {testmcpy-0.2.2 → testmcpy-0.2.4}/testmcpy/ui/tailwind.config.js +0 -0
- {testmcpy-0.2.2 → testmcpy-0.2.4}/testmcpy/ui/vite.config.js +0 -0
- {testmcpy-0.2.2 → testmcpy-0.2.4}/testmcpy.egg-info/dependency_links.txt +0 -0
- {testmcpy-0.2.2 → testmcpy-0.2.4}/testmcpy.egg-info/entry_points.txt +0 -0
- {testmcpy-0.2.2 → testmcpy-0.2.4}/testmcpy.egg-info/top_level.txt +0 -0
- {testmcpy-0.2.2 → testmcpy-0.2.4}/tests/test_url_protection.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: testmcpy
|
|
3
|
-
Version: 0.2.2
|
|
3
|
+
Version: 0.2.4
|
|
4
4
|
Summary: A comprehensive testing framework for validating LLM tool calling capabilities with MCP services
|
|
5
5
|
Author: Amin Ghadersohi
|
|
6
6
|
License-Expression: Apache-2.0
|
|
@@ -42,26 +42,34 @@ Requires-Dist: types-pyyaml>=6.0.0; extra == "dev"
|
|
|
42
42
|
Requires-Dist: types-requests>=2.28.0; extra == "dev"
|
|
43
43
|
Provides-Extra: server
|
|
44
44
|
Requires-Dist: fastapi<1.0.0,>=0.104.0; extra == "server"
|
|
45
|
-
Requires-Dist: uvicorn<1.0.0,>=0.24.0; extra == "server"
|
|
46
|
-
Requires-Dist: websockets<
|
|
45
|
+
Requires-Dist: uvicorn[standard]<1.0.0,>=0.24.0; extra == "server"
|
|
46
|
+
Requires-Dist: websockets<15.0,>=14.0; extra == "server"
|
|
47
47
|
Provides-Extra: sdk
|
|
48
48
|
Requires-Dist: claude-agent-sdk>=0.1.0; extra == "sdk"
|
|
49
49
|
Provides-Extra: all
|
|
50
50
|
Requires-Dist: fastapi<1.0.0,>=0.104.0; extra == "all"
|
|
51
|
-
Requires-Dist: uvicorn<1.0.0,>=0.24.0; extra == "all"
|
|
52
|
-
Requires-Dist: websockets<
|
|
51
|
+
Requires-Dist: uvicorn[standard]<1.0.0,>=0.24.0; extra == "all"
|
|
52
|
+
Requires-Dist: websockets<15.0,>=14.0; extra == "all"
|
|
53
53
|
Requires-Dist: claude-agent-sdk>=0.1.0; extra == "all"
|
|
54
54
|
Dynamic: license-file
|
|
55
55
|
|
|
56
|
-
|
|
56
|
+
<p align="center">
|
|
57
|
+
<img src="docs/logos/logo.svg" alt="testmcpy logo" width="600">
|
|
58
|
+
</p>
|
|
57
59
|
|
|
58
|
-
|
|
60
|
+
<p align="center">
|
|
61
|
+
<strong>Test and benchmark LLMs with MCP tools in minutes.</strong>
|
|
62
|
+
</p>
|
|
59
63
|
|
|
60
|
-
|
|
64
|
+
<p align="center">
|
|
65
|
+
A testing framework for validating how LLMs call tools via Model Context Protocol (MCP) - compare Claude, GPT-4, Llama, and other models' accuracy, cost, and performance.
|
|
66
|
+
</p>
|
|
61
67
|
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
68
|
+
<p align="center">
|
|
69
|
+
<a href="https://www.python.org/downloads/"><img src="https://img.shields.io/badge/python-3.9+-blue.svg" alt="Python 3.9+"></a>
|
|
70
|
+
<a href="LICENSE"><img src="https://img.shields.io/badge/License-Apache%202.0-blue.svg" alt="License"></a>
|
|
71
|
+
<a href="https://pypi.org/project/testmcpy/"><img src="https://img.shields.io/badge/pypi-testmcpy-blue" alt="PyPI"></a>
|
|
72
|
+
</p>
|
|
65
73
|
|
|
66
74
|
[Screenshot: CLI test runner with colorful progress bars and results]
|
|
67
75
|
|
|
@@ -121,6 +129,16 @@ Comprehensive validation out of the box:
|
|
|
121
129
|
- **Optional web interface**: Visual tool explorer and interactive chat
|
|
122
130
|
- **Real-time feedback**: Watch tests execute with live updates
|
|
123
131
|
|
|
132
|
+
When you start testmcpy, you're greeted with a beautiful terminal interface:
|
|
133
|
+
|
|
134
|
+
```
|
|
135
|
+
▀█▀ █▀▀ █▀ ▀█▀ █▀▄▀█ █▀▀ █▀█ █▄█
|
|
136
|
+
█ ██▄ ▄█ █ █ ▀ █ █▄▄ █▀▀ █
|
|
137
|
+
|
|
138
|
+
🧪 Test • 📊 Benchmark • ✓ Validate
|
|
139
|
+
MCP Testing Framework
|
|
140
|
+
```
|
|
141
|
+
|
|
124
142
|
[Screenshot: Split view of CLI and Web UI running the same test]
|
|
125
143
|
|
|
126
144
|
### YAML Test Definitions
|
|
@@ -1,12 +1,20 @@
|
|
|
1
|
-
|
|
1
|
+
<p align="center">
|
|
2
|
+
<img src="docs/logos/logo.svg" alt="testmcpy logo" width="600">
|
|
3
|
+
</p>
|
|
2
4
|
|
|
3
|
-
|
|
5
|
+
<p align="center">
|
|
6
|
+
<strong>Test and benchmark LLMs with MCP tools in minutes.</strong>
|
|
7
|
+
</p>
|
|
4
8
|
|
|
5
|
-
|
|
9
|
+
<p align="center">
|
|
10
|
+
A testing framework for validating how LLMs call tools via Model Context Protocol (MCP) - compare Claude, GPT-4, Llama, and other models' accuracy, cost, and performance.
|
|
11
|
+
</p>
|
|
6
12
|
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
13
|
+
<p align="center">
|
|
14
|
+
<a href="https://www.python.org/downloads/"><img src="https://img.shields.io/badge/python-3.9+-blue.svg" alt="Python 3.9+"></a>
|
|
15
|
+
<a href="LICENSE"><img src="https://img.shields.io/badge/License-Apache%202.0-blue.svg" alt="License"></a>
|
|
16
|
+
<a href="https://pypi.org/project/testmcpy/"><img src="https://img.shields.io/badge/pypi-testmcpy-blue" alt="PyPI"></a>
|
|
17
|
+
</p>
|
|
10
18
|
|
|
11
19
|
[Screenshot: CLI test runner with colorful progress bars and results]
|
|
12
20
|
|
|
@@ -66,6 +74,16 @@ Comprehensive validation out of the box:
|
|
|
66
74
|
- **Optional web interface**: Visual tool explorer and interactive chat
|
|
67
75
|
- **Real-time feedback**: Watch tests execute with live updates
|
|
68
76
|
|
|
77
|
+
When you start testmcpy, you're greeted with a beautiful terminal interface:
|
|
78
|
+
|
|
79
|
+
```
|
|
80
|
+
▀█▀ █▀▀ █▀ ▀█▀ █▀▄▀█ █▀▀ █▀█ █▄█
|
|
81
|
+
█ ██▄ ▄█ █ █ ▀ █ █▄▄ █▀▀ █
|
|
82
|
+
|
|
83
|
+
🧪 Test • 📊 Benchmark • ✓ Validate
|
|
84
|
+
MCP Testing Framework
|
|
85
|
+
```
|
|
86
|
+
|
|
69
87
|
[Screenshot: Split view of CLI and Web UI running the same test]
|
|
70
88
|
|
|
71
89
|
### YAML Test Definitions
|
|
@@ -88,7 +88,7 @@ testmcpy = [
|
|
|
88
88
|
|
|
89
89
|
[project]
|
|
90
90
|
name = "testmcpy"
|
|
91
|
-
version = "0.2.2"
|
|
91
|
+
version = "0.2.4"
|
|
92
92
|
description = "A comprehensive testing framework for validating LLM tool calling capabilities with MCP services"
|
|
93
93
|
authors = [{name = "Amin Ghadersohi"}]
|
|
94
94
|
license = "Apache-2.0"
|
|
@@ -132,16 +132,16 @@ dev = [
|
|
|
132
132
|
]
|
|
133
133
|
server = [
|
|
134
134
|
"fastapi>=0.104.0,<1.0.0",
|
|
135
|
-
"uvicorn>=0.24.0,<1.0.0",
|
|
136
|
-
"websockets>=
|
|
135
|
+
"uvicorn[standard]>=0.24.0,<1.0.0",
|
|
136
|
+
"websockets>=14.0,<15.0",
|
|
137
137
|
]
|
|
138
138
|
sdk = [
|
|
139
139
|
"claude-agent-sdk>=0.1.0",
|
|
140
140
|
]
|
|
141
141
|
all = [
|
|
142
142
|
"fastapi>=0.104.0,<1.0.0",
|
|
143
|
-
"uvicorn>=0.24.0,<1.0.0",
|
|
144
|
-
"websockets>=
|
|
143
|
+
"uvicorn[standard]>=0.24.0,<1.0.0",
|
|
144
|
+
"websockets>=14.0,<15.0",
|
|
145
145
|
"claude-agent-sdk>=0.1.0",
|
|
146
146
|
]
|
|
147
147
|
|
|
@@ -40,10 +40,23 @@ app = typer.Typer(
|
|
|
40
40
|
console = Console()
|
|
41
41
|
|
|
42
42
|
|
|
43
|
+
def print_logo():
|
|
44
|
+
"""Print testmcpy ASCII logo."""
|
|
45
|
+
logo = """
|
|
46
|
+
[bold cyan]▀█▀ █▀▀ █▀ ▀█▀ █▀▄▀█ █▀▀ █▀█ █▄█[/bold cyan]
|
|
47
|
+
[bold cyan] █ ██▄ ▄█ █ █ ▀ █ █▄▄ █▀▀ █ [/bold cyan]
|
|
48
|
+
|
|
49
|
+
[dim]🧪 Test • 📊 Benchmark • ✓ Validate[/dim]
|
|
50
|
+
[dim]MCP Testing Framework[/dim]
|
|
51
|
+
"""
|
|
52
|
+
console.print(logo)
|
|
53
|
+
|
|
54
|
+
|
|
43
55
|
def version_callback(value: bool):
|
|
44
56
|
"""Display version and exit."""
|
|
45
57
|
if value:
|
|
46
|
-
|
|
58
|
+
print_logo()
|
|
59
|
+
console.print(f"\n Version: [green]{__version__}[/green]")
|
|
47
60
|
raise typer.Exit()
|
|
48
61
|
|
|
49
62
|
|
|
@@ -1263,6 +1276,9 @@ def serve(
|
|
|
1263
1276
|
This command starts a FastAPI server that serves a beautiful React-based UI
|
|
1264
1277
|
for inspecting MCP tools, interactive chat, and test management.
|
|
1265
1278
|
"""
|
|
1279
|
+
# Show logo
|
|
1280
|
+
print_logo()
|
|
1281
|
+
|
|
1266
1282
|
# Show authentication steps
|
|
1267
1283
|
console.print("\n[bold cyan]Authentication Setup[/bold cyan]")
|
|
1268
1284
|
console.print("[dim]━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[/dim]")
|
|
@@ -1397,10 +1413,25 @@ def serve(
|
|
|
1397
1413
|
if not no_browser:
|
|
1398
1414
|
import threading
|
|
1399
1415
|
import webbrowser
|
|
1416
|
+
import requests
|
|
1400
1417
|
|
|
1401
1418
|
def open_browser():
|
|
1402
|
-
|
|
1403
|
-
|
|
1419
|
+
# Wait for server to be ready by checking health endpoint
|
|
1420
|
+
url = f"http://{host}:{port}/"
|
|
1421
|
+
max_attempts = 30
|
|
1422
|
+
for i in range(max_attempts):
|
|
1423
|
+
try:
|
|
1424
|
+
response = requests.get(url, timeout=1)
|
|
1425
|
+
if response.status_code == 200:
|
|
1426
|
+
# Server is ready
|
|
1427
|
+
webbrowser.open(url)
|
|
1428
|
+
return
|
|
1429
|
+
except (requests.exceptions.ConnectionError, requests.exceptions.Timeout):
|
|
1430
|
+
pass
|
|
1431
|
+
time.sleep(0.2) # Wait 200ms between attempts
|
|
1432
|
+
|
|
1433
|
+
# If server didn't start after max attempts, open anyway
|
|
1434
|
+
webbrowser.open(url)
|
|
1404
1435
|
|
|
1405
1436
|
threading.Thread(target=open_browser, daemon=True).start()
|
|
1406
1437
|
|
|
@@ -1732,6 +1763,169 @@ def config_mcp(
|
|
|
1732
1763
|
return
|
|
1733
1764
|
|
|
1734
1765
|
|
|
1766
|
+
@app.command()
|
|
1767
|
+
def export(
|
|
1768
|
+
tool_name: str | None = typer.Argument(None, help="Tool name to export (or use --all)"),
|
|
1769
|
+
format: str = typer.Option("typescript", "--format", "-f", help="Export format"),
|
|
1770
|
+
output: Path | None = typer.Option(None, "--output", "-o", help="Output file"),
|
|
1771
|
+
all: bool = typer.Option(False, "--all", help="Export all tools"),
|
|
1772
|
+
profile: str | None = typer.Option(None, "--profile", help="MCP profile"),
|
|
1773
|
+
mcp_url: str | None = typer.Option(None, "--mcp-url", help="MCP service URL"),
|
|
1774
|
+
):
|
|
1775
|
+
"""
|
|
1776
|
+
Export MCP tool schemas in various formats.
|
|
1777
|
+
|
|
1778
|
+
Supported formats: typescript, python, protobuf, thrift, graphql, curl, json, yaml
|
|
1779
|
+
|
|
1780
|
+
Examples:
|
|
1781
|
+
# Export as TypeScript
|
|
1782
|
+
testmcpy export get_chart_data --format typescript
|
|
1783
|
+
|
|
1784
|
+
# Export all tools as Python to file
|
|
1785
|
+
testmcpy export --all --format python -o schemas.py
|
|
1786
|
+
|
|
1787
|
+
# Generate cURL command
|
|
1788
|
+
testmcpy export list_datasets --format curl
|
|
1789
|
+
|
|
1790
|
+
# Use specific profile
|
|
1791
|
+
testmcpy export search --format protobuf --profile production
|
|
1792
|
+
"""
|
|
1793
|
+
from testmcpy.formatters import FORMATS
|
|
1794
|
+
|
|
1795
|
+
# Load config with profile if specified
|
|
1796
|
+
if profile:
|
|
1797
|
+
from testmcpy.config import Config
|
|
1798
|
+
|
|
1799
|
+
cfg = Config(profile=profile)
|
|
1800
|
+
effective_mcp_url = mcp_url or cfg.mcp_url
|
|
1801
|
+
else:
|
|
1802
|
+
effective_mcp_url = mcp_url or DEFAULT_MCP_URL
|
|
1803
|
+
|
|
1804
|
+
# Validate format
|
|
1805
|
+
if format not in FORMATS:
|
|
1806
|
+
console.print(f"[red]Error: Unknown format '{format}'[/red]")
|
|
1807
|
+
console.print(f"[yellow]Supported formats: {', '.join(FORMATS.keys())}[/yellow]")
|
|
1808
|
+
raise typer.Exit(1)
|
|
1809
|
+
|
|
1810
|
+
# Validate that either tool_name or --all is provided
|
|
1811
|
+
if not tool_name and not all:
|
|
1812
|
+
console.print("[red]Error: Either specify a tool name or use --all flag[/red]")
|
|
1813
|
+
console.print("[yellow]Example: testmcpy export my_tool --format typescript[/yellow]")
|
|
1814
|
+
raise typer.Exit(1)
|
|
1815
|
+
|
|
1816
|
+
async def export_schemas():
|
|
1817
|
+
from testmcpy.src.mcp_client import MCPClient
|
|
1818
|
+
|
|
1819
|
+
console.print(
|
|
1820
|
+
Panel.fit(
|
|
1821
|
+
f"[bold cyan]Export MCP Tool Schemas[/bold cyan]\n"
|
|
1822
|
+
f"Format: {FORMATS[format]['label']} | Service: {effective_mcp_url}",
|
|
1823
|
+
border_style="cyan",
|
|
1824
|
+
)
|
|
1825
|
+
)
|
|
1826
|
+
|
|
1827
|
+
try:
|
|
1828
|
+
with console.status("[bold green]Connecting to MCP service...[/bold green]"):
|
|
1829
|
+
async with MCPClient(effective_mcp_url) as client:
|
|
1830
|
+
tools = await client.list_tools()
|
|
1831
|
+
|
|
1832
|
+
if not tools:
|
|
1833
|
+
console.print("[yellow]No tools found in MCP service[/yellow]")
|
|
1834
|
+
return
|
|
1835
|
+
|
|
1836
|
+
# Filter tools if specific tool requested
|
|
1837
|
+
if not all:
|
|
1838
|
+
tools = [t for t in tools if t.name == tool_name]
|
|
1839
|
+
if not tools:
|
|
1840
|
+
console.print(f"[red]Error: Tool '{tool_name}' not found[/red]")
|
|
1841
|
+
console.print(
|
|
1842
|
+
f"[yellow]Available tools: {', '.join([t.name for t in await client.list_tools()])}[/yellow]"
|
|
1843
|
+
)
|
|
1844
|
+
return
|
|
1845
|
+
|
|
1846
|
+
console.print(
|
|
1847
|
+
f"[green]✓ Found {len(tools)} tool(s) to export[/green]\n"
|
|
1848
|
+
)
|
|
1849
|
+
|
|
1850
|
+
# Get the conversion function
|
|
1851
|
+
convert_func = FORMATS[format]["convert"]
|
|
1852
|
+
language = FORMATS[format]["language"]
|
|
1853
|
+
|
|
1854
|
+
# Generate output
|
|
1855
|
+
output_lines = []
|
|
1856
|
+
|
|
1857
|
+
for i, tool in enumerate(tools):
|
|
1858
|
+
# Add separator between tools when exporting all
|
|
1859
|
+
if all and i > 0:
|
|
1860
|
+
if format in ["typescript", "python"]:
|
|
1861
|
+
output_lines.append("\n\n")
|
|
1862
|
+
elif format in ["protobuf", "thrift", "graphql"]:
|
|
1863
|
+
output_lines.append("\n")
|
|
1864
|
+
elif format == "curl":
|
|
1865
|
+
output_lines.append("\n" + "=" * 80 + "\n\n")
|
|
1866
|
+
else:
|
|
1867
|
+
output_lines.append("\n---\n\n")
|
|
1868
|
+
|
|
1869
|
+
# Add tool name comment for clarity when exporting all
|
|
1870
|
+
if all:
|
|
1871
|
+
if format in ["typescript", "python", "protobuf", "thrift", "graphql"]:
|
|
1872
|
+
output_lines.append(f"// Tool: {tool.name}\n")
|
|
1873
|
+
elif format == "yaml":
|
|
1874
|
+
output_lines.append(f"# Tool: {tool.name}\n")
|
|
1875
|
+
|
|
1876
|
+
# Convert schema
|
|
1877
|
+
if format == "curl":
|
|
1878
|
+
converted = convert_func(tool.input_schema, tool.name)
|
|
1879
|
+
elif format in ["json", "yaml"]:
|
|
1880
|
+
# For JSON/YAML, include tool metadata
|
|
1881
|
+
schema_with_metadata = {
|
|
1882
|
+
"name": tool.name,
|
|
1883
|
+
"description": tool.description,
|
|
1884
|
+
"input_schema": tool.input_schema,
|
|
1885
|
+
}
|
|
1886
|
+
converted = convert_func(schema_with_metadata)
|
|
1887
|
+
else:
|
|
1888
|
+
# For code formats, use a nice name
|
|
1889
|
+
name = "".join(
|
|
1890
|
+
word.capitalize() for word in tool.name.replace("-", "_").split("_")
|
|
1891
|
+
)
|
|
1892
|
+
if format == "typescript":
|
|
1893
|
+
name = f"{name}Params"
|
|
1894
|
+
elif format == "python":
|
|
1895
|
+
name = f"{name}Params"
|
|
1896
|
+
elif format == "protobuf":
|
|
1897
|
+
name = f"{name}Request"
|
|
1898
|
+
elif format == "thrift":
|
|
1899
|
+
name = f"{name}Request"
|
|
1900
|
+
elif format == "graphql":
|
|
1901
|
+
name = f"{name}Input"
|
|
1902
|
+
|
|
1903
|
+
converted = convert_func(tool.input_schema, name)
|
|
1904
|
+
|
|
1905
|
+
output_lines.append(converted)
|
|
1906
|
+
|
|
1907
|
+
output_text = "".join(output_lines)
|
|
1908
|
+
|
|
1909
|
+
# Display or save output
|
|
1910
|
+
if output:
|
|
1911
|
+
output.write_text(output_text)
|
|
1912
|
+
console.print(f"[green]✓ Exported to {output}[/green]")
|
|
1913
|
+
else:
|
|
1914
|
+
# Display with syntax highlighting
|
|
1915
|
+
console.print(Syntax(output_text, language, theme="monokai"))
|
|
1916
|
+
|
|
1917
|
+
except Exception as e:
|
|
1918
|
+
console.print(
|
|
1919
|
+
Panel(
|
|
1920
|
+
f"[red]Error exporting schemas:[/red]\n{str(e)}",
|
|
1921
|
+
title="[red]Error[/red]",
|
|
1922
|
+
border_style="red",
|
|
1923
|
+
)
|
|
1924
|
+
)
|
|
1925
|
+
|
|
1926
|
+
asyncio.run(export_schemas())
|
|
1927
|
+
|
|
1928
|
+
|
|
1735
1929
|
@app.command()
|
|
1736
1930
|
def doctor():
|
|
1737
1931
|
"""
|
|
@@ -102,7 +102,11 @@ class Config:
|
|
|
102
102
|
self._sources[key] = "Default"
|
|
103
103
|
|
|
104
104
|
def _load_profile(self, profile_id: str | None = None):
|
|
105
|
-
"""Load configuration from MCP profile."""
|
|
105
|
+
"""Load configuration from MCP profile.
|
|
106
|
+
|
|
107
|
+
For backward compatibility with single-MCP configs, if a profile has only one MCP,
|
|
108
|
+
we'll use that MCP's URL and auth as the default MCP_URL and auth settings.
|
|
109
|
+
"""
|
|
106
110
|
try:
|
|
107
111
|
profile = load_profile(profile_id)
|
|
108
112
|
if not profile:
|
|
@@ -110,30 +114,37 @@ class Config:
|
|
|
110
114
|
|
|
111
115
|
self._profile = profile
|
|
112
116
|
|
|
113
|
-
#
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
self.
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
self.
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
|
|
117
|
+
# For backward compatibility: if profile has exactly one MCP, use it as default
|
|
118
|
+
if profile.mcps and len(profile.mcps) == 1:
|
|
119
|
+
first_mcp = profile.mcps[0]
|
|
120
|
+
|
|
121
|
+
# Set MCP URL
|
|
122
|
+
self._config["MCP_URL"] = first_mcp.mcp_url
|
|
123
|
+
self._sources["MCP_URL"] = f"Profile ({profile.profile_id})"
|
|
124
|
+
|
|
125
|
+
# Set auth configuration based on auth type
|
|
126
|
+
if first_mcp.auth.auth_type == "bearer" and first_mcp.auth.token:
|
|
127
|
+
self._config["MCP_AUTH_TOKEN"] = first_mcp.auth.token
|
|
128
|
+
self._sources["MCP_AUTH_TOKEN"] = f"Profile ({profile.profile_id})"
|
|
129
|
+
|
|
130
|
+
elif first_mcp.auth.auth_type == "jwt":
|
|
131
|
+
if first_mcp.auth.api_url:
|
|
132
|
+
self._config["MCP_AUTH_API_URL"] = first_mcp.auth.api_url
|
|
133
|
+
self._sources["MCP_AUTH_API_URL"] = f"Profile ({profile.profile_id})"
|
|
134
|
+
if first_mcp.auth.api_token:
|
|
135
|
+
self._config["MCP_AUTH_API_TOKEN"] = first_mcp.auth.api_token
|
|
136
|
+
self._sources["MCP_AUTH_API_TOKEN"] = f"Profile ({profile.profile_id})"
|
|
137
|
+
if first_mcp.auth.api_secret:
|
|
138
|
+
self._config["MCP_AUTH_API_SECRET"] = first_mcp.auth.api_secret
|
|
139
|
+
self._sources["MCP_AUTH_API_SECRET"] = f"Profile ({profile.profile_id})"
|
|
140
|
+
|
|
141
|
+
# OAuth not yet implemented in auth flow, but store for future use
|
|
142
|
+
elif first_mcp.auth.auth_type == "oauth":
|
|
143
|
+
# Store OAuth config for future use
|
|
144
|
+
pass
|
|
145
|
+
|
|
146
|
+
# If profile has multiple MCPs, don't set default MCP_URL/auth
|
|
147
|
+
# The API will handle loading all MCPs from the profile
|
|
137
148
|
|
|
138
149
|
except Exception as e:
|
|
139
150
|
import warnings
|
|
@@ -731,6 +731,127 @@ class ToolCallCount(BaseEvaluator):
|
|
|
731
731
|
)
|
|
732
732
|
|
|
733
733
|
|
|
734
|
+
class ToolCallSequence(BaseEvaluator):
|
|
735
|
+
"""Check that tools were called in a specific order."""
|
|
736
|
+
|
|
737
|
+
def __init__(
|
|
738
|
+
self,
|
|
739
|
+
sequence: list[str],
|
|
740
|
+
strict: bool = True,
|
|
741
|
+
allow_intermediate: bool = False,
|
|
742
|
+
):
|
|
743
|
+
"""
|
|
744
|
+
Check tool call sequence.
|
|
745
|
+
|
|
746
|
+
Args:
|
|
747
|
+
sequence: List of tool names that should be called in order
|
|
748
|
+
strict: If True, sequence must match exactly (no extra tools).
|
|
749
|
+
If False, only checks that sequence appears in order.
|
|
750
|
+
allow_intermediate: If True, allows other tools between sequence steps.
|
|
751
|
+
Only applies when strict=False.
|
|
752
|
+
|
|
753
|
+
Examples:
|
|
754
|
+
# Strict sequence - must be exactly these tools in this order
|
|
755
|
+
ToolCallSequence(["list_datasets", "generate_chart"], strict=True)
|
|
756
|
+
|
|
757
|
+
# Loose sequence - these tools must appear in order, but other tools allowed
|
|
758
|
+
ToolCallSequence(["list_datasets", "generate_chart"], strict=False, allow_intermediate=True)
|
|
759
|
+
"""
|
|
760
|
+
self.sequence = sequence
|
|
761
|
+
self.strict = strict
|
|
762
|
+
self.allow_intermediate = allow_intermediate
|
|
763
|
+
|
|
764
|
+
@property
|
|
765
|
+
def name(self) -> str:
|
|
766
|
+
return f"tool_call_sequence:{' -> '.join(self.sequence)}"
|
|
767
|
+
|
|
768
|
+
@property
|
|
769
|
+
def description(self) -> str:
|
|
770
|
+
if self.strict:
|
|
771
|
+
return f"Checks that tools are called in exact sequence: {' -> '.join(self.sequence)}"
|
|
772
|
+
elif self.allow_intermediate:
|
|
773
|
+
return f"Checks that tools appear in order (other tools allowed): {' -> '.join(self.sequence)}"
|
|
774
|
+
else:
|
|
775
|
+
return f"Checks that only these tools are called in order: {' -> '.join(self.sequence)}"
|
|
776
|
+
|
|
777
|
+
def evaluate(self, context: dict[str, Any]) -> EvalResult:
|
|
778
|
+
tool_calls = context.get("tool_calls", [])
|
|
779
|
+
|
|
780
|
+
if not tool_calls:
|
|
781
|
+
return EvalResult(
|
|
782
|
+
passed=False,
|
|
783
|
+
score=0.0,
|
|
784
|
+
reason="No tool calls found in response",
|
|
785
|
+
)
|
|
786
|
+
|
|
787
|
+
actual_sequence = [call.get("name") for call in tool_calls]
|
|
788
|
+
|
|
789
|
+
if self.strict:
|
|
790
|
+
# Exact match required
|
|
791
|
+
if actual_sequence == self.sequence:
|
|
792
|
+
return EvalResult(
|
|
793
|
+
passed=True,
|
|
794
|
+
score=1.0,
|
|
795
|
+
reason=f"Tools called in exact sequence: {' -> '.join(actual_sequence)}",
|
|
796
|
+
details={"actual_sequence": actual_sequence, "expected_sequence": self.sequence},
|
|
797
|
+
)
|
|
798
|
+
else:
|
|
799
|
+
return EvalResult(
|
|
800
|
+
passed=False,
|
|
801
|
+
score=0.0,
|
|
802
|
+
reason=f"Sequence mismatch. Expected: {' -> '.join(self.sequence)}, Got: {' -> '.join(actual_sequence)}",
|
|
803
|
+
details={"actual_sequence": actual_sequence, "expected_sequence": self.sequence},
|
|
804
|
+
)
|
|
805
|
+
|
|
806
|
+
# Non-strict mode: check if sequence appears in order
|
|
807
|
+
sequence_idx = 0
|
|
808
|
+
found_positions = []
|
|
809
|
+
|
|
810
|
+
for i, tool_name in enumerate(actual_sequence):
|
|
811
|
+
if sequence_idx < len(self.sequence) and tool_name == self.sequence[sequence_idx]:
|
|
812
|
+
found_positions.append(i)
|
|
813
|
+
sequence_idx += 1
|
|
814
|
+
elif not self.allow_intermediate and tool_name not in self.sequence:
|
|
815
|
+
# Found a tool not in our sequence and intermediates not allowed
|
|
816
|
+
return EvalResult(
|
|
817
|
+
passed=False,
|
|
818
|
+
score=sequence_idx / len(self.sequence),
|
|
819
|
+
reason=f"Unexpected tool '{tool_name}' at position {i}. Only {self.sequence} allowed.",
|
|
820
|
+
details={
|
|
821
|
+
"actual_sequence": actual_sequence,
|
|
822
|
+
"expected_sequence": self.sequence,
|
|
823
|
+
"found_up_to_index": sequence_idx,
|
|
824
|
+
"unexpected_tool": tool_name,
|
|
825
|
+
},
|
|
826
|
+
)
|
|
827
|
+
|
|
828
|
+
# Check if we found all tools in the sequence
|
|
829
|
+
if sequence_idx == len(self.sequence):
|
|
830
|
+
return EvalResult(
|
|
831
|
+
passed=True,
|
|
832
|
+
score=1.0,
|
|
833
|
+
reason=f"Required tools called in correct order: {' -> '.join([actual_sequence[i] for i in found_positions])}",
|
|
834
|
+
details={
|
|
835
|
+
"actual_sequence": actual_sequence,
|
|
836
|
+
"expected_sequence": self.sequence,
|
|
837
|
+
"found_positions": found_positions,
|
|
838
|
+
},
|
|
839
|
+
)
|
|
840
|
+
else:
|
|
841
|
+
missing_tools = self.sequence[sequence_idx:]
|
|
842
|
+
return EvalResult(
|
|
843
|
+
passed=False,
|
|
844
|
+
score=sequence_idx / len(self.sequence),
|
|
845
|
+
reason=f"Incomplete sequence. Found {sequence_idx}/{len(self.sequence)} tools. Missing: {' -> '.join(missing_tools)}",
|
|
846
|
+
details={
|
|
847
|
+
"actual_sequence": actual_sequence,
|
|
848
|
+
"expected_sequence": self.sequence,
|
|
849
|
+
"found_up_to_index": sequence_idx,
|
|
850
|
+
"missing_tools": missing_tools,
|
|
851
|
+
},
|
|
852
|
+
)
|
|
853
|
+
|
|
854
|
+
|
|
734
855
|
# Superset-specific evaluators
|
|
735
856
|
|
|
736
857
|
|
|
@@ -898,6 +1019,7 @@ def create_evaluator(name: str, **kwargs) -> BaseEvaluator:
|
|
|
898
1019
|
"tool_called_with_parameters": ToolCalledWithParameters,
|
|
899
1020
|
"parameter_value_in_range": ParameterValueInRange,
|
|
900
1021
|
"tool_call_count": ToolCallCount,
|
|
1022
|
+
"tool_call_sequence": ToolCallSequence,
|
|
901
1023
|
# Superset-specific evaluators
|
|
902
1024
|
"was_superset_chart_created": WasSupersetChartCreated,
|
|
903
1025
|
"sql_query_valid": SQLQueryValid,
|
|
@@ -0,0 +1,46 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Schema formatters for converting JSON Schema to various formats.
|
|
3
|
+
|
|
4
|
+
This module provides formatters for converting MCP tool schemas to
|
|
5
|
+
TypeScript, Python, Protobuf, Thrift, GraphQL, cURL, JSON, and YAML.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
from .curl import to_curl
|
|
9
|
+
from .graphql import to_graphql
|
|
10
|
+
from .javascript_client import to_javascript_client
|
|
11
|
+
from .json_yaml import to_json, to_yaml
|
|
12
|
+
from .protobuf import to_protobuf
|
|
13
|
+
from .python import to_python
|
|
14
|
+
from .python_client import to_python_client
|
|
15
|
+
from .thrift import to_thrift
|
|
16
|
+
from .typescript import to_typescript
|
|
17
|
+
from .typescript_client import to_typescript_client
|
|
18
|
+
|
|
19
|
+
__all__ = [
|
|
20
|
+
"to_typescript",
|
|
21
|
+
"to_python",
|
|
22
|
+
"to_protobuf",
|
|
23
|
+
"to_thrift",
|
|
24
|
+
"to_graphql",
|
|
25
|
+
"to_curl",
|
|
26
|
+
"to_json",
|
|
27
|
+
"to_yaml",
|
|
28
|
+
"to_python_client",
|
|
29
|
+
"to_javascript_client",
|
|
30
|
+
"to_typescript_client",
|
|
31
|
+
]
|
|
32
|
+
|
|
33
|
+
# Format registry for easy lookup
|
|
34
|
+
FORMATS = {
|
|
35
|
+
"json": {"label": "JSON", "language": "json", "convert": to_json},
|
|
36
|
+
"yaml": {"label": "YAML", "language": "yaml", "convert": to_yaml},
|
|
37
|
+
"typescript": {"label": "TypeScript", "language": "typescript", "convert": to_typescript},
|
|
38
|
+
"python": {"label": "Python", "language": "python", "convert": to_python},
|
|
39
|
+
"protobuf": {"label": "Protobuf", "language": "protobuf", "convert": to_protobuf},
|
|
40
|
+
"thrift": {"label": "Thrift", "language": "thrift", "convert": to_thrift},
|
|
41
|
+
"graphql": {"label": "GraphQL", "language": "graphql", "convert": to_graphql},
|
|
42
|
+
"curl": {"label": "cURL", "language": "bash", "convert": to_curl},
|
|
43
|
+
"python_client": {"label": "Python Client", "language": "python", "convert": to_python_client},
|
|
44
|
+
"javascript_client": {"label": "JavaScript Client", "language": "javascript", "convert": to_javascript_client},
|
|
45
|
+
"typescript_client": {"label": "TypeScript Client", "language": "typescript", "convert": to_typescript_client},
|
|
46
|
+
}
|