testmcpy 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
testmcpy/cli.py ADDED
@@ -0,0 +1,778 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ MCP Testing Framework CLI - Test and validate LLM+MCP interactions.
4
+
5
+ This CLI provides commands for testing LLM tool calling capabilities with MCP services,
6
+ running evaluation suites, and generating reports.
7
+ """
8
+
9
+ import asyncio
10
+ import json
11
+ import os
12
+ import logging
13
+ from pathlib import Path
14
+ from typing import Optional, List
15
+ from enum import Enum
16
+
17
+ import typer
18
+ from rich.console import Console
19
+ from rich.table import Table
20
+ from rich.panel import Panel
21
+ from rich.progress import Progress, SpinnerColumn, TextColumn, BarColumn
22
+ from rich.syntax import Syntax
23
+ from rich import print as rprint
24
+ import yaml
25
+ from dotenv import load_dotenv
26
+
27
# Raise the root logger threshold so that MCP notification validation
# warnings (and anything else below ERROR) are not printed.
_root_logger = logging.getLogger()
_root_logger.setLevel(logging.ERROR)

# Pull settings from a ``.env`` file located one directory above this package.
# NOTE(review): the path is derived from ``__file__``, not the current working
# directory -- confirm this is the intended location for installed wheels.
_dotenv_path = Path(__file__).parent.parent / ".env"
load_dotenv(_dotenv_path)

# The Typer application every ``@app.command()`` below registers with.
app = typer.Typer(
    name="testmcpy",
    help="MCP Testing Framework - Test LLM tool calling with MCP services",
    add_completion=False,
)

# Shared Rich console used for all terminal output in this module.
console = Console()

# Option defaults, overridable through environment variables.
DEFAULT_MODEL = os.environ.get("DEFAULT_MODEL", "llama3.1:8b")
DEFAULT_PROVIDER = os.environ.get("DEFAULT_PROVIDER", "ollama")
DEFAULT_MCP_URL = os.environ.get("MCP_URL", "http://localhost:5008/mcp/")
45
+
46
+
47
class OutputFormat(str, Enum):
    """Rendering formats accepted by the ``--format`` options."""

    # Members are also ``str`` instances, so each compares equal to its value.
    yaml = "yaml"
    json = "json"
    table = "table"
52
+
53
+
54
class ModelProvider(str, Enum):
    """Model back-ends selectable through the ``--provider`` options."""

    # Member names use underscores; the CLI-facing values use hyphens.
    ollama = "ollama"
    openai = "openai"
    local = "local"
    anthropic = "anthropic"
    claude_sdk = "claude-sdk"
    claude_cli = "claude-cli"
62
+
63
+
64
@app.command()
def research(
    model: str = typer.Option(DEFAULT_MODEL, "--model", "-m", help="Model to test"),
    provider: ModelProvider = typer.Option(DEFAULT_PROVIDER, "--provider", "-p", help="Model provider"),
    mcp_url: str = typer.Option(DEFAULT_MCP_URL, "--mcp-url", help="MCP service URL"),
    output: Optional[Path] = typer.Option(None, "--output", "-o", help="Output file for results"),
    format: OutputFormat = typer.Option(OutputFormat.table, "--format", "-f", help="Output format"),
):
    """
    Research and test LLM tool calling capabilities.

    This command tests whether a given LLM model can successfully call MCP tools.
    """
    # The docstring above doubles as the CLI help text, so it is kept short.
    # Flow: probe the MCP service, then (for the ollama provider only) send one
    # fixed prompt with one fixed tool definition and report what came back.
    console.print(Panel.fit(
        "[bold cyan]MCP Testing Framework - Research Mode[/bold cyan]\n"
        f"Testing {model} via {provider.value}",
        border_style="cyan"
    ))

    async def run_research():
        """Probe the MCP service, then run a single tool-calling test."""
        # Import here to avoid circular dependencies
        from testmcpy.research.test_ollama_tools import OllamaToolTester, MCPServiceTester
        from rich.markup import escape

        console.print("\n[bold]Testing MCP Service[/bold]")
        mcp_tester = MCPServiceTester(mcp_url)

        # try/finally guarantees the testers are closed even when a step raises.
        try:
            with Progress(
                SpinnerColumn(),
                TextColumn("[progress.description]{task.description}"),
                console=console,
            ) as progress:
                task = progress.add_task("Connecting to MCP service...", total=None)
                connected = await mcp_tester.test_connection()
                progress.update(task, completed=True)

            if connected:
                console.print("[green]✓ MCP service is reachable[/green]")
                tools = await mcp_tester.list_tools()
                if tools:
                    console.print(f"[green]✓ Found {len(tools)} MCP tools[/green]")
            else:
                console.print("[red]✗ MCP service not reachable[/red]")

            console.print(f"\n[bold]Testing Model: {model}[/bold]")

            if provider != ModelProvider.ollama:
                # Previously this case produced no output at all; say so explicitly.
                console.print(
                    "[yellow]Research mode only implements the tool-calling test for the "
                    f"'ollama' provider; skipping model test for '{provider.value}'[/yellow]"
                )
                return

            tester = OllamaToolTester()
            try:
                # Single synthetic tool the model is expected to call.
                test_tools = [{
                    "type": "function",
                    "function": {
                        "name": "get_chart_data",
                        "description": "Get data for a specific chart",
                        "parameters": {
                            "type": "object",
                            "properties": {
                                "chart_id": {"type": "integer", "description": "Chart ID"}
                            },
                            "required": ["chart_id"]
                        }
                    }
                }]
                test_prompt = "Get the data for chart ID 42"

                result = await tester.test_tool_calling(model, test_prompt, test_tools)

                # Built once and shared by the JSON and YAML console renderers.
                result_data = {
                    "model": result.model,
                    "success": result.success,
                    "tool_called": result.tool_called,
                    "tool_name": result.tool_name,
                    "response_time": result.response_time,
                    "error": result.error,
                }

                if format == OutputFormat.table:
                    table = Table(show_header=True, header_style="bold cyan")
                    table.add_column("Property", style="dim")
                    table.add_column("Value")

                    table.add_row("Model", model)
                    table.add_row("Success", "✓" if result.success else "✗")
                    table.add_row("Tool Called", "✓" if result.tool_called else "✗")
                    table.add_row("Tool Name", result.tool_name or "-")
                    table.add_row("Response Time", f"{result.response_time:.2f}s")

                    if result.error:
                        # Escape so brackets in the error text are not read as markup.
                        table.add_row("Error", f"[red]{escape(str(result.error))}[/red]")

                    console.print(table)
                elif format == OutputFormat.json:
                    console.print(Syntax(json.dumps(result_data, indent=2), "json"))
                elif format == OutputFormat.yaml:
                    console.print(Syntax(yaml.dump(result_data), "yaml"))

                # Save to file if requested. The file additionally records the
                # provider and the raw response; anything but JSON is written as YAML.
                if output:
                    file_data = {
                        "model": result.model,
                        "provider": provider.value,
                        "success": result.success,
                        "tool_called": result.tool_called,
                        "tool_name": result.tool_name,
                        "response_time": result.response_time,
                        "error": result.error,
                        "raw_response": result.raw_response,
                    }

                    if format == OutputFormat.json:
                        output.write_text(json.dumps(file_data, indent=2))
                    else:
                        output.write_text(yaml.dump(file_data))

                    console.print(f"\n[green]Results saved to {output}[/green]")
            finally:
                await tester.close()
        finally:
            await mcp_tester.close()

    asyncio.run(run_research())
201
+
202
+
203
@app.command()
def run(
    test_path: Path = typer.Argument(..., help="Path to test file or directory"),
    model: str = typer.Option(DEFAULT_MODEL, "--model", "-m", help="Model to use"),
    provider: ModelProvider = typer.Option(DEFAULT_PROVIDER, "--provider", "-p", help="Model provider"),
    mcp_url: str = typer.Option(DEFAULT_MCP_URL, "--mcp-url", help="MCP service URL"),
    output: Optional[Path] = typer.Option(None, "--output", "-o", help="Output report file"),
    verbose: bool = typer.Option(False, "--verbose", "-v", help="Verbose output"),
    dry_run: bool = typer.Option(False, "--dry-run", help="Don't actually run tests"),
    hide_tool_output: bool = typer.Option(False, "--hide-tool-output", help="Hide detailed tool call output in verbose mode"),
):
    """
    Run test cases against MCP service.

    This command executes test cases defined in YAML/JSON files.
    """
    # The docstring above doubles as the CLI help text, so it is kept short.
    console.print(Panel.fit(
        "[bold cyan]MCP Testing Framework - Run Tests[/bold cyan]\n"
        f"Model: {model} | Provider: {provider.value}",
        border_style="cyan"
    ))

    # Fail fast on a mistyped path instead of silently reporting zero tests.
    if not test_path.exists():
        console.print(f"[red]Test path not found: {test_path}[/red]")
        raise typer.Exit(code=1)

    async def run_tests():
        """Load the test cases, execute them, then print and optionally save results."""
        from testmcpy.src.test_runner import TestRunner, TestCase

        def load_cases(file_path, allow_single):
            """Return the TestCase objects defined in one YAML/JSON file.

            A top-level ``tests`` list yields one case per entry. Without it, the
            whole document is treated as a single case when ``allow_single`` is
            true, and ignored otherwise.
            """
            with open(file_path, encoding="utf-8") as f:
                if file_path.suffix == ".json":
                    data = json.load(f)
                else:
                    data = yaml.safe_load(f)

            # An empty YAML document parses to None; anything that is not a
            # mapping cannot hold test definitions.
            if not isinstance(data, dict):
                console.print(f"[yellow]Skipping {file_path}: no test definitions found[/yellow]")
                return []
            if "tests" in data:
                return [TestCase.from_dict(entry) for entry in (data["tests"] or [])]
            if allow_single:
                return [TestCase.from_dict(data)]
            return []

        runner = TestRunner(
            model=model,
            provider=provider.value,
            mcp_url=mcp_url,
            verbose=verbose,
            hide_tool_output=hide_tool_output
        )

        # Load test cases
        test_cases = []
        if test_path.is_file():
            test_cases = load_cases(test_path, allow_single=True)
        elif test_path.is_dir():
            # Sorted for a deterministic run order; .yml and .json are picked up
            # alongside .yaml, matching the formats accepted for a single file.
            candidates = sorted(
                entry for entry in test_path.iterdir()
                if entry.is_file() and entry.suffix in (".yaml", ".yml", ".json")
            )
            for file in candidates:
                test_cases.extend(load_cases(file, allow_single=False))

        console.print(f"\n[bold]Found {len(test_cases)} test case(s)[/bold]")

        if dry_run:
            console.print("[yellow]DRY RUN - Not executing tests[/yellow]")
            for i, test in enumerate(test_cases, 1):
                console.print(f"{i}. {test.name}: {test.prompt[:50]}...")
            return

        # Run tests
        results = await runner.run_tests(test_cases)

        # Display results
        table = Table(show_header=True, header_style="bold cyan")
        table.add_column("Test", style="dim")
        table.add_column("Status")
        table.add_column("Score")
        table.add_column("Time")
        table.add_column("Details")

        total_passed = 0
        total_cost = 0.0
        total_tokens = 0
        for result in results:
            status = "[green]PASS[/green]" if result.passed else "[red]FAIL[/red]"
            if result.passed:
                total_passed += 1

            # Aggregate cost and tokens; a missing cost counts as zero.
            total_cost += result.cost or 0.0
            if result.token_usage and 'total' in result.token_usage:
                total_tokens += result.token_usage['total']

            table.add_row(
                result.test_name,
                status,
                f"{result.score:.2f}",
                f"{result.duration:.2f}s",
                result.reason or "-"
            )

        console.print(table)

        # Summary with cost and tokens (each shown only when non-zero)
        summary_parts = [f"{total_passed}/{len(results)} tests passed"]
        if total_tokens > 0:
            summary_parts.append(f"{total_tokens:,} tokens")
        if total_cost > 0:
            summary_parts.append(f"${total_cost:.4f}")

        console.print(f"\n[bold]Summary:[/bold] {' | '.join(summary_parts)}")

        # Save report if requested; the output file's suffix selects JSON or YAML.
        if output:
            report_data = {
                "model": model,
                "provider": provider.value,
                "summary": {
                    "total": len(results),
                    "passed": total_passed,
                    "failed": len(results) - total_passed,
                },
                "results": [r.to_dict() for r in results]
            }

            if output.suffix == ".json":
                output.write_text(json.dumps(report_data, indent=2))
            else:
                output.write_text(yaml.dump(report_data))

            console.print(f"\n[green]Report saved to {output}[/green]")

    asyncio.run(run_tests())
332
+
333
+
334
@app.command()
def tools(
    mcp_url: str = typer.Option(DEFAULT_MCP_URL, "--mcp-url", help="MCP service URL"),
    format: OutputFormat = typer.Option(OutputFormat.table, "--format", "-f", help="Output format"),
    detail: bool = typer.Option(False, "--detail", "-d", help="Show detailed parameter schemas"),
    filter: Optional[str] = typer.Option(None, "--filter", help="Filter tools by name"),
):
    """
    List available MCP tools with beautiful formatting.

    This command connects to the MCP service and displays all available tools
    with their descriptions and parameter schemas in a readable format.
    """
    # The docstring above doubles as the CLI help text, so it is left as is.
    # ``format`` and ``filter`` shadow builtins, but renaming them would change
    # the keyword interface of this command.
    async def list_tools():
        """Fetch the tool list from the MCP service and render it."""
        from testmcpy.src.mcp_client import MCPClient
        from rich.markup import escape

        def render_detail(selected):
            """Print one panel per tool with its description and parameters."""
            for i, tool in enumerate(selected, 1):
                tool_content = ["[bold]Description:[/bold]"]

                # A tool may have no description; treat that as empty text.
                desc_lines = (tool.description or "").split('\n')
                for line in desc_lines[:5]:  # First 5 lines
                    if line.strip():
                        tool_content.append(f" {escape(line.strip())}")
                if len(desc_lines) > 5:
                    tool_content.append(f" [dim]... and {len(desc_lines) - 5} more lines[/dim]")

                tool_content.append("")

                if tool.input_schema:
                    tool_content.append("[bold]Parameters:[/bold]")
                    # ``or`` also covers keys that are present but null.
                    props = tool.input_schema.get('properties') or {}
                    required = tool.input_schema.get('required') or []

                    if props:
                        for param_name, param_info in props.items():
                            param_type = param_info.get('type', 'any')
                            param_desc = param_info.get('description', '')
                            is_required = '✓' if param_name in required else ' '

                            tool_content.append(
                                f" [{is_required}] [cyan]{escape(str(param_name))}[/cyan]: "
                                f"[yellow]{escape(str(param_type))}[/yellow]"
                            )
                            if param_desc:
                                # Truncate long descriptions (before escaping, so
                                # an escape sequence is never cut in half).
                                if len(param_desc) > 60:
                                    param_desc = param_desc[:60] + "..."
                                tool_content.append(f" [dim]{escape(param_desc)}[/dim]")
                    else:
                        tool_content.append(" [dim]No parameters required[/dim]")
                else:
                    tool_content.append("[dim]No parameter schema[/dim]")

                console.print(Panel(
                    "\n".join(tool_content),
                    title=f"[bold green]{i}. {escape(tool.name)}[/bold green]",
                    border_style="green",
                    expand=False
                ))
                console.print()  # Spacing between tools

        def render_table(selected):
            """Print a compact one-row-per-tool table."""
            table = Table(
                show_header=True,
                header_style="bold cyan",
                border_style="blue",
                title=f"[bold]Available MCP Tools ({len(selected)})[/bold]",
                title_style="bold magenta"
            )
            table.add_column("#", style="dim", width=4)
            table.add_column("Tool Name", style="bold green", no_wrap=True)
            table.add_column("Description", style="white")
            table.add_column("Params", justify="center", style="cyan")

            for i, tool in enumerate(selected, 1):
                desc = tool.description or ""
                if len(desc) > 80:
                    # Cut at the last sentence boundary within the first 80 chars,
                    # falling back to a hard cut when there is none.
                    desc = desc[:80].rsplit('. ', 1)[0] + "..."

                schema = tool.input_schema or {}
                param_count = len(schema.get('properties') or {})
                required_count = len(schema.get('required') or [])

                param_str = f"{param_count}"
                if required_count > 0:
                    param_str = f"{param_count} ({required_count} req)"

                # Escape server-supplied text so brackets are shown literally.
                table.add_row(str(i), escape(tool.name), escape(desc), param_str)

            console.print(table)

        def render(all_tools):
            """Filter the tools, print them in the requested format, then a summary."""
            if filter:
                needle = filter.lower()
                selected = [t for t in all_tools if needle in t.name.lower()]
                if not selected:
                    console.print(f"[yellow]No tools found matching '{escape(filter)}'[/yellow]")
                    return
            else:
                selected = all_tools

            if format == OutputFormat.table:
                if detail:
                    render_detail(selected)
                else:
                    render_table(selected)
            elif format in (OutputFormat.json, OutputFormat.yaml):
                output_data = [
                    {
                        "name": tool.name,
                        "description": tool.description,
                        "input_schema": tool.input_schema
                    }
                    for tool in selected
                ]
                if format == OutputFormat.json:
                    console.print(Syntax(json.dumps(output_data, indent=2), "json", theme="monokai"))
                else:
                    console.print(Syntax(yaml.dump(output_data), "yaml", theme="monokai"))

            # Summary
            summary_parts = [f"[green]{len(selected)} tool(s) displayed[/green]"]
            if filter:
                summary_parts.append(f"[yellow]filtered from {len(all_tools)} total[/yellow]")

            console.print(f"\n[bold]Summary:[/bold] {' | '.join(summary_parts)}")

            if not detail and format == OutputFormat.table:
                console.print("[dim]Tip: Use --detail flag to see full parameter schemas[/dim]")

        console.print(Panel.fit(
            "[bold cyan]MCP Tools Explorer[/bold cyan]\n"
            f"Service: {mcp_url}",
            border_style="cyan"
        ))

        # Any failure (connecting or rendering) is reported in a panel rather
        # than as a traceback.
        try:
            with console.status("[bold green]Connecting to MCP service...[/bold green]"):
                async with MCPClient(mcp_url) as client:
                    all_tools = await client.list_tools()
            render(all_tools)
        except Exception as e:
            console.print(Panel(
                f"[red]Error connecting to MCP service:[/red]\n{escape(str(e))}",
                title="[red]Error[/red]",
                border_style="red"
            ))

    asyncio.run(list_tools())
501
+
502
+
503
@app.command()
def report(
    report_files: List[Path] = typer.Argument(..., help="Report files to compare"),
    output: Optional[Path] = typer.Option(None, "--output", "-o", help="Output comparison file"),
):
    """
    Compare test reports from different models.

    This command takes multiple report files and generates a comparison.
    """
    banner = Panel.fit(
        "[bold cyan]MCP Testing Framework - Report Comparison[/bold cyan]",
        border_style="cyan"
    )
    console.print(banner)

    # Parse every report up front; ".json" files go through json, all others
    # through the safe YAML loader.
    loaded = []
    for path in report_files:
        with open(path) as handle:
            parse = json.load if path.suffix == ".json" else yaml.safe_load
            loaded.append(parse(handle))

    # One row per report.
    table = Table(show_header=True, header_style="bold cyan")
    table.add_column("Model", style="dim")
    for heading in ("Provider", "Total Tests", "Passed", "Failed", "Success Rate"):
        table.add_column(heading)

    for entry in loaded:
        stats = entry["summary"]
        if stats["total"] > 0:
            rate = stats["passed"] / stats["total"] * 100
        else:
            rate = 0  # no tests at all: avoid dividing by zero

        table.add_row(
            entry["model"],
            entry.get("provider", "unknown"),
            str(stats["total"]),
            f"[green]{stats['passed']}[/green]",
            f"[red]{stats['failed']}[/red]",
            f"{rate:.1f}%"
        )

    console.print(table)

    # With exactly two reports, list the tests whose outcome differs between them.
    if len(loaded) == 2:
        console.print("\n[bold]Differential Analysis[/bold]")

        first, second = loaded
        first_outcomes = {item["test_name"]: item["passed"] for item in first["results"]}
        second_outcomes = {item["test_name"]: item["passed"] for item in second["results"]}

        # A test missing from the other report counts as "not passed" there.
        only_second_passed = [
            name for name, ok in first_outcomes.items()
            if not ok and second_outcomes.get(name, False)
        ]
        only_first_passed = [
            name for name, ok in second_outcomes.items()
            if not ok and first_outcomes.get(name, False)
        ]

        sections = (
            (only_second_passed, first, second),
            (only_first_passed, second, first),
        )
        for names, loser, winner in sections:
            if not names:
                continue
            console.print(f"\n[yellow]Tests that failed in {loser['model']} but passed in {winner['model']}:[/yellow]")
            for name in names:
                console.print(f" - {name}")

    # Optionally persist the raw reports plus a small comparison digest.
    if output:
        comparison = {
            "reports": loaded,
            "comparison": {
                "models": [entry["model"] for entry in loaded],
                "summary": [entry["summary"] for entry in loaded]
            }
        }

        if output.suffix == ".json":
            serialized = json.dumps(comparison, indent=2)
        else:
            serialized = yaml.dump(comparison)
        output.write_text(serialized)

        console.print(f"\n[green]Comparison saved to {output}[/green]")
589
+
590
+
591
@app.command()
def chat(
    model: str = typer.Option(DEFAULT_MODEL, "--model", "-m", help="Model to use"),
    provider: ModelProvider = typer.Option(DEFAULT_PROVIDER, "--provider", "-p", help="Model provider"),
    mcp_url: str = typer.Option(DEFAULT_MCP_URL, "--mcp-url", help="MCP service URL"),
    no_mcp: bool = typer.Option(False, "--no-mcp", help="Chat without MCP tools"),
):
    """
    Interactive chat with LLM that has access to MCP tools.

    Start a chat session where you can directly talk to the LLM and it can use
    MCP tools from your service. Type 'exit' or 'quit' to end the session.

    Use --no-mcp flag to chat without MCP tools.
    """
    # The docstring above doubles as the CLI help text, so it is left as is.
    if no_mcp:
        console.print(Panel.fit(
            f"[bold cyan]Interactive Chat with {model}[/bold cyan]\n"
            f"Provider: {provider.value}\nMode: Standalone (no MCP tools)\n\n"
            "[dim]Type your message and press Enter. Type 'exit' or 'quit' to end session.[/dim]",
            border_style="cyan"
        ))
    else:
        console.print(Panel.fit(
            f"[bold cyan]Interactive Chat with {model}[/bold cyan]\n"
            f"Provider: {provider.value}\nMCP Service: {mcp_url}\n\n"
            "[dim]Type your message and press Enter. Type 'exit' or 'quit' to end session.[/dim]",
            border_style="cyan"
        ))

    async def chat_session():
        """Run the read/respond loop until the user exits or input ends."""
        import sys
        # NOTE(review): kept from the original; the imports below are absolute,
        # so this sys.path entry looks unnecessary -- confirm before removing.
        sys.path.append(os.path.dirname(os.path.abspath(__file__)))

        from rich.markup import escape
        from testmcpy.src.llm_integration import create_llm_provider
        from testmcpy.src.mcp_client import MCPClient

        # Initialize LLM
        llm = create_llm_provider(provider.value, model)
        await llm.initialize()

        mcp_client = None
        # try/finally guarantees both clients are closed however the loop ends.
        try:
            tools = []

            if not no_mcp:
                # MCP is best-effort: on failure the chat continues without tools.
                try:
                    mcp_client = MCPClient(mcp_url)
                    await mcp_client.initialize()

                    tools = await mcp_client.list_tools()
                    console.print(f"[green]Connected to MCP service with {len(tools)} tools available[/green]\n")
                except Exception as e:
                    console.print(f"[yellow]MCP connection failed: {escape(str(e))}[/yellow]")
                    console.print("[yellow]Continuing without MCP tools...[/yellow]\n")

            if not tools:
                console.print("[dim]Chat mode: Standalone (no tools available)[/dim]\n")

            # Convert MCPTool objects to dictionaries for the LLM. The tool list
            # does not change during the session, so this is done once.
            tools_dict = [
                {
                    "name": tool.name,
                    "description": tool.description,
                    "inputSchema": tool.input_schema
                }
                for tool in tools
            ]

            # Chat loop
            while True:
                # Ctrl-C and end-of-input (Ctrl-D / closed stdin) both end the
                # session. EOFError must not reach the generic handler below, or
                # the loop would print the same error forever.
                try:
                    user_input = console.input("[bold blue]You:[/bold blue] ")
                except (KeyboardInterrupt, EOFError):
                    console.print("\n[yellow]Chat interrupted. Goodbye![/yellow]")
                    break

                message = user_input.strip()
                if not message:
                    continue

                # Compare the stripped text so surrounding spaces don't block exit.
                if message.lower() in ('exit', 'quit', 'bye'):
                    console.print("[yellow]Goodbye![/yellow]")
                    break

                try:
                    # Show thinking indicator while the model responds
                    with console.status("[dim]Thinking...[/dim]"):
                        response = await llm.generate_with_tools(user_input, tools_dict)

                    # Escape model output so brackets in it are printed
                    # literally instead of being parsed as Rich markup.
                    console.print(f"[bold green]{model}:[/bold green] {escape(str(response.response))}")

                    # Show tool calls if any
                    if response.tool_calls:
                        console.print(f"[dim]Used {len(response.tool_calls)} tool call(s)[/dim]")
                        for tool_call in response.tool_calls:
                            call_text = escape(f"{tool_call['name']}({tool_call['arguments']})")
                            console.print(f"[dim]→ {call_text}[/dim]")

                    console.print()  # Empty line for spacing

                except KeyboardInterrupt:
                    console.print("\n[yellow]Chat interrupted. Goodbye![/yellow]")
                    break
                except Exception as e:
                    # A failed turn is reported and the session continues.
                    console.print(f"[red]Error: {escape(str(e))}[/red]")
        finally:
            # Cleanup; the LLM is closed even if closing the MCP client fails.
            try:
                if mcp_client:
                    await mcp_client.close()
            finally:
                await llm.close()

    asyncio.run(chat_session())
702
+
703
+
704
@app.command()
def init(
    path: Path = typer.Argument(Path("."), help="Directory to initialize"),
):
    """
    Initialize a new MCP test project.

    This command creates the standard directory structure and example files.
    """
    banner = Panel.fit(
        "[bold cyan]MCP Testing Framework - Initialize Project[/bold cyan]",
        border_style="cyan"
    )
    console.print(banner)

    # Project skeleton: existing directories are left in place.
    for sub_dir in ("tests", "evals", "reports"):
        target = path / sub_dir
        target.mkdir(parents=True, exist_ok=True)
        console.print(f"[green]✓ Created {target}[/green]")

    # Two sample test cases written to tests/example_tests.yaml.
    # Each evaluator entry is its own dict literal on purpose: reusing one
    # object would make yaml.dump emit anchors/aliases in the output.
    chart_case = {
        "name": "test_get_chart_data",
        "prompt": "Get the data for chart with ID 123",
        "evaluators": [
            {"name": "was_mcp_tool_called", "args": {"tool_name": "get_chart"}},
            {"name": "execution_successful"},
            {"name": "final_answer_contains", "args": {"expected_content": "chart"}}
        ]
    }
    dashboard_case = {
        "name": "test_create_dashboard",
        "prompt": "Create a new dashboard called 'Sales Overview' with a bar chart",
        "evaluators": [
            {"name": "was_superset_chart_created"},
            {"name": "execution_successful"},
            {"name": "within_time_limit", "args": {"max_seconds": 30}}
        ]
    }
    example_suite = {"version": "1.0", "tests": [chart_case, dashboard_case]}

    example_path = path / "tests" / "example_tests.yaml"
    example_path.write_text(yaml.dump(example_suite, default_flow_style=False))
    console.print(f"[green]✓ Created example test file: {example_path}[/green]")

    # Project configuration seeded from the module-level defaults.
    settings = {
        "mcp_url": DEFAULT_MCP_URL,
        "default_model": DEFAULT_MODEL,
        "default_provider": DEFAULT_PROVIDER,
        "evaluators": {
            "timeout": 30,
            "max_tokens": 2000,
            "max_cost": 0.10
        }
    }

    settings_path = path / "mcp_test_config.yaml"
    settings_path.write_text(yaml.dump(settings, default_flow_style=False))
    console.print(f"[green]✓ Created config file: {settings_path}[/green]")

    # Closing message with suggested follow-up commands.
    closing_lines = (
        "\n[bold green]Project initialized successfully![/bold green]",
        "\nNext steps:",
        "1. Edit tests/example_tests.yaml to add your test cases",
        "2. Run: testmcpy research # To test your model",
        "3. Run: testmcpy run tests/ # To run all tests",
    )
    for line in closing_lines:
        console.print(line)
775
+
776
+
777
# Script entry point: dispatch to the Typer application when executed directly.
if __name__ == "__main__":
    app()