testmcpy 0.3.2__tar.gz → 0.5.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (181):
  1. {testmcpy-0.3.2/testmcpy.egg-info → testmcpy-0.5.0}/PKG-INFO +29 -2
  2. {testmcpy-0.3.2 → testmcpy-0.5.0}/README.md +28 -1
  3. {testmcpy-0.3.2 → testmcpy-0.5.0}/pyproject.toml +1 -1
  4. {testmcpy-0.3.2 → testmcpy-0.5.0}/testmcpy/__init__.py +1 -1
  5. {testmcpy-0.3.2 → testmcpy-0.5.0}/testmcpy/cli/commands/push.py +2 -2
  6. {testmcpy-0.3.2 → testmcpy-0.5.0}/testmcpy/cli/commands/run.py +107 -14
  7. {testmcpy-0.3.2 → testmcpy-0.5.0}/testmcpy/evals/base_evaluators.py +28 -3
  8. {testmcpy-0.3.2 → testmcpy-0.5.0}/testmcpy/server/api.py +1 -1
  9. {testmcpy-0.3.2 → testmcpy-0.5.0}/testmcpy/server/routers/compare.py +4 -0
  10. {testmcpy-0.3.2 → testmcpy-0.5.0}/testmcpy/server/routers/results.py +92 -7
  11. testmcpy-0.5.0/testmcpy/server/websocket.py +502 -0
  12. {testmcpy-0.3.2 → testmcpy-0.5.0}/testmcpy/src/llm_integration.py +133 -33
  13. {testmcpy-0.3.2 → testmcpy-0.5.0}/testmcpy/src/mcp_client.py +16 -19
  14. {testmcpy-0.3.2 → testmcpy-0.5.0}/testmcpy/src/test_runner.py +11 -3
  15. {testmcpy-0.3.2 → testmcpy-0.5.0}/testmcpy/storage.py +69 -6
  16. testmcpy-0.5.0/testmcpy/ui/dist/assets/index-30Ed2JCz.css +1 -0
  17. testmcpy-0.5.0/testmcpy/ui/dist/assets/index-6JiH0p1L.js +291 -0
  18. testmcpy-0.5.0/testmcpy/ui/dist/index.html +22 -0
  19. {testmcpy-0.3.2 → testmcpy-0.5.0}/testmcpy/ui/package-lock.json +4 -4
  20. {testmcpy-0.3.2 → testmcpy-0.5.0}/testmcpy/ui/package.json +1 -1
  21. testmcpy-0.5.0/testmcpy/ui/src/components/EditorStatusBar.jsx +96 -0
  22. testmcpy-0.5.0/testmcpy/ui/src/components/EditorTabStrip.jsx +65 -0
  23. {testmcpy-0.3.2 → testmcpy-0.5.0}/testmcpy/ui/src/components/StreamingLogViewer.jsx +404 -59
  24. {testmcpy-0.3.2 → testmcpy-0.5.0}/testmcpy/ui/src/contexts/TestRunContext.jsx +30 -0
  25. {testmcpy-0.3.2 → testmcpy-0.5.0}/testmcpy/ui/src/pages/Reports.jsx +201 -9
  26. {testmcpy-0.3.2 → testmcpy-0.5.0}/testmcpy/ui/src/pages/RunComparison.jsx +152 -19
  27. {testmcpy-0.3.2 → testmcpy-0.5.0}/testmcpy/ui/src/pages/TestManager.jsx +484 -156
  28. {testmcpy-0.3.2 → testmcpy-0.5.0/testmcpy.egg-info}/PKG-INFO +29 -2
  29. {testmcpy-0.3.2 → testmcpy-0.5.0}/testmcpy.egg-info/SOURCES.txt +5 -0
  30. testmcpy-0.3.2/testmcpy/server/websocket.py +0 -438
  31. {testmcpy-0.3.2 → testmcpy-0.5.0}/LICENSE +0 -0
  32. {testmcpy-0.3.2 → testmcpy-0.5.0}/MANIFEST.in +0 -0
  33. {testmcpy-0.3.2 → testmcpy-0.5.0}/NOTICE +0 -0
  34. {testmcpy-0.3.2 → testmcpy-0.5.0}/setup.cfg +0 -0
  35. {testmcpy-0.3.2 → testmcpy-0.5.0}/testmcpy/agent/__init__.py +0 -0
  36. {testmcpy-0.3.2 → testmcpy-0.5.0}/testmcpy/agent/hooks.py +0 -0
  37. {testmcpy-0.3.2 → testmcpy-0.5.0}/testmcpy/agent/models.py +0 -0
  38. {testmcpy-0.3.2 → testmcpy-0.5.0}/testmcpy/agent/orchestrator.py +0 -0
  39. {testmcpy-0.3.2 → testmcpy-0.5.0}/testmcpy/agent/prompts.py +0 -0
  40. {testmcpy-0.3.2 → testmcpy-0.5.0}/testmcpy/agent/tools.py +0 -0
  41. {testmcpy-0.3.2 → testmcpy-0.5.0}/testmcpy/auth_debugger.py +0 -0
  42. {testmcpy-0.3.2 → testmcpy-0.5.0}/testmcpy/auth_flow_recorder.py +0 -0
  43. {testmcpy-0.3.2 → testmcpy-0.5.0}/testmcpy/cli/__init__.py +0 -0
  44. {testmcpy-0.3.2 → testmcpy-0.5.0}/testmcpy/cli/app.py +0 -0
  45. {testmcpy-0.3.2 → testmcpy-0.5.0}/testmcpy/cli/commands/__init__.py +0 -0
  46. {testmcpy-0.3.2 → testmcpy-0.5.0}/testmcpy/cli/commands/agent.py +0 -0
  47. {testmcpy-0.3.2 → testmcpy-0.5.0}/testmcpy/cli/commands/baseline.py +0 -0
  48. {testmcpy-0.3.2 → testmcpy-0.5.0}/testmcpy/cli/commands/export_db.py +0 -0
  49. {testmcpy-0.3.2 → testmcpy-0.5.0}/testmcpy/cli/commands/mcp.py +0 -0
  50. {testmcpy-0.3.2 → testmcpy-0.5.0}/testmcpy/cli/commands/metamorphic.py +0 -0
  51. {testmcpy-0.3.2 → testmcpy-0.5.0}/testmcpy/cli/commands/multi_env.py +0 -0
  52. {testmcpy-0.3.2 → testmcpy-0.5.0}/testmcpy/cli/commands/mutate.py +0 -0
  53. {testmcpy-0.3.2 → testmcpy-0.5.0}/testmcpy/cli/commands/server.py +0 -0
  54. {testmcpy-0.3.2 → testmcpy-0.5.0}/testmcpy/cli/commands/tools.py +0 -0
  55. {testmcpy-0.3.2 → testmcpy-0.5.0}/testmcpy/cli/commands/tui.py +0 -0
  56. {testmcpy-0.3.2 → testmcpy-0.5.0}/testmcpy/cli/commands/wizard.py +0 -0
  57. {testmcpy-0.3.2 → testmcpy-0.5.0}/testmcpy/config.py +0 -0
  58. {testmcpy-0.3.2 → testmcpy-0.5.0}/testmcpy/core/__init__.py +0 -0
  59. {testmcpy-0.3.2 → testmcpy-0.5.0}/testmcpy/core/chat_session.py +0 -0
  60. {testmcpy-0.3.2 → testmcpy-0.5.0}/testmcpy/core/docs_optimizer.py +0 -0
  61. {testmcpy-0.3.2 → testmcpy-0.5.0}/testmcpy/core/mcp_manager.py +0 -0
  62. {testmcpy-0.3.2 → testmcpy-0.5.0}/testmcpy/core/tool_comparison.py +0 -0
  63. {testmcpy-0.3.2 → testmcpy-0.5.0}/testmcpy/core/tool_discovery.py +0 -0
  64. {testmcpy-0.3.2 → testmcpy-0.5.0}/testmcpy/db.py +0 -0
  65. {testmcpy-0.3.2 → testmcpy-0.5.0}/testmcpy/error_handlers.py +0 -0
  66. {testmcpy-0.3.2 → testmcpy-0.5.0}/testmcpy/evals/__init__.py +0 -0
  67. {testmcpy-0.3.2 → testmcpy-0.5.0}/testmcpy/evals/auth_evaluators.py +0 -0
  68. {testmcpy-0.3.2 → testmcpy-0.5.0}/testmcpy/evals/evaluator_packs.py +0 -0
  69. {testmcpy-0.3.2 → testmcpy-0.5.0}/testmcpy/formatters/__init__.py +0 -0
  70. {testmcpy-0.3.2 → testmcpy-0.5.0}/testmcpy/formatters/base.py +0 -0
  71. {testmcpy-0.3.2 → testmcpy-0.5.0}/testmcpy/formatters/curl.py +0 -0
  72. {testmcpy-0.3.2 → testmcpy-0.5.0}/testmcpy/formatters/graphql.py +0 -0
  73. {testmcpy-0.3.2 → testmcpy-0.5.0}/testmcpy/formatters/javascript_client.py +0 -0
  74. {testmcpy-0.3.2 → testmcpy-0.5.0}/testmcpy/formatters/json_yaml.py +0 -0
  75. {testmcpy-0.3.2 → testmcpy-0.5.0}/testmcpy/formatters/protobuf.py +0 -0
  76. {testmcpy-0.3.2 → testmcpy-0.5.0}/testmcpy/formatters/python.py +0 -0
  77. {testmcpy-0.3.2 → testmcpy-0.5.0}/testmcpy/formatters/python_client.py +0 -0
  78. {testmcpy-0.3.2 → testmcpy-0.5.0}/testmcpy/formatters/thrift.py +0 -0
  79. {testmcpy-0.3.2 → testmcpy-0.5.0}/testmcpy/formatters/typescript.py +0 -0
  80. {testmcpy-0.3.2 → testmcpy-0.5.0}/testmcpy/formatters/typescript_client.py +0 -0
  81. {testmcpy-0.3.2 → testmcpy-0.5.0}/testmcpy/llm_profiles.py +0 -0
  82. {testmcpy-0.3.2 → testmcpy-0.5.0}/testmcpy/mcp_profiles.py +0 -0
  83. {testmcpy-0.3.2 → testmcpy-0.5.0}/testmcpy/migrate_json.py +0 -0
  84. {testmcpy-0.3.2 → testmcpy-0.5.0}/testmcpy/models.py +0 -0
  85. {testmcpy-0.3.2 → testmcpy-0.5.0}/testmcpy/research/claude_sdk_detailed_exploration.py +0 -0
  86. {testmcpy-0.3.2 → testmcpy-0.5.0}/testmcpy/research/claude_sdk_poc.py +0 -0
  87. {testmcpy-0.3.2 → testmcpy-0.5.0}/testmcpy/research/claude_sdk_working_poc.py +0 -0
  88. {testmcpy-0.3.2 → testmcpy-0.5.0}/testmcpy/research/test_ollama_tools.py +0 -0
  89. {testmcpy-0.3.2 → testmcpy-0.5.0}/testmcpy/server/__init__.py +0 -0
  90. {testmcpy-0.3.2 → testmcpy-0.5.0}/testmcpy/server/api.py.bak +0 -0
  91. {testmcpy-0.3.2 → testmcpy-0.5.0}/testmcpy/server/auth_middleware.py +0 -0
  92. {testmcpy-0.3.2 → testmcpy-0.5.0}/testmcpy/server/helpers/__init__.py +0 -0
  93. {testmcpy-0.3.2 → testmcpy-0.5.0}/testmcpy/server/helpers/mcp_config.py +0 -0
  94. {testmcpy-0.3.2 → testmcpy-0.5.0}/testmcpy/server/models.py +0 -0
  95. {testmcpy-0.3.2 → testmcpy-0.5.0}/testmcpy/server/routers/__init__.py +0 -0
  96. {testmcpy-0.3.2 → testmcpy-0.5.0}/testmcpy/server/routers/agent.py +0 -0
  97. {testmcpy-0.3.2 → testmcpy-0.5.0}/testmcpy/server/routers/auth.py +0 -0
  98. {testmcpy-0.3.2 → testmcpy-0.5.0}/testmcpy/server/routers/compatibility.py +0 -0
  99. {testmcpy-0.3.2 → testmcpy-0.5.0}/testmcpy/server/routers/generation_logs.py +0 -0
  100. {testmcpy-0.3.2 → testmcpy-0.5.0}/testmcpy/server/routers/health.py +0 -0
  101. {testmcpy-0.3.2 → testmcpy-0.5.0}/testmcpy/server/routers/llm.py +0 -0
  102. {testmcpy-0.3.2 → testmcpy-0.5.0}/testmcpy/server/routers/mcp_profiles.py +0 -0
  103. {testmcpy-0.3.2 → testmcpy-0.5.0}/testmcpy/server/routers/metrics.py +0 -0
  104. {testmcpy-0.3.2 → testmcpy-0.5.0}/testmcpy/server/routers/search.py +0 -0
  105. {testmcpy-0.3.2 → testmcpy-0.5.0}/testmcpy/server/routers/security.py +0 -0
  106. {testmcpy-0.3.2 → testmcpy-0.5.0}/testmcpy/server/routers/smoke_reports.py +0 -0
  107. {testmcpy-0.3.2 → testmcpy-0.5.0}/testmcpy/server/routers/test_profiles.py +0 -0
  108. {testmcpy-0.3.2 → testmcpy-0.5.0}/testmcpy/server/routers/tests.py +0 -0
  109. {testmcpy-0.3.2 → testmcpy-0.5.0}/testmcpy/server/routers/tools.py +0 -0
  110. {testmcpy-0.3.2 → testmcpy-0.5.0}/testmcpy/server/state.py +0 -0
  111. {testmcpy-0.3.2 → testmcpy-0.5.0}/testmcpy/smoke_test.py +0 -0
  112. {testmcpy-0.3.2 → testmcpy-0.5.0}/testmcpy/src/__init__.py +0 -0
  113. {testmcpy-0.3.2 → testmcpy-0.5.0}/testmcpy/src/baseline.py +0 -0
  114. {testmcpy-0.3.2 → testmcpy-0.5.0}/testmcpy/src/ci_gate.py +0 -0
  115. {testmcpy-0.3.2 → testmcpy-0.5.0}/testmcpy/src/comparison_runner.py +0 -0
  116. {testmcpy-0.3.2 → testmcpy-0.5.0}/testmcpy/src/coverage_analyzer.py +0 -0
  117. {testmcpy-0.3.2 → testmcpy-0.5.0}/testmcpy/src/html_report.py +0 -0
  118. {testmcpy-0.3.2 → testmcpy-0.5.0}/testmcpy/src/metamorphic.py +0 -0
  119. {testmcpy-0.3.2 → testmcpy-0.5.0}/testmcpy/src/model_registry.py +0 -0
  120. {testmcpy-0.3.2 → testmcpy-0.5.0}/testmcpy/src/models.py +0 -0
  121. {testmcpy-0.3.2 → testmcpy-0.5.0}/testmcpy/src/multi_env.py +0 -0
  122. {testmcpy-0.3.2 → testmcpy-0.5.0}/testmcpy/src/oauth_flows.py +0 -0
  123. {testmcpy-0.3.2 → testmcpy-0.5.0}/testmcpy/src/prompt_mutation.py +0 -0
  124. {testmcpy-0.3.2 → testmcpy-0.5.0}/testmcpy/src/report_generator.py +0 -0
  125. {testmcpy-0.3.2 → testmcpy-0.5.0}/testmcpy/src/runner_tools.py +0 -0
  126. {testmcpy-0.3.2 → testmcpy-0.5.0}/testmcpy/src/schema_diff.py +0 -0
  127. {testmcpy-0.3.2 → testmcpy-0.5.0}/testmcpy/src/token_manager.py +0 -0
  128. {testmcpy-0.3.2 → testmcpy-0.5.0}/testmcpy/test_profiles.py +0 -0
  129. {testmcpy-0.3.2 → testmcpy-0.5.0}/testmcpy/ui/README.md +0 -0
  130. {testmcpy-0.3.2 → testmcpy-0.5.0}/testmcpy/ui/index.html +0 -0
  131. {testmcpy-0.3.2 → testmcpy-0.5.0}/testmcpy/ui/postcss.config.js +0 -0
  132. {testmcpy-0.3.2 → testmcpy-0.5.0}/testmcpy/ui/src/App.jsx +0 -0
  133. {testmcpy-0.3.2 → testmcpy-0.5.0}/testmcpy/ui/src/components/CommandPalette.jsx +0 -0
  134. {testmcpy-0.3.2 → testmcpy-0.5.0}/testmcpy/ui/src/components/CompareToolsTab.jsx +0 -0
  135. {testmcpy-0.3.2 → testmcpy-0.5.0}/testmcpy/ui/src/components/ErrorAlert.jsx +0 -0
  136. {testmcpy-0.3.2 → testmcpy-0.5.0}/testmcpy/ui/src/components/ErrorBoundary.jsx +0 -0
  137. {testmcpy-0.3.2 → testmcpy-0.5.0}/testmcpy/ui/src/components/LLMProfileSelector.jsx +0 -0
  138. {testmcpy-0.3.2 → testmcpy-0.5.0}/testmcpy/ui/src/components/LoadingSpinner.jsx +0 -0
  139. {testmcpy-0.3.2 → testmcpy-0.5.0}/testmcpy/ui/src/components/MCPProfileSelector.jsx +0 -0
  140. {testmcpy-0.3.2 → testmcpy-0.5.0}/testmcpy/ui/src/components/NotificationProvider.jsx +0 -0
  141. {testmcpy-0.3.2 → testmcpy-0.5.0}/testmcpy/ui/src/components/OptimizeDocsModal.jsx +0 -0
  142. {testmcpy-0.3.2 → testmcpy-0.5.0}/testmcpy/ui/src/components/OutputDiff.jsx +0 -0
  143. {testmcpy-0.3.2 → testmcpy-0.5.0}/testmcpy/ui/src/components/ParameterCard.jsx +0 -0
  144. {testmcpy-0.3.2 → testmcpy-0.5.0}/testmcpy/ui/src/components/SchemaCodeViewer.jsx +0 -0
  145. {testmcpy-0.3.2 → testmcpy-0.5.0}/testmcpy/ui/src/components/SkeletonLoader.jsx +0 -0
  146. {testmcpy-0.3.2 → testmcpy-0.5.0}/testmcpy/ui/src/components/TestGenerationModal.jsx +0 -0
  147. {testmcpy-0.3.2 → testmcpy-0.5.0}/testmcpy/ui/src/components/TestProfileSelector.jsx +0 -0
  148. {testmcpy-0.3.2 → testmcpy-0.5.0}/testmcpy/ui/src/components/TestResultPanel.jsx +0 -0
  149. {testmcpy-0.3.2 → testmcpy-0.5.0}/testmcpy/ui/src/components/TestStatusIndicator.jsx +0 -0
  150. {testmcpy-0.3.2 → testmcpy-0.5.0}/testmcpy/ui/src/components/ToolCallTimeline.jsx +0 -0
  151. {testmcpy-0.3.2 → testmcpy-0.5.0}/testmcpy/ui/src/components/ToolComparison.jsx +0 -0
  152. {testmcpy-0.3.2 → testmcpy-0.5.0}/testmcpy/ui/src/components/ToolDebugModal.jsx +0 -0
  153. {testmcpy-0.3.2 → testmcpy-0.5.0}/testmcpy/ui/src/components/TraceView.jsx +0 -0
  154. {testmcpy-0.3.2 → testmcpy-0.5.0}/testmcpy/ui/src/components/TypeBadge.jsx +0 -0
  155. {testmcpy-0.3.2 → testmcpy-0.5.0}/testmcpy/ui/src/components/Wizard.jsx +0 -0
  156. {testmcpy-0.3.2 → testmcpy-0.5.0}/testmcpy/ui/src/contexts/ThemeContext.jsx +0 -0
  157. {testmcpy-0.3.2 → testmcpy-0.5.0}/testmcpy/ui/src/hooks/useEditorTheme.js +0 -0
  158. {testmcpy-0.3.2 → testmcpy-0.5.0}/testmcpy/ui/src/hooks/useKeyboardShortcuts.js +0 -0
  159. {testmcpy-0.3.2 → testmcpy-0.5.0}/testmcpy/ui/src/hooks/useSafeFetch.js +0 -0
  160. {testmcpy-0.3.2 → testmcpy-0.5.0}/testmcpy/ui/src/index.css +0 -0
  161. {testmcpy-0.3.2 → testmcpy-0.5.0}/testmcpy/ui/src/main.jsx +0 -0
  162. {testmcpy-0.3.2 → testmcpy-0.5.0}/testmcpy/ui/src/pages/AuthDebugger.jsx +0 -0
  163. {testmcpy-0.3.2 → testmcpy-0.5.0}/testmcpy/ui/src/pages/ChatInterface.jsx +0 -0
  164. {testmcpy-0.3.2 → testmcpy-0.5.0}/testmcpy/ui/src/pages/CompatibilityMatrix.jsx +0 -0
  165. {testmcpy-0.3.2 → testmcpy-0.5.0}/testmcpy/ui/src/pages/Configuration.jsx +0 -0
  166. {testmcpy-0.3.2 → testmcpy-0.5.0}/testmcpy/ui/src/pages/GenerationHistory.jsx +0 -0
  167. {testmcpy-0.3.2 → testmcpy-0.5.0}/testmcpy/ui/src/pages/LLMProfiles.jsx +0 -0
  168. {testmcpy-0.3.2 → testmcpy-0.5.0}/testmcpy/ui/src/pages/MCPExplorer.jsx +0 -0
  169. {testmcpy-0.3.2 → testmcpy-0.5.0}/testmcpy/ui/src/pages/MCPHealth.jsx +0 -0
  170. {testmcpy-0.3.2 → testmcpy-0.5.0}/testmcpy/ui/src/pages/MCPProfiles.jsx +0 -0
  171. {testmcpy-0.3.2 → testmcpy-0.5.0}/testmcpy/ui/src/pages/MetricsDashboard.jsx +0 -0
  172. {testmcpy-0.3.2 → testmcpy-0.5.0}/testmcpy/ui/src/pages/ProfilesManager.jsx +0 -0
  173. {testmcpy-0.3.2 → testmcpy-0.5.0}/testmcpy/ui/src/pages/SecurityDashboard.jsx +0 -0
  174. {testmcpy-0.3.2 → testmcpy-0.5.0}/testmcpy/ui/src/utils/__tests__/formatConverters.test.js +0 -0
  175. {testmcpy-0.3.2 → testmcpy-0.5.0}/testmcpy/ui/src/utils/formatConverters.js +0 -0
  176. {testmcpy-0.3.2 → testmcpy-0.5.0}/testmcpy/ui/tailwind.config.js +0 -0
  177. {testmcpy-0.3.2 → testmcpy-0.5.0}/testmcpy/ui/vite.config.js +0 -0
  178. {testmcpy-0.3.2 → testmcpy-0.5.0}/testmcpy.egg-info/dependency_links.txt +0 -0
  179. {testmcpy-0.3.2 → testmcpy-0.5.0}/testmcpy.egg-info/entry_points.txt +0 -0
  180. {testmcpy-0.3.2 → testmcpy-0.5.0}/testmcpy.egg-info/requires.txt +0 -0
  181. {testmcpy-0.3.2 → testmcpy-0.5.0}/testmcpy.egg-info/top_level.txt +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: testmcpy
3
- Version: 0.3.2
3
+ Version: 0.5.0
4
4
  Summary: A comprehensive testing framework for validating LLM tool calling capabilities with MCP services
5
5
  Author: Amin Ghadersohi
6
6
  License-Expression: Apache-2.0
@@ -404,6 +404,33 @@ testmcpy run tests/ --model claude-haiku-4-5
404
404
 
405
405
  **Common options:** `--profile`, `--llm-profile`, `--model`, `--provider`, `--timeout`, `--verbose`, `--output`
406
406
 
407
+ ### Inline MCP Auth (No Config File Needed)
408
+
409
+ Pass MCP auth credentials directly on the command line, bypassing `.mcp_services.yaml`:
410
+
411
+ ```bash
412
+ # JWT auth (e.g., Preset workspaces)
413
+ testmcpy run tests/ \
414
+ --mcp-url https://workspace.example.com/mcp \
415
+ --auth-type jwt \
416
+ --jwt-url https://auth.example.com/v1/auth/ \
417
+ --jwt-token $MCP_JWT_TOKEN \
418
+ --jwt-secret $MCP_JWT_SECRET
419
+
420
+ # Bearer token auth
421
+ testmcpy run tests/ \
422
+ --mcp-url https://workspace.example.com/mcp \
423
+ --auth-type bearer \
424
+ --auth-token $MCP_BEARER_TOKEN
425
+
426
+ # No auth (public MCP endpoint)
427
+ testmcpy run tests/ \
428
+ --mcp-url https://workspace.example.com/mcp \
429
+ --auth-type none
430
+ ```
431
+
432
+ Environment variables are also supported: `MCP_AUTH_TOKEN`, `MCP_JWT_URL`, `MCP_JWT_TOKEN`, `MCP_JWT_SECRET`.
433
+
407
434
  ## Web Interface
408
435
 
409
436
  Optional React-based UI with 15+ pages for visual testing and analytics:
@@ -549,4 +576,4 @@ Apache License 2.0 — See [LICENSE](LICENSE) for details.
549
576
 
550
577
  ---
551
578
 
552
- **Built by [@aminghadersohi](https://github.com/aminghadersohi)** ([Preset](https://preset.io), [Apache Superset](https://github.com/apache/superset)).
579
+ **Built by [@aminghadersohi](https://github.com/aminghadersohi)** at [Preset](https://preset.io).
@@ -334,6 +334,33 @@ testmcpy run tests/ --model claude-haiku-4-5
334
334
 
335
335
  **Common options:** `--profile`, `--llm-profile`, `--model`, `--provider`, `--timeout`, `--verbose`, `--output`
336
336
 
337
+ ### Inline MCP Auth (No Config File Needed)
338
+
339
+ Pass MCP auth credentials directly on the command line, bypassing `.mcp_services.yaml`:
340
+
341
+ ```bash
342
+ # JWT auth (e.g., Preset workspaces)
343
+ testmcpy run tests/ \
344
+ --mcp-url https://workspace.example.com/mcp \
345
+ --auth-type jwt \
346
+ --jwt-url https://auth.example.com/v1/auth/ \
347
+ --jwt-token $MCP_JWT_TOKEN \
348
+ --jwt-secret $MCP_JWT_SECRET
349
+
350
+ # Bearer token auth
351
+ testmcpy run tests/ \
352
+ --mcp-url https://workspace.example.com/mcp \
353
+ --auth-type bearer \
354
+ --auth-token $MCP_BEARER_TOKEN
355
+
356
+ # No auth (public MCP endpoint)
357
+ testmcpy run tests/ \
358
+ --mcp-url https://workspace.example.com/mcp \
359
+ --auth-type none
360
+ ```
361
+
362
+ Environment variables are also supported: `MCP_AUTH_TOKEN`, `MCP_JWT_URL`, `MCP_JWT_TOKEN`, `MCP_JWT_SECRET`.
363
+
337
364
  ## Web Interface
338
365
 
339
366
  Optional React-based UI with 15+ pages for visual testing and analytics:
@@ -479,4 +506,4 @@ Apache License 2.0 — See [LICENSE](LICENSE) for details.
479
506
 
480
507
  ---
481
508
 
482
- **Built by [@aminghadersohi](https://github.com/aminghadersohi)** ([Preset](https://preset.io), [Apache Superset](https://github.com/apache/superset)).
509
+ **Built by [@aminghadersohi](https://github.com/aminghadersohi)** at [Preset](https://preset.io).
@@ -93,7 +93,7 @@ testmcpy = [
93
93
 
94
94
  [project]
95
95
  name = "testmcpy"
96
- version = "0.3.2"
96
+ version = "0.5.0"
97
97
  description = "A comprehensive testing framework for validating LLM tool calling capabilities with MCP services"
98
98
  authors = [{name = "Amin Ghadersohi"}]
99
99
  license = "Apache-2.0"
@@ -11,6 +11,6 @@ try:
11
11
  __version__ = version("testmcpy")
12
12
  except Exception:
13
13
  # Fallback for development or when package not installed
14
- __version__ = "0.3.2"
14
+ __version__ = "0.5.0"
15
15
 
16
16
  __author__ = "testmcpy Contributors"
@@ -17,7 +17,7 @@ def push(
17
17
  None,
18
18
  "--server",
19
19
  "-s",
20
- help="Remote testmcpy server URL (e.g. https://testmcpy.sandbox.preset.io)",
20
+ help="Remote testmcpy server URL (e.g. https://testmcpy.example.com)",
21
21
  ),
22
22
  api_key: Optional[str] = typer.Option(
23
23
  None,
@@ -66,7 +66,7 @@ def push(
66
66
  Panel(
67
67
  "[red]No server URL specified.[/red]\n\n"
68
68
  "Provide the remote testmcpy server URL via:\n"
69
- " [cyan]--server https://testmcpy.sandbox.preset.io[/cyan]\n"
69
+ " [cyan]--server https://testmcpy.example.com[/cyan]\n"
70
70
  " or\n"
71
71
  " [cyan]export TESTMCPY_SERVER_URL=https://...[/cyan]",
72
72
  title="Missing Server",
@@ -2,6 +2,7 @@
2
2
 
3
3
  import asyncio
4
4
  import json
5
+ import uuid
5
6
  from pathlib import Path
6
7
  from typing import Optional
7
8
 
@@ -220,12 +221,63 @@ def run(
220
221
  "--system-prompt-file",
221
222
  help="File containing the system prompt text",
222
223
  ),
224
+ # Inline MCP auth options (bypass .mcp_services.yaml)
225
+ auth_type: Optional[str] = typer.Option(
226
+ None,
227
+ "--auth-type",
228
+ help="MCP auth type: jwt, bearer, oauth, api_key, none",
229
+ ),
230
+ auth_token: Optional[str] = typer.Option(
231
+ None,
232
+ "--auth-token",
233
+ envvar="MCP_AUTH_TOKEN",
234
+ help="Bearer token or API key for MCP auth",
235
+ ),
236
+ jwt_url: Optional[str] = typer.Option(
237
+ None,
238
+ "--jwt-url",
239
+ envvar="MCP_JWT_URL",
240
+ help="JWT auth endpoint URL (for --auth-type jwt)",
241
+ ),
242
+ jwt_token: Optional[str] = typer.Option(
243
+ None,
244
+ "--jwt-token",
245
+ envvar="MCP_JWT_TOKEN",
246
+ help="JWT API token / key name (for --auth-type jwt)",
247
+ ),
248
+ jwt_secret: Optional[str] = typer.Option(
249
+ None,
250
+ "--jwt-secret",
251
+ envvar="MCP_JWT_SECRET",
252
+ help="JWT API secret (for --auth-type jwt)",
253
+ ),
223
254
  ):
224
255
  """
225
256
  Run test cases against MCP service.
226
257
 
227
258
  This command executes test cases defined in YAML/JSON files.
228
259
  """
260
+ # Generate session ID to group multiple runs from the same CLI invocation
261
+ session_id = str(uuid.uuid4())
262
+
263
+ # Build inline auth dict if --auth-type is provided
264
+ inline_auth = None
265
+ if auth_type:
266
+ inline_auth = {"type": auth_type}
267
+ if auth_type == "jwt":
268
+ if jwt_url:
269
+ inline_auth["api_url"] = jwt_url
270
+ if jwt_token:
271
+ inline_auth["api_token"] = jwt_token
272
+ if jwt_secret:
273
+ inline_auth["api_secret"] = jwt_secret
274
+ elif auth_type == "bearer":
275
+ if auth_token:
276
+ inline_auth["token"] = auth_token
277
+ elif auth_type == "api_key":
278
+ if auth_token:
279
+ inline_auth["api_key"] = auth_token
280
+
229
281
  # Load config with profile if specified
230
282
  if profile:
231
283
  from testmcpy.config import Config
@@ -251,17 +303,25 @@ def run(
251
303
 
252
304
  # Get authenticated MCP client
253
305
  mcp_client = None
254
- effective_profile = profile
255
- if not effective_profile:
256
- # Use default profile from config
257
- mcp_config = load_mcp_yaml()
258
- effective_profile = mcp_config.get("default")
259
306
 
260
- if effective_profile:
261
- try:
262
- mcp_client = await get_or_create_mcp_client(effective_profile)
263
- except Exception as e:
264
- console.print(f"[yellow]Warning: Could not load MCP profile: {e}[/yellow]")
307
+ if inline_auth and effective_mcp_url:
308
+ # Use inline auth flags — bypass profile system entirely
309
+ from testmcpy.src.mcp_client import MCPClient
310
+
311
+ mcp_client = MCPClient(effective_mcp_url, auth=inline_auth)
312
+ await mcp_client.initialize()
313
+ else:
314
+ effective_profile = profile
315
+ if not effective_profile:
316
+ # Use default profile from config
317
+ mcp_config = load_mcp_yaml()
318
+ effective_profile = mcp_config.get("default")
319
+
320
+ if effective_profile:
321
+ try:
322
+ mcp_client = await get_or_create_mcp_client(effective_profile)
323
+ except Exception as e:
324
+ console.print(f"[yellow]Warning: Could not load MCP profile: {e}[/yellow]")
265
325
 
266
326
  # Load test cases and detect suite-level provider override
267
327
  test_cases = []
@@ -342,6 +402,11 @@ def run(
342
402
  f"[yellow]Suite-level provider config:[/yellow] {suite_provider_config}"
343
403
  )
344
404
 
405
+ def cli_log_callback(msg: str) -> None:
406
+ """Print runner/provider log messages to console in real-time."""
407
+ if verbose:
408
+ console.print(f" [dim]{msg}[/dim]")
409
+
345
410
  runner = TestRunner(
346
411
  model=effective_model,
347
412
  provider=effective_provider,
@@ -350,6 +415,7 @@ def run(
350
415
  verbose=verbose,
351
416
  hide_tool_output=hide_tool_output,
352
417
  provider_config=suite_provider_config,
418
+ log_callback=cli_log_callback if verbose else None,
353
419
  )
354
420
 
355
421
  console.print(f"\n[bold]Found {len(test_cases)} test case(s)[/bold]")
@@ -384,11 +450,35 @@ def run(
384
450
  f" [dim]Prompt: {test_case.prompt[:80]}{'...' if len(test_case.prompt) > 80 else ''}[/dim]"
385
451
  )
386
452
 
387
- # Run the test
388
- from rich.status import Status
389
-
390
- with Status("[yellow]Executing test...[/yellow]", console=console):
453
+ # Run the test — show live progress instead of static spinner
454
+ if verbose:
455
+ # In verbose mode, let _log() print directly (no spinner overlay)
391
456
  result = await runner._run_test_with_retry(test_case)
457
+ else:
458
+ # In non-verbose mode, update spinner with runner progress
459
+ from rich.status import Status
460
+
461
+ _status = Status("[yellow]Executing test...[/yellow]", console=console)
462
+ _status.start()
463
+
464
+ def update_status(msg: str, _s: Status = _status) -> None:
465
+ msg_lower = msg.lower()
466
+ if "tool call" in msg_lower or "tool_call" in msg_lower:
467
+ _s.update(f"[yellow]Tool call: {msg.split('.')[-1].strip()[:60]}[/yellow]")
468
+ elif "running test" in msg_lower:
469
+ _s.update("[yellow]Running...[/yellow]")
470
+ elif "executing" in msg_lower:
471
+ _s.update("[yellow]Executing tool calls...[/yellow]")
472
+ elif "evaluating" in msg_lower or "evaluator" in msg_lower:
473
+ _s.update("[yellow]Evaluating results...[/yellow]")
474
+
475
+ old_callback = runner.log_callback
476
+ runner.log_callback = update_status
477
+ try:
478
+ result = await runner._run_test_with_retry(test_case)
479
+ finally:
480
+ runner.log_callback = old_callback
481
+ _status.stop()
392
482
 
393
483
  results.append(result)
394
484
 
@@ -487,6 +577,9 @@ def run(
487
577
  "passed": total_passed,
488
578
  "failed": len(results) - total_passed,
489
579
  },
580
+ "metadata": {
581
+ "session_id": session_id,
582
+ },
490
583
  }
491
584
  save_result = save_test_run_to_file(save_data)
492
585
  console.print(f"[dim]Results saved: {save_result.get('run_id', '?')}[/dim]")
@@ -187,6 +187,25 @@ class ExecutionSuccessful(BaseEvaluator):
187
187
  def description(self) -> str:
188
188
  return "Checks if tool execution completed without errors"
189
189
 
190
+ # Tools blocked by the SDK harness — errors from these are expected and should be ignored
191
+ _BLOCKED_TOOLS = {
192
+ "Read",
193
+ "Bash",
194
+ "Edit",
195
+ "Write",
196
+ "Grep",
197
+ "Glob",
198
+ "ToolSearch",
199
+ "Skill",
200
+ "TodoWrite",
201
+ "Agent",
202
+ "WebFetch",
203
+ "WebSearch",
204
+ "NotebookEdit",
205
+ "EnterWorktree",
206
+ "ExitWorktree",
207
+ }
208
+
190
209
  def evaluate(self, context: dict[str, Any]) -> EvalResult:
191
210
  tool_results = context.get("tool_results", [])
192
211
 
@@ -196,9 +215,15 @@ class ExecutionSuccessful(BaseEvaluator):
196
215
  errors = []
197
216
  for result in tool_results:
198
217
  if result.is_error:
199
- errors.append(
200
- {"tool": result.tool_call_id, "error": result.error_message or "Unknown error"}
201
- )
218
+ error_msg = result.error_message or "Unknown error"
219
+ # Skip errors from blocked/disallowed tools (expected failures)
220
+ if "No such tool available" in error_msg or "not enabled" in error_msg:
221
+ continue
222
+ # Also check tool name against known blocked tools
223
+ tool_id = result.tool_call_id or ""
224
+ if any(blocked in tool_id for blocked in self._BLOCKED_TOOLS):
225
+ continue
226
+ errors.append({"tool": tool_id, "error": error_msg})
202
227
 
203
228
  if errors:
204
229
  return EvalResult(
@@ -261,7 +261,7 @@ def is_connection_error(error_msg: str) -> bool:
261
261
 
262
262
  Only returns True for errors where the MCP session is truly dead and
263
263
  the cached client must be discarded. Auth errors (401/403) are NOT
264
- included because the PresetOAuth transport handles token refresh and
264
+ included because the MCPOAuth transport handles token refresh and
265
265
  re-auth internally — evicting the client on an expired token would
266
266
  just trigger a new browser OAuth popup.
267
267
  """
@@ -57,6 +57,9 @@ async def compare_runs(request: CompareRequest) -> dict[str, Any]:
57
57
  "passed": passed,
58
58
  "failed": total - passed,
59
59
  "pass_rate": round((passed / total * 100) if total > 0 else 0, 1),
60
+ "total_cost": run["summary"].get("total_cost_usd", 0),
61
+ "total_tokens": run["summary"].get("total_tokens", 0),
62
+ "total_duration_ms": run["summary"].get("total_duration_ms", 0),
60
63
  }
61
64
  )
62
65
 
@@ -94,6 +97,7 @@ async def compare_runs(request: CompareRequest) -> dict[str, Any]:
94
97
  "answer_snippet": answer_snippet,
95
98
  "tokens_input": match.get("tokens_input", 0),
96
99
  "tokens_output": match.get("tokens_output", 0),
100
+ "cost_usd": match.get("cost_usd", 0),
97
101
  }
98
102
  else:
99
103
  cells[run["run_id"]] = {
@@ -87,6 +87,7 @@ def save_test_run_to_file(data: dict[str, Any]) -> dict[str, Any]:
87
87
  provider=provider,
88
88
  started_at=started_at,
89
89
  mcp_profile_id=data.get("mcp_profile"),
90
+ metadata=data.get("metadata"),
90
91
  )
91
92
 
92
93
  # Save individual question results
@@ -120,13 +121,33 @@ async def save_test_run(data: dict[str, Any]) -> dict[str, Any]:
120
121
 
121
122
 
122
123
  @router.get("/list")
123
- async def list_test_runs(test_file: str | None = None, limit: int = 50) -> dict[str, Any]:
124
+ async def list_test_runs(
125
+ test_file: str | None = None,
126
+ model: str | None = None,
127
+ provider: str | None = None,
128
+ date_from: str | None = None,
129
+ date_to: str | None = None,
130
+ sort_by: str = "started_at",
131
+ sort_order: str = "desc",
132
+ limit: int = 50,
133
+ offset: int = 0,
134
+ ) -> dict[str, Any]:
124
135
  """
125
- List all test runs, optionally filtered by test file.
136
+ List all test runs with filtering, sorting, and pagination.
126
137
  Returns metadata only (not full results).
127
138
  """
128
139
  storage = get_storage()
129
- runs_data = storage.list_runs(test_id=test_file, limit=limit)
140
+ runs_data = storage.list_runs(
141
+ test_id=test_file,
142
+ model=model,
143
+ provider=provider,
144
+ date_from=date_from,
145
+ date_to=date_to,
146
+ sort_by=sort_by,
147
+ sort_order=sort_order,
148
+ limit=limit,
149
+ offset=offset,
150
+ )
130
151
 
131
152
  runs = []
132
153
  for run in runs_data:
@@ -143,13 +164,77 @@ async def list_test_runs(test_file: str | None = None, limit: int = 50) -> dict[
143
164
  "total_tests": run["total_questions"],
144
165
  "passed": run["passed_questions"],
145
166
  "failed": run["total_questions"] - run["passed_questions"],
146
- "total_cost": 0.0,
147
- "total_tokens": 0,
148
- "total_duration": 0.0,
167
+ "total_cost": run.get("total_cost", 0.0),
168
+ "total_tokens": run.get("total_tokens", 0),
169
+ "total_duration": round((run.get("total_duration_ms", 0) or 0) / 1000, 2),
170
+ "session_id": run.get("metadata", {}).get("session_id"),
171
+ }
172
+ )
173
+
174
+ # total reflects page size; a full count query would be needed for true pagination
175
+ # For now, signal "there may be more" if we hit the limit
176
+ return {"runs": runs, "total": len(runs), "has_more": len(runs) >= limit}
177
+
178
+
179
+ @router.get("/filters")
180
+ async def get_filter_options() -> dict[str, Any]:
181
+ """Get distinct values for filter dropdowns (models, providers, test files)."""
182
+ storage = get_storage()
183
+ return storage.get_filter_options()
184
+
185
+
186
+ @router.get("/sessions")
187
+ async def list_sessions(limit: int = 20, run_limit: int = 200) -> dict[str, Any]:
188
+ """List runs grouped by session_id, with aggregate stats per session.
189
+ Only examines the most recent `run_limit` runs to keep the query fast."""
190
+ storage = get_storage()
191
+ all_runs = storage.list_runs(limit=run_limit)
192
+
193
+ # Group by session_id
194
+ sessions: dict[str, list] = {}
195
+ ungrouped = []
196
+ for run in all_runs:
197
+ sid = run.get("metadata", {}).get("session_id")
198
+ if sid:
199
+ sessions.setdefault(sid, []).append(run)
200
+ else:
201
+ ungrouped.append(run)
202
+
203
+ # Build session summaries
204
+ result = []
205
+ for sid, runs in sorted(
206
+ sessions.items(), key=lambda x: x[1][0].get("started_at", ""), reverse=True
207
+ ):
208
+ total_q = sum(r["total_questions"] for r in runs)
209
+ passed_q = sum(r["passed_questions"] for r in runs)
210
+ result.append(
211
+ {
212
+ "session_id": sid,
213
+ "run_count": len(runs),
214
+ "models": list({r["model"] for r in runs}),
215
+ "providers": list({r["provider"] for r in runs}),
216
+ "test_files": [r["test_id"] for r in runs],
217
+ "started_at": min(r["started_at"] for r in runs if r.get("started_at")),
218
+ "total_tests": total_q,
219
+ "passed": passed_q,
220
+ "failed": total_q - passed_q,
221
+ "pass_rate": round(passed_q / total_q * 100, 1) if total_q > 0 else 0,
222
+ "total_cost": round(sum(r.get("total_cost", 0) for r in runs), 4),
223
+ "total_tokens": sum(r.get("total_tokens", 0) for r in runs),
224
+ "runs": [
225
+ {
226
+ "run_id": r["run_id"],
227
+ "test_file": r["test_id"],
228
+ "passed": r["passed_questions"],
229
+ "failed": r["total_questions"] - r["passed_questions"],
230
+ "pass_rate": r["pass_rate"],
231
+ }
232
+ for r in runs
233
+ ],
149
234
  }
150
235
  )
151
236
 
152
- return {"runs": runs, "total": len(runs)}
237
+ return {"sessions": result[:limit], "total": len(result)}
153
238
 
154
239
 
155
240
  @router.get("/run/{run_id}")