testmcpy 0.7.2__tar.gz → 0.7.3__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (180) hide show
  1. {testmcpy-0.7.2/testmcpy.egg-info → testmcpy-0.7.3}/PKG-INFO +1 -1
  2. {testmcpy-0.7.2 → testmcpy-0.7.3}/pyproject.toml +1 -1
  3. {testmcpy-0.7.2 → testmcpy-0.7.3}/testmcpy/__init__.py +1 -1
  4. {testmcpy-0.7.2 → testmcpy-0.7.3}/testmcpy/cli/commands/run.py +33 -0
  5. {testmcpy-0.7.2 → testmcpy-0.7.3}/testmcpy/src/llm_integration.py +188 -23
  6. {testmcpy-0.7.2 → testmcpy-0.7.3/testmcpy.egg-info}/PKG-INFO +1 -1
  7. {testmcpy-0.7.2 → testmcpy-0.7.3}/LICENSE +0 -0
  8. {testmcpy-0.7.2 → testmcpy-0.7.3}/MANIFEST.in +0 -0
  9. {testmcpy-0.7.2 → testmcpy-0.7.3}/NOTICE +0 -0
  10. {testmcpy-0.7.2 → testmcpy-0.7.3}/README.md +0 -0
  11. {testmcpy-0.7.2 → testmcpy-0.7.3}/setup.cfg +0 -0
  12. {testmcpy-0.7.2 → testmcpy-0.7.3}/testmcpy/agent/__init__.py +0 -0
  13. {testmcpy-0.7.2 → testmcpy-0.7.3}/testmcpy/agent/hooks.py +0 -0
  14. {testmcpy-0.7.2 → testmcpy-0.7.3}/testmcpy/agent/models.py +0 -0
  15. {testmcpy-0.7.2 → testmcpy-0.7.3}/testmcpy/agent/orchestrator.py +0 -0
  16. {testmcpy-0.7.2 → testmcpy-0.7.3}/testmcpy/agent/prompts.py +0 -0
  17. {testmcpy-0.7.2 → testmcpy-0.7.3}/testmcpy/agent/tools.py +0 -0
  18. {testmcpy-0.7.2 → testmcpy-0.7.3}/testmcpy/auth_debugger.py +0 -0
  19. {testmcpy-0.7.2 → testmcpy-0.7.3}/testmcpy/auth_flow_recorder.py +0 -0
  20. {testmcpy-0.7.2 → testmcpy-0.7.3}/testmcpy/cli/__init__.py +0 -0
  21. {testmcpy-0.7.2 → testmcpy-0.7.3}/testmcpy/cli/app.py +0 -0
  22. {testmcpy-0.7.2 → testmcpy-0.7.3}/testmcpy/cli/commands/__init__.py +0 -0
  23. {testmcpy-0.7.2 → testmcpy-0.7.3}/testmcpy/cli/commands/agent.py +0 -0
  24. {testmcpy-0.7.2 → testmcpy-0.7.3}/testmcpy/cli/commands/baseline.py +0 -0
  25. {testmcpy-0.7.2 → testmcpy-0.7.3}/testmcpy/cli/commands/export_db.py +0 -0
  26. {testmcpy-0.7.2 → testmcpy-0.7.3}/testmcpy/cli/commands/mcp.py +0 -0
  27. {testmcpy-0.7.2 → testmcpy-0.7.3}/testmcpy/cli/commands/metamorphic.py +0 -0
  28. {testmcpy-0.7.2 → testmcpy-0.7.3}/testmcpy/cli/commands/multi_env.py +0 -0
  29. {testmcpy-0.7.2 → testmcpy-0.7.3}/testmcpy/cli/commands/mutate.py +0 -0
  30. {testmcpy-0.7.2 → testmcpy-0.7.3}/testmcpy/cli/commands/push.py +0 -0
  31. {testmcpy-0.7.2 → testmcpy-0.7.3}/testmcpy/cli/commands/server.py +0 -0
  32. {testmcpy-0.7.2 → testmcpy-0.7.3}/testmcpy/cli/commands/tools.py +0 -0
  33. {testmcpy-0.7.2 → testmcpy-0.7.3}/testmcpy/cli/commands/tui.py +0 -0
  34. {testmcpy-0.7.2 → testmcpy-0.7.3}/testmcpy/cli/commands/wizard.py +0 -0
  35. {testmcpy-0.7.2 → testmcpy-0.7.3}/testmcpy/config.py +0 -0
  36. {testmcpy-0.7.2 → testmcpy-0.7.3}/testmcpy/core/__init__.py +0 -0
  37. {testmcpy-0.7.2 → testmcpy-0.7.3}/testmcpy/core/chat_session.py +0 -0
  38. {testmcpy-0.7.2 → testmcpy-0.7.3}/testmcpy/core/docs_optimizer.py +0 -0
  39. {testmcpy-0.7.2 → testmcpy-0.7.3}/testmcpy/core/mcp_manager.py +0 -0
  40. {testmcpy-0.7.2 → testmcpy-0.7.3}/testmcpy/core/tool_comparison.py +0 -0
  41. {testmcpy-0.7.2 → testmcpy-0.7.3}/testmcpy/core/tool_discovery.py +0 -0
  42. {testmcpy-0.7.2 → testmcpy-0.7.3}/testmcpy/db.py +0 -0
  43. {testmcpy-0.7.2 → testmcpy-0.7.3}/testmcpy/error_handlers.py +0 -0
  44. {testmcpy-0.7.2 → testmcpy-0.7.3}/testmcpy/evals/__init__.py +0 -0
  45. {testmcpy-0.7.2 → testmcpy-0.7.3}/testmcpy/evals/auth_evaluators.py +0 -0
  46. {testmcpy-0.7.2 → testmcpy-0.7.3}/testmcpy/evals/base_evaluators.py +0 -0
  47. {testmcpy-0.7.2 → testmcpy-0.7.3}/testmcpy/evals/evaluator_packs.py +0 -0
  48. {testmcpy-0.7.2 → testmcpy-0.7.3}/testmcpy/formatters/__init__.py +0 -0
  49. {testmcpy-0.7.2 → testmcpy-0.7.3}/testmcpy/formatters/base.py +0 -0
  50. {testmcpy-0.7.2 → testmcpy-0.7.3}/testmcpy/formatters/curl.py +0 -0
  51. {testmcpy-0.7.2 → testmcpy-0.7.3}/testmcpy/formatters/graphql.py +0 -0
  52. {testmcpy-0.7.2 → testmcpy-0.7.3}/testmcpy/formatters/javascript_client.py +0 -0
  53. {testmcpy-0.7.2 → testmcpy-0.7.3}/testmcpy/formatters/json_yaml.py +0 -0
  54. {testmcpy-0.7.2 → testmcpy-0.7.3}/testmcpy/formatters/protobuf.py +0 -0
  55. {testmcpy-0.7.2 → testmcpy-0.7.3}/testmcpy/formatters/python.py +0 -0
  56. {testmcpy-0.7.2 → testmcpy-0.7.3}/testmcpy/formatters/python_client.py +0 -0
  57. {testmcpy-0.7.2 → testmcpy-0.7.3}/testmcpy/formatters/thrift.py +0 -0
  58. {testmcpy-0.7.2 → testmcpy-0.7.3}/testmcpy/formatters/typescript.py +0 -0
  59. {testmcpy-0.7.2 → testmcpy-0.7.3}/testmcpy/formatters/typescript_client.py +0 -0
  60. {testmcpy-0.7.2 → testmcpy-0.7.3}/testmcpy/llm_profiles.py +0 -0
  61. {testmcpy-0.7.2 → testmcpy-0.7.3}/testmcpy/mcp_profiles.py +0 -0
  62. {testmcpy-0.7.2 → testmcpy-0.7.3}/testmcpy/migrate_json.py +0 -0
  63. {testmcpy-0.7.2 → testmcpy-0.7.3}/testmcpy/models.py +0 -0
  64. {testmcpy-0.7.2 → testmcpy-0.7.3}/testmcpy/research/claude_sdk_detailed_exploration.py +0 -0
  65. {testmcpy-0.7.2 → testmcpy-0.7.3}/testmcpy/research/claude_sdk_poc.py +0 -0
  66. {testmcpy-0.7.2 → testmcpy-0.7.3}/testmcpy/research/claude_sdk_working_poc.py +0 -0
  67. {testmcpy-0.7.2 → testmcpy-0.7.3}/testmcpy/research/test_ollama_tools.py +0 -0
  68. {testmcpy-0.7.2 → testmcpy-0.7.3}/testmcpy/server/__init__.py +0 -0
  69. {testmcpy-0.7.2 → testmcpy-0.7.3}/testmcpy/server/api.py +0 -0
  70. {testmcpy-0.7.2 → testmcpy-0.7.3}/testmcpy/server/api.py.bak +0 -0
  71. {testmcpy-0.7.2 → testmcpy-0.7.3}/testmcpy/server/auth_middleware.py +0 -0
  72. {testmcpy-0.7.2 → testmcpy-0.7.3}/testmcpy/server/helpers/__init__.py +0 -0
  73. {testmcpy-0.7.2 → testmcpy-0.7.3}/testmcpy/server/helpers/mcp_config.py +0 -0
  74. {testmcpy-0.7.2 → testmcpy-0.7.3}/testmcpy/server/models.py +0 -0
  75. {testmcpy-0.7.2 → testmcpy-0.7.3}/testmcpy/server/routers/__init__.py +0 -0
  76. {testmcpy-0.7.2 → testmcpy-0.7.3}/testmcpy/server/routers/agent.py +0 -0
  77. {testmcpy-0.7.2 → testmcpy-0.7.3}/testmcpy/server/routers/auth.py +0 -0
  78. {testmcpy-0.7.2 → testmcpy-0.7.3}/testmcpy/server/routers/compare.py +0 -0
  79. {testmcpy-0.7.2 → testmcpy-0.7.3}/testmcpy/server/routers/compatibility.py +0 -0
  80. {testmcpy-0.7.2 → testmcpy-0.7.3}/testmcpy/server/routers/generation_logs.py +0 -0
  81. {testmcpy-0.7.2 → testmcpy-0.7.3}/testmcpy/server/routers/health.py +0 -0
  82. {testmcpy-0.7.2 → testmcpy-0.7.3}/testmcpy/server/routers/llm.py +0 -0
  83. {testmcpy-0.7.2 → testmcpy-0.7.3}/testmcpy/server/routers/mcp_profiles.py +0 -0
  84. {testmcpy-0.7.2 → testmcpy-0.7.3}/testmcpy/server/routers/metrics.py +0 -0
  85. {testmcpy-0.7.2 → testmcpy-0.7.3}/testmcpy/server/routers/results.py +0 -0
  86. {testmcpy-0.7.2 → testmcpy-0.7.3}/testmcpy/server/routers/search.py +0 -0
  87. {testmcpy-0.7.2 → testmcpy-0.7.3}/testmcpy/server/routers/security.py +0 -0
  88. {testmcpy-0.7.2 → testmcpy-0.7.3}/testmcpy/server/routers/smoke_reports.py +0 -0
  89. {testmcpy-0.7.2 → testmcpy-0.7.3}/testmcpy/server/routers/test_profiles.py +0 -0
  90. {testmcpy-0.7.2 → testmcpy-0.7.3}/testmcpy/server/routers/tests.py +0 -0
  91. {testmcpy-0.7.2 → testmcpy-0.7.3}/testmcpy/server/routers/tools.py +0 -0
  92. {testmcpy-0.7.2 → testmcpy-0.7.3}/testmcpy/server/state.py +0 -0
  93. {testmcpy-0.7.2 → testmcpy-0.7.3}/testmcpy/server/websocket.py +0 -0
  94. {testmcpy-0.7.2 → testmcpy-0.7.3}/testmcpy/smoke_test.py +0 -0
  95. {testmcpy-0.7.2 → testmcpy-0.7.3}/testmcpy/src/__init__.py +0 -0
  96. {testmcpy-0.7.2 → testmcpy-0.7.3}/testmcpy/src/baseline.py +0 -0
  97. {testmcpy-0.7.2 → testmcpy-0.7.3}/testmcpy/src/ci_gate.py +0 -0
  98. {testmcpy-0.7.2 → testmcpy-0.7.3}/testmcpy/src/comparison_runner.py +0 -0
  99. {testmcpy-0.7.2 → testmcpy-0.7.3}/testmcpy/src/coverage_analyzer.py +0 -0
  100. {testmcpy-0.7.2 → testmcpy-0.7.3}/testmcpy/src/html_report.py +0 -0
  101. {testmcpy-0.7.2 → testmcpy-0.7.3}/testmcpy/src/mcp_client.py +0 -0
  102. {testmcpy-0.7.2 → testmcpy-0.7.3}/testmcpy/src/metamorphic.py +0 -0
  103. {testmcpy-0.7.2 → testmcpy-0.7.3}/testmcpy/src/model_registry.py +0 -0
  104. {testmcpy-0.7.2 → testmcpy-0.7.3}/testmcpy/src/models.py +0 -0
  105. {testmcpy-0.7.2 → testmcpy-0.7.3}/testmcpy/src/multi_env.py +0 -0
  106. {testmcpy-0.7.2 → testmcpy-0.7.3}/testmcpy/src/oauth_flows.py +0 -0
  107. {testmcpy-0.7.2 → testmcpy-0.7.3}/testmcpy/src/prompt_mutation.py +0 -0
  108. {testmcpy-0.7.2 → testmcpy-0.7.3}/testmcpy/src/report_generator.py +0 -0
  109. {testmcpy-0.7.2 → testmcpy-0.7.3}/testmcpy/src/runner_tools.py +0 -0
  110. {testmcpy-0.7.2 → testmcpy-0.7.3}/testmcpy/src/schema_diff.py +0 -0
  111. {testmcpy-0.7.2 → testmcpy-0.7.3}/testmcpy/src/test_runner.py +0 -0
  112. {testmcpy-0.7.2 → testmcpy-0.7.3}/testmcpy/src/token_manager.py +0 -0
  113. {testmcpy-0.7.2 → testmcpy-0.7.3}/testmcpy/storage.py +0 -0
  114. {testmcpy-0.7.2 → testmcpy-0.7.3}/testmcpy/test_profiles.py +0 -0
  115. {testmcpy-0.7.2 → testmcpy-0.7.3}/testmcpy/ui/README.md +0 -0
  116. {testmcpy-0.7.2 → testmcpy-0.7.3}/testmcpy/ui/dist/assets/index-30Ed2JCz.css +0 -0
  117. {testmcpy-0.7.2 → testmcpy-0.7.3}/testmcpy/ui/dist/assets/index-6JiH0p1L.js +0 -0
  118. {testmcpy-0.7.2 → testmcpy-0.7.3}/testmcpy/ui/dist/index.html +0 -0
  119. {testmcpy-0.7.2 → testmcpy-0.7.3}/testmcpy/ui/index.html +0 -0
  120. {testmcpy-0.7.2 → testmcpy-0.7.3}/testmcpy/ui/package-lock.json +0 -0
  121. {testmcpy-0.7.2 → testmcpy-0.7.3}/testmcpy/ui/package.json +0 -0
  122. {testmcpy-0.7.2 → testmcpy-0.7.3}/testmcpy/ui/postcss.config.js +0 -0
  123. {testmcpy-0.7.2 → testmcpy-0.7.3}/testmcpy/ui/src/App.jsx +0 -0
  124. {testmcpy-0.7.2 → testmcpy-0.7.3}/testmcpy/ui/src/components/CommandPalette.jsx +0 -0
  125. {testmcpy-0.7.2 → testmcpy-0.7.3}/testmcpy/ui/src/components/CompareToolsTab.jsx +0 -0
  126. {testmcpy-0.7.2 → testmcpy-0.7.3}/testmcpy/ui/src/components/EditorStatusBar.jsx +0 -0
  127. {testmcpy-0.7.2 → testmcpy-0.7.3}/testmcpy/ui/src/components/EditorTabStrip.jsx +0 -0
  128. {testmcpy-0.7.2 → testmcpy-0.7.3}/testmcpy/ui/src/components/ErrorAlert.jsx +0 -0
  129. {testmcpy-0.7.2 → testmcpy-0.7.3}/testmcpy/ui/src/components/ErrorBoundary.jsx +0 -0
  130. {testmcpy-0.7.2 → testmcpy-0.7.3}/testmcpy/ui/src/components/LLMProfileSelector.jsx +0 -0
  131. {testmcpy-0.7.2 → testmcpy-0.7.3}/testmcpy/ui/src/components/LoadingSpinner.jsx +0 -0
  132. {testmcpy-0.7.2 → testmcpy-0.7.3}/testmcpy/ui/src/components/MCPProfileSelector.jsx +0 -0
  133. {testmcpy-0.7.2 → testmcpy-0.7.3}/testmcpy/ui/src/components/NotificationProvider.jsx +0 -0
  134. {testmcpy-0.7.2 → testmcpy-0.7.3}/testmcpy/ui/src/components/OptimizeDocsModal.jsx +0 -0
  135. {testmcpy-0.7.2 → testmcpy-0.7.3}/testmcpy/ui/src/components/OutputDiff.jsx +0 -0
  136. {testmcpy-0.7.2 → testmcpy-0.7.3}/testmcpy/ui/src/components/ParameterCard.jsx +0 -0
  137. {testmcpy-0.7.2 → testmcpy-0.7.3}/testmcpy/ui/src/components/SchemaCodeViewer.jsx +0 -0
  138. {testmcpy-0.7.2 → testmcpy-0.7.3}/testmcpy/ui/src/components/SkeletonLoader.jsx +0 -0
  139. {testmcpy-0.7.2 → testmcpy-0.7.3}/testmcpy/ui/src/components/StreamingLogViewer.jsx +0 -0
  140. {testmcpy-0.7.2 → testmcpy-0.7.3}/testmcpy/ui/src/components/TestGenerationModal.jsx +0 -0
  141. {testmcpy-0.7.2 → testmcpy-0.7.3}/testmcpy/ui/src/components/TestProfileSelector.jsx +0 -0
  142. {testmcpy-0.7.2 → testmcpy-0.7.3}/testmcpy/ui/src/components/TestResultPanel.jsx +0 -0
  143. {testmcpy-0.7.2 → testmcpy-0.7.3}/testmcpy/ui/src/components/TestStatusIndicator.jsx +0 -0
  144. {testmcpy-0.7.2 → testmcpy-0.7.3}/testmcpy/ui/src/components/ToolCallTimeline.jsx +0 -0
  145. {testmcpy-0.7.2 → testmcpy-0.7.3}/testmcpy/ui/src/components/ToolComparison.jsx +0 -0
  146. {testmcpy-0.7.2 → testmcpy-0.7.3}/testmcpy/ui/src/components/ToolDebugModal.jsx +0 -0
  147. {testmcpy-0.7.2 → testmcpy-0.7.3}/testmcpy/ui/src/components/TraceView.jsx +0 -0
  148. {testmcpy-0.7.2 → testmcpy-0.7.3}/testmcpy/ui/src/components/TypeBadge.jsx +0 -0
  149. {testmcpy-0.7.2 → testmcpy-0.7.3}/testmcpy/ui/src/components/Wizard.jsx +0 -0
  150. {testmcpy-0.7.2 → testmcpy-0.7.3}/testmcpy/ui/src/contexts/TestRunContext.jsx +0 -0
  151. {testmcpy-0.7.2 → testmcpy-0.7.3}/testmcpy/ui/src/contexts/ThemeContext.jsx +0 -0
  152. {testmcpy-0.7.2 → testmcpy-0.7.3}/testmcpy/ui/src/hooks/useEditorTheme.js +0 -0
  153. {testmcpy-0.7.2 → testmcpy-0.7.3}/testmcpy/ui/src/hooks/useKeyboardShortcuts.js +0 -0
  154. {testmcpy-0.7.2 → testmcpy-0.7.3}/testmcpy/ui/src/hooks/useSafeFetch.js +0 -0
  155. {testmcpy-0.7.2 → testmcpy-0.7.3}/testmcpy/ui/src/index.css +0 -0
  156. {testmcpy-0.7.2 → testmcpy-0.7.3}/testmcpy/ui/src/main.jsx +0 -0
  157. {testmcpy-0.7.2 → testmcpy-0.7.3}/testmcpy/ui/src/pages/AuthDebugger.jsx +0 -0
  158. {testmcpy-0.7.2 → testmcpy-0.7.3}/testmcpy/ui/src/pages/ChatInterface.jsx +0 -0
  159. {testmcpy-0.7.2 → testmcpy-0.7.3}/testmcpy/ui/src/pages/CompatibilityMatrix.jsx +0 -0
  160. {testmcpy-0.7.2 → testmcpy-0.7.3}/testmcpy/ui/src/pages/Configuration.jsx +0 -0
  161. {testmcpy-0.7.2 → testmcpy-0.7.3}/testmcpy/ui/src/pages/GenerationHistory.jsx +0 -0
  162. {testmcpy-0.7.2 → testmcpy-0.7.3}/testmcpy/ui/src/pages/LLMProfiles.jsx +0 -0
  163. {testmcpy-0.7.2 → testmcpy-0.7.3}/testmcpy/ui/src/pages/MCPExplorer.jsx +0 -0
  164. {testmcpy-0.7.2 → testmcpy-0.7.3}/testmcpy/ui/src/pages/MCPHealth.jsx +0 -0
  165. {testmcpy-0.7.2 → testmcpy-0.7.3}/testmcpy/ui/src/pages/MCPProfiles.jsx +0 -0
  166. {testmcpy-0.7.2 → testmcpy-0.7.3}/testmcpy/ui/src/pages/MetricsDashboard.jsx +0 -0
  167. {testmcpy-0.7.2 → testmcpy-0.7.3}/testmcpy/ui/src/pages/ProfilesManager.jsx +0 -0
  168. {testmcpy-0.7.2 → testmcpy-0.7.3}/testmcpy/ui/src/pages/Reports.jsx +0 -0
  169. {testmcpy-0.7.2 → testmcpy-0.7.3}/testmcpy/ui/src/pages/RunComparison.jsx +0 -0
  170. {testmcpy-0.7.2 → testmcpy-0.7.3}/testmcpy/ui/src/pages/SecurityDashboard.jsx +0 -0
  171. {testmcpy-0.7.2 → testmcpy-0.7.3}/testmcpy/ui/src/pages/TestManager.jsx +0 -0
  172. {testmcpy-0.7.2 → testmcpy-0.7.3}/testmcpy/ui/src/utils/__tests__/formatConverters.test.js +0 -0
  173. {testmcpy-0.7.2 → testmcpy-0.7.3}/testmcpy/ui/src/utils/formatConverters.js +0 -0
  174. {testmcpy-0.7.2 → testmcpy-0.7.3}/testmcpy/ui/tailwind.config.js +0 -0
  175. {testmcpy-0.7.2 → testmcpy-0.7.3}/testmcpy/ui/vite.config.js +0 -0
  176. {testmcpy-0.7.2 → testmcpy-0.7.3}/testmcpy.egg-info/SOURCES.txt +0 -0
  177. {testmcpy-0.7.2 → testmcpy-0.7.3}/testmcpy.egg-info/dependency_links.txt +0 -0
  178. {testmcpy-0.7.2 → testmcpy-0.7.3}/testmcpy.egg-info/entry_points.txt +0 -0
  179. {testmcpy-0.7.2 → testmcpy-0.7.3}/testmcpy.egg-info/requires.txt +0 -0
  180. {testmcpy-0.7.2 → testmcpy-0.7.3}/testmcpy.egg-info/top_level.txt +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: testmcpy
3
- Version: 0.7.2
3
+ Version: 0.7.3
4
4
  Summary: A comprehensive testing framework for validating LLM tool calling capabilities with MCP services
5
5
  Author: Amin Ghadersohi
6
6
  License-Expression: Apache-2.0
@@ -93,7 +93,7 @@ testmcpy = [
93
93
 
94
94
  [project]
95
95
  name = "testmcpy"
96
- version = "0.7.2"
96
+ version = "0.7.3"
97
97
  description = "A comprehensive testing framework for validating LLM tool calling capabilities with MCP services"
98
98
  authors = [{name = "Amin Ghadersohi"}]
99
99
  license = "Apache-2.0"
@@ -11,6 +11,6 @@ try:
11
11
  __version__ = version("testmcpy")
12
12
  except Exception:
13
13
  # Fallback for development or when package not installed
14
- __version__ = "0.7.2"
14
+ __version__ = "0.7.3"
15
15
 
16
16
  __author__ = "testmcpy Contributors"
@@ -296,6 +296,18 @@ def run(
296
296
  "provider (default: provider class's _DEFAULT_COMPLETIONS_PATH)"
297
297
  ),
298
298
  ),
299
+ max_concurrent_streams: Optional[int] = typer.Option(
300
+ None,
301
+ "--max-concurrent-streams",
302
+ help=(
303
+ "Process-wide cap on concurrent SSE streams for the "
304
+ "assistant/chatbot provider. Useful when a parent harness "
305
+ "spawns many testmcpy children at once and the chatbot "
306
+ "endpoint stalls under load. None / 0 = unbounded (default). "
307
+ "Limit applies across all AssistantProvider instances in the "
308
+ "process. (SC-106138)"
309
+ ),
310
+ ),
299
311
  ):
300
312
  """
301
313
  Run test cases against MCP service.
@@ -470,6 +482,27 @@ def run(
470
482
  if value is not None:
471
483
  effective_provider_config[key] = value
472
484
 
485
+ # Apply the concurrency limit at the class level so every
486
+ # AssistantProvider instance in this process shares the cap.
487
+ # SC-106138: agor harness fan-out can stall the chatbot when
488
+ # too many SSE streams open at once.
489
+ #
490
+ # Always call configure_concurrency_limit() (even with None)
491
+ # so the CLI flag is a true override/reset — otherwise a
492
+ # prior in-process configuration could leak when run() is
493
+ # invoked multiple times within the same Python process.
494
+ from testmcpy.src.llm_integration import AssistantProvider
495
+
496
+ AssistantProvider.configure_concurrency_limit(max_concurrent_streams)
497
+ if verbose:
498
+ if max_concurrent_streams:
499
+ console.print(
500
+ f"[cyan]Concurrency cap:[/cyan] "
501
+ f"max {max_concurrent_streams} concurrent SSE streams"
502
+ )
503
+ else:
504
+ console.print("[cyan]Concurrency cap:[/cyan] unbounded")
505
+
473
506
  if suite_provider and verbose:
474
507
  console.print(f"[yellow]Suite-level provider override:[/yellow] {suite_provider}")
475
508
  if suite_provider_config:
@@ -2133,6 +2133,72 @@ class AssistantProvider(LLMProvider):
2133
2133
  # (SC-105915). Class-level so subclasses / tests can override.
2134
2134
  SSE_IDLE_ABORT_SECONDS: float = 90.0
2135
2135
 
2136
+ # Hard ceiling on the entire SSE consumption — kicks in even when
2137
+ # bytes ARE flowing (just slowly) so the agor parallel-cycle harness
2138
+ # never sees a child stay alive past this. Distinct from the idle
2139
+ # abort: idle = "no progress at all"; per-call wall-clock = "any
2140
+ # progress, but too slow overall". Observed in c28-c32 against the
2141
+ # staging chatbot (SC-106138). Class-level so callers can override.
2142
+ PER_CALL_WALL_CLOCK_SECONDS: float = 180.0
2143
+
2144
+ # Emit a structured heartbeat log line every N seconds while the SSE
2145
+ # stream is open. Lets a parent harness distinguish "child is still
2146
+ # streaming" from "child is wedged" without parsing every event.
2147
+ HEARTBEAT_SECONDS: float = 10.0
2148
+
2149
+ # Optional process-wide cap on concurrent SSE streams. Set via
2150
+ # ``--max-concurrent-streams`` on ``testmcpy run``. ``None`` =
2151
+ # unbounded. Stored as a class attribute (not instance) so multiple
2152
+ # AssistantProvider instances inside the same process share it.
2153
+ #
2154
+ # The Semaphore itself is lazily allocated inside the event loop on
2155
+ # first use — `asyncio.Semaphore` binds to the running loop, so
2156
+ # creating it at sync configuration time would either fail or bind
2157
+ # to the wrong loop.
2158
+ _max_concurrent_streams: int | None = None
2159
+ _stream_semaphore: asyncio.Semaphore | None = None
2160
+ _stream_semaphore_loop: object | None = None # the loop the sem was bound to
2161
+
2162
+ @classmethod
2163
+ def configure_concurrency_limit(cls, max_streams: int | None) -> None:
2164
+ """Set the process-wide cap on concurrent SSE streams.
2165
+
2166
+ ``None`` (or 0) → unbounded. Positive int → cap. Negative
2167
+ values raise ``ValueError`` (a Semaphore with a negative
2168
+ capacity would crash at acquire time, so reject up front).
2169
+ The class-level ``asyncio.Semaphore`` is created lazily on
2170
+ first use and shared across all AssistantProvider instances
2171
+ in the process. Safe to call multiple times — the semaphore
2172
+ is re-created lazily next time ``_get_stream_semaphore`` is
2173
+ called.
2174
+ """
2175
+ if max_streams is not None and max_streams < 0:
2176
+ raise ValueError(
2177
+ f"max_streams must be a non-negative int or None, "
2178
+ f"got {max_streams!r}. Use 0 or None for unbounded."
2179
+ )
2180
+ if not max_streams:
2181
+ cls._max_concurrent_streams = None
2182
+ else:
2183
+ cls._max_concurrent_streams = max_streams
2184
+ # Drop any existing semaphore so the next acquire rebuilds with
2185
+ # the new limit (and rebinds to the current event loop).
2186
+ cls._stream_semaphore = None
2187
+ cls._stream_semaphore_loop = None
2188
+
2189
+ @classmethod
2190
+ def _get_stream_semaphore(cls) -> asyncio.Semaphore | None:
2191
+ """Return the (lazily-created) class-level Semaphore, or None
2192
+ if no concurrency limit is configured. Rebinds to the running
2193
+ loop if the previously-bound loop is gone (test isolation)."""
2194
+ if not cls._max_concurrent_streams:
2195
+ return None
2196
+ running_loop = asyncio.get_running_loop()
2197
+ if cls._stream_semaphore is None or cls._stream_semaphore_loop is not running_loop:
2198
+ cls._stream_semaphore = asyncio.Semaphore(cls._max_concurrent_streams)
2199
+ cls._stream_semaphore_loop = running_loop
2200
+ return cls._stream_semaphore
2201
+
2136
2202
  def __init__(
2137
2203
  self,
2138
2204
  model: str = "default",
@@ -2257,17 +2323,58 @@ class AssistantProvider(LLMProvider):
2257
2323
 
2258
2324
  log(f"[Assistant] POST {completions_url} (conversation={self._conversation_id})")
2259
2325
 
2260
- # Idle abort: if the SSE stream emits NO recognized event within
2261
- # this many seconds, give up. This catches a chatbot backend that
2262
- # keeps the connection open (so httpx's per-event read timeout
2263
- # never fires) but stops sending real progress — observed in
2264
- # eval cycle c29 (SC-105915) where C00_9, C01_9, C02_7 hung
2265
- # despite the per-test wall-clock added in v0.7.1.
2326
+ # Three layers of timeout protection on the SSE consumption:
2327
+ # 1. SSE_IDLE_ABORT_SECONDS — fires when no recognized event
2328
+ # arrives for N seconds (server still sending keepalives
2329
+ # but no real progress). c29 (SC-105915).
2330
+ # 2. PER_CALL_WALL_CLOCK_SECONDS — fires when total time on
2331
+ # THIS call exceeds the budget, regardless of progress.
2332
+ # Catches the slow-but-not-stuck case the agor harness
2333
+ # hits in c28-c32 (SC-106138) where bytes keep flowing
2334
+ # but the call takes 5+ minutes.
2335
+ # 3. HEARTBEAT_SECONDS — non-fatal: emits a "still streaming"
2336
+ # log line every N seconds so a parent harness can tell
2337
+ # a slow stream from a wedged one.
2266
2338
  sse_idle_abort_seconds = self.SSE_IDLE_ABORT_SECONDS
2339
+ per_call_wall_clock_seconds = self.PER_CALL_WALL_CLOCK_SECONDS
2340
+ heartbeat_seconds = self.HEARTBEAT_SECONDS
2267
2341
  last_event_at = time.time()
2342
+ last_heartbeat_at = time.time()
2268
2343
  idle_aborted = False
2344
+ wall_clock_aborted = False
2345
+ event_count = 0
2269
2346
 
2270
2347
  state = _SSEStreamState()
2348
+ # Optional process-wide concurrency cap. When unset the semaphore
2349
+ # is None and acquisition is a no-op. Held for the entire SSE
2350
+ # consumption so the cap really does limit parallel streams.
2351
+ sem = type(self)._get_stream_semaphore()
2352
+ sem_held = False
2353
+ if sem is not None:
2354
+ sem_wait_start = time.time()
2355
+ await sem.acquire()
2356
+ sem_held = True
2357
+ sem_wait = time.time() - sem_wait_start
2358
+ if sem_wait > 0.5:
2359
+ log(
2360
+ f"[Assistant] Waited {sem_wait:.1f}s for concurrency-limit "
2361
+ f"semaphore (max={type(self)._max_concurrent_streams})"
2362
+ )
2363
+ # Reset the per-call wall-clock baseline AFTER we actually
2364
+ # got a slot: time spent waiting for the semaphore should
2365
+ # not consume the SSE budget. start_time stays as-is for
2366
+ # the overall LLMResult.duration; the SSE loop uses
2367
+ # `stream_start_time` from here on.
2368
+ stream_start_time = time.time()
2369
+ else:
2370
+ stream_start_time = start_time
2371
+
2372
+ def _release_sem():
2373
+ nonlocal sem_held
2374
+ if sem is not None and sem_held:
2375
+ sem.release()
2376
+ sem_held = False
2377
+
2271
2378
  try:
2272
2379
  async with self._client.stream(
2273
2380
  "POST", completions_url, headers=headers, json=payload, timeout=timeout
@@ -2287,32 +2394,62 @@ class AssistantProvider(LLMProvider):
2287
2394
  # catches the case where the SSE connection stays open
2288
2395
  # but never sends another byte (real-world c29 hang).
2289
2396
  line_iter = resp.aiter_lines().__aiter__()
2290
- budget_str = _format_seconds(sse_idle_abort_seconds)
2397
+ idle_budget_str = _format_seconds(sse_idle_abort_seconds)
2398
+ wall_clock_budget_str = _format_seconds(per_call_wall_clock_seconds)
2291
2399
  while True:
2292
- elapsed = time.time() - last_event_at
2293
- remaining = sse_idle_abort_seconds - elapsed
2294
- if remaining <= 0:
2400
+ now = time.time()
2401
+ # Per-call wall-clock check: total time spent on the
2402
+ # SSE stream itself (NOT counting time waiting for
2403
+ # the concurrency-limit semaphore) exceeded budget.
2404
+ total_elapsed = now - stream_start_time
2405
+ if total_elapsed >= per_call_wall_clock_seconds:
2406
+ log(
2407
+ f"[Assistant] SSE wall-clock abort: per-call budget "
2408
+ f"{wall_clock_budget_str} exceeded "
2409
+ f"({total_elapsed:.0f}s, {event_count} events) — "
2410
+ "closing stream"
2411
+ )
2412
+ wall_clock_aborted = True
2413
+ break
2414
+ # Idle check: no recognized event for too long.
2415
+ elapsed_since_event = now - last_event_at
2416
+ idle_remaining = sse_idle_abort_seconds - elapsed_since_event
2417
+ if idle_remaining <= 0:
2295
2418
  log(
2296
2419
  f"[Assistant] SSE idle abort: no recognized event for "
2297
- f"{budget_str} — closing stream"
2420
+ f"{idle_budget_str} — closing stream"
2298
2421
  )
2299
2422
  idle_aborted = True
2300
2423
  break
2424
+ # Heartbeat: non-fatal "still alive" log.
2425
+ if now - last_heartbeat_at >= heartbeat_seconds:
2426
+ log(
2427
+ f"[Assistant] still streaming … "
2428
+ f"{total_elapsed:.0f}s elapsed, "
2429
+ f"{event_count} events, "
2430
+ f"{elapsed_since_event:.0f}s since last event"
2431
+ )
2432
+ last_heartbeat_at = now
2433
+ # Per-line read budget = min(idle_remaining, time-to-next-heartbeat,
2434
+ # wall-clock-remaining). Smaller waits let the heartbeat /
2435
+ # wall-clock checks fire on schedule even when no bytes arrive.
2436
+ wall_clock_remaining = per_call_wall_clock_seconds - total_elapsed
2437
+ next_heartbeat_in = heartbeat_seconds - (now - last_heartbeat_at)
2438
+ read_timeout = max(
2439
+ 0.05,
2440
+ min(idle_remaining, wall_clock_remaining, next_heartbeat_in),
2441
+ )
2301
2442
  try:
2302
- raw_line = await asyncio.wait_for(line_iter.__anext__(), timeout=remaining)
2443
+ raw_line = await asyncio.wait_for(
2444
+ line_iter.__anext__(), timeout=read_timeout
2445
+ )
2303
2446
  except StopAsyncIteration:
2304
2447
  break
2305
2448
  except asyncio.TimeoutError:
2306
- # Budget is measured since the last *recognized* event.
2307
- # Unrecognized noise (keepalives, malformed events) does
2308
- # NOT reset last_event_at, so this fires correctly even
2309
- # if bytes are arriving without real progress.
2310
- log(
2311
- f"[Assistant] SSE idle abort: no recognized event for "
2312
- f"{budget_str} — closing stream"
2313
- )
2314
- idle_aborted = True
2315
- break
2449
+ # Read deadline expired — loop top will re-check
2450
+ # idle / wall-clock / heartbeat. Most likely the
2451
+ # heartbeat tick.
2452
+ continue
2316
2453
 
2317
2454
  line = raw_line.strip()
2318
2455
  if not line:
@@ -2340,10 +2477,12 @@ class AssistantProvider(LLMProvider):
2340
2477
  self._handle_sse_event(current_event, data, state, log)
2341
2478
  # A real event arrived — reset the idle timer.
2342
2479
  last_event_at = time.time()
2480
+ event_count += 1
2343
2481
 
2344
2482
  except httpx.TimeoutException:
2345
2483
  duration = time.time() - start_time
2346
2484
  log(f"[Assistant] TIMEOUT after {duration:.1f}s")
2485
+ _release_sem()
2347
2486
  return LLMResult(
2348
2487
  response=f"Error: Assistant request timed out after {timeout}s",
2349
2488
  tool_calls=state.tool_calls,
@@ -2353,16 +2492,37 @@ class AssistantProvider(LLMProvider):
2353
2492
  except (httpx.HTTPStatusError, httpx.ConnectError, RuntimeError) as e:
2354
2493
  duration = time.time() - start_time
2355
2494
  log(f"[Assistant] Request failed: {e}")
2495
+ _release_sem()
2356
2496
  return LLMResult(
2357
2497
  response=f"Error: {e}",
2358
2498
  tool_calls=state.tool_calls,
2359
2499
  duration=duration,
2360
2500
  logs=logs,
2361
2501
  )
2502
+ finally:
2503
+ # Always release on the success path. ``_release_sem`` is
2504
+ # idempotent (sem_held flag) so it's safe to also call on
2505
+ # the except branches above.
2506
+ _release_sem()
2362
2507
 
2363
2508
  duration = time.time() - start_time
2364
2509
  if state.got_error and not state.response_text:
2365
2510
  state.response_text = f"Error: {state.error_message}"
2511
+ elif wall_clock_aborted and not state.response_text:
2512
+ # Surface the wall-clock abort with the same shape as the
2513
+ # idle abort so evaluators see a clean error string. Uses
2514
+ # stream_elapsed (NOT total duration) so the reported time
2515
+ # matches the actual budget — total `duration` would
2516
+ # include semaphore-wait time which by design isn't
2517
+ # charged against the wall-clock budget.
2518
+ stream_elapsed = time.time() - stream_start_time
2519
+ state.response_text = (
2520
+ f"Error: SSE stream exceeded the per-call wall-clock budget of "
2521
+ f"{_format_seconds(per_call_wall_clock_seconds)} "
2522
+ f"({stream_elapsed:.0f}s elapsed, {event_count} events). "
2523
+ "The stream was making progress but too slowly. Aborted to "
2524
+ "free the runner (SC-106138)."
2525
+ )
2366
2526
  elif idle_aborted and not state.response_text:
2367
2527
  # Surface the idle abort cleanly so evaluators don't see an
2368
2528
  # empty response with no explanation.
@@ -2373,12 +2533,17 @@ class AssistantProvider(LLMProvider):
2373
2533
  "open but stopped emitting progress. Aborted to free the runner."
2374
2534
  )
2375
2535
 
2536
+ abort_marker = ""
2537
+ if idle_aborted:
2538
+ abort_marker = " [SSE idle aborted]"
2539
+ elif wall_clock_aborted:
2540
+ abort_marker = " [SSE wall-clock aborted]"
2376
2541
  log(
2377
2542
  f"[Assistant] Done: {len(state.response_text)} chars, "
2378
2543
  f"{len(state.tool_calls)} tool calls, {state.token_event_count} tokens, "
2379
2544
  f"final={'yes' if state.got_final else 'no'}, "
2380
2545
  f"error={'yes' if state.got_error else 'no'}, "
2381
- f"{duration:.2f}s" + (" [SSE idle aborted]" if idle_aborted else "")
2546
+ f"{duration:.2f}s" + abort_marker
2382
2547
  )
2383
2548
 
2384
2549
  return LLMResult(
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: testmcpy
3
- Version: 0.7.2
3
+ Version: 0.7.3
4
4
  Summary: A comprehensive testing framework for validating LLM tool calling capabilities with MCP services
5
5
  Author: Amin Ghadersohi
6
6
  License-Expression: Apache-2.0
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes