universal_mcp-0.1.24rc2-py3-none-any.whl → universal_mcp-0.1.24rc4-py3-none-any.whl
This diff compares the publicly released contents of the two package versions as they appear in their public registry, and is provided for informational purposes only.
- universal_mcp/agentr/README.md +201 -0
- universal_mcp/agentr/__init__.py +6 -0
- universal_mcp/agentr/agentr.py +30 -0
- universal_mcp/{utils/agentr.py → agentr/client.py} +19 -3
- universal_mcp/agentr/integration.py +104 -0
- universal_mcp/agentr/registry.py +91 -0
- universal_mcp/agentr/server.py +51 -0
- universal_mcp/agents/__init__.py +6 -0
- universal_mcp/agents/auto.py +576 -0
- universal_mcp/agents/base.py +88 -0
- universal_mcp/agents/cli.py +27 -0
- universal_mcp/agents/codeact/__init__.py +243 -0
- universal_mcp/agents/codeact/sandbox.py +27 -0
- universal_mcp/agents/codeact/test.py +15 -0
- universal_mcp/agents/codeact/utils.py +61 -0
- universal_mcp/agents/hil.py +104 -0
- universal_mcp/agents/llm.py +10 -0
- universal_mcp/agents/react.py +58 -0
- universal_mcp/agents/simple.py +40 -0
- universal_mcp/agents/utils.py +111 -0
- universal_mcp/analytics.py +5 -7
- universal_mcp/applications/__init__.py +42 -75
- universal_mcp/applications/application.py +1 -1
- universal_mcp/applications/sample/app.py +245 -0
- universal_mcp/cli.py +10 -3
- universal_mcp/config.py +33 -7
- universal_mcp/exceptions.py +4 -0
- universal_mcp/integrations/__init__.py +0 -15
- universal_mcp/integrations/integration.py +9 -91
- universal_mcp/servers/__init__.py +2 -14
- universal_mcp/servers/server.py +10 -51
- universal_mcp/tools/__init__.py +3 -0
- universal_mcp/tools/adapters.py +20 -11
- universal_mcp/tools/manager.py +29 -56
- universal_mcp/tools/registry.py +41 -0
- universal_mcp/tools/tools.py +22 -1
- universal_mcp/types.py +10 -0
- universal_mcp/utils/common.py +245 -0
- universal_mcp/utils/openapi/api_generator.py +46 -18
- universal_mcp/utils/openapi/cli.py +445 -19
- universal_mcp/utils/openapi/openapi.py +284 -21
- universal_mcp/utils/openapi/postprocessor.py +275 -0
- universal_mcp/utils/openapi/preprocessor.py +1 -1
- universal_mcp/utils/openapi/test_generator.py +287 -0
- universal_mcp/utils/prompts.py +188 -341
- universal_mcp/utils/testing.py +190 -2
- {universal_mcp-0.1.24rc2.dist-info → universal_mcp-0.1.24rc4.dist-info}/METADATA +17 -3
- universal_mcp-0.1.24rc4.dist-info/RECORD +71 -0
- universal_mcp/applications/sample_tool_app.py +0 -80
- universal_mcp/client/agents/__init__.py +0 -4
- universal_mcp/client/agents/base.py +0 -38
- universal_mcp/client/agents/llm.py +0 -115
- universal_mcp/client/agents/react.py +0 -67
- universal_mcp/client/cli.py +0 -181
- universal_mcp-0.1.24rc2.dist-info/RECORD +0 -53
- {universal_mcp-0.1.24rc2.dist-info → universal_mcp-0.1.24rc4.dist-info}/WHEEL +0 -0
- {universal_mcp-0.1.24rc2.dist-info → universal_mcp-0.1.24rc4.dist-info}/entry_points.txt +0 -0
- {universal_mcp-0.1.24rc2.dist-info → universal_mcp-0.1.24rc4.dist-info}/licenses/LICENSE +0 -0
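The most visible change in the file list is a restructure: the AgentR client moved out of `universal_mcp/utils/` into a dedicated `universal_mcp/agentr/` package, and the old `universal_mcp/client/agents/` tree was replaced by a top-level `universal_mcp/agents/` package. Downstream imports have to follow the rename; a hedged migration sketch (the module paths come from the rename above, but the `AgentrClient` class name is an assumption, not confirmed by this diff):

```python
# Hypothetical import migration for the rc2 -> rc4 restructure.
# The module paths come from the rename in the file list above; the
# class name AgentrClient is an assumption, not confirmed by this diff.
try:
    # rc4 layout: the client lives in the new agentr package
    from universal_mcp.agentr.client import AgentrClient
except ImportError:
    # rc2 layout: the client lived under utils
    from universal_mcp.utils.agentr import AgentrClient
```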
universal_mcp/utils/openapi/preprocessor.py

```diff
@@ -227,7 +227,7 @@ def generate_description_llm(
     if len(param_context_str) > 1000:  # Limit context size
         param_context_str = param_context_str[:1000] + "..."
 
-    current_description = context.get("current_description"
+    current_description = context.get("current_description")
     if current_description and isinstance(current_description, str) and current_description.strip():
         user_prompt = f"""The current description for the API parameter named '{param_name}' located '{param_in}' for the '{method.upper()}' operation at path '{path_key}' is:\n'{current_description.strip()}'\n\nTask: Rewrite and enrich this description so it is clear, self-contained, and makes sense to a user. If the description is cut off, incomplete, or awkward, make it complete and natural. Ensure it is concise and under {MAX_DESCRIPTION_LENGTH} characters. Do not include any links, HTML, markdown, or any notes or comments about the character limit. Respond ONLY with the improved single-line description."""
         fallback_text = (
```
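The rc2 line left the `context.get(...)` call unclosed, which is a syntax error, so `preprocessor.py` could not even be imported; rc4 closes the call and restores the usual optional-lookup pattern. A minimal sketch of the intended behavior, using a stand-in `context` dict (the sample value is invented):

```python
# Minimal sketch of the fixed lookup with stand-in data (not from the diff).
context = {"current_description": "  ID of the mail folder  "}

current_description = context.get("current_description")  # None when the key is absent
if current_description and isinstance(current_description, str) and current_description.strip():
    print(f"Enriching existing description: {current_description.strip()!r}")
else:
    print("No usable description; generating one from scratch")
```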
universal_mcp/utils/openapi/test_generator.py (new file, +287 lines)

```python
import importlib
import os
from pathlib import Path

from langchain_core.messages import HumanMessage, SystemMessage
from langchain_openai import AzureChatOpenAI
from pydantic import BaseModel, SecretStr

from universal_mcp.tools import ToolManager
from universal_mcp.types import ToolFormat


class TestCaseOutput(BaseModel):
    """Single test case for LLM structured output (without app_instance)."""

    tools: list[str]
    tasks: list[str]
    validate_query: str


class MultiTestCaseOutput(BaseModel):
    """Multiple test cases for LLM structured output."""

    test_cases: list[TestCaseOutput]


def generate_test_cases(app_name: str, class_name: str, output_dir: str = "tests"):
    """Generate test cases for a given app and write to specified output directory.

    Args:
        app_name: Name of the app (e.g., "outlook")
        class_name: Name of the app class (e.g., "OutlookApp")
        output_dir: Directory to write the test file (default: "tests")
    """
    # Dynamically import the app class
    try:
        module = importlib.import_module(f"universal_mcp_{app_name}.app")
        app_class = getattr(module, class_name)
        app = app_class(integration=None)  # type: ignore
    except ImportError as e:
        raise ImportError(f"Could not import universal_mcp_{app_name}.app: {e}") from e
    except AttributeError as e:
        raise AttributeError(f"Class {class_name} not found in universal_mcp_{app_name}.app: {e}") from e

    tool_manager = ToolManager()
    tool_manager.register_tools_from_app(app, tags=["important"])
    tool_def = tool_manager.list_tools(format=ToolFormat.OPENAI)

    # Extract tool names for splitting
    tool_names = [tool["function"]["name"] for tool in tool_def]
    total_tools = len(tool_names)

    # Create system and user prompts
    system_prompt = """You are an expert QA Developer experienced in writing comprehensive test cases for API applications.

CORE PRINCIPLES:
- Generate MULTIPLE test cases that each focus on 3-4 tools maximum
- Each test case should have exactly 3 validation queries
- Ensure complete coverage of available tools across all test cases
- Create realistic scenarios that mirror actual user workflows
- Split tools logically into related functional groups

MANDATORY RULES:
1. MULTIPLE TEST CASE STRUCTURE:
   - Generate 3 separate test cases
   - Each test case should have 3-4 tools maximum
   - Each test case should have exactly 3 validation queries
   - Distribute all available tools across the test cases
   - Group related tools together (e.g., create+edit+delete, search+list, etc.)

2. TEST CASE INDEPENDENCE:
   - Each test case MUST be completely independent and self-contained
   - Tasks within a test case can ONLY reference steps within the SAME test case
   - NEVER reference "step 1 from Test Case 1" in Test Case 2 - each test case starts fresh
   - If multiple test cases need user_id, each should get it independently in their first task

3. TEST CASE DISTRIBUTION:
   - If there are 9 tools: split as 3,3,3 or 4,3,2
   - If there are 10 tools: split as 4,3,3 or 3,3,4
   - If there are 8 tools: split as 3,3,2
   - Always ensure all tools are covered across all test cases

4. TASK DEPENDENCY MANAGEMENT:
   - Always maintain tool dependency relationships within each test case
   - Ensure tool prerequisites are met before usage within the same test case
   - Include fallback scenarios for tool failures

5. NO PLACEHOLDER VALUES - CRITICAL:
   - NEVER use hardcoded placeholder values like 'user123', 'message_id_123', 'folder_id_456', etc.
   - ALWAYS reference data from previous API calls within the SAME test case
   - Use phrases like: "using the user_id from step 1 of this test case", "with the message_id returned from the previous step"
   - When IDs or identifiers are needed, specify they should come from actual API responses within the current test case
   - Example: "Using the user_id retrieved in step 1, call..." (where step 1 is in the current test case)

6. DYNAMIC DATA FLOW WITHIN TEST CASES:
   - Structure tasks so data flows from one step to the next WITHIN THE SAME TEST CASE
   - First task should typically fetch initial foundational data (user info, list items, etc.)
   - Subsequent tasks should use results from previous tasks in the SAME test case
   - Clearly specify which field/property from previous responses to use
   - Each test case should be runnable independently

7. CRUD OPERATIONS COVERAGE:
   - Analyze available tools to identify CRUD capabilities
   - CREATE: If create/send/compose tools are available, include operations that create new items
   - READ: If list/get/retrieve tools are available, include operations that fetch and display information
   - UPDATE: If update/edit/modify tools are available, include operations that modify existing items
   - DELETE: If delete/remove tools are available, include operations that remove items
   - Group related CRUD operations in the same test case when possible

8. VALIDATION REQUIREMENTS:
   - Each test case must have exactly 3 validation queries
   - Write detailed validation queries for every test case
   - Include both positive and negative validation scenarios
   - Verify data integrity after each operation
   - Check for proper error handling and edge cases

9. FORMATTING STANDARDS:
   - Use single quotes for nested quotes (e.g., "Send message with subject 'Hello World'")
   - Structure tasks in logical sequential order within each test case
   - Include clear, actionable step descriptions
   - Ensure validation queries are specific and measurable

EXAMPLE DATA FLOW PATTERN (WITHIN SAME TEST CASE):
Test Case 1:
- Step 1: "Get the user id."
- Step 2: "Send an email to example@gmail.com saying subject: Hello and message: Hey Agentr"
- Step 3: "List last 3 email in my inbox."
validate_query = (
    "Based on the conversation history, verify: "
    "1. Was a user Id retrieved? "
    "2. Was the email sent successfully (check for success response)? "
    "3. Were exactly 3 emails listed from the inbox (check if response shows 3 numbered items)? "
    "4. Does the sent email content 'Hey Agentr' appear in any of the listed email previews?"
)


"""

    user_prompt = f"""Generate 3 comprehensive test cases for the application using the available tools.

AVAILABLE TOOLS: {tool_def}
TOTAL TOOLS: {total_tools}

CRITICAL REQUIREMENTS:
1. TOOL DISTRIBUTION:
   - Split the {total_tools} available tools across 3 test cases
   - Each test case should have 3-4 tools maximum
   - Group logically related tools together
   - Ensure every tool is used in exactly one test case

2. TEST CASE INDEPENDENCE - CRITICAL:
   - Each test case MUST be completely self-contained and runnable independently
   - NEVER reference steps from other test cases (e.g., don't say "using user_id from Test Case 1")
   - Each test case should start by getting any foundational data it needs (like user_id)
   - Tasks should only reference steps within the SAME test case (e.g., "using the user_id from step 1" where step 1 is in the current test case)

3. DATA FLOW - NO HARDCODED VALUES:
   - NEVER use placeholder values like 'user123', 'id_456', etc.
   - Always reference data from previous API calls within the same test case
   - Structure tasks so each step uses results from previous steps in the same test case
   - Use phrases like: "using the ID returned from step 1", "with the data retrieved in the previous step"

4. TEST CASE DESIGN:
   - Test Case 1: Focus on initial data retrieval and creation operations (3-4 tools)
     * Should start with getting foundational data (e.g., user info, basic listings)
   - Test Case 2: Focus on data manipulation, updates and management (3-4 tools)
     * Should independently get any needed foundational data in its first step
   - Test Case 3: Focus on advanced operations, search, and cleanup (2-4 tools)
     * Should independently get any needed foundational data in its first step

5. TASK DESIGN WITH DEPENDENCIES:
   - Create realistic workflows based on available tool capabilities
   - Follow logical dependency order within each test case (get data first, then use it)
   - Include error handling scenarios with actual API error codes
   - Structure tasks to build upon previous results within the same test case

6. VALIDATION STRATEGY:
   - Each test case must have exactly 3 validation queries
   - Write specific validation queries that check actual API responses
   - Include success criteria and failure conditions
   - Verify data integrity and consistency
   - Check for proper error messages and status codes



Generate the 3 test cases now following these requirements, ensuring NO hardcoded placeholder values and complete test case independence."""

    # Setup LLM
    azure_api_key = os.environ.get("AZURE_OPENAI_API_KEY")
    if not azure_api_key:
        raise ValueError("AZURE_OPENAI_API_KEY environment variable is required")

    llm = AzureChatOpenAI(
        azure_endpoint=os.environ.get("AZURE_OPENAI_ENDPOINT"),
        azure_deployment=os.environ.get("AZURE_OPENAI_DEPLOYMENT", "o4-mini"),
        api_key=SecretStr(azure_api_key),
        api_version=os.environ.get("AZURE_OPENAI_API_VERSION", "2025-03-01-preview"),
    )

    # Get structured output from LLM using system and user prompts
    messages = [SystemMessage(content=system_prompt), HumanMessage(content=user_prompt)]

    structured_llm = llm.with_structured_output(MultiTestCaseOutput)
    response = structured_llm.invoke(messages)

    write_to_file(response, app_name, class_name, output_dir)  # type: ignore

    return response


def write_to_file(multi_test_case: MultiTestCaseOutput, app_name: str, class_name: str, output_dir: str):
    """Regenerate the entire automation_test.py file with multiple test cases."""

    # Ensure output directory exists
    output_path = Path(output_dir)
    output_path.mkdir(parents=True, exist_ok=True)

    file_content = f'''import pytest

from universal_mcp.utils.testing import (
    AutomationTestCase,
    execute_automation_test,
    create_app_with_integration
)
from universal_mcp_{app_name}.app import {class_name}


@pytest.fixture
def {app_name}_app():
    return create_app_with_integration("{app_name}", {class_name})

'''

    # Generate fixtures and test functions for each test case
    for i, test_case in enumerate(multi_test_case.test_cases, 1):
        # Format tools array with proper indentation
        tools_formatted = "[\n"
        for tool in test_case.tools:
            escaped_tool = tool.replace('"', '\\"')
            tools_formatted += f'            "{escaped_tool}",\n'
        tools_formatted += "        ]"

        # Format tasks array with proper indentation
        tasks_formatted = "[\n"
        for task in test_case.tasks:
            escaped_task = task.replace('"', '\\"')
            tasks_formatted += f'            "{escaped_task}",\n'
        tasks_formatted += "        ]"

        # Use triple quotes for validation query to avoid escaping issues
        validation_query = test_case.validate_query

        file_content += f'''
@pytest.fixture
def {app_name}_test_case_{i}({app_name}_app):
    """Test Case {i}"""
    return AutomationTestCase(
        app="{app_name}",
        app_instance={app_name}_app,
        tools={tools_formatted},
        tasks={tasks_formatted},
        validate_query=(
            """{validation_query}"""
        )
    )

'''

    # Generate test functions
    for i, _ in enumerate(multi_test_case.test_cases, 1):
        file_content += f'''
@pytest.mark.asyncio
async def test_{app_name}_test_case_{i}({app_name}_test_case_{i}):
    """Execute test case {i}"""
    await execute_automation_test({app_name}_test_case_{i})
'''

    file_content += "\n\n "

    # Write the entire file
    output_file = output_path / "automation_test.py"
    with open(output_file, "w") as f:
        f.write(file_content)

    print(f"✅ Generated {output_file} with {len(multi_test_case.test_cases)} test cases for {app_name}")
    for i, test_case in enumerate(multi_test_case.test_cases, 1):
        print(f"  Test Case {i}: {len(test_case.tools)} tools, {len(test_case.tasks)} tasks")
```
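Taken together, `generate_test_cases` is the entry point: it imports the app class by naming convention (`universal_mcp_<app_name>.app`), asks the LLM for a structured `MultiTestCaseOutput`, and writes `automation_test.py` into the output directory. A minimal driver sketch, not part of the diff; it assumes the Azure OpenAI environment variables used above are set and that a `universal_mcp_outlook` package exposing `OutlookApp` is installed:

```python
# Hypothetical driver for the new generator (not part of the diff).
# Assumes AZURE_OPENAI_API_KEY / AZURE_OPENAI_ENDPOINT are set and that a
# universal_mcp_outlook package exposing OutlookApp is installed.
from universal_mcp.utils.openapi.test_generator import generate_test_cases

result = generate_test_cases("outlook", "OutlookApp", output_dir="tests")
for i, case in enumerate(result.test_cases, 1):
    print(f"Test case {i}: {len(case.tools)} tools, {len(case.tasks)} tasks")

# The generated suite lands in tests/automation_test.py and runs under pytest:
#   pytest tests/automation_test.py -v
```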