universal-mcp 0.1.24rc2__py3-none-any.whl → 0.1.24rc3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- universal_mcp/agentr/__init__.py +6 -0
- universal_mcp/agentr/agentr.py +30 -0
- universal_mcp/{utils/agentr.py → agentr/client.py} +19 -3
- universal_mcp/agentr/integration.py +104 -0
- universal_mcp/agentr/registry.py +91 -0
- universal_mcp/agentr/server.py +51 -0
- universal_mcp/agents/__init__.py +6 -0
- universal_mcp/agents/auto.py +576 -0
- universal_mcp/agents/base.py +88 -0
- universal_mcp/agents/cli.py +27 -0
- universal_mcp/agents/codeact/__init__.py +243 -0
- universal_mcp/agents/codeact/sandbox.py +27 -0
- universal_mcp/agents/codeact/test.py +15 -0
- universal_mcp/agents/codeact/utils.py +61 -0
- universal_mcp/agents/hil.py +104 -0
- universal_mcp/agents/llm.py +10 -0
- universal_mcp/agents/react.py +58 -0
- universal_mcp/agents/simple.py +40 -0
- universal_mcp/agents/utils.py +111 -0
- universal_mcp/analytics.py +5 -7
- universal_mcp/applications/__init__.py +42 -75
- universal_mcp/applications/application.py +1 -1
- universal_mcp/applications/sample/app.py +245 -0
- universal_mcp/cli.py +10 -3
- universal_mcp/config.py +33 -7
- universal_mcp/exceptions.py +4 -0
- universal_mcp/integrations/__init__.py +0 -15
- universal_mcp/integrations/integration.py +9 -91
- universal_mcp/servers/__init__.py +2 -14
- universal_mcp/servers/server.py +10 -51
- universal_mcp/tools/__init__.py +3 -0
- universal_mcp/tools/adapters.py +20 -11
- universal_mcp/tools/manager.py +29 -56
- universal_mcp/tools/registry.py +41 -0
- universal_mcp/tools/tools.py +22 -1
- universal_mcp/types.py +10 -0
- universal_mcp/utils/common.py +245 -0
- universal_mcp/utils/openapi/api_generator.py +46 -18
- universal_mcp/utils/openapi/cli.py +445 -19
- universal_mcp/utils/openapi/openapi.py +284 -21
- universal_mcp/utils/openapi/postprocessor.py +275 -0
- universal_mcp/utils/openapi/preprocessor.py +1 -1
- universal_mcp/utils/openapi/test_generator.py +287 -0
- universal_mcp/utils/prompts.py +188 -341
- universal_mcp/utils/testing.py +190 -2
- {universal_mcp-0.1.24rc2.dist-info → universal_mcp-0.1.24rc3.dist-info}/METADATA +16 -2
- universal_mcp-0.1.24rc3.dist-info/RECORD +70 -0
- universal_mcp/applications/sample_tool_app.py +0 -80
- universal_mcp/client/agents/__init__.py +0 -4
- universal_mcp/client/agents/base.py +0 -38
- universal_mcp/client/agents/llm.py +0 -115
- universal_mcp/client/agents/react.py +0 -67
- universal_mcp/client/cli.py +0 -181
- universal_mcp-0.1.24rc2.dist-info/RECORD +0 -53
- {universal_mcp-0.1.24rc2.dist-info → universal_mcp-0.1.24rc3.dist-info}/WHEEL +0 -0
- {universal_mcp-0.1.24rc2.dist-info → universal_mcp-0.1.24rc3.dist-info}/entry_points.txt +0 -0
- {universal_mcp-0.1.24rc2.dist-info → universal_mcp-0.1.24rc3.dist-info}/licenses/LICENSE +0 -0
@@ -1,3 +1,4 @@
|
|
1
|
+
import hashlib
|
1
2
|
import json
|
2
3
|
import re
|
3
4
|
import textwrap
|
@@ -11,6 +12,177 @@ from pydantic import BaseModel
|
|
11
12
|
|
12
13
|
from .filters import load_filter_config, should_process_operation
|
13
14
|
|
15
|
+
# Schema registry for tracking unique response schemas to avoid duplicates
|
16
|
+
_schema_registry: dict[str, str] = {} # schema_hash -> model_class_name
|
17
|
+
_generated_models: dict[str, str] = {} # model_class_name -> model_source_code
|
18
|
+
|
19
|
+
|
20
|
+
def _get_schema_hash(schema: dict[str, Any]) -> str:
    """Return a short fingerprint used to recognise identical response schemas.

    The schema is serialized to JSON with sorted keys (values JSON cannot
    encode are stringified through ``default=str``) and the first 8 hex
    characters of the MD5 digest of that text are returned.

    Args:
        schema: The response schema to fingerprint.

    Returns:
        An 8-character lowercase hexadecimal string.
    """
    try:
        schema_str = json.dumps(schema, sort_keys=True, default=str)
    except (TypeError, ValueError):
        # json.dumps raises TypeError when sort_keys meets keys that cannot be
        # ordered against each other (e.g. int and str) and ValueError on
        # circular references. Sorting the raw items would hit the very same
        # TypeError, so order the top-level items by a (type name, repr) key,
        # which is always comparable.
        if isinstance(schema, dict):
            ordered_items = sorted(schema.items(), key=lambda item: (type(item[0]).__name__, repr(item[0])))
            schema_str = str(ordered_items)
        else:
            schema_str = str(schema)

    # MD5 is only a de-duplication fingerprint here, not a security primitive;
    # saying so keeps the call working on FIPS-restricted Python builds.
    return hashlib.md5(schema_str.encode(), usedforsecurity=False).hexdigest()[:8]
|
29
|
+
|
30
|
+
|
31
|
+
def _generate_model_name(operation: dict[str, Any], path: str, method: str, schema: dict[str, Any]) -> str:
    """Build a PascalCase class name for a response schema.

    The name source is chosen in this order:

    1. ``schema["title"]`` when the schema has a title.
    2. ``operation["operationId"]`` followed by ``Response``.
    3. The HTTP method, the last non-templated path segment and ``Response``
       (e.g. ``get`` + ``/users/{id}`` -> ``GetUsersResponse``).
    4. The HTTP method followed by ``Response``.

    The chosen text is split on every non-alphanumeric character and the
    pieces are joined in PascalCase. Words that are entirely lower- or
    upper-case are capitalized (``user_profile`` -> ``UserProfile``); words
    that already mix cases keep their interior capitals
    (``getUserById`` -> ``GetUserById``) instead of being flattened.

    Args:
        operation: The OpenAPI operation object.
        path: The API path the operation belongs to.
        method: The HTTP method of the operation.
        schema: The response schema being named.

    Returns:
        A name usable as a Python class identifier; ``"Response"`` when
        nothing usable could be derived.
    """
    # Function-scope import: only this helper needs it.
    import keyword

    def _pascal(word: str) -> str:
        # Uniform-case words are normalised; camelCase words only get their
        # first letter raised so the interior word boundaries survive.
        if word.islower() or word.isupper():
            return word.capitalize()
        return word[0].upper() + word[1:]

    if "title" in schema:
        # str() guards against non-string titles (e.g. a YAML number).
        raw_name = str(schema["title"])
    elif "operationId" in operation:
        raw_name = f"{operation['operationId']} Response"
    else:
        path_parts = [
            part
            for part in path.strip("/").split("/")
            if part and not (part.startswith("{") and part.endswith("}"))
        ]
        # Space-separated so that method, segment and suffix are split into
        # separate words below rather than fused before capitalization.
        raw_name = f"{method} {path_parts[-1]} Response" if path_parts else f"{method} Response"

    name = "".join(_pascal(word) for word in re.split(r"[^a-zA-Z0-9]", raw_name) if word)

    # Identifiers cannot start with a digit.
    if name and name[0].isdigit():
        name = "Response" + name

    # A capitalised name can still be a keyword (None, True, False).
    if keyword.iskeyword(name):
        name += "Model"

    return name or "Response"
|
55
|
+
|
56
|
+
|
57
|
+
def _generate_response_model_class(schema: dict[str, Any], model_name: str) -> str:
    """Generate Pydantic model source code from an OpenAPI response schema.

    * Array schemas become a model with a single ``value: List[...]`` field.
      When the items are objects, an ``{model_name}Item`` model is generated
      recursively and emitted in front of the collection model; otherwise the
      item type is mapped from the primitive ``type`` (``Any`` if unknown).
    * Object schemas (``type == "object"`` or a ``properties`` key) become a
      model with one field per property. Property extraction, identifier
      sanitizing and type mapping are delegated to
      ``_extract_properties_from_schema``, ``_sanitize_identifier`` and
      ``_openapi_type_to_python_type``, which are defined elsewhere in this
      module. Item models for array-of-object properties are stored in the
      module-level ``_generated_models`` registry rather than returned.
    * Any other schema yields an empty string.

    Args:
        schema: The response schema.
        model_name: Class name to use for the generated model.

    Returns:
        Python source for the model class(es), or ``""`` if no model applies.
    """
    if not schema:
        return ""

    # Handle array responses
    if schema.get("type") == "array":
        items_schema = schema.get("items") or {}
        if not isinstance(items_schema, dict):
            # Boolean/tuple-style ``items`` cannot be modelled; treat as untyped.
            items_schema = {}

        if items_schema and (items_schema.get("properties") or items_schema.get("type") == "object"):
            item_model_name = f"{model_name}Item"
            item_model_code = _generate_response_model_class(items_schema, item_model_name)
            collection_model = f"""
class {model_name}(BaseModel):
    value: List[{item_model_name}]
"""
            return item_model_code + collection_model

        # Arrays of primitives, or arrays without a usable item schema.
        primitive_types = {"string": "str", "integer": "int", "number": "float", "boolean": "bool"}
        declared_type = items_schema.get("type")
        # ``type`` may be a list (OpenAPI 3.1 unions); lists are unhashable, so
        # only look up plain strings and fall back to Any otherwise.
        item_type = primitive_types.get(declared_type, "Any") if isinstance(declared_type, str) else "Any"
        return f"""
class {model_name}(BaseModel):
    value: List[{item_type}]
"""

    # Handle object responses
    if schema.get("type") == "object" or "properties" in schema:
        properties, required_fields = _extract_properties_from_schema(schema)

        if not properties:
            return f"""
class {model_name}(BaseModel):
    pass
"""

        field_definitions = []
        for prop_name, prop_schema in properties.items():
            field_name = _sanitize_identifier(prop_name)
            is_required = prop_name in required_fields

            # ``items`` may be missing or null; normalise before probing it.
            nested_items = prop_schema.get("items") or {}
            if (
                prop_schema.get("type") == "array"
                and isinstance(nested_items, dict)
                and nested_items.get("properties")
            ):
                # Arrays of objects get their own item model, stored globally
                # so it is emitted alongside the other generated models.
                item_model_name = f"{model_name}{field_name.capitalize()}Item"
                item_model_code = _generate_response_model_class(nested_items, item_model_name)
                if item_model_code and item_model_name not in _generated_models:
                    _generated_models[item_model_name] = item_model_code

                python_type = f"List[{item_model_name}]" if is_required else f"Optional[List[{item_model_name}]]"
            else:
                python_type = _openapi_type_to_python_type(prop_schema, required=is_required)

            # Alias the field when the wire name differs from the Python name
            # (e.g. @odata.context). repr() produces a correctly escaped string
            # literal even if the property name contains quotes or backslashes.
            if prop_name != field_name or prop_name.startswith("@"):
                if is_required:
                    field_definitions.append(f"    {field_name}: {python_type} = Field(alias={prop_name!r})")
                else:
                    field_definitions.append(f"    {field_name}: {python_type} = Field(None, alias={prop_name!r})")
            elif is_required:
                field_definitions.append(f"    {field_name}: {python_type}")
            else:
                field_definitions.append(f"    {field_name}: {python_type} = None")

        fields_block = "\n".join(field_definitions)
        return f"""
class {model_name}(BaseModel):
{fields_block}
"""

    # Fallback for other schema types
    return ""
|
144
|
+
|
145
|
+
|
146
|
+
def _get_or_create_response_model(
|
147
|
+
operation: dict[str, Any], path: str, method: str, schema: dict[str, Any]
|
148
|
+
) -> str | None:
|
149
|
+
"""Get or create a response model for a given schema, avoiding duplicates."""
|
150
|
+
if not schema:
|
151
|
+
return None
|
152
|
+
|
153
|
+
try:
|
154
|
+
# Generate hash for this schema
|
155
|
+
schema_hash = _get_schema_hash(schema)
|
156
|
+
|
157
|
+
# Check if we already have a model for this schema
|
158
|
+
if schema_hash in _schema_registry:
|
159
|
+
return _schema_registry[schema_hash]
|
160
|
+
|
161
|
+
# Generate new model
|
162
|
+
model_name = _generate_model_name(operation, path, method, schema)
|
163
|
+
|
164
|
+
# Ensure unique model name
|
165
|
+
base_name = model_name
|
166
|
+
counter = 1
|
167
|
+
while model_name in _generated_models:
|
168
|
+
model_name = f"{base_name}{counter}"
|
169
|
+
counter += 1
|
170
|
+
|
171
|
+
# Generate model source code
|
172
|
+
model_code = _generate_response_model_class(schema, model_name)
|
173
|
+
|
174
|
+
if model_code:
|
175
|
+
# Register the model
|
176
|
+
_schema_registry[schema_hash] = model_name
|
177
|
+
_generated_models[model_name] = model_code
|
178
|
+
return model_name
|
179
|
+
|
180
|
+
except Exception as e:
|
181
|
+
# If model generation fails, log and continue with fallback
|
182
|
+
print(f"Warning: Could not generate model for {method.upper()} {path}: {e}")
|
183
|
+
|
184
|
+
return None
|
185
|
+
|
14
186
|
|
15
187
|
class Parameters(BaseModel):
|
16
188
|
name: str
|
@@ -220,15 +392,20 @@ def _load_and_resolve_references(path: Path):
|
|
220
392
|
return replace_refs(schema)
|
221
393
|
|
222
394
|
|
223
|
-
def _determine_return_type(operation: dict[str, Any]) -> str:
|
395
|
+
def _determine_return_type(operation: dict[str, Any], path: str, method: str) -> str:
|
224
396
|
"""
|
225
397
|
Determine the return type from the response schema.
|
226
398
|
|
399
|
+
Now generates specific Pydantic model classes for response schemas where possible,
|
400
|
+
falling back to generic types for complex or missing schemas.
|
401
|
+
|
227
402
|
Args:
|
228
403
|
operation (dict): The operation details from the schema.
|
404
|
+
path (str): The API path (e.g., '/users/{user_id}').
|
405
|
+
method (str): The HTTP method (e.g., 'get').
|
229
406
|
|
230
407
|
Returns:
|
231
|
-
str: The appropriate return type annotation (
|
408
|
+
str: The appropriate return type annotation (specific model class name or generic type)
|
232
409
|
"""
|
233
410
|
responses = operation.get("responses", {})
|
234
411
|
# Find successful response (2XX)
|
@@ -247,7 +424,12 @@ def _determine_return_type(operation: dict[str, Any]) -> str:
|
|
247
424
|
if content_type.startswith("application/json") and "schema" in content_info:
|
248
425
|
schema = content_info["schema"]
|
249
426
|
|
250
|
-
#
|
427
|
+
# generate a specific model class for this schema
|
428
|
+
model_name = _get_or_create_response_model(operation, path, method, schema)
|
429
|
+
|
430
|
+
if model_name:
|
431
|
+
return model_name
|
432
|
+
|
251
433
|
if schema.get("type") == "array":
|
252
434
|
return "list[Any]"
|
253
435
|
elif schema.get("type") == "object" or "$ref" in schema:
|
@@ -536,7 +718,7 @@ def _generate_method_code(path, method, operation):
|
|
536
718
|
# --- End Alias duplicate parameter names ---
|
537
719
|
|
538
720
|
# --- Determine Return Type and Body Characteristics ---
|
539
|
-
return_type = _determine_return_type(operation)
|
721
|
+
return_type = _determine_return_type(operation, path, method)
|
540
722
|
|
541
723
|
body_required = has_body and operation["requestBody"].get("required", False) # Remains useful
|
542
724
|
|
@@ -751,7 +933,7 @@ def _generate_method_code(path, method, operation):
|
|
751
933
|
# openapi_path_comment_for_docstring = f"# openapi_path: {path}"
|
752
934
|
# docstring_parts.append(openapi_path_comment_for_docstring)
|
753
935
|
|
754
|
-
return_type = _determine_return_type(operation)
|
936
|
+
return_type = _determine_return_type(operation, path, method)
|
755
937
|
|
756
938
|
# Summary
|
757
939
|
summary = operation.get("summary", "").strip()
|
@@ -978,9 +1160,15 @@ def _generate_method_code(path, method, operation):
|
|
978
1160
|
# using the prepared URL, query parameters, request body data, files, and content type.
|
979
1161
|
# Use convenience methods that automatically handle responses and errors
|
980
1162
|
|
1163
|
+
# Determine the appropriate return statement based on return type
|
1164
|
+
if return_type in ["Any", "dict[str, Any]", "list[Any]"]:
|
1165
|
+
return_statement = " return self._handle_response(response)"
|
1166
|
+
else:
|
1167
|
+
return_statement = f" return {return_type}.model_validate(self._handle_response(response))"
|
1168
|
+
|
981
1169
|
if method_lower == "get":
|
982
1170
|
body_lines.append(" response = self._get(url, params=query_params)")
|
983
|
-
body_lines.append(
|
1171
|
+
body_lines.append(return_statement)
|
984
1172
|
elif method_lower == "post":
|
985
1173
|
if selected_content_type == "multipart/form-data":
|
986
1174
|
body_lines.append(
|
@@ -990,7 +1178,7 @@ def _generate_method_code(path, method, operation):
|
|
990
1178
|
body_lines.append(
|
991
1179
|
f" response = self._post(url, data=request_body_data, params=query_params, content_type='{final_content_type_for_api_call}')"
|
992
1180
|
)
|
993
|
-
body_lines.append(
|
1181
|
+
body_lines.append(return_statement)
|
994
1182
|
elif method_lower == "put":
|
995
1183
|
if selected_content_type == "multipart/form-data":
|
996
1184
|
body_lines.append(
|
@@ -1000,16 +1188,16 @@ def _generate_method_code(path, method, operation):
|
|
1000
1188
|
body_lines.append(
|
1001
1189
|
f" response = self._put(url, data=request_body_data, params=query_params, content_type='{final_content_type_for_api_call}')"
|
1002
1190
|
)
|
1003
|
-
body_lines.append(
|
1191
|
+
body_lines.append(return_statement)
|
1004
1192
|
elif method_lower == "patch":
|
1005
1193
|
body_lines.append(" response = self._patch(url, data=request_body_data, params=query_params)")
|
1006
|
-
body_lines.append(
|
1194
|
+
body_lines.append(return_statement)
|
1007
1195
|
elif method_lower == "delete":
|
1008
1196
|
body_lines.append(" response = self._delete(url, params=query_params)")
|
1009
|
-
body_lines.append(
|
1197
|
+
body_lines.append(return_statement)
|
1010
1198
|
else:
|
1011
1199
|
body_lines.append(f" response = self._{method_lower}(url, data=request_body_data, params=query_params)")
|
1012
|
-
body_lines.append(
|
1200
|
+
body_lines.append(return_statement)
|
1013
1201
|
|
1014
1202
|
# --- Combine Signature, Docstring, and Body for Final Method Code ---
|
1015
1203
|
method_code = signature + formatted_docstring + "\n" + "\n".join(body_lines)
|
@@ -1020,9 +1208,69 @@ def load_schema(path: Path):
|
|
1020
1208
|
return _load_and_resolve_references(path)
|
1021
1209
|
|
1022
1210
|
|
1211
|
+
def generate_schemas_file(schema, class_name: str | None = None, filter_config_path: str | None = None):
    """
    Build the source of a Python module holding only the response model classes.

    Both module-level registries are emptied first, then every operation under
    ``schema["paths"]`` that passes the optional filter is run through
    ``_determine_return_type`` purely for its side effect of registering
    response models. The registered model sources are then placed under a
    fixed import header.

    Args:
        schema (dict): The OpenAPI schema as a dictionary.
        class_name (str | None): Accepted for signature parity; not used here.
        filter_config_path (str | None): Optional path to a JSON filter configuration file.

    Returns:
        str: The Python code for the response schema classes, or a header-only
        file with an explanatory comment when no model was generated.
    """
    _schema_registry.clear()
    _generated_models.clear()

    filter_config = load_filter_config(filter_config_path) if filter_config_path else None

    http_methods = ["get", "post", "put", "delete", "patch", "options", "head"]
    for path, path_info in schema.get("paths", {}).items():
        for method in path_info:
            # Path items also carry non-operation keys; only HTTP verbs count.
            if method not in http_methods:
                continue
            if not should_process_operation(path, method, filter_config):
                continue
            # Return value is irrelevant; the call registers the model.
            _determine_return_type(path_info[method], path, method)

    header = "\n".join(
        [
            "from typing import Any, Optional, List",
            "from pydantic import BaseModel, Field",
        ]
    )
    models_section = "\n".join(_generated_models.values()) if _generated_models else ""

    if models_section:
        return header + "\n\n# Generated Response Models\n\n" + models_section + "\n"

    # Nothing was generated: still emit a valid, minimal module.
    return header + "\n\n# No response models were generated for this OpenAPI schema\n"
|
1268
|
+
|
1269
|
+
|
1023
1270
|
def generate_api_client(schema, class_name: str | None = None, filter_config_path: str | None = None):
|
1024
1271
|
"""
|
1025
1272
|
Generate a Python API client class from an OpenAPI schema.
|
1273
|
+
Models are not included - they should be generated separately using generate_schemas_file.
|
1026
1274
|
|
1027
1275
|
Args:
|
1028
1276
|
schema (dict): The OpenAPI schema as a dictionary.
|
@@ -1032,6 +1280,10 @@ def generate_api_client(schema, class_name: str | None = None, filter_config_pat
|
|
1032
1280
|
Returns:
|
1033
1281
|
str: A string containing the Python code for the API client class.
|
1034
1282
|
"""
|
1283
|
+
global _schema_registry, _generated_models
|
1284
|
+
_schema_registry.clear()
|
1285
|
+
_generated_models.clear()
|
1286
|
+
|
1035
1287
|
# Load filter configuration if provided
|
1036
1288
|
filter_config = None
|
1037
1289
|
if filter_config_path:
|
@@ -1057,7 +1309,7 @@ def generate_api_client(schema, class_name: str | None = None, filter_config_pat
|
|
1057
1309
|
if api_title:
|
1058
1310
|
# Convert API title to a clean class name
|
1059
1311
|
if class_name:
|
1060
|
-
clean_name = class_name
|
1312
|
+
clean_name = class_name[:-3] if class_name.endswith("App") else class_name.capitalize()
|
1061
1313
|
else:
|
1062
1314
|
base_name = "".join(word.capitalize() for word in api_title.split())
|
1063
1315
|
clean_name = "".join(c for c in base_name if c.isalnum())
|
@@ -1114,21 +1366,32 @@ def generate_api_client(schema, class_name: str | None = None, filter_config_pat
|
|
1114
1366
|
{tools_list}
|
1115
1367
|
]"""
|
1116
1368
|
|
1117
|
-
# Generate class imports
|
1369
|
+
# Generate class imports - import from separate schemas file
|
1118
1370
|
imports = [
|
1119
1371
|
"from typing import Any, Optional, List",
|
1120
1372
|
"from universal_mcp.applications import APIApplication",
|
1121
1373
|
"from universal_mcp.integrations import Integration",
|
1374
|
+
"from .schemas import *",
|
1122
1375
|
]
|
1123
1376
|
|
1124
|
-
# Construct the class code
|
1125
|
-
|
1126
|
-
|
1127
|
-
|
1128
|
-
|
1129
|
-
|
1130
|
-
f
|
1131
|
-
)
|
1377
|
+
# Construct the class code (no model classes since they're in separate file)
|
1378
|
+
imports_section = "\n".join(imports)
|
1379
|
+
|
1380
|
+
class_code_parts = [
|
1381
|
+
imports_section,
|
1382
|
+
"",
|
1383
|
+
f"class {class_name}(APIApplication):",
|
1384
|
+
" def __init__(self, integration: Integration = None, **kwargs) -> None:",
|
1385
|
+
f" super().__init__(name='{class_name.lower()}', integration=integration, **kwargs)",
|
1386
|
+
f' self.base_url = "{base_url}"',
|
1387
|
+
"",
|
1388
|
+
"\n\n".join(methods),
|
1389
|
+
"",
|
1390
|
+
list_tools_method,
|
1391
|
+
"",
|
1392
|
+
]
|
1393
|
+
|
1394
|
+
class_code = "\n".join(class_code_parts)
|
1132
1395
|
return class_code
|
1133
1396
|
|
1134
1397
|
|
@@ -0,0 +1,275 @@
|
|
1
|
+
import ast
|
2
|
+
import re
|
3
|
+
|
4
|
+
import litellm
|
5
|
+
|
6
|
+
|
7
|
+
def add_hint_tags_to_docstrings(input_path: str, output_path: str):
    """
    Read a Python API client file and add hint tags to each function's docstring.

    For every function whose body calls ``self._get``, ``self._post``,
    ``self._put``, ``self._patch`` or ``self._delete``, an LLM is asked which
    of these tags apply:
    - 'readOnlyHint': Tool does not modify its environment (fetching, reading, etc.)
    - 'destructiveHint': Tool may perform destructive updates
    - 'openWorldHint': Tool interacts with external entities (3rd party APIs)

    Functions can have multiple tags (e.g., 'readOnlyHint, openWorldHint').
    Tags are appended to the existing ``Tags:`` section of the docstring;
    existing tags are left untouched and functions without a docstring or
    without a ``Tags:`` section are skipped. Progress and a summary are
    printed to stdout.

    The result is regenerated with ``ast.unparse`` (so comments and original
    formatting of the input are not preserved), formatted with Black when it
    is available, and written to ``output_path``.

    Args:
        input_path: Path of the Python source file to read.
        output_path: Path the modified source is written to.
    """
    with open(input_path, encoding="utf-8") as f:
        source = f.read()
    tree = ast.parse(source)

    # Counters shared with the nested transformer through ``nonlocal``.
    total_functions = 0
    functions_with_http_methods = 0
    functions_processed_by_llm = 0
    functions_tagged = 0
    llm_failures = 0

    # Lower-cased LLM answer -> canonical tag spelling.
    canonical_tags = {
        "readonlyhint": "readOnlyHint",
        "destructivehint": "destructiveHint",
        "openworldhint": "openWorldHint",
    }

    class DocstringTagAdder(ast.NodeTransformer):
        def _find_http_method(self, node):
            """Find the HTTP method used in the function body."""
            http_methods = []

            def visit_node(n):
                if (
                    isinstance(n, ast.Call)
                    and isinstance(n.func, ast.Attribute)
                    and isinstance(n.func.value, ast.Name)
                    and n.func.value.id == "self"
                    and n.func.attr in ["_get", "_post", "_put", "_patch", "_delete"]
                ):
                    # Drop the leading underscore: "_get" -> "get".
                    http_methods.append(n.func.attr[1:])
                for child in ast.iter_child_nodes(n):
                    visit_node(child)

            visit_node(node)
            return http_methods[0] if http_methods else None

        @staticmethod
        def _split_tags_section(docstring):
            """Split a docstring into (text up to the tags, tag text, trailing whitespace).

            Returns None when the docstring has no 'Tags:' section.
            """
            match = re.search(r"Tags:\s*", docstring)
            if match is None:
                return None
            remainder = docstring[match.end() :]
            tags_text = remainder.rstrip()
            return docstring[: match.end()], tags_text, remainder[len(tags_text) :]

        def visit_FunctionDef(self, node):
            nonlocal total_functions, functions_with_http_methods, functions_processed_by_llm, functions_tagged

            total_functions += 1
            print(f"\n[{total_functions}] Processing function: {node.name}")

            http_method = self._find_http_method(node)
            tag_to_add = None

            if http_method:
                functions_with_http_methods += 1
                print(f" └─ Found HTTP method: {http_method.upper()}")

                # Use simple agent to decide tag
                print(" └─ Calling LLM to determine tag...")
                tag_to_add = self._get_tag_suggestion_from_agent(node, http_method)

                if tag_to_add:
                    functions_processed_by_llm += 1
                    print(f" └─ LLM suggested tags: {tag_to_add}")
                else:
                    print(" └─ LLM failed or returned invalid response")
            else:
                print(" └─ No HTTP method found - skipping")

            if not tag_to_add:
                return node

            docstring = ast.get_docstring(node, clean=False)
            if docstring is None:
                print(" └─ ⚠️ No docstring found - skipping")
                return node

            section = self._split_tags_section(docstring)
            if section is None:
                print(" └─ ⚠️ No 'Tags:' section found in docstring - skipping")
                return node

            head, tags_text, trailing_whitespace = section
            existing_tags = [tag.strip() for tag in tags_text.split(",")]
            tags_to_add = [tag.strip() for tag in tag_to_add.split(",") if tag.strip() not in existing_tags]

            if not tags_to_add:
                print(f" └─ ⚠️ All tags '{tag_to_add}' already exist - skipping")
                return node

            # Build the new docstring by plain concatenation. Feeding the
            # existing tag text to re.sub as a replacement template would
            # interpret backslashes and digits in it as escapes/group refs.
            addition = ", ".join(tags_to_add)
            merged_tags = f"{tags_text}, {addition}" if tags_text else addition
            if not tags_text and not head[-1].isspace():
                # "Tags:" sat at the very end of the docstring; keep a separator.
                head += " "
            new_docstring = head + merged_tags + trailing_whitespace

            first_statement = node.body[0]
            if isinstance(first_statement, ast.Expr) and isinstance(first_statement.value, ast.Constant):
                first_statement.value.value = new_docstring
                functions_tagged += 1
                print(f" └─ ✅ Tags '{', '.join(tags_to_add)}' added successfully")
            return node

        def _get_tag_suggestion_from_agent(self, node, http_method):
            """Use a simple agent to decide which tag to add based on function context.

            Returns a comma-separated string of canonical tag names, or None when
            the LLM answers 'none', answers with nothing recognisable, or fails.
            """
            nonlocal llm_failures

            function_name = node.name
            docstring = ast.get_docstring(node, clean=False) or ""
            parameters = [arg.arg for arg in node.args.args if arg.arg != "self"]
            parameter_list = ", ".join(parameters)

            system_prompt = """You are an expert at analyzing API functions and determining their characteristics.

Your task is to analyze each function and decide which tags to add:
- 'readOnlyHint': Tool does not modify its environment (fetching, reading, etc.)
- 'destructiveHint': Tool may perform destructive updates
- 'openWorldHint': Tool interacts with external entities (3rd party APIs)

IMPORTANT:
- HTTP method alone is NOT enough to determine the tags. You must analyze the function's actual purpose.
- Since these are all API client functions, MOST functions should have 'openWorldHint' (they interact with external APIs).
- Only functions that are purely local operations (like reading local files) should NOT have 'openWorldHint'.

Functions can have multiple tags. For example:
- A function that reads from Gmail API: 'readOnlyHint, openWorldHint'
- A function that deletes from GitHub API: 'destructiveHint, openWorldHint'
- A function that only reads local files: 'readOnlyHint' (no openWorldHint)

Respond with comma-separated tags (e.g., 'readOnlyHint, openWorldHint') or 'none' if no tags apply."""

            user_prompt = f"""Analyze this API function and decide which tags to add:

Function Name: {function_name}
HTTP Method: {http_method}
Parameters: {parameter_list}
Docstring: {docstring[:1000]}...

Based on this information, which tags should this function get?

Think through:
1. What does this function actually do? (from name and docstring)
2. Does it modify its environment or just read/fetch?
3. Does it interact with external entities (3rd party APIs)?
4. Could it be potentially destructive?

GUIDELINES for readOnlyHint (does not modify environment):
- Functions that only READ or FETCH data
- Functions that VALIDATE or CHECK things without saving
- Functions that EXPORT or DOWNLOAD data
- Functions that perform HEALTH CHECKS or PING operations
- Functions that REFRESH tokens or sessions
- Functions that SEARCH or FILTER data
- Functions that GET information without changing anything
- Functions that LIST or RETRIEVE data

GUIDELINES for destructiveHint (DESTROYS or DELETES things):
- Functions that DELETE resources or data
- Functions that REMOVE or ERASE things
- Functions that DESTROY or TERMINATE resources
- Functions that CANCEL or ABORT operations
- Functions that REVOKE or INVALIDATE things

IMPORTANT:
- A function should NOT have both readOnlyHint and destructiveHint - they are mutually exclusive.
- Creating, sending, or updating things is NOT destructive - only deleting/destroying is destructive.
- Functions that CREATE, SEND, UPDATE, or MODIFY should NOT get destructiveHint.

GUIDELINES for openWorldHint (interacts with external entities):
- Functions that interact with 3rd party APIs (Gmail, Outlook, Reddit, GitHub, etc.)
- Functions that make external HTTP requests
- Functions that connect to external services
- Functions that interact with cloud services
- Functions that communicate with external databases
- Functions that call external webhooks
- MOST API client functions will have this tag since they interact with external APIs

NOT openWorldHint (local operations):
- Functions that only read local files
- Functions that process local data
- Functions that work with local databases
- Functions that manipulate local variables
- Functions that only work with local system resources

Examples:
- Gmail API read function: 'readOnlyHint, openWorldHint'
- Gmail API send email: 'openWorldHint' (not destructive, just sending)
- Gmail API create draft: 'openWorldHint' (not destructive, just creating)
- GitHub API delete repository: 'destructiveHint, openWorldHint'
- Local file reader: 'readOnlyHint' (no openWorldHint)
- Local data processor: 'none' (no tags)

Focus on the FUNCTION'S PURPOSE, not just the HTTP method.

Your answer (comma-separated tags or 'none'):"""

            try:
                response = litellm.completion(
                    model="perplexity/sonar-pro",
                    messages=[{"role": "system", "content": system_prompt}, {"role": "user", "content": user_prompt}],
                    temperature=0.1,
                    max_tokens=50,
                )

                suggested_tags = response.choices[0].message.content.strip().lower()

                valid_tags = []
                for raw_tag in suggested_tags.split(","):
                    # Models often wrap tags in quotes/backticks or end with a
                    # period; strip that decoration before matching.
                    tag = raw_tag.strip(" \t\r\n'\"`.")
                    canonical = canonical_tags.get(tag)
                    # Unknown answers (including 'none') are ignored, and a tag
                    # repeated by the model is only kept once.
                    if canonical and canonical not in valid_tags:
                        valid_tags.append(canonical)

                return ", ".join(valid_tags) if valid_tags else None

            except Exception as e:
                llm_failures += 1
                print(f" └─ ❌ LLM failed for function {function_name}: {e}")
                # If LLM fails, return None (no tag added)
                return None

    new_tree = DocstringTagAdder().visit(tree)
    ast.fix_missing_locations(new_tree)
    new_source = ast.unparse(new_tree)

    # Print summary statistics
    print(f"\n{'=' * 60}")
    print("📊 PROCESSING SUMMARY")
    print(f"{'=' * 60}")
    print(f"Total functions processed: {total_functions}")
    print(f"Functions with HTTP methods: {functions_with_http_methods}")
    print(f"Functions processed by LLM: {functions_processed_by_llm}")
    print(f"Functions successfully tagged: {functions_tagged}")
    print(f"LLM failures: {llm_failures}")
    if functions_with_http_methods > 0:
        print(
            f"LLM success rate: {(functions_processed_by_llm / functions_with_http_methods * 100):.1f}% of HTTP functions"
        )
    print(f"{'=' * 60}")

    # Format with Black in memory; fall back to the unformatted source when
    # Black is missing or fails, then write the file exactly once.
    output_content = new_source
    formatted = False
    try:
        import black

        output_content = black.format_file_contents(new_source, fast=False, mode=black.FileMode())
        formatted = True
    except ImportError:
        print(f"Black not installed. Skipping formatting for: {output_path}")
    except Exception as e:
        # NOTE(review): Black appears to signal "already formatted" by raising
        # NothingChanged; treat that as success rather than a failure - confirm
        # against the installed Black version.
        if type(e).__name__ == "NothingChanged":
            formatted = True
        else:
            print(f"Black formatting failed for {output_path}: {e}")

    with open(output_path, "w", encoding="utf-8") as f:
        f.write(output_content)

    if formatted:
        print(f"Black formatting applied successfully to: {output_path}")
|