universal-mcp 0.1.24rc2__py3-none-any.whl → 0.1.24rc4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (58):
  1. universal_mcp/agentr/README.md +201 -0
  2. universal_mcp/agentr/__init__.py +6 -0
  3. universal_mcp/agentr/agentr.py +30 -0
  4. universal_mcp/{utils/agentr.py → agentr/client.py} +19 -3
  5. universal_mcp/agentr/integration.py +104 -0
  6. universal_mcp/agentr/registry.py +91 -0
  7. universal_mcp/agentr/server.py +51 -0
  8. universal_mcp/agents/__init__.py +6 -0
  9. universal_mcp/agents/auto.py +576 -0
  10. universal_mcp/agents/base.py +88 -0
  11. universal_mcp/agents/cli.py +27 -0
  12. universal_mcp/agents/codeact/__init__.py +243 -0
  13. universal_mcp/agents/codeact/sandbox.py +27 -0
  14. universal_mcp/agents/codeact/test.py +15 -0
  15. universal_mcp/agents/codeact/utils.py +61 -0
  16. universal_mcp/agents/hil.py +104 -0
  17. universal_mcp/agents/llm.py +10 -0
  18. universal_mcp/agents/react.py +58 -0
  19. universal_mcp/agents/simple.py +40 -0
  20. universal_mcp/agents/utils.py +111 -0
  21. universal_mcp/analytics.py +5 -7
  22. universal_mcp/applications/__init__.py +42 -75
  23. universal_mcp/applications/application.py +1 -1
  24. universal_mcp/applications/sample/app.py +245 -0
  25. universal_mcp/cli.py +10 -3
  26. universal_mcp/config.py +33 -7
  27. universal_mcp/exceptions.py +4 -0
  28. universal_mcp/integrations/__init__.py +0 -15
  29. universal_mcp/integrations/integration.py +9 -91
  30. universal_mcp/servers/__init__.py +2 -14
  31. universal_mcp/servers/server.py +10 -51
  32. universal_mcp/tools/__init__.py +3 -0
  33. universal_mcp/tools/adapters.py +20 -11
  34. universal_mcp/tools/manager.py +29 -56
  35. universal_mcp/tools/registry.py +41 -0
  36. universal_mcp/tools/tools.py +22 -1
  37. universal_mcp/types.py +10 -0
  38. universal_mcp/utils/common.py +245 -0
  39. universal_mcp/utils/openapi/api_generator.py +46 -18
  40. universal_mcp/utils/openapi/cli.py +445 -19
  41. universal_mcp/utils/openapi/openapi.py +284 -21
  42. universal_mcp/utils/openapi/postprocessor.py +275 -0
  43. universal_mcp/utils/openapi/preprocessor.py +1 -1
  44. universal_mcp/utils/openapi/test_generator.py +287 -0
  45. universal_mcp/utils/prompts.py +188 -341
  46. universal_mcp/utils/testing.py +190 -2
  47. {universal_mcp-0.1.24rc2.dist-info → universal_mcp-0.1.24rc4.dist-info}/METADATA +17 -3
  48. universal_mcp-0.1.24rc4.dist-info/RECORD +71 -0
  49. universal_mcp/applications/sample_tool_app.py +0 -80
  50. universal_mcp/client/agents/__init__.py +0 -4
  51. universal_mcp/client/agents/base.py +0 -38
  52. universal_mcp/client/agents/llm.py +0 -115
  53. universal_mcp/client/agents/react.py +0 -67
  54. universal_mcp/client/cli.py +0 -181
  55. universal_mcp-0.1.24rc2.dist-info/RECORD +0 -53
  56. {universal_mcp-0.1.24rc2.dist-info → universal_mcp-0.1.24rc4.dist-info}/WHEEL +0 -0
  57. {universal_mcp-0.1.24rc2.dist-info → universal_mcp-0.1.24rc4.dist-info}/entry_points.txt +0 -0
  58. {universal_mcp-0.1.24rc2.dist-info → universal_mcp-0.1.24rc4.dist-info}/licenses/LICENSE +0 -0
@@ -1,3 +1,4 @@
1
+ import hashlib
1
2
  import json
2
3
  import re
3
4
  import textwrap
@@ -11,6 +12,177 @@ from pydantic import BaseModel
11
12
 
12
13
  from .filters import load_filter_config, should_process_operation
13
14
 
15
# Schema registry for tracking unique response schemas to avoid duplicates.
# Keys are the short content hashes produced by _get_schema_hash; values are
# the names of the model classes generated for those schemas.
_schema_registry: dict[str, str] = {}  # schema_hash -> model_class_name
# Source code of every generated Pydantic model, keyed by class name.
# generate_schemas_file emits these values into the schemas module.
_generated_models: dict[str, str] = {}  # model_class_name -> model_source_code
18
+
19
+
20
def _get_schema_hash(schema: dict[str, Any]) -> str:
    """Generate a short, deterministic hash identifying a unique schema.

    The hash is key-order independent (``sort_keys=True``) so two schemas
    with the same content always map to the same registry entry.

    Args:
        schema: The (resolved) OpenAPI schema fragment to fingerprint.

    Returns:
        An 8-character hex digest used as a deduplication key.
    """
    try:
        schema_str = json.dumps(schema, sort_keys=True, default=str)
    except (TypeError, ValueError):
        # Fallback when JSON serialization fails (e.g. mixed-type keys).
        # Note: sorting raw items can raise the same TypeError that broke
        # json.dumps, so sort on the stringified keys instead.
        if isinstance(schema, dict):
            schema_str = str(sorted(schema.items(), key=lambda kv: str(kv[0])))
        else:
            schema_str = str(schema)
    # md5 is fine here: the hash is only a dedup key, not security-sensitive.
    return hashlib.md5(schema_str.encode()).hexdigest()[:8]
29
+
30
+
31
def _generate_model_name(operation: dict[str, Any], path: str, method: str, schema: dict[str, Any]) -> str:
    """Derive a readable model class name for a response schema.

    Preference order: the schema's own ``title``, then ``operationId`` +
    "Response", then a name built from the HTTP method and the last static
    path segment. The result is collapsed to a valid CamelCase identifier.
    """
    if "title" in schema:
        raw_name = schema["title"]
    elif "operationId" in operation:
        raw_name = operation["operationId"] + "Response"
    else:
        # Build a name from the path, ignoring templated segments like {id}.
        static_segments = [
            segment
            for segment in path.strip("/").split("/")
            if not (segment.startswith("{") and segment.endswith("}"))
        ]
        if static_segments:
            raw_name = f"{method.capitalize()}{static_segments[-1].capitalize()}Response"
        else:
            raw_name = f"{method.capitalize()}Response"

    # Collapse to CamelCase, dropping non-alphanumeric separators.
    cleaned = "".join(piece.capitalize() for piece in re.split(r"[^a-zA-Z0-9]", raw_name) if piece)

    # Python identifiers cannot begin with a digit.
    if cleaned and cleaned[0].isdigit():
        cleaned = "Response" + cleaned

    return cleaned or "Response"
55
+
56
+
57
def _generate_response_model_class(schema: dict[str, Any], model_name: str) -> str:
    """Emit Pydantic model source code for an OpenAPI response schema.

    Array schemas are wrapped in a model with a single ``value`` list field;
    object schemas get one field per property (with ``Field(alias=...)`` for
    names that are not valid identifiers). Returns an empty string for
    schemas that cannot be modelled (primitives, missing schema, ...).
    """
    if not schema:
        return ""

    schema_type = schema.get("type")

    if schema_type == "array":
        items_schema = schema.get("items", {})
        if items_schema and (items_schema.get("properties") or items_schema.get("type") == "object"):
            # Object items get their own model; the outer model wraps a list.
            item_model_name = f"{model_name}Item"
            item_source = _generate_response_model_class(items_schema, item_model_name)
            wrapper = f"""
class {model_name}(BaseModel):
    value: List[{item_model_name}]
"""
            return item_source + wrapper

        # Simple (or unknown) item types map onto builtin Python types.
        simple_types = {"string": "str", "integer": "int", "number": "float", "boolean": "bool"}
        item_type = simple_types.get(items_schema.get("type"), "Any") if items_schema else "Any"
        return f"""
class {model_name}(BaseModel):
    value: List[{item_type}]
"""

    if schema_type == "object" or "properties" in schema:
        properties, required_fields = _extract_properties_from_schema(schema)

        if not properties:
            return f"""
class {model_name}(BaseModel):
    pass
"""

        field_lines = []
        for prop_name, prop_schema in properties.items():
            field_name = _sanitize_identifier(prop_name)
            is_required = prop_name in required_fields

            nested_items = prop_schema.get("items", {})
            if prop_schema.get("type") == "array" and nested_items.get("properties"):
                # Arrays of objects get a dedicated item model, registered
                # globally so the schemas file includes it.
                item_model_name = f"{model_name}{field_name.capitalize()}Item"
                item_source = _generate_response_model_class(nested_items, item_model_name)
                if item_source and item_model_name not in _generated_models:
                    _generated_models[item_model_name] = item_source
                python_type = f"List[{item_model_name}]" if is_required else f"Optional[List[{item_model_name}]]"
            else:
                python_type = _openapi_type_to_python_type(prop_schema, required=is_required)

            # Properties whose names aren't valid identifiers (e.g.
            # "@odata.context") need an explicit Field alias.
            if prop_name != field_name or prop_name.startswith("@"):
                if is_required:
                    field_lines.append(f"    {field_name}: {python_type} = Field(alias='{prop_name}')")
                else:
                    field_lines.append(f"    {field_name}: {python_type} = Field(None, alias='{prop_name}')")
            elif is_required:
                field_lines.append(f"    {field_name}: {python_type}")
            else:
                field_lines.append(f"    {field_name}: {python_type} = None")

        return f"""
class {model_name}(BaseModel):
{chr(10).join(field_lines)}
"""

    # Anything else (primitives, unions, ...) falls back to generic typing.
    return ""
144
+
145
+
146
+ def _get_or_create_response_model(
147
+ operation: dict[str, Any], path: str, method: str, schema: dict[str, Any]
148
+ ) -> str | None:
149
+ """Get or create a response model for a given schema, avoiding duplicates."""
150
+ if not schema:
151
+ return None
152
+
153
+ try:
154
+ # Generate hash for this schema
155
+ schema_hash = _get_schema_hash(schema)
156
+
157
+ # Check if we already have a model for this schema
158
+ if schema_hash in _schema_registry:
159
+ return _schema_registry[schema_hash]
160
+
161
+ # Generate new model
162
+ model_name = _generate_model_name(operation, path, method, schema)
163
+
164
+ # Ensure unique model name
165
+ base_name = model_name
166
+ counter = 1
167
+ while model_name in _generated_models:
168
+ model_name = f"{base_name}{counter}"
169
+ counter += 1
170
+
171
+ # Generate model source code
172
+ model_code = _generate_response_model_class(schema, model_name)
173
+
174
+ if model_code:
175
+ # Register the model
176
+ _schema_registry[schema_hash] = model_name
177
+ _generated_models[model_name] = model_code
178
+ return model_name
179
+
180
+ except Exception as e:
181
+ # If model generation fails, log and continue with fallback
182
+ print(f"Warning: Could not generate model for {method.upper()} {path}: {e}")
183
+
184
+ return None
185
+
14
186
 
15
187
  class Parameters(BaseModel):
16
188
  name: str
@@ -220,15 +392,20 @@ def _load_and_resolve_references(path: Path):
220
392
  return replace_refs(schema)
221
393
 
222
394
 
223
- def _determine_return_type(operation: dict[str, Any]) -> str:
395
+ def _determine_return_type(operation: dict[str, Any], path: str, method: str) -> str:
224
396
  """
225
397
  Determine the return type from the response schema.
226
398
 
399
+ Now generates specific Pydantic model classes for response schemas where possible,
400
+ falling back to generic types for complex or missing schemas.
401
+
227
402
  Args:
228
403
  operation (dict): The operation details from the schema.
404
+ path (str): The API path (e.g., '/users/{user_id}').
405
+ method (str): The HTTP method (e.g., 'get').
229
406
 
230
407
  Returns:
231
- str: The appropriate return type annotation (list[Any], dict[str, Any], or Any)
408
+ str: The appropriate return type annotation (specific model class name or generic type)
232
409
  """
233
410
  responses = operation.get("responses", {})
234
411
  # Find successful response (2XX)
@@ -247,7 +424,12 @@ def _determine_return_type(operation: dict[str, Any]) -> str:
247
424
  if content_type.startswith("application/json") and "schema" in content_info:
248
425
  schema = content_info["schema"]
249
426
 
250
- # Only determine if it's a list, dict, or unknown (Any)
427
+ # generate a specific model class for this schema
428
+ model_name = _get_or_create_response_model(operation, path, method, schema)
429
+
430
+ if model_name:
431
+ return model_name
432
+
251
433
  if schema.get("type") == "array":
252
434
  return "list[Any]"
253
435
  elif schema.get("type") == "object" or "$ref" in schema:
@@ -536,7 +718,7 @@ def _generate_method_code(path, method, operation):
536
718
  # --- End Alias duplicate parameter names ---
537
719
 
538
720
  # --- Determine Return Type and Body Characteristics ---
539
- return_type = _determine_return_type(operation)
721
+ return_type = _determine_return_type(operation, path, method)
540
722
 
541
723
  body_required = has_body and operation["requestBody"].get("required", False) # Remains useful
542
724
 
@@ -751,7 +933,7 @@ def _generate_method_code(path, method, operation):
751
933
  # openapi_path_comment_for_docstring = f"# openapi_path: {path}"
752
934
  # docstring_parts.append(openapi_path_comment_for_docstring)
753
935
 
754
- return_type = _determine_return_type(operation)
936
+ return_type = _determine_return_type(operation, path, method)
755
937
 
756
938
  # Summary
757
939
  summary = operation.get("summary", "").strip()
@@ -978,9 +1160,15 @@ def _generate_method_code(path, method, operation):
978
1160
  # using the prepared URL, query parameters, request body data, files, and content type.
979
1161
  # Use convenience methods that automatically handle responses and errors
980
1162
 
1163
+ # Determine the appropriate return statement based on return type
1164
+ if return_type in ["Any", "dict[str, Any]", "list[Any]"]:
1165
+ return_statement = " return self._handle_response(response)"
1166
+ else:
1167
+ return_statement = f" return {return_type}.model_validate(self._handle_response(response))"
1168
+
981
1169
  if method_lower == "get":
982
1170
  body_lines.append(" response = self._get(url, params=query_params)")
983
- body_lines.append(" return self._handle_response(response)")
1171
+ body_lines.append(return_statement)
984
1172
  elif method_lower == "post":
985
1173
  if selected_content_type == "multipart/form-data":
986
1174
  body_lines.append(
@@ -990,7 +1178,7 @@ def _generate_method_code(path, method, operation):
990
1178
  body_lines.append(
991
1179
  f" response = self._post(url, data=request_body_data, params=query_params, content_type='{final_content_type_for_api_call}')"
992
1180
  )
993
- body_lines.append(" return self._handle_response(response)")
1181
+ body_lines.append(return_statement)
994
1182
  elif method_lower == "put":
995
1183
  if selected_content_type == "multipart/form-data":
996
1184
  body_lines.append(
@@ -1000,16 +1188,16 @@ def _generate_method_code(path, method, operation):
1000
1188
  body_lines.append(
1001
1189
  f" response = self._put(url, data=request_body_data, params=query_params, content_type='{final_content_type_for_api_call}')"
1002
1190
  )
1003
- body_lines.append(" return self._handle_response(response)")
1191
+ body_lines.append(return_statement)
1004
1192
  elif method_lower == "patch":
1005
1193
  body_lines.append(" response = self._patch(url, data=request_body_data, params=query_params)")
1006
- body_lines.append(" return self._handle_response(response)")
1194
+ body_lines.append(return_statement)
1007
1195
  elif method_lower == "delete":
1008
1196
  body_lines.append(" response = self._delete(url, params=query_params)")
1009
- body_lines.append(" return self._handle_response(response)")
1197
+ body_lines.append(return_statement)
1010
1198
  else:
1011
1199
  body_lines.append(f" response = self._{method_lower}(url, data=request_body_data, params=query_params)")
1012
- body_lines.append(" return self._handle_response(response)")
1200
+ body_lines.append(return_statement)
1013
1201
 
1014
1202
  # --- Combine Signature, Docstring, and Body for Final Method Code ---
1015
1203
  method_code = signature + formatted_docstring + "\n" + "\n".join(body_lines)
@@ -1020,9 +1208,69 @@ def load_schema(path: Path):
1020
1208
  return _load_and_resolve_references(path)
1021
1209
 
1022
1210
 
1211
def generate_schemas_file(schema, class_name: str | None = None, filter_config_path: str | None = None):
    """
    Build the source of a Python module containing only the response schema
    classes generated from an OpenAPI schema.

    Args:
        schema (dict): The OpenAPI schema as a dictionary.
        class_name (str | None): Optional class name for context.
        filter_config_path (str | None): Optional path to a JSON filter
            configuration file restricting which operations are processed.

    Returns:
        str: Python source code defining the response schema classes.
    """
    # Start from a clean slate so models from a previous run don't leak in.
    global _schema_registry, _generated_models
    _schema_registry.clear()
    _generated_models.clear()

    filter_config = load_filter_config(filter_config_path) if filter_config_path else None

    http_verbs = {"get", "post", "put", "delete", "patch", "options", "head"}

    # Walking every operation populates _generated_models as a side effect
    # of _determine_return_type's model generation.
    for path, path_info in schema.get("paths", {}).items():
        for method in path_info:
            if method not in http_verbs:
                continue
            if not should_process_operation(path, method, filter_config):
                continue
            _determine_return_type(path_info[method], path, method)

    imports_section = "\n".join(
        [
            "from typing import Any, Optional, List",
            "from pydantic import BaseModel, Field",
        ]
    )
    models_section = "\n".join(_generated_models.values()) if _generated_models else ""

    if not models_section:
        # No models were generated; still emit a valid (minimal) module.
        return f"""{imports_section}

# No response models were generated for this OpenAPI schema
"""

    return f"""{imports_section}

# Generated Response Models

{models_section}
"""
1268
+
1269
+
1023
1270
  def generate_api_client(schema, class_name: str | None = None, filter_config_path: str | None = None):
1024
1271
  """
1025
1272
  Generate a Python API client class from an OpenAPI schema.
1273
+ Models are not included - they should be generated separately using generate_schemas_file.
1026
1274
 
1027
1275
  Args:
1028
1276
  schema (dict): The OpenAPI schema as a dictionary.
@@ -1032,6 +1280,10 @@ def generate_api_client(schema, class_name: str | None = None, filter_config_pat
1032
1280
  Returns:
1033
1281
  str: A string containing the Python code for the API client class.
1034
1282
  """
1283
+ global _schema_registry, _generated_models
1284
+ _schema_registry.clear()
1285
+ _generated_models.clear()
1286
+
1035
1287
  # Load filter configuration if provided
1036
1288
  filter_config = None
1037
1289
  if filter_config_path:
@@ -1057,7 +1309,7 @@ def generate_api_client(schema, class_name: str | None = None, filter_config_pat
1057
1309
  if api_title:
1058
1310
  # Convert API title to a clean class name
1059
1311
  if class_name:
1060
- clean_name = class_name.capitalize()[:-3] if class_name.endswith("App") else class_name.capitalize()
1312
+ clean_name = class_name[:-3] if class_name.endswith("App") else class_name.capitalize()
1061
1313
  else:
1062
1314
  base_name = "".join(word.capitalize() for word in api_title.split())
1063
1315
  clean_name = "".join(c for c in base_name if c.isalnum())
@@ -1114,21 +1366,32 @@ def generate_api_client(schema, class_name: str | None = None, filter_config_pat
1114
1366
  {tools_list}
1115
1367
  ]"""
1116
1368
 
1117
- # Generate class imports
1369
+ # Generate class imports - import from separate schemas file
1118
1370
  imports = [
1119
1371
  "from typing import Any, Optional, List",
1120
1372
  "from universal_mcp.applications import APIApplication",
1121
1373
  "from universal_mcp.integrations import Integration",
1374
+ "from .schemas import *",
1122
1375
  ]
1123
1376
 
1124
- # Construct the class code
1125
- class_code = (
1126
- "\n".join(imports) + "\n\n"
1127
- f"class {class_name}(APIApplication):\n"
1128
- f" def __init__(self, integration: Integration = None, **kwargs) -> None:\n"
1129
- f" super().__init__(name='{class_name.lower()}', integration=integration, **kwargs)\n"
1130
- f' self.base_url = "{base_url}"\n\n' + "\n\n".join(methods) + "\n\n" + list_tools_method + "\n"
1131
- )
1377
+ # Construct the class code (no model classes since they're in separate file)
1378
+ imports_section = "\n".join(imports)
1379
+
1380
+ class_code_parts = [
1381
+ imports_section,
1382
+ "",
1383
+ f"class {class_name}(APIApplication):",
1384
+ " def __init__(self, integration: Integration = None, **kwargs) -> None:",
1385
+ f" super().__init__(name='{class_name.lower()}', integration=integration, **kwargs)",
1386
+ f' self.base_url = "{base_url}"',
1387
+ "",
1388
+ "\n\n".join(methods),
1389
+ "",
1390
+ list_tools_method,
1391
+ "",
1392
+ ]
1393
+
1394
+ class_code = "\n".join(class_code_parts)
1132
1395
  return class_code
1133
1396
 
1134
1397
 
@@ -0,0 +1,275 @@
1
+ import ast
2
+ import re
3
+
4
+ import litellm
5
+
6
+
7
def add_hint_tags_to_docstrings(input_path: str, output_path: str):
    """
    Read a Python API client file, inspect each function, and add behavioral
    hint tags to the docstring's ``Tags:`` section:

    - 'readOnlyHint': tool does not modify its environment (fetching, reading, etc.)
    - 'destructiveHint': tool may perform destructive updates
    - 'openWorldHint': tool interacts with external entities (3rd party APIs)

    Functions can receive multiple tags (e.g. 'readOnlyHint, openWorldHint').
    Tags are suggested by an LLM (via litellm) from the function name,
    parameters, docstring, and the HTTP verb it calls; existing tags are left
    untouched. The modified source is written to *output_path*, formatted
    with Black when available.

    Args:
        input_path (str): Path of the Python source file to process.
        output_path (str): Path where the modified source is written.
    """
    with open(input_path, encoding="utf-8") as f:
        source = f.read()
    tree = ast.parse(source)

    # Processing statistics, reported in the summary at the end.
    total_functions = 0
    functions_with_http_methods = 0
    functions_processed_by_llm = 0
    functions_tagged = 0
    llm_failures = 0

    class DocstringTagAdder(ast.NodeTransformer):
        def _find_http_method(self, node):
            """Return the first self._get/_post/... verb used in the body, or None."""
            http_methods = []

            def visit_node(n):
                if (
                    isinstance(n, ast.Call)
                    and isinstance(n.func, ast.Attribute)
                    and isinstance(n.func.value, ast.Name)
                    and n.func.value.id == "self"
                    and n.func.attr in ["_get", "_post", "_put", "_patch", "_delete"]
                ):
                    http_methods.append(n.func.attr[1:])
                for child in ast.iter_child_nodes(n):
                    visit_node(child)

            visit_node(node)
            return http_methods[0] if http_methods else None

        def visit_FunctionDef(self, node):
            nonlocal \
                total_functions, \
                functions_with_http_methods, \
                functions_processed_by_llm, \
                functions_tagged, \
                llm_failures

            total_functions += 1
            print(f"\n[{total_functions}] Processing function: {node.name}")

            http_method = self._find_http_method(node)
            tag_to_add = None

            if http_method:
                functions_with_http_methods += 1
                print(f" └─ Found HTTP method: {http_method.upper()}")

                # Delegate the actual tagging decision to the LLM agent.
                print(" └─ Calling LLM to determine tag...")
                tag_to_add = self._get_tag_suggestion_from_agent(node, http_method)

                if tag_to_add:
                    functions_processed_by_llm += 1
                    print(f" └─ LLM suggested tags: {tag_to_add}")
                else:
                    print(" └─ LLM failed or returned invalid response")
            else:
                print(" └─ No HTTP method found - skipping")

            if tag_to_add:
                docstring = ast.get_docstring(node, clean=False)
                if docstring is not None:
                    # Only functions with an existing "Tags:" section are updated.
                    tags_match = re.search(r"Tags:\s*(.+)", docstring, re.DOTALL)
                    if tags_match:
                        tags_line = tags_match.group(1).strip()
                        existing_tags = [tag.strip() for tag in tags_line.split(",")]

                        # Only add tags that are not already present.
                        new_tags_to_add = [tag.strip() for tag in tag_to_add.split(",")]
                        tags_to_add = [tag for tag in new_tags_to_add if tag not in existing_tags]

                        if tags_to_add:
                            new_tags_line = tags_line.rstrip() + f", {', '.join(tags_to_add)}"
                            new_docstring = re.sub(r"(Tags:\s*)(.+)", r"\1" + new_tags_line, docstring, flags=re.DOTALL)
                            # Replace the docstring node in place.
                            if isinstance(node.body[0], ast.Expr) and isinstance(node.body[0].value, ast.Constant):
                                node.body[0].value.value = new_docstring
                                functions_tagged += 1
                                print(f" └─ ✅ Tags '{', '.join(tags_to_add)}' added successfully")
                        else:
                            print(f" └─ ⚠️ All tags '{tag_to_add}' already exist - skipping")
                    else:
                        print(" └─ ⚠️ No 'Tags:' section found in docstring - skipping")
                else:
                    print(" └─ ⚠️ No docstring found - skipping")
            return node

        # BUG FIX: NodeTransformer dispatches `async def` to
        # visit_AsyncFunctionDef; without this alias, async API methods were
        # silently skipped and never tagged.
        visit_AsyncFunctionDef = visit_FunctionDef

        def _get_tag_suggestion_from_agent(self, node, http_method):
            """Ask the LLM which tags fit this function; None on failure/'none'."""
            nonlocal llm_failures

            function_name = node.name
            docstring = ast.get_docstring(node, clean=False) or ""
            parameters = [arg.arg for arg in node.args.args if arg.arg != "self"]

            system_prompt = """You are an expert at analyzing API functions and determining their characteristics.

Your task is to analyze each function and decide which tags to add:
- 'readOnlyHint': Tool does not modify its environment (fetching, reading, etc.)
- 'destructiveHint': Tool may perform destructive updates
- 'openWorldHint': Tool interacts with external entities (3rd party APIs)

IMPORTANT:
- HTTP method alone is NOT enough to determine the tags. You must analyze the function's actual purpose.
- Since these are all API client functions, MOST functions should have 'openWorldHint' (they interact with external APIs).
- Only functions that are purely local operations (like reading local files) should NOT have 'openWorldHint'.

Functions can have multiple tags. For example:
- A function that reads from Gmail API: 'readOnlyHint, openWorldHint'
- A function that deletes from GitHub API: 'destructiveHint, openWorldHint'
- A function that only reads local files: 'readOnlyHint' (no openWorldHint)

Respond with comma-separated tags (e.g., 'readOnlyHint, openWorldHint') or 'none' if no tags apply."""

            user_prompt = f"""Analyze this API function and decide which tags to add:

Function Name: {function_name}
HTTP Method: {http_method}
Parameters: {", ".join(parameters)}
Docstring: {docstring[:1000]}...

Based on this information, which tags should this function get?

Think through:
1. What does this function actually do? (from name and docstring)
2. Does it modify its environment or just read/fetch?
3. Does it interact with external entities (3rd party APIs)?
4. Could it be potentially destructive?

GUIDELINES for readOnlyHint (does not modify environment):
- Functions that only READ or FETCH data
- Functions that VALIDATE or CHECK things without saving
- Functions that EXPORT or DOWNLOAD data
- Functions that perform HEALTH CHECKS or PING operations
- Functions that REFRESH tokens or sessions
- Functions that SEARCH or FILTER data
- Functions that GET information without changing anything
- Functions that LIST or RETRIEVE data

GUIDELINES for destructiveHint (DESTROYS or DELETES things):
- Functions that DELETE resources or data
- Functions that REMOVE or ERASE things
- Functions that DESTROY or TERMINATE resources
- Functions that CANCEL or ABORT operations
- Functions that REVOKE or INVALIDATE things

IMPORTANT:
- A function should NOT have both readOnlyHint and destructiveHint - they are mutually exclusive.
- Creating, sending, or updating things is NOT destructive - only deleting/destroying is destructive.
- Functions that CREATE, SEND, UPDATE, or MODIFY should NOT get destructiveHint.

GUIDELINES for openWorldHint (interacts with external entities):
- Functions that interact with 3rd party APIs (Gmail, Outlook, Reddit, GitHub, etc.)
- Functions that make external HTTP requests
- Functions that connect to external services
- Functions that interact with cloud services
- Functions that communicate with external databases
- Functions that call external webhooks
- MOST API client functions will have this tag since they interact with external APIs

NOT openWorldHint (local operations):
- Functions that only read local files
- Functions that process local data
- Functions that work with local databases
- Functions that manipulate local variables
- Functions that only work with local system resources

Examples:
- Gmail API read function: 'readOnlyHint, openWorldHint'
- Gmail API send email: 'openWorldHint' (not destructive, just sending)
- Gmail API create draft: 'openWorldHint' (not destructive, just creating)
- GitHub API delete repository: 'destructiveHint, openWorldHint'
- Local file reader: 'readOnlyHint' (no openWorldHint)
- Local data processor: 'none' (no tags)

Focus on the FUNCTION'S PURPOSE, not just the HTTP method.

Your answer (comma-separated tags or 'none'):"""

            try:
                response = litellm.completion(
                    model="perplexity/sonar-pro",
                    messages=[{"role": "system", "content": system_prompt}, {"role": "user", "content": user_prompt}],
                    temperature=0.1,
                    max_tokens=50,
                )

                suggested_tags = response.choices[0].message.content.strip().lower()

                if suggested_tags == "none":
                    return None

                # Normalize the comma-separated, lowercased answer back to
                # the canonical camelCase tag names; ignore anything else.
                canonical = {
                    "readonlyhint": "readOnlyHint",
                    "destructivehint": "destructiveHint",
                    "openworldhint": "openWorldHint",
                }
                valid_tags = [
                    canonical[tag.strip()] for tag in suggested_tags.split(",") if tag.strip() in canonical
                ]

                return ", ".join(valid_tags) if valid_tags else None

            except Exception as e:
                llm_failures += 1
                print(f" └─ ❌ LLM failed for function {function_name}: {e}")
                # No tag is added when the LLM call fails.
                return None

    new_tree = DocstringTagAdder().visit(tree)
    ast.fix_missing_locations(new_tree)
    new_source = ast.unparse(new_tree)

    # Print summary statistics
    print(f"\n{'=' * 60}")
    print("📊 PROCESSING SUMMARY")
    print(f"{'=' * 60}")
    print(f"Total functions processed: {total_functions}")
    print(f"Functions with HTTP methods: {functions_with_http_methods}")
    print(f"Functions processed by LLM: {functions_processed_by_llm}")
    print(f"Functions successfully tagged: {functions_tagged}")
    print(f"LLM failures: {llm_failures}")
    if functions_with_http_methods > 0:
        print(
            f"LLM success rate: {(functions_processed_by_llm / functions_with_http_methods * 100):.1f}% of HTTP functions"
        )
    print(f"{'=' * 60}")

    # Format with Black in memory; fall back to the raw unparse output if
    # Black is missing or formatting fails (e.g. black.NothingChanged).
    try:
        import black

        formatted_content = black.format_file_contents(new_source, fast=False, mode=black.FileMode())
        with open(output_path, "w", encoding="utf-8") as f:
            f.write(formatted_content)
        print(f"Black formatting applied successfully to: {output_path}")
    except ImportError:
        print(f"Black not installed. Skipping formatting for: {output_path}")
        with open(output_path, "w", encoding="utf-8") as f:
            f.write(new_source)
    except Exception as e:
        print(f"Black formatting failed for {output_path}: {e}")
        with open(output_path, "w", encoding="utf-8") as f:
            f.write(new_source)