universal-mcp 0.1.8rc1__py3-none-any.whl → 0.1.8rc2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- universal_mcp/applications/application.py +6 -5
- universal_mcp/applications/calendly/README.md +78 -0
- universal_mcp/applications/calendly/app.py +954 -0
- universal_mcp/applications/e2b/app.py +18 -12
- universal_mcp/applications/firecrawl/app.py +28 -1
- universal_mcp/applications/github/app.py +150 -107
- universal_mcp/applications/google_calendar/app.py +72 -137
- universal_mcp/applications/google_docs/app.py +35 -15
- universal_mcp/applications/google_drive/app.py +84 -55
- universal_mcp/applications/google_mail/app.py +143 -53
- universal_mcp/applications/google_sheet/app.py +61 -38
- universal_mcp/applications/markitdown/app.py +12 -11
- universal_mcp/applications/notion/app.py +199 -89
- universal_mcp/applications/perplexity/app.py +17 -15
- universal_mcp/applications/reddit/app.py +110 -101
- universal_mcp/applications/resend/app.py +14 -7
- universal_mcp/applications/serpapi/app.py +13 -6
- universal_mcp/applications/tavily/app.py +13 -10
- universal_mcp/applications/wrike/README.md +71 -0
- universal_mcp/applications/wrike/__init__.py +0 -0
- universal_mcp/applications/wrike/app.py +1044 -0
- universal_mcp/applications/youtube/README.md +82 -0
- universal_mcp/applications/youtube/__init__.py +0 -0
- universal_mcp/applications/youtube/app.py +986 -0
- universal_mcp/applications/zenquotes/app.py +13 -3
- universal_mcp/exceptions.py +8 -2
- universal_mcp/integrations/__init__.py +15 -1
- universal_mcp/integrations/integration.py +132 -27
- universal_mcp/servers/__init__.py +6 -15
- universal_mcp/servers/server.py +209 -153
- universal_mcp/stores/__init__.py +7 -2
- universal_mcp/stores/store.py +103 -42
- universal_mcp/tools/__init__.py +3 -0
- universal_mcp/tools/adapters.py +40 -0
- universal_mcp/tools/func_metadata.py +214 -0
- universal_mcp/tools/tools.py +285 -0
- universal_mcp/utils/docgen.py +277 -123
- universal_mcp/utils/docstring_parser.py +156 -0
- universal_mcp/utils/openapi.py +149 -40
- {universal_mcp-0.1.8rc1.dist-info → universal_mcp-0.1.8rc2.dist-info}/METADATA +7 -3
- universal_mcp-0.1.8rc2.dist-info/RECORD +71 -0
- universal_mcp-0.1.8rc1.dist-info/RECORD +0 -58
- /universal_mcp/{utils/bridge.py → applications/calendly/__init__.py} +0 -0
- {universal_mcp-0.1.8rc1.dist-info → universal_mcp-0.1.8rc2.dist-info}/WHEEL +0 -0
- {universal_mcp-0.1.8rc1.dist-info → universal_mcp-0.1.8rc2.dist-info}/entry_points.txt +0 -0
universal_mcp/utils/docgen.py
CHANGED
@@ -5,9 +5,14 @@ using LLMs with structured output
|
|
5
5
|
"""
|
6
6
|
|
7
7
|
import ast
|
8
|
+
import json
|
8
9
|
import os
|
10
|
+
import sys
|
11
|
+
import textwrap
|
12
|
+
import traceback
|
9
13
|
|
10
14
|
import litellm
|
15
|
+
import re
|
11
16
|
from pydantic import BaseModel, Field
|
12
17
|
|
13
18
|
|
@@ -21,6 +26,14 @@ class DocstringOutput(BaseModel):
|
|
21
26
|
description="Dictionary mapping parameter names to their descriptions"
|
22
27
|
)
|
23
28
|
returns: str = Field(description="Description of what the function returns")
|
29
|
+
raises: dict[str, str] = Field(
|
30
|
+
default_factory=dict,
|
31
|
+
description="Dictionary mapping potential exception types/reasons to their descriptions"
|
32
|
+
)
|
33
|
+
tags: list[str] = Field(
|
34
|
+
default_factory=list,
|
35
|
+
description="List of relevant tags for the function (e.g., action, job type, async status, importance)"
|
36
|
+
)
|
24
37
|
|
25
38
|
|
26
39
|
class FunctionExtractor(ast.NodeVisitor):
|
@@ -47,17 +60,23 @@ class FunctionExtractor(ast.NodeVisitor):
|
|
47
60
|
return None
|
48
61
|
|
49
62
|
def visit_FunctionDef(self, node: ast.FunctionDef):
    """Record a public synchronous function, then keep walking its body.

    Functions whose name starts with an underscore, and the function named
    ``list_tools``, are not recorded. A function is also skipped when no
    source segment can be obtained for it.
    """
    name = node.name
    is_excluded = name.startswith('_') or name == 'list_tools'
    if not is_excluded:
        segment = self._get_source_segment(node)
        if segment:
            self.functions.append((name, segment))
    # Descend regardless of exclusion so nested definitions are still visited.
    self.generic_visit(node)
|
55
71
|
|
56
72
|
def visit_AsyncFunctionDef(self, node: ast.AsyncFunctionDef):
    """Record a public ``async def`` function, then keep walking its body.

    Functions whose name starts with an underscore, and the function named
    ``list_tools``, are not recorded. A function is also skipped when no
    source segment can be obtained for it.
    """
    name = node.name
    is_excluded = name.startswith('_') or name == 'list_tools'
    if not is_excluded:
        segment = self._get_source_segment(node)
        if segment:
            self.functions.append((name, segment))
    # Descend regardless of exclusion so nested definitions are still visited.
    self.generic_visit(node)
|
62
81
|
|
63
82
|
|
@@ -115,8 +134,49 @@ def extract_functions_from_script(file_path: str) -> list[tuple[str, str]]:
|
|
115
134
|
return []
|
116
135
|
|
117
136
|
|
137
|
+
def extract_json_from_text(text):
    """Extract valid JSON from text that might contain additional content.

    Three strategies are tried in order:

    1. The contents of the first markdown code fence (three backticks,
       optionally tagged ``json``).
    2. The first position of a ``{`` from which a complete JSON object can be
       decoded. Decoding is done by the JSON parser itself, so braces that
       appear inside string values do not confuse it, and a non-JSON
       ``{placeholder}`` earlier in the text is skipped.
    3. The whole text as a JSON document.

    Args:
        text: Raw text response from the model.

    Returns:
        The decoded JSON value (normally a dict; a fenced block or a bare
        document may decode to any JSON type).

    Raises:
        ValueError: If no valid JSON could be extracted.
    """
    # 1. JSON between triple backticks (common markdown pattern).
    fenced = re.search(r"```(?:json)?\s*([\s\S]*?)\s*```", text)
    if fenced:
        try:
            return json.loads(fenced.group(1))
        except json.JSONDecodeError:
            pass  # Fence held something else; fall through to the other strategies.

    # 2. An embedded object. raw_decode parses exactly one JSON value starting
    #    at the given index and ignores whatever follows it.
    decoder = json.JSONDecoder()
    start = text.find("{")
    while start != -1:
        try:
            value, _end = decoder.raw_decode(text, start)
            return value
        except json.JSONDecodeError:
            start = text.find("{", start + 1)

    # 3. The text as a whole (covers bare arrays, strings, numbers, ...).
    try:
        return json.loads(text)
    except json.JSONDecodeError as exc:
        raise ValueError("Could not extract valid JSON from the response") from exc
|
176
|
+
|
177
|
+
|
118
178
|
def generate_docstring(
|
119
|
-
function_code: str, model: str = "
|
179
|
+
function_code: str, model: str = "perplexity/sonar-pro"
|
120
180
|
) -> DocstringOutput:
|
121
181
|
"""
|
122
182
|
Generate a docstring for a Python function using litellm with structured output.
|
@@ -129,31 +189,42 @@ def generate_docstring(
|
|
129
189
|
A DocstringOutput object containing the structured docstring components
|
130
190
|
"""
|
131
191
|
system_prompt = """You are a helpful AI assistant specialized in writing high-quality Google-style Python docstrings.
|
132
|
-
You MUST ALWAYS include an Args section, even if there are no arguments (in which case mention 'None').
|
192
|
+
You MUST ALWAYS include an Args section, even if there are no arguments (in which case mention 'None').
|
193
|
+
You should also generate a list of tags describing the function's purpose and characteristics."""
|
194
|
+
|
195
|
+
user_prompt = f"""Generate a high-quality Google-style docstring for the following Python function.
|
196
|
+
Analyze the function's name, parameters, return values, potential exceptions, and functionality to create a comprehensive docstring.
|
133
197
|
|
134
|
-
user_prompt = f"""Generate a high-quality Google-style docstring for the following Python function.
|
135
|
-
Analyze the function's name, parameters, return values, and functionality to create a comprehensive docstring.
|
136
|
-
|
137
198
|
The docstring MUST:
|
138
199
|
1. Start with a clear, concise summary of what the function does
|
139
200
|
2. ALWAYS include Args section with description of each parameter (or 'None' if no parameters)
|
140
|
-
3. Include Returns section describing the return value
|
141
|
-
4.
|
142
|
-
|
201
|
+
3. Include Returns section describing the return value (or 'None' if nothing is explicitly returned)
|
202
|
+
4. **Optionally include a Raises section if the function might raise exceptions, describing the exception type/reason and when it's raised.**
|
203
|
+
5. **Include a Tags section with a list of strings describing the function's purpose, characteristics, or keywords.** Tags should be lowercase and single words or hyphenated phrases. Include tags like:
|
204
|
+
- The main action (e.g., 'scrape', 'search', 'start', 'check', 'cancel', 'list')
|
205
|
+
- The type of job ('async_job', 'batch')
|
206
|
+
- The stage of an asynchronous job ('start', 'status', 'cancel')
|
207
|
+
- Related domain/feature ('ai', 'management')
|
208
|
+
- **Significance: Add the tag 'important' to functions that represent core capabilities or primary interaction points of the class (e.g., initiating actions like scrape, search, or starting async jobs).**
|
209
|
+
6. Be formatted according to Google Python Style Guide
|
210
|
+
|
143
211
|
Here is the function:
|
144
|
-
|
212
|
+
|
145
213
|
{function_code}
|
146
|
-
|
147
|
-
Respond in JSON format with the following structure
|
214
|
+
|
215
|
+
Respond ONLY in JSON format with the following structure. **Include the 'raises' field only if the function is likely to raise exceptions.** **Include the 'tags' field as a list of strings.**
|
148
216
|
{{
|
149
|
-
|
150
|
-
|
151
|
-
|
217
|
+
"summary": "A clear, concise summary of what the function does",
|
218
|
+
"args": {{"param_name": "param description", "param_name2": "param description"}},
|
219
|
+
"returns": "Description of what the function returns",
|
220
|
+
"raises": {{
|
221
|
+
"ExceptionType": "Description of when/why this exception is raised"
|
222
|
+
}},
|
223
|
+
"tags": ["tag1", "tag2", "important"]
|
152
224
|
}}
|
153
225
|
"""
|
154
226
|
|
155
227
|
try:
|
156
|
-
# Use regular completion and parse the JSON ourselves instead of using response_model
|
157
228
|
response = litellm.completion(
|
158
229
|
model=model,
|
159
230
|
messages=[
|
@@ -162,149 +233,231 @@ def generate_docstring(
|
|
162
233
|
],
|
163
234
|
)
|
164
235
|
|
165
|
-
# Get the response content
|
166
236
|
response_text = response.choices[0].message.content
|
167
237
|
|
168
|
-
|
169
|
-
|
170
|
-
|
171
|
-
|
172
|
-
|
173
|
-
|
174
|
-
|
175
|
-
|
176
|
-
|
177
|
-
|
178
|
-
|
179
|
-
|
180
|
-
|
181
|
-
|
182
|
-
|
183
|
-
parsed_data["args"] = {"None": "This function takes no arguments"}
|
238
|
+
|
239
|
+
try:
|
240
|
+
parsed_data = extract_json_from_text(response_text)
|
241
|
+
except ValueError as e:
|
242
|
+
print(f"JSON extraction failed: {e}")
|
243
|
+
print(f"Raw response: {response_text[:100]}...") # Log first 100 chars for debugging
|
244
|
+
# Return a default structure if extraction fails
|
245
|
+
return DocstringOutput(
|
246
|
+
summary="Failed to extract docstring information",
|
247
|
+
args={"None": "This function takes no arguments"},
|
248
|
+
returns="Unknown return value"
|
249
|
+
)
|
250
|
+
model_args = parsed_data.get("args")
|
251
|
+
if not model_args:
|
252
|
+
parsed_data["args"] = {"None": "This function takes no arguments"}
|
184
253
|
|
185
|
-
# Create DocstringOutput from parsed data
|
186
254
|
return DocstringOutput(
|
187
|
-
summary=parsed_data.get("summary", ""),
|
255
|
+
summary=parsed_data.get("summary", "No documentation available"),
|
188
256
|
args=parsed_data.get("args", {"None": "This function takes no arguments"}),
|
189
|
-
returns=parsed_data.get("returns", ""),
|
257
|
+
returns=parsed_data.get("returns", "None"),
|
258
|
+
raises=parsed_data.get("raises", {}),
|
259
|
+
tags=parsed_data.get("tags", []) # Get tags, default to empty list
|
190
260
|
)
|
191
261
|
|
192
262
|
except Exception as e:
|
193
|
-
print(f"Error generating docstring: {e}")
|
194
|
-
|
263
|
+
print(f"Error generating docstring: {e}", file=sys.stderr)
|
264
|
+
traceback.print_exc(file=sys.stderr)
|
195
265
|
return DocstringOutput(
|
196
|
-
summary="
|
266
|
+
summary=f"Error generating docstring: {e}",
|
197
267
|
args={"None": "This function takes no arguments"},
|
198
268
|
returns="None",
|
269
|
+
raises={},
|
270
|
+
tags=["generation-error"]
|
199
271
|
)
|
200
272
|
|
201
|
-
|
202
273
|
def format_docstring(docstring: "DocstringOutput") -> str:
    """
    Format a DocstringOutput object into the content string for a docstring.

    This function produces the content *between* the triple quotes, without
    the leading/trailing triple quotes or the main indentation. Sections are
    emitted in the order summary, Args, Returns, Raises, Tags and separated by
    one blank line. A section is omitted entirely when it has no content:

    - ``self`` and ``cls`` are never listed under Args.
    - Returns is skipped when the description is empty or "none".
    - Raises entries with a blank type or blank description are skipped, and
      the header is only written when at least one entry remains.
    - Blank tags are skipped.

    Descriptions containing line breaks are written with a hanging indent so
    continuation lines stay visually (and syntactically, for Google-style
    parsers) attached to their entry.

    Args:
        docstring: The DocstringOutput object to format

    Returns:
        A formatted docstring content string ready to be indented and wrapped
        in triple quotes for insertion into code.
    """
    entry_indent = " " * 4
    continuation_indent = " " * 8

    def hanging(text: str, indent: str) -> str:
        """Strip *text* and re-join its non-blank lines using *indent*."""
        rows = (row.strip() for row in text.strip().splitlines())
        return f"\n{indent}".join(row for row in rows if row)

    parts = []

    summary = docstring.summary.strip()
    if summary:
        parts.append(summary)

    arg_entries = [
        f"{entry_indent}{name}: {hanging(desc, continuation_indent)}"
        for name, desc in docstring.args.items()
        if name not in ("self", "cls")
    ]
    if arg_entries:
        parts.append("\n".join(["Args:", *arg_entries]))

    returns_text = hanging(docstring.returns, entry_indent)
    if returns_text and returns_text.lower() != "none":
        parts.append(f"Returns:\n{entry_indent}{returns_text}")

    raise_entries = []
    for exception_type, exception_desc in docstring.raises.items():
        kind = exception_type.strip()
        detail = hanging(exception_desc, continuation_indent)
        if kind and detail:  # Both the type and its description must be non-empty.
            raise_entries.append(f"{entry_indent}{kind}: {detail}")
    if raise_entries:
        parts.append("\n".join(["Raises:", *raise_entries]))

    cleaned_tags = [tag.strip() for tag in docstring.tags if tag and tag.strip()]
    if cleaned_tags:
        parts.append(f"Tags:\n{entry_indent}{', '.join(cleaned_tags)}")

    return "\n\n".join(parts)
|
225
327
|
|
226
328
|
def insert_docstring_into_function(function_code: str, docstring: str) -> str:
    """
    Insert a docstring into a function's code, replacing an existing one.

    The snippet is parsed with ``ast`` and edited line-wise:

    - The new docstring is placed where the first body statement starts
      (including its decorators, if that statement is a decorated definition),
      using that line's indentation.
    - Every bare string-literal statement directly in the function body (the
      old docstring and any misplaced string literals) is removed, unless it
      shares a line with another statement.
    - All other lines - statements, comments, blank lines - are kept verbatim.
    - The result is re-parsed; if it is not valid Python the original code is
      returned.

    Functions whose body starts on the same line as the signature
    (``def f(): return 1``) cannot be edited line-wise and are returned
    unchanged.

    Args:
        function_code: The source code of the function snippet. This snippet is
            expected to contain exactly one function definition.
        docstring: The formatted docstring string content (without triple quotes or
            leading/trailing newlines within the content itself).

    Returns:
        The updated function code with the docstring inserted, or the original
        code if an error occurs during processing or parsing.
    """
    # Bound before the try block so every error handler can use it, even when
    # parsing fails before the function node is known.
    func_name = "unknown_function"
    try:
        lines = function_code.splitlines(keepends=True)

        tree = ast.parse(function_code)
        if not tree.body or not isinstance(tree.body[0], (ast.FunctionDef, ast.AsyncFunctionDef)):
            print("Warning: Could not parse function definition from code snippet. Returning original code.", file=sys.stderr)
            return function_code  # Return original code if parsing fails or isn't a function

        func_node = tree.body[0]
        func_name = func_node.name
        body = func_node.body
        first_stmt = body[0]

        def first_row(stmt: ast.stmt) -> int:
            """Return the 0-based index of the first source line of *stmt*."""
            # A decorated def/class reports the line of its keyword; the
            # decorators above it belong to the statement as well.
            rows = [stmt.lineno]
            rows.extend(dec.lineno for dec in getattr(stmt, "decorator_list", []))
            return min(rows) - 1

        def is_bare_string(stmt: ast.stmt) -> bool:
            """Tell whether *stmt* is an expression statement holding only a string."""
            return (
                isinstance(stmt, ast.Expr)
                and isinstance(stmt.value, ast.Constant)
                and isinstance(stmt.value.value, str)
            )

        insert_idx = first_row(first_stmt)
        anchor = lines[insert_idx]

        # If anything other than whitespace precedes the first statement on its
        # line, that is the signature itself: there is no line boundary to
        # splice a docstring into.
        if insert_idx == first_stmt.lineno - 1 and anchor[: first_stmt.col_offset].strip():
            print(f"Warning: Body of '{func_name}' starts on its signature line. Returning original code.", file=sys.stderr)
            return function_code

        body_indent = anchor[: len(anchor) - len(anchor.lstrip())]

        # rstrip() keeps blank docstring lines free of trailing whitespace.
        new_docstring_lines = [f'{body_indent}"""\n']
        new_docstring_lines.extend(
            f"{body_indent}{text}".rstrip() + "\n" for text in docstring.splitlines()
        )
        new_docstring_lines.append(f'{body_indent}"""\n')

        # Work out which source rows hold nothing but bare string statements.
        string_rows: set[int] = set()
        code_rows: set[int] = set()
        for stmt in body:
            rows = range(first_row(stmt), stmt.end_lineno)
            (string_rows if is_bare_string(stmt) else code_rows).update(rows)
        dropped_rows = string_rows - code_rows

        kept_tail = [
            line
            for row, line in enumerate(lines[insert_idx:], start=insert_idx)
            if row not in dropped_rows
        ]

        final_code = "".join([*lines[:insert_idx], *new_docstring_lines, *kept_tail])
        ast.parse(final_code)  # Validate before handing the result back.
        return final_code

    except SyntaxError as e:
        print(f"WARNING: Generated code snippet for '{func_name}' has syntax error: {e}", file=sys.stderr)
        traceback.print_exc(file=sys.stderr)
        return function_code
    except Exception as e:
        print(f"Error processing function snippet for insertion: {e}", file=sys.stderr)
        traceback.print_exc(file=sys.stderr)

        return function_code
|
305
458
|
|
306
459
|
|
307
|
-
def process_file(file_path: str, model: str = "
|
460
|
+
def process_file(file_path: str, model: str = "perplexity/sonar-pro") -> int:
|
308
461
|
"""
|
309
462
|
Process a Python file and add docstrings to all functions in it.
|
310
463
|
|
@@ -355,6 +508,7 @@ def process_file(file_path: str, model: str = "openai/gpt-4o") -> int:
|
|
355
508
|
f.write(updated_content)
|
356
509
|
print(f"Updated {count} functions in {file_path}")
|
357
510
|
else:
|
511
|
+
print(updated_function, "formatted docstring",formatted_docstring)
|
358
512
|
print(f"No changes made to {file_path}")
|
359
513
|
|
360
514
|
return count
|
@@ -0,0 +1,156 @@
|
|
1
|
+
import re
|
2
|
+
from typing import Any
|
3
|
+
|
4
|
+
|
5
|
+
def parse_docstring(docstring: str | None) -> dict[str, Any]:
|
6
|
+
"""
|
7
|
+
Parses a standard Python docstring into summary, args, returns, raises, and tags.
|
8
|
+
|
9
|
+
Args:
|
10
|
+
docstring: The docstring to parse.
|
11
|
+
|
12
|
+
Returns:
|
13
|
+
A dictionary with keys 'summary', 'args', 'returns', 'raises', 'tags'.
|
14
|
+
'args' is a dict mapping arg names to descriptions.
|
15
|
+
'raises' is a dict mapping exception type names to descriptions.
|
16
|
+
'tags' is a list of strings extracted from the 'Tags:' section, comma-separated.
|
17
|
+
"""
|
18
|
+
if not docstring:
|
19
|
+
return {"summary": "", "args": {}, "returns": "", "raises": {}, "tags": []}
|
20
|
+
|
21
|
+
lines = docstring.strip().splitlines()
|
22
|
+
if not lines:
|
23
|
+
return {"summary": "", "args": {}, "returns": "", "raises": {}, "tags": []}
|
24
|
+
|
25
|
+
summary = lines[0].strip()
|
26
|
+
args = {}
|
27
|
+
returns = ""
|
28
|
+
raises = {}
|
29
|
+
tags: list[str] = [] # Final list of parsed tags
|
30
|
+
current_section = None
|
31
|
+
current_key = None
|
32
|
+
current_desc_lines = [] # Accumulator for multi-line descriptions/tag content
|
33
|
+
key_pattern = re.compile(r"^\s*([\w\.]+)\s*(?:\(.*\))?:\s*(.*)")
|
34
|
+
|
35
|
+
def finalize_current_item():
|
36
|
+
"""Helper function to finalize the currently parsed item."""
|
37
|
+
nonlocal returns, tags # Allow modification of outer scope variables
|
38
|
+
desc = " ".join(current_desc_lines).strip()
|
39
|
+
if current_section == "args" and current_key:
|
40
|
+
args[current_key] = desc
|
41
|
+
elif current_section == "raises" and current_key:
|
42
|
+
raises[current_key] = desc
|
43
|
+
elif current_section == "returns":
|
44
|
+
returns = desc
|
45
|
+
# SIM102 applied: Combine nested if
|
46
|
+
elif current_section == "tags" and desc: # Only process if there's content
|
47
|
+
tags = [tag.strip() for tag in desc.split(',') if tag.strip()]
|
48
|
+
|
49
|
+
# B007 applied: Rename unused loop variable i to _
|
50
|
+
for _, line in enumerate(lines[1:]):
|
51
|
+
stripped_line = line.strip()
|
52
|
+
original_indentation = len(line) - len(line.lstrip(' '))
|
53
|
+
|
54
|
+
section_line = stripped_line.lower()
|
55
|
+
is_new_section_header = False
|
56
|
+
new_section_type = None
|
57
|
+
header_content = ""
|
58
|
+
|
59
|
+
if section_line in ("args:", "arguments:", "parameters:"):
|
60
|
+
new_section_type = "args"
|
61
|
+
is_new_section_header = True
|
62
|
+
elif section_line in ("returns:", "yields:"):
|
63
|
+
new_section_type = "returns"
|
64
|
+
is_new_section_header = True
|
65
|
+
elif section_line.startswith(("raises ", "raises:", "errors:", "exceptions:")):
|
66
|
+
new_section_type = "raises"
|
67
|
+
is_new_section_header = True
|
68
|
+
elif section_line.startswith(("tags:", "tags")): # Match "Tags:" or "Tags" potentially followed by content
|
69
|
+
new_section_type = "tags"
|
70
|
+
is_new_section_header = True
|
71
|
+
if ":" in stripped_line:
|
72
|
+
header_content = stripped_line.split(":", 1)[1].strip()
|
73
|
+
elif section_line.endswith(":") and section_line[:-1] in ("attributes", "see also", "example", "examples", "notes"):
|
74
|
+
new_section_type = "other"
|
75
|
+
is_new_section_header = True
|
76
|
+
|
77
|
+
finalize_previous = False
|
78
|
+
if is_new_section_header:
|
79
|
+
finalize_previous = True
|
80
|
+
elif current_section in ["args", "raises"] and current_key:
|
81
|
+
if key_pattern.match(line) or (original_indentation == 0 and stripped_line):
|
82
|
+
finalize_previous = True
|
83
|
+
elif current_section in ["returns", "tags"] and current_desc_lines:
|
84
|
+
if original_indentation == 0 and stripped_line:
|
85
|
+
finalize_previous = True
|
86
|
+
# SIM102 applied: Combine nested if/elif
|
87
|
+
elif (not stripped_line and current_desc_lines and current_section in ["args", "raises", "returns", "tags"]
|
88
|
+
and (current_section not in ["args", "raises"] or current_key)):
|
89
|
+
finalize_previous = True
|
90
|
+
|
91
|
+
if finalize_previous:
|
92
|
+
finalize_current_item()
|
93
|
+
current_key = None
|
94
|
+
current_desc_lines = []
|
95
|
+
if not is_new_section_header or new_section_type == "other":
|
96
|
+
current_section = None
|
97
|
+
|
98
|
+
if is_new_section_header and new_section_type != "other":
|
99
|
+
current_section = new_section_type
|
100
|
+
# If Tags header had content, start accumulating it
|
101
|
+
if new_section_type == "tags" and header_content:
|
102
|
+
current_desc_lines.append(header_content)
|
103
|
+
# Don't process the header line itself further
|
104
|
+
continue
|
105
|
+
|
106
|
+
if not stripped_line:
|
107
|
+
continue
|
108
|
+
|
109
|
+
if current_section == "args" or current_section == "raises":
|
110
|
+
match = key_pattern.match(line)
|
111
|
+
if match:
|
112
|
+
current_key = match.group(1)
|
113
|
+
current_desc_lines = [match.group(2).strip()] # Start new description
|
114
|
+
elif current_key and original_indentation > 0: # Check for indentation for continuation
|
115
|
+
current_desc_lines.append(stripped_line)
|
116
|
+
|
117
|
+
elif current_section == "returns":
|
118
|
+
if not current_desc_lines or original_indentation > 0:
|
119
|
+
current_desc_lines.append(stripped_line)
|
120
|
+
|
121
|
+
elif current_section == "tags":
|
122
|
+
if original_indentation > 0 or not current_desc_lines: # Indented or first line
|
123
|
+
current_desc_lines.append(stripped_line)
|
124
|
+
|
125
|
+
finalize_current_item()
|
126
|
+
return {"summary": summary, "args": args, "returns": returns, "raises": raises, "tags": tags}
|
127
|
+
|
128
|
+
|
129
|
+
docstring_example = """
|
130
|
+
Starts a crawl job for a given URL using Firecrawl. Returns the job ID immediately.
|
131
|
+
|
132
|
+
Args:
|
133
|
+
url: The starting URL for the crawl.
|
134
|
+
It can be a very long url that spans multiple lines if needed.
|
135
|
+
params: Optional dictionary of parameters to customize the crawl.
|
136
|
+
See API docs for details.
|
137
|
+
idempotency_key: Optional unique key to prevent duplicate jobs.
|
138
|
+
|
139
|
+
Returns:
|
140
|
+
A dictionary containing the job initiation response on success,
|
141
|
+
or a string containing an error message on failure. This description
|
142
|
+
can also span multiple lines.
|
143
|
+
|
144
|
+
Raises:
|
145
|
+
ValueError: If the URL is invalid.
|
146
|
+
requests.exceptions.ConnectionError: If connection fails.
|
147
|
+
|
148
|
+
Tags:
|
149
|
+
crawl, async_job, start, api, long_tag_example , another
|
150
|
+
, final_tag
|
151
|
+
"""
|
152
|
+
|
153
|
+
if __name__ == "__main__":
|
154
|
+
parsed = parse_docstring(docstring_example)
|
155
|
+
import json
|
156
|
+
print(json.dumps(parsed, indent=4))
|