universal-mcp 0.1.8rc1__py3-none-any.whl → 0.1.8rc2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (45)
  1. universal_mcp/applications/application.py +6 -5
  2. universal_mcp/applications/calendly/README.md +78 -0
  3. universal_mcp/applications/calendly/app.py +954 -0
  4. universal_mcp/applications/e2b/app.py +18 -12
  5. universal_mcp/applications/firecrawl/app.py +28 -1
  6. universal_mcp/applications/github/app.py +150 -107
  7. universal_mcp/applications/google_calendar/app.py +72 -137
  8. universal_mcp/applications/google_docs/app.py +35 -15
  9. universal_mcp/applications/google_drive/app.py +84 -55
  10. universal_mcp/applications/google_mail/app.py +143 -53
  11. universal_mcp/applications/google_sheet/app.py +61 -38
  12. universal_mcp/applications/markitdown/app.py +12 -11
  13. universal_mcp/applications/notion/app.py +199 -89
  14. universal_mcp/applications/perplexity/app.py +17 -15
  15. universal_mcp/applications/reddit/app.py +110 -101
  16. universal_mcp/applications/resend/app.py +14 -7
  17. universal_mcp/applications/serpapi/app.py +13 -6
  18. universal_mcp/applications/tavily/app.py +13 -10
  19. universal_mcp/applications/wrike/README.md +71 -0
  20. universal_mcp/applications/wrike/__init__.py +0 -0
  21. universal_mcp/applications/wrike/app.py +1044 -0
  22. universal_mcp/applications/youtube/README.md +82 -0
  23. universal_mcp/applications/youtube/__init__.py +0 -0
  24. universal_mcp/applications/youtube/app.py +986 -0
  25. universal_mcp/applications/zenquotes/app.py +13 -3
  26. universal_mcp/exceptions.py +8 -2
  27. universal_mcp/integrations/__init__.py +15 -1
  28. universal_mcp/integrations/integration.py +132 -27
  29. universal_mcp/servers/__init__.py +6 -15
  30. universal_mcp/servers/server.py +209 -153
  31. universal_mcp/stores/__init__.py +7 -2
  32. universal_mcp/stores/store.py +103 -42
  33. universal_mcp/tools/__init__.py +3 -0
  34. universal_mcp/tools/adapters.py +40 -0
  35. universal_mcp/tools/func_metadata.py +214 -0
  36. universal_mcp/tools/tools.py +285 -0
  37. universal_mcp/utils/docgen.py +277 -123
  38. universal_mcp/utils/docstring_parser.py +156 -0
  39. universal_mcp/utils/openapi.py +149 -40
  40. {universal_mcp-0.1.8rc1.dist-info → universal_mcp-0.1.8rc2.dist-info}/METADATA +7 -3
  41. universal_mcp-0.1.8rc2.dist-info/RECORD +71 -0
  42. universal_mcp-0.1.8rc1.dist-info/RECORD +0 -58
  43. /universal_mcp/{utils/bridge.py → applications/calendly/__init__.py} +0 -0
  44. {universal_mcp-0.1.8rc1.dist-info → universal_mcp-0.1.8rc2.dist-info}/WHEEL +0 -0
  45. {universal_mcp-0.1.8rc1.dist-info → universal_mcp-0.1.8rc2.dist-info}/entry_points.txt +0 -0
@@ -5,9 +5,14 @@ using LLMs with structured output
 """
 
 import ast
+import json
 import os
+import sys
+import textwrap
+import traceback
 
 import litellm
+import re
 from pydantic import BaseModel, Field
 
 
@@ -21,6 +26,14 @@ class DocstringOutput(BaseModel):
         description="Dictionary mapping parameter names to their descriptions"
     )
     returns: str = Field(description="Description of what the function returns")
+    raises: dict[str, str] = Field(
+        default_factory=dict,
+        description="Dictionary mapping potential exception types/reasons to their descriptions"
+    )
+    tags: list[str] = Field(
+        default_factory=list,
+        description="List of relevant tags for the function (e.g., action, job type, async status, importance)"
+    )
 
 
 class FunctionExtractor(ast.NodeVisitor):
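A minimal sketch of how the extended output model can be constructed (the import path is inferred from the file being diffed; the field values are illustrative):

    # Import path inferred from universal_mcp/utils/docgen.py; values are illustrative.
    from universal_mcp.utils.docgen import DocstringOutput

    doc = DocstringOutput(
        summary="Starts a crawl job for a given URL.",
        args={"url": "The starting URL for the crawl."},
        returns="A dictionary containing the job initiation response.",
        raises={"ValueError": "If the URL is invalid."},  # new field, defaults to {}
        tags=["crawl", "async_job", "start", "important"],  # new field, defaults to []
    )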
@@ -47,17 +60,23 @@ class FunctionExtractor(ast.NodeVisitor):
         return None
 
     def visit_FunctionDef(self, node: ast.FunctionDef):
-        """Visits a regular function definition."""
-        source_code = self._get_source_segment(node)
-        if source_code:
-            self.functions.append((node.name, source_code))
+        """Visits a regular function definition and collects it if not excluded."""
+        # Add the exclusion logic here
+        if not node.name.startswith('_') and node.name != 'list_tools':
+            source_code = self._get_source_segment(node)
+            if source_code:
+                self.functions.append((node.name, source_code))
+        # Continue traversing the AST for nested functions/classes
         self.generic_visit(node)
 
     def visit_AsyncFunctionDef(self, node: ast.AsyncFunctionDef):
-        """Visits an asynchronous function definition."""
-        source_code = self._get_source_segment(node)
-        if source_code:
-            self.functions.append((node.name, source_code))
+        """Visits an asynchronous function definition and collects it if not excluded."""
+        # Add the exclusion logic here
+        if not node.name.startswith('_') and node.name != 'list_tools':
+            source_code = self._get_source_segment(node)
+            if source_code:
+                self.functions.append((node.name, source_code))
+        # Continue traversing the AST for nested functions/classes
         self.generic_visit(node)
 
 
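In isolation, the new exclusion predicate behaves roughly like this (the method names below are made up for illustration):

    # Hypothetical method names; only public functions other than list_tools are collected.
    for name in ["list_tools", "_get_headers", "send_email"]:
        collected = not name.startswith('_') and name != 'list_tools'
        print(f"{name}: {'collected' if collected else 'skipped'}")  # only send_email is collected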
@@ -115,8 +134,49 @@ def extract_functions_from_script(file_path: str) -> list[tuple[str, str]]:
         return []
 
 
+def extract_json_from_text(text):
+    """Extract valid JSON from text that might contain additional content.
+
+    Args:
+        text: Raw text response from the model
+
+    Returns:
+        Dict containing the extracted JSON data
+
+    Raises:
+        ValueError: If no valid JSON could be extracted
+    """
+    # Try to find JSON between triple backticks (common markdown pattern)
+    json_match = re.search(r"```(?:json)?\s*([\s\S]*?)\s*```", text)
+    if json_match:
+        try:
+            return json.loads(json_match.group(1))
+        except:
+            pass
+
+    # Try to find the first { and last } for a complete JSON object
+    try:
+        start = text.find('{')
+        if start >= 0:
+            brace_count = 0
+            for i in range(start, len(text)):
+                if text[i] == '{':
+                    brace_count += 1
+                elif text[i] == '}':
+                    brace_count -= 1
+                    if brace_count == 0:
+                        return json.loads(text[start:i+1])
+    except:
+        pass
+
+    try:
+        return json.loads(text)
+    except:
+        raise ValueError("Could not extract valid JSON from the response")
+
+
 def generate_docstring(
-    function_code: str, model: str = "openai/gpt-4o"
+    function_code: str, model: str = "perplexity/sonar-pro"
 ) -> DocstringOutput:
     """
     Generate a docstring for a Python function using litellm with structured output.
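For reference, a tiny sketch of the new extract_json_from_text helper on a made-up model reply that mixes prose with JSON:

    response_text = 'Sure! {"summary": "Adds two numbers", "args": {"a": "first", "b": "second"}, "returns": "Their sum", "tags": ["math"]}'
    parsed = extract_json_from_text(response_text)
    print(parsed["tags"])  # ['math']; fenced ```json blocks and bare JSON bodies are handled by the other branches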
@@ -129,31 +189,42 @@ def generate_docstring(
         A DocstringOutput object containing the structured docstring components
     """
     system_prompt = """You are a helpful AI assistant specialized in writing high-quality Google-style Python docstrings.
-    You MUST ALWAYS include an Args section, even if there are no arguments (in which case mention 'None')."""
+    You MUST ALWAYS include an Args section, even if there are no arguments (in which case mention 'None').
+    You should also generate a list of tags describing the function's purpose and characteristics."""
+
+    user_prompt = f"""Generate a high-quality Google-style docstring for the following Python function.
+    Analyze the function's name, parameters, return values, potential exceptions, and functionality to create a comprehensive docstring.
 
-    user_prompt = f"""Generate a high-quality Google-style docstring for the following Python function.
-    Analyze the function's name, parameters, return values, and functionality to create a comprehensive docstring.
-
     The docstring MUST:
     1. Start with a clear, concise summary of what the function does
     2. ALWAYS include Args section with description of each parameter (or 'None' if no parameters)
-    3. Include Returns section describing the return value
-    4. Be formatted according to Google Python Style Guide
-
+    3. Include Returns section describing the return value (or 'None' if nothing is explicitly returned)
+    4. **Optionally include a Raises section if the function might raise exceptions, describing the exception type/reason and when it's raised.**
+    5. **Include a Tags section with a list of strings describing the function's purpose, characteristics, or keywords.** Tags should be lowercase and single words or hyphenated phrases. Include tags like:
+        - The main action (e.g., 'scrape', 'search', 'start', 'check', 'cancel', 'list')
+        - The type of job ('async_job', 'batch')
+        - The stage of an asynchronous job ('start', 'status', 'cancel')
+        - Related domain/feature ('ai', 'management')
+        - **Significance: Add the tag 'important' to functions that represent core capabilities or primary interaction points of the class (e.g., initiating actions like scrape, search, or starting async jobs).**
+    6. Be formatted according to Google Python Style Guide
+
     Here is the function:
-
+
     {function_code}
-
-    Respond in JSON format with the following structure:
+
+    Respond ONLY in JSON format with the following structure. **Include the 'raises' field only if the function is likely to raise exceptions.** **Include the 'tags' field as a list of strings.**
     {{
-        "summary": "A clear, concise summary of what the function does",
-        "args": {{"param_name": "param description", "param_name2": "param description"}},
-        "returns": "Description of what the function returns"
+        "summary": "A clear, concise summary of what the function does",
+        "args": {{"param_name": "param description", "param_name2": "param description"}},
+        "returns": "Description of what the function returns",
+        "raises": {{
+            "ExceptionType": "Description of when/why this exception is raised"
+        }},
+        "tags": ["tag1", "tag2", "important"]
     }}
     """
 
     try:
-        # Use regular completion and parse the JSON ourselves instead of using response_model
         response = litellm.completion(
             model=model,
             messages=[
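Since the default model moves from openai/gpt-4o to perplexity/sonar-pro, callers who want the previous behaviour can still pass the model explicitly, roughly:

    # Assumes litellm is configured with credentials for the chosen provider.
    doc = generate_docstring(function_code, model="openai/gpt-4o")  # keeps the pre-0.1.8rc2 default
    print(doc.tags)  # new field; empty list if the model returned no tags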
@@ -162,149 +233,231 @@ def generate_docstring(
             ],
         )
 
-        # Get the response content
         response_text = response.choices[0].message.content
 
-        # Simple JSON extraction in case the model includes extra text
-        import json
-        import re
-
-        # Find JSON object in the response using regex
-        json_match = re.search(r"({.*})", response_text.replace("\n", " "), re.DOTALL)
-        if json_match:
-            json_str = json_match.group(1)
-            parsed_data = json.loads(json_str)
-        else:
-            # Try to parse the whole response as JSON
-            parsed_data = json.loads(response_text)
-
-        # Ensure args is never empty
-        if not parsed_data.get("args"):
-            parsed_data["args"] = {"None": "This function takes no arguments"}
+
+        try:
+            parsed_data = extract_json_from_text(response_text)
+        except ValueError as e:
+            print(f"JSON extraction failed: {e}")
+            print(f"Raw response: {response_text[:100]}...") # Log first 100 chars for debugging
+            # Return a default structure if extraction fails
+            return DocstringOutput(
+                summary="Failed to extract docstring information",
+                args={"None": "This function takes no arguments"},
+                returns="Unknown return value"
+            )
+        model_args = parsed_data.get("args")
+        if not model_args:
+            parsed_data["args"] = {"None": "This function takes no arguments"}
 
-        # Create DocstringOutput from parsed data
         return DocstringOutput(
-            summary=parsed_data.get("summary", ""),
+            summary=parsed_data.get("summary", "No documentation available"),
             args=parsed_data.get("args", {"None": "This function takes no arguments"}),
-            returns=parsed_data.get("returns", ""),
+            returns=parsed_data.get("returns", "None"),
+            raises=parsed_data.get("raises", {}),
+            tags=parsed_data.get("tags", []) # Get tags, default to empty list
         )
 
     except Exception as e:
-        print(f"Error generating docstring: {e}")
-        # Return a docstring object with default values
+        print(f"Error generating docstring: {e}", file=sys.stderr)
+        traceback.print_exc(file=sys.stderr)
         return DocstringOutput(
-            summary="No documentation available",
+            summary=f"Error generating docstring: {e}",
             args={"None": "This function takes no arguments"},
             returns="None",
+            raises={},
+            tags=["generation-error"]
         )
 
-
 def format_docstring(docstring: DocstringOutput) -> str:
     """
-    Format a DocstringOutput object into a properly formatted docstring string.
+    Format a DocstringOutput object into the content string for a docstring.
+    This function produces the content *between* the triple quotes, without
+    the leading/trailing triple quotes or the main indentation.
 
     Args:
         docstring: The DocstringOutput object to format
 
     Returns:
-        A formatted docstring string ready to be inserted into code
+        A formatted docstring content string ready to be indented and wrapped
+        in triple quotes for insertion into code.
     """
-    formatted_docstring = f"{docstring.summary}\n\n"
-
-    if docstring.args:
-        formatted_docstring += "Args:\n"
-        for arg_name, arg_desc in docstring.args.items():
-            formatted_docstring += f" {arg_name}: {arg_desc}\n"
-        formatted_docstring += "\n"
-
-    if docstring.returns:
-        formatted_docstring += f"Returns:\n {docstring.returns}\n"
-
-    return formatted_docstring.strip()
-
+    parts = []
+
+    summary = docstring.summary.strip()
+    if summary:
+        parts.append(summary)
+
+    filtered_args = {name: desc for name, desc in docstring.args.items() if name not in ('self', 'cls')}
+    args_lines = []
+    if filtered_args:
+        args_lines.append("Args:")
+        for arg_name, arg_desc in filtered_args.items():
+            arg_desc_cleaned = arg_desc.strip()
+            args_lines.append(f" {arg_name}: {arg_desc_cleaned}")
+    elif docstring.args.get('None'): # Include the 'None' placeholder if it was generated
+        args_lines.append("Args:")
+        none_desc_cleaned = docstring.args['None'].strip()
+        args_lines.append(f" None: {none_desc_cleaned}")
+
+    if args_lines:
+        parts.append("\n".join(args_lines))
+
+    returns_desc_cleaned = docstring.returns.strip()
+    if returns_desc_cleaned and returns_desc_cleaned.lower() not in ('none', ''):
+        parts.append(f"Returns:\n {returns_desc_cleaned}")
+
+    raises_lines = []
+    if docstring.raises:
+        raises_lines.append("Raises:")
+        for exception_type, exception_desc in docstring.raises.items():
+            exception_desc_cleaned = exception_desc.strip()
+            if exception_type.strip() and exception_desc_cleaned: # Ensure type and desc are not empty
+                raises_lines.append(f" {exception_type.strip()}: {exception_desc_cleaned}")
+    if raises_lines:
+        parts.append("\n".join(raises_lines))
+
+    cleaned_tags = [tag.strip() for tag in docstring.tags if tag and tag.strip()]
+    if cleaned_tags:
+        tags_string = ", ".join(cleaned_tags)
+        parts.append(f"Tags:\n {tags_string}")
+
+    return "\n\n".join(parts)
 
 def insert_docstring_into_function(function_code: str, docstring: str) -> str:
     """
-    Insert a docstring into a function's code.
+    Insert a docstring into a function's code, replacing an existing one if present
+    at the correct location, and attempting to remove misplaced string literals
+    from the body.
+
+    This version handles multiline function definitions and existing docstrings
+    by carefully splicing lines based on AST node positions. It also tries to
+    clean up old, misplaced string literals that might have been interpreted
+    as docstrings previously.
 
     Args:
-        function_code: The source code of the function
-        docstring: The formatted docstring string to insert
+        function_code: The source code of the function snippet. This snippet is
+            expected to contain exactly one function definition.
+        docstring: The formatted docstring string content (without triple quotes or
+            leading/trailing newlines within the content itself).
 
     Returns:
-        The updated function code with the docstring inserted
+        The updated function code with the docstring inserted, or the original
+        code if an error occurs during processing or parsing.
     """
     try:
-        function_ast = ast.parse(function_code)
-        if not function_ast.body or not hasattr(function_ast.body[0], "body"):
-            return function_code
+        lines = function_code.splitlines(keepends=True)
 
-        function_lines = function_code.splitlines()
+        tree = ast.parse(function_code)
+        if not tree.body or not isinstance(tree.body[0], ast.FunctionDef | ast.AsyncFunctionDef):
+            print("Warning: Could not parse function definition from code snippet. Returning original code.", file=sys.stderr)
+            return function_code # Return original code if parsing fails or isn't a function
 
-        # Find the function definition line (ends with ':')
-        func_def_line = None
-        for i, line in enumerate(function_lines):
-            if "def " in line and line.strip().endswith(":"):
-                func_def_line = i
-                break
+        func_node = tree.body[0]
+        func_name = getattr(func_node, 'name', 'unknown_function')
 
-        if func_def_line is None:
-            return function_code
+        insert_idx = func_node.end_lineno
 
-        # Determine indentation from the first non-empty line after the function definition
-        body_indent = ""
-        for line in function_lines[func_def_line + 1 :]:
-            if line.strip():
-                body_indent = " " * (len(line) - len(line.lstrip()))
-                break
+        if func_node.body:
+            insert_idx = func_node.body[0].lineno - 1
 
-        # Check if the function already has a docstring
-        first_element = (
-            function_ast.body[0].body[0] if function_ast.body[0].body else None
-        )
-        has_docstring = (
-            isinstance(first_element, ast.Expr)
-            and isinstance(first_element.value, ast.Constant)
-            and isinstance(first_element.value.value, str)
-        )
+        body_indent = "    " # Default indentation (PEP 8)
+
+        indent_source_idx = insert_idx
+        actual_first_body_line_idx = -1
+        for i in range(indent_source_idx, len(lines)):
+            line = lines[i]
+            stripped = line.lstrip()
+            if stripped and not stripped.startswith('#'):
+                actual_first_body_line_idx = i
+                break
 
-        docstring_lines = [
-            f'{body_indent}"""',
-            *[f"{body_indent}{line}" for line in docstring.split("\n")],
-            f'{body_indent}"""',
-        ]
-
-        if has_docstring:
-            # Find the existing docstring in the source and replace it
-            for i in range(func_def_line + 1, len(function_lines)):
-                if '"""' in function_lines[i] or "'''" in function_lines[i]:
-                    docstring_start = i
-                    # Find end of docstring
-                    for j in range(docstring_start + 1, len(function_lines)):
-                        if '"""' in function_lines[j] or "'''" in function_lines[j]:
-                            docstring_end = j
-                            # Replace the existing docstring
-                            return "\n".join(
-                                function_lines[:docstring_start]
-                                + docstring_lines
-                                + function_lines[docstring_end + 1 :]
-                            )
+        # If a meaningful line was found at or after insertion point, use its indentation
+        if actual_first_body_line_idx != -1:
+            body_line = lines[actual_first_body_line_idx]
+            body_indent = body_line[:len(body_line) - len(body_line.lstrip())]
         else:
-            # Insert new docstring after function definition
-            return "\n".join(
-                function_lines[: func_def_line + 1]
-                + docstring_lines
-                + function_lines[func_def_line + 1 :]
-            )
+            if func_node.lineno - 1 < len(lines): # Ensure def line exists
+                def_line = lines[func_node.lineno - 1]
+                def_line_indent = def_line[:len(def_line) - len(def_line.lstrip())]
+                body_indent = def_line_indent + "    " # Standard 4 spaces relative indent
+
+
+        # Format the new docstring lines with the calculated indentation
+        new_docstring_lines_formatted = [f'{body_indent}"""\n']
+        new_docstring_lines_formatted.extend([f"{body_indent}{line}\n" for line in docstring.splitlines()])
+        new_docstring_lines_formatted.append(f'{body_indent}"""\n')
+
+        output_lines = []
+        output_lines.extend(lines[:insert_idx])
+
+        # 2. Insert the new docstring
+        output_lines.extend(new_docstring_lines_formatted)
+        remaining_body_lines = lines[insert_idx:]
+
+        remaining_body_code = "".join(remaining_body_lines)
+
+        if remaining_body_code.strip(): # Only parse if there's non-whitespace content
+            try:
+                dummy_code = f"def _dummy_func():\n{textwrap.indent(remaining_body_code, body_indent)}"
+                dummy_tree = ast.parse(dummy_code)
+                dummy_body_statements = dummy_tree.body[0].body if dummy_tree.body and isinstance(dummy_tree.body[0], ast.FunctionDef | ast.AsyncFunctionDef) else []
+                cleaned_body_parts = []
+                for _node in dummy_body_statements:
+                    break # Exit this loop, we'll process func_node.body instead
+                cleaned_body_parts = []
+                start_stmt_index = 1 if func_node.body and isinstance(func_node.body[0], ast.Expr) and isinstance(func_node.body[0].value, ast.Constant) and isinstance(func_node.body[0].value.value, str) else 0
+
+                for i in range(start_stmt_index, len(func_node.body)):
+                    stmt_node = func_node.body[i]
+
+                    is_just_string_stmt = isinstance(stmt_node, ast.Expr) and isinstance(stmt_node.value, ast.Constant) and isinstance(stmt_node.value.value, str)
+
+                    if not is_just_string_stmt:
+                        stmt_start_idx = stmt_node.lineno - 1
+                        stmt_end_idx = stmt_node.end_lineno - 1 # Inclusive end line index
+
+                        cleaned_body_parts.extend(lines[stmt_start_idx : stmt_end_idx + 1])
+
+                if func_node.body:
+                    last_stmt_end_idx = func_node.body[-1].end_lineno - 1
+                    for line in lines[last_stmt_end_idx + 1:]:
+                        if line.strip():
+                            cleaned_body_parts.append(line)
+                cleaned_body_lines = cleaned_body_parts
+
+            except SyntaxError as parse_e:
+                print(f"WARNING: Could not parse function body for cleaning, keeping all body lines: {parse_e}", file=sys.stderr)
+                traceback.print_exc(file=sys.stderr)
+                cleaned_body_lines = remaining_body_lines
+            except Exception as other_e:
+                print(f"WARNING: Unexpected error processing function body for cleaning, keeping all body lines: {other_e}", file=sys.stderr)
+                traceback.print_exc(file=sys.stderr)
+                cleaned_body_lines = remaining_body_lines
+        else:
+            cleaned_body_lines = []
+            output_lines.extend(lines[func_node.end_lineno:])
+
+        if func_node.body or not remaining_body_code.strip():
+            output_lines.extend(cleaned_body_lines)
 
-        # Default return if insertion logic fails
+        final_code = "".join(output_lines)
+        ast.parse(final_code)
+        return final_code
+
+    except SyntaxError as e:
+        print(f"WARNING: Generated code snippet for '{func_name}' has syntax error: {e}", file=sys.stderr)
+        traceback.print_exc(file=sys.stderr)
        return function_code
     except Exception as e:
-        print(f"Error inserting docstring: {e}")
+        print(f"Error processing function snippet for insertion: {e}", file=sys.stderr)
+        traceback.print_exc(file=sys.stderr)
+
         return function_code
 
 
-def process_file(file_path: str, model: str = "openai/gpt-4o") -> int:
+def process_file(file_path: str, model: str = "perplexity/sonar-pro") -> int:
     """
     Process a Python file and add docstrings to all functions in it.
 
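A small sketch of what the rewritten format_docstring now emits for the new fields; the values are illustrative and whitespace is approximate:

    content = format_docstring(DocstringOutput(
        summary="Checks the status of a crawl job.",
        args={"job_id": "Identifier returned when the job was started."},
        returns="None",  # filtered out, so no Returns section is emitted
        raises={"KeyError": "If the job id is unknown."},
        tags=["check", "status", "async_job"],
    ))
    print(content)  # summary, then Args and Raises sections, then "Tags:" with "check, status, async_job"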
@@ -355,6 +508,7 @@ def process_file(file_path: str, model: str = "openai/gpt-4o") -> int:
             f.write(updated_content)
         print(f"Updated {count} functions in {file_path}")
     else:
+        print(updated_function, "formatted docstring",formatted_docstring)
         print(f"No changes made to {file_path}")
 
     return count
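And a rough usage sketch of the rewritten insert_docstring_into_function on a tiny, made-up snippet:

    snippet = "def add(a, b):\n    return a + b\n"
    body = "Adds two numbers.\n\nArgs:\n    a: First addend.\n    b: Second addend."
    print(insert_docstring_into_function(snippet, body))  # the snippet with a triple-quoted docstring spliced in ahead of the return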
@@ -0,0 +1,156 @@
+import re
+from typing import Any
+
+
+def parse_docstring(docstring: str | None) -> dict[str, Any]:
+    """
+    Parses a standard Python docstring into summary, args, returns, raises, and tags.
+
+    Args:
+        docstring: The docstring to parse.
+
+    Returns:
+        A dictionary with keys 'summary', 'args', 'returns', 'raises', 'tags'.
+        'args' is a dict mapping arg names to descriptions.
+        'raises' is a dict mapping exception type names to descriptions.
+        'tags' is a list of strings extracted from the 'Tags:' section, comma-separated.
+    """
+    if not docstring:
+        return {"summary": "", "args": {}, "returns": "", "raises": {}, "tags": []}
+
+    lines = docstring.strip().splitlines()
+    if not lines:
+        return {"summary": "", "args": {}, "returns": "", "raises": {}, "tags": []}
+
+    summary = lines[0].strip()
+    args = {}
+    returns = ""
+    raises = {}
+    tags: list[str] = [] # Final list of parsed tags
+    current_section = None
+    current_key = None
+    current_desc_lines = [] # Accumulator for multi-line descriptions/tag content
+    key_pattern = re.compile(r"^\s*([\w\.]+)\s*(?:\(.*\))?:\s*(.*)")
+
+    def finalize_current_item():
+        """Helper function to finalize the currently parsed item."""
+        nonlocal returns, tags # Allow modification of outer scope variables
+        desc = " ".join(current_desc_lines).strip()
+        if current_section == "args" and current_key:
+            args[current_key] = desc
+        elif current_section == "raises" and current_key:
+            raises[current_key] = desc
+        elif current_section == "returns":
+            returns = desc
+        # SIM102 applied: Combine nested if
+        elif current_section == "tags" and desc: # Only process if there's content
+            tags = [tag.strip() for tag in desc.split(',') if tag.strip()]
+
+    # B007 applied: Rename unused loop variable i to _
+    for _, line in enumerate(lines[1:]):
+        stripped_line = line.strip()
+        original_indentation = len(line) - len(line.lstrip(' '))
+
+        section_line = stripped_line.lower()
+        is_new_section_header = False
+        new_section_type = None
+        header_content = ""
+
+        if section_line in ("args:", "arguments:", "parameters:"):
+            new_section_type = "args"
+            is_new_section_header = True
+        elif section_line in ("returns:", "yields:"):
+            new_section_type = "returns"
+            is_new_section_header = True
+        elif section_line.startswith(("raises ", "raises:", "errors:", "exceptions:")):
+            new_section_type = "raises"
+            is_new_section_header = True
+        elif section_line.startswith(("tags:", "tags")): # Match "Tags:" or "Tags" potentially followed by content
+            new_section_type = "tags"
+            is_new_section_header = True
+            if ":" in stripped_line:
+                header_content = stripped_line.split(":", 1)[1].strip()
+        elif section_line.endswith(":") and section_line[:-1] in ("attributes", "see also", "example", "examples", "notes"):
+            new_section_type = "other"
+            is_new_section_header = True
+
+        finalize_previous = False
+        if is_new_section_header:
+            finalize_previous = True
+        elif current_section in ["args", "raises"] and current_key:
+            if key_pattern.match(line) or (original_indentation == 0 and stripped_line):
+                finalize_previous = True
+        elif current_section in ["returns", "tags"] and current_desc_lines:
+            if original_indentation == 0 and stripped_line:
+                finalize_previous = True
+        # SIM102 applied: Combine nested if/elif
+        elif (not stripped_line and current_desc_lines and current_section in ["args", "raises", "returns", "tags"]
+              and (current_section not in ["args", "raises"] or current_key)):
+            finalize_previous = True
+
+        if finalize_previous:
+            finalize_current_item()
+            current_key = None
+            current_desc_lines = []
+            if not is_new_section_header or new_section_type == "other":
+                current_section = None
+
+        if is_new_section_header and new_section_type != "other":
+            current_section = new_section_type
+            # If Tags header had content, start accumulating it
+            if new_section_type == "tags" and header_content:
+                current_desc_lines.append(header_content)
+            # Don't process the header line itself further
+            continue
+
+        if not stripped_line:
+            continue
+
+        if current_section == "args" or current_section == "raises":
+            match = key_pattern.match(line)
+            if match:
+                current_key = match.group(1)
+                current_desc_lines = [match.group(2).strip()] # Start new description
+            elif current_key and original_indentation > 0: # Check for indentation for continuation
+                current_desc_lines.append(stripped_line)
+
+        elif current_section == "returns":
+            if not current_desc_lines or original_indentation > 0:
+                current_desc_lines.append(stripped_line)
+
+        elif current_section == "tags":
+            if original_indentation > 0 or not current_desc_lines: # Indented or first line
+                current_desc_lines.append(stripped_line)
+
+    finalize_current_item()
+    return {"summary": summary, "args": args, "returns": returns, "raises": raises, "tags": tags}
+
+
+docstring_example = """
+Starts a crawl job for a given URL using Firecrawl. Returns the job ID immediately.
+
+Args:
+    url: The starting URL for the crawl.
+        It can be a very long url that spans multiple lines if needed.
+    params: Optional dictionary of parameters to customize the crawl.
+        See API docs for details.
+    idempotency_key: Optional unique key to prevent duplicate jobs.
+
+Returns:
+    A dictionary containing the job initiation response on success,
+    or a string containing an error message on failure. This description
+    can also span multiple lines.
+
+Raises:
+    ValueError: If the URL is invalid.
+    requests.exceptions.ConnectionError: If connection fails.
+
+Tags:
+    crawl, async_job, start, api, long_tag_example , another
+    , final_tag
+"""
+
+if __name__ == "__main__":
+    parsed = parse_docstring(docstring_example)
+    import json
+    print(json.dumps(parsed, indent=4))