universal-mcp 0.1.15rc5__py3-none-any.whl → 0.1.16__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (37) hide show
  1. universal_mcp/analytics.py +7 -1
  2. universal_mcp/applications/README.md +122 -0
  3. universal_mcp/applications/__init__.py +51 -56
  4. universal_mcp/applications/application.py +255 -82
  5. universal_mcp/cli.py +27 -43
  6. universal_mcp/config.py +16 -48
  7. universal_mcp/exceptions.py +8 -0
  8. universal_mcp/integrations/__init__.py +1 -3
  9. universal_mcp/integrations/integration.py +18 -2
  10. universal_mcp/logger.py +31 -29
  11. universal_mcp/servers/server.py +6 -18
  12. universal_mcp/stores/store.py +2 -12
  13. universal_mcp/tools/__init__.py +12 -1
  14. universal_mcp/tools/adapters.py +11 -0
  15. universal_mcp/tools/func_metadata.py +11 -15
  16. universal_mcp/tools/manager.py +163 -117
  17. universal_mcp/tools/tools.py +6 -13
  18. universal_mcp/utils/agentr.py +2 -6
  19. universal_mcp/utils/common.py +33 -0
  20. universal_mcp/utils/docstring_parser.py +4 -13
  21. universal_mcp/utils/installation.py +67 -184
  22. universal_mcp/utils/openapi/__inti__.py +0 -0
  23. universal_mcp/utils/{api_generator.py → openapi/api_generator.py} +2 -4
  24. universal_mcp/utils/{docgen.py → openapi/docgen.py} +17 -54
  25. universal_mcp/utils/openapi/openapi.py +882 -0
  26. universal_mcp/utils/openapi/preprocessor.py +1093 -0
  27. universal_mcp/utils/{readme.py → openapi/readme.py} +21 -37
  28. universal_mcp-0.1.16.dist-info/METADATA +282 -0
  29. universal_mcp-0.1.16.dist-info/RECORD +44 -0
  30. universal_mcp-0.1.16.dist-info/licenses/LICENSE +21 -0
  31. universal_mcp/utils/openapi.py +0 -646
  32. universal_mcp-0.1.15rc5.dist-info/METADATA +0 -245
  33. universal_mcp-0.1.15rc5.dist-info/RECORD +0 -39
  34. /universal_mcp/{templates → utils/templates}/README.md.j2 +0 -0
  35. /universal_mcp/{templates → utils/templates}/api_client.py.j2 +0 -0
  36. {universal_mcp-0.1.15rc5.dist-info → universal_mcp-0.1.16.dist-info}/WHEEL +0 -0
  37. {universal_mcp-0.1.15rc5.dist-info → universal_mcp-0.1.16.dist-info}/entry_points.txt +0 -0
@@ -0,0 +1,1093 @@
1
+ import json
2
+ import logging
3
+ import os
4
+ import re
5
+ import sys
6
+ import time
7
+ import traceback
8
+ from pathlib import Path
9
+
10
+ import litellm
11
+ import typer
12
+ import yaml
13
+ from rich.console import Console
14
+
15
+ console = Console()
16
+
17
+
18
# ANSI escape sequences used to colour terminal output.
# "ENDC" resets the terminal back to its default styling.
COLORS = dict(
    YELLOW="\033[93m",
    RED="\033[91m",
    ENDC="\033[0m",
    BLUE="\033[94m",
    GREEN="\033[92m",
    CYAN="\033[96m",
)
26
+
27
+
28
class ColoredFormatter(logging.Formatter):
    """Formatter that colours each record according to its log level.

    The base layout is ``LEVEL:message``. When the record's level has an entry
    in ``LOG_LEVEL_COLORS`` the layout is wrapped in that ANSI colour and a
    reset code. DEBUG records are additionally prefixed (outside the coloured
    span) with the originating file name and line number.
    """

    FORMAT = "%(levelname)s:%(message)s"

    # Maps a numeric logging level to the ANSI colour prefix used for it.
    LOG_LEVEL_COLORS = {
        logging.DEBUG: COLORS["CYAN"],
        logging.INFO: COLORS["GREEN"],
        logging.WARNING: COLORS["YELLOW"],
        logging.ERROR: COLORS["RED"],
        logging.CRITICAL: COLORS["RED"],
    }

    def format(self, record):
        """Return ``record`` rendered with the level-specific layout."""
        template = self.FORMAT

        colour = self.LOG_LEVEL_COLORS.get(record.levelno)
        if colour:
            template = f"{colour}{template}{COLORS['ENDC']}"

        # Debug output also says where the message came from.
        if record.levelno == logging.DEBUG:
            template = "%(filename)s:%(lineno)d - " + template

        # A throwaway Formatter is built per record because the layout varies by level.
        return logging.Formatter(template).format(record)
54
+
55
+
56
# Configure the ROOT logger for this module: drop whatever handlers are already
# installed and attach a single stdout handler that uses ColoredFormatter.
logger = logging.getLogger()
# Iterate over a snapshot: removeHandler() mutates logger.handlers, and removing
# from the list being iterated would skip every other handler.
for handler in list(logger.handlers):
    logger.removeHandler(handler)

logger.setLevel(logging.INFO)  # Default level, can be changed by set_logging_level

console_handler = logging.StreamHandler(sys.stdout)
colored_formatter = ColoredFormatter()
console_handler.setFormatter(colored_formatter)
logger.addHandler(console_handler)
67
+
68
+
69
def set_logging_level(level: str):
    """Set the module logger's level from a case-insensitive level name.

    Recognised names are DEBUG, INFO, WARNING, ERROR and CRITICAL; any other
    value falls back to INFO. The resulting level is announced at INFO level.
    """
    requested = level.upper()
    if requested in ("DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"):
        log_level = getattr(logging, requested)
    else:
        log_level = logging.INFO

    logger.setLevel(log_level)
    logger.info(f"Logging level set to {logging.getLevelName(log_level)}")
80
+
81
+
82
+ MAX_DESCRIPTION_LENGTH = 200
83
+
84
+
85
+ def is_fallback_text(text: str | None) -> bool:
86
+ """Checks if the text looks like a generated fallback message."""
87
+ if not isinstance(text, str) or not text.strip():
88
+ return False
89
+ # Check for the specific pattern used for LLM generation failures
90
+ return text.strip().startswith("[LLM could not generate")
91
+
92
+
93
def read_schema_file(schema_path: str) -> dict:
    """Load a schema document from ``schema_path`` and return the parsed data.

    The parser is picked from the file extension: ``.yaml``/``.yml`` use
    ``yaml.safe_load`` and ``.json`` uses ``json.load``. Any other extension is
    tried as YAML first and, if that fails to parse, re-read as JSON.

    Raises:
        FileNotFoundError: if ``schema_path`` does not exist.
        yaml.YAMLError / json.JSONDecodeError: if the content cannot be parsed.
        OSError: if the file cannot be read.
    Every failure is logged at CRITICAL level before being re-raised.
    """
    logger.info(f"Attempting to read schema file: {schema_path}")
    if not os.path.exists(schema_path):
        logger.critical(f"Schema file not found at: {schema_path}")  # Use critical for pre-processing essential step
        raise FileNotFoundError(f"Schema file not found at: {schema_path}")

    try:
        suffix = os.path.splitext(schema_path)[1].lower()

        with open(schema_path, encoding="utf-8") as handle:
            if suffix == ".json":
                logger.info(f"Reading as JSON: {schema_path}")
                return json.load(handle)

            if suffix in (".yaml", ".yml"):
                logger.info(f"Reading as YAML: {schema_path}")
                return yaml.safe_load(handle)

            # Unknown extension: try YAML first, then fall back to JSON.
            logger.warning(f"Unknown file extension '{suffix}' for {schema_path}. Attempting to read as YAML.")
            try:
                return yaml.safe_load(handle)
            except (yaml.YAMLError, json.JSONDecodeError):
                handle.seek(0)  # Rewind so the JSON parser sees the whole file
                logger.warning("YAML load failed, attempting JSON.")
                return json.load(handle)

    except (yaml.YAMLError, json.JSONDecodeError) as exc:
        logger.critical(f"Error parsing schema file {schema_path}: {exc}")
        raise
    except OSError as exc:
        logger.critical(f"Error reading schema file {schema_path}: {exc}")
        raise
    except Exception as exc:
        logger.critical(f"An unexpected error occurred while reading {schema_path}: {exc}")
        traceback.print_exc(file=sys.stderr)
        raise
136
+
137
+
138
def write_schema_file(schema_data: dict, output_path: str):
    """Serialise ``schema_data`` to ``output_path`` as JSON or YAML.

    The format is chosen from the (case-insensitive) file extension:
    ``.json`` or ``.yaml``/``.yml``. Missing parent directories are created.

    The extension is validated BEFORE anything touches the filesystem, so an
    unsupported extension no longer creates directories or truncates/creates
    the target file as a side effect of the failed call.

    Raises:
        ValueError: if the extension is not ``.json``, ``.yaml`` or ``.yml``.
        OSError: if the directory or file cannot be written.
    Failures are logged at CRITICAL level before being re-raised.
    """
    logger.info(f"Attempting to write processed schema to: {output_path}")
    try:
        _, file_extension = os.path.splitext(output_path)
        file_extension = file_extension.lower()

        if file_extension not in (".json", ".yaml", ".yml"):
            logger.error(f"Unsupported output file extension '{file_extension}' for writing.")
            raise ValueError(f"Unsupported output file extension '{file_extension}'. Use .json or .yaml/.yml.")

        output_dir = os.path.dirname(output_path)
        if output_dir and not os.path.exists(output_dir):
            # exist_ok guards against the directory appearing between the check and the call.
            os.makedirs(output_dir, exist_ok=True)
            logger.info(f"Created output directory: {output_dir}")

        with open(output_path, "w", encoding="utf-8") as f:
            if file_extension == ".json":
                json.dump(schema_data, f, indent=2, ensure_ascii=False)
                logger.info(f"Successfully wrote processed schema as JSON to {output_path}")
            else:
                yaml.dump(
                    schema_data,
                    f,
                    default_flow_style=False,
                    allow_unicode=True,
                    sort_keys=False,
                )
                logger.info(f"Successfully wrote processed schema as YAML to {output_path}")

    except OSError as e:
        logger.critical(f"Error writing schema file {output_path}: {e}")
        raise
    except Exception as e:
        logger.critical(f"An unexpected error occurred while writing {output_path}: {e}")
        traceback.print_exc(file=sys.stderr)
        raise
174
+
175
+
176
def _extract_llm_text(response) -> str | None:
    """Return the cleaned text of the first choice, or None if the response has no usable message."""
    if not (response and response.choices and response.choices[0] and response.choices[0].message):
        return None

    # content may be None (e.g. an empty completion); treat that as an empty string.
    text = (response.choices[0].message.content or "").strip()

    # Remove potential quotes around the response
    if text.startswith('"') and text.endswith('"'):
        text = text[1:-1].strip()
    if text.startswith("'") and text.endswith("'"):
        text = text[1:-1].strip()

    return text.strip()


def generate_description_llm(
    description_type: str,
    model: str,
    context: dict | None = None,
    max_retries: int = 3,
    retry_delay: int = 5,
) -> str:
    """Ask an LLM (via ``litellm.completion``) for a summary or description.

    Args:
        description_type: ``"summary"`` (operation summary), ``"parameter"``
            (parameter description) or ``"api_description"`` (API overview).
        model: Model identifier passed straight to ``litellm.completion``.
        context: Values interpolated into the prompt. Keys read here:
            ``path_key``, ``method``, ``operation_value`` (summary);
            ``path_key``, ``method``, ``param_name``, ``param_in``,
            ``parameter_details`` (parameter); ``title`` (api_description).
        max_retries: Maximum number of completion attempts.
        retry_delay: Seconds to sleep between attempts.

    Returns:
        The generated text with surrounding whitespace/quotes stripped. If every
        attempt fails (exception, malformed/empty response, or the model echoing
        the fallback marker) a ``[LLM could not generate ...]`` placeholder is
        returned. An unknown ``description_type`` returns
        ``"[Invalid description type specified]"`` without calling the LLM.
    """
    if context is None:
        context = {}

    system_prompt = (
        "You are a helpful AI assistant specialized in writing concise summaries for API operations, clear, brief descriptions for API parameters, and overview descriptions for the entire API.\n"
        "Respond ONLY with the generated text, without any conversational filler or formatting like bullet points unless the description itself requires it. Ensure the response is a single string suitable for a description field."
    )

    if description_type == "summary":
        path_key = context.get("path_key", "unknown path")
        method = context.get("method", "unknown method")
        operation_context_str = json.dumps(
            context.get("operation_value", {}),
            indent=None,
            separators=(",", ":"),
            sort_keys=True,
        )
        if len(operation_context_str) > 1500:  # Limit context size
            operation_context_str = operation_context_str[:1500] + "..."

        user_prompt = (
            f'Generate a concise one-sentence summary for the API operation defined at path "{path_key}" using the "{method.upper()}" method.\n'
            "Example:\n"
            "- Stars a GitHub repository using the GitHub API and returns a status message.\n"
            "- Retrieves and formats a list of recent commits from a GitHub repository\n"
            "\n"
            f"Context (operation details): {operation_context_str}\n"
            "Respond ONLY with the summary text."
        )
        fallback_text = f"[LLM could not generate summary for {method.upper()} {path_key}]"

    elif description_type == "parameter":
        path_key = context.get("path_key", "unknown path")
        method = context.get("method", "unknown method")
        param_name = context.get("param_name", "unknown parameter")
        param_in = context.get("param_in", "unknown location")
        param_context_str = json.dumps(
            context.get("parameter_details", {}),
            indent=None,
            separators=(",", ":"),
            sort_keys=True,
        )
        if len(param_context_str) > 1000:  # Limit context size
            param_context_str = param_context_str[:1000] + "..."

        user_prompt = (
            f'Generate a clear, brief description for the API parameter named "{param_name}" located "{param_in}" for the "{method.upper()}" operation at path "{path_key}".\n'
            f"Context (parameter details): {param_context_str}\n"
            "Respond ONLY with the *SINGLE LINE* description text."
        )
        fallback_text = (
            f"[LLM could not generate description for parameter {param_name} in {method.upper()} {path_key}]"
        )

    elif description_type == "api_description":
        api_title = context.get("title", "Untitled API")
        user_prompt = (
            f'Generate a brief overview description for an API titled "{api_title}" based on an OpenAPI schema.\n'
            "Respond ONLY with the description text."
        )
        fallback_text = f"[LLM could not generate description for API '{api_title}']"

    else:
        logger.error(f"Invalid description_type '{description_type}' passed to generate_description_llm.")
        return "[Invalid description type specified]"

    messages = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": user_prompt},
    ]

    # Temporarily raise verbosity while talking to the LLM. The finally clause
    # guarantees the previous level is restored on EVERY exit path, including
    # the successful early return.
    original_level = logger.level
    logger.setLevel(logging.DEBUG)
    try:
        for attempt in range(max_retries):
            is_last_attempt = attempt >= max_retries - 1
            try:
                response = litellm.completion(
                    model=model,
                    messages=messages,
                    temperature=0.7,
                    max_tokens=150,  # Keep tokens low for concise output
                    timeout=60,
                )
                response_text = _extract_llm_text(response)
            except Exception as e:
                logger.error(
                    f"Error generating description using LLM for type '{description_type}' (Attempt {attempt + 1}/{max_retries}): {e}"
                )
                traceback.print_exc(file=sys.stderr)  # Print traceback for debugging
                if is_last_attempt:
                    logger.error(f"Max retries ({max_retries}) reached for type '{description_type}'.")
                    break
                logger.info(f"Retrying in {retry_delay} seconds...")
                time.sleep(retry_delay)
                continue

            if response_text is None:
                logger.warning(
                    f"LLM response was empty or unexpected structure for type '{description_type}'. Attempt {attempt + 1}/{max_retries}."
                )
            elif response_text == fallback_text:
                # The model parroted the placeholder; that is not a real answer.
                logger.warning(
                    f"LLM returned the fallback text literally for type '{description_type}'. Treating as failure. Attempt {attempt + 1}/{max_retries}."
                )
            elif not response_text:
                logger.warning(
                    f"LLM response is empty after stripping for type '{description_type}'. Attempt {attempt + 1}/{max_retries}."
                )
            else:
                # Successful generation
                return response_text

            # Unusable response: wait before the next attempt (no point sleeping after the last one).
            if not is_last_attempt:
                time.sleep(retry_delay)
    finally:
        # Restore original logging level
        logger.setLevel(original_level)

    logger.warning(f"Returning fallback text for type '{description_type}'.")
    return fallback_text  # Return fallback if all retries fail
347
+
348
+
349
def simplify_operation_context(operation_value: dict) -> dict:
    """Reduce an operation object to a compact dict suitable for an LLM prompt.

    Only these parts are kept, and only when non-empty:
      * ``parameters`` - per parameter either its ``$ref`` or its ``name``,
        ``in``, schema ``type`` and ``required`` flag;
      * ``responses`` - the status codes, each mapped to an empty dict;
      * ``requestBody`` - the ``required`` flag and the list of content types;
      * ``security`` - passed through unchanged.
    """

    def _slim_parameter(param: dict) -> dict:
        # References are kept verbatim; nothing else about them is known here.
        if "$ref" in param:
            return {"$ref": param["$ref"]}

        slim = {field: param[field] for field in ("name", "in") if field in param}
        schema = param.get("schema")
        if isinstance(schema, dict) and "type" in schema:
            slim["type"] = schema["type"]
        if "required" in param:
            slim["required"] = param["required"]
        return slim

    result = {}

    raw_params = operation_value.get("parameters")
    if isinstance(raw_params, list):
        candidates = (_slim_parameter(p) for p in raw_params if isinstance(p, dict))
        kept = [slim for slim in candidates if slim]
        if kept:
            result["parameters"] = kept

    raw_responses = operation_value.get("responses")
    if isinstance(raw_responses, dict) and raw_responses:
        # Only the status codes matter; bodies are dropped to keep the context small.
        result["responses"] = {status: {} for status in raw_responses}

    raw_body = operation_value.get("requestBody")
    if isinstance(raw_body, dict):
        slim_body = {}
        if "required" in raw_body:
            slim_body["required"] = raw_body["required"]
        content = raw_body.get("content")
        if isinstance(content, dict):
            slim_body["content_types"] = list(content)
        if slim_body:
            result["requestBody"] = slim_body

    raw_security = operation_value.get("security")
    if raw_security and isinstance(raw_security, list):
        result["security"] = raw_security  # List of security requirement objects (usually small)

    return result
402
+
403
+
404
def simplify_parameter_context(parameter: dict) -> dict:
    """Return a compact view of a parameter object for use in an LLM prompt.

    Copies ``name``, ``in`` and ``required`` from the parameter itself and,
    when ``schema`` is a dict, ``type``, ``enum`` and ``default`` from the
    schema. Keys that are absent in the input are absent in the result.
    """
    slim = {field: parameter[field] for field in ("name", "in", "required") if field in parameter}

    schema = parameter.get("schema")
    if isinstance(schema, dict):
        slim.update((field, schema[field]) for field in ("type", "enum", "default") if field in schema)

    return slim
423
+
424
+
425
def scan_schema_for_status(schema_data: dict):
    """
    Inspect a schema and tally how complete its descriptions and summaries are.

    Counts, for ``info.description``, every operation ``summary`` and every
    inline parameter ``description``, whether the text is present, missing, or
    a previously generated fallback placeholder. Parameters lacking a usable
    ``name`` or ``in`` are recorded by location. A missing ``info`` object or
    ``info.title`` is recorded as a critical error and stops the scan early.

    Read-only: the schema is not modified and the LLM is never called.
    Returns the report dictionary.
    """
    http_methods = ("get", "put", "post", "delete", "options", "head", "patch", "trace")

    def _has_text(value) -> bool:
        return isinstance(value, str) and bool(value.strip())

    def _bucket(value) -> str:
        # Which counter a description/summary value belongs to.
        if not _has_text(value):
            return "missing"
        return "fallback" if is_fallback_text(value) else "present"

    logger.info("\n--- Scanning Schema for Status ---")

    scan_report = {
        "info_description": {"present": 0, "missing": 0, "fallback": 0},
        "operation_summary": {"present": 0, "missing": 0, "fallback": 0},
        "parameter_description": {"present": 0, "missing": 0, "fallback": 0},
        "parameters_missing_name": [],
        "parameters_missing_in": [],
        "critical_errors": [],  # For essential validation issues like missing info/title
    }

    # --- Info section ---
    info_location = "info"
    info = schema_data.get("info")

    if not isinstance(info, dict):
        error_msg = f"Critical: Required '{info_location}' object is missing or not a dictionary."
        logger.critical(error_msg)
        scan_report["critical_errors"].append(error_msg)
        return scan_report  # Nothing meaningful can be scanned without the info block

    if not _has_text(info.get("title")):
        error_msg = f"Critical: Required field '{info_location}.title' is missing or empty."
        logger.critical(error_msg)
        scan_report["critical_errors"].append(error_msg)
        return scan_report  # Nothing meaningful can be scanned without a title

    scan_report["info_description"][_bucket(info.get("description"))] += 1

    # --- Paths ---
    paths = schema_data.get("paths")
    if not isinstance(paths, dict):
        if paths is None:
            logger.info("'paths' field is missing or null. No operations to scan.")
        else:
            logger.warning("'paths' field is not a dictionary. Skipping path scanning.")
        return scan_report

    for path_key, path_value in paths.items():
        if path_key.lower().startswith("x-"):
            logger.debug(f"Skipping scanning of path extension '{path_key}'.")
            continue

        if not isinstance(path_value, dict):
            logger.warning(f"Path value for '{path_key}' is not a dictionary. Skipping scanning for this path.")
            continue

        for method, operation_value in path_value.items():
            verb = method.lower()

            if verb not in http_methods:
                # Not an HTTP operation: log why it is skipped and move on.
                if verb.startswith("x-"):
                    logger.debug(f"Skipping scanning of method extension '{verb}' in path '{path_key}'.")
                elif verb == "parameters":  # Path level parameters
                    logger.debug(f"Skipping scanning of path-level parameters in '{path_key}'.")
                elif operation_value is not None:
                    logger.warning(f"Unknown method '{method}' found in path '{path_key}'. Skipping scanning.")
                else:
                    logger.debug(
                        f"Operation value for method '{method}' in path '{path_key}' is null. Skipping scanning."
                    )
                continue

            operation_location_base = f"paths.{path_key}.{verb}"
            if not isinstance(operation_value, dict):
                logger.warning(f"Operation value for '{operation_location_base}' is not a dictionary. Skipping.")
                continue

            # Operation summary
            scan_report["operation_summary"][_bucket(operation_value.get("summary"))] += 1

            # Operation parameters
            parameters = operation_value.get("parameters")
            if not isinstance(parameters, list):
                if parameters is not None:
                    logger.warning(
                        f"'parameters' field for operation '{operation_location_base}' is not a list. Skipping parameter scanning."
                    )
                continue

            for i, parameter in enumerate(parameters):
                if not isinstance(parameter, dict):
                    logger.warning(
                        f"Parameter at index {i} in {operation_location_base}.parameters is not a dictionary. Skipping."
                    )
                    continue

                if "$ref" in parameter:
                    logger.debug(
                        f"Parameter at index {i} in {operation_location_base}.parameters is a reference. Skipping detailed scan."
                    )
                    continue

                param_name = parameter.get("name")
                param_in = parameter.get("in")
                has_name = _has_text(param_name)
                has_in = _has_text(param_in)

                param_location_id = param_name if has_name else f"index {i}"
                param_location_base = f"{operation_location_base}.parameters[{param_location_id}]"

                if not has_name:
                    error_msg = f"Missing/empty 'name' field for parameter at {param_location_base}. Cannot generate description."
                    logger.warning(error_msg)  # Warning only: this can be fixed by hand
                    scan_report["parameters_missing_name"].append(param_location_base)

                if not has_in:
                    error_msg = f"Missing/empty 'in' field for parameter '{param_name}' at {param_location_base}. Cannot generate description."
                    logger.warning(error_msg)
                    scan_report["parameters_missing_in"].append(param_location_base)

                # A description is only counted when both name and in are usable.
                if has_name and has_in:
                    scan_report["parameter_description"][_bucket(parameter.get("description"))] += 1
                else:
                    logger.debug(
                        f"Skipping description scan for parameter at {param_location_base} due to missing name/in."
                    )

    logger.info("--- Scan Complete ---")
    return scan_report
586
+
587
+
588
def report_scan_results(scan_report: dict):
    """Prints a formatted summary of the scan results.

    Output goes to the ``console`` attribute of the root logger's first handler
    when it has one; otherwise a new rich ``Console`` is created. If the report
    contains ``critical_errors`` only those are printed. Otherwise the
    present/missing/fallback counters are listed, followed by any parameters
    lacking ``name``/``in`` and a grand total of missing or fallback items.
    """
    # Guard the lookup: the root logger may have no handlers at all, in which
    # case indexing handlers[0] would raise IndexError.
    root_handlers = logging.getLogger().handlers
    console = getattr(root_handlers[0], "console", None) if root_handlers else None
    if console is None:  # Fallback if rich console isn't attached to logger
        from rich.console import Console

        console = Console()

    console.print("\n[bold blue]--- Schema Scan Summary ---[/bold blue]")

    if scan_report.get("critical_errors"):
        console.print("[bold red]CRITICAL ERRORS FOUND:[/bold red]")
        for error in scan_report["critical_errors"]:
            console.print(f" [red]❌[/red] {error}")
        console.print("[bold red]Critical errors prevent automatic generation. Please fix these manually.[/bold red]")
        return  # Stop here if critical errors exist

    console.print("[bold yellow]Description/Summary Status:[/bold yellow]")
    info_desc = scan_report["info_description"]
    op_summ = scan_report["operation_summary"]
    param_desc = scan_report["parameter_description"]

    sections = (
        (" API Description (info.description):", info_desc),
        (" Operation Summaries (paths.*.summary):", op_summ),
        (" Parameter Descriptions (paths.*.*.parameters.description):", param_desc),
    )
    for heading, counts in sections:
        console.print(heading)
        console.print(f" [green]✅ Present[/green]: {counts['present']}")
        console.print(f" [orange1]❓ Missing[/orange1]: {counts['missing']}")
        console.print(f" [yellow]⚠️ Fallback[/yellow]: {counts['fallback']}")

    missing_name = scan_report.get("parameters_missing_name", [])
    missing_in = scan_report.get("parameters_missing_in", [])

    if missing_name or missing_in:
        console.print("\n[bold red]Parameter Issues Preventing LLM Generation:[/bold red]")
        console.print(
            "[yellow]Parameters below cannot have descriptions generated by LLM until 'name' and 'in' fields are fixed manually.[/yellow]"
        )
        if missing_name:
            console.print(" [bold red]Missing 'name' field:[/bold red]")
            for path in missing_name:
                console.print(f" [red]❌[/red] {path}")
        if missing_in:
            console.print(" [bold red]Missing 'in' field:[/bold red]")
            for path in missing_in:
                console.print(f" [red]❌[/red] {path}")

    total_missing_or_fallback = sum(
        counts["missing"] + counts["fallback"] for counts in (info_desc, op_summ, param_desc)
    )

    if total_missing_or_fallback > 0:
        console.print(
            f"\n[bold]Total items missing or needing enhancement:[/bold] [orange1]{total_missing_or_fallback}[/orange1]"
        )
    else:
        console.print("\n[bold green]Scan found no missing or fallback descriptions/summaries.[/bold green]")

    console.print("[bold blue]-------------------------[/bold blue]")
659
+
660
+
661
def process_parameter(
    parameter: dict,
    operation_location_base: str,
    path_key: str,
    method: str,
    llm_model: str,
    enhance_all: bool,  # New flag
):
    """Ensure an inline parameter has a description, generating one via the LLM if needed.

    The parameter dict is modified in place. References (``$ref``), non-dict
    values and parameters without a usable ``name`` and ``in`` are skipped.
    A description is generated when ``enhance_all`` is set or when the existing
    one is missing, blank, or a fallback placeholder. Afterwards any
    ``http(s)://`` URLs are removed from a non-fallback description, and a
    warning is logged if the final text exceeds ``MAX_DESCRIPTION_LENGTH``.
    """
    if not isinstance(parameter, dict):
        logger.warning(f"Invalid parameter object found in {operation_location_base}. Expected dictionary.")
        return

    if "$ref" in parameter:
        ref_path = parameter["$ref"]
        logger.debug(
            f"Parameter in {operation_location_base} is a reference ('{ref_path}'). Skipping description generation."
        )
        return

    param_name = parameter.get("name")
    param_in = parameter.get("in")
    name_ok = isinstance(param_name, str) and bool(param_name.strip())
    in_ok = isinstance(param_in, str) and bool(param_in.strip())

    # Build the most specific identifier available for log messages.
    if name_ok and in_ok:
        param_location_id = f"{param_in.strip()}:{param_name.strip()}"
    elif name_ok:
        param_location_id = param_name.strip()
    elif in_ok:
        param_location_id = f"{param_in.strip()}:[name missing]"
    else:
        param_location_id = "unknown_param"

    parameter_location_base = f"{operation_location_base}.parameters[{param_location_id}]"

    # Both name and in are required to build a meaningful prompt.
    if not (name_ok and in_ok):
        logger.warning(
            f"Cannot generate description for parameter at {parameter_location_base} due to missing 'name' or 'in' field."
        )
        return

    existing = parameter.get("description")
    has_real_description = isinstance(existing, str) and bool(existing.strip()) and not is_fallback_text(existing)

    if enhance_all or not has_real_description:
        logger.info(f"Generating description for parameter '{param_name}' at {parameter_location_base}.")

        prompt_context = {
            "path_key": path_key,
            "method": method,
            "param_name": param_name,
            "param_in": param_in,
            "parameter_details": simplify_parameter_context(parameter),
        }
        parameter["description"] = generate_description_llm(
            description_type="parameter",
            model=llm_model,
            context=prompt_context,
        )
        logger.debug(f"Inserted description for parameter '{param_name}' at {parameter_location_base}.")
    else:
        logger.debug(
            f"Existing 'description' found for parameter '{param_name}' at {parameter_location_base}. Skipping generation."
        )

    # --- Strip URLs out of the description (fallback placeholders are left alone) ---
    current_description = parameter.get("description", "")
    if isinstance(current_description, str) and current_description and not is_fallback_text(current_description):
        without_urls = re.sub(r"https?://[\S]+", "", current_description).strip()
        modified_description = re.sub(r"\s{2,}", " ", without_urls).strip()  # Collapse multiple spaces

        if modified_description != current_description:
            parameter["description"] = modified_description
            logger.debug(
                f"Removed links from description for parameter '{param_name}' at {parameter_location_base}. New description: '{modified_description[:50]}...'"
            )

    # --- Length check on whatever description ended up in place ---
    final_param_description = parameter.get("description", "")
    if isinstance(final_param_description, str) and len(final_param_description) > MAX_DESCRIPTION_LENGTH:
        desc_length = len(final_param_description)
        logger.warning(
            f"Parameter description at '{parameter_location_base}.description' exceeds max length. Actual length: {desc_length}, Max allowed: {MAX_DESCRIPTION_LENGTH}. Consider manual edit."
        )
760
+
761
def process_operation(
    operation_value: dict,
    path_key: str,
    method: str,
    llm_model: str,
    enhance_all: bool,
):
    """
    Ensure a single operation has a summary, then process each of its parameters.

    The summary is (re)generated through the LLM when `enhance_all` is set or
    when the current summary is absent, not a string, blank, or recognised as
    fallback text. The operation dictionary is updated in place.

    Args:
        operation_value: The operation object found under a path/method pair.
        path_key: The path this operation belongs to.
        method: The HTTP method key of the operation.
        llm_model: Model identifier forwarded to the LLM helper.
        enhance_all: When true, regenerate the summary even if one exists.
    """
    location = f"paths.{path_key}.{method.lower()}"

    # Guard clauses: nothing to do for malformed operations or extensions.
    if not isinstance(operation_value, dict):
        logger.warning(f"Operation value for '{location}' is not a dictionary. Skipping processing.")
        return
    if method.lower().startswith("x-"):
        logger.debug(f"Skipping extension operation '{location}'.")
        return

    # --- Summary ---
    existing_summary = operation_value.get("summary")
    # Short-circuit order matters: the fallback check only runs on a non-blank string.
    if enhance_all or not (
        isinstance(existing_summary, str) and existing_summary.strip() and not is_fallback_text(existing_summary)
    ):
        logger.info(f"Generating summary for operation '{location}'.")
        operation_context = simplify_operation_context(operation_value)
        operation_value["summary"] = generate_description_llm(
            description_type="summary",
            model=llm_model,
            context={
                "path_key": path_key,
                "method": method,
                "operation_value": operation_context,
            },
        )
        logger.debug(f"Inserted summary for '{location}'.")
    else:
        logger.debug(f"Existing summary found for '{location}'. Skipping generation.")

    # Length is only reported, never truncated; the fix is left to a human.
    final_summary = operation_value.get("summary", "")
    if isinstance(final_summary, str) and len(final_summary) > MAX_DESCRIPTION_LENGTH:
        logger.warning(
            f"Operation summary at '{location}.summary' exceeds max length ({len(final_summary)} > {MAX_DESCRIPTION_LENGTH}). Consider manual edit."
        )

    # --- Parameters ---
    parameters = operation_value.get("parameters")
    if parameters is None:
        return
    if not isinstance(parameters, list):
        logger.warning(f"'parameters' field for operation '{location}' is not a list. Skipping parameter processing.")
        return
    for parameter in parameters:
        process_parameter(parameter, location, path_key, method, llm_model, enhance_all)
832
+
833
+
834
# HTTP verbs that may hold an Operation Object inside a Path Item.
_OPERATION_METHODS = frozenset({"get", "put", "post", "delete", "options", "head", "patch", "trace"})
# Path Item fields that are valid but describe the path itself, not an operation.
_PATH_ITEM_METADATA_FIELDS = frozenset({"$ref", "summary", "description", "servers"})


def process_paths(paths: dict, llm_model: str, enhance_all: bool):
    """
    Walk every path item and hand each HTTP operation to `process_operation`.

    Extension keys (`x-...`), path-level `parameters`, and the path item
    metadata fields (`$ref`, `summary`, `description`, `servers`) are skipped
    with a debug message. Malformed entries are reported and skipped so that
    one bad key does not abort the whole run.

    Args:
        paths: The schema's `paths` object; anything other than a dict is
            reported and ignored.
        llm_model: Model identifier forwarded to `process_operation`.
        enhance_all: Forwarded to `process_operation`.
    """
    if not isinstance(paths, dict):
        logger.warning("'paths' field is not a dictionary. Skipping path processing.")
        return

    for path_key, path_value in paths.items():
        # YAML allows non-string keys; calling .lower() on them would raise.
        if not isinstance(path_key, str):
            logger.warning(f"Path key {path_key!r} is not a string. Skipping processing.")
            continue

        if path_key.lower().startswith("x-"):
            logger.debug(f"Skipping processing of path extension '{path_key}'.")
            continue

        if path_value is None:
            continue
        if not isinstance(path_value, dict):
            logger.warning(f"Path value for '{path_key}' is not a dictionary. Skipping processing.")
            continue

        for method, operation_value in path_value.items():
            if not isinstance(method, str):
                logger.warning(f"Key {method!r} in path '{path_key}' is not a string. Skipping processing.")
                continue

            method_key = method.lower()
            if method_key in _OPERATION_METHODS:
                process_operation(operation_value, path_key, method, llm_model, enhance_all)
            elif method_key.startswith("x-"):
                logger.debug(f"Skipping processing of method extension '{method_key}' in path '{path_key}'.")
            elif method_key == "parameters":
                logger.debug(f"Skipping processing of path-level parameters in '{path_key}'.")
            elif method_key in _PATH_ITEM_METADATA_FIELDS:
                # Valid Path Item fields, not operations: no "unknown method" warning.
                logger.debug(f"Skipping non-operation path item field '{method}' in path '{path_key}'.")
            elif operation_value is None:
                logger.debug(
                    f"Operation value for method '{method}' in path '{path_key}' is null. Skipping processing."
                )
            else:
                logger.warning(f"Unknown method '{method}' found in path '{path_key}'. Skipping processing.")
872
+
873
+
874
def process_info_section(schema_data: dict, llm_model: str, enhance_all: bool):
    """
    Ensure `info.description` is populated, generating it with the LLM if needed.

    The description is (re)generated when `enhance_all` is set or when the
    current value is absent, not a string, blank, or recognised as fallback
    text. The `info` dictionary inside `schema_data` is updated in place, and
    a warning is logged if the final description exceeds
    `MAX_DESCRIPTION_LENGTH`.

    Args:
        schema_data: The parsed schema; must contain a dict under `info`.
        llm_model: Model identifier forwarded to the LLM helper.
        enhance_all: When true, regenerate the description even if one exists.

    Raises:
        TypeError: If `schema_data["info"]` is missing or not a dictionary.
        KeyError: If `info` has no `title`.
    """
    info_location = "info"

    info = schema_data.get("info")
    # Fail with a clear message instead of an opaque "not subscriptable" error;
    # callers are expected to have validated the schema before reaching here.
    if not isinstance(info, dict):
        raise TypeError(
            f"Schema '{info_location}' section must be a dictionary, got {type(info).__name__}."
        )

    info_title = info["title"]  # Expected to exist; validated by the CLI caller
    info_description = info.get("description")

    needs_description_generation = (
        enhance_all
        or not isinstance(info_description, str)
        or not info_description.strip()
        or is_fallback_text(info_description)
    )

    if needs_description_generation:
        logger.info(f"Generating description for '{info_location}'.")
        info["description"] = generate_description_llm(
            description_type="api_description",
            model=llm_model,
            context={"title": info_title},
        )
        logger.debug(f"Inserted description for '{info_location}.description'.")
    else:
        logger.debug("Existing 'info.description' found. Skipping generation.")

    # Length is only reported, never truncated; the fix is left to a human.
    final_description = info.get("description", "")
    if isinstance(final_description, str):
        desc_length = len(final_description)
        if desc_length > MAX_DESCRIPTION_LENGTH:
            logger.warning(
                f"API description at '{info_location}.description' exceeds max length ({desc_length} > {MAX_DESCRIPTION_LENGTH}). Consider manual edit."
            )
917
+
918
+
919
def preprocess_schema_with_llm(schema_data: dict, llm_model: str, enhance_all: bool):
    """
    Run the LLM enrichment pass over a schema, mutating it in place.

    The `info` section is handled first, then everything under `paths`.
    Whether an individual item is regenerated depends on `enhance_all` and on
    the content already present. Basic structure validation (`info` and its
    `title`) is assumed to have been done by the caller.
    """
    logger.info(f"\n--- Starting LLM Generation (enhance_all={enhance_all}) ---")

    process_info_section(schema_data, llm_model, enhance_all)
    # A missing 'paths' key yields None, which process_paths receives as-is.
    process_paths(schema_data.get("paths"), llm_model, enhance_all)

    logger.info("--- LLM Generation Complete ---")
933
+
934
+
935
def run_preprocessing(
    schema_path: Path,
    output_path: Path | None = None,
    model: str = "perplexity/sonar",
    debug: bool = False,
):
    """
    Interactive entry point: scan a schema, optionally enrich it via an LLM, save it.

    Steps, in order: load the schema, scan and report its status, abort on
    critical errors, ask the user which generation mode to use, run the LLM
    pass, and write the result.

    Args:
        schema_path: Schema file to read. A value of None is tolerated and
            triggers an interactive prompt for the path.
        output_path: Destination file. When None, `_processed` is inserted
            before the extension of `schema_path`.
        model: Model identifier forwarded to the LLM pass.
        debug: Selects the "DEBUG" logging level instead of "INFO".

    Raises:
        typer.Exit: With code 1 on any failure, or code 0 when the user quits.
    """
    set_logging_level("DEBUG" if debug else "INFO")
    console.print("[bold blue]--- Starting OpenAPI Schema Preprocessor ---[/bold blue]")

    # Fall back to an interactive prompt when no path was supplied.
    if schema_path is None:
        entered_path = typer.prompt(
            "Please enter the path to the OpenAPI schema file (JSON or YAML)",
            prompt_suffix=": ",
        ).strip()
        if not entered_path:
            console.print("[red]Error: Schema path is required.[/red]")
            raise typer.Exit(1)
        schema_path = Path(entered_path)

    # --- Step 1: Load the schema ---
    try:
        schema_data = read_schema_file(str(schema_path))
    except (FileNotFoundError, yaml.YAMLError, json.JSONDecodeError, OSError) as read_error:
        # No message here; presumably read_schema_file reports these itself.
        raise typer.Exit(1) from read_error
    except Exception as read_error:
        console.print(f"[red]An unexpected error occurred while reading schema: {read_error}[/red]")
        raise typer.Exit(1) from read_error

    # --- Step 2: Scan and report status ---
    try:
        scan_report = scan_schema_for_status(schema_data)
        report_scan_results(scan_report)
    except Exception as scan_error:
        console.print(f"[red]An unexpected error occurred during schema scanning: {scan_error}[/red]")
        raise typer.Exit(1) from scan_error

    # --- Step 3: Stop on critical errors ---
    if scan_report.get("critical_errors"):
        console.print(
            "[bold red]Cannot proceed with generation due to critical errors. Please fix the schema file manually.[/bold red]"
        )
        raise typer.Exit(1)

    # --- Step 4: Build the menu from the scan results ---
    pending_items = sum(
        scan_report[section][state]
        for section in ("info_description", "operation_summary", "parameter_description")
        for state in ("missing", "fallback")
    )
    unfixable_params = len(scan_report.get("parameters_missing_name", [])) + len(
        scan_report.get("parameters_missing_in", [])
    )

    enhance_all_option = " [2] Generate/Enhance [bold]all[/bold] descriptions/summaries"

    console.print("\n[bold blue]Choose an action:[/bold blue]")

    if pending_items > 0:
        console.print(
            f"[bold]Scan found {pending_items} items that are missing or using fallback text and can be generated/enhanced.[/bold]"
        )
        if unfixable_params > 0:
            console.print(
                f"[yellow]Note: {unfixable_params} parameters require manual fixing and cannot be generated by the LLM due to missing name/in.[/yellow]"
            )
        menu_lines = [
            " [1] Generate [bold]only missing[/bold] descriptions/summaries [green](default)[/green]",
            enhance_all_option,
            " [3] [bold red]Quit[/bold red] (exit without changes)",
        ]
        valid_choices = ["1", "2", "3"]
        default_choice = "1"  # Something is missing, so filling it in is the default
    else:
        if unfixable_params > 0:
            console.print(
                f"[bold yellow]Scan found no missing/fallback items suitable for generation, but {unfixable_params} parameters have missing 'name' or 'in'.[/bold yellow]"
            )
            console.print(
                "[bold yellow]These parameters require manual fixing and cannot be generated by the LLM.[/bold yellow]"
            )
        else:
            console.print("[bold green]Scan found no missing or fallback descriptions/summaries.[/bold green]")

        console.print("[bold blue]You can choose to enhance all existing descriptions or exit.[/bold blue]")
        menu_lines = [
            enhance_all_option,
            " [3] [bold red]Quit[/bold red] [green](default)[/green]",
        ]
        valid_choices = ["2", "3"]
        default_choice = "3"  # Nothing is missing, so quitting is the default

    for menu_line in menu_lines:
        console.print(menu_line)

    # Keep asking until one of the offered options is entered.
    while True:
        choice = typer.prompt("Enter choice", default=default_choice, show_default=False, type=str).strip()
        if choice in valid_choices:
            break
        console.print("[red]Invalid choice. Please select from the options above.[/red]")

    if choice == "3":
        console.print("[yellow]Exiting without making changes.[/yellow]")
        raise typer.Exit(0)
    enhance_all = choice == "2"

    # --- Step 5: Run the LLM pass ---
    if enhance_all or (choice == "1" and pending_items > 0):
        console.print(f"[blue]Starting LLM generation with Enhance All: {enhance_all}[/blue]")
        try:
            preprocess_schema_with_llm(schema_data, model, enhance_all)
            console.print("[green]LLM generation complete.[/green]")
        except Exception as llm_error:
            console.print(f"[red]Error during LLM generation: {llm_error}[/red]")
            # Print the traceback to stderr for debugging
            import traceback

            traceback.print_exc(file=sys.stderr)
            raise typer.Exit(1) from llm_error
    else:
        console.print(
            "[yellow]No missing or fallback items found, and 'Enhance All' was not selected. Skipping LLM generation step.[/yellow]"
        )

    # --- Step 6: Resolve the destination and write ---
    if output_path is None:
        stem, suffix = os.path.splitext(schema_path)
        output_path = Path(f"{stem}_processed{suffix}")
        console.print(f"[blue]No output path specified. Defaulting to: {output_path}[/blue]")
    else:
        console.print(f"[blue]Saving processed schema to: {output_path}[/blue]")

    try:
        write_schema_file(schema_data, str(output_path))
    except (OSError, ValueError) as write_error:
        # write_schema_file logs critical errors, just exit here
        raise typer.Exit(1) from write_error
    except Exception as write_error:
        console.print(f"[red]An unexpected error occurred while writing the schema: {write_error}[/red]")
        raise typer.Exit(1) from write_error

    console.print("\n[bold green]--- Schema Processing and Saving Complete ---[/bold green]")
    console.print(f"Processed schema saved to: [blue]{output_path}[/blue]")
    console.print("[bold blue]Preprocessor finished successfully.[/bold blue]")