universal-mcp 0.1.13rc14__py3-none-any.whl → 0.1.15__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- universal_mcp/analytics.py +7 -1
- universal_mcp/applications/README.md +122 -0
- universal_mcp/applications/__init__.py +48 -46
- universal_mcp/applications/application.py +249 -40
- universal_mcp/cli.py +49 -49
- universal_mcp/config.py +95 -22
- universal_mcp/exceptions.py +8 -0
- universal_mcp/integrations/integration.py +18 -2
- universal_mcp/logger.py +59 -8
- universal_mcp/servers/__init__.py +2 -2
- universal_mcp/stores/store.py +2 -12
- universal_mcp/tools/__init__.py +14 -2
- universal_mcp/tools/adapters.py +25 -0
- universal_mcp/tools/func_metadata.py +12 -2
- universal_mcp/tools/manager.py +236 -0
- universal_mcp/tools/tools.py +5 -249
- universal_mcp/utils/common.py +33 -0
- universal_mcp/utils/installation.py +8 -8
- universal_mcp/utils/openapi/__inti__.py +0 -0
- universal_mcp/utils/{api_generator.py → openapi/api_generator.py} +1 -1
- universal_mcp/utils/openapi/openapi.py +930 -0
- universal_mcp/utils/openapi/preprocessor.py +1223 -0
- universal_mcp/utils/{readme.py → openapi/readme.py} +21 -31
- universal_mcp/utils/templates/README.md.j2 +17 -0
- {universal_mcp-0.1.13rc14.dist-info → universal_mcp-0.1.15.dist-info}/METADATA +6 -3
- universal_mcp-0.1.15.dist-info/RECORD +44 -0
- universal_mcp-0.1.15.dist-info/licenses/LICENSE +21 -0
- universal_mcp/templates/README.md.j2 +0 -93
- universal_mcp/utils/dump_app_tools.py +0 -78
- universal_mcp/utils/openapi.py +0 -697
- universal_mcp-0.1.13rc14.dist-info/RECORD +0 -39
- /universal_mcp/utils/{docgen.py → openapi/docgen.py} +0 -0
- /universal_mcp/{templates → utils/templates}/api_client.py.j2 +0 -0
- {universal_mcp-0.1.13rc14.dist-info → universal_mcp-0.1.15.dist-info}/WHEEL +0 -0
- {universal_mcp-0.1.13rc14.dist-info → universal_mcp-0.1.15.dist-info}/entry_points.txt +0 -0
@@ -0,0 +1,1223 @@
|
|
1
|
+
import json
|
2
|
+
import logging
|
3
|
+
import os
|
4
|
+
import re
|
5
|
+
import sys
|
6
|
+
import time
|
7
|
+
import traceback
|
8
|
+
from pathlib import Path
|
9
|
+
|
10
|
+
import litellm
|
11
|
+
import typer
|
12
|
+
import yaml
|
13
|
+
from rich.console import Console
|
14
|
+
|
15
|
+
# Module-level rich console.
# NOTE(review): nothing in the visible part of this file reads this instance
# (report_scan_results binds its own local `console`) — confirm whether code
# further down relies on it.
console = Console()
|
16
|
+
|
17
|
+
|
18
|
+
# ANSI escape sequences used to colourise log output; "ENDC" resets styling.
COLORS = dict(
    YELLOW="\033[93m",
    RED="\033[91m",
    ENDC="\033[0m",
    BLUE="\033[94m",
    GREEN="\033[92m",
    CYAN="\033[96m",
)
|
26
|
+
|
27
|
+
|
28
|
+
class ColoredFormatter(logging.Formatter):
    """Log formatter that wraps each record in a level-specific ANSI colour.

    DEBUG records are additionally prefixed with the originating file name
    and line number.
    """

    FORMAT = "%(levelname)s:%(message)s"

    LOG_LEVEL_COLORS = {
        logging.DEBUG: COLORS["CYAN"],
        logging.INFO: COLORS["GREEN"],
        logging.WARNING: COLORS["YELLOW"],
        logging.ERROR: COLORS["RED"],
        logging.CRITICAL: COLORS["RED"],
    }

    def format(self, record):
        """Render *record* using the colour registered for its level, if any."""
        colour = self.LOG_LEVEL_COLORS.get(record.levelno)
        pattern = f"{colour}{self.FORMAT}{COLORS['ENDC']}" if colour else self.FORMAT

        # Add filename and line number for debug
        if record.levelno == logging.DEBUG:
            pattern = "%(filename)s:%(lineno)d - " + pattern

        return logging.Formatter(pattern).format(record)
|
54
|
+
|
55
|
+
|
56
|
+
# Configure the root logger: drop whatever handlers are already attached and
# install a single colourised handler that writes to stdout.
logger = logging.getLogger()
# Iterate over a snapshot: removeHandler() mutates logger.handlers, and
# removing entries while iterating the live list skips every other handler.
for handler in list(logger.handlers):
    logger.removeHandler(handler)

logger.setLevel(logging.INFO)  # Default level, can be changed by set_logging_level

console_handler = logging.StreamHandler(sys.stdout)
colored_formatter = ColoredFormatter()
console_handler.setFormatter(colored_formatter)
logger.addHandler(console_handler)
|
67
|
+
|
68
|
+
|
69
|
+
def set_logging_level(level: str):
    """Set the level of the module-level ``logger`` from a level name.

    The name is matched case-insensitively against DEBUG, INFO, WARNING,
    ERROR and CRITICAL; any other value falls back to INFO. The level that
    was applied is then reported through the logger itself.
    """
    known_levels = ("DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL")
    requested = level.upper()
    log_level = getattr(logging, requested) if requested in known_levels else logging.INFO
    logger.setLevel(log_level)
    logger.info(f"Logging level set to {logging.getLevelName(log_level)}")
|
80
|
+
|
81
|
+
|
82
|
+
# NOTE(review): not referenced in the visible part of this file; presumably an
# upper bound on the length of a description — confirm against its users.
MAX_DESCRIPTION_LENGTH = 200
|
83
|
+
|
84
|
+
|
85
|
+
def is_fallback_text(text: str | None) -> bool:
|
86
|
+
"""Checks if the text looks like a generated fallback message."""
|
87
|
+
if not isinstance(text, str) or not text.strip():
|
88
|
+
return False
|
89
|
+
# Check for the specific pattern used for LLM generation failures
|
90
|
+
return text.strip().startswith("[LLM could not generate")
|
91
|
+
|
92
|
+
|
93
|
+
def read_schema_file(schema_path: str) -> dict:
    """Load a schema from *schema_path* and return the parsed content.

    The parser is chosen from the file extension: ``.yaml``/``.yml`` use
    ``yaml.safe_load`` and ``.json`` uses ``json.load``. Any other extension
    is tried as YAML first and, if that fails to parse, as JSON.

    Raises:
        FileNotFoundError: if *schema_path* does not exist.
        yaml.YAMLError, json.JSONDecodeError: if the content cannot be parsed.
        OSError: if the file cannot be read.
    """
    logger.info(f"Attempting to read schema file: {schema_path}")
    if not os.path.exists(schema_path):
        # Use critical for pre-processing essential step
        logger.critical(f"Schema file not found at: {schema_path}")
        raise FileNotFoundError(f"Schema file not found at: {schema_path}")

    try:
        with open(schema_path, encoding="utf-8") as f:
            suffix = os.path.splitext(schema_path)[1].lower()

            if suffix == ".json":
                logger.info(f"Reading as JSON: {schema_path}")
                return json.load(f)

            if suffix in (".yaml", ".yml"):
                logger.info(f"Reading as YAML: {schema_path}")
                return yaml.safe_load(f)

            # Attempt YAML as a fallback for unknown extensions
            logger.warning(
                f"Unknown file extension '{suffix}' for {schema_path}. Attempting to read as YAML."
            )
            try:
                return yaml.safe_load(f)
            except (yaml.YAMLError, json.JSONDecodeError):
                # YAML failed: rewind so the JSON parser sees the whole file
                f.seek(0)
                logger.warning("YAML load failed, attempting JSON.")
                return json.load(f)

    except (yaml.YAMLError, json.JSONDecodeError) as e:
        logger.critical(f"Error parsing schema file {schema_path}: {e}")
        raise
    except OSError as e:
        logger.critical(f"Error reading schema file {schema_path}: {e}")
        raise
    except Exception as e:
        logger.critical(f"An unexpected error occurred while reading {schema_path}: {e}")
        traceback.print_exc(file=sys.stderr)
        raise
|
140
|
+
|
141
|
+
|
142
|
+
def write_schema_file(schema_data: dict, output_path: str):
    """Serialise *schema_data* to *output_path* as JSON or YAML.

    The format is chosen from the file extension (``.json``, ``.yaml`` or
    ``.yml``). The extension is validated before anything touches the
    filesystem, so an unsupported extension never creates directories and
    never truncates an existing file at *output_path*.

    Raises:
        ValueError: if the extension is not .json, .yaml or .yml.
        OSError: if the output directory or file cannot be written.
    """
    logger.info(f"Attempting to write processed schema to: {output_path}")

    _, file_extension = os.path.splitext(output_path)
    file_extension = file_extension.lower()
    if file_extension not in (".json", ".yaml", ".yml"):
        # Reject early: opening the file with "w" first would wipe its content.
        logger.error(f"Unsupported output file extension '{file_extension}' for writing.")
        raise ValueError(
            f"Unsupported output file extension '{file_extension}'. Use .json or .yaml/.yml."
        )

    try:
        output_dir = os.path.dirname(output_path)
        if output_dir and not os.path.exists(output_dir):
            # exist_ok guards against the directory appearing between the
            # existence check and the creation call.
            os.makedirs(output_dir, exist_ok=True)
            logger.info(f"Created output directory: {output_dir}")

        with open(output_path, "w", encoding="utf-8") as f:
            if file_extension == ".json":
                json.dump(schema_data, f, indent=2, ensure_ascii=False)
                logger.info(f"Successfully wrote processed schema as JSON to {output_path}")
            else:
                yaml.dump(
                    schema_data,
                    f,
                    default_flow_style=False,
                    allow_unicode=True,
                    sort_keys=False,
                )
                logger.info(f"Successfully wrote processed schema as YAML to {output_path}")

    except OSError as e:
        logger.critical(f"Error writing schema file {output_path}: {e}")
        raise
    except Exception as e:
        logger.critical(f"An unexpected error occurred while writing {output_path}: {e}")
        traceback.print_exc(file=sys.stderr)
        raise
|
188
|
+
|
189
|
+
|
190
|
+
def _build_description_prompt(description_type: str, context: dict) -> tuple[str, str] | None:
    """Build the user prompt and the matching fallback text for one request.

    Returns ``(user_prompt, fallback_text)``, or ``None`` when
    *description_type* is not "summary", "parameter" or "api_description".
    """
    if description_type == "summary":
        path_key = context.get("path_key", "unknown path")
        method = context.get("method", "unknown method")
        operation_context_str = json.dumps(
            context.get("operation_value", {}),
            indent=None,
            separators=(",", ":"),
            sort_keys=True,
        )
        if len(operation_context_str) > 1500:  # Limit context size
            operation_context_str = operation_context_str[:1500] + "..."

        user_prompt = f"""Generate a concise one-sentence summary for the API operation defined at path "{path_key}" using the "{method.upper()}" method.
Example:
- Stars a GitHub repository using the GitHub API and returns a status message.
- Retrieves and formats a list of recent commits from a GitHub repository

Context (operation details): {operation_context_str}
Respond ONLY with the summary text."""
        fallback_text = f"[LLM could not generate summary for {method.upper()} {path_key}]"
        return user_prompt, fallback_text

    if description_type == "parameter":
        path_key = context.get("path_key", "unknown path")
        method = context.get("method", "unknown method")
        param_name = context.get("param_name", "unknown parameter")
        param_in = context.get("param_in", "unknown location")
        param_context_str = json.dumps(
            context.get("parameter_details", {}),
            indent=None,
            separators=(",", ":"),
            sort_keys=True,
        )
        if len(param_context_str) > 1000:  # Limit context size
            param_context_str = param_context_str[:1000] + "..."

        user_prompt = f"""Generate a clear, brief description for the API parameter named "{param_name}" located "{param_in}" for the "{method.upper()}" operation at path "{path_key}".
Context (parameter details): {param_context_str}
Respond ONLY with the *SINGLE LINE* description text."""
        fallback_text = f"[LLM could not generate description for parameter {param_name} in {method.upper()} {path_key}]"
        return user_prompt, fallback_text

    if description_type == "api_description":
        api_title = context.get("title", "Untitled API")
        user_prompt = f"""Generate a brief overview description for an API titled "{api_title}" based on an OpenAPI schema.
Respond ONLY with the description text."""
        fallback_text = f"[LLM could not generate description for API '{api_title}']"
        return user_prompt, fallback_text

    return None


def _extract_response_text(response) -> str | None:
    """Return the cleaned text of the first choice, or None if it has no message.

    Surrounding whitespace and one layer of matching double and/or single
    quotes are removed. A message whose content is None yields "".
    """
    if not (
        response
        and response.choices
        and response.choices[0]
        and response.choices[0].message
    ):
        return None

    # content may be None; treat that as an empty reply instead of letting
    # .strip() raise AttributeError.
    response_text = (response.choices[0].message.content or "").strip()

    # Remove potential quotes around the response
    if response_text.startswith('"') and response_text.endswith('"'):
        response_text = response_text[1:-1].strip()
    if response_text.startswith("'") and response_text.endswith("'"):
        response_text = response_text[1:-1].strip()

    return response_text.strip()


def generate_description_llm(
    description_type: str,
    model: str,
    context: dict = None,
    max_retries: int = 3,
    retry_delay: int = 5,
) -> str:
    """Ask the LLM for a summary or description and return the cleaned text.

    Args:
        description_type: "summary", "parameter" or "api_description".
        model: model identifier passed to ``litellm.completion``.
        context: values interpolated into the prompt (path, method, parameter
            details, API title, ...); missing keys use placeholder wording.
        max_retries: number of completion attempts before giving up.
        retry_delay: seconds to sleep between attempts.

    Returns:
        The generated text. If every attempt fails, raises, or yields an
        empty reply or the fallback text itself, a bracketed
        "[LLM could not generate ...]" fallback string is returned. An
        unknown *description_type* returns "[Invalid description type specified]".
    """
    if context is None:
        context = {}

    system_prompt = """You are a helpful AI assistant specialized in writing concise summaries for API operations, clear, brief descriptions for API parameters, and overview descriptions for the entire API.
Respond ONLY with the generated text, without any conversational filler or formatting like bullet points unless the description itself requires it. Ensure the response is a single string suitable for a description field."""

    prompt_parts = _build_description_prompt(description_type, context)
    if prompt_parts is None:
        logger.error(
            f"Invalid description_type '{description_type}' passed to generate_description_llm."
        )
        return "[Invalid description type specified]"
    user_prompt, fallback_text = prompt_parts

    messages = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": user_prompt},
    ]

    # The logger is raised to DEBUG for the duration of the LLM calls. The
    # finally clause guarantees the previous level is restored on every exit
    # path, including the successful return.
    original_level = logger.level
    logger.setLevel(logging.DEBUG)
    try:
        for attempt in range(max_retries):
            try:
                response = litellm.completion(
                    model=model,
                    messages=messages,
                    temperature=0.7,
                    max_tokens=150,  # Keep tokens low for concise output
                    timeout=60,
                )
                response_text = _extract_response_text(response)

                if response_text is None:
                    logger.warning(
                        f"LLM response was empty or unexpected structure for type '{description_type}'. Attempt {attempt + 1}/{max_retries}."
                    )
                elif response_text == fallback_text:
                    # The model echoed the placeholder; that is not a real answer.
                    logger.warning(
                        f"LLM returned the fallback text literally for type '{description_type}'. Treating as failure. Attempt {attempt + 1}/{max_retries}."
                    )
                elif not response_text:
                    logger.warning(
                        f"LLM response is empty after stripping for type '{description_type}'. Attempt {attempt + 1}/{max_retries}."
                    )
                else:
                    return response_text  # Successful generation

                # Unusable reply: wait before the next attempt, if there is one.
                if attempt < max_retries - 1:
                    time.sleep(retry_delay)

            except Exception as e:
                logger.error(
                    f"Error generating description using LLM for type '{description_type}' (Attempt {attempt + 1}/{max_retries}): {e}"
                )
                traceback.print_exc(file=sys.stderr)  # Print traceback for debugging
                if attempt < max_retries - 1:
                    logger.info(f"Retrying in {retry_delay} seconds...")
                    time.sleep(retry_delay)
                else:
                    logger.error(
                        f"Max retries ({max_retries}) reached for type '{description_type}'."
                    )
                    break  # Exit retry loop
    finally:
        # Restore original logging level
        logger.setLevel(original_level)

    logger.warning(f"Returning fallback text for type '{description_type}'.")
    return fallback_text  # Return fallback if all retries fail
|
372
|
+
|
373
|
+
|
374
|
+
def simplify_operation_context(operation_value: dict) -> dict:
    """Return a reduced copy of an operation object for use as prompt context.

    Kept, when present and non-empty:
      * ``parameters``: per parameter either its ``$ref`` or its ``name``,
        ``in``, schema ``type`` and ``required`` fields;
      * ``responses``: the status-code keys only, each mapped to ``{}``;
      * ``requestBody``: ``required`` plus the list of content types;
      * ``security``: the original list, unchanged.
    """

    def _slim_parameter(param: dict) -> dict:
        # References are passed through as-is; nothing else is kept for them.
        if "$ref" in param:
            return {"$ref": param["$ref"]}
        slim = {key: param[key] for key in ("name", "in") if key in param}
        # Optionally add type/required for better context, but keep it small
        schema = param.get("schema")
        if isinstance(schema, dict) and "type" in schema:
            slim["type"] = schema["type"]
        if "required" in param:
            slim["required"] = param["required"]
        return slim

    context = {}

    params = operation_value.get("parameters")
    if isinstance(params, list):
        slimmed = [
            entry
            for entry in (_slim_parameter(p) for p in params if isinstance(p, dict))
            if entry
        ]
        if slimmed:
            context["parameters"] = slimmed

    responses = operation_value.get("responses")
    if isinstance(responses, dict) and responses:
        # Only include keys (status codes) to keep context size down
        context["responses"] = {code: {} for code in responses}

    # Include requestBody if present (simplified)
    body = operation_value.get("requestBody")
    if isinstance(body, dict):
        slim_body = {}
        if "required" in body:
            slim_body["required"] = body["required"]
        content = body.get("content")
        if isinstance(content, dict):
            slim_body["content_types"] = list(content)
        if slim_body:
            context["requestBody"] = slim_body

    # Include security if present (list of security requirement objects)
    security = operation_value.get("security")
    if isinstance(security, list) and security:
        context["security"] = security

    return context
|
433
|
+
|
434
|
+
|
435
|
+
def simplify_parameter_context(parameter: dict) -> dict:
    """Return a reduced copy of a parameter object for use as prompt context.

    Keeps ``name``, ``in`` and ``required`` from the parameter itself and,
    when its ``schema`` is a dictionary, ``type``, ``enum`` and ``default``
    from that schema. Keys that are absent are simply omitted.
    """
    simplified = {
        key: parameter[key] for key in ("name", "in", "required") if key in parameter
    }
    schema = parameter.get("schema")
    if isinstance(schema, dict):
        simplified.update(
            (key, schema[key]) for key in ("type", "enum", "default") if key in schema
        )
    return simplified
|
454
|
+
|
455
|
+
|
456
|
+
# HTTP methods that may appear as operation keys inside a path item.
_SCAN_HTTP_METHODS = frozenset(
    {"get", "put", "post", "delete", "options", "head", "patch", "trace"}
)
# Other fixed fields of an OpenAPI Path Item Object that are not operations
# (path-level "parameters" is handled separately below).
_SCAN_PATH_ITEM_FIELDS = frozenset({"summary", "description", "servers", "$ref"})


def _tally_text_status(counters: dict, text) -> None:
    """Increment the "present", "fallback" or "missing" counter matching *text*."""
    if isinstance(text, str) and text.strip():
        counters["fallback" if is_fallback_text(text) else "present"] += 1
    else:
        counters["missing"] += 1


def _scan_operation_parameters(
    parameters: list, operation_location_base: str, scan_report: dict
) -> None:
    """Record name/in problems and description status for one operation's parameters."""
    for i, parameter in enumerate(parameters):
        if not isinstance(parameter, dict):
            logger.warning(
                f"Parameter at index {i} in {operation_location_base}.parameters is not a dictionary. Skipping."
            )
            continue

        if "$ref" in parameter:
            logger.debug(
                f"Parameter at index {i} in {operation_location_base}.parameters is a reference. Skipping detailed scan."
            )
            continue

        param_name = parameter.get("name")
        param_in = parameter.get("in")
        has_name = isinstance(param_name, str) and bool(param_name.strip())
        has_in = isinstance(param_in, str) and bool(param_in.strip())

        # Identify the parameter by name when it has one, otherwise by position.
        param_location_id = param_name if has_name else f"index {i}"
        param_location_base = f"{operation_location_base}.parameters[{param_location_id}]"

        # Warnings rather than errors: these can be fixed manually.
        if not has_name:
            logger.warning(
                f"Missing/empty 'name' field for parameter at {param_location_base}. Cannot generate description."
            )
            scan_report["parameters_missing_name"].append(param_location_base)

        if not has_in:
            logger.warning(
                f"Missing/empty 'in' field for parameter '{param_name}' at {param_location_base}. Cannot generate description."
            )
            scan_report["parameters_missing_in"].append(param_location_base)

        # The description is only counted when both name and in are usable.
        if has_name and has_in:
            _tally_text_status(
                scan_report["parameter_description"], parameter.get("description")
            )
        else:
            logger.debug(
                f"Skipping description scan for parameter at {param_location_base} due to missing name/in."
            )


def scan_schema_for_status(schema_data: dict):
    """
    Scans the schema to report the status of descriptions/summaries
    and identify critical issues like missing parameter 'name'/'in'.
    Does NOT modify the schema or call the LLM.

    Returns a report dictionary with:
      * "info_description", "operation_summary", "parameter_description":
        counters keyed "present", "missing" and "fallback";
      * "parameters_missing_name", "parameters_missing_in": location strings;
      * "critical_errors": messages for a missing/invalid ``info`` object or
        ``info.title``; when one is recorded the scan stops immediately.

    Path-level ``parameters`` and ``$ref`` parameters are not inspected.
    Keys starting with "x-" are skipped as extensions.
    """
    logger.info("\n--- Scanning Schema for Status ---")

    scan_report = {
        "info_description": {"present": 0, "missing": 0, "fallback": 0},
        "operation_summary": {"present": 0, "missing": 0, "fallback": 0},
        "parameter_description": {"present": 0, "missing": 0, "fallback": 0},
        "parameters_missing_name": [],
        "parameters_missing_in": [],
        "critical_errors": [],  # For essential validation issues like missing info/title
    }

    # --- Check Info Section ---
    info = schema_data.get("info")
    info_location = "info"

    if not isinstance(info, dict):
        error_msg = f"Critical: Required '{info_location}' object is missing or not a dictionary."
        logger.critical(error_msg)
        scan_report["critical_errors"].append(error_msg)
        # Cannot proceed meaningfully without info block
        return scan_report

    info_title = info.get("title")
    if not isinstance(info_title, str) or not info_title.strip():
        error_msg = f"Critical: Required field '{info_location}.title' is missing or empty."
        logger.critical(error_msg)
        scan_report["critical_errors"].append(error_msg)
        # Cannot proceed meaningfully without title
        return scan_report

    _tally_text_status(scan_report["info_description"], info.get("description"))

    # --- Check Paths ---
    paths = schema_data.get("paths")
    if not isinstance(paths, dict):
        # Allow None if schema is empty, but warn if it's the wrong type
        if paths is not None:
            logger.warning("'paths' field is not a dictionary. Skipping path scanning.")
        else:
            logger.info("'paths' field is missing or null. No operations to scan.")
        return scan_report  # No paths to scan

    for path_key, path_value in paths.items():
        # str() guards against non-string keys, which a YAML document can produce.
        if str(path_key).lower().startswith("x-"):
            logger.debug(f"Skipping scanning of path extension '{path_key}'.")
            continue

        if not isinstance(path_value, dict):
            logger.warning(
                f"Path value for '{path_key}' is not a dictionary. Skipping scanning for this path."
            )
            continue

        for method, operation_value in path_value.items():
            method_name = str(method).lower()

            if method_name in _SCAN_HTTP_METHODS:
                operation_location_base = f"paths.{path_key}.{method_name}"
                if not isinstance(operation_value, dict):
                    logger.warning(
                        f"Operation value for '{operation_location_base}' is not a dictionary. Skipping."
                    )
                    continue

                # Check Operation Summary
                _tally_text_status(
                    scan_report["operation_summary"], operation_value.get("summary")
                )

                # Check Parameters
                parameters = operation_value.get("parameters")
                if isinstance(parameters, list):
                    _scan_operation_parameters(
                        parameters, operation_location_base, scan_report
                    )
                elif parameters is not None:
                    logger.warning(
                        f"'parameters' field for operation '{operation_location_base}' is not a list. Skipping parameter scanning."
                    )

            elif method_name.startswith("x-"):
                logger.debug(
                    f"Skipping scanning of method extension '{method_name}' in path '{path_key}'."
                )
            elif method_name == "parameters":  # Path level parameters
                logger.debug(f"Skipping scanning of path-level parameters in '{path_key}'.")
            elif method_name in _SCAN_PATH_ITEM_FIELDS:
                # Valid path-item fields, not HTTP methods: no warning needed.
                logger.debug(
                    f"Skipping non-operation field '{method}' in path '{path_key}'."
                )
            elif operation_value is not None:
                logger.warning(
                    f"Unknown method '{method}' found in path '{path_key}'. Skipping scanning."
                )
            else:
                logger.debug(
                    f"Operation value for method '{method}' in path '{path_key}' is null. Skipping scanning."
                )

    logger.info("--- Scan Complete ---")
    return scan_report
|
648
|
+
|
649
|
+
|
650
|
+
def report_scan_results(scan_report: dict):
    """Prints a formatted summary of the scan results.

    Output goes to the ``console`` attribute of the root logger's first
    handler when it has one, otherwise to a new rich ``Console``. If the
    report contains critical errors, only those are printed. Otherwise the
    present/missing/fallback counters, any parameters lacking 'name' or 'in',
    and the total number of missing or fallback items are printed.
    """
    # A root logger without handlers must not crash the report, so the first
    # handler is only consulted when one exists.
    handlers = logging.getLogger().handlers
    console = getattr(handlers[0], "console", None) if handlers else None
    if console is None:  # Fallback if rich console isn't attached to logger
        from rich.console import Console

        console = Console()

    console.print("\n[bold blue]--- Schema Scan Summary ---[/bold blue]")

    if scan_report.get("critical_errors"):
        console.print("[bold red]CRITICAL ERRORS FOUND:[/bold red]")
        for error in scan_report["critical_errors"]:
            console.print(f" [red]❌[/red] {error}")
        console.print(
            "[bold red]Critical errors prevent automatic generation. Please fix these manually.[/bold red]"
        )
        return  # Stop here if critical errors exist

    console.print("[bold yellow]Description/Summary Status:[/bold yellow]")
    sections = (
        (" API Description (info.description):", scan_report["info_description"]),
        (" Operation Summaries (paths.*.summary):", scan_report["operation_summary"]),
        (
            " Parameter Descriptions (paths.*.*.parameters.description):",
            scan_report["parameter_description"],
        ),
    )
    for heading, counts in sections:
        console.print(heading)
        console.print(f" [green]✅ Present[/green]: {counts['present']}")
        console.print(f" [orange1]❓ Missing[/orange1]: {counts['missing']}")
        console.print(f" [yellow]⚠️ Fallback[/yellow]: {counts['fallback']}")

    missing_name = scan_report.get("parameters_missing_name", [])
    missing_in = scan_report.get("parameters_missing_in", [])

    if missing_name or missing_in:
        console.print("\n[bold red]Parameter Issues Preventing LLM Generation:[/bold red]")
        console.print(
            "[yellow]Parameters below cannot have descriptions generated by LLM until 'name' and 'in' fields are fixed manually.[/yellow]"
        )
        if missing_name:
            console.print(" [bold red]Missing 'name' field:[/bold red]")
            for path in missing_name:
                console.print(f" [red]❌[/red] {path}")
        if missing_in:
            console.print(" [bold red]Missing 'in' field:[/bold red]")
            for path in missing_in:
                console.print(f" [red]❌[/red] {path}")

    total_missing_or_fallback = sum(
        counts["missing"] + counts["fallback"] for _, counts in sections
    )

    if total_missing_or_fallback > 0:
        console.print(
            f"\n[bold]Total items missing or needing enhancement:[/bold] [orange1]{total_missing_or_fallback}[/orange1]"
        )
    else:
        console.print(
            "\n[bold green]Scan found no missing or fallback descriptions/summaries.[/bold green]"
        )

    console.print("[bold blue]-------------------------[/bold blue]")
|
731
|
+
|
732
|
+
|
733
|
+
def process_parameter(
    parameter: dict,
    operation_location_base: str,
    path_key: str,
    method: str,
    llm_model: str,
    enhance_all: bool,
):
    """Fill in, clean up and length-check the description of one parameter.

    The parameter dictionary is modified in place; nothing is returned.

    Args:
        parameter: Parameter object taken from an operation's ``parameters`` list.
        operation_location_base: Dotted location of the owning operation, used
            only to build log messages.
        path_key: Path the operation belongs to; forwarded to the LLM context.
        method: HTTP method of the operation; forwarded to the LLM context.
        llm_model: Model identifier handed to ``generate_description_llm``.
        enhance_all: When true, a description is regenerated even if a usable
            one is already present.
    """
    if not isinstance(parameter, dict):
        logger.warning(
            f"Invalid parameter object found in {operation_location_base}. Expected dictionary."
        )
        return

    # References are left untouched: there is nothing to describe inline.
    if "$ref" in parameter:
        ref_path = parameter["$ref"]
        logger.debug(
            f"Parameter in {operation_location_base} is a reference ('{ref_path}'). Skipping description generation."
        )
        return

    param_name = parameter.get("name")
    param_in = parameter.get("in")
    has_name = isinstance(param_name, str) and bool(param_name.strip())
    has_in = isinstance(param_in, str) and bool(param_in.strip())

    # Build the most specific identifier the available fields allow; it is
    # only used to make log messages traceable.
    if has_name and has_in:
        param_location_id = f"{param_in.strip()}:{param_name.strip()}"
    elif has_name:
        param_location_id = param_name.strip()
    elif has_in:
        param_location_id = f"{param_in.strip()}:[name missing]"
    else:
        param_location_id = "unknown_param"

    parameter_location_base = (
        f"{operation_location_base}.parameters[{param_location_id}]"
    )

    # Both 'name' and 'in' are required before a description can be generated.
    if not (has_name and has_in):
        logger.warning(
            f"Cannot generate description for parameter at {parameter_location_base} due to missing 'name' or 'in' field."
        )
        return

    existing_description = parameter.get("description")
    keep_existing = (
        not enhance_all
        and isinstance(existing_description, str)
        and existing_description.strip()
        and not is_fallback_text(existing_description)
    )

    if keep_existing:
        logger.debug(
            f"Existing 'description' found for parameter '{param_name}' at {parameter_location_base}. Skipping generation."
        )
    else:
        logger.info(
            f"Generating description for parameter '{param_name}' at {parameter_location_base}."
        )
        parameter["description"] = generate_description_llm(
            description_type="parameter",
            model=llm_model,
            context={
                "path_key": path_key,
                "method": method,
                "param_name": param_name,
                "param_in": param_in,
                "parameter_details": simplify_parameter_context(parameter),
            },
        )
        logger.debug(
            f"Inserted description for parameter '{param_name}' at {parameter_location_base}."
        )

    # Strip http(s) links from the description (freshly generated or
    # pre-existing), except when it is flagged by is_fallback_text().
    description_now = parameter.get("description", "")
    if (
        isinstance(description_now, str)
        and description_now
        and not is_fallback_text(description_now)
    ):
        modified_description = re.sub(r"https?://[\S]+", "", description_now).strip()
        # Removing a link can leave runs of whitespace behind; collapse them.
        modified_description = re.sub(r"\s{2,}", " ", modified_description).strip()

        if modified_description != description_now:
            parameter["description"] = modified_description
            logger.debug(
                f"Removed links from description for parameter '{param_name}' at {parameter_location_base}. New description: '{modified_description[:50]}...'"
            )

    # Over-long descriptions are only reported, never truncated.
    final_text = parameter.get("description", "")
    if isinstance(final_text, str) and len(final_text) > MAX_DESCRIPTION_LENGTH:
        desc_length = len(final_text)
        logger.warning(
            f"Parameter description at '{parameter_location_base}.description' exceeds max length. Actual length: {desc_length}, Max allowed: {MAX_DESCRIPTION_LENGTH}. Consider manual edit."
        )
|
847
|
+
|
848
|
+
|
849
|
+
def process_operation(
    operation_value: dict,
    path_key: str,
    method: str,
    llm_model: str,
    enhance_all: bool,
):
    """Ensure one operation has a summary, then process each of its parameters.

    The operation dictionary is modified in place; nothing is returned.

    Args:
        operation_value: The operation object found under ``paths[path_key][method]``.
        path_key: Path the operation belongs to.
        method: HTTP method key as it appears in the schema.
        llm_model: Model identifier handed to ``generate_description_llm``.
        enhance_all: When true, the summary is regenerated even if a usable
            one is already present; the flag is also passed on to
            ``process_parameter``.
    """
    operation_location_base = f"paths.{path_key}.{method.lower()}"

    if not isinstance(operation_value, dict):
        logger.warning(
            f"Operation value for '{operation_location_base}' is not a dictionary. Skipping processing."
        )
        return

    if method.lower().startswith("x-"):
        logger.debug(f"Skipping extension operation '{operation_location_base}'.")
        return

    # --- Summary ---
    existing_summary = operation_value.get("summary")
    keep_existing = (
        not enhance_all
        and isinstance(existing_summary, str)
        and existing_summary.strip()
        and not is_fallback_text(existing_summary)
    )

    if keep_existing:
        logger.debug(
            f"Existing summary found for '{operation_location_base}'. Skipping generation."
        )
    else:
        logger.info(f"Generating summary for operation '{operation_location_base}'.")
        operation_value["summary"] = generate_description_llm(
            description_type="summary",
            model=llm_model,
            context={
                "path_key": path_key,
                "method": method,
                "operation_value": simplify_operation_context(operation_value),
            },
        )
        logger.debug(f"Inserted summary for '{operation_location_base}'.")

    # Over-long summaries are only reported, never truncated.
    final_summary = operation_value.get("summary", "")
    if isinstance(final_summary, str) and len(final_summary) > MAX_DESCRIPTION_LENGTH:
        summary_length = len(final_summary)
        logger.warning(
            f"Operation summary at '{operation_location_base}.summary' exceeds max length ({summary_length} > {MAX_DESCRIPTION_LENGTH}). Consider manual edit."
        )

    # --- Parameters ---
    parameters = operation_value.get("parameters")
    if parameters is None:
        # No parameters declared: nothing further to do.
        return

    if not isinstance(parameters, list):
        logger.warning(
            f"'parameters' field for operation '{operation_location_base}' is not a list. Skipping parameter processing."
        )
        return

    for parameter in parameters:
        process_parameter(
            parameter,
            operation_location_base,
            path_key,
            method,
            llm_model,
            enhance_all,
        )
|
924
|
+
|
925
|
+
|
926
|
+
def process_paths(paths: dict, llm_model: str, enhance_all: bool):
    """Walk the ``paths`` mapping and process every HTTP operation found in it.

    Args:
        paths: Value of the schema's ``paths`` key. Anything other than a
            dictionary is reported and ignored.
        llm_model: Model identifier passed on to ``process_operation``.
        enhance_all: Passed on to ``process_operation`` unchanged.
    """
    if not isinstance(paths, dict):
        logger.warning("'paths' field is not a dictionary. Skipping path processing.")
        return

    http_verbs = ("get", "put", "post", "delete", "options", "head", "patch", "trace")

    for path_key, path_value in paths.items():
        if path_key.lower().startswith("x-"):
            logger.debug(f"Skipping processing of path extension '{path_key}'.")
            continue

        if not isinstance(path_value, dict):
            # A null path item is silently ignored; any other type is reported.
            if path_value is not None:
                logger.warning(
                    f"Path value for '{path_key}' is not a dictionary. Skipping processing."
                )
            continue

        for method, operation_value in path_value.items():
            verb = method.lower()

            if verb in http_verbs:
                process_operation(
                    operation_value, path_key, method, llm_model, enhance_all
                )
            elif verb.startswith("x-"):
                logger.debug(
                    f"Skipping processing of method extension '{method.lower()}' in path '{path_key}'."
                )
            elif verb == "parameters":
                # Path-level parameters are not processed by this function.
                logger.debug(
                    f"Skipping processing of path-level parameters in '{path_key}'."
                )
            elif operation_value is None:
                logger.debug(
                    f"Operation value for method '{method}' in path '{path_key}' is null. Skipping processing."
                )
            else:
                logger.warning(
                    f"Unknown method '{method}' found in path '{path_key}'. Skipping processing."
                )
|
974
|
+
|
975
|
+
|
976
|
+
def process_info_section(
    schema_data: dict, llm_model: str, enhance_all: bool
):
    """Ensure ``info.description`` is populated, generating it when needed.

    The ``info`` dictionary inside ``schema_data`` is modified in place.

    The ``info`` section is validated before its title is read. Previously
    the title was read first, so a missing ``info`` failed with an opaque
    "'NoneType' object is not subscriptable" and the later guard that tried
    to re-create ``info`` could never run.

    Args:
        schema_data: The loaded schema; must contain an ``info`` dictionary
            with a ``title`` entry.
        llm_model: Model identifier handed to ``generate_description_llm``.
        enhance_all: When true, the description is regenerated even if a
            usable one is already present.

    Raises:
        TypeError: If ``schema_data["info"]`` is missing or not a dictionary.
        KeyError: If ``info`` has no ``title`` entry.
    """
    info_location = "info"
    info = schema_data.get("info")

    # Validate up front so the failure names the real problem. TypeError is
    # the same exception type the unguarded subscript used to raise.
    if not isinstance(info, dict):
        raise TypeError(
            f"Schema '{info_location}' section must be a dictionary, got {type(info).__name__}."
        )

    # The title is required context for generation; a missing title raises
    # KeyError, exactly as before.
    info_title = info["title"]

    info_description = info.get("description")

    needs_description_generation = (
        enhance_all
        or not isinstance(info_description, str)
        or not info_description.strip()
        or is_fallback_text(info_description)
    )

    if needs_description_generation:
        logger.info(f"Generating description for '{info_location}'.")

        generated_description = generate_description_llm(
            description_type="api_description",
            model=llm_model,
            context={"title": info_title},
        )

        # 'info' is the same object stored in schema_data, so this updates
        # the schema in place.
        info["description"] = generated_description
        logger.debug(f"Inserted description for '{info_location}.description'.")
    else:
        logger.debug("Existing 'info.description' found. Skipping generation.")

    # Over-long descriptions are only reported, never truncated.
    final_description = info.get("description", "")
    if isinstance(final_description, str):
        desc_length = len(final_description)
        if desc_length > MAX_DESCRIPTION_LENGTH:
            logger.warning(
                f"API description at '{info_location}.description' exceeds max length ({desc_length} > {MAX_DESCRIPTION_LENGTH}). Consider manual edit."
            )
|
1021
|
+
|
1022
|
+
|
1023
|
+
def preprocess_schema_with_llm(
    schema_data: dict, llm_model: str, enhance_all: bool
):
    """
    Run the LLM-backed enrichment pass over a loaded schema.

    The ``info`` section is handled first, then everything under ``paths``.
    Whether an individual item is (re)generated depends on ``enhance_all``
    and on the content already present; that decision is made by the
    section-level helpers. Callers are expected to have validated the basic
    structure (``info`` and its title) beforehand.
    """
    logger.info(f"\n--- Starting LLM Generation (enhance_all={enhance_all}) ---")

    process_info_section(schema_data, llm_model, enhance_all)
    # A missing 'paths' key is passed on as None; process_paths reports it.
    process_paths(schema_data.get("paths"), llm_model, enhance_all)

    logger.info("--- LLM Generation Complete ---")
|
1039
|
+
|
1040
|
+
|
1041
|
+
def run_preprocessing(
    schema_path: Path,
    output_path: Path | None = None,
    model: str = "perplexity/sonar",
    debug: bool = False,
):
    """Interactive entry point: load, scan, optionally enrich and save a schema.

    Steps, in order: read the schema file, scan it and print the report,
    abort on critical errors, ask the user which action to take, run the LLM
    generation if requested, and write the result to disk.

    Args:
        schema_path: Location of the schema file. When ``None`` the user is
            prompted for it.
        output_path: Destination file. When ``None`` it defaults to the input
            path with ``_processed`` inserted before the extension.
        model: Model identifier passed to ``preprocess_schema_with_llm``.
        debug: Selects the ``DEBUG`` logging level instead of ``INFO``.

    Raises:
        typer.Exit: With code 1 on any read, scan, generation or write
            failure or when the scan reports critical errors; with code 0
            when the user chooses to quit.
    """
    set_logging_level("DEBUG" if debug else "INFO")
    console.print("[bold blue]--- Starting OpenAPI Schema Preprocessor ---[/bold blue]")

    # --- Step 1: Resolve the input path and load the schema ---
    if schema_path is None:
        path_str = typer.prompt(
            "Please enter the path to the OpenAPI schema file (JSON or YAML)",
            prompt_suffix=": ",
        ).strip()
        if not path_str:
            console.print("[red]Error: Schema path is required.[/red]")
            raise typer.Exit(1)
        schema_path = Path(path_str)

    try:
        schema_data = read_schema_file(str(schema_path))
    except (FileNotFoundError, yaml.YAMLError, json.JSONDecodeError, OSError) as e:
        # NOTE(review): nothing is printed here; presumably read_schema_file
        # reports these errors itself - confirm.
        raise typer.Exit(1) from e
    except Exception as e:
        console.print(
            f"[red]An unexpected error occurred while reading schema: {e}[/red]"
        )
        raise typer.Exit(1) from e

    # --- Step 2: Scan and Report Status ---
    try:
        scan_report = scan_schema_for_status(schema_data)
        report_scan_results(scan_report)
    except Exception as e:
        console.print(
            f"[red]An unexpected error occurred during schema scanning: {e}[/red]"
        )
        raise typer.Exit(1) from e

    # --- Step 3: Check for Critical Errors ---
    if scan_report.get("critical_errors"):
        console.print(
            "[bold red]Cannot proceed with generation due to critical errors. Please fix the schema file manually.[/bold red]"
        )
        raise typer.Exit(1)

    # --- Step 4: Determine Prompt Options based on Scan Results ---
    counted_sections = ("info_description", "operation_summary", "parameter_description")
    total_missing_or_fallback = sum(
        scan_report[section][state]
        for section in counted_sections
        for state in ("missing", "fallback")
    )

    params_without_name = scan_report.get("parameters_missing_name", [])
    params_without_in = scan_report.get("parameters_missing_in", [])
    ungeneratable_params = len(params_without_name) + len(params_without_in)

    console.print("\n[bold blue]Choose an action:[/bold blue]")

    if total_missing_or_fallback > 0:
        console.print(
            f"[bold]Scan found {total_missing_or_fallback} items that are missing or using fallback text and can be generated/enhanced.[/bold]"
        )
        if ungeneratable_params > 0:
            console.print(
                f"[yellow]Note: {ungeneratable_params} parameters require manual fixing and cannot be generated by the LLM due to missing name/in.[/yellow]"
            )

        # Something can be filled in, so "only missing" is offered as default.
        prompt_options = [
            " [1] Generate [bold]only missing[/bold] descriptions/summaries [green](default)[/green]",
            " [2] Generate/Enhance [bold]all[/bold] descriptions/summaries",
            " [3] [bold red]Quit[/bold red] (exit without changes)",
        ]
        valid_choices = ["1", "2", "3"]
        default_choice = "1"
    else:
        if ungeneratable_params > 0:
            console.print(
                f"[bold yellow]Scan found no missing/fallback items suitable for generation, but {ungeneratable_params} parameters have missing 'name' or 'in'.[/bold yellow]"
            )
            console.print(
                "[bold yellow]These parameters require manual fixing and cannot be generated by the LLM.[/bold yellow]"
            )
        else:
            console.print(
                "[bold green]Scan found no missing or fallback descriptions/summaries.[/bold green]"
            )

        console.print(
            "[bold blue]You can choose to enhance all existing descriptions or exit.[/bold blue]"
        )

        # Nothing is missing, so option 1 is not offered and quitting is the default.
        prompt_options = [
            " [2] Generate/Enhance [bold]all[/bold] descriptions/summaries",
            " [3] [bold red]Quit[/bold red] [green](default)[/green]",
        ]
        valid_choices = ["2", "3"]
        default_choice = "3"

    for option_text in prompt_options:
        console.print(option_text)

    # Keep asking until one of the offered options is entered.
    while True:
        choice = typer.prompt(
            "Enter choice", default=default_choice, show_default=False, type=str
        ).strip()
        if choice in valid_choices:
            break
        console.print(
            "[red]Invalid choice. Please select from the options above.[/red]"
        )

    if choice == "3":
        console.print("[yellow]Exiting without making changes.[/yellow]")
        raise typer.Exit(0)

    # Only "1" and "2" remain: "2" means enhance everything.
    enhance_all = choice == "2"

    perform_generation = enhance_all or (
        choice == "1" and total_missing_or_fallback > 0
    )

    if not perform_generation:
        console.print(
            "[yellow]No missing or fallback items found, and 'Enhance All' was not selected. Skipping LLM generation step.[/yellow]"
        )
    else:
        console.print(
            f"[blue]Starting LLM generation with Enhance All: {enhance_all}[/blue]"
        )
        try:
            preprocess_schema_with_llm(schema_data, model, enhance_all)
            console.print("[green]LLM generation complete.[/green]")
        except Exception as e:
            console.print(f"[red]Error during LLM generation: {e}[/red]")
            # Print the traceback to stderr for debugging.
            import traceback

            traceback.print_exc(file=sys.stderr)
            raise typer.Exit(1) from e

    # --- Step 5: Resolve the output path and save ---
    if output_path is None:
        base, ext = os.path.splitext(schema_path)
        output_path = Path(f"{base}_processed{ext}")
        console.print(
            f"[blue]No output path specified. Defaulting to: {output_path}[/blue]"
        )
    else:
        console.print(f"[blue]Saving processed schema to: {output_path}[/blue]")

    try:
        write_schema_file(schema_data, str(output_path))
    except (OSError, ValueError) as e:
        # NOTE(review): nothing is printed here; presumably write_schema_file
        # logs these errors itself - confirm.
        raise typer.Exit(1) from e
    except Exception as e:
        console.print(
            f"[red]An unexpected error occurred while writing the schema: {e}[/red]"
        )
        raise typer.Exit(1) from e

    console.print(
        "\n[bold green]--- Schema Processing and Saving Complete ---[/bold green]"
    )
    console.print(f"Processed schema saved to: [blue]{output_path}[/blue]")
    console.print("[bold blue]Preprocessor finished successfully.[/bold blue]")
|