symbolicai 0.21.0__py3-none-any.whl → 1.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- symai/__init__.py +269 -173
- symai/backend/base.py +123 -110
- symai/backend/engines/drawing/engine_bfl.py +45 -44
- symai/backend/engines/drawing/engine_gpt_image.py +112 -97
- symai/backend/engines/embedding/engine_llama_cpp.py +63 -52
- symai/backend/engines/embedding/engine_openai.py +25 -21
- symai/backend/engines/execute/engine_python.py +19 -18
- symai/backend/engines/files/engine_io.py +104 -95
- symai/backend/engines/imagecaptioning/engine_blip2.py +28 -24
- symai/backend/engines/imagecaptioning/engine_llavacpp_client.py +102 -79
- symai/backend/engines/index/engine_pinecone.py +124 -97
- symai/backend/engines/index/engine_qdrant.py +1011 -0
- symai/backend/engines/index/engine_vectordb.py +84 -56
- symai/backend/engines/lean/engine_lean4.py +96 -52
- symai/backend/engines/neurosymbolic/__init__.py +41 -13
- symai/backend/engines/neurosymbolic/engine_anthropic_claudeX_chat.py +330 -248
- symai/backend/engines/neurosymbolic/engine_anthropic_claudeX_reasoning.py +329 -264
- symai/backend/engines/neurosymbolic/engine_cerebras.py +328 -0
- symai/backend/engines/neurosymbolic/engine_deepseekX_reasoning.py +118 -88
- symai/backend/engines/neurosymbolic/engine_google_geminiX_reasoning.py +344 -299
- symai/backend/engines/neurosymbolic/engine_groq.py +173 -115
- symai/backend/engines/neurosymbolic/engine_huggingface.py +114 -84
- symai/backend/engines/neurosymbolic/engine_llama_cpp.py +144 -118
- symai/backend/engines/neurosymbolic/engine_openai_gptX_chat.py +415 -307
- symai/backend/engines/neurosymbolic/engine_openai_gptX_reasoning.py +394 -231
- symai/backend/engines/ocr/engine_apilayer.py +23 -27
- symai/backend/engines/output/engine_stdout.py +10 -13
- symai/backend/engines/{webscraping → scrape}/engine_requests.py +101 -54
- symai/backend/engines/search/engine_openai.py +100 -88
- symai/backend/engines/search/engine_parallel.py +665 -0
- symai/backend/engines/search/engine_perplexity.py +44 -45
- symai/backend/engines/search/engine_serpapi.py +37 -34
- symai/backend/engines/speech_to_text/engine_local_whisper.py +54 -51
- symai/backend/engines/symbolic/engine_wolframalpha.py +15 -9
- symai/backend/engines/text_to_speech/engine_openai.py +20 -26
- symai/backend/engines/text_vision/engine_clip.py +39 -37
- symai/backend/engines/userinput/engine_console.py +5 -6
- symai/backend/mixin/__init__.py +13 -0
- symai/backend/mixin/anthropic.py +48 -38
- symai/backend/mixin/deepseek.py +6 -5
- symai/backend/mixin/google.py +7 -4
- symai/backend/mixin/groq.py +2 -4
- symai/backend/mixin/openai.py +140 -110
- symai/backend/settings.py +87 -20
- symai/chat.py +216 -123
- symai/collect/__init__.py +7 -1
- symai/collect/dynamic.py +80 -70
- symai/collect/pipeline.py +67 -51
- symai/collect/stats.py +161 -109
- symai/components.py +707 -360
- symai/constraints.py +24 -12
- symai/core.py +1857 -1233
- symai/core_ext.py +83 -80
- symai/endpoints/api.py +166 -104
- symai/extended/.DS_Store +0 -0
- symai/extended/__init__.py +46 -12
- symai/extended/api_builder.py +29 -21
- symai/extended/arxiv_pdf_parser.py +23 -14
- symai/extended/bibtex_parser.py +9 -6
- symai/extended/conversation.py +156 -126
- symai/extended/document.py +50 -30
- symai/extended/file_merger.py +57 -14
- symai/extended/graph.py +51 -32
- symai/extended/html_style_template.py +18 -14
- symai/extended/interfaces/blip_2.py +2 -3
- symai/extended/interfaces/clip.py +4 -3
- symai/extended/interfaces/console.py +9 -1
- symai/extended/interfaces/dall_e.py +4 -2
- symai/extended/interfaces/file.py +2 -0
- symai/extended/interfaces/flux.py +4 -2
- symai/extended/interfaces/gpt_image.py +16 -7
- symai/extended/interfaces/input.py +2 -1
- symai/extended/interfaces/llava.py +1 -2
- symai/extended/interfaces/{naive_webscraping.py → naive_scrape.py} +4 -3
- symai/extended/interfaces/naive_vectordb.py +9 -10
- symai/extended/interfaces/ocr.py +5 -3
- symai/extended/interfaces/openai_search.py +2 -0
- symai/extended/interfaces/parallel.py +30 -0
- symai/extended/interfaces/perplexity.py +2 -0
- symai/extended/interfaces/pinecone.py +12 -9
- symai/extended/interfaces/python.py +2 -0
- symai/extended/interfaces/serpapi.py +3 -1
- symai/extended/interfaces/terminal.py +2 -4
- symai/extended/interfaces/tts.py +3 -2
- symai/extended/interfaces/whisper.py +3 -2
- symai/extended/interfaces/wolframalpha.py +2 -1
- symai/extended/metrics/__init__.py +11 -1
- symai/extended/metrics/similarity.py +14 -13
- symai/extended/os_command.py +39 -29
- symai/extended/packages/__init__.py +29 -3
- symai/extended/packages/symdev.py +51 -43
- symai/extended/packages/sympkg.py +41 -35
- symai/extended/packages/symrun.py +63 -50
- symai/extended/repo_cloner.py +14 -12
- symai/extended/seo_query_optimizer.py +15 -13
- symai/extended/solver.py +116 -91
- symai/extended/summarizer.py +12 -10
- symai/extended/taypan_interpreter.py +17 -18
- symai/extended/vectordb.py +122 -92
- symai/formatter/__init__.py +9 -1
- symai/formatter/formatter.py +51 -47
- symai/formatter/regex.py +70 -69
- symai/functional.py +325 -176
- symai/imports.py +190 -147
- symai/interfaces.py +57 -28
- symai/memory.py +45 -35
- symai/menu/screen.py +28 -19
- symai/misc/console.py +66 -56
- symai/misc/loader.py +8 -5
- symai/models/__init__.py +17 -1
- symai/models/base.py +395 -236
- symai/models/errors.py +1 -2
- symai/ops/__init__.py +32 -22
- symai/ops/measures.py +24 -25
- symai/ops/primitives.py +1149 -731
- symai/post_processors.py +58 -50
- symai/pre_processors.py +86 -82
- symai/processor.py +21 -13
- symai/prompts.py +764 -685
- symai/server/huggingface_server.py +135 -49
- symai/server/llama_cpp_server.py +21 -11
- symai/server/qdrant_server.py +206 -0
- symai/shell.py +100 -42
- symai/shellsv.py +700 -492
- symai/strategy.py +630 -346
- symai/symbol.py +368 -322
- symai/utils.py +100 -78
- {symbolicai-0.21.0.dist-info → symbolicai-1.1.0.dist-info}/METADATA +22 -10
- symbolicai-1.1.0.dist-info/RECORD +168 -0
- symbolicai-0.21.0.dist-info/RECORD +0 -162
- {symbolicai-0.21.0.dist-info → symbolicai-1.1.0.dist-info}/WHEEL +0 -0
- {symbolicai-0.21.0.dist-info → symbolicai-1.1.0.dist-info}/entry_points.txt +0 -0
- {symbolicai-0.21.0.dist-info → symbolicai-1.1.0.dist-info}/licenses/LICENSE +0 -0
- {symbolicai-0.21.0.dist-info → symbolicai-1.1.0.dist-info}/top_level.txt +0 -0
symai/models/base.py
CHANGED
|
@@ -2,12 +2,14 @@ import json
|
|
|
2
2
|
from enum import Enum
|
|
3
3
|
from functools import lru_cache
|
|
4
4
|
from types import UnionType
|
|
5
|
-
from typing import Any, Literal,
|
|
5
|
+
from typing import Any, Literal, Union, get_args, get_origin
|
|
6
6
|
|
|
7
7
|
from attr import dataclass
|
|
8
8
|
from pydantic import BaseModel, Field, create_model, model_validator
|
|
9
9
|
from pydantic_core import PydanticUndefined
|
|
10
10
|
|
|
11
|
+
from ..utils import UserMessage
|
|
12
|
+
|
|
11
13
|
|
|
12
14
|
@dataclass
|
|
13
15
|
class LengthConstraint:
|
|
@@ -22,7 +24,7 @@ class CustomConstraint:
|
|
|
22
24
|
|
|
23
25
|
|
|
24
26
|
def Const(value: str):
|
|
25
|
-
return Field(default=value, json_schema_extra={
|
|
27
|
+
return Field(default=value, json_schema_extra={"const_value": value})
|
|
26
28
|
|
|
27
29
|
|
|
28
30
|
class LLMDataModel(BaseModel):
|
|
@@ -33,11 +35,9 @@ class LLMDataModel(BaseModel):
|
|
|
33
35
|
|
|
34
36
|
_MAX_RECURSION_DEPTH = 50
|
|
35
37
|
|
|
36
|
-
section_header: str = Field(
|
|
37
|
-
default=None, exclude=True, frozen=True
|
|
38
|
-
)
|
|
38
|
+
section_header: str = Field(default=None, exclude=True, frozen=True)
|
|
39
39
|
|
|
40
|
-
@model_validator(mode=
|
|
40
|
+
@model_validator(mode="before")
|
|
41
41
|
@classmethod
|
|
42
42
|
def validate_const_fields(cls, values):
|
|
43
43
|
"""Validate that const fields have their expected values."""
|
|
@@ -45,7 +45,7 @@ class LLMDataModel(BaseModel):
|
|
|
45
45
|
if cls._is_const_field(field_info):
|
|
46
46
|
const_value = cls._get_const_value(field_info)
|
|
47
47
|
if field_name in values and values[field_name] != const_value:
|
|
48
|
-
|
|
48
|
+
UserMessage(f"{field_name} must be {const_value!r}", raise_with=ValueError)
|
|
49
49
|
return values
|
|
50
50
|
|
|
51
51
|
@staticmethod
|
|
@@ -72,27 +72,32 @@ class LLMDataModel(BaseModel):
|
|
|
72
72
|
def _is_collection_type(field_type: Any) -> bool:
|
|
73
73
|
"""Check if a type is a collection (list, set, tuple, dict, etc.)."""
|
|
74
74
|
origin = get_origin(field_type)
|
|
75
|
-
return origin in (list, set, frozenset, tuple, dict) or field_type in (
|
|
75
|
+
return origin in (list, set, frozenset, tuple, dict) or field_type in (
|
|
76
|
+
list,
|
|
77
|
+
set,
|
|
78
|
+
frozenset,
|
|
79
|
+
tuple,
|
|
80
|
+
dict,
|
|
81
|
+
)
|
|
76
82
|
|
|
77
83
|
@staticmethod
|
|
78
84
|
def _is_const_field(field_info) -> bool:
|
|
79
85
|
"""Check if a field is a const field."""
|
|
80
|
-
return
|
|
81
|
-
field_info.json_schema_extra and
|
|
82
|
-
'const_value' in field_info.json_schema_extra
|
|
83
|
-
)
|
|
86
|
+
return field_info.json_schema_extra and "const_value" in field_info.json_schema_extra
|
|
84
87
|
|
|
85
88
|
@staticmethod
|
|
86
89
|
def _get_const_value(field_info):
|
|
87
90
|
"""Get the const value from a field."""
|
|
88
|
-
return field_info.json_schema_extra.get(
|
|
91
|
+
return field_info.json_schema_extra.get("const_value")
|
|
89
92
|
|
|
90
93
|
@staticmethod
|
|
91
94
|
def _has_default_value(field_info) -> bool:
|
|
92
95
|
"""Check if a field has a default value."""
|
|
93
96
|
return field_info.default != ... and field_info.default != PydanticUndefined
|
|
94
97
|
|
|
95
|
-
def format_field(
|
|
98
|
+
def format_field(
|
|
99
|
+
self, key: str, value: Any, indent: int = 0, visited: set | None = None, depth: int = 0
|
|
100
|
+
) -> str:
|
|
96
101
|
"""Formats a field value for string representation, handling nested structures."""
|
|
97
102
|
visited = visited or set()
|
|
98
103
|
formatter = self._get_formatter_for_value(value)
|
|
@@ -109,22 +114,28 @@ class LLMDataModel(BaseModel):
|
|
|
109
114
|
}
|
|
110
115
|
|
|
111
116
|
for type_class, formatter in formatters.items():
|
|
112
|
-
if type_class
|
|
117
|
+
if type_class is type(None) and value is None:
|
|
113
118
|
return formatter
|
|
114
|
-
if type_class
|
|
119
|
+
if type_class is not type(None) and isinstance(value, type_class):
|
|
115
120
|
return formatter
|
|
116
121
|
|
|
117
122
|
return self._format_primitive_field
|
|
118
123
|
|
|
119
|
-
def _format_none_field(
|
|
124
|
+
def _format_none_field(
|
|
125
|
+
self, key: str, _value: Any, indent: int, _visited: set, _depth: int
|
|
126
|
+
) -> str:
|
|
120
127
|
"""Format a None value."""
|
|
121
128
|
return f"{' ' * indent}{key}: None"
|
|
122
129
|
|
|
123
|
-
def _format_enum_field(
|
|
130
|
+
def _format_enum_field(
|
|
131
|
+
self, key: str, value: Enum, indent: int, _visited: set, _depth: int
|
|
132
|
+
) -> str:
|
|
124
133
|
"""Format an Enum value."""
|
|
125
134
|
return f"{' ' * indent}{key}: {value.value}"
|
|
126
135
|
|
|
127
|
-
def _format_model_field(
|
|
136
|
+
def _format_model_field(
|
|
137
|
+
self, key: str, value: "LLMDataModel", indent: int, visited: set, depth: int
|
|
138
|
+
) -> str:
|
|
128
139
|
"""Format a nested model field."""
|
|
129
140
|
obj_id = id(value)
|
|
130
141
|
indent_str = " " * indent
|
|
@@ -137,7 +148,9 @@ class LLMDataModel(BaseModel):
|
|
|
137
148
|
visited.discard(obj_id)
|
|
138
149
|
return f"{indent_str}{key}:\n{indent_str} {nested_str}"
|
|
139
150
|
|
|
140
|
-
def _format_list_field(
|
|
151
|
+
def _format_list_field(
|
|
152
|
+
self, key: str, value: list, indent: int, visited: set, depth: int
|
|
153
|
+
) -> str:
|
|
141
154
|
"""Format a list field."""
|
|
142
155
|
indent_str = " " * indent
|
|
143
156
|
if not value:
|
|
@@ -159,12 +172,16 @@ class LLMDataModel(BaseModel):
|
|
|
159
172
|
visited.add(obj_id)
|
|
160
173
|
item_str = item.__str__(indent + 2, visited, depth + 1).strip()
|
|
161
174
|
visited.discard(obj_id)
|
|
162
|
-
items.append(
|
|
175
|
+
items.append(
|
|
176
|
+
f"{indent_str} - : {item_str}" if item_str else f"{indent_str} - :"
|
|
177
|
+
)
|
|
163
178
|
else:
|
|
164
179
|
items.append(f"{indent_str} - : {item}" if item != "" else f"{indent_str} - :")
|
|
165
180
|
return f"{indent_str}{key}:\n" + "\n".join(items)
|
|
166
181
|
|
|
167
|
-
def _format_dict_field(
|
|
182
|
+
def _format_dict_field(
|
|
183
|
+
self, key: str, value: dict, indent: int, visited: set, depth: int
|
|
184
|
+
) -> str:
|
|
168
185
|
"""Format a dictionary field."""
|
|
169
186
|
indent_str = " " * indent
|
|
170
187
|
if not value:
|
|
@@ -190,11 +207,13 @@ class LLMDataModel(BaseModel):
|
|
|
190
207
|
visited.discard(obj_id)
|
|
191
208
|
return f"{indent_str}{key}:\n" + "\n".join(items) if key else "\n".join(items)
|
|
192
209
|
|
|
193
|
-
def _format_primitive_field(
|
|
210
|
+
def _format_primitive_field(
|
|
211
|
+
self, key: str, value: Any, indent: int, _visited: set, _depth: int
|
|
212
|
+
) -> str:
|
|
194
213
|
"""Format a primitive field."""
|
|
195
214
|
return f"{' ' * indent}{key}: {value}"
|
|
196
215
|
|
|
197
|
-
def __str__(self, indent: int = 0, visited: set = None, depth: int = 0) -> str:
|
|
216
|
+
def __str__(self, indent: int = 0, visited: set | None = None, depth: int = 0) -> str:
|
|
198
217
|
"""
|
|
199
218
|
Converts the model into a formatted string for LLM prompts.
|
|
200
219
|
Handles indentation for nested models and includes an optional section header.
|
|
@@ -205,16 +224,10 @@ class LLMDataModel(BaseModel):
|
|
|
205
224
|
field_list = [
|
|
206
225
|
self.format_field(name, getattr(self, name), indent + 2, visited, depth)
|
|
207
226
|
for name, field in type(self).model_fields.items()
|
|
208
|
-
if (
|
|
209
|
-
not getattr(field, "exclude", False)
|
|
210
|
-
and not name == "section_header"
|
|
211
|
-
)
|
|
227
|
+
if (not getattr(field, "exclude", False) and name != "section_header")
|
|
212
228
|
]
|
|
213
229
|
|
|
214
|
-
if field_list
|
|
215
|
-
fields = "\n".join(field_list) + "\n"
|
|
216
|
-
else:
|
|
217
|
-
fields = ""
|
|
230
|
+
fields = "\n".join(field_list) + "\n" if field_list else ""
|
|
218
231
|
|
|
219
232
|
if self.section_header and indent == 0:
|
|
220
233
|
header = f"{indent_str}[[{self.section_header}]]\n"
|
|
@@ -227,7 +240,7 @@ class LLMDataModel(BaseModel):
|
|
|
227
240
|
|
|
228
241
|
def remedy(self):
|
|
229
242
|
"""Default remedy method for the model."""
|
|
230
|
-
return
|
|
243
|
+
return
|
|
231
244
|
|
|
232
245
|
@classmethod
|
|
233
246
|
@lru_cache(maxsize=128)
|
|
@@ -281,8 +294,15 @@ class LLMDataModel(BaseModel):
|
|
|
281
294
|
return schema.get("$defs", schema.get("definitions", {}))
|
|
282
295
|
|
|
283
296
|
@classmethod
|
|
284
|
-
def _format_schema_field(
|
|
285
|
-
|
|
297
|
+
def _format_schema_field(
|
|
298
|
+
cls,
|
|
299
|
+
name: str,
|
|
300
|
+
field_schema: dict,
|
|
301
|
+
required: bool,
|
|
302
|
+
definitions: dict,
|
|
303
|
+
indent_level: int,
|
|
304
|
+
visited: set | None = None,
|
|
305
|
+
) -> str:
|
|
286
306
|
"""Format a single schema field without descriptions (kept for definitions)."""
|
|
287
307
|
visited = visited or set()
|
|
288
308
|
|
|
@@ -312,27 +332,38 @@ class LLMDataModel(BaseModel):
|
|
|
312
332
|
return result
|
|
313
333
|
|
|
314
334
|
@classmethod
|
|
315
|
-
def _format_referenced_object_fields(
|
|
316
|
-
|
|
335
|
+
def _format_referenced_object_fields(
|
|
336
|
+
cls, ref_name: str, definitions: dict, indent_level: int, visited: set
|
|
337
|
+
) -> str:
|
|
317
338
|
"""Format nested fields for a referenced object definition by name."""
|
|
318
339
|
if ref_name in definitions and ref_name not in visited:
|
|
319
340
|
visited.add(ref_name)
|
|
320
341
|
return cls._format_schema_fields(
|
|
321
342
|
definitions[ref_name].get("properties", {}),
|
|
322
|
-
definitions[ref_name],
|
|
343
|
+
definitions[ref_name],
|
|
344
|
+
definitions,
|
|
345
|
+
indent_level + 1,
|
|
346
|
+
visited.copy(),
|
|
323
347
|
)
|
|
324
348
|
return ""
|
|
325
349
|
|
|
326
350
|
@classmethod
|
|
327
|
-
def _format_array_referenced_object_fields(
|
|
328
|
-
|
|
351
|
+
def _format_array_referenced_object_fields(
|
|
352
|
+
cls, field_schema: dict, definitions: dict, indent_level: int, visited: set
|
|
353
|
+
) -> str:
|
|
329
354
|
"""Format nested fields for arrays referencing object definitions."""
|
|
330
355
|
ref_name = field_schema.get("items", {}).get("$ref", "").split("/")[-1]
|
|
331
356
|
return cls._format_referenced_object_fields(ref_name, definitions, indent_level, visited)
|
|
332
357
|
|
|
333
358
|
@classmethod
|
|
334
|
-
def _format_schema_fields(
|
|
335
|
-
|
|
359
|
+
def _format_schema_fields(
|
|
360
|
+
cls,
|
|
361
|
+
properties: dict,
|
|
362
|
+
schema: dict,
|
|
363
|
+
definitions: dict,
|
|
364
|
+
indent_level: int,
|
|
365
|
+
visited: set | None = None,
|
|
366
|
+
) -> str:
|
|
336
367
|
"""Format multiple schema fields."""
|
|
337
368
|
visited = visited or set()
|
|
338
369
|
required_fields = set(schema.get("required", []))
|
|
@@ -343,8 +374,12 @@ class LLMDataModel(BaseModel):
|
|
|
343
374
|
continue
|
|
344
375
|
lines.append(
|
|
345
376
|
cls._format_schema_field(
|
|
346
|
-
name,
|
|
347
|
-
|
|
377
|
+
name,
|
|
378
|
+
field_schema,
|
|
379
|
+
name in required_fields,
|
|
380
|
+
definitions,
|
|
381
|
+
indent_level,
|
|
382
|
+
visited.copy(),
|
|
348
383
|
)
|
|
349
384
|
)
|
|
350
385
|
|
|
@@ -398,10 +433,7 @@ class LLMDataModel(BaseModel):
|
|
|
398
433
|
@classmethod
|
|
399
434
|
def _resolve_union_type(cls, schemas: list, definitions: dict, separator: str) -> str:
|
|
400
435
|
"""Resolve union types (anyOf/oneOf)."""
|
|
401
|
-
subtypes = [
|
|
402
|
-
cls._resolve_field_type(subschema, definitions)
|
|
403
|
-
for subschema in schemas
|
|
404
|
-
]
|
|
436
|
+
subtypes = [cls._resolve_field_type(subschema, definitions) for subschema in schemas]
|
|
405
437
|
return separator.join(subtypes)
|
|
406
438
|
|
|
407
439
|
@classmethod
|
|
@@ -460,138 +492,180 @@ class LLMDataModel(BaseModel):
|
|
|
460
492
|
Also includes the root model's fields (from root_schema) so their descriptions/examples
|
|
461
493
|
are visible, not just $defs.
|
|
462
494
|
"""
|
|
463
|
-
lines = []
|
|
464
|
-
visited_defs = set()
|
|
465
|
-
|
|
466
|
-
|
|
467
|
-
"""Render property lines using only Field(description=...), with const/excerpts.
|
|
468
|
-
|
|
469
|
-
Always lists properties; if description is missing, emit a generic guidance message.
|
|
470
|
-
"""
|
|
471
|
-
out: list[str] = []
|
|
472
|
-
def _fmt_example_value(val):
|
|
473
|
-
if isinstance(val, str):
|
|
474
|
-
return val
|
|
475
|
-
try:
|
|
476
|
-
return json.dumps(val, ensure_ascii=False)
|
|
477
|
-
except Exception:
|
|
478
|
-
return str(val)
|
|
479
|
-
for prop_name, prop_schema in props.items():
|
|
480
|
-
if prop_name == "section_header":
|
|
481
|
-
continue
|
|
482
|
-
desc = prop_schema.get("description")
|
|
483
|
-
const_note = ""
|
|
484
|
-
if "const_value" in prop_schema:
|
|
485
|
-
const_note = f' (const value: "{prop_schema["const_value"]}")'
|
|
486
|
-
if not desc:
|
|
487
|
-
out.append(
|
|
488
|
-
f' - "{prop_name}": '
|
|
489
|
-
'No definition provided. Focus on the [[Schema]] and the prompt to infer '
|
|
490
|
-
'the expected structure and constraints.'
|
|
491
|
-
)
|
|
492
|
-
else:
|
|
493
|
-
out.append(f' - "{prop_name}": {desc}{const_note}')
|
|
494
|
-
|
|
495
|
-
examples = prop_schema.get("examples")
|
|
496
|
-
if examples is None and "example" in prop_schema:
|
|
497
|
-
examples = prop_schema.get("example")
|
|
498
|
-
|
|
499
|
-
if isinstance(examples, (list, tuple)):
|
|
500
|
-
if len(examples) > 0:
|
|
501
|
-
out.append(" - Examples:")
|
|
502
|
-
for ex in examples:
|
|
503
|
-
out.append(f" - {_fmt_example_value(ex)}")
|
|
504
|
-
elif examples is not None:
|
|
505
|
-
out.append(f" - Example: {_fmt_example_value(examples)}")
|
|
506
|
-
return out
|
|
507
|
-
|
|
508
|
-
# Include root model's fields in Definitions (for descriptions/examples)
|
|
509
|
-
if root_schema and isinstance(root_schema, dict):
|
|
510
|
-
root_title = root_schema.get("title", "Root")
|
|
511
|
-
root_props = cls._extract_schema_properties(root_schema)
|
|
512
|
-
if root_props:
|
|
513
|
-
lines.append(f"- {root_title}:")
|
|
514
|
-
lines.extend(_format_definition_properties(root_props))
|
|
495
|
+
lines: list[str] = []
|
|
496
|
+
visited_defs: set[str] = set()
|
|
497
|
+
|
|
498
|
+
lines.extend(cls._format_root_definition_lines(root_schema))
|
|
515
499
|
|
|
516
500
|
for name, definition in definitions.items():
|
|
517
501
|
if name in visited_defs:
|
|
518
502
|
continue
|
|
519
503
|
visited_defs.add(name)
|
|
520
|
-
lines.
|
|
504
|
+
lines.extend(cls._format_single_definition(name, definition))
|
|
521
505
|
|
|
522
|
-
|
|
523
|
-
|
|
524
|
-
|
|
506
|
+
return "\n".join(lines)
|
|
507
|
+
|
|
508
|
+
@classmethod
|
|
509
|
+
def _format_root_definition_lines(cls, root_schema: dict | None) -> list[str]:
|
|
510
|
+
"""Format definitions derived from the root schema."""
|
|
511
|
+
if not (root_schema and isinstance(root_schema, dict)):
|
|
512
|
+
return []
|
|
513
|
+
root_props = cls._extract_schema_properties(root_schema)
|
|
514
|
+
if not root_props:
|
|
515
|
+
return []
|
|
516
|
+
root_title = root_schema.get("title", "Root")
|
|
517
|
+
lines = [f"- {root_title}:"]
|
|
518
|
+
lines.extend(cls._format_definition_properties(root_props))
|
|
519
|
+
return lines
|
|
520
|
+
|
|
521
|
+
@classmethod
|
|
522
|
+
def _format_single_definition(cls, name: str, definition: dict) -> list[str]:
|
|
523
|
+
"""Format a single definition block, including enum handling."""
|
|
524
|
+
lines = [f"- {name}:"]
|
|
525
|
+
if "enum" in definition:
|
|
526
|
+
enum_values = ", ".join(map(repr, definition["enum"]))
|
|
527
|
+
lines.append(f" Enum values: {enum_values}")
|
|
528
|
+
return lines
|
|
529
|
+
props = definition.get("properties", {})
|
|
530
|
+
lines.extend(cls._format_definition_properties(props))
|
|
531
|
+
return lines
|
|
532
|
+
|
|
533
|
+
@classmethod
|
|
534
|
+
def _format_definition_properties(cls, props: dict) -> list[str]:
|
|
535
|
+
"""Render property lines using descriptions and examples."""
|
|
536
|
+
out: list[str] = []
|
|
537
|
+
for prop_name, prop_schema in props.items():
|
|
538
|
+
if prop_name == "section_header":
|
|
525
539
|
continue
|
|
540
|
+
out.append(cls._format_property_description(prop_name, prop_schema))
|
|
541
|
+
out.extend(cls._format_property_examples(prop_schema))
|
|
542
|
+
return out
|
|
526
543
|
|
|
527
|
-
|
|
528
|
-
|
|
544
|
+
@staticmethod
|
|
545
|
+
def _format_property_description(prop_name: str, prop_schema: dict) -> str:
|
|
546
|
+
"""Format the description line for a property, including const hints."""
|
|
547
|
+
desc = prop_schema.get("description")
|
|
548
|
+
const_note = ""
|
|
549
|
+
if "const_value" in prop_schema:
|
|
550
|
+
const_note = f' (const value: "{prop_schema["const_value"]}")'
|
|
551
|
+
if not desc:
|
|
552
|
+
return (
|
|
553
|
+
f' - "{prop_name}": '
|
|
554
|
+
"No definition provided. Focus on the [[Schema]] and the prompt to infer "
|
|
555
|
+
"the expected structure and constraints."
|
|
556
|
+
)
|
|
557
|
+
return f' - "{prop_name}": {desc}{const_note}'
|
|
529
558
|
|
|
530
|
-
|
|
559
|
+
@classmethod
|
|
560
|
+
def _format_property_examples(cls, prop_schema: dict) -> list[str]:
|
|
561
|
+
"""Format example lines for a property schema."""
|
|
562
|
+
examples = prop_schema.get("examples")
|
|
563
|
+
if examples is None and "example" in prop_schema:
|
|
564
|
+
examples = prop_schema.get("example")
|
|
565
|
+
if isinstance(examples, (list, tuple)):
|
|
566
|
+
if not examples:
|
|
567
|
+
return []
|
|
568
|
+
lines = [" - Examples:"]
|
|
569
|
+
for example in examples:
|
|
570
|
+
lines.append(f" - {cls._format_example_value(example)}")
|
|
571
|
+
return lines
|
|
572
|
+
if examples is not None:
|
|
573
|
+
return [f" - Example: {cls._format_example_value(examples)}"]
|
|
574
|
+
return []
|
|
575
|
+
|
|
576
|
+
@staticmethod
|
|
577
|
+
def _format_example_value(val: Any) -> str:
|
|
578
|
+
"""Safely format example values for display, preserving human readability."""
|
|
579
|
+
if isinstance(val, str):
|
|
580
|
+
return val
|
|
581
|
+
try:
|
|
582
|
+
return json.dumps(val, ensure_ascii=False)
|
|
583
|
+
except Exception:
|
|
584
|
+
return str(val)
|
|
531
585
|
|
|
532
586
|
@classmethod
|
|
533
587
|
def _generate_type_description(cls, type_desc: str | dict) -> str:
|
|
534
588
|
"""Generate a human-readable description for a type."""
|
|
535
|
-
|
|
589
|
+
normalized = cls._normalize_type_descriptor(type_desc)
|
|
590
|
+
if normalized is None:
|
|
536
591
|
return "unknown"
|
|
537
592
|
|
|
593
|
+
composite_description = cls._describe_composite_type(normalized)
|
|
594
|
+
return composite_description if composite_description is not None else normalized
|
|
595
|
+
|
|
596
|
+
@classmethod
|
|
597
|
+
def _normalize_type_descriptor(cls, type_desc: Any) -> str | None:
|
|
598
|
+
"""Normalize a type descriptor into a descriptive string."""
|
|
599
|
+
if type_desc is None:
|
|
600
|
+
return None
|
|
538
601
|
if isinstance(type_desc, dict):
|
|
539
602
|
type_desc = cls._resolve_field_type(type_desc, {})
|
|
540
|
-
|
|
541
603
|
if isinstance(type_desc, type):
|
|
542
604
|
type_desc = type_desc.__name__
|
|
543
|
-
|
|
544
605
|
if not isinstance(type_desc, str):
|
|
545
606
|
type_desc = str(type_desc)
|
|
607
|
+
return type_desc
|
|
546
608
|
|
|
547
|
-
|
|
548
|
-
|
|
549
|
-
|
|
550
|
-
|
|
551
|
-
|
|
552
|
-
|
|
553
|
-
"
|
|
554
|
-
"
|
|
555
|
-
"tuple of": ("tuple", "A tuple with specific types:"),
|
|
556
|
-
"object of": ("dict", "A dictionary"),
|
|
609
|
+
@classmethod
|
|
610
|
+
def _describe_composite_type(cls, type_desc: str) -> str | None:
|
|
611
|
+
"""Describe composite collection-like types with friendly language."""
|
|
612
|
+
handlers = {
|
|
613
|
+
"array of ": cls._describe_list_type,
|
|
614
|
+
"set of ": cls._describe_set_type,
|
|
615
|
+
"tuple of ": cls._describe_tuple_type,
|
|
616
|
+
"object of ": cls._describe_dict_type,
|
|
557
617
|
}
|
|
618
|
+
for prefix, handler in handlers.items():
|
|
619
|
+
if type_desc.startswith(prefix):
|
|
620
|
+
item_type = type_desc[len(prefix) :]
|
|
621
|
+
return handler(item_type)
|
|
622
|
+
return None
|
|
558
623
|
|
|
559
|
-
|
|
560
|
-
|
|
561
|
-
|
|
562
|
-
|
|
563
|
-
|
|
564
|
-
|
|
565
|
-
|
|
566
|
-
element_desc = item_type
|
|
567
|
-
elif inner := extract_after(item_type, "array of "):
|
|
568
|
-
element_desc = f"a list of {inner} values"
|
|
569
|
-
elif inner := extract_after(item_type, "set of "):
|
|
570
|
-
element_desc = f"a set of unique {inner} values"
|
|
571
|
-
elif inner := extract_after(item_type, "tuple of "):
|
|
572
|
-
element_desc = f"a tuple with types: {inner}"
|
|
573
|
-
elif inner := extract_after(item_type, "object of "):
|
|
574
|
-
element_desc = f"a dictionary with {inner} values"
|
|
575
|
-
else:
|
|
576
|
-
# Simple types
|
|
577
|
-
if type_name == "list":
|
|
578
|
-
return f"A list containing {item_type} values"
|
|
579
|
-
elif type_name == "set":
|
|
580
|
-
return f"A set containing unique {item_type} values"
|
|
581
|
-
elif type_name == "dict":
|
|
582
|
-
return f"A dictionary with {item_type} values"
|
|
583
|
-
else:
|
|
584
|
-
element_desc = item_type
|
|
585
|
-
|
|
586
|
-
# Format the final description
|
|
587
|
-
if type_name == "list":
|
|
588
|
-
return f"A list where each element is {element_desc}"
|
|
589
|
-
elif type_name == "set":
|
|
590
|
-
return f"A set where each element is {element_desc}"
|
|
591
|
-
elif type_name == "dict":
|
|
592
|
-
return f"A dictionary where each value is {element_desc}"
|
|
624
|
+
@classmethod
|
|
625
|
+
def _describe_list_type(cls, item_type: str) -> str:
|
|
626
|
+
"""Describe a list-style type."""
|
|
627
|
+
element_desc = cls._nested_element_description(item_type)
|
|
628
|
+
if element_desc is None:
|
|
629
|
+
return f"A list containing {item_type} values"
|
|
630
|
+
return f"A list where each element is {element_desc}"
|
|
593
631
|
|
|
594
|
-
|
|
632
|
+
@classmethod
|
|
633
|
+
def _describe_set_type(cls, item_type: str) -> str:
|
|
634
|
+
"""Describe a set-style type."""
|
|
635
|
+
element_desc = cls._nested_element_description(item_type)
|
|
636
|
+
if element_desc is None:
|
|
637
|
+
return f"A set containing unique {item_type} values"
|
|
638
|
+
return f"A set where each element is {element_desc}"
|
|
639
|
+
|
|
640
|
+
@staticmethod
|
|
641
|
+
def _describe_tuple_type(item_type: str) -> str:
|
|
642
|
+
"""Describe a tuple-style type."""
|
|
643
|
+
return f"A tuple with specific types: {item_type}"
|
|
644
|
+
|
|
645
|
+
@classmethod
|
|
646
|
+
def _describe_dict_type(cls, item_type: str) -> str:
|
|
647
|
+
"""Describe a dictionary-style type."""
|
|
648
|
+
element_desc = cls._nested_element_description(item_type)
|
|
649
|
+
if element_desc is None:
|
|
650
|
+
return f"A dictionary with {item_type} values"
|
|
651
|
+
return f"A dictionary where each value is {element_desc}"
|
|
652
|
+
|
|
653
|
+
@staticmethod
|
|
654
|
+
def _nested_element_description(item_type: str) -> str | None:
|
|
655
|
+
"""Convert nested composite descriptors into human-friendly text."""
|
|
656
|
+
if item_type.startswith("nested object"):
|
|
657
|
+
return item_type
|
|
658
|
+
nested_mappings = {
|
|
659
|
+
"array of ": "a list of {} values",
|
|
660
|
+
"set of ": "a set of unique {} values",
|
|
661
|
+
"tuple of ": "a tuple with types: {}",
|
|
662
|
+
"object of ": "a dictionary with {} values",
|
|
663
|
+
}
|
|
664
|
+
for prefix, template in nested_mappings.items():
|
|
665
|
+
if item_type.startswith(prefix):
|
|
666
|
+
inner = item_type[len(prefix) :]
|
|
667
|
+
return template.format(inner)
|
|
668
|
+
return None
|
|
595
669
|
|
|
596
670
|
@classmethod
|
|
597
671
|
def _compose_schema_output(cls, main_schema: str, definitions_schema: str) -> str:
|
|
@@ -612,7 +686,7 @@ class LLMDataModel(BaseModel):
|
|
|
612
686
|
return cls._generate_example_for_model(model, visited_models)
|
|
613
687
|
|
|
614
688
|
@staticmethod
|
|
615
|
-
def _generate_example_for_model(model:
|
|
689
|
+
def _generate_example_for_model(model: type[BaseModel], visited_models: set) -> dict:
|
|
616
690
|
"""Generate example for a model, excluding section_header."""
|
|
617
691
|
example = {}
|
|
618
692
|
for field_name, model_field in model.model_fields.items():
|
|
@@ -630,35 +704,35 @@ class LLMDataModel(BaseModel):
|
|
|
630
704
|
|
|
631
705
|
if LLMDataModel._has_default_value(model_field):
|
|
632
706
|
default_val = model_field.default
|
|
633
|
-
desc = getattr(model_field,
|
|
634
|
-
ann = getattr(model_field,
|
|
707
|
+
desc = getattr(model_field, "description", None)
|
|
708
|
+
ann = getattr(model_field, "annotation", None)
|
|
635
709
|
is_desc_like = isinstance(default_val, str) and (
|
|
636
|
-
(desc and default_val.strip() == str(desc).strip())
|
|
637
|
-
len(default_val) >= 30
|
|
638
|
-
any(
|
|
710
|
+
(desc and default_val.strip() == str(desc).strip())
|
|
711
|
+
or len(default_val) >= 30
|
|
712
|
+
or any(
|
|
713
|
+
kw in default_val
|
|
714
|
+
for kw in ["represents", "should", "Always use", "This is", "This represents"]
|
|
715
|
+
)
|
|
639
716
|
)
|
|
640
717
|
if is_desc_like and (ann is str or ann is Any or ann is None):
|
|
641
718
|
return "example_string"
|
|
642
719
|
return default_val
|
|
643
|
-
|
|
720
|
+
if model_field.default_factory is not None:
|
|
644
721
|
# For example generation, we want to show structure even if default is empty
|
|
645
722
|
# Check if default_factory would produce an empty container
|
|
646
723
|
default_val = model_field.default_factory()
|
|
647
724
|
if isinstance(default_val, (list, dict, set, tuple)) and len(default_val) == 0:
|
|
648
725
|
# Generate example data instead of using empty default
|
|
649
|
-
return LLMDataModel._generate_value_for_type(
|
|
650
|
-
model_field.annotation, visited_models
|
|
651
|
-
)
|
|
726
|
+
return LLMDataModel._generate_value_for_type(model_field.annotation, visited_models)
|
|
652
727
|
return default_val
|
|
653
|
-
|
|
654
|
-
return LLMDataModel._generate_value_for_type(
|
|
655
|
-
model_field.annotation, visited_models
|
|
656
|
-
)
|
|
728
|
+
return LLMDataModel._generate_value_for_type(model_field.annotation, visited_models)
|
|
657
729
|
|
|
658
730
|
@staticmethod
|
|
659
731
|
def _generate_value_for_type(field_type: Any, visited_models: set) -> Any:
|
|
660
732
|
"""Generate a value for a specific type (standard behavior)."""
|
|
661
|
-
return LLMDataModel._generate_value_for_type_generic(
|
|
733
|
+
return LLMDataModel._generate_value_for_type_generic(
|
|
734
|
+
field_type, visited_models, prefer_non_null=False
|
|
735
|
+
)
|
|
662
736
|
|
|
663
737
|
@staticmethod
|
|
664
738
|
def _generate_union_value(field_type: Any, visited_models: set) -> Any:
|
|
@@ -666,7 +740,9 @@ class LLMDataModel(BaseModel):
|
|
|
666
740
|
subtypes = LLMDataModel._get_union_types(field_type, exclude_none=True)
|
|
667
741
|
if not subtypes:
|
|
668
742
|
return None
|
|
669
|
-
return LLMDataModel._generate_value_for_type_generic(
|
|
743
|
+
return LLMDataModel._generate_value_for_type_generic(
|
|
744
|
+
subtypes[0], visited_models, prefer_non_null=False
|
|
745
|
+
)
|
|
670
746
|
|
|
671
747
|
@staticmethod
|
|
672
748
|
def _generate_collection_value(field_type: Any, visited_models: set) -> Any:
|
|
@@ -691,7 +767,10 @@ class LLMDataModel(BaseModel):
|
|
|
691
767
|
|
|
692
768
|
if LLMDataModel._is_union_type(item_type):
|
|
693
769
|
subtypes = LLMDataModel._get_union_types(item_type)
|
|
694
|
-
return [
|
|
770
|
+
return [
|
|
771
|
+
LLMDataModel._generate_value_for_type_generic(subtype, visited_models, False)
|
|
772
|
+
for subtype in subtypes[:2]
|
|
773
|
+
]
|
|
695
774
|
|
|
696
775
|
return [LLMDataModel._generate_value_for_type_generic(item_type, visited_models, False)]
|
|
697
776
|
|
|
@@ -701,7 +780,11 @@ class LLMDataModel(BaseModel):
|
|
|
701
780
|
key_type, value_type = get_args(field_type) if get_args(field_type) else (Any, Any)
|
|
702
781
|
|
|
703
782
|
example_key = LLMDataModel._example_key_for_type(key_type, visited_models)
|
|
704
|
-
return {
|
|
783
|
+
return {
|
|
784
|
+
example_key: LLMDataModel._generate_value_for_type_generic(
|
|
785
|
+
value_type, visited_models, False
|
|
786
|
+
)
|
|
787
|
+
}
|
|
705
788
|
|
|
706
789
|
@staticmethod
|
|
707
790
|
def _generate_set_value(field_type: Any, visited_models: set) -> list:
|
|
@@ -714,7 +797,10 @@ class LLMDataModel(BaseModel):
|
|
|
714
797
|
"""Generate a value for a tuple type."""
|
|
715
798
|
types = get_args(field_type)
|
|
716
799
|
if types:
|
|
717
|
-
return tuple(
|
|
800
|
+
return tuple(
|
|
801
|
+
LLMDataModel._generate_value_for_type_generic(t, visited_models, False)
|
|
802
|
+
for t in types
|
|
803
|
+
)
|
|
718
804
|
return ("item1", "item2")
|
|
719
805
|
|
|
720
806
|
@staticmethod
|
|
@@ -778,10 +864,7 @@ class LLMDataModel(BaseModel):
|
|
|
778
864
|
@classmethod
|
|
779
865
|
def _find_non_header_fields(cls) -> dict:
|
|
780
866
|
"""Find all fields except section_header."""
|
|
781
|
-
return {
|
|
782
|
-
name: f for name, f in cls.model_fields.items()
|
|
783
|
-
if name != "section_header"
|
|
784
|
-
}
|
|
867
|
+
return {name: f for name, f in cls.model_fields.items() if name != "section_header"}
|
|
785
868
|
|
|
786
869
|
@classmethod
|
|
787
870
|
def _is_single_value_model(cls, fields: dict) -> bool:
|
|
@@ -838,7 +921,9 @@ class LLMDataModel(BaseModel):
|
|
|
838
921
|
return submodel.generate_example_json()
|
|
839
922
|
|
|
840
923
|
@classmethod
|
|
841
|
-
def _generate_non_null_example_for_model(
|
|
924
|
+
def _generate_non_null_example_for_model(
|
|
925
|
+
cls, model: type[BaseModel], visited_models: set | None = None
|
|
926
|
+
) -> dict:
|
|
842
927
|
"""Generate an example for a model, preferring non-null for Optional fields (recursive)."""
|
|
843
928
|
if visited_models is None:
|
|
844
929
|
visited_models = set()
|
|
@@ -857,13 +942,17 @@ class LLMDataModel(BaseModel):
|
|
|
857
942
|
chosen = non_none_types[0] if non_none_types else Any
|
|
858
943
|
example[field_name] = cls._generate_value_for_type_non_null(chosen, visited_models)
|
|
859
944
|
else:
|
|
860
|
-
example[field_name] = cls._generate_value_for_type_non_null(
|
|
945
|
+
example[field_name] = cls._generate_value_for_type_non_null(
|
|
946
|
+
model_field.annotation, visited_models
|
|
947
|
+
)
|
|
861
948
|
return example
|
|
862
949
|
|
|
863
950
|
@classmethod
|
|
864
951
|
def _generate_value_for_type_non_null(cls, field_type: Any, visited_models: set) -> Any:
|
|
865
952
|
"""Generate a value ensuring non-null choices for unions/optionals."""
|
|
866
|
-
return cls._generate_value_for_type_generic(
|
|
953
|
+
return cls._generate_value_for_type_generic(
|
|
954
|
+
field_type, visited_models, prefer_non_null=True
|
|
955
|
+
)
|
|
867
956
|
|
|
868
957
|
@classmethod
|
|
869
958
|
def _example_key_for_type(cls, key_type: Any, visited_models: set) -> Any:
|
|
@@ -876,67 +965,137 @@ class LLMDataModel(BaseModel):
|
|
|
876
965
|
return True
|
|
877
966
|
if key_type is tuple or get_origin(key_type) is tuple:
|
|
878
967
|
tuple_args = get_args(key_type) if get_args(key_type) else (str, int)
|
|
879
|
-
return tuple(
|
|
968
|
+
return tuple(
|
|
969
|
+
cls._generate_value_for_type_generic(t, visited_models, True) for t in tuple_args
|
|
970
|
+
)
|
|
880
971
|
if key_type is frozenset or get_origin(key_type) is frozenset:
|
|
881
972
|
item_type = get_args(key_type)[0] if get_args(key_type) else str
|
|
882
|
-
return frozenset(
|
|
973
|
+
return frozenset(
|
|
974
|
+
[cls._generate_value_for_type_generic(item_type, visited_models, True)]
|
|
975
|
+
)
|
|
883
976
|
return "example_string"
|
|
884
977
|
|
|
885
978
|
@classmethod
|
|
886
|
-
def _generate_value_for_type_generic(
|
|
979
|
+
def _generate_value_for_type_generic(
|
|
980
|
+
cls, field_type: Any, visited_models: set, prefer_non_null: bool
|
|
981
|
+
) -> Any:
|
|
887
982
|
"""Unified generator for example values; prefer_non_null to avoid None variants."""
|
|
888
983
|
origin = get_origin(field_type) or field_type
|
|
889
984
|
|
|
890
|
-
|
|
985
|
+
handled, value = cls._handle_enum_type(origin)
|
|
986
|
+
if handled:
|
|
987
|
+
return value
|
|
988
|
+
|
|
989
|
+
handled, value = cls._handle_literal_type(origin, field_type)
|
|
990
|
+
if handled:
|
|
991
|
+
return value
|
|
992
|
+
|
|
993
|
+
handled, value = cls._handle_model_type(origin, field_type, visited_models, prefer_non_null)
|
|
994
|
+
if handled:
|
|
995
|
+
return value
|
|
996
|
+
|
|
997
|
+
handled, value = cls._handle_union_type(field_type, visited_models, prefer_non_null)
|
|
998
|
+
if handled:
|
|
999
|
+
return value
|
|
1000
|
+
|
|
1001
|
+
handled, value = cls._handle_collection_type(field_type, visited_models, prefer_non_null)
|
|
1002
|
+
if handled:
|
|
1003
|
+
return value
|
|
1004
|
+
|
|
1005
|
+
return LLMDataModel._generate_primitive_value(field_type)
|
|
1006
|
+
|
|
1007
|
+
@staticmethod
|
|
1008
|
+
def _handle_enum_type(origin: Any) -> tuple[bool, Any]:
|
|
1009
|
+
"""Handle Enum types when generating example values."""
|
|
891
1010
|
if isinstance(origin, type) and issubclass(origin, Enum):
|
|
892
|
-
|
|
1011
|
+
first_member = next(iter(origin), None)
|
|
1012
|
+
value = first_member.value if first_member is not None else "enum_value"
|
|
1013
|
+
return True, value
|
|
1014
|
+
return False, None
|
|
893
1015
|
|
|
894
|
-
|
|
1016
|
+
@staticmethod
|
|
1017
|
+
def _handle_literal_type(origin: Any, field_type: Any) -> tuple[bool, Any]:
|
|
1018
|
+
"""Handle Literal[...] annotations."""
|
|
895
1019
|
if origin is Literal:
|
|
896
|
-
|
|
897
|
-
|
|
898
|
-
|
|
899
|
-
|
|
900
|
-
|
|
901
|
-
|
|
902
|
-
|
|
903
|
-
|
|
904
|
-
|
|
905
|
-
|
|
906
|
-
return
|
|
907
|
-
|
|
908
|
-
|
|
909
|
-
|
|
910
|
-
|
|
911
|
-
|
|
912
|
-
|
|
913
|
-
|
|
914
|
-
|
|
915
|
-
|
|
916
|
-
|
|
917
|
-
if LLMDataModel._is_collection_type(field_type):
|
|
918
|
-
origin = get_origin(field_type) or field_type
|
|
919
|
-
if origin is list:
|
|
920
|
-
item_type = get_args(field_type)[0] if get_args(field_type) else Any
|
|
921
|
-
return [cls._generate_value_for_type_generic(item_type, visited_models, prefer_non_null)]
|
|
922
|
-
if origin is dict:
|
|
923
|
-
key_type, value_type = get_args(field_type) if get_args(field_type) else (Any, Any)
|
|
924
|
-
example_key = cls._example_key_for_type(key_type, visited_models)
|
|
925
|
-
return {example_key: cls._generate_value_for_type_generic(value_type, visited_models, prefer_non_null)}
|
|
926
|
-
if origin in (set, frozenset):
|
|
927
|
-
item_type = get_args(field_type)[0] if get_args(field_type) else Any
|
|
928
|
-
return [cls._generate_value_for_type_generic(item_type, visited_models, prefer_non_null)]
|
|
929
|
-
if origin is tuple:
|
|
930
|
-
types = get_args(field_type)
|
|
931
|
-
if types:
|
|
932
|
-
return tuple(cls._generate_value_for_type_generic(t, visited_models, prefer_non_null) for t in types)
|
|
933
|
-
return ("item1", "item2")
|
|
934
|
-
|
|
935
|
-
# Primitives
|
|
936
|
-
return LLMDataModel._generate_primitive_value(field_type)
|
|
1020
|
+
literal_args = get_args(field_type)
|
|
1021
|
+
return True, literal_args[0] if literal_args else None
|
|
1022
|
+
return False, None
|
|
1023
|
+
|
|
1024
|
+
@classmethod
|
|
1025
|
+
def _handle_model_type(
|
|
1026
|
+
cls, origin: Any, field_type: Any, visited_models: set, prefer_non_null: bool
|
|
1027
|
+
) -> tuple[bool, Any]:
|
|
1028
|
+
"""Handle Pydantic BaseModel subclasses."""
|
|
1029
|
+
if not (isinstance(origin, type) and issubclass(origin, BaseModel)):
|
|
1030
|
+
return False, None
|
|
1031
|
+
model_name = field_type.__name__
|
|
1032
|
+
if model_name in visited_models:
|
|
1033
|
+
return True, {}
|
|
1034
|
+
visited_models.add(model_name)
|
|
1035
|
+
generator = (
|
|
1036
|
+
cls._generate_non_null_example_for_model
|
|
1037
|
+
if prefer_non_null
|
|
1038
|
+
else LLMDataModel._generate_example_for_model
|
|
1039
|
+
)
|
|
1040
|
+
return True, generator(field_type, visited_models.copy())
|
|
937
1041
|
|
|
938
1042
|
@classmethod
|
|
939
|
-
def
|
|
1043
|
+
def _handle_union_type(
|
|
1044
|
+
cls, field_type: Any, visited_models: set, prefer_non_null: bool
|
|
1045
|
+
) -> tuple[bool, Any]:
|
|
1046
|
+
"""Handle Optional/Union annotations."""
|
|
1047
|
+
if not LLMDataModel._is_union_type(field_type):
|
|
1048
|
+
return False, None
|
|
1049
|
+
subtypes = LLMDataModel._get_union_types(field_type, exclude_none=True)
|
|
1050
|
+
if not subtypes:
|
|
1051
|
+
return True, None
|
|
1052
|
+
chosen = subtypes[0]
|
|
1053
|
+
value = cls._generate_value_for_type_generic(chosen, visited_models, prefer_non_null)
|
|
1054
|
+
return True, value
|
|
1055
|
+
|
|
1056
|
+
@classmethod
|
|
1057
|
+
def _handle_collection_type(
|
|
1058
|
+
cls, field_type: Any, visited_models: set, prefer_non_null: bool
|
|
1059
|
+
) -> tuple[bool, Any]:
|
|
1060
|
+
"""Handle list/dict/set/tuple-like annotations."""
|
|
1061
|
+
if not LLMDataModel._is_collection_type(field_type):
|
|
1062
|
+
return False, None
|
|
1063
|
+
|
|
1064
|
+
origin = get_origin(field_type) or field_type
|
|
1065
|
+
args = get_args(field_type)
|
|
1066
|
+
|
|
1067
|
+
if origin is list:
|
|
1068
|
+
item_type = args[0] if args else Any
|
|
1069
|
+
value = [
|
|
1070
|
+
cls._generate_value_for_type_generic(item_type, visited_models, prefer_non_null)
|
|
1071
|
+
]
|
|
1072
|
+
return True, value
|
|
1073
|
+
if origin is dict:
|
|
1074
|
+
key_type, value_type = args if args else (Any, Any)
|
|
1075
|
+
example_key = cls._example_key_for_type(key_type, visited_models)
|
|
1076
|
+
example_value = cls._generate_value_for_type_generic(
|
|
1077
|
+
value_type, visited_models, prefer_non_null
|
|
1078
|
+
)
|
|
1079
|
+
return True, {example_key: example_value}
|
|
1080
|
+
if origin in (set, frozenset):
|
|
1081
|
+
item_type = args[0] if args else Any
|
|
1082
|
+
value = [
|
|
1083
|
+
cls._generate_value_for_type_generic(item_type, visited_models, prefer_non_null)
|
|
1084
|
+
]
|
|
1085
|
+
return True, value
|
|
1086
|
+
if origin is tuple:
|
|
1087
|
+
if args:
|
|
1088
|
+
tuple_values = tuple(
|
|
1089
|
+
cls._generate_value_for_type_generic(t, visited_models, prefer_non_null)
|
|
1090
|
+
for t in args
|
|
1091
|
+
)
|
|
1092
|
+
return True, tuple_values
|
|
1093
|
+
return True, ("item1", "item2")
|
|
1094
|
+
|
|
1095
|
+
return True, []
|
|
1096
|
+
|
|
1097
|
+
@classmethod
|
|
1098
|
+
def _format_json_example(cls, obj: Any, _indent: int = 0) -> str:
|
|
940
1099
|
"""Format an object as a JSON string representation."""
|
|
941
1100
|
return json.dumps(obj, indent=2, default=str)
|
|
942
1101
|
|
|
@@ -952,22 +1111,22 @@ class LLMDataModel(BaseModel):
|
|
|
952
1111
|
return "\n\n".join(example_blocks)
|
|
953
1112
|
|
|
954
1113
|
|
|
955
|
-
def build_dynamic_llm_datamodel(py_type: Any) ->
|
|
1114
|
+
def build_dynamic_llm_datamodel(py_type: Any) -> type[LLMDataModel]:
|
|
956
1115
|
"""Dynamically create a subclass of LLMDataModel with a single 'value' field."""
|
|
957
1116
|
model_name = f"LLMDynamicDataModel_{hash(str(py_type)) & 0xFFFFFFFF:X}"
|
|
958
1117
|
|
|
959
|
-
model:
|
|
1118
|
+
model: type[LLMDataModel] = create_model(
|
|
960
1119
|
model_name,
|
|
961
1120
|
__base__=LLMDataModel,
|
|
962
1121
|
value=(
|
|
963
1122
|
py_type,
|
|
964
1123
|
Field(
|
|
965
1124
|
...,
|
|
966
|
-
description="This is a dynamically generated data model. This description is general. "
|
|
967
|
-
"If you're dealing with a complex type, or nested types in combination with unions, make sure you "
|
|
968
|
-
"understand the instructions provided in the prompt, and select the appropriate data model based on the "
|
|
969
|
-
"type at hand, as described in the schema section."
|
|
970
|
-
)
|
|
1125
|
+
description="This is a dynamically generated data model. This description is general. "
|
|
1126
|
+
"If you're dealing with a complex type, or nested types in combination with unions, make sure you "
|
|
1127
|
+
"understand the instructions provided in the prompt, and select the appropriate data model based on the "
|
|
1128
|
+
"type at hand, as described in the schema section.",
|
|
1129
|
+
),
|
|
971
1130
|
),
|
|
972
1131
|
)
|
|
973
1132
|
|