symbolicai 0.20.2__py3-none-any.whl → 1.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- symai/__init__.py +96 -64
- symai/backend/base.py +93 -80
- symai/backend/engines/drawing/engine_bfl.py +12 -11
- symai/backend/engines/drawing/engine_gpt_image.py +108 -87
- symai/backend/engines/embedding/engine_llama_cpp.py +25 -28
- symai/backend/engines/embedding/engine_openai.py +3 -5
- symai/backend/engines/execute/engine_python.py +6 -5
- symai/backend/engines/files/engine_io.py +74 -67
- symai/backend/engines/imagecaptioning/engine_blip2.py +3 -3
- symai/backend/engines/imagecaptioning/engine_llavacpp_client.py +54 -38
- symai/backend/engines/index/engine_pinecone.py +23 -24
- symai/backend/engines/index/engine_vectordb.py +16 -14
- symai/backend/engines/lean/engine_lean4.py +38 -34
- symai/backend/engines/neurosymbolic/__init__.py +41 -13
- symai/backend/engines/neurosymbolic/engine_anthropic_claudeX_chat.py +262 -182
- symai/backend/engines/neurosymbolic/engine_anthropic_claudeX_reasoning.py +263 -191
- symai/backend/engines/neurosymbolic/engine_deepseekX_reasoning.py +53 -49
- symai/backend/engines/neurosymbolic/engine_google_geminiX_reasoning.py +212 -211
- symai/backend/engines/neurosymbolic/engine_groq.py +87 -63
- symai/backend/engines/neurosymbolic/engine_huggingface.py +21 -24
- symai/backend/engines/neurosymbolic/engine_llama_cpp.py +117 -48
- symai/backend/engines/neurosymbolic/engine_openai_gptX_chat.py +256 -229
- symai/backend/engines/neurosymbolic/engine_openai_gptX_reasoning.py +270 -150
- symai/backend/engines/ocr/engine_apilayer.py +6 -8
- symai/backend/engines/output/engine_stdout.py +1 -4
- symai/backend/engines/search/engine_openai.py +7 -7
- symai/backend/engines/search/engine_perplexity.py +5 -5
- symai/backend/engines/search/engine_serpapi.py +12 -14
- symai/backend/engines/speech_to_text/engine_local_whisper.py +20 -27
- symai/backend/engines/symbolic/engine_wolframalpha.py +3 -3
- symai/backend/engines/text_to_speech/engine_openai.py +5 -7
- symai/backend/engines/text_vision/engine_clip.py +7 -11
- symai/backend/engines/userinput/engine_console.py +3 -3
- symai/backend/engines/webscraping/engine_requests.py +81 -48
- symai/backend/mixin/__init__.py +13 -0
- symai/backend/mixin/anthropic.py +4 -2
- symai/backend/mixin/deepseek.py +2 -0
- symai/backend/mixin/google.py +2 -0
- symai/backend/mixin/openai.py +11 -3
- symai/backend/settings.py +83 -16
- symai/chat.py +101 -78
- symai/collect/__init__.py +7 -1
- symai/collect/dynamic.py +77 -69
- symai/collect/pipeline.py +35 -27
- symai/collect/stats.py +75 -63
- symai/components.py +198 -169
- symai/constraints.py +15 -12
- symai/core.py +698 -359
- symai/core_ext.py +32 -34
- symai/endpoints/api.py +80 -73
- symai/extended/.DS_Store +0 -0
- symai/extended/__init__.py +46 -12
- symai/extended/api_builder.py +11 -8
- symai/extended/arxiv_pdf_parser.py +13 -12
- symai/extended/bibtex_parser.py +2 -3
- symai/extended/conversation.py +101 -90
- symai/extended/document.py +17 -10
- symai/extended/file_merger.py +18 -13
- symai/extended/graph.py +18 -13
- symai/extended/html_style_template.py +2 -4
- symai/extended/interfaces/blip_2.py +1 -2
- symai/extended/interfaces/clip.py +1 -2
- symai/extended/interfaces/console.py +7 -1
- symai/extended/interfaces/dall_e.py +1 -1
- symai/extended/interfaces/flux.py +1 -1
- symai/extended/interfaces/gpt_image.py +1 -1
- symai/extended/interfaces/input.py +1 -1
- symai/extended/interfaces/llava.py +0 -1
- symai/extended/interfaces/naive_vectordb.py +7 -8
- symai/extended/interfaces/naive_webscraping.py +1 -1
- symai/extended/interfaces/ocr.py +1 -1
- symai/extended/interfaces/pinecone.py +6 -5
- symai/extended/interfaces/serpapi.py +1 -1
- symai/extended/interfaces/terminal.py +2 -3
- symai/extended/interfaces/tts.py +1 -1
- symai/extended/interfaces/whisper.py +1 -1
- symai/extended/interfaces/wolframalpha.py +1 -1
- symai/extended/metrics/__init__.py +11 -1
- symai/extended/metrics/similarity.py +11 -13
- symai/extended/os_command.py +17 -16
- symai/extended/packages/__init__.py +29 -3
- symai/extended/packages/symdev.py +19 -16
- symai/extended/packages/sympkg.py +12 -9
- symai/extended/packages/symrun.py +21 -19
- symai/extended/repo_cloner.py +11 -10
- symai/extended/seo_query_optimizer.py +1 -2
- symai/extended/solver.py +20 -23
- symai/extended/summarizer.py +4 -3
- symai/extended/taypan_interpreter.py +10 -12
- symai/extended/vectordb.py +99 -82
- symai/formatter/__init__.py +9 -1
- symai/formatter/formatter.py +12 -16
- symai/formatter/regex.py +62 -63
- symai/functional.py +176 -122
- symai/imports.py +136 -127
- symai/interfaces.py +56 -27
- symai/memory.py +14 -13
- symai/misc/console.py +49 -39
- symai/misc/loader.py +5 -3
- symai/models/__init__.py +17 -1
- symai/models/base.py +269 -181
- symai/models/errors.py +0 -1
- symai/ops/__init__.py +32 -22
- symai/ops/measures.py +11 -15
- symai/ops/primitives.py +348 -228
- symai/post_processors.py +32 -28
- symai/pre_processors.py +39 -41
- symai/processor.py +6 -4
- symai/prompts.py +59 -45
- symai/server/huggingface_server.py +23 -20
- symai/server/llama_cpp_server.py +7 -5
- symai/shell.py +3 -4
- symai/shellsv.py +499 -375
- symai/strategy.py +517 -287
- symai/symbol.py +111 -116
- symai/utils.py +42 -36
- {symbolicai-0.20.2.dist-info → symbolicai-1.0.0.dist-info}/METADATA +4 -2
- symbolicai-1.0.0.dist-info/RECORD +163 -0
- symbolicai-0.20.2.dist-info/RECORD +0 -162
- {symbolicai-0.20.2.dist-info → symbolicai-1.0.0.dist-info}/WHEEL +0 -0
- {symbolicai-0.20.2.dist-info → symbolicai-1.0.0.dist-info}/entry_points.txt +0 -0
- {symbolicai-0.20.2.dist-info → symbolicai-1.0.0.dist-info}/licenses/LICENSE +0 -0
- {symbolicai-0.20.2.dist-info → symbolicai-1.0.0.dist-info}/top_level.txt +0 -0
symai/models/base.py
CHANGED
|
@@ -2,12 +2,14 @@ import json
|
|
|
2
2
|
from enum import Enum
|
|
3
3
|
from functools import lru_cache
|
|
4
4
|
from types import UnionType
|
|
5
|
-
from typing import Any, Literal,
|
|
5
|
+
from typing import Any, Literal, Union, get_args, get_origin
|
|
6
6
|
|
|
7
7
|
from attr import dataclass
|
|
8
8
|
from pydantic import BaseModel, Field, create_model, model_validator
|
|
9
9
|
from pydantic_core import PydanticUndefined
|
|
10
10
|
|
|
11
|
+
from ..utils import UserMessage
|
|
12
|
+
|
|
11
13
|
|
|
12
14
|
@dataclass
|
|
13
15
|
class LengthConstraint:
|
|
@@ -45,7 +47,7 @@ class LLMDataModel(BaseModel):
|
|
|
45
47
|
if cls._is_const_field(field_info):
|
|
46
48
|
const_value = cls._get_const_value(field_info)
|
|
47
49
|
if field_name in values and values[field_name] != const_value:
|
|
48
|
-
|
|
50
|
+
UserMessage(f'{field_name} must be {const_value!r}', raise_with=ValueError)
|
|
49
51
|
return values
|
|
50
52
|
|
|
51
53
|
@staticmethod
|
|
@@ -92,7 +94,7 @@ class LLMDataModel(BaseModel):
|
|
|
92
94
|
"""Check if a field has a default value."""
|
|
93
95
|
return field_info.default != ... and field_info.default != PydanticUndefined
|
|
94
96
|
|
|
95
|
-
def format_field(self, key: str, value: Any, indent: int = 0, visited: set = None, depth: int = 0) -> str:
|
|
97
|
+
def format_field(self, key: str, value: Any, indent: int = 0, visited: set | None = None, depth: int = 0) -> str:
|
|
96
98
|
"""Formats a field value for string representation, handling nested structures."""
|
|
97
99
|
visited = visited or set()
|
|
98
100
|
formatter = self._get_formatter_for_value(value)
|
|
@@ -109,18 +111,18 @@ class LLMDataModel(BaseModel):
|
|
|
109
111
|
}
|
|
110
112
|
|
|
111
113
|
for type_class, formatter in formatters.items():
|
|
112
|
-
if type_class
|
|
114
|
+
if type_class is type(None) and value is None:
|
|
113
115
|
return formatter
|
|
114
|
-
if type_class
|
|
116
|
+
if type_class is not type(None) and isinstance(value, type_class):
|
|
115
117
|
return formatter
|
|
116
118
|
|
|
117
119
|
return self._format_primitive_field
|
|
118
120
|
|
|
119
|
-
def _format_none_field(self, key: str,
|
|
121
|
+
def _format_none_field(self, key: str, _value: Any, indent: int, _visited: set, _depth: int) -> str:
|
|
120
122
|
"""Format a None value."""
|
|
121
123
|
return f"{' ' * indent}{key}: None"
|
|
122
124
|
|
|
123
|
-
def _format_enum_field(self, key: str, value: Enum, indent: int,
|
|
125
|
+
def _format_enum_field(self, key: str, value: Enum, indent: int, _visited: set, _depth: int) -> str:
|
|
124
126
|
"""Format an Enum value."""
|
|
125
127
|
return f"{' ' * indent}{key}: {value.value}"
|
|
126
128
|
|
|
@@ -190,11 +192,11 @@ class LLMDataModel(BaseModel):
|
|
|
190
192
|
visited.discard(obj_id)
|
|
191
193
|
return f"{indent_str}{key}:\n" + "\n".join(items) if key else "\n".join(items)
|
|
192
194
|
|
|
193
|
-
def _format_primitive_field(self, key: str, value: Any, indent: int,
|
|
195
|
+
def _format_primitive_field(self, key: str, value: Any, indent: int, _visited: set, _depth: int) -> str:
|
|
194
196
|
"""Format a primitive field."""
|
|
195
197
|
return f"{' ' * indent}{key}: {value}"
|
|
196
198
|
|
|
197
|
-
def __str__(self, indent: int = 0, visited: set = None, depth: int = 0) -> str:
|
|
199
|
+
def __str__(self, indent: int = 0, visited: set | None = None, depth: int = 0) -> str:
|
|
198
200
|
"""
|
|
199
201
|
Converts the model into a formatted string for LLM prompts.
|
|
200
202
|
Handles indentation for nested models and includes an optional section header.
|
|
@@ -207,14 +209,11 @@ class LLMDataModel(BaseModel):
|
|
|
207
209
|
for name, field in type(self).model_fields.items()
|
|
208
210
|
if (
|
|
209
211
|
not getattr(field, "exclude", False)
|
|
210
|
-
and
|
|
212
|
+
and name != "section_header"
|
|
211
213
|
)
|
|
212
214
|
]
|
|
213
215
|
|
|
214
|
-
if field_list
|
|
215
|
-
fields = "\n".join(field_list) + "\n"
|
|
216
|
-
else:
|
|
217
|
-
fields = ""
|
|
216
|
+
fields = "\n".join(field_list) + "\n" if field_list else ""
|
|
218
217
|
|
|
219
218
|
if self.section_header and indent == 0:
|
|
220
219
|
header = f"{indent_str}[[{self.section_header}]]\n"
|
|
@@ -227,7 +226,7 @@ class LLMDataModel(BaseModel):
|
|
|
227
226
|
|
|
228
227
|
def remedy(self):
|
|
229
228
|
"""Default remedy method for the model."""
|
|
230
|
-
return
|
|
229
|
+
return
|
|
231
230
|
|
|
232
231
|
@classmethod
|
|
233
232
|
@lru_cache(maxsize=128)
|
|
@@ -282,7 +281,7 @@ class LLMDataModel(BaseModel):
|
|
|
282
281
|
|
|
283
282
|
@classmethod
|
|
284
283
|
def _format_schema_field(cls, name: str, field_schema: dict, required: bool,
|
|
285
|
-
definitions: dict, indent_level: int, visited: set = None) -> str:
|
|
284
|
+
definitions: dict, indent_level: int, visited: set | None = None) -> str:
|
|
286
285
|
"""Format a single schema field without descriptions (kept for definitions)."""
|
|
287
286
|
visited = visited or set()
|
|
288
287
|
|
|
@@ -332,7 +331,7 @@ class LLMDataModel(BaseModel):
|
|
|
332
331
|
|
|
333
332
|
@classmethod
|
|
334
333
|
def _format_schema_fields(cls, properties: dict, schema: dict, definitions: dict,
|
|
335
|
-
indent_level: int, visited: set = None) -> str:
|
|
334
|
+
indent_level: int, visited: set | None = None) -> str:
|
|
336
335
|
"""Format multiple schema fields."""
|
|
337
336
|
visited = visited or set()
|
|
338
337
|
required_fields = set(schema.get("required", []))
|
|
@@ -460,138 +459,180 @@ class LLMDataModel(BaseModel):
|
|
|
460
459
|
Also includes the root model's fields (from root_schema) so their descriptions/examples
|
|
461
460
|
are visible, not just $defs.
|
|
462
461
|
"""
|
|
463
|
-
lines = []
|
|
464
|
-
visited_defs = set()
|
|
465
|
-
|
|
466
|
-
|
|
467
|
-
"""Render property lines using only Field(description=...), with const/excerpts.
|
|
468
|
-
|
|
469
|
-
Always lists properties; if description is missing, emit a generic guidance message.
|
|
470
|
-
"""
|
|
471
|
-
out: list[str] = []
|
|
472
|
-
def _fmt_example_value(val):
|
|
473
|
-
if isinstance(val, str):
|
|
474
|
-
return val
|
|
475
|
-
try:
|
|
476
|
-
return json.dumps(val, ensure_ascii=False)
|
|
477
|
-
except Exception:
|
|
478
|
-
return str(val)
|
|
479
|
-
for prop_name, prop_schema in props.items():
|
|
480
|
-
if prop_name == "section_header":
|
|
481
|
-
continue
|
|
482
|
-
desc = prop_schema.get("description")
|
|
483
|
-
const_note = ""
|
|
484
|
-
if "const_value" in prop_schema:
|
|
485
|
-
const_note = f' (const value: "{prop_schema["const_value"]}")'
|
|
486
|
-
if not desc:
|
|
487
|
-
out.append(
|
|
488
|
-
f' - "{prop_name}": '
|
|
489
|
-
'No definition provided. Focus on the [[Schema]] and the prompt to infer '
|
|
490
|
-
'the expected structure and constraints.'
|
|
491
|
-
)
|
|
492
|
-
else:
|
|
493
|
-
out.append(f' - "{prop_name}": {desc}{const_note}')
|
|
494
|
-
|
|
495
|
-
examples = prop_schema.get("examples")
|
|
496
|
-
if examples is None and "example" in prop_schema:
|
|
497
|
-
examples = prop_schema.get("example")
|
|
498
|
-
|
|
499
|
-
if isinstance(examples, (list, tuple)):
|
|
500
|
-
if len(examples) > 0:
|
|
501
|
-
out.append(" - Examples:")
|
|
502
|
-
for ex in examples:
|
|
503
|
-
out.append(f" - {_fmt_example_value(ex)}")
|
|
504
|
-
elif examples is not None:
|
|
505
|
-
out.append(f" - Example: {_fmt_example_value(examples)}")
|
|
506
|
-
return out
|
|
507
|
-
|
|
508
|
-
# Include root model's fields in Definitions (for descriptions/examples)
|
|
509
|
-
if root_schema and isinstance(root_schema, dict):
|
|
510
|
-
root_title = root_schema.get("title", "Root")
|
|
511
|
-
root_props = cls._extract_schema_properties(root_schema)
|
|
512
|
-
if root_props:
|
|
513
|
-
lines.append(f"- {root_title}:")
|
|
514
|
-
lines.extend(_format_definition_properties(root_props))
|
|
462
|
+
lines: list[str] = []
|
|
463
|
+
visited_defs: set[str] = set()
|
|
464
|
+
|
|
465
|
+
lines.extend(cls._format_root_definition_lines(root_schema))
|
|
515
466
|
|
|
516
467
|
for name, definition in definitions.items():
|
|
517
468
|
if name in visited_defs:
|
|
518
469
|
continue
|
|
519
470
|
visited_defs.add(name)
|
|
520
|
-
lines.
|
|
471
|
+
lines.extend(cls._format_single_definition(name, definition))
|
|
472
|
+
|
|
473
|
+
return "\n".join(lines)
|
|
474
|
+
|
|
475
|
+
@classmethod
|
|
476
|
+
def _format_root_definition_lines(cls, root_schema: dict | None) -> list[str]:
|
|
477
|
+
"""Format definitions derived from the root schema."""
|
|
478
|
+
if not (root_schema and isinstance(root_schema, dict)):
|
|
479
|
+
return []
|
|
480
|
+
root_props = cls._extract_schema_properties(root_schema)
|
|
481
|
+
if not root_props:
|
|
482
|
+
return []
|
|
483
|
+
root_title = root_schema.get("title", "Root")
|
|
484
|
+
lines = [f"- {root_title}:"]
|
|
485
|
+
lines.extend(cls._format_definition_properties(root_props))
|
|
486
|
+
return lines
|
|
521
487
|
|
|
522
|
-
|
|
523
|
-
|
|
524
|
-
|
|
488
|
+
@classmethod
|
|
489
|
+
def _format_single_definition(cls, name: str, definition: dict) -> list[str]:
|
|
490
|
+
"""Format a single definition block, including enum handling."""
|
|
491
|
+
lines = [f"- {name}:"]
|
|
492
|
+
if "enum" in definition:
|
|
493
|
+
enum_values = ", ".join(map(repr, definition["enum"]))
|
|
494
|
+
lines.append(f" Enum values: {enum_values}")
|
|
495
|
+
return lines
|
|
496
|
+
props = definition.get("properties", {})
|
|
497
|
+
lines.extend(cls._format_definition_properties(props))
|
|
498
|
+
return lines
|
|
499
|
+
|
|
500
|
+
@classmethod
|
|
501
|
+
def _format_definition_properties(cls, props: dict) -> list[str]:
|
|
502
|
+
"""Render property lines using descriptions and examples."""
|
|
503
|
+
out: list[str] = []
|
|
504
|
+
for prop_name, prop_schema in props.items():
|
|
505
|
+
if prop_name == "section_header":
|
|
525
506
|
continue
|
|
507
|
+
out.append(cls._format_property_description(prop_name, prop_schema))
|
|
508
|
+
out.extend(cls._format_property_examples(prop_schema))
|
|
509
|
+
return out
|
|
526
510
|
|
|
527
|
-
|
|
528
|
-
|
|
511
|
+
@staticmethod
|
|
512
|
+
def _format_property_description(prop_name: str, prop_schema: dict) -> str:
|
|
513
|
+
"""Format the description line for a property, including const hints."""
|
|
514
|
+
desc = prop_schema.get("description")
|
|
515
|
+
const_note = ""
|
|
516
|
+
if "const_value" in prop_schema:
|
|
517
|
+
const_note = f' (const value: "{prop_schema["const_value"]}")'
|
|
518
|
+
if not desc:
|
|
519
|
+
return (
|
|
520
|
+
f' - "{prop_name}": '
|
|
521
|
+
"No definition provided. Focus on the [[Schema]] and the prompt to infer "
|
|
522
|
+
"the expected structure and constraints."
|
|
523
|
+
)
|
|
524
|
+
return f' - "{prop_name}": {desc}{const_note}'
|
|
529
525
|
|
|
530
|
-
|
|
526
|
+
@classmethod
|
|
527
|
+
def _format_property_examples(cls, prop_schema: dict) -> list[str]:
|
|
528
|
+
"""Format example lines for a property schema."""
|
|
529
|
+
examples = prop_schema.get("examples")
|
|
530
|
+
if examples is None and "example" in prop_schema:
|
|
531
|
+
examples = prop_schema.get("example")
|
|
532
|
+
if isinstance(examples, (list, tuple)):
|
|
533
|
+
if not examples:
|
|
534
|
+
return []
|
|
535
|
+
lines = [" - Examples:"]
|
|
536
|
+
for example in examples:
|
|
537
|
+
lines.append(f" - {cls._format_example_value(example)}")
|
|
538
|
+
return lines
|
|
539
|
+
if examples is not None:
|
|
540
|
+
return [f" - Example: {cls._format_example_value(examples)}"]
|
|
541
|
+
return []
|
|
542
|
+
|
|
543
|
+
@staticmethod
|
|
544
|
+
def _format_example_value(val: Any) -> str:
|
|
545
|
+
"""Safely format example values for display, preserving human readability."""
|
|
546
|
+
if isinstance(val, str):
|
|
547
|
+
return val
|
|
548
|
+
try:
|
|
549
|
+
return json.dumps(val, ensure_ascii=False)
|
|
550
|
+
except Exception:
|
|
551
|
+
return str(val)
|
|
531
552
|
|
|
532
553
|
@classmethod
|
|
533
554
|
def _generate_type_description(cls, type_desc: str | dict) -> str:
|
|
534
555
|
"""Generate a human-readable description for a type."""
|
|
535
|
-
|
|
556
|
+
normalized = cls._normalize_type_descriptor(type_desc)
|
|
557
|
+
if normalized is None:
|
|
536
558
|
return "unknown"
|
|
537
559
|
|
|
560
|
+
composite_description = cls._describe_composite_type(normalized)
|
|
561
|
+
return composite_description if composite_description is not None else normalized
|
|
562
|
+
|
|
563
|
+
@classmethod
|
|
564
|
+
def _normalize_type_descriptor(cls, type_desc: Any) -> str | None:
|
|
565
|
+
"""Normalize a type descriptor into a descriptive string."""
|
|
566
|
+
if type_desc is None:
|
|
567
|
+
return None
|
|
538
568
|
if isinstance(type_desc, dict):
|
|
539
569
|
type_desc = cls._resolve_field_type(type_desc, {})
|
|
540
|
-
|
|
541
570
|
if isinstance(type_desc, type):
|
|
542
571
|
type_desc = type_desc.__name__
|
|
543
|
-
|
|
544
572
|
if not isinstance(type_desc, str):
|
|
545
573
|
type_desc = str(type_desc)
|
|
574
|
+
return type_desc
|
|
546
575
|
|
|
547
|
-
|
|
548
|
-
|
|
549
|
-
|
|
550
|
-
|
|
551
|
-
|
|
552
|
-
|
|
553
|
-
"
|
|
554
|
-
"
|
|
555
|
-
"tuple of": ("tuple", "A tuple with specific types:"),
|
|
556
|
-
"object of": ("dict", "A dictionary"),
|
|
576
|
+
@classmethod
|
|
577
|
+
def _describe_composite_type(cls, type_desc: str) -> str | None:
|
|
578
|
+
"""Describe composite collection-like types with friendly language."""
|
|
579
|
+
handlers = {
|
|
580
|
+
"array of ": cls._describe_list_type,
|
|
581
|
+
"set of ": cls._describe_set_type,
|
|
582
|
+
"tuple of ": cls._describe_tuple_type,
|
|
583
|
+
"object of ": cls._describe_dict_type,
|
|
557
584
|
}
|
|
585
|
+
for prefix, handler in handlers.items():
|
|
586
|
+
if type_desc.startswith(prefix):
|
|
587
|
+
item_type = type_desc[len(prefix):]
|
|
588
|
+
return handler(item_type)
|
|
589
|
+
return None
|
|
558
590
|
|
|
559
|
-
|
|
560
|
-
|
|
561
|
-
|
|
562
|
-
|
|
563
|
-
|
|
564
|
-
|
|
565
|
-
|
|
566
|
-
element_desc = item_type
|
|
567
|
-
elif inner := extract_after(item_type, "array of "):
|
|
568
|
-
element_desc = f"a list of {inner} values"
|
|
569
|
-
elif inner := extract_after(item_type, "set of "):
|
|
570
|
-
element_desc = f"a set of unique {inner} values"
|
|
571
|
-
elif inner := extract_after(item_type, "tuple of "):
|
|
572
|
-
element_desc = f"a tuple with types: {inner}"
|
|
573
|
-
elif inner := extract_after(item_type, "object of "):
|
|
574
|
-
element_desc = f"a dictionary with {inner} values"
|
|
575
|
-
else:
|
|
576
|
-
# Simple types
|
|
577
|
-
if type_name == "list":
|
|
578
|
-
return f"A list containing {item_type} values"
|
|
579
|
-
elif type_name == "set":
|
|
580
|
-
return f"A set containing unique {item_type} values"
|
|
581
|
-
elif type_name == "dict":
|
|
582
|
-
return f"A dictionary with {item_type} values"
|
|
583
|
-
else:
|
|
584
|
-
element_desc = item_type
|
|
585
|
-
|
|
586
|
-
# Format the final description
|
|
587
|
-
if type_name == "list":
|
|
588
|
-
return f"A list where each element is {element_desc}"
|
|
589
|
-
elif type_name == "set":
|
|
590
|
-
return f"A set where each element is {element_desc}"
|
|
591
|
-
elif type_name == "dict":
|
|
592
|
-
return f"A dictionary where each value is {element_desc}"
|
|
591
|
+
@classmethod
|
|
592
|
+
def _describe_list_type(cls, item_type: str) -> str:
|
|
593
|
+
"""Describe a list-style type."""
|
|
594
|
+
element_desc = cls._nested_element_description(item_type)
|
|
595
|
+
if element_desc is None:
|
|
596
|
+
return f"A list containing {item_type} values"
|
|
597
|
+
return f"A list where each element is {element_desc}"
|
|
593
598
|
|
|
594
|
-
|
|
599
|
+
@classmethod
|
|
600
|
+
def _describe_set_type(cls, item_type: str) -> str:
|
|
601
|
+
"""Describe a set-style type."""
|
|
602
|
+
element_desc = cls._nested_element_description(item_type)
|
|
603
|
+
if element_desc is None:
|
|
604
|
+
return f"A set containing unique {item_type} values"
|
|
605
|
+
return f"A set where each element is {element_desc}"
|
|
606
|
+
|
|
607
|
+
@staticmethod
|
|
608
|
+
def _describe_tuple_type(item_type: str) -> str:
|
|
609
|
+
"""Describe a tuple-style type."""
|
|
610
|
+
return f"A tuple with specific types: {item_type}"
|
|
611
|
+
|
|
612
|
+
@classmethod
|
|
613
|
+
def _describe_dict_type(cls, item_type: str) -> str:
|
|
614
|
+
"""Describe a dictionary-style type."""
|
|
615
|
+
element_desc = cls._nested_element_description(item_type)
|
|
616
|
+
if element_desc is None:
|
|
617
|
+
return f"A dictionary with {item_type} values"
|
|
618
|
+
return f"A dictionary where each value is {element_desc}"
|
|
619
|
+
|
|
620
|
+
@staticmethod
|
|
621
|
+
def _nested_element_description(item_type: str) -> str | None:
|
|
622
|
+
"""Convert nested composite descriptors into human-friendly text."""
|
|
623
|
+
if item_type.startswith("nested object"):
|
|
624
|
+
return item_type
|
|
625
|
+
nested_mappings = {
|
|
626
|
+
"array of ": "a list of {} values",
|
|
627
|
+
"set of ": "a set of unique {} values",
|
|
628
|
+
"tuple of ": "a tuple with types: {}",
|
|
629
|
+
"object of ": "a dictionary with {} values",
|
|
630
|
+
}
|
|
631
|
+
for prefix, template in nested_mappings.items():
|
|
632
|
+
if item_type.startswith(prefix):
|
|
633
|
+
inner = item_type[len(prefix):]
|
|
634
|
+
return template.format(inner)
|
|
635
|
+
return None
|
|
595
636
|
|
|
596
637
|
@classmethod
|
|
597
638
|
def _compose_schema_output(cls, main_schema: str, definitions_schema: str) -> str:
|
|
@@ -612,7 +653,7 @@ class LLMDataModel(BaseModel):
|
|
|
612
653
|
return cls._generate_example_for_model(model, visited_models)
|
|
613
654
|
|
|
614
655
|
@staticmethod
|
|
615
|
-
def _generate_example_for_model(model:
|
|
656
|
+
def _generate_example_for_model(model: type[BaseModel], visited_models: set) -> dict:
|
|
616
657
|
"""Generate example for a model, excluding section_header."""
|
|
617
658
|
example = {}
|
|
618
659
|
for field_name, model_field in model.model_fields.items():
|
|
@@ -640,7 +681,7 @@ class LLMDataModel(BaseModel):
|
|
|
640
681
|
if is_desc_like and (ann is str or ann is Any or ann is None):
|
|
641
682
|
return "example_string"
|
|
642
683
|
return default_val
|
|
643
|
-
|
|
684
|
+
if model_field.default_factory is not None:
|
|
644
685
|
# For example generation, we want to show structure even if default is empty
|
|
645
686
|
# Check if default_factory would produce an empty container
|
|
646
687
|
default_val = model_field.default_factory()
|
|
@@ -650,10 +691,9 @@ class LLMDataModel(BaseModel):
|
|
|
650
691
|
model_field.annotation, visited_models
|
|
651
692
|
)
|
|
652
693
|
return default_val
|
|
653
|
-
|
|
654
|
-
|
|
655
|
-
|
|
656
|
-
)
|
|
694
|
+
return LLMDataModel._generate_value_for_type(
|
|
695
|
+
model_field.annotation, visited_models
|
|
696
|
+
)
|
|
657
697
|
|
|
658
698
|
@staticmethod
|
|
659
699
|
def _generate_value_for_type(field_type: Any, visited_models: set) -> Any:
|
|
@@ -838,7 +878,7 @@ class LLMDataModel(BaseModel):
|
|
|
838
878
|
return submodel.generate_example_json()
|
|
839
879
|
|
|
840
880
|
@classmethod
|
|
841
|
-
def _generate_non_null_example_for_model(cls, model:
|
|
881
|
+
def _generate_non_null_example_for_model(cls, model: type[BaseModel], visited_models: set | None = None) -> dict:
|
|
842
882
|
"""Generate an example for a model, preferring non-null for Optional fields (recursive)."""
|
|
843
883
|
if visited_models is None:
|
|
844
884
|
visited_models = set()
|
|
@@ -887,56 +927,104 @@ class LLMDataModel(BaseModel):
|
|
|
887
927
|
"""Unified generator for example values; prefer_non_null to avoid None variants."""
|
|
888
928
|
origin = get_origin(field_type) or field_type
|
|
889
929
|
|
|
890
|
-
|
|
930
|
+
handled, value = cls._handle_enum_type(origin)
|
|
931
|
+
if handled:
|
|
932
|
+
return value
|
|
933
|
+
|
|
934
|
+
handled, value = cls._handle_literal_type(origin, field_type)
|
|
935
|
+
if handled:
|
|
936
|
+
return value
|
|
937
|
+
|
|
938
|
+
handled, value = cls._handle_model_type(origin, field_type, visited_models, prefer_non_null)
|
|
939
|
+
if handled:
|
|
940
|
+
return value
|
|
941
|
+
|
|
942
|
+
handled, value = cls._handle_union_type(field_type, visited_models, prefer_non_null)
|
|
943
|
+
if handled:
|
|
944
|
+
return value
|
|
945
|
+
|
|
946
|
+
handled, value = cls._handle_collection_type(field_type, visited_models, prefer_non_null)
|
|
947
|
+
if handled:
|
|
948
|
+
return value
|
|
949
|
+
|
|
950
|
+
return LLMDataModel._generate_primitive_value(field_type)
|
|
951
|
+
|
|
952
|
+
@staticmethod
|
|
953
|
+
def _handle_enum_type(origin: Any) -> tuple[bool, Any]:
|
|
954
|
+
"""Handle Enum types when generating example values."""
|
|
891
955
|
if isinstance(origin, type) and issubclass(origin, Enum):
|
|
892
|
-
|
|
956
|
+
first_member = next(iter(origin), None)
|
|
957
|
+
value = first_member.value if first_member is not None else "enum_value"
|
|
958
|
+
return True, value
|
|
959
|
+
return False, None
|
|
893
960
|
|
|
894
|
-
|
|
961
|
+
@staticmethod
|
|
962
|
+
def _handle_literal_type(origin: Any, field_type: Any) -> tuple[bool, Any]:
|
|
963
|
+
"""Handle Literal[...] annotations."""
|
|
895
964
|
if origin is Literal:
|
|
896
|
-
|
|
897
|
-
|
|
898
|
-
|
|
899
|
-
|
|
900
|
-
|
|
901
|
-
|
|
902
|
-
|
|
903
|
-
|
|
904
|
-
|
|
905
|
-
|
|
906
|
-
|
|
907
|
-
|
|
908
|
-
|
|
909
|
-
if LLMDataModel.
|
|
910
|
-
|
|
911
|
-
|
|
912
|
-
|
|
913
|
-
|
|
914
|
-
|
|
915
|
-
|
|
916
|
-
|
|
917
|
-
|
|
918
|
-
|
|
919
|
-
|
|
920
|
-
|
|
921
|
-
|
|
922
|
-
|
|
923
|
-
|
|
924
|
-
|
|
925
|
-
|
|
926
|
-
|
|
927
|
-
|
|
928
|
-
|
|
929
|
-
|
|
930
|
-
|
|
931
|
-
|
|
932
|
-
|
|
933
|
-
|
|
934
|
-
|
|
935
|
-
|
|
936
|
-
|
|
965
|
+
literal_args = get_args(field_type)
|
|
966
|
+
return True, literal_args[0] if literal_args else None
|
|
967
|
+
return False, None
|
|
968
|
+
|
|
969
|
+
@classmethod
|
|
970
|
+
def _handle_model_type(cls, origin: Any, field_type: Any, visited_models: set, prefer_non_null: bool) -> tuple[bool, Any]:
|
|
971
|
+
"""Handle Pydantic BaseModel subclasses."""
|
|
972
|
+
if not (isinstance(origin, type) and issubclass(origin, BaseModel)):
|
|
973
|
+
return False, None
|
|
974
|
+
model_name = field_type.__name__
|
|
975
|
+
if model_name in visited_models:
|
|
976
|
+
return True, {}
|
|
977
|
+
visited_models.add(model_name)
|
|
978
|
+
generator = cls._generate_non_null_example_for_model if prefer_non_null else LLMDataModel._generate_example_for_model
|
|
979
|
+
return True, generator(field_type, visited_models.copy())
|
|
980
|
+
|
|
981
|
+
@classmethod
|
|
982
|
+
def _handle_union_type(cls, field_type: Any, visited_models: set, prefer_non_null: bool) -> tuple[bool, Any]:
|
|
983
|
+
"""Handle Optional/Union annotations."""
|
|
984
|
+
if not LLMDataModel._is_union_type(field_type):
|
|
985
|
+
return False, None
|
|
986
|
+
subtypes = LLMDataModel._get_union_types(field_type, exclude_none=True)
|
|
987
|
+
if not subtypes:
|
|
988
|
+
return True, None
|
|
989
|
+
chosen = subtypes[0]
|
|
990
|
+
value = cls._generate_value_for_type_generic(chosen, visited_models, prefer_non_null)
|
|
991
|
+
return True, value
|
|
992
|
+
|
|
993
|
+
@classmethod
|
|
994
|
+
def _handle_collection_type(cls, field_type: Any, visited_models: set, prefer_non_null: bool) -> tuple[bool, Any]:
|
|
995
|
+
"""Handle list/dict/set/tuple-like annotations."""
|
|
996
|
+
if not LLMDataModel._is_collection_type(field_type):
|
|
997
|
+
return False, None
|
|
998
|
+
|
|
999
|
+
origin = get_origin(field_type) or field_type
|
|
1000
|
+
args = get_args(field_type)
|
|
1001
|
+
|
|
1002
|
+
if origin is list:
|
|
1003
|
+
item_type = args[0] if args else Any
|
|
1004
|
+
value = [cls._generate_value_for_type_generic(item_type, visited_models, prefer_non_null)]
|
|
1005
|
+
return True, value
|
|
1006
|
+
if origin is dict:
|
|
1007
|
+
key_type, value_type = args if args else (Any, Any)
|
|
1008
|
+
example_key = cls._example_key_for_type(key_type, visited_models)
|
|
1009
|
+
example_value = cls._generate_value_for_type_generic(value_type, visited_models, prefer_non_null)
|
|
1010
|
+
return True, {example_key: example_value}
|
|
1011
|
+
if origin in (set, frozenset):
|
|
1012
|
+
item_type = args[0] if args else Any
|
|
1013
|
+
value = [cls._generate_value_for_type_generic(item_type, visited_models, prefer_non_null)]
|
|
1014
|
+
return True, value
|
|
1015
|
+
if origin is tuple:
|
|
1016
|
+
if args:
|
|
1017
|
+
tuple_values = tuple(
|
|
1018
|
+
cls._generate_value_for_type_generic(t, visited_models, prefer_non_null)
|
|
1019
|
+
for t in args
|
|
1020
|
+
)
|
|
1021
|
+
return True, tuple_values
|
|
1022
|
+
return True, ("item1", "item2")
|
|
1023
|
+
|
|
1024
|
+
return True, []
|
|
937
1025
|
|
|
938
1026
|
@classmethod
|
|
939
|
-
def _format_json_example(cls, obj: Any,
|
|
1027
|
+
def _format_json_example(cls, obj: Any, _indent: int = 0) -> str:
|
|
940
1028
|
"""Format an object as a JSON string representation."""
|
|
941
1029
|
return json.dumps(obj, indent=2, default=str)
|
|
942
1030
|
|
|
@@ -952,20 +1040,20 @@ class LLMDataModel(BaseModel):
|
|
|
952
1040
|
return "\n\n".join(example_blocks)
|
|
953
1041
|
|
|
954
1042
|
|
|
955
|
-
def build_dynamic_llm_datamodel(py_type: Any) ->
|
|
1043
|
+
def build_dynamic_llm_datamodel(py_type: Any) -> type[LLMDataModel]:
|
|
956
1044
|
"""Dynamically create a subclass of LLMDataModel with a single 'value' field."""
|
|
957
1045
|
model_name = f"LLMDynamicDataModel_{hash(str(py_type)) & 0xFFFFFFFF:X}"
|
|
958
1046
|
|
|
959
|
-
model:
|
|
1047
|
+
model: type[LLMDataModel] = create_model(
|
|
960
1048
|
model_name,
|
|
961
1049
|
__base__=LLMDataModel,
|
|
962
1050
|
value=(
|
|
963
1051
|
py_type,
|
|
964
1052
|
Field(
|
|
965
1053
|
...,
|
|
966
|
-
description="This is a dynamically generated data model. This description is general. "
|
|
967
|
-
"If you're dealing with a complex type, or nested types in combination with unions, make sure you "
|
|
968
|
-
"understand the instructions provided in the prompt, and select the appropriate data model based on the "
|
|
1054
|
+
description="This is a dynamically generated data model. This description is general. "
|
|
1055
|
+
"If you're dealing with a complex type, or nested types in combination with unions, make sure you "
|
|
1056
|
+
"understand the instructions provided in the prompt, and select the appropriate data model based on the "
|
|
969
1057
|
"type at hand, as described in the schema section."
|
|
970
1058
|
)
|
|
971
1059
|
),
|
symai/models/errors.py
CHANGED