speedy-utils 1.1.17__py3-none-any.whl → 1.1.19__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,288 @@
1
+ # type: ignore
2
+
3
+ """
4
+ Simplified LLM Task module for handling language model interactions with structured input/output.
5
+ """
6
+
7
+ from typing import Any, Dict, List, Optional, Type, Union, cast
8
+
9
+ from openai import OpenAI
10
+ from openai.types.chat import ChatCompletionMessageParam
11
+ from pydantic import BaseModel
12
+ from pydantic import create_model
13
+ from typing import Callable, Tuple
14
+ from abc import ABC, abstractmethod
15
+
16
+ # Type aliases for better readability
17
+ Messages = List[ChatCompletionMessageParam]
18
+
19
+ import json
20
+ from typing import Type, TypeVar
21
+
22
+ B = TypeVar("B", bound="BasePromptBuilder")
23
+
24
+
25
class BasePromptBuilder(BaseModel, ABC):
    """
    Abstract base class for prompt builders.

    Provides a consistent interface for:
    - input/output key declaration
    - prompt building
    - schema enforcement via auto-built models

    Subclasses declare their data as pydantic fields and implement
    ``get_instruction`` and ``get_io_keys``; every other method here is
    derived from those two.
    """

    # ------------------------------------------------------------------ #
    # Abstract methods
    # ------------------------------------------------------------------ #
    @abstractmethod
    def get_instruction(self) -> str:
        """Return the system instruction string (role of the model)."""
        raise NotImplementedError

    @abstractmethod
    def get_io_keys(self) -> Tuple[List[str], List[Union[str, Tuple[str, str]]]]:
        """
        Return (input_keys, output_keys).

        Each key must match a field of the subclass.
        For output_keys, you can use:
        - str: Use the field name as-is
        - tuple[str, str]: (original_field_name, renamed_field_name)
        Input keys are always strings.
        """
        raise NotImplementedError

    # ------------------------------------------------------------------ #
    # Internal helpers
    # ------------------------------------------------------------------ #
    @staticmethod
    def _split_key(key: Union[str, Tuple[str, str]]) -> Tuple[str, str]:
        """Normalize a key spec to ``(original_field_name, exposed_name)``.

        A plain string maps to itself; a tuple is unpacked as
        ``(original, renamed)`` and must therefore have exactly two items.
        """
        if isinstance(key, tuple):
            original_key, renamed_key = key
            return original_key, renamed_key
        return key, key

    @staticmethod
    def _format_label(name: Any) -> str:
        """Turn a key such as ``first_name`` into the label ``First Name``.

        ``str()`` is applied first so that non-string dict keys (e.g. ints)
        are rendered instead of raising ``AttributeError``.
        """
        return str(name).replace('_', ' ').title()

    @staticmethod
    def _load_message_json(message: Dict[str, Any], role: str) -> Dict[str, Any]:
        """Parse the JSON ``content`` of *message* and return it as a dict.

        Raises:
            ValueError: if ``content`` is missing, is not valid JSON, or
                does not decode to a JSON object.
        """
        try:
            data = json.loads(message["content"])  # type: ignore[index]
        except Exception as e:
            # Every failure (missing key, wrong type, bad JSON) is reported
            # as ValueError; chain the cause so the traceback keeps it.
            raise ValueError(f"Invalid {role} JSON content: {e}") from e
        if not isinstance(data, dict):
            # The payload is later expanded with ``**``; reject anything
            # that is not a mapping here with a clear message.
            raise ValueError(
                f"Invalid {role} JSON content: expected a JSON object, "
                f"got {type(data).__name__}"
            )
        return data

    # ------------------------------------------------------------------ #
    # Auto-build models from keys
    # ------------------------------------------------------------------ #
    def _build_model_from_keys(self, keys: Union[List[str], List[Union[str, Tuple[str, str]]]], name: str) -> Type[BaseModel]:
        """Create a pydantic model called *name* containing only *keys*.

        Each field copies the annotation of the matching field on this
        class. A field whose default is ``None`` is declared required
        (``...``) in the generated model; any other default is copied
        through unchanged.

        Raises:
            ValueError: if a key does not name a field of this class.
        """
        # Read the field map from the class: accessing ``model_fields`` on an
        # instance is deprecated in recent pydantic releases.
        model_fields = type(self).model_fields
        fields: Dict[str, Tuple[Any, Any]] = {}
        for key in keys:
            original_key, target_key = self._split_key(key)
            if original_key not in model_fields:
                raise ValueError(f"Key '{original_key}' not found in model fields")
            field_info = model_fields[original_key]
            # Fall back to the type ``Any`` itself (not a tuple wrapping it)
            # when the field carries no annotation.
            field_type = field_info.annotation if field_info.annotation is not None else Any
            default = field_info.default if field_info.default is not None else ...
            fields[target_key] = (field_type, default)
        return create_model(name, **fields)  # type: ignore

    def get_input_model(self) -> Type[BaseModel]:
        """Return a model class holding only the declared input fields."""
        input_keys, _ = self.get_io_keys()
        return self._build_model_from_keys(input_keys, "InputModel")

    def get_output_model(self) -> Type[BaseModel]:
        """Return a model class holding only the declared output fields."""
        _, output_keys = self.get_io_keys()
        return self._build_model_from_keys(output_keys, "OutputModel")

    # ------------------------------------------------------------------ #
    # Dump methods (JSON)
    # ------------------------------------------------------------------ #
    def _dump_json_unique(self, schema_model: Type[BaseModel], keys: Union[List[str], List[Union[str, Tuple[str, str]]]], **kwargs) -> str:
        """Serialize the values selected by *keys* through *schema_model*.

        Values come from ``self.model_dump()``; renamed keys are stored
        under their new name. Keys that are absent from the dump or from
        *schema_model* are skipped. ``kwargs`` are forwarded to
        ``model_dump_json``.
        """
        allowed = set(schema_model.model_fields)
        data = self.model_dump()

        filtered: Dict[str, Any] = {}
        for key in keys:
            original_key, target_key = self._split_key(key)
            if original_key in data and target_key in allowed:
                filtered[target_key] = data[original_key]

        return schema_model(**filtered).model_dump_json(**kwargs)

    def model_dump_json_input(self, **kwargs) -> str:
        """Return the input fields as a JSON string."""
        input_keys, _ = self.get_io_keys()
        return self._dump_json_unique(self.get_input_model(), input_keys, **kwargs)

    def model_dump_json_output(self, **kwargs) -> str:
        """Return the output fields (under their exposed names) as a JSON string."""
        _, output_keys = self.get_io_keys()
        return self._dump_json_unique(self.get_output_model(), output_keys, **kwargs)

    # ------------------------------------------------------------------ #
    # Markdown helpers
    # ------------------------------------------------------------------ #
    def _to_markdown(self, obj: Any, level: int = 1, title: Optional[str] = None) -> str:
        """
        Recursively convert dict/list/primitive into clean, generic Markdown.

        Args:
            obj: Value to render.
            level: Nesting depth; levels 1-2 use ``#`` headings, deeper
                levels use bold labels.
            title: Optional key name rendered as the heading/label.

        Returns:
            The rendered lines joined with newlines.
        """
        md: List[str] = []

        # Format title if provided
        if title is not None:
            formatted_title = self._format_label(title)
            if level <= 2:
                md.append(f"{'#' * level} {formatted_title}")
            else:
                md.append(f"**{formatted_title}:**")

        if isinstance(obj, dict):
            if not obj:  # Empty dict
                md.append("None")
            for k, v in obj.items():
                if isinstance(v, (str, int, float, bool)) and len(str(v)) < 100:
                    # Short scalar values are rendered inline
                    key_name = self._format_label(k)
                    prefix = "" if level <= 2 else "- "
                    md.append(f"{prefix}**{key_name}:** {v}")
                else:
                    # Complex values get recursive handling
                    md.append(self._to_markdown(v, level=level + 1, title=k))
        elif isinstance(obj, list):
            if not obj:  # Empty list
                md.append("None")
            elif all(isinstance(i, dict) for i in obj):
                # List of objects: one numbered sub-heading per item
                for i, item in enumerate(obj, 1):
                    if level <= 2:
                        md.append(f"### {title or 'Item'} {i}")
                    else:
                        md.append(f"**{title or 'Item'} {i}:**")
                    # Item fields are flattened to bullets (no recursion)
                    for k, v in item.items():
                        md.append(f"- **{self._format_label(k)}:** {v}")
                    if i < len(obj):  # Add spacing between items
                        md.append("")
            else:
                # Simple list
                md.extend(f"- {item}" for item in obj)
        else:
            # Primitive value (``None`` renders as the text "None")
            md.append(str(obj))

        return "\n".join(md)

    def _dump_markdown_unique(self, keys: Union[List[str], List[Union[str, Tuple[str, str]]]]) -> str:
        """Render the values selected by *keys* as Markdown.

        Fields whose value is ``None`` are omitted. Scalars shorter than
        200 characters are rendered inline; everything else goes through
        ``_to_markdown`` at heading level 2.
        """
        data = self.model_dump()
        filtered: Dict[str, Any] = {}
        for key in keys:
            original_key, target_key = self._split_key(key)
            if original_key in data:
                filtered[target_key] = data[original_key]

        # Generate markdown without top-level headers to avoid duplication
        parts: List[str] = []
        for key, value in filtered.items():
            if value is None:
                continue
            if isinstance(value, (str, int, float, bool)) and len(str(value)) < 200:
                parts.append(f"**{self._format_label(key)}:** {value}")
            else:
                parts.append(self._to_markdown(value, level=2, title=key))

        return '\n'.join(parts)

    def model_dump_markdown_input(self) -> str:
        """Return the input fields rendered as Markdown."""
        input_keys, _ = self.get_io_keys()
        return self._dump_markdown_unique(input_keys)

    def model_dump_markdown_output(self) -> str:
        """Return the output fields rendered as Markdown."""
        _, output_keys = self.get_io_keys()
        return self._dump_markdown_unique(output_keys)

    # ------------------------------------------------------------------ #
    # Training & preview (JSON or Markdown)
    # ------------------------------------------------------------------ #
    def build_training_data(self, format: str = "json", indent=None) -> dict[str, Any]:
        """
        Build training data as OpenAI-style messages.

        Args:
            format: ``"json"`` to serialize input/output as JSON strings, or
                ``"markdown"`` for the Markdown rendering.
            indent: Forwarded to the JSON dump; unused for Markdown.

        Returns:
            ``{"messages": [system, user, assistant]}``.

        Raises:
            ValueError: if *format* is neither ``"json"`` nor ``"markdown"``.
        """
        if format not in ("json", "markdown"):
            raise ValueError("format must be either 'json' or 'markdown'")

        system_content = self.get_instruction()
        if format == "json":
            user_content = self.model_dump_json_input(indent=indent)
            assistant_content = self.model_dump_json_output(indent=indent)
        else:
            user_content = self.model_dump_markdown_input()
            assistant_content = self.model_dump_markdown_output()

        return {
            "messages": [
                {"role": "system", "content": system_content},
                {"role": "user", "content": user_content},
                {"role": "assistant", "content": assistant_content},
            ]
        }

    def __str__(self) -> str:
        """Return a Markdown preview: instruction, input, then the output
        under a ``##`` heading named after the first output key."""
        # Return clean format without explicit role prefixes
        training_data = self.build_training_data(format="markdown")
        messages = training_data['messages']  # type: ignore[index]

        parts = []
        for msg in messages:
            role = msg['role']
            content = msg['content']
            if role in ('system', 'user'):
                parts.append(content)
            elif role == 'assistant':
                # Get output keys to determine the main output field name
                _, output_keys = self.get_io_keys()
                main_output = output_keys[0] if output_keys else 'response'
                if isinstance(main_output, tuple):
                    main_output = main_output[1]  # Use renamed key
                title = self._format_label(main_output)
                parts.append(f"## {title}\n{content}")

        return '\n\n'.join(parts)

    @classmethod
    def from_messages(cls: Type[B], messages: list[dict]) -> B:
        """
        Reconstruct a prompt builder instance from OpenAI-style messages.

        The first ``user`` and first ``assistant`` messages are parsed as
        JSON objects, merged (assistant values win on key clashes) and
        passed to the class constructor as keyword arguments. Output keys
        that were renamed via ``get_io_keys`` are not mapped back to their
        original field names here.

        Raises:
            ValueError: if either message is missing, or its content is not
                a valid JSON object.
        """
        user_msg = next((m for m in messages if m.get("role") == "user"), None)
        assistant_msg = next((m for m in messages if m.get("role") == "assistant"), None)

        if user_msg is None:
            raise ValueError("No user message found")
        if assistant_msg is None:
            raise ValueError("No assistant message found")

        user_data = cls._load_message_json(user_msg, "user")
        assistant_data = cls._load_message_json(assistant_msg, "assistant")

        combined_data = {**user_data, **assistant_data}
        return cast(B, cls(**combined_data))
288
+