speedy-utils 1.1.26__py3-none-any.whl → 1.1.28__py3-none-any.whl

This diff shows the changes between two publicly released versions of the package, as they appear in their public registry. It is provided for informational purposes only.
Files changed (54)
  1. llm_utils/__init__.py +16 -4
  2. llm_utils/chat_format/__init__.py +10 -10
  3. llm_utils/chat_format/display.py +33 -21
  4. llm_utils/chat_format/transform.py +17 -19
  5. llm_utils/chat_format/utils.py +6 -4
  6. llm_utils/group_messages.py +17 -14
  7. llm_utils/lm/__init__.py +6 -5
  8. llm_utils/lm/async_lm/__init__.py +1 -0
  9. llm_utils/lm/async_lm/_utils.py +10 -9
  10. llm_utils/lm/async_lm/async_llm_task.py +141 -137
  11. llm_utils/lm/async_lm/async_lm.py +48 -42
  12. llm_utils/lm/async_lm/async_lm_base.py +59 -60
  13. llm_utils/lm/async_lm/lm_specific.py +4 -3
  14. llm_utils/lm/base_prompt_builder.py +93 -70
  15. llm_utils/lm/llm.py +126 -108
  16. llm_utils/lm/llm_signature.py +4 -2
  17. llm_utils/lm/lm_base.py +72 -73
  18. llm_utils/lm/mixins.py +102 -62
  19. llm_utils/lm/openai_memoize.py +124 -87
  20. llm_utils/lm/signature.py +105 -92
  21. llm_utils/lm/utils.py +42 -23
  22. llm_utils/scripts/vllm_load_balancer.py +23 -30
  23. llm_utils/scripts/vllm_serve.py +8 -7
  24. llm_utils/vector_cache/__init__.py +9 -3
  25. llm_utils/vector_cache/cli.py +1 -1
  26. llm_utils/vector_cache/core.py +59 -63
  27. llm_utils/vector_cache/types.py +7 -5
  28. llm_utils/vector_cache/utils.py +12 -8
  29. speedy_utils/__imports.py +244 -0
  30. speedy_utils/__init__.py +90 -194
  31. speedy_utils/all.py +125 -227
  32. speedy_utils/common/clock.py +37 -42
  33. speedy_utils/common/function_decorator.py +6 -12
  34. speedy_utils/common/logger.py +43 -52
  35. speedy_utils/common/notebook_utils.py +13 -21
  36. speedy_utils/common/patcher.py +21 -17
  37. speedy_utils/common/report_manager.py +42 -44
  38. speedy_utils/common/utils_cache.py +152 -169
  39. speedy_utils/common/utils_io.py +137 -103
  40. speedy_utils/common/utils_misc.py +15 -21
  41. speedy_utils/common/utils_print.py +22 -28
  42. speedy_utils/multi_worker/process.py +66 -79
  43. speedy_utils/multi_worker/thread.py +78 -155
  44. speedy_utils/scripts/mpython.py +38 -36
  45. speedy_utils/scripts/openapi_client_codegen.py +10 -10
  46. {speedy_utils-1.1.26.dist-info → speedy_utils-1.1.28.dist-info}/METADATA +1 -1
  47. speedy_utils-1.1.28.dist-info/RECORD +57 -0
  48. vision_utils/README.md +202 -0
  49. vision_utils/__init__.py +5 -0
  50. vision_utils/io_utils.py +470 -0
  51. vision_utils/plot.py +345 -0
  52. speedy_utils-1.1.26.dist-info/RECORD +0 -52
  53. {speedy_utils-1.1.26.dist-info → speedy_utils-1.1.28.dist-info}/WHEEL +0 -0
  54. {speedy_utils-1.1.26.dist-info → speedy_utils-1.1.28.dist-info}/entry_points.txt +0 -0
llm_utils/lm/async_lm/async_lm_base.py
@@ -40,23 +40,23 @@ class AsyncLMBase:
     def __init__(
         self,
         *,
-        base_url: Optional[str] = None,
-        api_key: Optional[str] = None,
+        base_url: str | None = None,
+        api_key: str | None = None,
         cache: bool = True,
-        ports: Optional[List[int]] = None,
+        ports: list[int] | None = None,
     ) -> None:
         self.base_url = base_url
-        self.api_key = api_key or os.getenv("OPENAI_API_KEY", "abc")
+        self.api_key = api_key or os.getenv('OPENAI_API_KEY', 'abc')
         self._cache = cache
         self.ports = ports
 
     @property
-    def client(self) -> MAsyncOpenAI:
+    def client(self) -> 'MAsyncOpenAI':  # type: ignore
         # if have multiple ports
         if self.ports and self.base_url:
             import random
             import re
-
+
             port = random.choice(self.ports)
             # Replace port in base_url if it exists
             base_url_pattern = r'(https?://[^:/]+):?\d*(/.*)?'
@@ -64,16 +64,16 @@ class AsyncLMBase:
             if match:
                 host_part = match.group(1)
                 path_part = match.group(2) or '/v1'
-                api_base = f"{host_part}:{port}{path_part}"
+                api_base = f'{host_part}:{port}{path_part}'
             else:
                 api_base = self.base_url
-            logger.debug(f"Using port: {port}")
+            logger.debug(f'Using port: {port}')
         else:
             api_base = self.base_url
-
+
         if api_base is None:
-            raise ValueError("base_url must be provided")
-
+            raise ValueError('base_url must be provided')
+
         client = MAsyncOpenAI(
             api_key=self.api_key,
             base_url=api_base,
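The reworked `client` property rotates across multiple backend ports by rewriting the port in `base_url`. A minimal standalone sketch of that substitution logic, using the same regex as the hunk above (`pick_base_url` is an illustrative name, not a package function):

```python
import random
import re

def pick_base_url(base_url: str, ports: list[int]) -> str:
    """Swap the port in base_url for one chosen at random from `ports`."""
    port = random.choice(ports)
    match = re.match(r'(https?://[^:/]+):?\d*(/.*)?', base_url)
    if match:
        host_part = match.group(1)
        path_part = match.group(2) or '/v1'  # default path when none is given
        return f'{host_part}:{port}{path_part}'
    return base_url  # fall back to the unmodified URL

print(pick_base_url('http://localhost:8000/v1', [8000, 8001, 8002]))
# e.g. http://localhost:8001/v1
```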
@@ -89,8 +89,8 @@ class AsyncLMBase:
     async def __call__(  # type: ignore
         self,
         *,
-        prompt: Optional[str] = ...,
-        messages: Optional[RawMsgs] = ...,
+        prompt: str | None = ...,
+        messages: RawMsgs | None = ...,
         response_format: type[str] = str,
         return_openai_response: bool = ...,
         **kwargs: Any,
@@ -100,9 +100,9 @@ class AsyncLMBase:
     async def __call__(
         self,
         *,
-        prompt: Optional[str] = ...,
-        messages: Optional[RawMsgs] = ...,
-        response_format: Type[TModel],
+        prompt: str | None = ...,
+        messages: RawMsgs | None = ...,
+        response_format: type[TModel],
         return_openai_response: bool = ...,
         **kwargs: Any,
     ) -> TModel: ...
@@ -114,62 +114,62 @@ class AsyncLMBase:
     def _convert_messages(msgs: LegacyMsgs) -> Messages:
         converted: Messages = []
         for msg in msgs:
-            role = msg["role"]
-            content = msg["content"]
-            if role == "user":
+            role = msg['role']
+            content = msg['content']
+            if role == 'user':
                 converted.append(
-                    ChatCompletionUserMessageParam(role="user", content=content)
+                    ChatCompletionUserMessageParam(role='user', content=content)
                 )
-            elif role == "assistant":
+            elif role == 'assistant':
                 converted.append(
                     ChatCompletionAssistantMessageParam(
-                        role="assistant", content=content
+                        role='assistant', content=content
                     )
                 )
-            elif role == "system":
+            elif role == 'system':
                 converted.append(
-                    ChatCompletionSystemMessageParam(role="system", content=content)
+                    ChatCompletionSystemMessageParam(role='system', content=content)
                 )
-            elif role == "tool":
+            elif role == 'tool':
                 converted.append(
                     ChatCompletionToolMessageParam(
-                        role="tool",
+                        role='tool',
                         content=content,
-                        tool_call_id=msg.get("tool_call_id") or "",
+                        tool_call_id=msg.get('tool_call_id') or '',
                     )
                 )
             else:
-                converted.append({"role": role, "content": content})  # type: ignore[arg-type]
+                converted.append({'role': role, 'content': content})  # type: ignore[arg-type]
         return converted
 
     @staticmethod
     def _parse_output(
-        raw_response: Any, response_format: Union[type[str], Type[BaseModel]]
-    ) -> Union[str, BaseModel]:
-        if hasattr(raw_response, "model_dump"):
+        raw_response: Any, response_format: type[str] | type[BaseModel]
+    ) -> str | BaseModel:
+        if hasattr(raw_response, 'model_dump'):
             raw_response = raw_response.model_dump()
 
         if response_format is str:
-            if isinstance(raw_response, dict) and "choices" in raw_response:
-                message = raw_response["choices"][0]["message"]
-                return message.get("content", "") or ""
+            if isinstance(raw_response, dict) and 'choices' in raw_response:
+                message = raw_response['choices'][0]['message']
+                return message.get('content', '') or ''
             return cast(str, raw_response)
 
-        model_cls = cast(Type[BaseModel], response_format)
+        model_cls = cast(type[BaseModel], response_format)
 
-        if isinstance(raw_response, dict) and "choices" in raw_response:
-            message = raw_response["choices"][0]["message"]
-            if "parsed" in message:
-                return model_cls.model_validate(message["parsed"])
-            content = message.get("content")
+        if isinstance(raw_response, dict) and 'choices' in raw_response:
+            message = raw_response['choices'][0]['message']
+            if 'parsed' in message:
+                return model_cls.model_validate(message['parsed'])
+            content = message.get('content')
             if content is None:
-                raise ValueError("Model returned empty content")
+                raise ValueError('Model returned empty content')
             try:
                 data = json.loads(content)
                 return model_cls.model_validate(data)
             except Exception as exc:
                 raise ValueError(
-                    f"Failed to parse model output as JSON:\n{content}"
+                    f'Failed to parse model output as JSON:\n{content}'
                 ) from exc
 
         if isinstance(raw_response, model_cls):
@@ -182,7 +182,7 @@ class AsyncLMBase:
             return model_cls.model_validate(data)
         except Exception as exc:
             raise ValueError(
-                f"Model did not return valid JSON:\n---\n{raw_response}"
+                f'Model did not return valid JSON:\n---\n{raw_response}'
             ) from exc
 
     # ------------------------------------------------------------------ #
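For structured outputs, `_parse_output` prefers a server-side `parsed` payload and otherwise validates the JSON found in `message['content']`. A minimal sketch of that fallback order against a plain chat-completion dict (the `Answer` model and the payload are illustrative):

```python
import json
from pydantic import BaseModel

class Answer(BaseModel):
    text: str
    score: float

raw = {
    'choices': [
        {'message': {'content': json.dumps({'text': 'hi', 'score': 0.9})}}
    ]
}

message = raw['choices'][0]['message']
if 'parsed' in message:
    # Server already parsed the structured output for us.
    answer = Answer.model_validate(message['parsed'])
else:
    # Fall back to JSON embedded in the content string.
    answer = Answer.model_validate(json.loads(message['content']))
print(answer)  # text='hi' score=0.9
```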
@@ -190,17 +190,17 @@ class AsyncLMBase:
     # ------------------------------------------------------------------ #
 
     @staticmethod
-    async def list_models(base_url: Optional[str] = None) -> List[str]:
+    async def list_models(base_url: str | None = None) -> list[str]:
         try:
             if base_url is None:
-                raise ValueError("base_url must be provided")
+                raise ValueError('base_url must be provided')
             client = AsyncLMBase(base_url=base_url).client
             base_url_obj: URL = client.base_url
-            logger.debug(f"Base URL: {base_url_obj}")
+            logger.debug(f'Base URL: {base_url_obj}')
             models: AsyncSyncPage[Model] = await client.models.list()  # type: ignore[assignment]
             return [model.id for model in models.data]
         except Exception as exc:
-            logger.error(f"Failed to list models: {exc}")
+            logger.error(f'Failed to list models: {exc}')
             return []
 
     def build_system_prompt(
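A hedged usage sketch for the `list_models` helper above; the import path is inferred from file list item 12, and the URL is illustrative:

```python
import asyncio

from llm_utils.lm.async_lm.async_lm_base import AsyncLMBase  # path per the file list

async def main() -> None:
    # Returns [] on any failure, per the except branch above.
    models = await AsyncLMBase.list_models(base_url='http://localhost:8000/v1')
    print(models)

asyncio.run(main())
```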
@@ -212,15 +212,15 @@ class AsyncLMBase:
         think,
     ):
         if add_json_schema_to_instruction and response_model:
-            schema_block = f"\n\n<output_json_schema>\n{json.dumps(json_schema, indent=2)}\n</output_json_schema>"
+            schema_block = f'\n\n<output_json_schema>\n{json.dumps(json_schema, indent=2)}\n</output_json_schema>'
             # if schema_block not in system_content:
-            if "<output_json_schema>" in system_content:
+            if '<output_json_schema>' in system_content:
                 # remove exsting schema block
                 import re  # replace
 
                 system_content = re.sub(
-                    r"<output_json_schema>.*?</output_json_schema>",
-                    "",
+                    r'<output_json_schema>.*?</output_json_schema>',
+                    '',
                     system_content,
                     flags=re.DOTALL,
                 )
@@ -228,21 +228,20 @@ class AsyncLMBase:
             system_content += schema_block
 
         if think is True:
-            if "/think" in system_content:
+            if '/think' in system_content:
                 pass
-            elif "/no_think" in system_content:
-                system_content = system_content.replace("/no_think", "/think")
+            elif '/no_think' in system_content:
+                system_content = system_content.replace('/no_think', '/think')
             else:
-                system_content += "\n\n/think"
+                system_content += '\n\n/think'
         elif think is False:
-            if "/no_think" in system_content:
+            if '/no_think' in system_content:
                 pass
-            elif "/think" in system_content:
-                system_content = system_content.replace("/think", "/no_think")
+            elif '/think' in system_content:
+                system_content = system_content.replace('/think', '/no_think')
             else:
-                system_content += "\n\n/no_think"
+                system_content += '\n\n/no_think'
         return system_content
 
     async def inspect_history(self):
         """Inspect the history of the LLM calls."""
-        pass
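The `/think` toggle above is plain substring surgery on the system prompt. A standalone mirror of that logic (`set_think_mode` is an illustrative name; note that `'/no_think'` contains `'/think'` as a substring, so in the `think=True` path the first check also matches prompts already marked `/no_think`):

```python
def set_think_mode(system_content: str, think: bool) -> str:
    # Faithful mirror of the branch logic above, for illustration only.
    if think:
        if '/think' in system_content:  # substring also matches '/no_think'
            pass
        elif '/no_think' in system_content:
            system_content = system_content.replace('/no_think', '/think')
        else:
            system_content += '\n\n/think'
    else:
        if '/no_think' in system_content:
            pass
        elif '/think' in system_content:
            system_content = system_content.replace('/think', '/no_think')
        else:
            system_content += '\n\n/no_think'
    return system_content

print(set_think_mode('You are helpful.', think=True))   # appends '\n\n/think'
print(set_think_mode('Be brief. /think', think=False))  # rewrites to '/no_think'
```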
llm_utils/lm/async_lm/lm_specific.py
@@ -2,6 +2,7 @@ from typing import List
 
 from .async_lm import AsyncLM
 
+
 KNOWN_CONFIG = {
     # Qwen3 family (see model card "Best Practices" section)
     "qwen3-think": {
@@ -83,7 +84,7 @@ KNOWN_CONFIG = {
     # Add more as needed...
 }
 
-KNOWN_KEYS: List[str] = list(KNOWN_CONFIG.keys())
+KNOWN_KEYS: list[str] = list(KNOWN_CONFIG.keys())
 
 
 class AsyncLMQwenThink(AsyncLM):
@@ -107,7 +108,7 @@ class AsyncLMQwenThink(AsyncLM):
             top_k=top_k,
             presence_penalty=presence_penalty,
             **other_kwargs,
-            think=True
+            think=True,
         )
 
 
@@ -132,5 +133,5 @@ class AsyncLMQwenNoThink(AsyncLM):
             top_k=top_k,
             presence_penalty=presence_penalty,
             **other_kwargs,
-            think=False
+            think=False,
         )
llm_utils/lm/base_prompt_builder.py
@@ -4,22 +4,23 @@
 Simplified LLM Task module for handling language model interactions with structured input/output.
 """
 
-from typing import Any, Dict, List, Optional, Type, Union, cast
+from abc import ABC, abstractmethod
+from collections.abc import Callable
+from typing import Any, Dict, List, Optional, Tuple, Type, Union, cast
 
 from openai import OpenAI
 from openai.types.chat import ChatCompletionMessageParam
-from pydantic import BaseModel
-from pydantic import create_model
-from typing import Callable, Tuple
-from abc import ABC, abstractmethod
+from pydantic import BaseModel, create_model
+
 
 # Type aliases for better readability
-Messages = List[ChatCompletionMessageParam]
+Messages = list[ChatCompletionMessageParam]
 
 import json
-from typing import Type, TypeVar
+from typing import TypeVar
+
 
-B = TypeVar("B", bound="BasePromptBuilder")
+B = TypeVar('B', bound='BasePromptBuilder')
 
 
 class BasePromptBuilder(BaseModel, ABC):
@@ -40,7 +41,7 @@ class BasePromptBuilder(BaseModel, ABC):
         raise NotImplementedError
 
     @abstractmethod
-    def get_io_keys(self) -> Tuple[List[str], List[Union[str, Tuple[str, str]]]]:
+    def get_io_keys(self) -> tuple[list[str], list[str | tuple[str, str]]]:
         """
         Return (input_keys, output_keys).
         Each key must match a field of the subclass.
@@ -54,8 +55,10 @@ class BasePromptBuilder(BaseModel, ABC):
     # ------------------------------------------------------------------ #
     # Auto-build models from keys
     # ------------------------------------------------------------------ #
-    def _build_model_from_keys(self, keys: Union[List[str], List[Union[str, Tuple[str, str]]]], name: str) -> Type[BaseModel]:
-        fields: Dict[str, tuple[Any, Any]] = {}
+    def _build_model_from_keys(
+        self, keys: list[str] | list[str | tuple[str, str]], name: str
+    ) -> type[BaseModel]:
+        fields: dict[str, tuple[Any, Any]] = {}
         for key in keys:
             if isinstance(key, tuple):
                 # Handle tuple: (original_field_name, renamed_field_name)
@@ -63,7 +66,11 @@ class BasePromptBuilder(BaseModel, ABC):
                 if original_key not in self.model_fields:
                     raise ValueError(f"Key '{original_key}' not found in model fields")
                 field_info = self.model_fields[original_key]
-                field_type = field_info.annotation if field_info.annotation is not None else (Any,)
+                field_type = (
+                    field_info.annotation
+                    if field_info.annotation is not None
+                    else (Any,)
+                )
                 default = field_info.default if field_info.default is not None else ...
                 fields[renamed_key] = (field_type, default)
             else:
@@ -71,28 +78,37 @@ class BasePromptBuilder(BaseModel, ABC):
                 if key not in self.model_fields:
                     raise ValueError(f"Key '{key}' not found in model fields")
                 field_info = self.model_fields[key]
-                field_type = field_info.annotation if field_info.annotation is not None else (Any,)
+                field_type = (
+                    field_info.annotation
+                    if field_info.annotation is not None
+                    else (Any,)
+                )
                 default = field_info.default if field_info.default is not None else ...
                 fields[key] = (field_type, default)
         return create_model(name, **fields)  # type: ignore
 
-    def get_input_model(self) -> Type[BaseModel]:
+    def get_input_model(self) -> type[BaseModel]:
         input_keys, _ = self.get_io_keys()
-        return self._build_model_from_keys(input_keys, "InputModel")
+        return self._build_model_from_keys(input_keys, 'InputModel')
 
-    def get_output_model(self) -> Type[BaseModel]:
+    def get_output_model(self) -> type[BaseModel]:
         _, output_keys = self.get_io_keys()
-        return self._build_model_from_keys(output_keys, "OutputModel")
+        return self._build_model_from_keys(output_keys, 'OutputModel')
 
     # ------------------------------------------------------------------ #
     # Dump methods (JSON)
     # ------------------------------------------------------------------ #
-    def _dump_json_unique(self, schema_model: Type[BaseModel], keys: Union[List[str], List[Union[str, Tuple[str, str]]]], **kwargs) -> str:
+    def _dump_json_unique(
+        self,
+        schema_model: type[BaseModel],
+        keys: list[str] | list[str | tuple[str, str]],
+        **kwargs,
+    ) -> str:
         allowed = list(schema_model.model_fields.keys())
         seen = set()
         unique_keys = [k for k in allowed if not (k in seen or seen.add(k))]
         data = self.model_dump()
-
+
         # Handle key mapping for renamed fields
         filtered = {}
         for key in keys:
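`_build_model_from_keys` above leans on pydantic's `create_model`, which accepts `name=(annotation, default)` pairs. A self-contained sketch of that technique, with illustrative field names:

```python
from pydantic import BaseModel, create_model

# Each entry maps a field name to (annotation, default); `...` marks required.
fields = {
    'question': (str, ...),        # required field
    'hint': (str | None, None),    # optional field with a default
}
InputModel: type[BaseModel] = create_model('InputModel', **fields)

print(InputModel(question='2+2?'))  # question='2+2?' hint=None
```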
@@ -103,7 +119,7 @@ class BasePromptBuilder(BaseModel, ABC):
             else:
                 if key in data and key in unique_keys:
                     filtered[key] = data[key]
-
+
         return schema_model(**filtered).model_dump_json(**kwargs)
 
     def model_dump_json_input(self, **kwargs) -> str:
@@ -117,68 +133,70 @@ class BasePromptBuilder(BaseModel, ABC):
     # ------------------------------------------------------------------ #
     # Markdown helpers
     # ------------------------------------------------------------------ #
-    def _to_markdown(self, obj: Any, level: int = 1, title: Optional[str] = None) -> str:
+    def _to_markdown(self, obj: Any, level: int = 1, title: str | None = None) -> str:
         """
         Recursively convert dict/list/primitive into clean, generic Markdown.
         """
-        md: List[str] = []
-
+        md: list[str] = []
+
         # Format title if provided
         if title is not None:
             formatted_title = title.replace('_', ' ').title()
             if level <= 2:
-                md.append(f"{'#' * level} {formatted_title}")
+                md.append(f'{"#" * level} {formatted_title}')
             else:
-                md.append(f"**{formatted_title}:**")
-
+                md.append(f'**{formatted_title}:**')
+
         if isinstance(obj, dict):
             if not obj:  # Empty dict
-                md.append("None")
+                md.append('None')
             else:
                 for k, v in obj.items():
                     if isinstance(v, (str, int, float, bool)) and len(str(v)) < 100:
                         # Short values inline
                         key_name = k.replace('_', ' ').title()
                         if level <= 2:
-                            md.append(f"**{key_name}:** {v}")
+                            md.append(f'**{key_name}:** {v}')
                         else:
-                            md.append(f"- **{key_name}:** {v}")
+                            md.append(f'- **{key_name}:** {v}')
                     else:
                         # Complex values get recursive handling
                         md.append(self._to_markdown(v, level=level + 1, title=k))
         elif isinstance(obj, list):
             if not obj:  # Empty list
-                md.append("None")
+                md.append('None')
             elif all(isinstance(i, dict) for i in obj):
                 # List of objects
                 for i, item in enumerate(obj, 1):
                     if level <= 2:
-                        md.append(f"### {title or 'Item'} {i}")
+                        md.append(f'### {title or "Item"} {i}')
                     else:
-                        md.append(f"**{title or 'Item'} {i}:**")
+                        md.append(f'**{title or "Item"} {i}:**')
                     # Process dict items inline for cleaner output
                     for k, v in item.items():
                         key_name = k.replace('_', ' ').title()
-                        md.append(f"- **{key_name}:** {v}")
+                        md.append(f'- **{key_name}:** {v}')
                     if i < len(obj):  # Add spacing between items
-                        md.append("")
+                        md.append('')
             else:
                 # Simple list
                 for item in obj:
-                    md.append(f"- {item}")
+                    md.append(f'- {item}')
         else:
             # Primitive value
-            value_str = str(obj) if obj is not None else "None"
+            value_str = str(obj) if obj is not None else 'None'
             if title is None:
                 md.append(value_str)
             else:
                 md.append(value_str)
 
-        return "\n".join(md)
+        return '\n'.join(md)
 
-    def _dump_markdown_unique(self, keys: Union[List[str], List[Union[str, Tuple[str, str]]]]) -> str:
+    def _dump_markdown_unique(
+        self, keys: list[str] | list[str | tuple[str, str]]
+    ) -> str:
         data = self.model_dump()
-        filtered: Dict[str, Any] = {}
+        filtered: dict[str, Any] = {}
         for key in keys:
             if isinstance(key, tuple):
                 original_key, renamed_key = key
@@ -187,7 +205,7 @@ class BasePromptBuilder(BaseModel, ABC):
             else:
                 if key in data:
                     filtered[key] = data[key]
-
+
         # Generate markdown without top-level headers to avoid duplication
         parts = []
         for key, value in filtered.items():
@@ -195,10 +213,10 @@ class BasePromptBuilder(BaseModel, ABC):
                 continue
             formatted_key = key.replace('_', ' ').title()
             if isinstance(value, (str, int, float, bool)) and len(str(value)) < 200:
-                parts.append(f"**{formatted_key}:** {value}")
+                parts.append(f'**{formatted_key}:** {value}')
             else:
                 parts.append(self._to_markdown(value, level=2, title=key))
-
+
         return '\n'.join(parts)
 
     def model_dump_markdown_input(self) -> str:
@@ -212,42 +230,46 @@ class BasePromptBuilder(BaseModel, ABC):
     # ------------------------------------------------------------------ #
     # Training & preview (JSON or Markdown)
     # ------------------------------------------------------------------ #
-    def build_training_data(self, format: str = "json", indent=None) -> dict[str, Any]:
+    def build_training_data(self, format: str = 'json', indent=None) -> dict[str, Any]:
         """
         Build training data in either JSON (dict for OpenAI-style messages)
         or Markdown (clean format without role prefixes).
         """
-        if format == "json":
+        if format == 'json':
             return {
-                "messages": [
-                    {"role": "system", "content": self.get_instruction()},
-                    {"role": "user", "content": self.model_dump_json_input(indent=indent)},
-                    {"role": "assistant", "content": self.model_dump_json_output(indent=indent)},
+                'messages': [
+                    {'role': 'system', 'content': self.get_instruction()},
+                    {
+                        'role': 'user',
+                        'content': self.model_dump_json_input(indent=indent),
+                    },
+                    {
+                        'role': 'assistant',
+                        'content': self.model_dump_json_output(indent=indent),
+                    },
                 ]
             }
-        elif format == "markdown":
+        if format == 'markdown':
             system_content = self.get_instruction()
-
+
             return {
                 'messages': [
-                    {"role": "system", "content": system_content},
-                    {"role": "user", "content": self.model_dump_markdown_input()},
-                    {"role": "assistant", "content": self.model_dump_markdown_output()},
+                    {'role': 'system', 'content': system_content},
+                    {'role': 'user', 'content': self.model_dump_markdown_input()},
+                    {'role': 'assistant', 'content': self.model_dump_markdown_output()},
                 ]
             }
         raise ValueError("format must be either 'json' or 'markdown'")
 
     def __str__(self) -> str:
         # Return clean format without explicit role prefixes
-        training_data = self.build_training_data(format="markdown")
+        training_data = self.build_training_data(format='markdown')
         messages = training_data['messages']  # type: ignore[index]
-
+
         parts = []
         for msg in messages:
             content = msg['content']
-            if msg['role'] == 'system':
-                parts.append(content)
-            elif msg['role'] == 'user':
+            if msg['role'] == 'system' or msg['role'] == 'user':
                 parts.append(content)
             elif msg['role'] == 'assistant':
                 # Get output keys to determine the main output field name
@@ -256,33 +278,34 @@ class BasePromptBuilder(BaseModel, ABC):
                 if isinstance(main_output, tuple):
                     main_output = main_output[1]  # Use renamed key
                 title = main_output.replace('_', ' ').title()
-                parts.append(f"## {title}\n{content}")
-
+                parts.append(f'## {title}\n{content}')
+
         return '\n\n'.join(parts)
 
     @classmethod
-    def from_messages(cls: Type[B], messages: list[dict]) -> B:
+    def from_messages(cls: type[B], messages: list[dict]) -> B:
         """
         Reconstruct a prompt builder instance from OpenAI-style messages.
         """
-        user_msg = next((m for m in messages if m.get("role") == "user"), None)
-        assistant_msg = next((m for m in messages if m.get("role") == "assistant"), None)
+        user_msg = next((m for m in messages if m.get('role') == 'user'), None)
+        assistant_msg = next(
+            (m for m in messages if m.get('role') == 'assistant'), None
+        )
 
         if user_msg is None:
-            raise ValueError("No user message found")
+            raise ValueError('No user message found')
         if assistant_msg is None:
-            raise ValueError("No assistant message found")
+            raise ValueError('No assistant message found')
 
         try:
-            user_data = json.loads(user_msg["content"])  # type: ignore[index]
+            user_data = json.loads(user_msg['content'])  # type: ignore[index]
         except Exception as e:
-            raise ValueError(f"Invalid user JSON content: {e}")
+            raise ValueError(f'Invalid user JSON content: {e}') from e
 
         try:
-            assistant_data = json.loads(assistant_msg["content"])  # type: ignore[index]
+            assistant_data = json.loads(assistant_msg['content'])  # type: ignore[index]
         except Exception as e:
-            raise ValueError(f"Invalid assistant JSON content: {e}")
+            raise ValueError(f'Invalid assistant JSON content: {e}') from e
 
         combined_data = {**user_data, **assistant_data}
         return cast(B, cls(**combined_data))
-
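To tie the `BasePromptBuilder` changes together, a hedged end-to-end sketch of a subclass, using only methods visible in this diff and assuming `get_instruction` and `get_io_keys` are the two abstract hooks; the import path follows file list item 14 and the fields are illustrative:

```python
from llm_utils.lm.base_prompt_builder import BasePromptBuilder  # path per the file list

class QAPrompt(BasePromptBuilder):
    question: str
    answer: str = ''

    def get_instruction(self) -> str:
        return 'Answer the question concisely.'

    def get_io_keys(self) -> tuple[list[str], list[str | tuple[str, str]]]:
        # (input_keys, output_keys), each matching a field above
        return ['question'], ['answer']

p = QAPrompt(question='What is 2 + 2?', answer='4')
data = p.build_training_data(format='json')
print([m['role'] for m in data['messages']])  # ['system', 'user', 'assistant']

# Round-trip through OpenAI-style messages via the classmethod above.
p2 = QAPrompt.from_messages(data['messages'])
assert p2.question == p.question
```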