speedy-utils 1.1.21__py3-none-any.whl → 1.1.23__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- llm_utils/__init__.py +22 -3
- llm_utils/lm/__init__.py +10 -0
- llm_utils/lm/llm_as_a_judge.py +390 -0
- llm_utils/lm/llm_task.py +172 -251
- llm_utils/lm/signature.py +282 -0
- llm_utils/lm/utils.py +332 -110
- speedy_utils/multi_worker/process.py +125 -25
- speedy_utils/multi_worker/thread.py +341 -226
- {speedy_utils-1.1.21.dist-info → speedy_utils-1.1.23.dist-info}/METADATA +1 -1
- {speedy_utils-1.1.21.dist-info → speedy_utils-1.1.23.dist-info}/RECORD +12 -11
- llm_utils/lm/lm.py +0 -207
- {speedy_utils-1.1.21.dist-info → speedy_utils-1.1.23.dist-info}/WHEEL +0 -0
- {speedy_utils-1.1.21.dist-info → speedy_utils-1.1.23.dist-info}/entry_points.txt +0 -0
|
@@ -0,0 +1,282 @@
|
|
|
1
|
+
"""
|
|
2
|
+
DSPy-like signature system for structured LLM interactions.
|
|
3
|
+
|
|
4
|
+
This module provides a declarative way to define LLM input/output schemas
|
|
5
|
+
with field descriptions and type annotations.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
from typing import Any, Dict, List, Type, Union, get_type_hints, Annotated, get_origin, get_args
|
|
9
|
+
from pydantic import BaseModel, Field
|
|
10
|
+
import inspect
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
class InputField:
    """Descriptor that marks a signature attribute as an *input* field.

    Attributes:
        desc: Human-readable description of the field.
        kwargs: Any extra keyword options, stored unchanged.
    """

    def __init__(self, desc: str = "", **kwargs):
        self.desc = desc
        self.kwargs = kwargs

    def __class_getitem__(cls, item):
        """Support for InputField[type] syntax.

        The subscript is a pure pass-through: ``InputField[str]`` evaluates
        to ``str`` itself, so subscripting alone attaches no descriptor.
        """
        return item

    def __repr__(self) -> str:
        """Return a constructor-like representation for debugging."""
        extras = "".join(
            f", {key}={value!r}" for key, value in self.kwargs.items()
        )
        return f"{type(self).__name__}(desc={self.desc!r}{extras})"
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
class OutputField:
    """Descriptor that marks a signature attribute as an *output* field.

    Attributes:
        desc: Human-readable description of the field.
        kwargs: Any extra keyword options, stored unchanged.
    """

    def __init__(self, desc: str = "", **kwargs):
        self.desc = desc
        self.kwargs = kwargs

    def __class_getitem__(cls, item):
        """Support for OutputField[type] syntax.

        The subscript is a pure pass-through: ``OutputField[bool]`` evaluates
        to ``bool`` itself, so subscripting alone attaches no descriptor.
        """
        return item

    def __repr__(self) -> str:
        """Return a constructor-like representation for debugging."""
        extras = "".join(
            f", {key}={value!r}" for key, value in self.kwargs.items()
        )
        return f"{type(self).__name__}(desc={self.desc!r}{extras})"
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
# Factory helpers for cleaner syntax (they build the field descriptors)
|
|
38
|
+
def Input(desc: str = "", **kwargs) -> Any:
    """Build an :class:`InputField` from a description and extra options.

    Args:
        desc: Description of the field.
        **kwargs: Additional options handed to ``InputField`` unchanged.

    Returns:
        The new ``InputField`` instance (annotated as ``Any``).
    """
    descriptor = InputField(desc=desc, **kwargs)
    return descriptor
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
def Output(desc: str = "", **kwargs) -> Any:
    """Build an :class:`OutputField` from a description and extra options.

    Args:
        desc: Description of the field.
        **kwargs: Additional options handed to ``OutputField`` unchanged.

    Returns:
        The new ``OutputField`` instance (annotated as ``Any``).
    """
    descriptor = OutputField(desc=desc, **kwargs)
    return descriptor
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
class SignatureMeta(type):
    """Metaclass for Signature that collects input/output field declarations.

    For every class it creates, two class attributes are filled in:

    * ``_input_fields``  - ``{name: {'type': ..., 'desc': ..., **options}}``
    * ``_output_fields`` - same layout, for output fields.

    A field is recognised either from ``Annotated[T, InputField(...)]`` /
    ``Annotated[T, OutputField(...)]`` metadata, or from the older
    ``name: T = InputField(...)`` assignment form.  Fields declared on base
    classes are inherited; a class may re-declare a field to override it.
    """

    def __new__(cls, name, bases, namespace, **kwargs):
        """Create the class and attach its merged field tables.

        Args:
            name: Name of the class being created.
            bases: Its base classes.
            namespace: The class body namespace.
            **kwargs: Class keyword arguments; forwarded to ``type.__new__``
                so that ``__init_subclass__`` hooks receive them.
        """
        input_fields = {}
        output_fields = {}

        # Seed with the fields of the bases so that a subclass extends what
        # it inherits instead of silently wiping it.  Iterating in reverse
        # lets the left-most base win, mirroring attribute lookup order.
        for base in reversed(bases):
            for field_name, info in getattr(base, '_input_fields', {}).items():
                output_fields.pop(field_name, None)
                input_fields[field_name] = dict(info)
            for field_name, info in getattr(base, '_output_fields', {}).items():
                input_fields.pop(field_name, None)
                output_fields[field_name] = dict(info)

        # NOTE(review): this reads evaluated annotations straight from the
        # class namespace.  String annotations (``from __future__ import
        # annotations``) are not resolved here, and newer Python versions with
        # deferred annotations may not expose ``__annotations__`` in the
        # namespace at all - confirm against the supported Python versions.
        annotations = namespace.get('__annotations__', {})

        for field_name, field_type in annotations.items():
            field_value = namespace.get(field_name)
            field_desc = None

            # Annotated[Type, Field(...)] syntax: the first argument is the
            # real type, the first Input/Output descriptor in the metadata
            # decides the role of the field.
            if get_origin(field_type) is Annotated:
                args = get_args(field_type)
                if args:
                    field_type = args[0]
                    for metadata in args[1:]:
                        if isinstance(metadata, (InputField, OutputField)):
                            field_desc = metadata
                            break

            # Old syntax with direct assignment: ``name: T = InputField(...)``.
            if field_desc is None and isinstance(field_value, (InputField, OutputField)):
                field_desc = field_value

            if isinstance(field_desc, InputField):
                # A re-declaration may move an inherited field between roles.
                output_fields.pop(field_name, None)
                input_fields[field_name] = {
                    'type': field_type,
                    'desc': field_desc.desc,
                    **field_desc.kwargs
                }
            elif isinstance(field_desc, OutputField):
                input_fields.pop(field_name, None)
                output_fields[field_name] = {
                    'type': field_type,
                    'desc': field_desc.desc,
                    **field_desc.kwargs
                }

        # Store in class attributes
        namespace['_input_fields'] = input_fields
        namespace['_output_fields'] = output_fields

        return super().__new__(cls, name, bases, namespace, **kwargs)
|
|
99
|
+
|
|
100
|
+
|
|
101
|
+
class Signature(metaclass=SignatureMeta):
    """Base class for defining LLM signatures with input and output fields.

    Subclasses declare fields with ``Annotated[T, Input(...)]`` /
    ``Annotated[T, Output(...)]`` or the older ``name: T = InputField(...)``
    form.  The class docstring is used as the task instruction.
    """

    # Filled in per class by SignatureMeta: field name -> info dict holding
    # 'type', 'desc' and any extra options given to the descriptor.
    _input_fields: Dict[str, Dict[str, Any]] = {}
    _output_fields: Dict[str, Dict[str, Any]] = {}

    def __init__(self, **kwargs):
        """Initialize signature with field values.

        Every keyword argument is stored as an instance attribute.
        """
        for field_name, value in kwargs.items():
            setattr(self, field_name, value)

    @staticmethod
    def _type_label(field_type) -> str:
        """Return the display name of a field type for instruction text."""
        # Parameterized types (List[str], Optional[int], ...) expose only the
        # bare container name through __name__, dropping their arguments, so
        # use their full string form instead.
        if get_origin(field_type) is not None:
            return str(field_type)
        return getattr(field_type, '__name__', str(field_type))

    @classmethod
    def _describe_fields(cls, field_specs: Dict[str, Dict[str, Any]]) -> str:
        """Render one ``- name (type): desc`` bullet line per field."""
        return "".join(
            f"- {field_name} ({cls._type_label(field_info['type'])}): "
            f"{field_info.get('desc', '')}\n"
            for field_name, field_info in field_specs.items()
        )

    @classmethod
    def _build_model(cls, suffix: str, field_specs: Dict[str, Dict[str, Any]]) -> Type[BaseModel]:
        """Create a Pydantic model named ``<ClassName><suffix>`` from field specs."""
        annotations = {}
        defaults = {}

        for field_name, field_info in field_specs.items():
            # Everything except the bookkeeping keys is passed on to Field().
            field_kwargs = {k: v for k, v in field_info.items()
                            if k not in ('type', 'desc')}
            desc = field_info.get('desc', '')
            if desc:
                field_kwargs['description'] = desc

            annotations[field_name] = field_info['type']
            defaults[field_name] = Field(**field_kwargs)

        return type(
            f"{cls.__name__}{suffix}",
            (BaseModel,),
            {
                '__annotations__': annotations,
                **defaults
            }
        )

    @classmethod
    def get_instruction(cls) -> str:
        """Generate instruction text from docstring and field descriptions.

        Returns:
            The stripped class docstring (or a generic default when the class
            has none), followed by an "Input Fields" and an "Output Fields"
            bullet list for whichever kinds of field are declared.
        """
        instruction = (cls.__doc__ or "Complete the following task.").strip()

        if cls._input_fields:
            instruction += "\n\n**Input Fields:**\n"
            instruction += cls._describe_fields(cls._input_fields)

        if cls._output_fields:
            instruction += "\n**Output Fields:**\n"
            instruction += cls._describe_fields(cls._output_fields)

        return instruction

    @classmethod
    def get_input_model(cls) -> Union[Type[BaseModel], type[str]]:
        """Generate Pydantic input model from input fields.

        Returns:
            ``str`` when no input fields are declared, otherwise a dynamically
            created ``BaseModel`` subclass named ``<ClassName>Input``.
        """
        if not cls._input_fields:
            return str
        return cls._build_model("Input", cls._input_fields)

    @classmethod
    def get_output_model(cls) -> Union[Type[BaseModel], type[str]]:
        """Generate Pydantic output model from output fields.

        Returns:
            ``str`` when no output fields are declared, otherwise a dynamically
            created ``BaseModel`` subclass named ``<ClassName>Output``.
        """
        if not cls._output_fields:
            return str
        return cls._build_model("Output", cls._output_fields)

    def format_input(self, **kwargs) -> str:
        """Format input fields as a string.

        Values are taken from ``kwargs`` first and from attributes of this
        instance otherwise; input fields without a value are omitted.

        Returns:
            One ``name (desc): value`` line per field (``name: value`` when
            the field has no description), joined with newlines.
        """
        missing = object()
        formatted_lines = []

        for field_name, field_info in self._input_fields.items():
            if field_name in kwargs:
                value = kwargs[field_name]
            else:
                value = getattr(self, field_name, missing)
                # With the ``name: T = InputField(...)`` syntax the class
                # attribute is the descriptor itself; that is a declaration,
                # not a value, so treat the field as not provided.
                if value is missing or isinstance(value, (InputField, OutputField)):
                    continue

            desc = field_info.get('desc', '')
            if desc:
                formatted_lines.append(f"{field_name} ({desc}): {value}")
            else:
                formatted_lines.append(f"{field_name}: {value}")

        return '\n'.join(formatted_lines)
|
|
228
|
+
|
|
229
|
+
|
|
230
|
+
# Public API: the classes and helper functions exported by this module
|
|
231
|
+
__all__ = ['Signature', 'InputField', 'OutputField', 'Input', 'Output']
|
|
232
|
+
|
|
233
|
+
|
|
234
|
+
# Example usage for testing
|
|
235
|
+
if __name__ == "__main__":
    # Demo signature declared through Annotated metadata (DSPy-like style).
    class FactJudge(Signature):
        """Judge if the answer is factually correct based on the context."""

        context: Annotated[str, Input("Context for the prediction")]
        question: Annotated[str, Input("Question to be answered")]
        answer: Annotated[str, Input("Answer for the question")]
        factually_correct: Annotated[bool, Output("Is the answer factually correct based on the context?")]

    # Same signature declared with direct assignment; type checkers complain
    # about the descriptor defaults, hence the ignores.
    class FactJudgeOldSyntax(Signature):
        """Judge if the answer is factually correct based on the context."""

        context: str = InputField(desc="Context for the prediction")  # type: ignore
        question: str = InputField(desc="Question to be answered")  # type: ignore
        answer: str = InputField(desc="Answer for the question")  # type: ignore
        factually_correct: bool = OutputField(desc="Is the answer factually correct based on the context?")  # type: ignore

    sample_inputs = {
        "context": "The sky is blue during daytime.",
        "question": "What color is the sky?",
        "answer": "Blue",
    }

    # Run the same walkthrough for both declaration styles.
    for demo_cls in (FactJudge, FactJudgeOldSyntax):
        print(f"\n=== Testing {demo_cls.__name__} ===")
        print("Instruction:")
        print(demo_cls.get_instruction())

        # (heading, model factory, text printed when no JSON schema is available)
        model_sections = (
            ("\nInput Model:", demo_cls.get_input_model, "String input model"),
            ("\nOutput Model:", demo_cls.get_output_model, "String output model"),
        )
        for heading, build_model, fallback_text in model_sections:
            print(heading)
            model = build_model()
            if model is str or not hasattr(model, 'model_json_schema'):
                print(fallback_text)
            else:
                print(model.model_json_schema())  # type: ignore

        # Instance usage: render the sample inputs as prompt text.
        rendered = demo_cls().format_input(**sample_inputs)
        print("\nFormatted Input:")
        print(rendered)
|