speedy-utils 1.0.24__tar.gz → 1.1.2__tar.gz
This diff compares the contents of two publicly released versions of the package as they appear in their public registry, and is provided for informational purposes only.
- {speedy_utils-1.0.24 → speedy_utils-1.1.2}/PKG-INFO +1 -1
- {speedy_utils-1.0.24 → speedy_utils-1.1.2}/pyproject.toml +1 -1
- {speedy_utils-1.0.24 → speedy_utils-1.1.2}/src/llm_utils/lm/async_lm.py +55 -48
- {speedy_utils-1.0.24 → speedy_utils-1.1.2}/README.md +0 -0
- {speedy_utils-1.0.24 → speedy_utils-1.1.2}/src/llm_utils/__init__.py +0 -0
- {speedy_utils-1.0.24 → speedy_utils-1.1.2}/src/llm_utils/chat_format/__init__.py +0 -0
- {speedy_utils-1.0.24 → speedy_utils-1.1.2}/src/llm_utils/chat_format/display.py +0 -0
- {speedy_utils-1.0.24 → speedy_utils-1.1.2}/src/llm_utils/chat_format/transform.py +0 -0
- {speedy_utils-1.0.24 → speedy_utils-1.1.2}/src/llm_utils/chat_format/utils.py +0 -0
- {speedy_utils-1.0.24 → speedy_utils-1.1.2}/src/llm_utils/group_messages.py +0 -0
- {speedy_utils-1.0.24 → speedy_utils-1.1.2}/src/llm_utils/lm/__init__.py +0 -0
- {speedy_utils-1.0.24 → speedy_utils-1.1.2}/src/llm_utils/lm/chat_html.py +0 -0
- {speedy_utils-1.0.24 → speedy_utils-1.1.2}/src/llm_utils/lm/lm_json.py +0 -0
- {speedy_utils-1.0.24 → speedy_utils-1.1.2}/src/llm_utils/lm/sync_lm.py +0 -0
- {speedy_utils-1.0.24 → speedy_utils-1.1.2}/src/llm_utils/lm/utils.py +0 -0
- {speedy_utils-1.0.24 → speedy_utils-1.1.2}/src/llm_utils/scripts/README.md +0 -0
- {speedy_utils-1.0.24 → speedy_utils-1.1.2}/src/llm_utils/scripts/vllm_load_balancer.py +0 -0
- {speedy_utils-1.0.24 → speedy_utils-1.1.2}/src/llm_utils/scripts/vllm_serve.py +0 -0
- {speedy_utils-1.0.24 → speedy_utils-1.1.2}/src/speedy_utils/__init__.py +0 -0
- {speedy_utils-1.0.24 → speedy_utils-1.1.2}/src/speedy_utils/all.py +0 -0
- {speedy_utils-1.0.24 → speedy_utils-1.1.2}/src/speedy_utils/common/__init__.py +0 -0
- {speedy_utils-1.0.24 → speedy_utils-1.1.2}/src/speedy_utils/common/clock.py +0 -0
- {speedy_utils-1.0.24 → speedy_utils-1.1.2}/src/speedy_utils/common/function_decorator.py +0 -0
- {speedy_utils-1.0.24 → speedy_utils-1.1.2}/src/speedy_utils/common/logger.py +0 -0
- {speedy_utils-1.0.24 → speedy_utils-1.1.2}/src/speedy_utils/common/notebook_utils.py +0 -0
- {speedy_utils-1.0.24 → speedy_utils-1.1.2}/src/speedy_utils/common/report_manager.py +0 -0
- {speedy_utils-1.0.24 → speedy_utils-1.1.2}/src/speedy_utils/common/utils_cache.py +0 -0
- {speedy_utils-1.0.24 → speedy_utils-1.1.2}/src/speedy_utils/common/utils_io.py +0 -0
- {speedy_utils-1.0.24 → speedy_utils-1.1.2}/src/speedy_utils/common/utils_misc.py +0 -0
- {speedy_utils-1.0.24 → speedy_utils-1.1.2}/src/speedy_utils/common/utils_print.py +0 -0
- {speedy_utils-1.0.24 → speedy_utils-1.1.2}/src/speedy_utils/multi_worker/__init__.py +0 -0
- {speedy_utils-1.0.24 → speedy_utils-1.1.2}/src/speedy_utils/multi_worker/process.py +0 -0
- {speedy_utils-1.0.24 → speedy_utils-1.1.2}/src/speedy_utils/multi_worker/thread.py +0 -0
- {speedy_utils-1.0.24 → speedy_utils-1.1.2}/src/speedy_utils/scripts/__init__.py +0 -0
- {speedy_utils-1.0.24 → speedy_utils-1.1.2}/src/speedy_utils/scripts/mpython.py +0 -0
- {speedy_utils-1.0.24 → speedy_utils-1.1.2}/src/speedy_utils/scripts/openapi_client_codegen.py +0 -0
{speedy_utils-1.0.24 → speedy_utils-1.1.2}/src/llm_utils/lm/async_lm.py +55 -48

```diff
@@ -78,7 +78,7 @@ import hashlib
 import json
 import os
 from abc import ABC
-from functools import lru_cache
+from functools import cache, lru_cache
 from typing import (
     Any,
     Dict,
```
```diff
@@ -93,7 +93,7 @@ from typing import (
     cast,
     overload,
 )
-
+from typing_extensions import TypedDict
 from httpx import URL
 from loguru import logger
 from numpy import isin
```
```diff
@@ -146,6 +146,15 @@ def _yellow(t):
     return _color(33, t)
 
 
+
+TParsed = TypeVar('TParsed', bound=BaseModel)
+
+class ParsedOutput(TypedDict, Generic[TParsed]):
+    messages: List
+    completion: Any
+    parsed: TParsed
+
+
 class AsyncLM:
     """Unified **async** language‑model wrapper with optional JSON parsing."""
 
```
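The core of this release is the new `ParsedOutput` return type: a generic `TypedDict` keyed by `messages`, `completion`, and `parsed`, with the `parsed` entry typed by the caller's `response_model`. A minimal, self-contained sketch of the same pattern (the `City` model and `read_population` helper are invented for illustration):

```python
from typing import Any, Generic, List, TypeVar

from pydantic import BaseModel
from typing_extensions import TypedDict

TParsed = TypeVar("TParsed", bound=BaseModel)


class ParsedOutput(TypedDict, Generic[TParsed]):
    """Shape of the dict returned by AsyncLM.parse (per the diff above)."""
    messages: List      # prompt messages plus the raw completion dict
    completion: Any     # OpenAI-style response serialized via model_dump()
    parsed: TParsed     # validated instance of the requested response_model


class City(BaseModel):  # hypothetical response model
    name: str
    population: int


def read_population(out: ParsedOutput[City]) -> int:
    # A type checker sees out["parsed"] as City here.
    return out["parsed"].population
```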
```diff
@@ -454,7 +463,7 @@ class AsyncLM:
     # ------------------------------------------------------------------ #
     async def parse(
         self,
-        response_model: Type[
+        response_model: Type[TParsed],
         instruction: Optional[str] = None,
         prompt: Optional[str] = None,
         messages: Optional[RawMsgs] = None,
```
```diff
@@ -462,11 +471,9 @@
         add_json_schema_to_instruction: bool = False,
         temperature: Optional[float] = None,
         max_tokens: Optional[int] = None,
-        return_openai_response: bool = False,
         cache: Optional[bool] = True,
-        return_messages: bool = False,
         **kwargs,
-    ):
+    ) -> ParsedOutput[TParsed]:
         """Parse response using guided JSON generation."""
         if messages is None:
             assert instruction is not None, "Instruction must be provided."
```
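In practice this changes every `parse()` call site: the `return_openai_response` and `return_messages` flags are gone, and the method always returns a `ParsedOutput` dict. A hedged migration sketch (the `Sentiment` model and instruction text are invented; only keyword arguments that appear in the diff are relied on):

```python
from typing import Any

from pydantic import BaseModel

from llm_utils.lm.async_lm import AsyncLM  # module path per the file listing above


class Sentiment(BaseModel):  # hypothetical response model
    label: str
    score: float


async def classify(lm: AsyncLM, text: str) -> Sentiment:
    # 1.0.24: the return shape depended on return_openai_response / return_messages.
    # 1.1.2: parse() always returns a ParsedOutput dict.
    out = await lm.parse(
        response_model=Sentiment,
        instruction="Classify the sentiment of the user text.",
        prompt=text,
    )
    history: list = out["messages"]   # prompt messages + raw completion dict
    raw: Any = out["completion"]      # OpenAI-style response, serialized to a dict
    return out["parsed"]              # already a validated Sentiment instance
```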
```diff
@@ -509,6 +516,7 @@ class AsyncLM:
 
         use_cache = self.do_cache if cache is None else cache
         cache_key = None
+        completion = None
         if use_cache:
             cache_data = {
                 "messages": messages,
```
```diff
@@ -517,37 +525,29 @@
                 "response_format": response_model.__name__,
             }
             cache_key = self._cache_key(cache_data, {}, response_model)
-
-
-
-
-
-
-
-
-
-
-
-
+            completion = self._load_cache(cache_key)  # dict
+        if not completion:
+            completion = await self.client.chat.completions.create(
+                model=self.model,  # type: ignore
+                messages=messages,  # type: ignore
+                extra_body={"guided_json": json_schema},
+                **model_kwargs,
+            )
+            completion = completion.model_dump()
+            if cache_key:
+                self._dump_cache(cache_key, completion)
+        assert isinstance(completion, dict), (
+            "Completion must be a dictionary with OpenAI response format."
         )
-
-        if cache_key:
-            self._dump_cache(cache_key, completion)
-
         self.last_log = [prompt, messages, completion]
 
-        output = self._parse_complete_output(completion, response_model)
-
-
-
-
-
-
-            "messages": full_messages,
-            "completion": completion,
-            "parsed": output,
-        }
-        return output
+        output = cast(TParsed, self._parse_complete_output(completion, response_model))
+        full_messages = messages + [completion]
+        return ParsedOutput(
+            messages=full_messages,
+            completion=completion,
+            parsed=output,
+        )
 
     def _parse_complete_output(
         self, completion: Any, response_model: Type[BaseModel]
```
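The rewritten body folds caching and generation into one load-or-create path: reuse a cached completion dict when available, otherwise call the API with `guided_json`, serialize the response via `model_dump()`, and write the dict back to the cache. A simplified sketch of that control flow, with an in-memory dict standing in for `_load_cache`/`_dump_cache` (whose real implementations live elsewhere in `async_lm.py`):

```python
import hashlib
import json
from typing import Any, Dict, Optional

# In-memory stand-in for the real cache helpers; illustration only.
_CACHE: Dict[str, dict] = {}


def _cache_key(payload: dict) -> str:
    blob = json.dumps(payload, sort_keys=True, default=str).encode()
    return hashlib.sha256(blob).hexdigest()


async def parse_like_flow(client: Any, model: str, messages: list,
                          json_schema: dict, use_cache: bool = True,
                          **model_kwargs: Any) -> dict:
    cache_key: Optional[str] = None
    completion: Optional[dict] = None

    if use_cache:
        cache_key = _cache_key({"messages": messages, "schema": json_schema})
        completion = _CACHE.get(cache_key)            # analogous to self._load_cache(...)

    if not completion:
        response = await client.chat.completions.create(
            model=model,
            messages=messages,
            extra_body={"guided_json": json_schema},  # guided decoding, as in the diff
            **model_kwargs,
        )
        completion = response.model_dump()            # always cache/return a plain dict
        if cache_key:
            _CACHE[cache_key] = completion            # analogous to self._dump_cache(...)

    assert isinstance(completion, dict)
    return completion
```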
```diff
@@ -894,20 +894,20 @@ class AsyncLLMTask(ABC, Generic[InputModelType, OutputModelType]):
     """
 
     lm: "AsyncLM"
-    InputModel:
-    OutputModel:
+    InputModel: InputModelType
+    OutputModel: OutputModelType
 
     temperature: float = 0.6
     think: bool = False
     add_json_schema: bool = False
+    cache: bool = False
 
     async def __call__(
         self,
         data: BaseModel | dict,
         temperature: float = 0.1,
         cache: bool = False,
-
-    ) -> OutputModelType | tuple[OutputModelType, List[Dict[str, Any]]]:
+    ) -> tuple[OutputModelType, List[Dict[str, Any]]]:
         # Get the input and output model types from the generic parameters
         type_args = getattr(self.__class__, "__orig_bases__", None)
         if (
```
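For `AsyncLLMTask`, subclasses now pin `InputModel`/`OutputModel` as real class attributes, may set a per-task `cache` default, and `__call__` returns a `(parsed, messages)` tuple instead of a union type. A hedged subclass sketch (the review models are invented, and how the `lm` attribute is normally wired up is not shown in this diff):

```python
from pydantic import BaseModel

from llm_utils.lm.async_lm import AsyncLLMTask  # module path per the file listing above


class ReviewInput(BaseModel):        # hypothetical input schema
    text: str


class ReviewOutput(BaseModel):       # hypothetical output schema
    rating: int
    summary: str


class ReviewTask(AsyncLLMTask[ReviewInput, ReviewOutput]):
    InputModel = ReviewInput
    OutputModel = ReviewOutput
    temperature = 0.2
    cache = True                     # new in 1.1.2: per-task cache default


async def review(task: ReviewTask, text: str) -> ReviewOutput:
    # __call__ now always returns (parsed, messages); data may be a dict or a model.
    parsed, messages = await task({"text": text})
    return parsed
```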
```diff
@@ -930,7 +930,17 @@ class AsyncLLMTask(ABC, Generic[InputModelType, OutputModelType]):
         input_model = self.InputModel
         output_model = self.OutputModel
 
-
+        # Ensure input_model is a class before calling
+        if isinstance(data, BaseModel):
+            item = data
+        elif isinstance(input_model, type) and issubclass(input_model, BaseModel):
+            item = input_model(**data)
+        else:
+            raise TypeError("InputModel must be a subclass of BaseModel")
+
+        assert isinstance(output_model, type) and issubclass(output_model, BaseModel), (
+            "OutputModel must be a subclass of BaseModel"
+        )
 
         result = await self.lm.parse(
             prompt=item.model_dump_json(),
```
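The added guard makes input handling explicit: model instances pass through untouched, dicts are validated through `InputModel`, and anything else fails fast. The same logic in isolation, with a hypothetical model (dict inputs with bad fields raise pydantic's usual `ValidationError`):

```python
from pydantic import BaseModel


class ReviewInput(BaseModel):        # hypothetical input model
    text: str


def coerce_input(data: BaseModel | dict, input_model: type) -> BaseModel:
    # Mirrors the guard added in 1.1.2.
    if isinstance(data, BaseModel):
        return data
    if isinstance(input_model, type) and issubclass(input_model, BaseModel):
        return input_model(**data)   # raises ValidationError on bad fields
    raise TypeError("InputModel must be a subclass of BaseModel")


assert coerce_input({"text": "hi"}, ReviewInput).text == "hi"
assert isinstance(coerce_input(ReviewInput(text="hi"), ReviewInput), ReviewInput)
```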
```diff
@@ -939,16 +949,13 @@ class AsyncLLMTask(ABC, Generic[InputModelType, OutputModelType]):
             temperature=temperature or self.temperature,
             think=self.think,
             add_json_schema_to_instruction=self.add_json_schema,
-            cache=cache,
-            return_messages=True,
+            cache=self.cache or cache,
         )
 
-
-
-
-
-        )
-        return cast(OutputModelType, result)
+        return (
+            cast(OutputModelType, result["parsed"]),  # type: ignore
+            cast(List[dict], result["messages"]),  # type: ignore
+        )
 
     def generate_training_data(
         self, input_dict: Dict[str, Any], output: Dict[str, Any]
```
```diff
@@ -962,4 +969,4 @@ class AsyncLLMTask(ABC, Generic[InputModelType, OutputModelType]):
         )
         return {"messages": messages}
 
-
+    arun = __call__  # alias for compatibility with other LLMTask implementations
```
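At call sites, both `__call__` and its new `arun` alias unpack to the same `(parsed, messages)` pair, so code written against other LLMTask-style interfaces can keep calling `arun`. Continuing the hypothetical `ReviewTask` sketch above:

```python
async def label_one(task: "ReviewTask", text: str):
    # 1.0.24: the return shape depended on return_messages.
    # 1.1.2: __call__ (and its alias arun) always return (parsed, messages).
    parsed, messages = await task.arun({"text": text})
    return parsed, messages
```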