speedy-utils 1.1.34__py3-none-any.whl → 1.1.35__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- llm_utils/lm/__init__.py +17 -15
- llm_utils/lm/llm.py +2 -0
- llm_utils/lm/mixins.py +74 -0
- {speedy_utils-1.1.34.dist-info → speedy_utils-1.1.35.dist-info}/METADATA +1 -1
- {speedy_utils-1.1.34.dist-info → speedy_utils-1.1.35.dist-info}/RECORD +7 -7
- {speedy_utils-1.1.34.dist-info → speedy_utils-1.1.35.dist-info}/WHEEL +0 -0
- {speedy_utils-1.1.34.dist-info → speedy_utils-1.1.35.dist-info}/entry_points.txt +0 -0
llm_utils/lm/__init__.py
CHANGED
|
@@ -7,6 +7,7 @@ from .lm_base import LMBase, get_model_name
|
|
|
7
7
|
from .mixins import (
|
|
8
8
|
ModelUtilsMixin,
|
|
9
9
|
TemperatureRangeMixin,
|
|
10
|
+
TokenizationMixin,
|
|
10
11
|
TwoStepPydanticMixin,
|
|
11
12
|
VLLMMixin,
|
|
12
13
|
)
|
|
@@ -14,19 +15,20 @@ from .signature import Input, InputField, Output, OutputField, Signature
|
|
|
14
15
|
|
|
15
16
|
|
|
16
17
|
__all__ = [
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
18
|
+
'LMBase',
|
|
19
|
+
'LLM',
|
|
20
|
+
'AsyncLM',
|
|
21
|
+
'AsyncLLMTask',
|
|
22
|
+
'BasePromptBuilder',
|
|
23
|
+
'LLMSignature',
|
|
24
|
+
'Signature',
|
|
25
|
+
'InputField',
|
|
26
|
+
'OutputField',
|
|
27
|
+
'Input',
|
|
28
|
+
'Output',
|
|
29
|
+
'TemperatureRangeMixin',
|
|
30
|
+
'TwoStepPydanticMixin',
|
|
31
|
+
'VLLMMixin',
|
|
32
|
+
'ModelUtilsMixin',
|
|
33
|
+
'TokenizationMixin',
|
|
32
34
|
]
|
llm_utils/lm/llm.py
CHANGED
|
@@ -20,6 +20,7 @@ from .base_prompt_builder import BasePromptBuilder
|
|
|
20
20
|
from .mixins import (
|
|
21
21
|
ModelUtilsMixin,
|
|
22
22
|
TemperatureRangeMixin,
|
|
23
|
+
TokenizationMixin,
|
|
23
24
|
TwoStepPydanticMixin,
|
|
24
25
|
VLLMMixin,
|
|
25
26
|
)
|
|
@@ -47,6 +48,7 @@ class LLM(
|
|
|
47
48
|
TwoStepPydanticMixin,
|
|
48
49
|
VLLMMixin,
|
|
49
50
|
ModelUtilsMixin,
|
|
51
|
+
TokenizationMixin,
|
|
50
52
|
):
|
|
51
53
|
"""LLM task with structured input/output handling."""
|
|
52
54
|
|
llm_utils/lm/mixins.py
CHANGED
|
@@ -396,6 +396,80 @@ class VLLMMixin:
|
|
|
396
396
|
return _kill_vllm_on_port(port)
|
|
397
397
|
|
|
398
398
|
|
|
399
|
+
class TokenizationMixin:
|
|
400
|
+
"""Mixin for tokenization operations (encode/decode)."""
|
|
401
|
+
|
|
402
|
+
def encode(
|
|
403
|
+
self,
|
|
404
|
+
text: str,
|
|
405
|
+
*,
|
|
406
|
+
add_special_tokens: bool = True,
|
|
407
|
+
return_token_strs: bool = False,
|
|
408
|
+
) -> list[int] | tuple[list[int], list[str]]:
|
|
409
|
+
"""
|
|
410
|
+
Encode text to token IDs using the model's tokenizer.
|
|
411
|
+
|
|
412
|
+
Args:
|
|
413
|
+
text: Text to tokenize
|
|
414
|
+
add_special_tokens: Whether to add special tokens (e.g., BOS)
|
|
415
|
+
return_token_strs: If True, also return token strings
|
|
416
|
+
|
|
417
|
+
Returns:
|
|
418
|
+
List of token IDs, or tuple of (token IDs, token strings)
|
|
419
|
+
"""
|
|
420
|
+
import requests
|
|
421
|
+
|
|
422
|
+
# Get base_url from client and remove /v1 suffix if present
|
|
423
|
+
# (tokenize endpoint is at root level, not under /v1)
|
|
424
|
+
base_url = str(self.client.base_url).rstrip('/')
|
|
425
|
+
if base_url.endswith('/v1'):
|
|
426
|
+
base_url = base_url[:-3] # Remove '/v1'
|
|
427
|
+
|
|
428
|
+
response = requests.post(
|
|
429
|
+
f'{base_url}/tokenize',
|
|
430
|
+
json={
|
|
431
|
+
'prompt': text,
|
|
432
|
+
'add_special_tokens': add_special_tokens,
|
|
433
|
+
'return_token_strs': return_token_strs,
|
|
434
|
+
},
|
|
435
|
+
)
|
|
436
|
+
response.raise_for_status()
|
|
437
|
+
data = response.json()
|
|
438
|
+
|
|
439
|
+
if return_token_strs:
|
|
440
|
+
return data['tokens'], data.get('token_strs', [])
|
|
441
|
+
return data['tokens']
|
|
442
|
+
|
|
443
|
+
def decode(
|
|
444
|
+
self,
|
|
445
|
+
token_ids: list[int],
|
|
446
|
+
) -> str:
|
|
447
|
+
"""
|
|
448
|
+
Decode token IDs to text using the model's tokenizer.
|
|
449
|
+
|
|
450
|
+
Args:
|
|
451
|
+
token_ids: List of token IDs to decode
|
|
452
|
+
|
|
453
|
+
Returns:
|
|
454
|
+
Decoded text string
|
|
455
|
+
"""
|
|
456
|
+
import requests
|
|
457
|
+
|
|
458
|
+
# Get base_url from client and remove /v1 suffix if present
|
|
459
|
+
# (detokenize endpoint is at root level, not under /v1)
|
|
460
|
+
base_url = str(self.client.base_url).rstrip('/')
|
|
461
|
+
if base_url.endswith('/v1'):
|
|
462
|
+
base_url = base_url[:-3] # Remove '/v1'
|
|
463
|
+
|
|
464
|
+
response = requests.post(
|
|
465
|
+
f'{base_url}/detokenize',
|
|
466
|
+
json={'tokens': token_ids},
|
|
467
|
+
)
|
|
468
|
+
response.raise_for_status()
|
|
469
|
+
data = response.json()
|
|
470
|
+
return data['prompt']
|
|
471
|
+
|
|
472
|
+
|
|
399
473
|
class ModelUtilsMixin:
|
|
400
474
|
"""Mixin for model utility methods."""
|
|
401
475
|
|
|
@@ -4,12 +4,12 @@ llm_utils/chat_format/__init__.py,sha256=a7BKtBVktgLMq2Do4iNu3YfdDdTG1v9M_BkmaEo
|
|
|
4
4
|
llm_utils/chat_format/display.py,sha256=Lffjzna9_vV3QgfiXZM2_tuVb3wqA-WxwrmoAjsJigw,17356
|
|
5
5
|
llm_utils/chat_format/transform.py,sha256=PJ2g9KT1GSbWuAs7giEbTpTAffpU9QsIXyRlbfpTZUQ,5351
|
|
6
6
|
llm_utils/chat_format/utils.py,sha256=M2EctZ6NeHXqFYufh26Y3CpSphN0bdZm5xoNaEJj5vg,1251
|
|
7
|
-
llm_utils/lm/__init__.py,sha256=
|
|
7
|
+
llm_utils/lm/__init__.py,sha256=4jYMy3wPH3tg-tHFyWEWOqrnmX4Tu32VZCdzRGMGQsI,778
|
|
8
8
|
llm_utils/lm/base_prompt_builder.py,sha256=_TzYMsWr-SsbA_JNXptUVN56lV5RfgWWTrFi-E8LMy4,12337
|
|
9
|
-
llm_utils/lm/llm.py,sha256=
|
|
9
|
+
llm_utils/lm/llm.py,sha256=yas7Khd0Djc8-GD8jL--B2oPteV9FC3PpfPbr9XCLOQ,16515
|
|
10
10
|
llm_utils/lm/llm_signature.py,sha256=vV8uZgLLd6ZKqWbq0OPywWvXAfl7hrJQnbtBF-VnZRU,1244
|
|
11
11
|
llm_utils/lm/lm_base.py,sha256=Bk3q34KrcCK_bC4Ryxbc3KqkiPL39zuVZaBQ1i6wJqs,9437
|
|
12
|
-
llm_utils/lm/mixins.py,sha256=
|
|
12
|
+
llm_utils/lm/mixins.py,sha256=o0tZiaKW4u1BxBVlT_0yTwnO8h7KnY02HX5TuWipvr0,16735
|
|
13
13
|
llm_utils/lm/openai_memoize.py,sha256=rYrSFPpgO7adsjK1lVdkJlhqqIw_13TCW7zU8eNwm3o,5185
|
|
14
14
|
llm_utils/lm/signature.py,sha256=K1hvCAqoC5CmsQ0Y_ywnYy2fRb5JzmIK8OS-hjH-5To,9971
|
|
15
15
|
llm_utils/lm/utils.py,sha256=dEKFta8S6Mm4LjIctcpFlEGL9RnmLm5DHd2TA70UWuA,12649
|
|
@@ -50,7 +50,7 @@ vision_utils/README.md,sha256=AIDZZj8jo_QNrEjFyHwd00iOO431s-js-M2dLtVTn3I,5740
|
|
|
50
50
|
vision_utils/__init__.py,sha256=hF54sT6FAxby8kDVhOvruy4yot8O-Ateey5n96O1pQM,284
|
|
51
51
|
vision_utils/io_utils.py,sha256=pI0Va6miesBysJcllK6NXCay8HpGZsaMWwlsKB2DMgA,26510
|
|
52
52
|
vision_utils/plot.py,sha256=HkNj3osA3moPuupP1VguXfPPOW614dZO5tvC-EFKpKM,12028
|
|
53
|
-
speedy_utils-1.1.
|
|
54
|
-
speedy_utils-1.1.
|
|
55
|
-
speedy_utils-1.1.
|
|
56
|
-
speedy_utils-1.1.
|
|
53
|
+
speedy_utils-1.1.35.dist-info/METADATA,sha256=wsz89syaYNXEeGjJXV8zb0W2ZrTjpN2Lj47tE7LQeEI,8048
|
|
54
|
+
speedy_utils-1.1.35.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
|
|
55
|
+
speedy_utils-1.1.35.dist-info/entry_points.txt,sha256=1rrFMfqvaMUE9hvwGiD6vnVh98kmgy0TARBj-v0Lfhs,244
|
|
56
|
+
speedy_utils-1.1.35.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|