speedy-utils 1.1.34__py3-none-any.whl → 1.1.35__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
llm_utils/lm/__init__.py CHANGED
@@ -7,6 +7,7 @@ from .lm_base import LMBase, get_model_name
7
7
  from .mixins import (
8
8
  ModelUtilsMixin,
9
9
  TemperatureRangeMixin,
10
+ TokenizationMixin,
10
11
  TwoStepPydanticMixin,
11
12
  VLLMMixin,
12
13
  )
@@ -14,19 +15,20 @@ from .signature import Input, InputField, Output, OutputField, Signature
14
15
 
15
16
 
16
17
  __all__ = [
17
- "LMBase",
18
- "LLM",
19
- "AsyncLM",
20
- "AsyncLLMTask",
21
- "BasePromptBuilder",
22
- "LLMSignature",
23
- "Signature",
24
- "InputField",
25
- "OutputField",
26
- "Input",
27
- "Output",
28
- "TemperatureRangeMixin",
29
- "TwoStepPydanticMixin",
30
- "VLLMMixin",
31
- "ModelUtilsMixin",
18
+ 'LMBase',
19
+ 'LLM',
20
+ 'AsyncLM',
21
+ 'AsyncLLMTask',
22
+ 'BasePromptBuilder',
23
+ 'LLMSignature',
24
+ 'Signature',
25
+ 'InputField',
26
+ 'OutputField',
27
+ 'Input',
28
+ 'Output',
29
+ 'TemperatureRangeMixin',
30
+ 'TwoStepPydanticMixin',
31
+ 'VLLMMixin',
32
+ 'ModelUtilsMixin',
33
+ 'TokenizationMixin',
32
34
  ]
llm_utils/lm/llm.py CHANGED
@@ -20,6 +20,7 @@ from .base_prompt_builder import BasePromptBuilder
20
20
  from .mixins import (
21
21
  ModelUtilsMixin,
22
22
  TemperatureRangeMixin,
23
+ TokenizationMixin,
23
24
  TwoStepPydanticMixin,
24
25
  VLLMMixin,
25
26
  )
@@ -47,6 +48,7 @@ class LLM(
47
48
  TwoStepPydanticMixin,
48
49
  VLLMMixin,
49
50
  ModelUtilsMixin,
51
+ TokenizationMixin,
50
52
  ):
51
53
  """LLM task with structured input/output handling."""
52
54
 
llm_utils/lm/mixins.py CHANGED
@@ -396,6 +396,80 @@ class VLLMMixin:
396
396
  return _kill_vllm_on_port(port)
397
397
 
398
398
 
399
+ class TokenizationMixin:
400
+ """Mixin for tokenization operations (encode/decode)."""
401
+
402
+ def encode(
403
+ self,
404
+ text: str,
405
+ *,
406
+ add_special_tokens: bool = True,
407
+ return_token_strs: bool = False,
408
+ ) -> list[int] | tuple[list[int], list[str]]:
409
+ """
410
+ Encode text to token IDs using the model's tokenizer.
411
+
412
+ Args:
413
+ text: Text to tokenize
414
+ add_special_tokens: Whether to add special tokens (e.g., BOS)
415
+ return_token_strs: If True, also return token strings
416
+
417
+ Returns:
418
+ List of token IDs, or tuple of (token IDs, token strings)
419
+ """
420
+ import requests
421
+
422
+ # Get base_url from client and remove /v1 suffix if present
423
+ # (tokenize endpoint is at root level, not under /v1)
424
+ base_url = str(self.client.base_url).rstrip('/')
425
+ if base_url.endswith('/v1'):
426
+ base_url = base_url[:-3] # Remove '/v1'
427
+
428
+ response = requests.post(
429
+ f'{base_url}/tokenize',
430
+ json={
431
+ 'prompt': text,
432
+ 'add_special_tokens': add_special_tokens,
433
+ 'return_token_strs': return_token_strs,
434
+ },
435
+ )
436
+ response.raise_for_status()
437
+ data = response.json()
438
+
439
+ if return_token_strs:
440
+ return data['tokens'], data.get('token_strs', [])
441
+ return data['tokens']
442
+
443
+ def decode(
444
+ self,
445
+ token_ids: list[int],
446
+ ) -> str:
447
+ """
448
+ Decode token IDs to text using the model's tokenizer.
449
+
450
+ Args:
451
+ token_ids: List of token IDs to decode
452
+
453
+ Returns:
454
+ Decoded text string
455
+ """
456
+ import requests
457
+
458
+ # Get base_url from client and remove /v1 suffix if present
459
+ # (detokenize endpoint is at root level, not under /v1)
460
+ base_url = str(self.client.base_url).rstrip('/')
461
+ if base_url.endswith('/v1'):
462
+ base_url = base_url[:-3] # Remove '/v1'
463
+
464
+ response = requests.post(
465
+ f'{base_url}/detokenize',
466
+ json={'tokens': token_ids},
467
+ )
468
+ response.raise_for_status()
469
+ data = response.json()
470
+ return data['prompt']
471
+
472
+
399
473
  class ModelUtilsMixin:
400
474
  """Mixin for model utility methods."""
401
475
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: speedy-utils
3
- Version: 1.1.34
3
+ Version: 1.1.35
4
4
  Summary: Fast and easy-to-use package for data science
5
5
  Project-URL: Homepage, https://github.com/anhvth/speedy
6
6
  Project-URL: Repository, https://github.com/anhvth/speedy
@@ -4,12 +4,12 @@ llm_utils/chat_format/__init__.py,sha256=a7BKtBVktgLMq2Do4iNu3YfdDdTG1v9M_BkmaEo
4
4
  llm_utils/chat_format/display.py,sha256=Lffjzna9_vV3QgfiXZM2_tuVb3wqA-WxwrmoAjsJigw,17356
5
5
  llm_utils/chat_format/transform.py,sha256=PJ2g9KT1GSbWuAs7giEbTpTAffpU9QsIXyRlbfpTZUQ,5351
6
6
  llm_utils/chat_format/utils.py,sha256=M2EctZ6NeHXqFYufh26Y3CpSphN0bdZm5xoNaEJj5vg,1251
7
- llm_utils/lm/__init__.py,sha256=lFE2DZRpj6eRMo11kx7oRLyYOP2FuDmz08mAcq-cYew,730
7
+ llm_utils/lm/__init__.py,sha256=4jYMy3wPH3tg-tHFyWEWOqrnmX4Tu32VZCdzRGMGQsI,778
8
8
  llm_utils/lm/base_prompt_builder.py,sha256=_TzYMsWr-SsbA_JNXptUVN56lV5RfgWWTrFi-E8LMy4,12337
9
- llm_utils/lm/llm.py,sha256=C8Z8l6Ljs7uVX-zabLcDCdTf3fpGxfljaYRM0patHUQ,16469
9
+ llm_utils/lm/llm.py,sha256=yas7Khd0Djc8-GD8jL--B2oPteV9FC3PpfPbr9XCLOQ,16515
10
10
  llm_utils/lm/llm_signature.py,sha256=vV8uZgLLd6ZKqWbq0OPywWvXAfl7hrJQnbtBF-VnZRU,1244
11
11
  llm_utils/lm/lm_base.py,sha256=Bk3q34KrcCK_bC4Ryxbc3KqkiPL39zuVZaBQ1i6wJqs,9437
12
- llm_utils/lm/mixins.py,sha256=on83g-JO2SpZ0digOpU8mooqFBX6w7Bc-DeGzVoVCX8,14536
12
+ llm_utils/lm/mixins.py,sha256=o0tZiaKW4u1BxBVlT_0yTwnO8h7KnY02HX5TuWipvr0,16735
13
13
  llm_utils/lm/openai_memoize.py,sha256=rYrSFPpgO7adsjK1lVdkJlhqqIw_13TCW7zU8eNwm3o,5185
14
14
  llm_utils/lm/signature.py,sha256=K1hvCAqoC5CmsQ0Y_ywnYy2fRb5JzmIK8OS-hjH-5To,9971
15
15
  llm_utils/lm/utils.py,sha256=dEKFta8S6Mm4LjIctcpFlEGL9RnmLm5DHd2TA70UWuA,12649
@@ -50,7 +50,7 @@ vision_utils/README.md,sha256=AIDZZj8jo_QNrEjFyHwd00iOO431s-js-M2dLtVTn3I,5740
50
50
  vision_utils/__init__.py,sha256=hF54sT6FAxby8kDVhOvruy4yot8O-Ateey5n96O1pQM,284
51
51
  vision_utils/io_utils.py,sha256=pI0Va6miesBysJcllK6NXCay8HpGZsaMWwlsKB2DMgA,26510
52
52
  vision_utils/plot.py,sha256=HkNj3osA3moPuupP1VguXfPPOW614dZO5tvC-EFKpKM,12028
53
- speedy_utils-1.1.34.dist-info/METADATA,sha256=diZ6MTVGRDDhsbxoK9eBydHrbW2I6rvYG8lXXzJnJEU,8048
54
- speedy_utils-1.1.34.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
55
- speedy_utils-1.1.34.dist-info/entry_points.txt,sha256=1rrFMfqvaMUE9hvwGiD6vnVh98kmgy0TARBj-v0Lfhs,244
56
- speedy_utils-1.1.34.dist-info/RECORD,,
53
+ speedy_utils-1.1.35.dist-info/METADATA,sha256=wsz89syaYNXEeGjJXV8zb0W2ZrTjpN2Lj47tE7LQeEI,8048
54
+ speedy_utils-1.1.35.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
55
+ speedy_utils-1.1.35.dist-info/entry_points.txt,sha256=1rrFMfqvaMUE9hvwGiD6vnVh98kmgy0TARBj-v0Lfhs,244
56
+ speedy_utils-1.1.35.dist-info/RECORD,,