speedy-utils 1.1.34__tar.gz → 1.1.35__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (117)
  1. {speedy_utils-1.1.34 → speedy_utils-1.1.35}/PKG-INFO +1 -1
  2. speedy_utils-1.1.35/debug/test_decode_api.py +19 -0
  3. speedy_utils-1.1.35/debug/test_endpoints.py +27 -0
  4. speedy_utils-1.1.35/docs/TOKENIZATION.md +149 -0
  5. speedy_utils-1.1.35/docs/TOKENIZATION_IMPLEMENTATION.md +104 -0
  6. speedy_utils-1.1.35/examples/tokenization_example.py +70 -0
  7. {speedy_utils-1.1.34 → speedy_utils-1.1.35}/pyproject.toml +1 -1
  8. {speedy_utils-1.1.34 → speedy_utils-1.1.35}/src/llm_utils/lm/__init__.py +17 -15
  9. {speedy_utils-1.1.34 → speedy_utils-1.1.35}/src/llm_utils/lm/llm.py +2 -0
  10. {speedy_utils-1.1.34 → speedy_utils-1.1.35}/src/llm_utils/lm/mixins.py +74 -0
  11. speedy_utils-1.1.35/tests/test_tokenization.py +88 -0
  12. {speedy_utils-1.1.34 → speedy_utils-1.1.35}/.github/copilot-instructions.md +0 -0
  13. {speedy_utils-1.1.34 → speedy_utils-1.1.35}/.github/skills/caching-utilities/SKILL.md +0 -0
  14. {speedy_utils-1.1.34 → speedy_utils-1.1.35}/.github/skills/caching-utilities/examples/caching_example.py +0 -0
  15. {speedy_utils-1.1.34 → speedy_utils-1.1.35}/.github/skills/io-utilities/SKILL.md +0 -0
  16. {speedy_utils-1.1.34 → speedy_utils-1.1.35}/.github/skills/io-utilities/examples/io_example.py +0 -0
  17. {speedy_utils-1.1.34 → speedy_utils-1.1.35}/.github/skills/llm-integration/SKILL.md +0 -0
  18. {speedy_utils-1.1.34 → speedy_utils-1.1.35}/.github/skills/llm-integration/examples/llm_example.py +0 -0
  19. {speedy_utils-1.1.34 → speedy_utils-1.1.35}/.github/skills/multi-threading-processing/SKILL.md +0 -0
  20. {speedy_utils-1.1.34 → speedy_utils-1.1.35}/.github/skills/ray-distributed-computing/SKILL.md +0 -0
  21. {speedy_utils-1.1.34 → speedy_utils-1.1.35}/.github/skills/skill-creation/SKILL.md +0 -0
  22. {speedy_utils-1.1.34 → speedy_utils-1.1.35}/.github/skills/vision-utilities/SKILL.md +0 -0
  23. {speedy_utils-1.1.34 → speedy_utils-1.1.35}/.github/skills/vision-utilities/examples/vision_example.py +0 -0
  24. {speedy_utils-1.1.34 → speedy_utils-1.1.35}/.github/workflows/publish.yml +0 -0
  25. {speedy_utils-1.1.34 → speedy_utils-1.1.35}/.gitignore +0 -0
  26. {speedy_utils-1.1.34 → speedy_utils-1.1.35}/.pre-commit-config.yaml +0 -0
  27. {speedy_utils-1.1.34 → speedy_utils-1.1.35}/README.md +0 -0
  28. {speedy_utils-1.1.34 → speedy_utils-1.1.35}/bumpversion.sh +0 -0
  29. {speedy_utils-1.1.34 → speedy_utils-1.1.35}/docs/IMPLEMENTATION.md +0 -0
  30. {speedy_utils-1.1.34 → speedy_utils-1.1.35}/docs/QUICKSTART.md +0 -0
  31. {speedy_utils-1.1.34 → speedy_utils-1.1.35}/docs/zero_copy_sharing.md +0 -0
  32. {speedy_utils-1.1.34 → speedy_utils-1.1.35}/examples/pytorch_large_model.py +0 -0
  33. {speedy_utils-1.1.34 → speedy_utils-1.1.35}/examples/shared_kwargs_example.py +0 -0
  34. {speedy_utils-1.1.34 → speedy_utils-1.1.35}/examples/temperature_range_example.py +0 -0
  35. {speedy_utils-1.1.34 → speedy_utils-1.1.35}/examples/test_share_ray.py +0 -0
  36. {speedy_utils-1.1.34 → speedy_utils-1.1.35}/examples/vision_utils_example.py +0 -0
  37. {speedy_utils-1.1.34 → speedy_utils-1.1.35}/experiments/exp1/dockerfile +0 -0
  38. {speedy_utils-1.1.34 → speedy_utils-1.1.35}/experiments/exp1/run_in_docker.sh +0 -0
  39. {speedy_utils-1.1.34 → speedy_utils-1.1.35}/experiments/exp1/test.png +0 -0
  40. {speedy_utils-1.1.34 → speedy_utils-1.1.35}/experiments/test_read_image.py +0 -0
  41. {speedy_utils-1.1.34 → speedy_utils-1.1.35}/notebooks/README.ipynb +0 -0
  42. {speedy_utils-1.1.34 → speedy_utils-1.1.35}/notebooks/llm_utils/llm_as_a_judge.ipynb +0 -0
  43. {speedy_utils-1.1.34 → speedy_utils-1.1.35}/notebooks/ray_tutorial.ipynb +0 -0
  44. {speedy_utils-1.1.34 → speedy_utils-1.1.35}/notebooks/test_multi_thread.ipynb +0 -0
  45. {speedy_utils-1.1.34 → speedy_utils-1.1.35}/ruff.toml +0 -0
  46. {speedy_utils-1.1.34 → speedy_utils-1.1.35}/scripts/debug_import_time.py +0 -0
  47. {speedy_utils-1.1.34 → speedy_utils-1.1.35}/scripts/deploy.sh +0 -0
  48. {speedy_utils-1.1.34 → speedy_utils-1.1.35}/scripts/imports.sh +0 -0
  49. {speedy_utils-1.1.34 → speedy_utils-1.1.35}/scripts/test_import_time_vision.py +0 -0
  50. {speedy_utils-1.1.34 → speedy_utils-1.1.35}/setup.cfg +0 -0
  51. {speedy_utils-1.1.34 → speedy_utils-1.1.35}/src/datasets/convert_to_arrow.py +0 -0
  52. {speedy_utils-1.1.34 → speedy_utils-1.1.35}/src/llm_utils/__init__.py +0 -0
  53. {speedy_utils-1.1.34 → speedy_utils-1.1.35}/src/llm_utils/chat_format/__init__.py +0 -0
  54. {speedy_utils-1.1.34 → speedy_utils-1.1.35}/src/llm_utils/chat_format/display.py +0 -0
  55. {speedy_utils-1.1.34 → speedy_utils-1.1.35}/src/llm_utils/chat_format/transform.py +0 -0
  56. {speedy_utils-1.1.34 → speedy_utils-1.1.35}/src/llm_utils/chat_format/utils.py +0 -0
  57. {speedy_utils-1.1.34 → speedy_utils-1.1.35}/src/llm_utils/group_messages.py +0 -0
  58. {speedy_utils-1.1.34 → speedy_utils-1.1.35}/src/llm_utils/lm/async_lm/__init__.py +0 -0
  59. {speedy_utils-1.1.34 → speedy_utils-1.1.35}/src/llm_utils/lm/async_lm/_utils.py +0 -0
  60. {speedy_utils-1.1.34 → speedy_utils-1.1.35}/src/llm_utils/lm/async_lm/async_llm_task.py +0 -0
  61. {speedy_utils-1.1.34 → speedy_utils-1.1.35}/src/llm_utils/lm/async_lm/async_lm.py +0 -0
  62. {speedy_utils-1.1.34 → speedy_utils-1.1.35}/src/llm_utils/lm/async_lm/async_lm_base.py +0 -0
  63. {speedy_utils-1.1.34 → speedy_utils-1.1.35}/src/llm_utils/lm/async_lm/lm_specific.py +0 -0
  64. {speedy_utils-1.1.34 → speedy_utils-1.1.35}/src/llm_utils/lm/base_prompt_builder.py +0 -0
  65. {speedy_utils-1.1.34 → speedy_utils-1.1.35}/src/llm_utils/lm/llm_signature.py +0 -0
  66. {speedy_utils-1.1.34 → speedy_utils-1.1.35}/src/llm_utils/lm/lm_base.py +0 -0
  67. {speedy_utils-1.1.34 → speedy_utils-1.1.35}/src/llm_utils/lm/openai_memoize.py +0 -0
  68. {speedy_utils-1.1.34 → speedy_utils-1.1.35}/src/llm_utils/lm/signature.py +0 -0
  69. {speedy_utils-1.1.34 → speedy_utils-1.1.35}/src/llm_utils/lm/utils.py +0 -0
  70. {speedy_utils-1.1.34 → speedy_utils-1.1.35}/src/llm_utils/scripts/README.md +0 -0
  71. {speedy_utils-1.1.34 → speedy_utils-1.1.35}/src/llm_utils/scripts/vllm_load_balancer.py +0 -0
  72. {speedy_utils-1.1.34 → speedy_utils-1.1.35}/src/llm_utils/scripts/vllm_serve.py +0 -0
  73. {speedy_utils-1.1.34 → speedy_utils-1.1.35}/src/llm_utils/vector_cache/__init__.py +0 -0
  74. {speedy_utils-1.1.34 → speedy_utils-1.1.35}/src/llm_utils/vector_cache/cli.py +0 -0
  75. {speedy_utils-1.1.34 → speedy_utils-1.1.35}/src/llm_utils/vector_cache/core.py +0 -0
  76. {speedy_utils-1.1.34 → speedy_utils-1.1.35}/src/llm_utils/vector_cache/types.py +0 -0
  77. {speedy_utils-1.1.34 → speedy_utils-1.1.35}/src/llm_utils/vector_cache/utils.py +0 -0
  78. {speedy_utils-1.1.34 → speedy_utils-1.1.35}/src/speedy_utils/__imports.py +0 -0
  79. {speedy_utils-1.1.34 → speedy_utils-1.1.35}/src/speedy_utils/__init__.py +0 -0
  80. {speedy_utils-1.1.34 → speedy_utils-1.1.35}/src/speedy_utils/common/__init__.py +0 -0
  81. {speedy_utils-1.1.34 → speedy_utils-1.1.35}/src/speedy_utils/common/clock.py +0 -0
  82. {speedy_utils-1.1.34 → speedy_utils-1.1.35}/src/speedy_utils/common/function_decorator.py +0 -0
  83. {speedy_utils-1.1.34 → speedy_utils-1.1.35}/src/speedy_utils/common/logger.py +0 -0
  84. {speedy_utils-1.1.34 → speedy_utils-1.1.35}/src/speedy_utils/common/notebook_utils.py +0 -0
  85. {speedy_utils-1.1.34 → speedy_utils-1.1.35}/src/speedy_utils/common/patcher.py +0 -0
  86. {speedy_utils-1.1.34 → speedy_utils-1.1.35}/src/speedy_utils/common/report_manager.py +0 -0
  87. {speedy_utils-1.1.34 → speedy_utils-1.1.35}/src/speedy_utils/common/utils_cache.py +0 -0
  88. {speedy_utils-1.1.34 → speedy_utils-1.1.35}/src/speedy_utils/common/utils_io.py +0 -0
  89. {speedy_utils-1.1.34 → speedy_utils-1.1.35}/src/speedy_utils/common/utils_misc.py +0 -0
  90. {speedy_utils-1.1.34 → speedy_utils-1.1.35}/src/speedy_utils/common/utils_print.py +0 -0
  91. {speedy_utils-1.1.34 → speedy_utils-1.1.35}/src/speedy_utils/multi_worker/__init__.py +0 -0
  92. {speedy_utils-1.1.34 → speedy_utils-1.1.35}/src/speedy_utils/multi_worker/process.py +0 -0
  93. {speedy_utils-1.1.34 → speedy_utils-1.1.35}/src/speedy_utils/multi_worker/thread.py +0 -0
  94. {speedy_utils-1.1.34 → speedy_utils-1.1.35}/src/speedy_utils/scripts/__init__.py +0 -0
  95. {speedy_utils-1.1.34 → speedy_utils-1.1.35}/src/speedy_utils/scripts/mpython.py +0 -0
  96. {speedy_utils-1.1.34 → speedy_utils-1.1.35}/src/speedy_utils/scripts/openapi_client_codegen.py +0 -0
  97. {speedy_utils-1.1.34 → speedy_utils-1.1.35}/src/vision_utils/README.md +0 -0
  98. {speedy_utils-1.1.34 → speedy_utils-1.1.35}/src/vision_utils/__init__.py +0 -0
  99. {speedy_utils-1.1.34 → speedy_utils-1.1.35}/src/vision_utils/io_utils.py +0 -0
  100. {speedy_utils-1.1.34 → speedy_utils-1.1.35}/src/vision_utils/plot.py +0 -0
  101. {speedy_utils-1.1.34 → speedy_utils-1.1.35}/tests/import_all.py +0 -0
  102. {speedy_utils-1.1.34 → speedy_utils-1.1.35}/tests/import_time_report.py +0 -0
  103. {speedy_utils-1.1.34 → speedy_utils-1.1.35}/tests/integration_test.py +0 -0
  104. {speedy_utils-1.1.34 → speedy_utils-1.1.35}/tests/llm_utils/test_llm_mixins.py +0 -0
  105. {speedy_utils-1.1.34 → speedy_utils-1.1.35}/tests/sample_objects.py +0 -0
  106. {speedy_utils-1.1.34 → speedy_utils-1.1.35}/tests/test.py +0 -0
  107. {speedy_utils-1.1.34 → speedy_utils-1.1.35}/tests/test_logger.py +0 -0
  108. {speedy_utils-1.1.34 → speedy_utils-1.1.35}/tests/test_logger_format.py +0 -0
  109. {speedy_utils-1.1.34 → speedy_utils-1.1.35}/tests/test_memoize_typing.py +0 -0
  110. {speedy_utils-1.1.34 → speedy_utils-1.1.35}/tests/test_mpython.py +0 -0
  111. {speedy_utils-1.1.34 → speedy_utils-1.1.35}/tests/test_multithread_error_trace.py +0 -0
  112. {speedy_utils-1.1.34 → speedy_utils-1.1.35}/tests/test_process.py +0 -0
  113. {speedy_utils-1.1.34 → speedy_utils-1.1.35}/tests/test_process_update.py +0 -0
  114. {speedy_utils-1.1.34 → speedy_utils-1.1.35}/tests/test_pytorch_sharing.py +0 -0
  115. {speedy_utils-1.1.34 → speedy_utils-1.1.35}/tests/test_shared_kwargs.py +0 -0
  116. {speedy_utils-1.1.34 → speedy_utils-1.1.35}/tests/test_thread.py +0 -0
  117. {speedy_utils-1.1.34 → speedy_utils-1.1.35}/uv.lock +0 -0
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: speedy-utils
- Version: 1.1.34
+ Version: 1.1.35
  Summary: Fast and easy-to-use package for data science
  Project-URL: Homepage, https://github.com/anhvth/speedy
  Project-URL: Repository, https://github.com/anhvth/speedy
@@ -0,0 +1,19 @@
+ from llm_utils.lm import LLM
+
+ lm = LLM(client=8000)
+
+ # Encode text to tokens
+ token_ids = lm.encode('Hello, world!')
+ print(f'Token IDs: {token_ids}')
+
+ # Decode tokens back to text
+ text = lm.decode(token_ids)
+ print(f'Decoded text: {text}')
+
+ # Get token strings for debugging
+ ids, strs = lm.encode('Hello', return_token_strs=True)
+ print(f'IDs: {ids}')
+ print(f'Strings: {strs}')
+ print(f'Tokens with strings:')
+ for i, s in zip(ids, strs):
+     print(f' {i:6d} -> "{s}"')
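A note on the constructor in this debug script: `LLM(client=8000)` appears to be a port shorthand for a local server, while every other file in this release uses an explicit `base_url`. A hedged sketch of what we take to be the equivalent explicit form (the equivalence is our assumption, not stated in this diff):

```python
from llm_utils.lm import LLM

# Assumption: an integer `client` points the LLM at a local vLLM port,
# matching the explicit form used in the docs and tests in this release.
lm = LLM(base_url='http://localhost:8000/v1')
```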
@@ -0,0 +1,27 @@
+ from llm_utils.lm import LLM
+
+ lm = LLM(client=8000)
+
+ # Debug: Check the base_url
+ print(f"Client base_url: {lm.client.base_url}")
+
+ # Try to manually check what endpoints are available
+ import requests
+
+ # Try different endpoint paths
+ test_urls = [
+     "http://localhost:8000/tokenize",
+     "http://localhost:8000/v1/tokenize",
+ ]
+
+ for url in test_urls:
+     try:
+         response = requests.post(
+             url,
+             json={"prompt": "test", "add_special_tokens": True}
+         )
+         print(f"✓ {url} - Status: {response.status_code}")
+         if response.status_code == 200:
+             print(f"  Response: {response.json()}")
+     except Exception as e:
+         print(f"✗ {url} - Error: {e}")
@@ -0,0 +1,149 @@
+ # Tokenization Support in LLM
+
+ The `LLM` class now includes built-in tokenization support through the `TokenizationMixin`, providing `encode` and `decode` methods to work with token IDs.
+
+ ## Features
+
+ - **encode()**: Convert text to token IDs
+ - **decode()**: Convert token IDs back to text
+ - Support for special tokens (BOS, EOS, etc.)
+ - Optional token string output for debugging
+
+ ## API Reference
+
+ ### encode()
+
+ ```python
+ def encode(
+     self,
+     text: str,
+     *,
+     add_special_tokens: bool = True,
+     return_token_strs: bool = False,
+ ) -> list[int] | tuple[list[int], list[str]]
+ ```
+
+ **Parameters:**
+ - `text` (str): Text to tokenize
+ - `add_special_tokens` (bool): Whether to add special tokens like BOS/EOS (default: True)
+ - `return_token_strs` (bool): If True, also return token strings (default: False)
+
+ **Returns:**
+ - `list[int]`: Token IDs (if `return_token_strs=False`)
+ - `tuple[list[int], list[str]]`: Token IDs and token strings (if `return_token_strs=True`)
+
+ ### decode()
+
+ ```python
+ def decode(
+     self,
+     token_ids: list[int],
+ ) -> str
+ ```
+
+ **Parameters:**
+ - `token_ids` (list[int]): List of token IDs to decode
+
+ **Returns:**
+ - `str`: Decoded text
+
+ ## Usage Examples
+
+ ### Basic Encoding/Decoding
+
+ ```python
+ from llm_utils.lm import LLM
+
+ lm = LLM(base_url='http://localhost:8000/v1')
+
+ # Encode text to token IDs
+ text = 'Hello, world!'
+ token_ids = lm.encode(text)
+ print(token_ids)  # [123, 456, 789, ...]
+
+ # Decode token IDs back to text
+ decoded = lm.decode(token_ids)
+ print(decoded)  # 'Hello, world!'
+ ```
+
+ ### Getting Token Strings
+
+ Useful for debugging and understanding tokenization:
+
+ ```python
+ # Get both token IDs and their string representations
+ token_ids, token_strs = lm.encode('Hello world', return_token_strs=True)
+
+ for tid, tstr in zip(token_ids, token_strs):
+     print(f'{tid:6d} -> "{tstr}"')
+ # Output:
+ #    123 -> "Hello"
+ #    456 -> " world"
+ ```
+
+ ### Counting Tokens
+
+ Count tokens before making API calls to manage context windows:
+
+ ```python
+ text = 'A very long document...'
+ token_count = len(lm.encode(text))
+ print(f'This text uses {token_count} tokens')
+
+ # Check if it fits in model's context window
+ MAX_TOKENS = 4096
+ if token_count > MAX_TOKENS:
+     print('Text is too long!')
+ ```
+
+ ### Working Without Special Tokens
+
+ ```python
+ # Without special tokens (useful for token manipulation)
+ tokens_clean = lm.encode('Hello', add_special_tokens=False)
+
+ # With special tokens (default)
+ tokens_with_special = lm.encode('Hello', add_special_tokens=True)
+
+ print(f'Clean: {len(tokens_clean)} tokens')
+ print(f'With special: {len(tokens_with_special)} tokens')
+ ```
+
+ ### Token-Level Text Manipulation
+
+ ```python
+ # Combine texts at token level
+ sent1_tokens = lm.encode('Hello', add_special_tokens=False)
+ sent2_tokens = lm.encode('world', add_special_tokens=False)
+
+ # Manually combine
+ combined = sent1_tokens + sent2_tokens
+ result = lm.decode(combined)
+ print(result)  # 'Helloworld'
+ ```
+
+ ## Requirements
+
+ The tokenization functionality requires a VLLM server (or compatible API) that implements:
+ - `/tokenize` endpoint (accepts `TokenizeCompletionRequest`)
+ - `/detokenize` endpoint (accepts `DetokenizeRequest`)
+
+ ## Implementation Details
+
+ The `TokenizationMixin` is automatically included in the `LLM` class. It uses the model's base URL to make HTTP requests to the tokenization endpoints.
+
+ The mixin can be used standalone if needed:
+
+ ```python
+ from llm_utils.lm.mixins import TokenizationMixin
+
+ class MyCustomLM(TokenizationMixin):
+     def __init__(self, base_url):
+         self.client = MOpenAI(base_url=base_url, api_key='abc')
+ ```
+
+ ## See Also
+
+ - Example script: `examples/tokenization_example.py`
+ - Tests: `tests/test_tokenization.py`
+ - API specification: See OpenAPI schema for endpoint details
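The counting and manipulation recipes in the doc above compose naturally into a truncation helper. A minimal sketch using only the documented `encode`/`decode` API (the helper name and budget default are ours, not part of the package):

```python
def truncate_to_token_budget(lm, text: str, max_tokens: int = 4096) -> str:
    """Trim text so it encodes to at most max_tokens tokens."""
    ids = lm.encode(text, add_special_tokens=False)
    if len(ids) <= max_tokens:
        return text
    # Decoding a sliced prefix may normalize the text slightly; the tests
    # in this release note the same round-tripping caveat for decode().
    return lm.decode(ids[:max_tokens])
```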
@@ -0,0 +1,104 @@
+ # Tokenization Feature Implementation Summary
+
+ ## What Was Added
+
+ Added tokenization support (encode/decode methods) to the LLM class for converting between text and token IDs.
+
+ ## Changes Made
+
+ ### 1. New Mixin: `TokenizationMixin`
+ **File:** `src/llm_utils/lm/mixins.py`
+
+ Added a new mixin class with two methods:
+ - `encode(text, add_special_tokens=True, return_token_strs=False)` - Convert text to token IDs
+ - `decode(token_ids)` - Convert token IDs back to text
+
+ ### 2. Updated LLM Class
+ **File:** `src/llm_utils/lm/llm.py`
+
+ - Added `TokenizationMixin` to the LLM class inheritance
+ - Imported the new mixin
+
+ ### 3. Updated Exports
+ **File:** `src/llm_utils/lm/__init__.py`
+
+ - Added `TokenizationMixin` to imports and `__all__`
+
+ ### 4. Documentation
+ **File:** `docs/TOKENIZATION.md`
+
+ - Comprehensive documentation with API reference
+ - Usage examples for common scenarios
+ - Implementation details
+
+ ### 5. Example Script
+ **File:** `examples/tokenization_example.py`
+
+ - Practical examples demonstrating all features
+ - Shows token counting, manipulation, debugging
+
+ ### 6. Tests
+ **File:** `tests/test_tokenization.py`
+
+ - Unit tests for encode/decode functionality
+ - Tests for special tokens handling
+ - Manual test runner included
+
+ ## API Endpoints Used
+
+ Based on the provided OpenAPI specification, the implementation uses:
+
+ 1. **POST /tokenize** - Tokenizes text input
+    - Accepts: `TokenizeCompletionRequest` with `prompt`, `add_special_tokens`, `return_token_strs`
+    - Returns: `tokens` (list of ints) and optionally `token_strs`
+
+ 2. **POST /detokenize** - Converts token IDs back to text
+    - Accepts: `DetokenizeRequest` with `tokens` (list of ints)
+    - Returns: `prompt` (string)
+
+ ## Usage
+
+ ```python
+ from llm_utils.lm import LLM
+
+ # Initialize
+ lm = LLM(base_url='http://localhost:8000/v1')
+
+ # Encode
+ token_ids = lm.encode('Hello, world!')
+
+ # Decode
+ text = lm.decode(token_ids)
+
+ # With token strings for debugging
+ token_ids, token_strs = lm.encode('Hello', return_token_strs=True)
+ ```
+
+ ## Testing
+
+ Run tests with:
+ ```bash
+ # Using pytest
+ pytest tests/test_tokenization.py
+
+ # Manual test
+ python tests/test_tokenization.py
+ ```
+
+ Run example:
+ ```bash
+ python examples/tokenization_example.py
+ ```
+
+ ## Requirements
+
+ - VLLM server (or compatible) running with tokenization endpoints
+ - `requests` library (already a dependency)
+
+ ## Benefits
+
+ 1. **Token Counting**: Check token count before API calls to manage context windows
+ 2. **Token-Level Manipulation**: Combine/split text at token boundaries
+ 3. **Debugging**: Inspect exact tokenization with `return_token_strs=True`
+ 4. **Consistency**: Use same tokenizer as the model server
+ 5. **No Local Tokenizer**: No need to install transformers or download tokenizer locally
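The two endpoints summarized above can also be exercised without the mixin. A sketch of the raw HTTP exchange, restricted to the request/response fields the summary lists (any additional fields the server may return are not assumed here):

```python
import requests

BASE = 'http://localhost:8000'  # server root; these routes are not under /v1

# POST /tokenize with a TokenizeCompletionRequest-shaped payload
tok = requests.post(
    f'{BASE}/tokenize',
    json={
        'prompt': 'Hello, world!',
        'add_special_tokens': True,
        'return_token_strs': True,
    },
)
tok.raise_for_status()
tokens = tok.json()['tokens']  # list[int]; 'token_strs' also present here

# POST /detokenize with a DetokenizeRequest-shaped payload
detok = requests.post(f'{BASE}/detokenize', json={'tokens': tokens})
detok.raise_for_status()
print(detok.json()['prompt'])  # round-tripped text
```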
@@ -0,0 +1,70 @@
+ """Example: Using LLM encode/decode methods for tokenization.
+
+ This example demonstrates how to use the tokenization functionality
+ in the LLM class to encode text to token IDs and decode token IDs
+ back to text.
+ """
+
+ from llm_utils.lm import LLM
+
+
+ def main():
+     """Demonstrate encode/decode functionality."""
+     # Initialize LLM with your VLLM server
+     lm = LLM(base_url='http://localhost:8000/v1')
+
+     # Example text
+     text = 'The quick brown fox jumps over the lazy dog.'
+     print(f'Original text: {text}\n')
+
+     # 1. Basic encoding
+     print('1. Basic encoding:')
+     token_ids = lm.encode(text)
+     print(f'   Token IDs: {token_ids}')
+     print(f'   Number of tokens: {len(token_ids)}\n')
+
+     # 2. Encoding with token strings
+     print('2. Encoding with token strings:')
+     token_ids, token_strs = lm.encode(text, return_token_strs=True)
+     for tid, tstr in zip(token_ids, token_strs):
+         print(f'   {tid:6d} -> "{tstr}"')
+     print()
+
+     # 3. Encoding without special tokens
+     print('3. Comparing with/without special tokens:')
+     tokens_with = lm.encode(text, add_special_tokens=True)
+     tokens_without = lm.encode(text, add_special_tokens=False)
+     print(f'   With special tokens: {len(tokens_with)} tokens')
+     print(f'   Without special tokens: {len(tokens_without)} tokens\n')
+
+     # 4. Decoding
+     print('4. Decoding:')
+     decoded = lm.decode(token_ids)
+     print(f'   Decoded text: {decoded}\n')
+
+     # 5. Practical use case: counting tokens before API call
+     print('5. Counting tokens for API calls:')
+     long_text = ' '.join(['This is a test sentence.'] * 10)
+     token_count = len(lm.encode(long_text))
+     print(f'   Text length: {len(long_text)} characters')
+     print(f'   Token count: {token_count} tokens')
+     print(f'   Avg chars per token: {len(long_text) / token_count:.2f}\n')
+
+     # 6. Working with custom token sequences
+     print('6. Custom token manipulation:')
+     # Encode two sentences
+     sent1 = 'Hello world'
+     sent2 = 'How are you?'
+     tokens1 = lm.encode(sent1, add_special_tokens=False)
+     tokens2 = lm.encode(sent2, add_special_tokens=False)
+
+     # Combine tokens manually
+     combined_tokens = tokens1 + tokens2
+     combined_text = lm.decode(combined_tokens)
+     print(f'   Sentence 1: {sent1}')
+     print(f'   Sentence 2: {sent2}')
+     print(f'   Combined (token-level): {combined_text}')
+
+
+ if __name__ == '__main__':
+     main()
@@ -1,6 +1,6 @@
  [project]
  name = "speedy-utils"
- version = "1.1.34"
+ version = "1.1.35"
  description = "Fast and easy-to-use package for data science"
  authors = [{ name = "AnhVTH", email = "anhvth.226@gmail.com" }]
  readme = "README.md"
@@ -7,6 +7,7 @@ from .lm_base import LMBase, get_model_name
  from .mixins import (
      ModelUtilsMixin,
      TemperatureRangeMixin,
+     TokenizationMixin,
      TwoStepPydanticMixin,
      VLLMMixin,
  )
@@ -14,19 +15,20 @@ from .signature import Input, InputField, Output, OutputField, Signature
 
 
  __all__ = [
-     "LMBase",
-     "LLM",
-     "AsyncLM",
-     "AsyncLLMTask",
-     "BasePromptBuilder",
-     "LLMSignature",
-     "Signature",
-     "InputField",
-     "OutputField",
-     "Input",
-     "Output",
-     "TemperatureRangeMixin",
-     "TwoStepPydanticMixin",
-     "VLLMMixin",
-     "ModelUtilsMixin",
+     'LMBase',
+     'LLM',
+     'AsyncLM',
+     'AsyncLLMTask',
+     'BasePromptBuilder',
+     'LLMSignature',
+     'Signature',
+     'InputField',
+     'OutputField',
+     'Input',
+     'Output',
+     'TemperatureRangeMixin',
+     'TwoStepPydanticMixin',
+     'VLLMMixin',
+     'ModelUtilsMixin',
+     'TokenizationMixin',
  ]
@@ -20,6 +20,7 @@ from .base_prompt_builder import BasePromptBuilder
  from .mixins import (
      ModelUtilsMixin,
      TemperatureRangeMixin,
+     TokenizationMixin,
      TwoStepPydanticMixin,
      VLLMMixin,
  )
@@ -47,6 +48,7 @@ class LLM(
      TwoStepPydanticMixin,
      VLLMMixin,
      ModelUtilsMixin,
+     TokenizationMixin,
  ):
      """LLM task with structured input/output handling."""
 
@@ -396,6 +396,80 @@ class VLLMMixin:
          return _kill_vllm_on_port(port)
 
 
+ class TokenizationMixin:
+     """Mixin for tokenization operations (encode/decode)."""
+
+     def encode(
+         self,
+         text: str,
+         *,
+         add_special_tokens: bool = True,
+         return_token_strs: bool = False,
+     ) -> list[int] | tuple[list[int], list[str]]:
+         """
+         Encode text to token IDs using the model's tokenizer.
+
+         Args:
+             text: Text to tokenize
+             add_special_tokens: Whether to add special tokens (e.g., BOS)
+             return_token_strs: If True, also return token strings
+
+         Returns:
+             List of token IDs, or tuple of (token IDs, token strings)
+         """
+         import requests
+
+         # Get base_url from client and remove /v1 suffix if present
+         # (tokenize endpoint is at root level, not under /v1)
+         base_url = str(self.client.base_url).rstrip('/')
+         if base_url.endswith('/v1'):
+             base_url = base_url[:-3]  # Remove '/v1'
+
+         response = requests.post(
+             f'{base_url}/tokenize',
+             json={
+                 'prompt': text,
+                 'add_special_tokens': add_special_tokens,
+                 'return_token_strs': return_token_strs,
+             },
+         )
+         response.raise_for_status()
+         data = response.json()
+
+         if return_token_strs:
+             return data['tokens'], data.get('token_strs', [])
+         return data['tokens']
+
+     def decode(
+         self,
+         token_ids: list[int],
+     ) -> str:
+         """
+         Decode token IDs to text using the model's tokenizer.
+
+         Args:
+             token_ids: List of token IDs to decode
+
+         Returns:
+             Decoded text string
+         """
+         import requests
+
+         # Get base_url from client and remove /v1 suffix if present
+         # (detokenize endpoint is at root level, not under /v1)
+         base_url = str(self.client.base_url).rstrip('/')
+         if base_url.endswith('/v1'):
+             base_url = base_url[:-3]  # Remove '/v1'
+
+         response = requests.post(
+             f'{base_url}/detokenize',
+             json={'tokens': token_ids},
+         )
+         response.raise_for_status()
+         data = response.json()
+         return data['prompt']
+
+
  class ModelUtilsMixin:
      """Mixin for model utility methods."""
 
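Since `encode` and `decode` above only ever read `self.client.base_url`, the mixin can indeed be hosted by any class exposing that attribute, as TOKENIZATION.md claims; a full OpenAI client is not required. A minimal standalone sketch (the `SimpleNamespace` stand-in client is ours; the package's own example uses `MOpenAI`):

```python
from types import SimpleNamespace

from llm_utils.lm.mixins import TokenizationMixin


class StandaloneTokenizer(TokenizationMixin):
    def __init__(self, base_url: str):
        # The mixin only reads self.client.base_url, so any object
        # carrying that attribute satisfies its contract.
        self.client = SimpleNamespace(base_url=base_url)


tok = StandaloneTokenizer('http://localhost:8000/v1')  # '/v1' is stripped
ids = tok.encode('Hello')
print(tok.decode(ids))
```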
@@ -0,0 +1,88 @@
+ """Test tokenization functionality for LLM."""
+
+ import pytest
+ from llm_utils.lm import LLM
+
+
+ def test_encode_decode():
+     """Test basic encode/decode functionality."""
+     # Skip if no VLLM server is running
+     try:
+         lm = LLM(base_url='http://localhost:8000/v1')
+
+         # Test encode
+         text = 'Hello, world!'
+         token_ids = lm.encode(text)
+         assert isinstance(token_ids, list)
+         assert all(isinstance(t, int) for t in token_ids)
+         assert len(token_ids) > 0
+
+         # Test encode with token strings
+         token_ids_with_strs, token_strs = lm.encode(
+             text, return_token_strs=True
+         )
+         assert isinstance(token_ids_with_strs, list)
+         assert isinstance(token_strs, list)
+         assert len(token_ids_with_strs) == len(token_strs)
+
+         # Test decode
+         decoded = lm.decode(token_ids)
+         assert isinstance(decoded, str)
+         # Note: decoded text might have slight differences due to tokenizer
+         # behavior (e.g., special tokens), so we don't assert exact match
+
+     except Exception as e:
+         pytest.skip(f'VLLM server not available: {e}')
+
+
+ def test_encode_with_special_tokens():
+     """Test encode with and without special tokens."""
+     try:
+         lm = LLM(base_url='http://localhost:8000/v1')
+
+         text = 'Test text'
+
+         # With special tokens (default)
+         tokens_with = lm.encode(text, add_special_tokens=True)
+
+         # Without special tokens
+         tokens_without = lm.encode(text, add_special_tokens=False)
+
+         # Typically tokens_with should have more tokens (BOS, EOS, etc.)
+         # but this depends on the model
+         assert isinstance(tokens_with, list)
+         assert isinstance(tokens_without, list)
+
+     except Exception as e:
+         pytest.skip(f'VLLM server not available: {e}')
+
+
+ if __name__ == '__main__':
+     # Simple manual test
+     print('Testing tokenization...')
+     try:
+         lm = LLM(base_url='http://localhost:8000/v1')
+
+         text = 'Hello, how are you?'
+         print(f'Original text: {text}')
+
+         # Encode
+         token_ids = lm.encode(text)
+         print(f'Token IDs: {token_ids}')
+
+         # Encode with token strings
+         token_ids_with_strs, token_strs = lm.encode(
+             text, return_token_strs=True
+         )
+         print(f'Tokens with strings: {list(zip(token_ids_with_strs, token_strs))}')
+
+         # Decode
+         decoded = lm.decode(token_ids)
+         print(f'Decoded text: {decoded}')
+
+         print('✓ All tests passed!')
+
+     except Exception as e:
+         print(f'✗ Error: {e}')
+         import traceback
+         traceback.print_exc()