speedy-utils 1.1.34__tar.gz → 1.1.35__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {speedy_utils-1.1.34 → speedy_utils-1.1.35}/PKG-INFO +1 -1
- speedy_utils-1.1.35/debug/test_decode_api.py +19 -0
- speedy_utils-1.1.35/debug/test_endpoints.py +27 -0
- speedy_utils-1.1.35/docs/TOKENIZATION.md +149 -0
- speedy_utils-1.1.35/docs/TOKENIZATION_IMPLEMENTATION.md +104 -0
- speedy_utils-1.1.35/examples/tokenization_example.py +70 -0
- {speedy_utils-1.1.34 → speedy_utils-1.1.35}/pyproject.toml +1 -1
- {speedy_utils-1.1.34 → speedy_utils-1.1.35}/src/llm_utils/lm/__init__.py +17 -15
- {speedy_utils-1.1.34 → speedy_utils-1.1.35}/src/llm_utils/lm/llm.py +2 -0
- {speedy_utils-1.1.34 → speedy_utils-1.1.35}/src/llm_utils/lm/mixins.py +74 -0
- speedy_utils-1.1.35/tests/test_tokenization.py +88 -0
- {speedy_utils-1.1.34 → speedy_utils-1.1.35}/.github/copilot-instructions.md +0 -0
- {speedy_utils-1.1.34 → speedy_utils-1.1.35}/.github/skills/caching-utilities/SKILL.md +0 -0
- {speedy_utils-1.1.34 → speedy_utils-1.1.35}/.github/skills/caching-utilities/examples/caching_example.py +0 -0
- {speedy_utils-1.1.34 → speedy_utils-1.1.35}/.github/skills/io-utilities/SKILL.md +0 -0
- {speedy_utils-1.1.34 → speedy_utils-1.1.35}/.github/skills/io-utilities/examples/io_example.py +0 -0
- {speedy_utils-1.1.34 → speedy_utils-1.1.35}/.github/skills/llm-integration/SKILL.md +0 -0
- {speedy_utils-1.1.34 → speedy_utils-1.1.35}/.github/skills/llm-integration/examples/llm_example.py +0 -0
- {speedy_utils-1.1.34 → speedy_utils-1.1.35}/.github/skills/multi-threading-processing/SKILL.md +0 -0
- {speedy_utils-1.1.34 → speedy_utils-1.1.35}/.github/skills/ray-distributed-computing/SKILL.md +0 -0
- {speedy_utils-1.1.34 → speedy_utils-1.1.35}/.github/skills/skill-creation/SKILL.md +0 -0
- {speedy_utils-1.1.34 → speedy_utils-1.1.35}/.github/skills/vision-utilities/SKILL.md +0 -0
- {speedy_utils-1.1.34 → speedy_utils-1.1.35}/.github/skills/vision-utilities/examples/vision_example.py +0 -0
- {speedy_utils-1.1.34 → speedy_utils-1.1.35}/.github/workflows/publish.yml +0 -0
- {speedy_utils-1.1.34 → speedy_utils-1.1.35}/.gitignore +0 -0
- {speedy_utils-1.1.34 → speedy_utils-1.1.35}/.pre-commit-config.yaml +0 -0
- {speedy_utils-1.1.34 → speedy_utils-1.1.35}/README.md +0 -0
- {speedy_utils-1.1.34 → speedy_utils-1.1.35}/bumpversion.sh +0 -0
- {speedy_utils-1.1.34 → speedy_utils-1.1.35}/docs/IMPLEMENTATION.md +0 -0
- {speedy_utils-1.1.34 → speedy_utils-1.1.35}/docs/QUICKSTART.md +0 -0
- {speedy_utils-1.1.34 → speedy_utils-1.1.35}/docs/zero_copy_sharing.md +0 -0
- {speedy_utils-1.1.34 → speedy_utils-1.1.35}/examples/pytorch_large_model.py +0 -0
- {speedy_utils-1.1.34 → speedy_utils-1.1.35}/examples/shared_kwargs_example.py +0 -0
- {speedy_utils-1.1.34 → speedy_utils-1.1.35}/examples/temperature_range_example.py +0 -0
- {speedy_utils-1.1.34 → speedy_utils-1.1.35}/examples/test_share_ray.py +0 -0
- {speedy_utils-1.1.34 → speedy_utils-1.1.35}/examples/vision_utils_example.py +0 -0
- {speedy_utils-1.1.34 → speedy_utils-1.1.35}/experiments/exp1/dockerfile +0 -0
- {speedy_utils-1.1.34 → speedy_utils-1.1.35}/experiments/exp1/run_in_docker.sh +0 -0
- {speedy_utils-1.1.34 → speedy_utils-1.1.35}/experiments/exp1/test.png +0 -0
- {speedy_utils-1.1.34 → speedy_utils-1.1.35}/experiments/test_read_image.py +0 -0
- {speedy_utils-1.1.34 → speedy_utils-1.1.35}/notebooks/README.ipynb +0 -0
- {speedy_utils-1.1.34 → speedy_utils-1.1.35}/notebooks/llm_utils/llm_as_a_judge.ipynb +0 -0
- {speedy_utils-1.1.34 → speedy_utils-1.1.35}/notebooks/ray_tutorial.ipynb +0 -0
- {speedy_utils-1.1.34 → speedy_utils-1.1.35}/notebooks/test_multi_thread.ipynb +0 -0
- {speedy_utils-1.1.34 → speedy_utils-1.1.35}/ruff.toml +0 -0
- {speedy_utils-1.1.34 → speedy_utils-1.1.35}/scripts/debug_import_time.py +0 -0
- {speedy_utils-1.1.34 → speedy_utils-1.1.35}/scripts/deploy.sh +0 -0
- {speedy_utils-1.1.34 → speedy_utils-1.1.35}/scripts/imports.sh +0 -0
- {speedy_utils-1.1.34 → speedy_utils-1.1.35}/scripts/test_import_time_vision.py +0 -0
- {speedy_utils-1.1.34 → speedy_utils-1.1.35}/setup.cfg +0 -0
- {speedy_utils-1.1.34 → speedy_utils-1.1.35}/src/datasets/convert_to_arrow.py +0 -0
- {speedy_utils-1.1.34 → speedy_utils-1.1.35}/src/llm_utils/__init__.py +0 -0
- {speedy_utils-1.1.34 → speedy_utils-1.1.35}/src/llm_utils/chat_format/__init__.py +0 -0
- {speedy_utils-1.1.34 → speedy_utils-1.1.35}/src/llm_utils/chat_format/display.py +0 -0
- {speedy_utils-1.1.34 → speedy_utils-1.1.35}/src/llm_utils/chat_format/transform.py +0 -0
- {speedy_utils-1.1.34 → speedy_utils-1.1.35}/src/llm_utils/chat_format/utils.py +0 -0
- {speedy_utils-1.1.34 → speedy_utils-1.1.35}/src/llm_utils/group_messages.py +0 -0
- {speedy_utils-1.1.34 → speedy_utils-1.1.35}/src/llm_utils/lm/async_lm/__init__.py +0 -0
- {speedy_utils-1.1.34 → speedy_utils-1.1.35}/src/llm_utils/lm/async_lm/_utils.py +0 -0
- {speedy_utils-1.1.34 → speedy_utils-1.1.35}/src/llm_utils/lm/async_lm/async_llm_task.py +0 -0
- {speedy_utils-1.1.34 → speedy_utils-1.1.35}/src/llm_utils/lm/async_lm/async_lm.py +0 -0
- {speedy_utils-1.1.34 → speedy_utils-1.1.35}/src/llm_utils/lm/async_lm/async_lm_base.py +0 -0
- {speedy_utils-1.1.34 → speedy_utils-1.1.35}/src/llm_utils/lm/async_lm/lm_specific.py +0 -0
- {speedy_utils-1.1.34 → speedy_utils-1.1.35}/src/llm_utils/lm/base_prompt_builder.py +0 -0
- {speedy_utils-1.1.34 → speedy_utils-1.1.35}/src/llm_utils/lm/llm_signature.py +0 -0
- {speedy_utils-1.1.34 → speedy_utils-1.1.35}/src/llm_utils/lm/lm_base.py +0 -0
- {speedy_utils-1.1.34 → speedy_utils-1.1.35}/src/llm_utils/lm/openai_memoize.py +0 -0
- {speedy_utils-1.1.34 → speedy_utils-1.1.35}/src/llm_utils/lm/signature.py +0 -0
- {speedy_utils-1.1.34 → speedy_utils-1.1.35}/src/llm_utils/lm/utils.py +0 -0
- {speedy_utils-1.1.34 → speedy_utils-1.1.35}/src/llm_utils/scripts/README.md +0 -0
- {speedy_utils-1.1.34 → speedy_utils-1.1.35}/src/llm_utils/scripts/vllm_load_balancer.py +0 -0
- {speedy_utils-1.1.34 → speedy_utils-1.1.35}/src/llm_utils/scripts/vllm_serve.py +0 -0
- {speedy_utils-1.1.34 → speedy_utils-1.1.35}/src/llm_utils/vector_cache/__init__.py +0 -0
- {speedy_utils-1.1.34 → speedy_utils-1.1.35}/src/llm_utils/vector_cache/cli.py +0 -0
- {speedy_utils-1.1.34 → speedy_utils-1.1.35}/src/llm_utils/vector_cache/core.py +0 -0
- {speedy_utils-1.1.34 → speedy_utils-1.1.35}/src/llm_utils/vector_cache/types.py +0 -0
- {speedy_utils-1.1.34 → speedy_utils-1.1.35}/src/llm_utils/vector_cache/utils.py +0 -0
- {speedy_utils-1.1.34 → speedy_utils-1.1.35}/src/speedy_utils/__imports.py +0 -0
- {speedy_utils-1.1.34 → speedy_utils-1.1.35}/src/speedy_utils/__init__.py +0 -0
- {speedy_utils-1.1.34 → speedy_utils-1.1.35}/src/speedy_utils/common/__init__.py +0 -0
- {speedy_utils-1.1.34 → speedy_utils-1.1.35}/src/speedy_utils/common/clock.py +0 -0
- {speedy_utils-1.1.34 → speedy_utils-1.1.35}/src/speedy_utils/common/function_decorator.py +0 -0
- {speedy_utils-1.1.34 → speedy_utils-1.1.35}/src/speedy_utils/common/logger.py +0 -0
- {speedy_utils-1.1.34 → speedy_utils-1.1.35}/src/speedy_utils/common/notebook_utils.py +0 -0
- {speedy_utils-1.1.34 → speedy_utils-1.1.35}/src/speedy_utils/common/patcher.py +0 -0
- {speedy_utils-1.1.34 → speedy_utils-1.1.35}/src/speedy_utils/common/report_manager.py +0 -0
- {speedy_utils-1.1.34 → speedy_utils-1.1.35}/src/speedy_utils/common/utils_cache.py +0 -0
- {speedy_utils-1.1.34 → speedy_utils-1.1.35}/src/speedy_utils/common/utils_io.py +0 -0
- {speedy_utils-1.1.34 → speedy_utils-1.1.35}/src/speedy_utils/common/utils_misc.py +0 -0
- {speedy_utils-1.1.34 → speedy_utils-1.1.35}/src/speedy_utils/common/utils_print.py +0 -0
- {speedy_utils-1.1.34 → speedy_utils-1.1.35}/src/speedy_utils/multi_worker/__init__.py +0 -0
- {speedy_utils-1.1.34 → speedy_utils-1.1.35}/src/speedy_utils/multi_worker/process.py +0 -0
- {speedy_utils-1.1.34 → speedy_utils-1.1.35}/src/speedy_utils/multi_worker/thread.py +0 -0
- {speedy_utils-1.1.34 → speedy_utils-1.1.35}/src/speedy_utils/scripts/__init__.py +0 -0
- {speedy_utils-1.1.34 → speedy_utils-1.1.35}/src/speedy_utils/scripts/mpython.py +0 -0
- {speedy_utils-1.1.34 → speedy_utils-1.1.35}/src/speedy_utils/scripts/openapi_client_codegen.py +0 -0
- {speedy_utils-1.1.34 → speedy_utils-1.1.35}/src/vision_utils/README.md +0 -0
- {speedy_utils-1.1.34 → speedy_utils-1.1.35}/src/vision_utils/__init__.py +0 -0
- {speedy_utils-1.1.34 → speedy_utils-1.1.35}/src/vision_utils/io_utils.py +0 -0
- {speedy_utils-1.1.34 → speedy_utils-1.1.35}/src/vision_utils/plot.py +0 -0
- {speedy_utils-1.1.34 → speedy_utils-1.1.35}/tests/import_all.py +0 -0
- {speedy_utils-1.1.34 → speedy_utils-1.1.35}/tests/import_time_report.py +0 -0
- {speedy_utils-1.1.34 → speedy_utils-1.1.35}/tests/integration_test.py +0 -0
- {speedy_utils-1.1.34 → speedy_utils-1.1.35}/tests/llm_utils/test_llm_mixins.py +0 -0
- {speedy_utils-1.1.34 → speedy_utils-1.1.35}/tests/sample_objects.py +0 -0
- {speedy_utils-1.1.34 → speedy_utils-1.1.35}/tests/test.py +0 -0
- {speedy_utils-1.1.34 → speedy_utils-1.1.35}/tests/test_logger.py +0 -0
- {speedy_utils-1.1.34 → speedy_utils-1.1.35}/tests/test_logger_format.py +0 -0
- {speedy_utils-1.1.34 → speedy_utils-1.1.35}/tests/test_memoize_typing.py +0 -0
- {speedy_utils-1.1.34 → speedy_utils-1.1.35}/tests/test_mpython.py +0 -0
- {speedy_utils-1.1.34 → speedy_utils-1.1.35}/tests/test_multithread_error_trace.py +0 -0
- {speedy_utils-1.1.34 → speedy_utils-1.1.35}/tests/test_process.py +0 -0
- {speedy_utils-1.1.34 → speedy_utils-1.1.35}/tests/test_process_update.py +0 -0
- {speedy_utils-1.1.34 → speedy_utils-1.1.35}/tests/test_pytorch_sharing.py +0 -0
- {speedy_utils-1.1.34 → speedy_utils-1.1.35}/tests/test_shared_kwargs.py +0 -0
- {speedy_utils-1.1.34 → speedy_utils-1.1.35}/tests/test_thread.py +0 -0
- {speedy_utils-1.1.34 → speedy_utils-1.1.35}/uv.lock +0 -0

speedy_utils-1.1.35/debug/test_decode_api.py
@@ -0,0 +1,19 @@
+from llm_utils.lm import LLM
+
+lm = LLM(client=8000)
+
+# Encode text to tokens
+token_ids = lm.encode('Hello, world!')
+print(f'Token IDs: {token_ids}')
+
+# Decode tokens back to text
+text = lm.decode(token_ids)
+print(f'Decoded text: {text}')
+
+# Get token strings for debugging
+ids, strs = lm.encode('Hello', return_token_strs=True)
+print(f'IDs: {ids}')
+print(f'Strings: {strs}')
+print(f'Tokens with strings:')
+for i, s in zip(ids, strs):
+    print(f' {i:6d} -> "{s}"')

speedy_utils-1.1.35/debug/test_endpoints.py
@@ -0,0 +1,27 @@
+from llm_utils.lm import LLM
+
+lm = LLM(client=8000)
+
+# Debug: Check the base_url
+print(f"Client base_url: {lm.client.base_url}")
+
+# Try to manually check what endpoints are available
+import requests
+
+# Try different endpoint paths
+test_urls = [
+    "http://localhost:8000/tokenize",
+    "http://localhost:8000/v1/tokenize",
+]
+
+for url in test_urls:
+    try:
+        response = requests.post(
+            url,
+            json={"prompt": "test", "add_special_tokens": True}
+        )
+        print(f"✓ {url} - Status: {response.status_code}")
+        if response.status_code == 200:
+            print(f" Response: {response.json()}")
+    except Exception as e:
+        print(f"✗ {url} - Error: {e}")

speedy_utils-1.1.35/docs/TOKENIZATION.md
@@ -0,0 +1,149 @@
+# Tokenization Support in LLM
+
+The `LLM` class now includes built-in tokenization support through the `TokenizationMixin`, providing `encode` and `decode` methods to work with token IDs.
+
+## Features
+
+- **encode()**: Convert text to token IDs
+- **decode()**: Convert token IDs back to text
+- Support for special tokens (BOS, EOS, etc.)
+- Optional token string output for debugging
+
+## API Reference
+
+### encode()
+
+```python
+def encode(
+    self,
+    text: str,
+    *,
+    add_special_tokens: bool = True,
+    return_token_strs: bool = False,
+) -> list[int] | tuple[list[int], list[str]]
+```
+
+**Parameters:**
+- `text` (str): Text to tokenize
+- `add_special_tokens` (bool): Whether to add special tokens like BOS/EOS (default: True)
+- `return_token_strs` (bool): If True, also return token strings (default: False)
+
+**Returns:**
+- `list[int]`: Token IDs (if `return_token_strs=False`)
+- `tuple[list[int], list[str]]`: Token IDs and token strings (if `return_token_strs=True`)
+
+### decode()
+
+```python
+def decode(
+    self,
+    token_ids: list[int],
+) -> str
+```
+
+**Parameters:**
+- `token_ids` (list[int]): List of token IDs to decode
+
+**Returns:**
+- `str`: Decoded text
+
+## Usage Examples
+
+### Basic Encoding/Decoding
+
+```python
+from llm_utils.lm import LLM
+
+lm = LLM(base_url='http://localhost:8000/v1')
+
+# Encode text to token IDs
+text = 'Hello, world!'
+token_ids = lm.encode(text)
+print(token_ids)  # [123, 456, 789, ...]
+
+# Decode token IDs back to text
+decoded = lm.decode(token_ids)
+print(decoded)  # 'Hello, world!'
+```
+
+### Getting Token Strings
+
+Useful for debugging and understanding tokenization:
+
+```python
+# Get both token IDs and their string representations
+token_ids, token_strs = lm.encode('Hello world', return_token_strs=True)
+
+for tid, tstr in zip(token_ids, token_strs):
+    print(f'{tid:6d} -> "{tstr}"')
+# Output:
+#    123 -> "Hello"
+#    456 -> " world"
+```
+
+### Counting Tokens
+
+Count tokens before making API calls to manage context windows:
+
+```python
+text = 'A very long document...'
+token_count = len(lm.encode(text))
+print(f'This text uses {token_count} tokens')
+
+# Check if it fits in model's context window
+MAX_TOKENS = 4096
+if token_count > MAX_TOKENS:
+    print('Text is too long!')
+```
+
+### Working Without Special Tokens
+
+```python
+# Without special tokens (useful for token manipulation)
+tokens_clean = lm.encode('Hello', add_special_tokens=False)
+
+# With special tokens (default)
+tokens_with_special = lm.encode('Hello', add_special_tokens=True)
+
+print(f'Clean: {len(tokens_clean)} tokens')
+print(f'With special: {len(tokens_with_special)} tokens')
+```
+
+### Token-Level Text Manipulation
+
+```python
+# Combine texts at token level
+sent1_tokens = lm.encode('Hello', add_special_tokens=False)
+sent2_tokens = lm.encode('world', add_special_tokens=False)
+
+# Manually combine
+combined = sent1_tokens + sent2_tokens
+result = lm.decode(combined)
+print(result)  # 'Helloworld'
+```
+
+## Requirements
+
+The tokenization functionality requires a VLLM server (or compatible API) that implements:
+- `/tokenize` endpoint (accepts `TokenizeCompletionRequest`)
+- `/detokenize` endpoint (accepts `DetokenizeRequest`)
+
+## Implementation Details
+
+The `TokenizationMixin` is automatically included in the `LLM` class. It uses the model's base URL to make HTTP requests to the tokenization endpoints.
+
+The mixin can be used standalone if needed:
+
+```python
+from llm_utils.lm.mixins import TokenizationMixin
+
+class MyCustomLM(TokenizationMixin):
+    def __init__(self, base_url):
+        self.client = MOpenAI(base_url=base_url, api_key='abc')
+```
+
+## See Also
+
+- Example script: `examples/tokenization_example.py`
+- Tests: `tests/test_tokenization.py`
+- API specification: See OpenAPI schema for endpoint details
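
Editor's note: the counting and token-level manipulation examples in TOKENIZATION.md above combine naturally into a pre-flight truncation step. The following is a minimal illustrative sketch, not part of this release, assuming a vLLM server at http://localhost:8000/v1 and the encode()/decode() API documented above; the 4096-token budget is an assumption and depends on the served model.

from llm_utils.lm import LLM

lm = LLM(base_url='http://localhost:8000/v1')

MAX_PROMPT_TOKENS = 4096  # assumed budget; the real limit depends on the served model


def truncate_to_budget(text: str, budget: int = MAX_PROMPT_TOKENS) -> str:
    # Count with the server-side tokenizer, then cut at a token boundary if needed
    ids = lm.encode(text, add_special_tokens=False)
    if len(ids) <= budget:
        return text
    return lm.decode(ids[:budget])


prompt = truncate_to_budget('A very long document... ' * 1000)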

speedy_utils-1.1.35/docs/TOKENIZATION_IMPLEMENTATION.md
@@ -0,0 +1,104 @@
+# Tokenization Feature Implementation Summary
+
+## What Was Added
+
+Added tokenization support (encode/decode methods) to the LLM class for converting between text and token IDs.
+
+## Changes Made
+
+### 1. New Mixin: `TokenizationMixin`
+**File:** `src/llm_utils/lm/mixins.py`
+
+Added a new mixin class with two methods:
+- `encode(text, add_special_tokens=True, return_token_strs=False)` - Convert text to token IDs
+- `decode(token_ids)` - Convert token IDs back to text
+
+### 2. Updated LLM Class
+**File:** `src/llm_utils/lm/llm.py`
+
+- Added `TokenizationMixin` to the LLM class inheritance
+- Imported the new mixin
+
+### 3. Updated Exports
+**File:** `src/llm_utils/lm/__init__.py`
+
+- Added `TokenizationMixin` to imports and `__all__`
+
+### 4. Documentation
+**File:** `docs/TOKENIZATION.md`
+
+- Comprehensive documentation with API reference
+- Usage examples for common scenarios
+- Implementation details
+
+### 5. Example Script
+**File:** `examples/tokenization_example.py`
+
+- Practical examples demonstrating all features
+- Shows token counting, manipulation, debugging
+
+### 6. Tests
+**File:** `tests/test_tokenization.py`
+
+- Unit tests for encode/decode functionality
+- Tests for special tokens handling
+- Manual test runner included
+
+## API Endpoints Used
+
+Based on the provided OpenAPI specification, the implementation uses:
+
+1. **POST /tokenize** - Tokenizes text input
+   - Accepts: `TokenizeCompletionRequest` with `prompt`, `add_special_tokens`, `return_token_strs`
+   - Returns: `tokens` (list of ints) and optionally `token_strs`
+
+2. **POST /detokenize** - Converts token IDs back to text
+   - Accepts: `DetokenizeRequest` with `tokens` (list of ints)
+   - Returns: `prompt` (string)
+
+## Usage
+
+```python
+from llm_utils.lm import LLM
+
+# Initialize
+lm = LLM(base_url='http://localhost:8000/v1')
+
+# Encode
+token_ids = lm.encode('Hello, world!')
+
+# Decode
+text = lm.decode(token_ids)
+
+# With token strings for debugging
+token_ids, token_strs = lm.encode('Hello', return_token_strs=True)
+```
+
+## Testing
+
+Run tests with:
+```bash
+# Using pytest
+pytest tests/test_tokenization.py
+
+# Manual test
+python tests/test_tokenization.py
+```
+
+Run example:
+```bash
+python examples/tokenization_example.py
+```
+
+## Requirements
+
+- VLLM server (or compatible) running with tokenization endpoints
+- `requests` library (already a dependency)
+
+## Benefits
+
+1. **Token Counting**: Check token count before API calls to manage context windows
+2. **Token-Level Manipulation**: Combine/split text at token boundaries
+3. **Debugging**: Inspect exact tokenization with `return_token_strs=True`
+4. **Consistency**: Use same tokenizer as the model server
+5. **No Local Tokenizer**: No need to install transformers or download tokenizer locally
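
Editor's note: the two endpoints listed under "API Endpoints Used" above can also be exercised directly with requests. The sketch below is a hedged illustration based only on the request and response fields named in the summary (prompt, add_special_tokens, return_token_strs, tokens, token_strs), assuming the server listens on http://localhost:8000; other fields or behaviors are not implied.

import requests

BASE = 'http://localhost:8000'  # tokenize/detokenize live at the server root, not under /v1

# POST /tokenize with the documented request fields
tok = requests.post(f'{BASE}/tokenize', json={
    'prompt': 'Hello, world!',
    'add_special_tokens': True,
    'return_token_strs': True,
})
tok.raise_for_status()
body = tok.json()
token_ids = body['tokens']            # list[int]
token_strs = body.get('token_strs')   # list[str] when return_token_strs=True

# POST /detokenize to round-trip the IDs back to text
detok = requests.post(f'{BASE}/detokenize', json={'tokens': token_ids})
detok.raise_for_status()
print(detok.json()['prompt'])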

speedy_utils-1.1.35/examples/tokenization_example.py
@@ -0,0 +1,70 @@
+"""Example: Using LLM encode/decode methods for tokenization.
+
+This example demonstrates how to use the tokenization functionality
+in the LLM class to encode text to token IDs and decode token IDs
+back to text.
+"""
+
+from llm_utils.lm import LLM
+
+
+def main():
+    """Demonstrate encode/decode functionality."""
+    # Initialize LLM with your VLLM server
+    lm = LLM(base_url='http://localhost:8000/v1')
+
+    # Example text
+    text = 'The quick brown fox jumps over the lazy dog.'
+    print(f'Original text: {text}\n')
+
+    # 1. Basic encoding
+    print('1. Basic encoding:')
+    token_ids = lm.encode(text)
+    print(f'   Token IDs: {token_ids}')
+    print(f'   Number of tokens: {len(token_ids)}\n')
+
+    # 2. Encoding with token strings
+    print('2. Encoding with token strings:')
+    token_ids, token_strs = lm.encode(text, return_token_strs=True)
+    for tid, tstr in zip(token_ids, token_strs):
+        print(f'   {tid:6d} -> "{tstr}"')
+    print()
+
+    # 3. Encoding without special tokens
+    print('3. Comparing with/without special tokens:')
+    tokens_with = lm.encode(text, add_special_tokens=True)
+    tokens_without = lm.encode(text, add_special_tokens=False)
+    print(f'   With special tokens: {len(tokens_with)} tokens')
+    print(f'   Without special tokens: {len(tokens_without)} tokens\n')
+
+    # 4. Decoding
+    print('4. Decoding:')
+    decoded = lm.decode(token_ids)
+    print(f'   Decoded text: {decoded}\n')
+
+    # 5. Practical use case: counting tokens before API call
+    print('5. Counting tokens for API calls:')
+    long_text = ' '.join(['This is a test sentence.'] * 10)
+    token_count = len(lm.encode(long_text))
+    print(f'   Text length: {len(long_text)} characters')
+    print(f'   Token count: {token_count} tokens')
+    print(f'   Avg chars per token: {len(long_text) / token_count:.2f}\n')
+
+    # 6. Working with custom token sequences
+    print('6. Custom token manipulation:')
+    # Encode two sentences
+    sent1 = 'Hello world'
+    sent2 = 'How are you?'
+    tokens1 = lm.encode(sent1, add_special_tokens=False)
+    tokens2 = lm.encode(sent2, add_special_tokens=False)
+
+    # Combine tokens manually
+    combined_tokens = tokens1 + tokens2
+    combined_text = lm.decode(combined_tokens)
+    print(f'   Sentence 1: {sent1}')
+    print(f'   Sentence 2: {sent2}')
+    print(f'   Combined (token-level): {combined_text}')
+
+
+if __name__ == '__main__':
+    main()

{speedy_utils-1.1.34 → speedy_utils-1.1.35}/src/llm_utils/lm/__init__.py
@@ -7,6 +7,7 @@ from .lm_base import LMBase, get_model_name
 from .mixins import (
     ModelUtilsMixin,
     TemperatureRangeMixin,
+    TokenizationMixin,
     TwoStepPydanticMixin,
     VLLMMixin,
 )
@@ -14,19 +15,20 @@ from .signature import Input, InputField, Output, OutputField, Signature
 
 
 __all__ = [
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+    'LMBase',
+    'LLM',
+    'AsyncLM',
+    'AsyncLLMTask',
+    'BasePromptBuilder',
+    'LLMSignature',
+    'Signature',
+    'InputField',
+    'OutputField',
+    'Input',
+    'Output',
+    'TemperatureRangeMixin',
+    'TwoStepPydanticMixin',
+    'VLLMMixin',
+    'ModelUtilsMixin',
+    'TokenizationMixin',
 ]

{speedy_utils-1.1.34 → speedy_utils-1.1.35}/src/llm_utils/lm/llm.py
@@ -20,6 +20,7 @@ from .base_prompt_builder import BasePromptBuilder
 from .mixins import (
     ModelUtilsMixin,
     TemperatureRangeMixin,
+    TokenizationMixin,
     TwoStepPydanticMixin,
     VLLMMixin,
 )
@@ -47,6 +48,7 @@ class LLM(
     TwoStepPydanticMixin,
     VLLMMixin,
     ModelUtilsMixin,
+    TokenizationMixin,
 ):
     """LLM task with structured input/output handling."""
 

{speedy_utils-1.1.34 → speedy_utils-1.1.35}/src/llm_utils/lm/mixins.py
@@ -396,6 +396,80 @@ class VLLMMixin:
         return _kill_vllm_on_port(port)
 
 
+class TokenizationMixin:
+    """Mixin for tokenization operations (encode/decode)."""
+
+    def encode(
+        self,
+        text: str,
+        *,
+        add_special_tokens: bool = True,
+        return_token_strs: bool = False,
+    ) -> list[int] | tuple[list[int], list[str]]:
+        """
+        Encode text to token IDs using the model's tokenizer.
+
+        Args:
+            text: Text to tokenize
+            add_special_tokens: Whether to add special tokens (e.g., BOS)
+            return_token_strs: If True, also return token strings
+
+        Returns:
+            List of token IDs, or tuple of (token IDs, token strings)
+        """
+        import requests
+
+        # Get base_url from client and remove /v1 suffix if present
+        # (tokenize endpoint is at root level, not under /v1)
+        base_url = str(self.client.base_url).rstrip('/')
+        if base_url.endswith('/v1'):
+            base_url = base_url[:-3]  # Remove '/v1'
+
+        response = requests.post(
+            f'{base_url}/tokenize',
+            json={
+                'prompt': text,
+                'add_special_tokens': add_special_tokens,
+                'return_token_strs': return_token_strs,
+            },
+        )
+        response.raise_for_status()
+        data = response.json()
+
+        if return_token_strs:
+            return data['tokens'], data.get('token_strs', [])
+        return data['tokens']
+
+    def decode(
+        self,
+        token_ids: list[int],
+    ) -> str:
+        """
+        Decode token IDs to text using the model's tokenizer.
+
+        Args:
+            token_ids: List of token IDs to decode
+
+        Returns:
+            Decoded text string
+        """
+        import requests
+
+        # Get base_url from client and remove /v1 suffix if present
+        # (detokenize endpoint is at root level, not under /v1)
+        base_url = str(self.client.base_url).rstrip('/')
+        if base_url.endswith('/v1'):
+            base_url = base_url[:-3]  # Remove '/v1'
+
+        response = requests.post(
+            f'{base_url}/detokenize',
+            json={'tokens': token_ids},
+        )
+        response.raise_for_status()
+        data = response.json()
+        return data['prompt']
+
+
 class ModelUtilsMixin:
     """Mixin for model utility methods."""
 
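
Editor's note: the only attribute contract the new mixin relies on is a `client` with a `base_url`, so it can be attached to other hosts. The snippet below is an illustrative sketch, not from the package, pairing it with the standard openai client rather than the package's MOpenAI wrapper (that substitution is an assumption for illustration); it requires a running tokenization-capable server.

from openai import OpenAI
from llm_utils.lm.mixins import TokenizationMixin


class TokenizerOnly(TokenizationMixin):
    """Minimal host: all the mixin reads is self.client.base_url."""

    def __init__(self, base_url: str):
        # The API key is unused by /tokenize and /detokenize, so a dummy value suffices here.
        self.client = OpenAI(base_url=base_url, api_key='dummy')


tk = TokenizerOnly('http://localhost:8000/v1')
ids = tk.encode('Hello', add_special_tokens=False)
print(tk.decode(ids))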

speedy_utils-1.1.35/tests/test_tokenization.py
@@ -0,0 +1,88 @@
+"""Test tokenization functionality for LLM."""
+
+import pytest
+from llm_utils.lm import LLM
+
+
+def test_encode_decode():
+    """Test basic encode/decode functionality."""
+    # Skip if no VLLM server is running
+    try:
+        lm = LLM(base_url='http://localhost:8000/v1')
+
+        # Test encode
+        text = 'Hello, world!'
+        token_ids = lm.encode(text)
+        assert isinstance(token_ids, list)
+        assert all(isinstance(t, int) for t in token_ids)
+        assert len(token_ids) > 0
+
+        # Test encode with token strings
+        token_ids_with_strs, token_strs = lm.encode(
+            text, return_token_strs=True
+        )
+        assert isinstance(token_ids_with_strs, list)
+        assert isinstance(token_strs, list)
+        assert len(token_ids_with_strs) == len(token_strs)
+
+        # Test decode
+        decoded = lm.decode(token_ids)
+        assert isinstance(decoded, str)
+        # Note: decoded text might have slight differences due to tokenizer
+        # behavior (e.g., special tokens), so we don't assert exact match
+
+    except Exception as e:
+        pytest.skip(f'VLLM server not available: {e}')
+
+
+def test_encode_with_special_tokens():
+    """Test encode with and without special tokens."""
+    try:
+        lm = LLM(base_url='http://localhost:8000/v1')
+
+        text = 'Test text'
+
+        # With special tokens (default)
+        tokens_with = lm.encode(text, add_special_tokens=True)
+
+        # Without special tokens
+        tokens_without = lm.encode(text, add_special_tokens=False)
+
+        # Typically tokens_with should have more tokens (BOS, EOS, etc.)
+        # but this depends on the model
+        assert isinstance(tokens_with, list)
+        assert isinstance(tokens_without, list)
+
+    except Exception as e:
+        pytest.skip(f'VLLM server not available: {e}')
+
+
+if __name__ == '__main__':
+    # Simple manual test
+    print('Testing tokenization...')
+    try:
+        lm = LLM(base_url='http://localhost:8000/v1')
+
+        text = 'Hello, how are you?'
+        print(f'Original text: {text}')
+
+        # Encode
+        token_ids = lm.encode(text)
+        print(f'Token IDs: {token_ids}')
+
+        # Encode with token strings
+        token_ids_with_strs, token_strs = lm.encode(
+            text, return_token_strs=True
+        )
+        print(f'Tokens with strings: {list(zip(token_ids_with_strs, token_strs))}')
+
+        # Decode
+        decoded = lm.decode(token_ids)
+        print(f'Decoded text: {decoded}')
+
+        print('✓ All tests passed!')
+
+    except Exception as e:
+        print(f'✗ Error: {e}')
+        import traceback
+        traceback.print_exc()