speedy-utils: speedy_utils-1.1.27-py3-none-any.whl → speedy_utils-1.1.29-py3-none-any.whl

This diff shows the changes between two publicly available versions of this package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in the respective public registry.
Files changed (54)
  1. llm_utils/__init__.py +16 -4
  2. llm_utils/chat_format/__init__.py +10 -10
  3. llm_utils/chat_format/display.py +33 -21
  4. llm_utils/chat_format/transform.py +17 -19
  5. llm_utils/chat_format/utils.py +6 -4
  6. llm_utils/group_messages.py +17 -14
  7. llm_utils/lm/__init__.py +6 -5
  8. llm_utils/lm/async_lm/__init__.py +1 -0
  9. llm_utils/lm/async_lm/_utils.py +10 -9
  10. llm_utils/lm/async_lm/async_llm_task.py +141 -137
  11. llm_utils/lm/async_lm/async_lm.py +48 -42
  12. llm_utils/lm/async_lm/async_lm_base.py +59 -60
  13. llm_utils/lm/async_lm/lm_specific.py +4 -3
  14. llm_utils/lm/base_prompt_builder.py +93 -70
  15. llm_utils/lm/llm.py +126 -108
  16. llm_utils/lm/llm_signature.py +4 -2
  17. llm_utils/lm/lm_base.py +72 -73
  18. llm_utils/lm/mixins.py +102 -62
  19. llm_utils/lm/openai_memoize.py +124 -87
  20. llm_utils/lm/signature.py +105 -92
  21. llm_utils/lm/utils.py +42 -23
  22. llm_utils/scripts/vllm_load_balancer.py +23 -30
  23. llm_utils/scripts/vllm_serve.py +8 -7
  24. llm_utils/vector_cache/__init__.py +9 -3
  25. llm_utils/vector_cache/cli.py +1 -1
  26. llm_utils/vector_cache/core.py +59 -63
  27. llm_utils/vector_cache/types.py +7 -5
  28. llm_utils/vector_cache/utils.py +12 -8
  29. speedy_utils/__imports.py +244 -0
  30. speedy_utils/__init__.py +90 -194
  31. speedy_utils/all.py +125 -227
  32. speedy_utils/common/clock.py +37 -42
  33. speedy_utils/common/function_decorator.py +6 -12
  34. speedy_utils/common/logger.py +43 -52
  35. speedy_utils/common/notebook_utils.py +13 -21
  36. speedy_utils/common/patcher.py +21 -17
  37. speedy_utils/common/report_manager.py +42 -44
  38. speedy_utils/common/utils_cache.py +152 -169
  39. speedy_utils/common/utils_io.py +137 -103
  40. speedy_utils/common/utils_misc.py +15 -21
  41. speedy_utils/common/utils_print.py +22 -28
  42. speedy_utils/multi_worker/process.py +66 -79
  43. speedy_utils/multi_worker/thread.py +78 -155
  44. speedy_utils/scripts/mpython.py +38 -36
  45. speedy_utils/scripts/openapi_client_codegen.py +10 -10
  46. {speedy_utils-1.1.27.dist-info → speedy_utils-1.1.29.dist-info}/METADATA +1 -1
  47. speedy_utils-1.1.29.dist-info/RECORD +57 -0
  48. vision_utils/README.md +202 -0
  49. vision_utils/__init__.py +4 -0
  50. vision_utils/io_utils.py +735 -0
  51. vision_utils/plot.py +345 -0
  52. speedy_utils-1.1.27.dist-info/RECORD +0 -52
  53. {speedy_utils-1.1.27.dist-info → speedy_utils-1.1.29.dist-info}/WHEEL +0 -0
  54. {speedy_utils-1.1.27.dist-info → speedy_utils-1.1.29.dist-info}/entry_points.txt +0 -0
@@ -3,17 +3,19 @@
3
3
  import os
4
4
  from typing import Optional
5
5
 
6
+
6
7
  def get_default_cache_path() -> str:
7
8
  """Get the default cache path based on environment."""
8
9
  cache_dir = os.getenv("EMBED_CACHE_DIR", ".")
9
10
  return os.path.join(cache_dir, "embed_cache.sqlite")
10
11
 
12
+
11
13
  def validate_model_name(model_name: str) -> bool:
12
14
  """Validate if a model name is supported."""
13
15
  # Check if it's a URL
14
16
  if model_name.startswith("http"):
15
17
  return True
16
-
18
+
17
19
  # Check if it's a valid model path/name
18
20
  supported_prefixes = [
19
21
  "Qwen/",
@@ -23,20 +25,22 @@ def validate_model_name(model_name: str) -> bool:
23
25
  "microsoft/",
24
26
  "nvidia/",
25
27
  ]
26
-
27
- return any(model_name.startswith(prefix) for prefix in supported_prefixes) or os.path.exists(model_name)
28
+
29
+ return any(
30
+ model_name.startswith(prefix) for prefix in supported_prefixes
31
+ ) or os.path.exists(model_name)
32
+
28
33
 
29
34
  def estimate_cache_size(num_texts: int, embedding_dim: int = 1024) -> str:
30
35
  """Estimate cache size for given number of texts."""
31
36
  # Rough estimate: hash (40 bytes) + text (avg 100 bytes) + embedding (embedding_dim * 4 bytes)
32
37
  bytes_per_entry = 40 + 100 + (embedding_dim * 4)
33
38
  total_bytes = num_texts * bytes_per_entry
34
-
39
+
35
40
  if total_bytes < 1024:
36
41
  return f"{total_bytes} bytes"
37
- elif total_bytes < 1024 * 1024:
42
+ if total_bytes < 1024 * 1024:
38
43
  return f"{total_bytes / 1024:.1f} KB"
39
- elif total_bytes < 1024 * 1024 * 1024:
44
+ if total_bytes < 1024 * 1024 * 1024:
40
45
  return f"{total_bytes / (1024 * 1024):.1f} MB"
41
- else:
42
- return f"{total_bytes / (1024 * 1024 * 1024):.1f} GB"
46
+ return f"{total_bytes / (1024 * 1024 * 1024):.1f} GB"
@@ -0,0 +1,244 @@
1
+ # type: ignore
2
+ from __future__ import annotations
3
+
4
+ import time
5
+ import warnings
6
+
7
+
8
+ # Suppress lazy_loader subpackage warning
9
+ warnings.filterwarnings(
10
+ 'ignore',
11
+ message='subpackages can technically be lazily loaded',
12
+ category=RuntimeWarning,
13
+ module='lazy_loader',
14
+ )
15
+
16
+ t = time.time()
17
+ # Third-party imports
18
+ try:
19
+ # Python 3.10+
20
+ from typing import ParamSpec
21
+ except ImportError: # pragma: no cover
22
+ from typing_extensions import ParamSpec # type: ignore
23
+
24
+
25
+ import asyncio
26
+ import contextlib
27
+ import copy
28
+ import ctypes
29
+ import datetime
30
+ import functools
31
+ import gc
32
+ import inspect
33
+ import io
34
+ import json
35
+ import multiprocessing
36
+ import os
37
+ import os.path as osp
38
+ import pathlib
39
+ import pickle
40
+ import pprint
41
+ import random
42
+ import re
43
+ import sys
44
+ import textwrap
45
+ import threading
46
+ import time
47
+ import traceback
48
+ import types
49
+ import uuid
50
+ import weakref
51
+ from collections import Counter, OrderedDict, defaultdict
52
+ from collections.abc import Awaitable, Callable, Iterable, Mapping, Sequence
53
+ from collections.abc import Callable as TypingCallable
54
+ from concurrent.futures import (
55
+ FIRST_COMPLETED,
56
+ Future,
57
+ ThreadPoolExecutor,
58
+ as_completed,
59
+ wait,
60
+ )
61
+ from datetime import datetime # noqa: F811
62
+ from glob import glob
63
+ from heapq import heappop, heappush
64
+ from itertools import islice
65
+ from multiprocessing import Pool
66
+ from pathlib import Path
67
+ from threading import Lock
68
+ from types import MappingProxyType
69
+ from typing import (
70
+ IO,
71
+ TYPE_CHECKING,
72
+ Annotated,
73
+ Any,
74
+ Dict,
75
+ Generic,
76
+ List,
77
+ Literal,
78
+ Optional,
79
+ Set,
80
+ Tuple,
81
+ Type,
82
+ TypeVar,
83
+ Union,
84
+ cast,
85
+ overload,
86
+ )
87
+
88
+ import cachetools
89
+ import lazy_loader as lazy
90
+ import psutil
91
+ from fastcore.parallel import parallel
92
+ from json_repair import loads as jloads
93
+ from loguru import logger
94
+ from tqdm import tqdm
95
+
96
+
97
+ # Resolve long-import-time dependencies lazily
98
+
99
+ torch = lazy.load('torch') # lazy at runtime
100
+ np = lazy.load('numpy')
101
+ pd = lazy.load('pandas')
102
+ tqdm = lazy.load('tqdm').tqdm # type: ignore # noqa: F811
103
+ pd = lazy.load('pandas')
104
+ tabulate = lazy.load('tabulate').tabulate
105
+ xxhash = lazy.load('xxhash')
106
+ get_ipython = lazy.load('IPython.core.getipython')
107
+ HTML = lazy.load('IPython.display').HTML
108
+ display = lazy.load('IPython.display').display
109
+ # logger = lazy.load('loguru').logger
110
+ BaseModel = lazy.load('pydantic').BaseModel
111
+ _pil = lazy.load('PIL.Image')
112
+ Image = _pil.Image
113
+ matplotlib = lazy.load('matplotlib')
114
+ plt = lazy.load('matplotlib.pyplot')
115
+
116
+
117
+ ray = lazy.load('ray') # lazy at runtime
118
+ if TYPE_CHECKING:
119
+ import numpy as np
120
+ import pandas as pd
121
+ import ray
122
+ import torch
123
+ import matplotlib.pyplot as plt
124
+ # xxhash
125
+ import xxhash # type: ignore
126
+ from IPython.core.getipython import get_ipython # type: ignore
127
+ from IPython.display import HTML, display # type: ignore
128
+ from loguru import logger # type: ignore
129
+ from PIL import Image
130
+ from pydantic import BaseModel # type: ignore
131
+ from tabulate import tabulate # type: ignore
132
+ from tqdm import tqdm # type: ignore
133
+
134
+ __all__ = [
135
+ # ------------------------------------------------------------------
136
+ # Lazy-loaded external modules / objects
137
+ # ------------------------------------------------------------------
138
+ 'torch',
139
+ 'np',
140
+ 'pd',
141
+ 'tqdm',
142
+ 'tabulate',
143
+ 'xxhash',
144
+ 'get_ipython',
145
+ 'HTML',
146
+ 'display',
147
+ 'BaseModel',
148
+ 'Image',
149
+ 'ray',
150
+ # ------------------------------------------------------------------
151
+ # Standard library modules imported
152
+ # ------------------------------------------------------------------
153
+ 'asyncio',
154
+ 'contextlib',
155
+ 'copy',
156
+ 'ctypes',
157
+ 'datetime',
158
+ 'functools',
159
+ 'gc',
160
+ 'inspect',
161
+ 'io',
162
+ 'json',
163
+ 'multiprocessing',
164
+ 'os',
165
+ 'osp',
166
+ 'pathlib',
167
+ 'pickle',
168
+ 'pprint',
169
+ 'random',
170
+ 're',
171
+ 'sys',
172
+ 'textwrap',
173
+ 'threading',
174
+ 'time',
175
+ 'traceback',
176
+ 'types',
177
+ 'uuid',
178
+ 'weakref',
179
+ 'warnings',
180
+ # ------------------------------------------------------------------
181
+ # Data structures
182
+ # ------------------------------------------------------------------
183
+ 'Counter',
184
+ 'OrderedDict',
185
+ 'defaultdict',
186
+ 'MappingProxyType',
187
+ # ------------------------------------------------------------------
188
+ # File & path utilities
189
+ # ------------------------------------------------------------------
190
+ 'Path',
191
+ 'glob',
192
+ # ------------------------------------------------------------------
193
+ # Concurrency / parallelism
194
+ # ------------------------------------------------------------------
195
+ 'ThreadPoolExecutor',
196
+ 'as_completed',
197
+ 'wait',
198
+ 'FIRST_COMPLETED',
199
+ 'Future',
200
+ 'Pool',
201
+ 'Lock',
202
+ # ------------------------------------------------------------------
203
+ # Algorithms / heap helpers
204
+ # ------------------------------------------------------------------
205
+ 'heappop',
206
+ 'heappush',
207
+ 'islice',
208
+ # ------------------------------------------------------------------
209
+ # Typing
210
+ # ------------------------------------------------------------------
211
+ 'Annotated',
212
+ 'Any',
213
+ 'Awaitable',
214
+ 'Callable',
215
+ 'Dict',
216
+ 'Generic',
217
+ 'IO',
218
+ 'Iterable',
219
+ 'List',
220
+ 'Literal',
221
+ 'Mapping',
222
+ 'Optional',
223
+ 'ParamSpec',
224
+ 'Sequence',
225
+ 'Set',
226
+ 'Tuple',
227
+ 'Type',
228
+ 'TYPE_CHECKING',
229
+ 'TypeVar',
230
+ 'TypingCallable',
231
+ 'Union',
232
+ 'cast',
233
+ 'overload',
234
+ # ------------------------------------------------------------------
235
+ # Third-party modules
236
+ # ------------------------------------------------------------------
237
+ 'cachetools',
238
+ 'lazy',
239
+ 'psutil',
240
+ 'parallel',
241
+ 'jloads',
242
+ 'logger',
243
+ 'plt',
244
+ ]
speedy_utils/__init__.py CHANGED
@@ -1,122 +1,18 @@
1
- # ----------------------------------------------------------------------------
2
- # speedy_utils/__init__.py
3
- #
4
- # Main entry point and public API for the Speedy Utils library
5
- #
6
- # This module exports the primary utilities for enhanced Python development
7
- # productivity including caching mechanisms, parallel processing, file I/O,
8
- # timing utilities, and data manipulation functions. It provides a convenient
9
- # single-import interface for the most commonly used functionality.
10
- #
11
- # Public API / Data Contracts:
12
- # • setup_logger(min_interval: int = 5) -> None - Configure logging system
13
- # • log(*args, **kwargs) -> None - Rate-limited logging function
14
- # • Clock() - Timing and performance measurement utility
15
- # • speedy_timer: Clock - Pre-configured global timer instance
16
- # • timef(func) -> Callable - Function execution time decorator
17
- # • retry_runtime(sleep_seconds: int, max_retry: int, exceptions) -> Callable
18
- # • memoize(func) -> Callable - Function result caching decorator
19
- # • imemoize(func) -> Callable - In-memory caching decorator (global persistent)
20
- # • identify(obj: Any) -> str - Generate unique object identifier
21
- # • identify_uuid(obj: Any) -> str - Generate UUID-based object identifier
22
- # • load_by_ext(fname: Union[str, list[str]]) -> Any - Auto-detect file format loader
23
- # • dump_json_or_pickle(obj: Any, fname: str) -> None - Smart file serializer
24
- # • load_json_or_pickle(fname: str) -> Any - Smart file deserializer
25
- # • multi_thread(func, items, **kwargs) -> list - Parallel thread execution
26
- # • multi_process(func, items, **kwargs) -> list - Parallel process execution
27
- #
28
- # Invariants / Constraints:
29
- # • MUST import only stable, tested utilities into public namespace
30
- # • SHOULD maintain backward compatibility across minor versions
31
- # • MUST provide consistent error handling across all public functions
32
- # • SHOULD use lazy imports for heavy dependencies when possible
33
- #
34
- # Usage Example:
35
- # ```python
36
- # from speedy_utils import Clock, memoize, multi_thread, load_by_ext
37
- #
38
- # @memoize
39
- # def expensive_computation(x):
40
- # return x ** 2
41
- #
42
- # timer = Clock()
43
- # timer.start()
44
- # results = multi_thread(expensive_computation, range(100))
45
- # timer.end()
46
- # data = load_by_ext("config.json")
47
- # ```
48
- #
49
- # TODO & Future Work:
50
- # • Add async variants for I/O operations
51
- # • Implement distributed caching backend
52
- # • Add GPU acceleration utilities
53
- # ----------------------------------------------------------------------------
54
-
55
- # Import specific functions and classes from modules
56
- # Logger
57
- # Standard library imports
58
- import copy
59
- import functools
60
- import gc
61
- import inspect
62
- import json
63
- import multiprocessing
64
- import os
65
- import os.path as osp
66
- import pickle
67
- import pprint
68
- import random
69
- import re
70
- import sys
71
- import textwrap
72
- import threading
73
1
  import time
74
- import traceback
75
- import uuid
76
- from collections import Counter, defaultdict
77
- from collections.abc import Callable
78
- from concurrent.futures import ThreadPoolExecutor, as_completed
79
- from glob import glob
80
- from multiprocessing import Pool
81
- from pathlib import Path
82
- from threading import Lock
83
- from typing import (
84
- Any,
85
- Awaitable,
86
- Callable as TypingCallable,
87
- Dict,
88
- Generic,
89
- Iterable,
90
- List,
91
- Literal,
92
- Mapping,
93
- Optional,
94
- Sequence,
95
- Set,
96
- Tuple,
97
- Type,
98
- TypeVar,
99
- Union,
100
- )
101
2
 
3
+
4
+ t = time.time()
102
5
  # Third-party imports
103
- import numpy as np
104
- import pandas as pd
105
- import xxhash
106
- from IPython.core.getipython import get_ipython
107
- from IPython.display import HTML, display
108
- from loguru import logger
109
- from pydantic import BaseModel
110
- from tabulate import tabulate
111
- from tqdm import tqdm
112
6
 
113
- from speedy_utils.common.logger import log, setup_logger
7
+
8
+ from .__imports import *
114
9
 
115
10
  # Clock module
116
11
  from .common.clock import Clock, speedy_timer, timef
117
12
 
118
13
  # Function decorators
119
14
  from .common.function_decorator import retry_runtime
15
+ from .common.logger import log, setup_logger
120
16
 
121
17
  # notebook
122
18
  from .common.notebook_utils import (
@@ -159,101 +55,101 @@ from .common.utils_print import (
159
55
  from .multi_worker.process import multi_process
160
56
  from .multi_worker.thread import kill_all_thread, multi_thread
161
57
 
162
- # Define __all__ explicitly
58
+
163
59
  __all__ = [
164
60
  # Standard library
165
- "random",
166
- "copy",
167
- "functools",
168
- "gc",
169
- "inspect",
170
- "json",
171
- "multiprocessing",
172
- "os",
173
- "osp",
174
- "pickle",
175
- "pprint",
176
- "re",
177
- "sys",
178
- "textwrap",
179
- "threading",
180
- "time",
181
- "traceback",
182
- "uuid",
183
- "Counter",
184
- "ThreadPoolExecutor",
185
- "as_completed",
186
- "glob",
187
- "Pool",
188
- "Path",
189
- "Lock",
190
- "defaultdict",
61
+ 'random',
62
+ 'copy',
63
+ 'functools',
64
+ 'gc',
65
+ 'inspect',
66
+ 'json',
67
+ 'multiprocessing',
68
+ 'os',
69
+ 'osp',
70
+ 'pickle',
71
+ 'pprint',
72
+ 're',
73
+ 'sys',
74
+ 'textwrap',
75
+ 'threading',
76
+ 'time',
77
+ 'traceback',
78
+ 'uuid',
79
+ 'Counter',
80
+ 'ThreadPoolExecutor',
81
+ 'as_completed',
82
+ 'glob',
83
+ 'Pool',
84
+ 'Path',
85
+ 'Lock',
86
+ 'defaultdict',
191
87
  # Typing
192
- "Any",
193
- "Awaitable",
194
- "Callable",
195
- "TypingCallable",
196
- "Dict",
197
- "Generic",
198
- "Iterable",
199
- "List",
200
- "Literal",
201
- "Mapping",
202
- "Optional",
203
- "Sequence",
204
- "Set",
205
- "Tuple",
206
- "Type",
207
- "TypeVar",
208
- "Union",
88
+ 'Any',
89
+ 'Awaitable',
90
+ 'Callable',
91
+ 'TypingCallable',
92
+ 'Dict',
93
+ 'Generic',
94
+ 'Iterable',
95
+ 'List',
96
+ 'Literal',
97
+ 'Mapping',
98
+ 'Optional',
99
+ 'Sequence',
100
+ 'Set',
101
+ 'Tuple',
102
+ 'Type',
103
+ 'TypeVar',
104
+ 'Union',
209
105
  # Third-party
210
- "pd",
211
- "xxhash",
212
- "get_ipython",
213
- "HTML",
214
- "display",
215
- "logger",
216
- "BaseModel",
217
- "tabulate",
218
- "tqdm",
219
- "np",
106
+ 'pd',
107
+ 'xxhash',
108
+ 'get_ipython',
109
+ 'HTML',
110
+ 'display',
111
+ 'logger',
112
+ 'BaseModel',
113
+ 'tabulate',
114
+ 'tqdm',
115
+ 'np',
220
116
  # Clock module
221
- "Clock",
222
- "speedy_timer",
223
- "timef",
117
+ 'Clock',
118
+ 'speedy_timer',
119
+ 'timef',
224
120
  # Function decorators
225
- "retry_runtime",
121
+ 'retry_runtime',
226
122
  # Cache utilities
227
- "memoize",
228
- "imemoize",
229
- "identify",
230
- "identify_uuid",
123
+ 'memoize',
124
+ 'imemoize',
125
+ 'identify',
126
+ 'identify_uuid',
231
127
  # IO utilities
232
- "dump_json_or_pickle",
233
- "dump_jsonl",
234
- "load_by_ext",
235
- "load_json_or_pickle",
236
- "load_jsonl",
237
- "jdumps",
238
- "jloads",
128
+ 'dump_json_or_pickle',
129
+ 'dump_jsonl',
130
+ 'load_by_ext',
131
+ 'load_json_or_pickle',
132
+ 'load_jsonl',
133
+ 'jdumps',
134
+ 'jloads',
239
135
  # Misc utilities
240
- "mkdir_or_exist",
241
- "flatten_list",
242
- "get_arg_names",
243
- "is_notebook",
244
- "convert_to_builtin_python",
245
- "dedup",
136
+ 'mkdir_or_exist',
137
+ 'flatten_list',
138
+ 'get_arg_names',
139
+ 'is_notebook',
140
+ 'convert_to_builtin_python',
141
+ 'dedup',
246
142
  # Print utilities
247
- "display_pretty_table_html",
248
- "flatten_dict",
249
- "fprint",
250
- "print_table",
251
- "setup_logger",
252
- "log",
143
+ 'display_pretty_table_html',
144
+ 'flatten_dict',
145
+ 'fprint',
146
+ 'print_table',
147
+ 'setup_logger',
148
+ 'log',
253
149
  # Multi-worker processing
254
- "multi_process",
255
- "multi_thread",
256
- "kill_all_thread",
150
+ 'multi_process',
151
+ 'multi_thread',
152
+ 'kill_all_thread',
257
153
  # Notebook utilities
258
- "change_dir",
154
+ 'change_dir',
259
155
  ]