speedy-utils 1.1.27__py3-none-any.whl → 1.1.29__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (54)
  1. llm_utils/__init__.py +16 -4
  2. llm_utils/chat_format/__init__.py +10 -10
  3. llm_utils/chat_format/display.py +33 -21
  4. llm_utils/chat_format/transform.py +17 -19
  5. llm_utils/chat_format/utils.py +6 -4
  6. llm_utils/group_messages.py +17 -14
  7. llm_utils/lm/__init__.py +6 -5
  8. llm_utils/lm/async_lm/__init__.py +1 -0
  9. llm_utils/lm/async_lm/_utils.py +10 -9
  10. llm_utils/lm/async_lm/async_llm_task.py +141 -137
  11. llm_utils/lm/async_lm/async_lm.py +48 -42
  12. llm_utils/lm/async_lm/async_lm_base.py +59 -60
  13. llm_utils/lm/async_lm/lm_specific.py +4 -3
  14. llm_utils/lm/base_prompt_builder.py +93 -70
  15. llm_utils/lm/llm.py +126 -108
  16. llm_utils/lm/llm_signature.py +4 -2
  17. llm_utils/lm/lm_base.py +72 -73
  18. llm_utils/lm/mixins.py +102 -62
  19. llm_utils/lm/openai_memoize.py +124 -87
  20. llm_utils/lm/signature.py +105 -92
  21. llm_utils/lm/utils.py +42 -23
  22. llm_utils/scripts/vllm_load_balancer.py +23 -30
  23. llm_utils/scripts/vllm_serve.py +8 -7
  24. llm_utils/vector_cache/__init__.py +9 -3
  25. llm_utils/vector_cache/cli.py +1 -1
  26. llm_utils/vector_cache/core.py +59 -63
  27. llm_utils/vector_cache/types.py +7 -5
  28. llm_utils/vector_cache/utils.py +12 -8
  29. speedy_utils/__imports.py +244 -0
  30. speedy_utils/__init__.py +90 -194
  31. speedy_utils/all.py +125 -227
  32. speedy_utils/common/clock.py +37 -42
  33. speedy_utils/common/function_decorator.py +6 -12
  34. speedy_utils/common/logger.py +43 -52
  35. speedy_utils/common/notebook_utils.py +13 -21
  36. speedy_utils/common/patcher.py +21 -17
  37. speedy_utils/common/report_manager.py +42 -44
  38. speedy_utils/common/utils_cache.py +152 -169
  39. speedy_utils/common/utils_io.py +137 -103
  40. speedy_utils/common/utils_misc.py +15 -21
  41. speedy_utils/common/utils_print.py +22 -28
  42. speedy_utils/multi_worker/process.py +66 -79
  43. speedy_utils/multi_worker/thread.py +78 -155
  44. speedy_utils/scripts/mpython.py +38 -36
  45. speedy_utils/scripts/openapi_client_codegen.py +10 -10
  46. {speedy_utils-1.1.27.dist-info → speedy_utils-1.1.29.dist-info}/METADATA +1 -1
  47. speedy_utils-1.1.29.dist-info/RECORD +57 -0
  48. vision_utils/README.md +202 -0
  49. vision_utils/__init__.py +4 -0
  50. vision_utils/io_utils.py +735 -0
  51. vision_utils/plot.py +345 -0
  52. speedy_utils-1.1.27.dist-info/RECORD +0 -52
  53. {speedy_utils-1.1.27.dist-info → speedy_utils-1.1.29.dist-info}/WHEEL +0 -0
  54. {speedy_utils-1.1.27.dist-info → speedy_utils-1.1.29.dist-info}/entry_points.txt +0 -0
speedy_utils/all.py CHANGED
@@ -1,231 +1,129 @@
1
- # ----------------------------------------------------------------------------
2
- # speedy_utils/all.py
3
- #
4
- # Consolidated import collection for comprehensive library access
5
- #
6
- # This module provides a unified collection of standard library, third-party,
7
- # and internal imports commonly used across data science and development
8
- # workflows. It serves as a convenience module for interactive environments
9
- # and rapid prototyping by reducing boilerplate import statements.
10
- #
11
- # Public API / Data Contracts:
12
- # • All standard library modules: collections, concurrent.futures, pathlib, etc.
13
- # • Third-party dependencies: loguru.logger, pydantic.BaseModel, tqdm, tabulate
14
- # • Core utilities: Counter, defaultdict, ThreadPoolExecutor, as_completed
15
- # • Development tools: IPython.display.HTML, get_ipython for notebook detection
16
- # • Type system: Any, Dict, List, Optional, Union, TypeVar, Generic, Literal
17
- #
18
- # Invariants / Constraints:
19
- # • MUST only import stable, widely-used packages
20
- # • SHOULD handle import failures gracefully for optional dependencies
21
- # • MUST maintain consistent import aliases across the library
22
- # • SHOULD group imports by category (stdlib, third-party, internal)
23
- #
24
- # Usage Example:
25
- # ```python
26
- # from speedy_utils.all import *
27
- #
28
- # # Now have access to common utilities without individual imports
29
- # data = defaultdict(list)
30
- # results = []
31
- # for item in tqdm(items):
32
- # results.append(process(item))
33
- #
34
- # df = tabulate(results, headers=['Item', 'Result'])
35
- # display(HTML(df))
36
- # ```
37
- #
38
- # TODO & Future Work:
39
- # • Add conditional imports for ML libraries (torch, numpy, pandas)
40
- # • Implement import health checking
41
- # • Add version compatibility warnings
42
- # ----------------------------------------------------------------------------
1
+ # from speedy_utils import ( # Clock module; Function decorators; Cache utilities; IO utilities; Misc utilities; Print utilities; Multi-worker processing
2
+ # Clock,
3
+ # convert_to_builtin_python,
4
+ # display_pretty_table_html,
5
+ # dump_json_or_pickle,
6
+ # dump_jsonl,
7
+ # flatten_dict,
8
+ # flatten_list,
9
+ # fprint,
10
+ # get_arg_names,
11
+ # identify,
12
+ # identify_uuid,
13
+ # is_notebook,
14
+ # jdumps,
15
+ # jloads,
16
+ # load_by_ext,
17
+ # load_json_or_pickle,
18
+ # load_jsonl,
19
+ # log,
20
+ # memoize,
21
+ # mkdir_or_exist,
22
+ # multi_process,
23
+ # multi_thread,
24
+ # print_table,
25
+ # retry_runtime,
26
+ # setup_logger,
27
+ # speedy_timer,
28
+ # timef,
29
+ # )
43
30
 
44
- # speedy_utils/all.py
31
+ # from .__imports import *
45
32
 
46
- # Provide a consolidated set of imports for convenience
47
33
 
48
- # Standard library imports
49
- import copy
50
- import functools
51
- import gc
52
- import inspect
53
- import json
54
- import multiprocessing
55
- import os
56
- import os.path as osp
57
- import pickle
58
- import pprint
59
- import random
60
- import re
61
- import sys
62
- import textwrap
63
- import threading
64
- import time
65
- import traceback
66
- import uuid
67
- from collections import Counter, defaultdict
68
- from collections.abc import Callable
69
- from concurrent.futures import ThreadPoolExecutor, as_completed
70
- from glob import glob
71
- from multiprocessing import Pool
72
- from pathlib import Path
73
- from threading import Lock
74
- from typing import (
75
- Any,
76
- Awaitable,
77
- Callable as TypingCallable,
78
- Dict,
79
- Generic,
80
- Iterable,
81
- List,
82
- Literal,
83
- Mapping,
84
- Optional,
85
- Sequence,
86
- Set,
87
- Tuple,
88
- Type,
89
- TypeVar,
90
- Union,
91
- )
34
+ # choice = random.choice
92
35
 
93
- # Third-party imports
94
- import numpy as np
95
- import pandas as pd
96
- import xxhash
97
- from IPython.core.getipython import get_ipython
98
- from IPython.display import HTML, display
99
- from loguru import logger
100
- from pydantic import BaseModel
101
- from tabulate import tabulate
102
- from tqdm import tqdm
103
-
104
- # Import specific functions from speedy_utils
105
- from speedy_utils import ( # Clock module; Function decorators; Cache utilities; IO utilities; Misc utilities; Print utilities; Multi-worker processing
106
- Clock,
107
- convert_to_builtin_python,
108
- display_pretty_table_html,
109
- dump_json_or_pickle,
110
- dump_jsonl,
111
- flatten_dict,
112
- flatten_list,
113
- fprint,
114
- get_arg_names,
115
- identify,
116
- identify_uuid,
117
- is_notebook,
118
- jdumps,
119
- jloads,
120
- load_by_ext,
121
- load_json_or_pickle,
122
- load_jsonl,
123
- log,
124
- memoize,
125
- mkdir_or_exist,
126
- multi_process,
127
- multi_thread,
128
- print_table,
129
- retry_runtime,
130
- setup_logger,
131
- speedy_timer,
132
- timef,
133
- )
134
-
135
-
136
- choice = random.choice
137
-
138
- # Define __all__ explicitly with all exports
139
- __all__ = [
140
- # Standard library
141
- "random",
142
- "copy",
143
- "functools",
144
- "gc",
145
- "inspect",
146
- "json",
147
- "multiprocessing",
148
- "os",
149
- "osp",
150
- "pickle",
151
- "pprint",
152
- "re",
153
- "sys",
154
- "textwrap",
155
- "threading",
156
- "time",
157
- "traceback",
158
- "uuid",
159
- "Counter",
160
- "ThreadPoolExecutor",
161
- "as_completed",
162
- "glob",
163
- "Pool",
164
- "Path",
165
- "Lock",
166
- "defaultdict",
167
- # Typing
168
- "Any",
169
- "Awaitable",
170
- "Callable",
171
- "TypingCallable",
172
- "Dict",
173
- "Generic",
174
- "Iterable",
175
- "List",
176
- "Literal",
177
- "Mapping",
178
- "Optional",
179
- "Sequence",
180
- "Set",
181
- "Tuple",
182
- "Type",
183
- "TypeVar",
184
- "Union",
185
- # Third-party
186
- "pd",
187
- "xxhash",
188
- "get_ipython",
189
- "HTML",
190
- "display",
191
- "logger",
192
- "BaseModel",
193
- "tabulate",
194
- "tqdm",
195
- "np",
196
- # Clock module
197
- "Clock",
198
- "speedy_timer",
199
- "timef",
200
- # Function decorators
201
- "retry_runtime",
202
- # Cache utilities
203
- "memoize",
204
- "identify",
205
- "identify_uuid",
206
- # IO utilities
207
- "dump_json_or_pickle",
208
- "dump_jsonl",
209
- "load_by_ext",
210
- "load_json_or_pickle",
211
- "load_jsonl",
212
- "jdumps",
213
- "jloads",
214
- # Misc utilities
215
- "mkdir_or_exist",
216
- "flatten_list",
217
- "get_arg_names",
218
- "is_notebook",
219
- "convert_to_builtin_python",
220
- # Print utilities
221
- "display_pretty_table_html",
222
- "flatten_dict",
223
- "fprint",
224
- "print_table",
225
- "setup_logger",
226
- "log",
227
- # Multi-worker processing
228
- "multi_process",
229
- "multi_thread",
230
- "choice",
231
- ]
36
+ # # Define __all__ explicitly with all exports
37
+ # __all__ = [
38
+ # # Standard library
39
+ # 'random',
40
+ # 'copy',
41
+ # 'functools',
42
+ # 'gc',
43
+ # 'inspect',
44
+ # 'json',
45
+ # 'multiprocessing',
46
+ # 'os',
47
+ # 'osp',
48
+ # 'pickle',
49
+ # 'pprint',
50
+ # 're',
51
+ # 'sys',
52
+ # 'textwrap',
53
+ # 'threading',
54
+ # 'time',
55
+ # 'traceback',
56
+ # 'uuid',
57
+ # 'Counter',
58
+ # 'ThreadPoolExecutor',
59
+ # 'as_completed',
60
+ # 'glob',
61
+ # 'Pool',
62
+ # 'Path',
63
+ # 'Lock',
64
+ # 'defaultdict',
65
+ # # Typing
66
+ # 'Any',
67
+ # 'Awaitable',
68
+ # 'Callable',
69
+ # 'TypingCallable',
70
+ # 'Dict',
71
+ # 'Generic',
72
+ # 'Iterable',
73
+ # 'List',
74
+ # 'Literal',
75
+ # 'Mapping',
76
+ # 'Optional',
77
+ # 'Sequence',
78
+ # 'Set',
79
+ # 'Tuple',
80
+ # 'Type',
81
+ # 'TypeVar',
82
+ # 'Union',
83
+ # # Third-party
84
+ # 'pd',
85
+ # 'xxhash',
86
+ # 'get_ipython',
87
+ # 'HTML',
88
+ # 'display',
89
+ # 'logger',
90
+ # 'BaseModel',
91
+ # 'tabulate',
92
+ # 'tqdm',
93
+ # 'np',
94
+ # # Clock module
95
+ # 'Clock',
96
+ # 'speedy_timer',
97
+ # 'timef',
98
+ # # Function decorators
99
+ # 'retry_runtime',
100
+ # # Cache utilities
101
+ # 'memoize',
102
+ # 'identify',
103
+ # 'identify_uuid',
104
+ # # IO utilities
105
+ # 'dump_json_or_pickle',
106
+ # 'dump_jsonl',
107
+ # 'load_by_ext',
108
+ # 'load_json_or_pickle',
109
+ # 'load_jsonl',
110
+ # 'jdumps',
111
+ # 'jloads',
112
+ # # Misc utilities
113
+ # 'mkdir_or_exist',
114
+ # 'flatten_list',
115
+ # 'get_arg_names',
116
+ # 'is_notebook',
117
+ # 'convert_to_builtin_python',
118
+ # # Print utilities
119
+ # 'display_pretty_table_html',
120
+ # 'flatten_dict',
121
+ # 'fprint',
122
+ # 'print_table',
123
+ # 'setup_logger',
124
+ # 'log',
125
+ # # Multi-worker processing
126
+ # 'multi_process',
127
+ # 'multi_thread',
128
+ # 'choice',
129
+ # ]
speedy_utils/common/clock.py CHANGED
@@ -1,11 +1,7 @@
1
- import inspect
2
- import os
3
- import time
1
+ from ..__imports import *
4
2
 
5
- from loguru import logger
6
- from tabulate import tabulate
7
3
 
8
- __all__ = ["Clock", "timef"]
4
+ __all__ = ['Clock', 'timef']
9
5
 
10
6
 
11
7
  def timef(func):
@@ -17,7 +13,7 @@ def timef(func):
17
13
  end_time = time.time()
18
14
  execution_time = end_time - start_time
19
15
  logger.opt(depth=2).info(
20
- f"{func.__name__} took {execution_time:0.2f} seconds to execute."
16
+ f'{func.__name__} took {execution_time:0.2f} seconds to execute.'
21
17
  )
22
18
  return result
23
19
 
@@ -63,12 +59,12 @@ class Clock:
63
59
  self.start()
64
60
  self.print_counter = 0
65
61
  self.last_print_time = time.time()
66
- self.min_depth = float("inf")
62
+ self.min_depth = float('inf')
67
63
 
68
64
  def start(self):
69
65
  """Start the timer or reset if already started."""
70
66
  if self.start_time is not None:
71
- raise ValueError("Timer has already been started.")
67
+ raise ValueError('Timer has already been started.')
72
68
  self.start_time = time.time()
73
69
  self.last_checkpoint = self.start_time
74
70
  # logger.opt(depth=2).info(f"Timer started. {id(self)=}")
@@ -76,12 +72,12 @@ class Clock:
76
72
  def elapsed_time(self):
77
73
  """Return the time elapsed since the timer started."""
78
74
  if self.start_time is None:
79
- raise ValueError("Timer has not been started.")
75
+ raise ValueError('Timer has not been started.')
80
76
  return time.time() - self.start_time
81
77
 
82
78
  def log_elapsed_time(self, custom_logger=None):
83
79
  """Log the time elapsed since the timer started."""
84
- msg = f"Time elapsed: {self.elapsed_time():.2f} seconds."
80
+ msg = f'Time elapsed: {self.elapsed_time():.2f} seconds.'
85
81
  if custom_logger:
86
82
  custom_logger(msg)
87
83
  else:
@@ -92,15 +88,15 @@ class Clock:
92
88
  # assert self.start_time is not None, f"Timer has not been started. {id(self)=}"
93
89
  if not self.start_time:
94
90
  logger.opt(depth=2).warning(
95
- "Timer has not been started. Please call start() before using this method."
91
+ 'Timer has not been started. Please call start() before using this method.'
96
92
  )
97
- return
93
+ return None
98
94
  current_time = time.time()
99
95
  if self.last_checkpoint is None:
100
96
  logger.opt(depth=2).warning(
101
- "Last checkpoint is not set. Please call start() before using this method."
97
+ 'Last checkpoint is not set. Please call start() before using this method.'
102
98
  )
103
- return
99
+ return None
104
100
  elapsed = current_time - self.last_checkpoint
105
101
  self.last_checkpoint = current_time
106
102
  return elapsed
@@ -113,14 +109,14 @@ class Clock:
113
109
  if self.start_time is None:
114
110
  # raise ValueError("Timer has not been started.")
115
111
  logger.opt(depth=2).warning(
116
- "Timer has not been started. Please call start() before using this method."
112
+ 'Timer has not been started. Please call start() before using this method.'
117
113
  )
118
- return
114
+ return None
119
115
  if self.last_checkpoint is None:
120
116
  logger.opt(depth=2).warning(
121
- "Last checkpoint is not set. Please call start() before using this method."
117
+ 'Last checkpoint is not set. Please call start() before using this method.'
122
118
  )
123
- return
119
+ return None
124
120
  return time.time() - self.last_checkpoint
125
121
 
126
122
  def update_task(self, task_name):
@@ -131,7 +127,7 @@ class Clock:
131
127
 
132
128
  # Get the file and line number of the caller (the previous frame in the stack)
133
129
  caller_frame = stack[1]
134
- file_lineno = f"{os.path.basename(caller_frame.filename)}:{caller_frame.lineno}"
130
+ file_lineno = f'{os.path.basename(caller_frame.filename)}:{caller_frame.lineno}'
135
131
 
136
132
  # Calculate the depth of the current call (i.e., how far it is in the stack)
137
133
  call_depth = (
@@ -143,22 +139,21 @@ class Clock:
143
139
  # Update the task time in the internal task table
144
140
  if task_name not in self.task_times:
145
141
  self.task_times[task_name] = {
146
- "time": 0,
147
- "file_lineno": file_lineno,
148
- "depth": call_depth,
142
+ 'time': 0,
143
+ 'file_lineno': file_lineno,
144
+ 'depth': call_depth,
149
145
  }
150
- self.task_times[task_name]["time"] += self.tick()
146
+ self.task_times[task_name]['time'] += self.tick()
151
147
 
152
148
  def get_percentage_color(self, percentage):
153
149
  """Return ANSI color code based on percentage."""
154
150
  if percentage >= 75:
155
- return "\033[91m" # Red
156
- elif percentage >= 50:
157
- return "\033[93m" # Yellow
158
- elif percentage >= 25:
159
- return "\033[92m" # Green
160
- else:
161
- return "\033[94m" # Blue
151
+ return '\033[91m' # Red
152
+ if percentage >= 50:
153
+ return '\033[93m' # Yellow
154
+ if percentage >= 25:
155
+ return '\033[92m' # Green
156
+ return '\033[94m' # Blue
162
157
 
163
158
  def print_task_table(self, interval=1, max_depth=None):
164
159
  """Print the task time table at regular intervals."""
@@ -167,30 +162,30 @@ class Clock:
167
162
  if current_time - self.last_print_time > interval:
168
163
  self.print_counter += 1
169
164
  total_time = (
170
- sum(data["time"] for data in self.task_times.values()) or 1
165
+ sum(data['time'] for data in self.task_times.values()) or 1
171
166
  ) # Avoid division by zero
172
167
 
173
168
  # Prepare data for the table
174
169
  table_data = []
175
170
  for task_name, data in self.task_times.items():
176
- time_spent = data["time"]
177
- file_lineno = data["file_lineno"]
178
- depth = data["depth"] - self.min_depth
171
+ time_spent = data['time']
172
+ file_lineno = data['file_lineno']
173
+ depth = data['depth'] - self.min_depth
179
174
  if max_depth is not None and depth > max_depth:
180
175
  continue
181
176
  percentage = (time_spent / total_time) * 100
182
177
 
183
178
  # Get color code based on percentage
184
179
  color_code = self.get_percentage_color(percentage)
185
- percentage_str = f"{percentage:.2f} %"
186
- colored_percentage = f"{color_code}{percentage_str}\033[0m"
180
+ percentage_str = f'{percentage:.2f} %'
181
+ colored_percentage = f'{color_code}{percentage_str}\033[0m'
187
182
 
188
183
  table_data.append(
189
184
  [
190
185
  task_name,
191
186
  file_lineno,
192
187
  # depth,
193
- f"{time_spent:.2f} s",
188
+ f'{time_spent:.2f} s',
194
189
  colored_percentage,
195
190
  ]
196
191
  )
@@ -198,13 +193,13 @@ class Clock:
198
193
  # Add headers and log using tabulate
199
194
  table = tabulate(
200
195
  table_data,
201
- headers=["Task", "File:Line", "Time (s)", "Percentage (%)"],
202
- tablefmt="grid",
196
+ headers=['Task', 'File:Line', 'Time (s)', 'Percentage (%)'],
197
+ tablefmt='grid',
203
198
  )
204
199
 
205
200
  self.last_print_time = current_time
206
201
  # total_time_str = f"\nTotal time elapsed: {total_time:.2f} seconds."
207
- logger.opt(depth=2).info(f"\n{table}")
202
+ logger.opt(depth=2).info(f'\n{table}')
208
203
 
209
204
 
210
205
  # Example of how to instantiate the Timer
@@ -212,4 +207,4 @@ speedy_timer = Clock(start_now=False)
212
207
 
213
208
 
214
209
  # Clock, speedy_timer, timef
215
- __all__ = ["Clock", "speedy_timer", "timef"]
210
+ __all__ = ['Clock', 'speedy_timer', 'timef']
speedy_utils/common/function_decorator.py CHANGED
@@ -1,10 +1,4 @@
1
- import functools
2
- import time
3
- import traceback
4
- from collections.abc import Callable
5
- from typing import Any
6
-
7
- from loguru import logger
1
+ from ..__imports import *
8
2
 
9
3
 
10
4
  def retry_runtime(
@@ -33,14 +27,14 @@ def retry_runtime(
33
27
  except (SyntaxError, NameError, ImportError, TypeError) as e:
34
28
  # Don't retry on syntax/compilation errors
35
29
  logger.opt(depth=1).error(
36
- f"Critical error in {func.__name__}: {str(e)}\n{traceback.format_exc()}"
30
+ f'Critical error in {func.__name__}: {str(e)}\n{traceback.format_exc()}'
37
31
  )
38
32
  raise
39
33
 
40
34
  except exceptions as e:
41
35
  if attempt == max_retry:
42
36
  logger.opt(depth=1).error(
43
- f"Function {func.__name__} failed after {max_retry} retries: {str(e)}"
37
+ f'Function {func.__name__} failed after {max_retry} retries: {str(e)}'
44
38
  )
45
39
  raise
46
40
 
@@ -48,8 +42,8 @@ def retry_runtime(
48
42
  2 ** (attempt - 1)
49
43
  ) # Exponential backoff
50
44
  logger.opt(depth=1).warning(
51
- f"Attempt {attempt}/{max_retry} failed: {str(e)[:100]}. "
52
- f"Retrying in {backoff_time} seconds."
45
+ f'Attempt {attempt}/{max_retry} failed: {str(e)[:100]}. '
46
+ f'Retrying in {backoff_time} seconds.'
53
47
  )
54
48
  time.sleep(backoff_time)
55
49
 
@@ -60,4 +54,4 @@ def retry_runtime(
60
54
  return decorator
61
55
 
62
56
 
63
- __all__ = ["retry_runtime"]
57
+ __all__ = ['retry_runtime']