speedy-utils 1.1.27__py3-none-any.whl → 1.1.29__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (54):
  1. llm_utils/__init__.py +16 -4
  2. llm_utils/chat_format/__init__.py +10 -10
  3. llm_utils/chat_format/display.py +33 -21
  4. llm_utils/chat_format/transform.py +17 -19
  5. llm_utils/chat_format/utils.py +6 -4
  6. llm_utils/group_messages.py +17 -14
  7. llm_utils/lm/__init__.py +6 -5
  8. llm_utils/lm/async_lm/__init__.py +1 -0
  9. llm_utils/lm/async_lm/_utils.py +10 -9
  10. llm_utils/lm/async_lm/async_llm_task.py +141 -137
  11. llm_utils/lm/async_lm/async_lm.py +48 -42
  12. llm_utils/lm/async_lm/async_lm_base.py +59 -60
  13. llm_utils/lm/async_lm/lm_specific.py +4 -3
  14. llm_utils/lm/base_prompt_builder.py +93 -70
  15. llm_utils/lm/llm.py +126 -108
  16. llm_utils/lm/llm_signature.py +4 -2
  17. llm_utils/lm/lm_base.py +72 -73
  18. llm_utils/lm/mixins.py +102 -62
  19. llm_utils/lm/openai_memoize.py +124 -87
  20. llm_utils/lm/signature.py +105 -92
  21. llm_utils/lm/utils.py +42 -23
  22. llm_utils/scripts/vllm_load_balancer.py +23 -30
  23. llm_utils/scripts/vllm_serve.py +8 -7
  24. llm_utils/vector_cache/__init__.py +9 -3
  25. llm_utils/vector_cache/cli.py +1 -1
  26. llm_utils/vector_cache/core.py +59 -63
  27. llm_utils/vector_cache/types.py +7 -5
  28. llm_utils/vector_cache/utils.py +12 -8
  29. speedy_utils/__imports.py +244 -0
  30. speedy_utils/__init__.py +90 -194
  31. speedy_utils/all.py +125 -227
  32. speedy_utils/common/clock.py +37 -42
  33. speedy_utils/common/function_decorator.py +6 -12
  34. speedy_utils/common/logger.py +43 -52
  35. speedy_utils/common/notebook_utils.py +13 -21
  36. speedy_utils/common/patcher.py +21 -17
  37. speedy_utils/common/report_manager.py +42 -44
  38. speedy_utils/common/utils_cache.py +152 -169
  39. speedy_utils/common/utils_io.py +137 -103
  40. speedy_utils/common/utils_misc.py +15 -21
  41. speedy_utils/common/utils_print.py +22 -28
  42. speedy_utils/multi_worker/process.py +66 -79
  43. speedy_utils/multi_worker/thread.py +78 -155
  44. speedy_utils/scripts/mpython.py +38 -36
  45. speedy_utils/scripts/openapi_client_codegen.py +10 -10
  46. {speedy_utils-1.1.27.dist-info → speedy_utils-1.1.29.dist-info}/METADATA +1 -1
  47. speedy_utils-1.1.29.dist-info/RECORD +57 -0
  48. vision_utils/README.md +202 -0
  49. vision_utils/__init__.py +4 -0
  50. vision_utils/io_utils.py +735 -0
  51. vision_utils/plot.py +345 -0
  52. speedy_utils-1.1.27.dist-info/RECORD +0 -52
  53. {speedy_utils-1.1.27.dist-info → speedy_utils-1.1.29.dist-info}/WHEEL +0 -0
  54. {speedy_utils-1.1.27.dist-info → speedy_utils-1.1.29.dist-info}/entry_points.txt +0 -0
@@ -1,13 +1,4 @@
1
- # utils/utils_print.py
2
-
3
- import inspect
4
- import re
5
- import sys
6
- import time
7
- from collections import OrderedDict
8
- from typing import Annotated, Literal, Union
9
-
10
- from loguru import logger
1
+ from ..__imports import *
11
2
 
12
3
 
13
4
  # A subclass of OrderedDict to automatically evict the oldest item after max_size is exceeded
@@ -34,26 +25,26 @@ _last_log_times = _RateLimitCache(max_size=2000)
34
25
  def setup_logger(
35
26
  level: Annotated[
36
27
  Literal[
37
- "Trace",
38
- "Debug",
39
- "Info",
40
- "Success",
41
- "Warning",
42
- "Error",
43
- "Critical",
44
- "Disable",
45
- "T",
46
- "D",
47
- "I",
48
- "S",
49
- "W",
50
- "E",
51
- "C",
28
+ 'Trace',
29
+ 'Debug',
30
+ 'Info',
31
+ 'Success',
32
+ 'Warning',
33
+ 'Error',
34
+ 'Critical',
35
+ 'Disable',
36
+ 'T',
37
+ 'D',
38
+ 'I',
39
+ 'S',
40
+ 'W',
41
+ 'E',
42
+ 'C',
52
43
  ],
53
- "The desired log level",
54
- ] = "Info",
55
- enable_grep: Annotated[str, "Comma-separated patterns for enabling logs"] = "",
56
- disable_grep: Annotated[str, "Comma-separated patterns for disabling logs"] = "",
44
+ 'The desired log level',
45
+ ] = 'Info',
46
+ enable_grep: Annotated[str, 'Comma-separated patterns for enabling logs'] = '',
47
+ disable_grep: Annotated[str, 'Comma-separated patterns for disabling logs'] = '',
57
48
  min_interval: float = -1,
58
49
  max_cache_entries: int = 2000,
59
50
  ) -> None:
@@ -67,13 +58,13 @@ def setup_logger(
67
58
 
68
59
  # Map the shorthand level to the full name
69
60
  level_mapping = {
70
- "T": "TRACE",
71
- "D": "DEBUG",
72
- "I": "INFO",
73
- "S": "SUCCESS",
74
- "W": "WARNING",
75
- "E": "ERROR",
76
- "C": "CRITICAL",
61
+ 'T': 'TRACE',
62
+ 'D': 'DEBUG',
63
+ 'I': 'INFO',
64
+ 'S': 'SUCCESS',
65
+ 'W': 'WARNING',
66
+ 'E': 'ERROR',
67
+ 'C': 'CRITICAL',
77
68
  }
78
69
  level_str = level_mapping.get(level.upper(), level.upper())
79
70
 
@@ -84,8 +75,8 @@ def setup_logger(
84
75
  logger.remove()
85
76
 
86
77
  # Prepare grep patterns
87
- enable_patterns = [p.strip() for p in enable_grep.split(",") if p.strip()]
88
- disable_patterns = [p.strip() for p in disable_grep.split(",") if p.strip()]
78
+ enable_patterns = [p.strip() for p in enable_grep.split(',') if p.strip()]
79
+ disable_patterns = [p.strip() for p in disable_grep.split(',') if p.strip()]
89
80
 
90
81
  def log_filter(record):
91
82
  """
@@ -95,11 +86,11 @@ def setup_logger(
95
86
  4. Enforces a max size on the (file:line) dictionary.
96
87
  """
97
88
  # ---------- 1) Log-level check ----------
98
- if record["level"].no < logger.level(level_str).no:
89
+ if record['level'].no < logger.level(level_str).no:
99
90
  return False
100
91
 
101
92
  # ---------- 2) Grep pattern handling ----------
102
- log_message = f"{record['file']}:{record['line']} ({record['function']})"
93
+ log_message = f'{record["file"]}:{record["line"]} ({record["function"]})'
103
94
  if enable_patterns and not any(
104
95
  re.search(p, log_message) for p in enable_patterns
105
96
  ):
@@ -110,7 +101,7 @@ def setup_logger(
110
101
  return False
111
102
 
112
103
  # ---------- 3) Rate limiting by file:line ----------
113
- file_line_key = f"{record['file']}:{record['line']}"
104
+ file_line_key = f'{record["file"]}:{record["line"]}'
114
105
  now = time.time()
115
106
 
116
107
  last_time = _last_log_times.get(file_line_key)
@@ -131,20 +122,20 @@ def setup_logger(
131
122
  sys.stdout,
132
123
  colorize=True,
133
124
  format=(
134
- "<green>{time:HH:mm:ss}</green> | "
135
- "<level>{level: <8}</level> | "
136
- "<cyan>{file}:{line} ({function})</cyan> - <level>{message}</level>"
125
+ '<green>{time:HH:mm:ss}</green> | '
126
+ '<level>{level: <8}</level> | '
127
+ '<cyan>{file}:{line} ({function})</cyan> - <level>{message}</level>'
137
128
  ),
138
129
  filter=log_filter,
139
130
  )
140
131
 
141
132
  # ---------- 4) Handle "DISABLE" level ----------
142
- if level_str.upper() == "DISABLE":
143
- logger.disable("")
144
- logger.info("Logging disabled")
133
+ if level_str.upper() == 'DISABLE':
134
+ logger.disable('')
135
+ logger.info('Logging disabled')
145
136
  else:
146
- logger.enable("")
147
- logger.debug(f"Logging set to {level_str}")
137
+ logger.enable('')
138
+ logger.debug(f'Logging set to {level_str}')
148
139
 
149
140
 
150
141
  _logged_once_set = set()
@@ -158,15 +149,15 @@ def _get_call_site_id(depth=2) -> str:
158
149
  """
159
150
  frame = inspect.stack()[depth]
160
151
  # Use a stable identifier in test environment to handle mocking
161
- return f"{frame.filename}:{frame.lineno}"
152
+ return f'{frame.filename}:{frame.lineno}'
162
153
 
163
154
 
164
155
  def log(
165
156
  msg: str,
166
157
  *,
167
- level: Literal["info", "warning", "error", "critical", "success"] = "info",
158
+ level: Literal['info', 'warning', 'error', 'critical', 'success'] = 'info',
168
159
  once: bool = False,
169
- interval: Union[float, None] = None,
160
+ interval: float | None = None,
170
161
  ) -> None:
171
162
  """
172
163
  Log a message using loguru with optional `once` and `interval` control.
@@ -1,27 +1,20 @@
1
- # jupyter notebook utilities
2
- import json
3
- import os
4
- import pathlib
5
- from typing import Any
1
+ from ..__imports import *
6
2
 
7
- from IPython.display import HTML, display
8
- from tabulate import tabulate
9
3
 
10
-
11
- def change_dir(target_directory: str = "POLY") -> None:
4
+ def change_dir(target_directory: str = 'POLY') -> None:
12
5
  """Change directory to the first occurrence of x in the current path."""
13
- cur_dir = pathlib.Path("./")
6
+ cur_dir = pathlib.Path('./')
14
7
  target_dir = str(cur_dir.absolute()).split(target_directory)[0] + target_directory
15
8
  os.chdir(target_dir)
16
- print(f"Current dir: {target_dir}")
9
+ print(f'Current dir: {target_dir}')
17
10
 
18
11
 
19
12
  def display_pretty_table_html(data: dict) -> None:
20
13
  """Display a pretty HTML table in Jupyter notebooks."""
21
- table = "<table>"
14
+ table = '<table>'
22
15
  for key, value in data.items():
23
- table += f"<tr><td>{key}</td><td>{value}</td></tr>"
24
- table += "</table>"
16
+ table += f'<tr><td>{key}</td><td>{value}</td></tr>'
17
+ table += '</table>'
25
18
  display(HTML(table))
26
19
 
27
20
 
@@ -33,27 +26,26 @@ def print_table(data: Any, use_html: bool = True) -> None:
33
26
  try:
34
27
  data = json.loads(data)
35
28
  except json.JSONDecodeError as exc:
36
- raise ValueError("String input could not be decoded as JSON") from exc
29
+ raise ValueError('String input could not be decoded as JSON') from exc
37
30
 
38
31
  if isinstance(data, list):
39
32
  if all(isinstance(item, dict) for item in data):
40
33
  headers = list(data[0].keys())
41
34
  rows = [list(item.values()) for item in data]
42
35
  return tabulate(
43
- rows, headers=headers, tablefmt="html" if use_html else "grid"
36
+ rows, headers=headers, tablefmt='html' if use_html else 'grid'
44
37
  )
45
- else:
46
- raise ValueError("List must contain dictionaries")
38
+ raise ValueError('List must contain dictionaries')
47
39
 
48
40
  if isinstance(data, dict):
49
- headers = ["Key", "Value"]
41
+ headers = ['Key', 'Value']
50
42
  rows = list(data.items())
51
43
  return tabulate(
52
- rows, headers=headers, tablefmt="html" if use_html else "grid"
44
+ rows, headers=headers, tablefmt='html' if use_html else 'grid'
53
45
  )
54
46
 
55
47
  raise TypeError(
56
- "Input data must be a list of dictionaries, a dictionary, or a JSON string"
48
+ 'Input data must be a list of dictionaries, a dictionary, or a JSON string'
57
49
  )
58
50
 
59
51
  table = __get_table(data)
@@ -1,17 +1,15 @@
1
1
  # utils/patching.py
2
- import inspect
3
- import types
4
- import re
5
- from typing import Annotated, Union
2
+ from ..__imports import *
3
+
6
4
 
7
5
  def patch_method(
8
- cls: Annotated[type, "Class containing the method"],
9
- method_name: Annotated[str, "Name of the method to patch"],
6
+ cls: Annotated[type, 'Class containing the method'],
7
+ method_name: Annotated[str, 'Name of the method to patch'],
10
8
  replacements: Annotated[
11
- dict[Union[str, re.Pattern], str],
12
- "Mapping of {old_substring_or_regex: new_string} replacements"
9
+ dict[str | re.Pattern, str],
10
+ 'Mapping of {old_substring_or_regex: new_string} replacements',
13
11
  ],
14
- tag: Annotated[str, "Optional logging tag"] = "",
12
+ tag: Annotated[str, 'Optional logging tag'] = '',
15
13
  ) -> bool:
16
14
  """
17
15
  Generic patcher for replacing substrings or regex matches in a method's source code.
@@ -29,13 +27,17 @@ def patch_method(
29
27
  try:
30
28
  method = getattr(cls, method_name)
31
29
  except AttributeError:
32
- print(f"[patcher{':' + tag if tag else ''}] No method {method_name} in {cls.__name__}")
30
+ print(
31
+ f'[patcher{":" + tag if tag else ""}] No method {method_name} in {cls.__name__}'
32
+ )
33
33
  return False
34
34
 
35
35
  try:
36
36
  src = inspect.getsource(method)
37
37
  except (OSError, TypeError):
38
- print(f"[patcher{':' + tag if tag else ''}] Could not get source for {cls.__name__}.{method_name}")
38
+ print(
39
+ f'[patcher{":" + tag if tag else ""}] Could not get source for {cls.__name__}.{method_name}'
40
+ )
39
41
  return False
40
42
 
41
43
  new_src = src
@@ -51,18 +53,20 @@ def patch_method(
51
53
  new_src = new_src.replace(old, new)
52
54
  did_patch = True
53
55
  else:
54
- raise TypeError("Replacement keys must be str or re.Pattern")
56
+ raise TypeError('Replacement keys must be str or re.Pattern')
55
57
 
56
58
  if not did_patch:
57
- print(f"[patcher{':' + tag if tag else ''}] No matching patterns found in {cls.__name__}.{method_name}")
59
+ print(
60
+ f'[patcher{":" + tag if tag else ""}] No matching patterns found in {cls.__name__}.{method_name}'
61
+ )
58
62
  return False
59
63
 
60
64
  # Recompile the patched function
61
- code_obj = compile(new_src, filename=f"<patched_{method_name}>", mode="exec")
65
+ code_obj = compile(new_src, filename=f'<patched_{method_name}>', mode='exec')
62
66
  ns = {}
63
- exec(code_obj, cls.__dict__, ns) # type: ignore
67
+ exec(code_obj, cls.__dict__, ns) # type: ignore
64
68
 
65
69
  # Attach patched method back
66
- setattr(cls, method_name, types.MethodType(ns[method_name], None, cls)) # type: ignore
67
- print(f"[patcher{':' + tag if tag else ''}] Patched {cls.__name__}.{method_name}")
70
+ setattr(cls, method_name, types.MethodType(ns[method_name], None, cls)) # type: ignore
71
+ print(f'[patcher{":" + tag if tag else ""}] Patched {cls.__name__}.{method_name}')
68
72
  return True
@@ -1,110 +1,108 @@
1
- import os
2
- from collections import defaultdict
3
- from datetime import datetime
1
+ from ..__imports import *
4
2
 
5
3
 
6
4
  class ReportManager:
7
5
  def __init__(self):
8
- self.cache_dir = os.path.expanduser("~/.cache/speedy_utils")
6
+ self.cache_dir = os.path.expanduser('~/.cache/speedy_utils')
9
7
  os.makedirs(self.cache_dir, exist_ok=True)
10
8
 
11
9
  def save_report(self, errors, results, execution_time=None, metadata=None):
12
10
  report_path = os.path.join(
13
- self.cache_dir, f"report_{datetime.now().strftime('%m%d_%H%M')}.md"
11
+ self.cache_dir, f'report_{datetime.now().strftime("%m%d_%H%M")}.md'
14
12
  )
15
13
  os.makedirs(os.path.dirname(report_path), exist_ok=True)
16
14
 
17
15
  # Group errors by error type
18
16
  error_groups = defaultdict(list)
19
17
  for err in errors[:10]:
20
- error_type = err["error"].__class__.__name__
18
+ error_type = err['error'].__class__.__name__
21
19
  error_groups[error_type].append(err)
22
20
 
23
21
  md_content = [
24
- "# Multi-thread Execution Report",
25
- f"\n## Summary (Generated at {datetime.now().strftime('%Y-%m-%d %H:%M:%S')})",
22
+ '# Multi-thread Execution Report',
23
+ f'\n## Summary (Generated at {datetime.now().strftime("%Y-%m-%d %H:%M:%S")})',
26
24
  ]
27
25
 
28
26
  if metadata:
29
27
  md_content.extend(
30
28
  [
31
- "\n### Execution Configuration",
32
- f"- Mode: {metadata['mode']}",
33
- f"- Workers: {metadata['max_workers']}",
34
- f"- Execution type: {metadata['execution_mode']}",
35
- f"- Total inputs: {metadata['total_inputs']}",
29
+ '\n### Execution Configuration',
30
+ f'- Mode: {metadata["mode"]}',
31
+ f'- Workers: {metadata["max_workers"]}',
32
+ f'- Execution type: {metadata["execution_mode"]}',
33
+ f'- Total inputs: {metadata["total_inputs"]}',
36
34
  ]
37
35
  )
38
36
 
39
37
  md_content.extend(
40
38
  [
41
- "\n### Results Overview",
42
- f"- Total items processed: {len(results)}",
43
- f"- Success rate: {(len(results) - len(errors)) / len(results) * 100:.1f}%",
44
- f"- Total errors: {len(errors)}",
39
+ '\n### Results Overview',
40
+ f'- Total items processed: {len(results)}',
41
+ f'- Success rate: {(len(results) - len(errors)) / len(results) * 100:.1f}%',
42
+ f'- Total errors: {len(errors)}',
45
43
  ]
46
44
  )
47
45
 
48
46
  if execution_time:
49
- md_content.append(f"- Execution time: {execution_time:.2f}s")
47
+ md_content.append(f'- Execution time: {execution_time:.2f}s')
50
48
  md_content.append(
51
- f"- Average speed: {len(results) / execution_time:.1f} items/second"
49
+ f'- Average speed: {len(results) / execution_time:.1f} items/second'
52
50
  )
53
51
 
54
52
  if error_groups:
55
53
  md_content.extend(
56
- ["\n## Errors by Type", "Click headers to expand error details."]
54
+ ['\n## Errors by Type', 'Click headers to expand error details.']
57
55
  )
58
56
 
59
57
  for error_type, errs in error_groups.items():
60
58
  md_content.extend(
61
59
  [
62
- "\n<details>",
63
- f"<summary><b>{error_type}</b> ({len(errs)} occurrences)</summary>\n",
64
- "| Index | Input | Error Message |",
65
- "|-------|-------|---------------|",
60
+ '\n<details>',
61
+ f'<summary><b>{error_type}</b> ({len(errs)} occurrences)</summary>\n',
62
+ '| Index | Input | Error Message |',
63
+ '|-------|-------|---------------|',
66
64
  ]
67
65
  )
68
66
 
69
67
  for err in errs:
70
68
  md_content.append(
71
- f"| {err['index']} | `{err['input']}` | {str(err['error'])} |"
69
+ f'| {err["index"]} | `{err["input"]}` | {str(err["error"])} |'
72
70
  )
73
71
 
74
72
  # Add first traceback as example
75
73
  md_content.extend(
76
74
  [
77
- "\nExample traceback:",
78
- "```python",
79
- errs[0]["traceback"],
80
- "```",
81
- "</details>",
75
+ '\nExample traceback:',
76
+ '```python',
77
+ errs[0]['traceback'],
78
+ '```',
79
+ '</details>',
82
80
  ]
83
81
  )
84
82
 
85
83
  # Add a section listing all error indices
86
84
  md_content.extend(
87
85
  [
88
- "\n## Error Indices",
89
- "List of indices for items that encountered errors:",
90
- ", ".join(str(err["index"]) for err in errors),
86
+ '\n## Error Indices',
87
+ 'List of indices for items that encountered errors:',
88
+ ', '.join(str(err['index']) for err in errors),
91
89
  ]
92
90
  )
93
91
 
94
92
  md_content.extend(
95
93
  [
96
- "\n## Results Summary",
97
- f"- Successful executions: {len(results) - len(errors)}",
98
- f"- Failed executions: {len(errors)}",
99
- "\n<details>",
100
- "<summary>First 5 successful results</summary>\n",
101
- "```python",
94
+ '\n## Results Summary',
95
+ f'- Successful executions: {len(results) - len(errors)}',
96
+ f'- Failed executions: {len(errors)}',
97
+ '\n<details>',
98
+ '<summary>First 5 successful results</summary>\n',
99
+ '```python',
102
100
  str([r for r in results[:5] if r is not None]),
103
- "```",
104
- "</details>",
101
+ '```',
102
+ '</details>',
105
103
  ]
106
104
  )
107
105
 
108
- with open(report_path, "w", encoding="utf-8") as f:
109
- f.write("\n".join(md_content))
110
- print(f"Report saved at: {report_path}")
106
+ with open(report_path, 'w', encoding='utf-8') as f:
107
+ f.write('\n'.join(md_content))
108
+ print(f'Report saved at: {report_path}')