speedy-utils 1.1.42__py3-none-any.whl → 1.1.44__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- speedy_utils/__init__.py +7 -0
- speedy_utils/multi_worker/process.py +695 -85
- speedy_utils/multi_worker/thread.py +202 -42
- {speedy_utils-1.1.42.dist-info → speedy_utils-1.1.44.dist-info}/METADATA +158 -9
- {speedy_utils-1.1.42.dist-info → speedy_utils-1.1.44.dist-info}/RECORD +7 -7
- {speedy_utils-1.1.42.dist-info → speedy_utils-1.1.44.dist-info}/WHEEL +0 -0
- {speedy_utils-1.1.42.dist-info → speedy_utils-1.1.44.dist-info}/entry_points.txt +0 -0
|
@@ -1,4 +1,7 @@
|
|
|
1
1
|
from ..__imports import *
|
|
2
|
+
import linecache
|
|
3
|
+
|
|
4
|
+
from .process import ErrorStats, ErrorHandlerType
|
|
2
5
|
|
|
3
6
|
|
|
4
7
|
try:
|
|
@@ -6,6 +9,17 @@ try:
|
|
|
6
9
|
except ImportError: # pragma: no cover
|
|
7
10
|
tqdm = None # type: ignore[assignment]
|
|
8
11
|
|
|
12
|
+
try:
|
|
13
|
+
from rich.console import Console
|
|
14
|
+
from rich.panel import Panel
|
|
15
|
+
from rich.syntax import Syntax
|
|
16
|
+
from rich.text import Text
|
|
17
|
+
except ImportError: # pragma: no cover
|
|
18
|
+
Console = None # type: ignore[assignment, misc]
|
|
19
|
+
Panel = None # type: ignore[assignment, misc]
|
|
20
|
+
Syntax = None # type: ignore[assignment, misc]
|
|
21
|
+
Text = None # type: ignore[assignment, misc]
|
|
22
|
+
|
|
9
23
|
# Sensible defaults
|
|
10
24
|
DEFAULT_WORKERS = (os.cpu_count() or 4) * 2
|
|
11
25
|
|
|
@@ -25,11 +39,13 @@ class UserFunctionError(Exception):
|
|
|
25
39
|
func_name: str,
|
|
26
40
|
input_value: Any,
|
|
27
41
|
user_traceback: list[traceback.FrameSummary],
|
|
42
|
+
caller_frame: traceback.FrameSummary | None = None,
|
|
28
43
|
) -> None:
|
|
29
44
|
self.original_exception = original_exception
|
|
30
45
|
self.func_name = func_name
|
|
31
46
|
self.input_value = input_value
|
|
32
47
|
self.user_traceback = user_traceback
|
|
48
|
+
self.caller_frame = caller_frame
|
|
33
49
|
|
|
34
50
|
# Create a focused error message
|
|
35
51
|
tb_str = ''.join(traceback.format_list(user_traceback))
|
|
@@ -44,6 +60,95 @@ class UserFunctionError(Exception):
|
|
|
44
60
|
# Return focused error without infrastructure frames
|
|
45
61
|
return super().__str__()
|
|
46
62
|
|
|
63
|
+
def format_rich(self) -> None:
    """Print this error to stderr as a rich panel with source context.

    When the optional rich imports are unavailable (``Console``, ``Panel``
    or ``Text`` is ``None``) the plain ``str(self)`` message is printed to
    ``sys.stderr`` instead.

    Otherwise the output consists of a red-bordered panel that lists the
    caller frame (when one was captured and has a line number) followed by
    every user traceback frame that has a line number, each with a few
    lines of surrounding source, and finally a line with the original
    exception's type name and message.

    Filenames, function names and the exception message are escaped before
    being embedded in rich markup, so bracketed text inside them (for
    example ``[/x]`` in an error message) is displayed literally instead
    of being interpreted as a style tag.
    """
    if Console is None or Panel is None or Text is None:
        # rich is not installed: fall back to the plain-text message.
        print(str(self), file=sys.stderr)
        return

    # Safe to import here: the guard above only passes when rich imported.
    from rich.markup import escape

    console = Console(stderr=True, force_terminal=True)

    # The caller frame (if usable) is shown first, then the user frames.
    frames = []
    if self.caller_frame and self.caller_frame.lineno is not None:
        frames.append(self.caller_frame)
    frames.extend(
        frame for frame in self.user_traceback if frame.lineno is not None
    )

    # Build the traceback display with code context for every frame.
    tb_parts: list[str] = []
    for frame in frames:
        tb_parts.append(
            f'[cyan]{escape(frame.filename)}[/cyan]:[yellow]{frame.lineno}[/yellow] '
            f'in [green]{escape(frame.name)}[/green]'
        )
        tb_parts.append('')
        context = _get_code_context_rich(frame.filename, frame.lineno, 3)
        tb_parts.extend(context)
        tb_parts.append('')

    # Print the panel, then the exception summary, framed by blank lines.
    console.print()
    console.print(
        Panel(
            '\n'.join(tb_parts),
            title='[bold red]Traceback (most recent call last)[/bold red]',
            border_style='red',
            expand=False,
        )
    )
    console.print(
        f'[bold red]{type(self.original_exception).__name__}[/bold red]: '
        f'{escape(str(self.original_exception))}'
    )
    console.print()
|
|
113
|
+
|
|
114
|
+
|
|
115
|
+
def _get_code_context(filename: str, lineno: int, context_lines: int = 3) -> list[str]:
    """Return numbered source lines around ``lineno`` as plain text.

    Parameters
    ----------
    filename : str
        Path of the source file, looked up through ``linecache``.
    lineno : int
        1-based line to highlight; it is marked with ``❱``.
    context_lines : int, optional
        Number of lines to include before and after ``lineno`` (default: 3).

    Returns
    -------
    list[str]
        One formatted entry per available line. Lines that ``linecache``
        cannot provide (past the end of the file, unreadable file) are
        omitted, so the list may be shorter than requested or empty.
    """
    # Invalidate a stale cache entry so the context matches the file as it
    # is on disk now, not as it was when it was first cached.
    linecache.checkcache(filename)

    first = max(1, lineno - context_lines)
    last = lineno + context_lines

    lines: list[str] = []
    for i in range(first, last + 1):
        line = linecache.getline(filename, i)
        if not line:
            # getline returns '' when the line is unavailable; blank source
            # lines come back as '\n' and are therefore kept.
            continue
        line = line.rstrip()
        marker = '❱' if i == lineno else ' '
        lines.append(f' {i:4d} {marker} {line}')

    return lines
|
|
130
|
+
|
|
131
|
+
def _get_code_context_rich(filename: str, lineno: int, context_lines: int = 3) -> list[str]:
    """Return numbered source lines around ``lineno`` as rich markup strings.

    Parameters
    ----------
    filename : str
        Path of the source file, looked up through ``linecache``.
    lineno : int
        1-based line to highlight; it is marked with a red ``❱``.
    context_lines : int, optional
        Number of lines to include before and after ``lineno`` (default: 3).

    Returns
    -------
    list[str]
        One markup string per available line. Lines that ``linecache``
        cannot provide are omitted. The source text itself is escaped when
        rich is importable, so brackets in the code are shown literally
        rather than being parsed as markup tags.
    """
    try:
        from rich.markup import escape as _escape
    except ImportError:  # pragma: no cover
        # Without rich nothing will parse the markup, so leave text as-is.
        _escape = None

    # Invalidate a stale cache entry so the context matches the file as it
    # is on disk now, not as it was when it was first cached.
    linecache.checkcache(filename)

    first = max(1, lineno - context_lines)
    last = lineno + context_lines

    lines: list[str] = []
    for i in range(first, last + 1):
        line = linecache.getline(filename, i)
        if not line:
            # getline returns '' when the line is unavailable.
            continue
        line = line.rstrip()
        if _escape is not None:
            line = _escape(line)
        num_str = f'{i:4d}'

        if i == lineno:
            # Highlight the error line.
            lines.append(f'[dim]{num_str}[/dim] [red]❱[/red] {line}')
        else:
            # Normal context line.
            lines.append(f'[dim]{num_str} │[/dim] {line}')

    return lines
|
|
47
152
|
|
|
48
153
|
_PY_SET_ASYNC_EXC = ctypes.pythonapi.PyThreadState_SetAsyncExc
|
|
49
154
|
try:
|
|
@@ -90,6 +195,7 @@ def _worker(
|
|
|
90
195
|
item: T,
|
|
91
196
|
func: Callable[[T], R],
|
|
92
197
|
fixed_kwargs: Mapping[str, Any],
|
|
198
|
+
caller_frame: traceback.FrameSummary | None = None,
|
|
93
199
|
) -> R:
|
|
94
200
|
"""Execute the function with an item and fixed kwargs."""
|
|
95
201
|
# Validate func is callable before attempting to call it
|
|
@@ -102,7 +208,7 @@ def _worker(
|
|
|
102
208
|
)
|
|
103
209
|
|
|
104
210
|
try:
|
|
105
|
-
return func(item
|
|
211
|
+
return func(item)
|
|
106
212
|
except Exception as exc:
|
|
107
213
|
# Extract user code traceback (filter out infrastructure)
|
|
108
214
|
exc_tb = sys.exc_info()[2]
|
|
@@ -114,8 +220,11 @@ def _worker(
|
|
|
114
220
|
user_frames = []
|
|
115
221
|
skip_patterns = [
|
|
116
222
|
'multi_worker/thread.py',
|
|
223
|
+
'multi_worker/process.py',
|
|
117
224
|
'concurrent/futures/',
|
|
118
225
|
'threading.py',
|
|
226
|
+
'multiprocessing/',
|
|
227
|
+
'site-packages/ray/',
|
|
119
228
|
]
|
|
120
229
|
|
|
121
230
|
for frame in tb_list:
|
|
@@ -130,6 +239,7 @@ def _worker(
|
|
|
130
239
|
func_name,
|
|
131
240
|
item,
|
|
132
241
|
user_frames,
|
|
242
|
+
caller_frame,
|
|
133
243
|
) from exc
|
|
134
244
|
|
|
135
245
|
# Fallback: re-raise original if we couldn't extract frames
|
|
@@ -140,8 +250,9 @@ def _run_batch(
|
|
|
140
250
|
items: Sequence[T],
|
|
141
251
|
func: Callable[[T], R],
|
|
142
252
|
fixed_kwargs: Mapping[str, Any],
|
|
253
|
+
caller_frame: traceback.FrameSummary | None = None,
|
|
143
254
|
) -> list[R]:
|
|
144
|
-
return [_worker(item, func, fixed_kwargs) for item in items]
|
|
255
|
+
return [_worker(item, func, fixed_kwargs, caller_frame) for item in items]
|
|
145
256
|
|
|
146
257
|
|
|
147
258
|
def _attach_metadata(fut: Future[Any], idx: int, logical_size: int) -> None:
|
|
@@ -242,7 +353,9 @@ def multi_thread(
|
|
|
242
353
|
progress_update: int = 10,
|
|
243
354
|
prefetch_factor: int = 4,
|
|
244
355
|
timeout: float | None = None,
|
|
245
|
-
stop_on_error: bool =
|
|
356
|
+
stop_on_error: bool | None = None,
|
|
357
|
+
error_handler: ErrorHandlerType = 'raise',
|
|
358
|
+
max_error_files: int = 100,
|
|
246
359
|
n_proc: int = 0,
|
|
247
360
|
store_output_pkl_file: str | None = None,
|
|
248
361
|
**fixed_kwargs: Any,
|
|
@@ -272,8 +385,16 @@ def multi_thread(
|
|
|
272
385
|
Multiplier controlling in-flight items (``workers * prefetch_factor``).
|
|
273
386
|
timeout : float | None, optional
|
|
274
387
|
Overall wall-clock timeout in seconds.
|
|
275
|
-
stop_on_error : bool, optional
|
|
276
|
-
|
|
388
|
+
stop_on_error : bool | None, optional
|
|
389
|
+
Deprecated. Use error_handler instead.
|
|
390
|
+
When True -> error_handler='raise', when False -> error_handler='log'.
|
|
391
|
+
error_handler : 'raise' | 'ignore' | 'log', optional
|
|
392
|
+
- 'raise': raise exception on first error (default)
|
|
393
|
+
- 'ignore': continue, return None for failed items
|
|
394
|
+
- 'log': same as ignore, but logs errors to files
|
|
395
|
+
max_error_files : int, optional
|
|
396
|
+
Maximum number of error log files to write (default: 100).
|
|
397
|
+
Error logs are written to .cache/speedy_utils/error_logs/{idx}.log
|
|
277
398
|
n_proc : int, optional
|
|
278
399
|
Optional process-level fan-out; ``>1`` shards work across processes.
|
|
279
400
|
store_output_pkl_file : str | None, optional
|
|
@@ -285,10 +406,20 @@ def multi_thread(
|
|
|
285
406
|
-------
|
|
286
407
|
list[R | None]
|
|
287
408
|
Collected results, preserving order when requested. Failed tasks yield
|
|
288
|
-
``None`` entries if ``
|
|
409
|
+
``None`` entries if ``error_handler`` is not 'raise'.
|
|
289
410
|
"""
|
|
290
411
|
from speedy_utils import dump_json_or_pickle, load_by_ext
|
|
291
412
|
|
|
413
|
+
# Handle deprecated stop_on_error parameter
|
|
414
|
+
if stop_on_error is not None:
|
|
415
|
+
import warnings
|
|
416
|
+
warnings.warn(
|
|
417
|
+
"stop_on_error is deprecated, use error_handler instead",
|
|
418
|
+
DeprecationWarning,
|
|
419
|
+
stacklevel=2
|
|
420
|
+
)
|
|
421
|
+
error_handler = 'raise' if stop_on_error else 'log'
|
|
422
|
+
|
|
292
423
|
if n_proc > 1:
|
|
293
424
|
import tempfile
|
|
294
425
|
|
|
@@ -319,7 +450,8 @@ def multi_thread(
|
|
|
319
450
|
progress_update=progress_update,
|
|
320
451
|
prefetch_factor=prefetch_factor,
|
|
321
452
|
timeout=timeout,
|
|
322
|
-
|
|
453
|
+
error_handler=error_handler,
|
|
454
|
+
max_error_files=max_error_files,
|
|
323
455
|
n_proc=0,
|
|
324
456
|
store_output_pkl_file=file_pkl,
|
|
325
457
|
**fixed_kwargs,
|
|
@@ -363,12 +495,30 @@ def multi_thread(
|
|
|
363
495
|
if batch == 1 and logical_total and logical_total / max(workers_val, 1) > 20_000:
|
|
364
496
|
batch = 32
|
|
365
497
|
|
|
366
|
-
src_iter:
|
|
498
|
+
src_iter: Iterator[Any] = iter(inputs)
|
|
367
499
|
if batch > 1:
|
|
368
|
-
src_iter = _group_iter(src_iter, batch)
|
|
369
|
-
src_iter = iter(src_iter)
|
|
500
|
+
src_iter = iter(_group_iter(src_iter, batch))
|
|
370
501
|
collector: _ResultCollector[Any] = _ResultCollector(ordered, logical_total)
|
|
371
502
|
|
|
503
|
+
# Initialize error stats for error handling
|
|
504
|
+
func_name = getattr(func, '__name__', repr(func))
|
|
505
|
+
error_stats = ErrorStats(
|
|
506
|
+
func_name=func_name,
|
|
507
|
+
max_error_files=max_error_files,
|
|
508
|
+
write_logs=error_handler == 'log'
|
|
509
|
+
)
|
|
510
|
+
|
|
511
|
+
# Convert inputs to list for index access in error logging
|
|
512
|
+
items_list: list[Any] | None = None
|
|
513
|
+
if error_handler != 'raise':
|
|
514
|
+
try:
|
|
515
|
+
items_list = list(inputs)
|
|
516
|
+
src_iter = iter(items_list)
|
|
517
|
+
if batch > 1:
|
|
518
|
+
src_iter = iter(_group_iter(src_iter, batch))
|
|
519
|
+
except Exception:
|
|
520
|
+
items_list = None
|
|
521
|
+
|
|
372
522
|
bar = None
|
|
373
523
|
last_bar_update = 0
|
|
374
524
|
if (
|
|
@@ -382,10 +532,22 @@ def multi_thread(
|
|
|
382
532
|
ncols=128,
|
|
383
533
|
colour='green',
|
|
384
534
|
bar_format=(
|
|
385
|
-
'{l_bar}{bar}| {n_fmt}/{total_fmt}
|
|
535
|
+
'{l_bar}{bar}| {n_fmt}/{total_fmt} '
|
|
536
|
+
'[{elapsed}<{remaining}, {rate_fmt}{postfix}]'
|
|
386
537
|
),
|
|
387
538
|
)
|
|
388
539
|
|
|
540
|
+
# Capture caller context for error reporting
|
|
541
|
+
caller_frame_obj = inspect.currentframe()
|
|
542
|
+
caller_context: traceback.FrameSummary | None = None
|
|
543
|
+
if caller_frame_obj and caller_frame_obj.f_back:
|
|
544
|
+
caller_info = inspect.getframeinfo(caller_frame_obj.f_back)
|
|
545
|
+
caller_context = traceback.FrameSummary(
|
|
546
|
+
caller_info.filename,
|
|
547
|
+
caller_info.lineno,
|
|
548
|
+
caller_info.function,
|
|
549
|
+
)
|
|
550
|
+
|
|
389
551
|
deadline = time.monotonic() + timeout if timeout is not None else None
|
|
390
552
|
max_inflight = max(workers_val * prefetch_factor, 1)
|
|
391
553
|
completed_items = 0
|
|
@@ -409,10 +571,10 @@ def multi_thread(
|
|
|
409
571
|
batch_items = list(arg)
|
|
410
572
|
if not batch_items:
|
|
411
573
|
return
|
|
412
|
-
fut = pool.submit(_run_batch, batch_items, func, fixed_kwargs_map)
|
|
574
|
+
fut = pool.submit(_run_batch, batch_items, func, fixed_kwargs_map, caller_context)
|
|
413
575
|
logical_size = len(batch_items)
|
|
414
576
|
else:
|
|
415
|
-
fut = pool.submit(_worker, arg, func, fixed_kwargs_map)
|
|
577
|
+
fut = pool.submit(_worker, arg, func, fixed_kwargs_map, caller_context)
|
|
416
578
|
logical_size = 1
|
|
417
579
|
_attach_metadata(fut, next_logical_idx, logical_size)
|
|
418
580
|
next_logical_idx += logical_size
|
|
@@ -453,37 +615,37 @@ def multi_thread(
|
|
|
453
615
|
idx, logical_size = _future_meta(fut)
|
|
454
616
|
try:
|
|
455
617
|
result = fut.result()
|
|
618
|
+
# Record success for each item in the batch
|
|
619
|
+
for _ in range(logical_size):
|
|
620
|
+
error_stats.record_success()
|
|
456
621
|
except UserFunctionError as exc:
|
|
457
|
-
# User function error
|
|
458
|
-
|
|
459
|
-
|
|
460
|
-
|
|
622
|
+
# User function error
|
|
623
|
+
if error_handler == 'raise':
|
|
624
|
+
sys.stderr.flush()
|
|
625
|
+
sys.stdout.flush()
|
|
626
|
+
exc.format_rich()
|
|
627
|
+
sys.stderr.flush()
|
|
461
628
|
_cancel_futures(inflight)
|
|
462
|
-
|
|
463
|
-
|
|
464
|
-
|
|
465
|
-
|
|
466
|
-
|
|
467
|
-
|
|
468
|
-
|
|
469
|
-
|
|
470
|
-
|
|
471
|
-
f'{type(orig_exc).__name__}: {orig_exc}'
|
|
472
|
-
)
|
|
473
|
-
|
|
474
|
-
# Raise a new instance of the original exception type
|
|
475
|
-
# with our clean message
|
|
476
|
-
new_exc = type(orig_exc)(clean_msg)
|
|
477
|
-
# Suppress the "from" chain to avoid showing infrastructure
|
|
478
|
-
raise new_exc from None
|
|
479
|
-
|
|
629
|
+
sys.exit(1)
|
|
630
|
+
|
|
631
|
+
# Log error with ErrorStats
|
|
632
|
+
input_val = None
|
|
633
|
+
if items_list is not None and idx < len(items_list):
|
|
634
|
+
input_val = items_list[idx]
|
|
635
|
+
error_stats.record_error(
|
|
636
|
+
idx, exc.original_exception, input_val, func_name
|
|
637
|
+
)
|
|
480
638
|
out_items = [None] * logical_size
|
|
481
639
|
except Exception as exc:
|
|
482
640
|
# Other errors (infrastructure, batching, etc.)
|
|
483
|
-
if
|
|
641
|
+
if error_handler == 'raise':
|
|
484
642
|
_cancel_futures(inflight)
|
|
485
643
|
raise
|
|
486
|
-
|
|
644
|
+
|
|
645
|
+
input_val = None
|
|
646
|
+
if items_list is not None and idx < len(items_list):
|
|
647
|
+
input_val = items_list[idx]
|
|
648
|
+
error_stats.record_error(idx, exc, input_val, func_name)
|
|
487
649
|
out_items = [None] * logical_size
|
|
488
650
|
else:
|
|
489
651
|
try:
|
|
@@ -503,15 +665,13 @@ def multi_thread(
|
|
|
503
665
|
bar.update(delta)
|
|
504
666
|
last_bar_update = completed_items
|
|
505
667
|
submitted = next_logical_idx
|
|
506
|
-
pending = (
|
|
668
|
+
pending: int | str = (
|
|
507
669
|
max(logical_total - submitted, 0)
|
|
508
670
|
if logical_total is not None
|
|
509
671
|
else '-'
|
|
510
672
|
)
|
|
511
|
-
postfix =
|
|
512
|
-
|
|
513
|
-
'pending': pending,
|
|
514
|
-
}
|
|
673
|
+
postfix: dict[str, Any] = error_stats.get_postfix_dict()
|
|
674
|
+
postfix['pending'] = pending
|
|
515
675
|
bar.set_postfix(postfix)
|
|
516
676
|
|
|
517
677
|
try:
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: speedy-utils
|
|
3
|
-
Version: 1.1.
|
|
3
|
+
Version: 1.1.44
|
|
4
4
|
Summary: Fast and easy-to-use package for data science
|
|
5
5
|
Project-URL: Homepage, https://github.com/anhvth/speedy
|
|
6
6
|
Project-URL: Repository, https://github.com/anhvth/speedy
|
|
@@ -17,7 +17,7 @@ Classifier: Programming Language :: Python :: 3.11
|
|
|
17
17
|
Classifier: Programming Language :: Python :: 3.12
|
|
18
18
|
Classifier: Programming Language :: Python :: 3.13
|
|
19
19
|
Classifier: Programming Language :: Python :: 3.14
|
|
20
|
-
Requires-Python: >=3.
|
|
20
|
+
Requires-Python: >=3.9
|
|
21
21
|
Requires-Dist: aiohttp
|
|
22
22
|
Requires-Dist: bump2version
|
|
23
23
|
Requires-Dist: cachetools
|
|
@@ -39,6 +39,7 @@ Requires-Dist: pydantic
|
|
|
39
39
|
Requires-Dist: pytest
|
|
40
40
|
Requires-Dist: ray
|
|
41
41
|
Requires-Dist: requests
|
|
42
|
+
Requires-Dist: rich>=14.3.1
|
|
42
43
|
Requires-Dist: ruff
|
|
43
44
|
Requires-Dist: scikit-learn
|
|
44
45
|
Requires-Dist: tabulate
|
|
@@ -57,13 +58,48 @@ Description-Content-Type: text/markdown
|
|
|
57
58
|
|
|
58
59
|
**Speedy Utils** is a Python utility library designed to streamline common programming tasks such as caching, parallel processing, file I/O, and data manipulation. It provides a collection of decorators, functions, and classes to enhance productivity and performance in your Python projects.
|
|
59
60
|
|
|
61
|
+
## 🚀 Recent Updates (January 27, 2026)
|
|
62
|
+
|
|
63
|
+
**Enhanced Error Handling in Parallel Processing:**
|
|
64
|
+
- Rich-formatted error tracebacks with code context and syntax highlighting
|
|
65
|
+
- Three error handling modes: 'raise', 'ignore', and 'log'
|
|
66
|
+
- Filtered tracebacks focusing on user code (hiding infrastructure)
|
|
67
|
+
- Real-time progress reporting with error/success statistics
|
|
68
|
+
- Automatic error logging to timestamped files
|
|
69
|
+
- Caller frame information showing where parallel functions were invoked
|
|
70
|
+
|
|
71
|
+
## Quick Start
|
|
72
|
+
|
|
73
|
+
### Parallel Processing with Error Handling
|
|
74
|
+
|
|
75
|
+
```python
|
|
76
|
+
from speedy_utils import multi_thread, multi_process
|
|
77
|
+
|
|
78
|
+
# Simple parallel processing
|
|
79
|
+
results = multi_thread(lambda x: x * 2, [1, 2, 3, 4, 5])
|
|
80
|
+
# Results: [2, 4, 6, 8, 10]
|
|
81
|
+
|
|
82
|
+
# Robust processing with error handling
|
|
83
|
+
def process_item(item):
|
|
84
|
+
if item == 3:
|
|
85
|
+
raise ValueError(f"Cannot process item {item}")
|
|
86
|
+
return item * 2
|
|
87
|
+
|
|
88
|
+
# Continue processing despite errors
|
|
89
|
+
results = multi_thread(process_item, [1, 2, 3, 4, 5], error_handler='log')
|
|
90
|
+
# Results: [2, 4, None, 8, 10] - errors logged automatically
|
|
91
|
+
```
|
|
92
|
+
|
|
60
93
|
## Table of Contents
|
|
61
94
|
|
|
95
|
+
- [🚀 Recent Updates](#-recent-updates-january-27-2026)
|
|
96
|
+
- [Quick Start](#quick-start)
|
|
62
97
|
- [Features](#features)
|
|
63
98
|
- [Installation](#installation)
|
|
64
99
|
- [Usage](#usage)
|
|
65
|
-
- [Caching](#caching)
|
|
66
100
|
- [Parallel Processing](#parallel-processing)
|
|
101
|
+
- [Enhanced Error Handling](#enhanced-error-handling)
|
|
102
|
+
- [Caching](#caching)
|
|
67
103
|
- [File I/O](#file-io)
|
|
68
104
|
- [Data Manipulation](#data-manipulation)
|
|
69
105
|
- [Utility Functions](#utility-functions)
|
|
@@ -72,11 +108,12 @@ Description-Content-Type: text/markdown
|
|
|
72
108
|
## Features
|
|
73
109
|
|
|
74
110
|
- **Caching Mechanisms**: Disk-based and in-memory caching to optimize function calls.
|
|
75
|
-
- **Parallel Processing**: Multi-threading, multi-processing, and asynchronous multi-threading utilities.
|
|
111
|
+
- **Parallel Processing**: Multi-threading, multi-processing, and asynchronous multi-threading utilities with enhanced error handling.
|
|
76
112
|
- **File I/O**: Simplified JSON, JSONL, and pickle file handling with support for various file extensions.
|
|
77
113
|
- **Data Manipulation**: Utilities for flattening lists and dictionaries, converting data types, and more.
|
|
78
114
|
- **Timing Utilities**: Tools to measure and log execution time of functions and processes.
|
|
79
115
|
- **Pretty Printing**: Enhanced printing functions for structured data, including HTML tables for Jupyter notebooks.
|
|
116
|
+
- **Enhanced Error Handling**: Rich error tracebacks with code context, configurable error handling modes ('raise', 'ignore', 'log'), and detailed progress reporting.
|
|
80
117
|
|
|
81
118
|
## Installation
|
|
82
119
|
|
|
@@ -161,20 +198,132 @@ result = compute_sum(5, 7) # Retrieved from in-memory cache
|
|
|
161
198
|
|
|
162
199
|
### Parallel Processing
|
|
163
200
|
|
|
164
|
-
#### Multi-threading
|
|
201
|
+
#### Multi-threading with Enhanced Error Handling
|
|
165
202
|
|
|
166
|
-
Execute functions concurrently using multiple threads
|
|
203
|
+
Execute functions concurrently using multiple threads with comprehensive error handling. The enhanced error handling provides three modes: 'raise' (default), 'ignore', and 'log'. When errors occur, you'll see rich-formatted tracebacks with code context and caller information.
|
|
167
204
|
|
|
168
205
|
```python
|
|
169
206
|
from speedy_utils import multi_thread
|
|
170
207
|
|
|
171
208
|
def process_item(item):
|
|
172
|
-
#
|
|
209
|
+
# Simulate processing that might fail
|
|
210
|
+
if item == 3:
|
|
211
|
+
raise ValueError(f"Invalid item: {item}")
|
|
173
212
|
return item * 2
|
|
174
213
|
|
|
175
214
|
items = [1, 2, 3, 4, 5]
|
|
176
|
-
|
|
177
|
-
|
|
215
|
+
|
|
216
|
+
# Default behavior: raise on first error with rich traceback
|
|
217
|
+
try:
|
|
218
|
+
results = multi_thread(process_item, items, workers=3)
|
|
219
|
+
except SystemExit:
|
|
220
|
+
print("Error occurred and was displayed with rich formatting")
|
|
221
|
+
|
|
222
|
+
# Continue processing on errors, return None for failed items
|
|
223
|
+
results = multi_thread(process_item, items, workers=3, error_handler='ignore')
|
|
224
|
+
print(results) # [2, 4, None, 8, 10]
|
|
225
|
+
|
|
226
|
+
# Log errors to files and continue processing
|
|
227
|
+
results = multi_thread(process_item, items, workers=3, error_handler='log', max_error_files=10)
|
|
228
|
+
print(results) # [2, 4, None, 8, 10] - errors logged to .cache/speedy_utils/error_logs/
|
|
229
|
+
```
|
|
230
|
+
|
|
231
|
+
#### Multi-processing with Error Handling
|
|
232
|
+
|
|
233
|
+
Process items across multiple processes with the same enhanced error handling capabilities.
|
|
234
|
+
|
|
235
|
+
```python
|
|
236
|
+
from speedy_utils import multi_process
|
|
237
|
+
|
|
238
|
+
def risky_computation(x):
|
|
239
|
+
"""Computation that might fail for certain inputs."""
|
|
240
|
+
if x % 5 == 0:
|
|
241
|
+
raise RuntimeError(f"Cannot process multiples of 5: {x}")
|
|
242
|
+
return x ** 2
|
|
243
|
+
|
|
244
|
+
data = list(range(12))
|
|
245
|
+
|
|
246
|
+
# Process with error logging (continues on errors)
|
|
247
|
+
results = multi_process(
|
|
248
|
+
risky_computation,
|
|
249
|
+
data,
|
|
250
|
+
backend='mp',
|
|
251
|
+
error_handler='log',
|
|
252
|
+
max_error_files=5
|
|
253
|
+
)
|
|
254
|
+
print(results) # [0, 1, 4, 9, 16, None, 36, 49, 64, 81, None, 121]
|
|
255
|
+
```
|
|
256
|
+
|
|
257
|
+
### Enhanced Error Handling
|
|
258
|
+
|
|
259
|
+
**Speedy Utils** now provides comprehensive error handling for parallel processing with rich formatting and detailed diagnostics.
|
|
260
|
+
|
|
261
|
+
#### Rich Error Tracebacks
|
|
262
|
+
|
|
263
|
+
When errors occur, you'll see beautifully formatted tracebacks with:
|
|
264
|
+
- **Code context**: Lines of code around the error location
|
|
265
|
+
- **Caller information**: Shows where the parallel function was invoked
|
|
266
|
+
- **Filtered frames**: Focuses on user code, hiding infrastructure details
|
|
267
|
+
- **Color coding**: Easy-to-read formatting with syntax highlighting
|
|
268
|
+
|
|
269
|
+
#### Error Handling Modes
|
|
270
|
+
|
|
271
|
+
Choose how to handle errors in parallel processing:
|
|
272
|
+
|
|
273
|
+
- **`'raise'` (default)**: Stop on first error with detailed traceback
|
|
274
|
+
- **`'ignore'`**: Continue processing, return `None` for failed items
|
|
275
|
+
- **`'log'`**: Log errors to files and continue processing
|
|
276
|
+
|
|
277
|
+
#### Error Logging
|
|
278
|
+
|
|
279
|
+
When using `error_handler='log'`, errors are automatically saved to timestamped files in `.cache/speedy_utils/error_logs/` with full context and stack traces.
|
|
280
|
+
|
|
281
|
+
#### Progress Reporting with Error Statistics
|
|
282
|
+
|
|
283
|
+
Progress bars now show real-time error and success counts:
|
|
284
|
+
|
|
285
|
+
```
|
|
286
|
+
Multi-thread [10/10] [00:02<00:00, 3.45it/s, success=8, errors=2, pending=0]
|
|
287
|
+
```
|
|
288
|
+
|
|
289
|
+
This makes it easy to monitor processing health at a glance.
|
|
290
|
+
|
|
291
|
+
#### Example: Robust Data Processing
|
|
292
|
+
|
|
293
|
+
```python
|
|
294
|
+
from speedy_utils import multi_thread
|
|
295
|
+
|
|
296
|
+
def process_data_record(record):
|
|
297
|
+
"""Process a data record that might have issues."""
|
|
298
|
+
try:
|
|
299
|
+
# Your processing logic here
|
|
300
|
+
value = record['value'] / record['divisor']
|
|
301
|
+
return {'result': value, 'status': 'success'}
|
|
302
|
+
except KeyError as e:
|
|
303
|
+
raise ValueError(f"Missing required field in record: {e}")
|
|
304
|
+
except ZeroDivisionError:
|
|
305
|
+
raise ValueError("Division by zero in record")
|
|
306
|
+
|
|
307
|
+
# Sample data with some problematic records
|
|
308
|
+
data = [
|
|
309
|
+
{'value': 10, 'divisor': 2}, # OK
|
|
310
|
+
{'value': 15, 'divisor': 0}, # Will error
|
|
311
|
+
{'value': 20, 'divisor': 4}, # OK
|
|
312
|
+
{'value': 25}, # Missing divisor - will error
|
|
313
|
+
]
|
|
314
|
+
|
|
315
|
+
# Process with error logging - continues despite errors
|
|
316
|
+
results = multi_thread(
|
|
317
|
+
process_data_record,
|
|
318
|
+
data,
|
|
319
|
+
workers=4,
|
|
320
|
+
error_handler='log',
|
|
321
|
+
max_error_files=10
|
|
322
|
+
)
|
|
323
|
+
|
|
324
|
+
print("Results:", results)
|
|
325
|
+
# Output: Results: [{'result': 5.0, 'status': 'success'}, None, {'result': 5.0, 'status': 'success'}, None]
|
|
326
|
+
# Errors are logged to files for later analysis
|
|
178
327
|
```
|
|
179
328
|
|
|
180
329
|
### File I/O
|
|
@@ -30,7 +30,7 @@ llm_utils/vector_cache/core.py,sha256=VXuYJy1AX22NHKvIXRriETip5RrmQcNp73-g-ZT774
|
|
|
30
30
|
llm_utils/vector_cache/types.py,sha256=CpMZanJSTeBVxQSqjBq6pBVWp7u2-JRcgY9t5jhykdQ,438
|
|
31
31
|
llm_utils/vector_cache/utils.py,sha256=OsiRFydv8i8HiJtPL9hh40aUv8I5pYfg2zvmtDi4DME,1446
|
|
32
32
|
speedy_utils/__imports.py,sha256=V0YzkDK4-QkK_IDXY1be6C6_STuNhXAKIp4_dM0coQs,7800
|
|
33
|
-
speedy_utils/__init__.py,sha256=
|
|
33
|
+
speedy_utils/__init__.py,sha256=1ubAYR6P0cEZJLDt7KQLLxl6ylh-T7WE7HPP94-rVLI,3045
|
|
34
34
|
speedy_utils/common/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
35
35
|
speedy_utils/common/clock.py,sha256=raLtMGIgzrRej5kUt7hOUm2ZZw2THVPo-q8dMvdZOxw,7354
|
|
36
36
|
speedy_utils/common/function_decorator.py,sha256=GKXqRs_hHFFmhyhql0Br0o52WzekUnpNlm99NfaVwgY,2025
|
|
@@ -45,9 +45,9 @@ speedy_utils/common/utils_print.py,sha256=AGDB7mgJnO00QkJBH6kJb46738q3GzMUZPwtQ2
|
|
|
45
45
|
speedy_utils/multi_worker/__init__.py,sha256=urcuxzaAJp-Rl3SIwHNre3x2vyHxLR7YGiDdm-Q8GQs,361
|
|
46
46
|
speedy_utils/multi_worker/dataset_ray.py,sha256=U_l_4Y7CVpaHiApsXQSdNvals8NK87LHPS_XHiJF3qs,10044
|
|
47
47
|
speedy_utils/multi_worker/parallel_gpu_pool.py,sha256=A7llZcQbRVZqwCqNRku7TpqGCdSoIzpdcTaupgqT5nI,6108
|
|
48
|
-
speedy_utils/multi_worker/process.py,sha256=
|
|
48
|
+
speedy_utils/multi_worker/process.py,sha256=U-pjHoWZ3xOeplMl2nSxVeiJE0F9V-eswpSdK-8c3dU,45446
|
|
49
49
|
speedy_utils/multi_worker/progress.py,sha256=Ozeca-t-j1224n_dWwZkWzva9DC16SCLgScKeGtXLaQ,4717
|
|
50
|
-
speedy_utils/multi_worker/thread.py,sha256=
|
|
50
|
+
speedy_utils/multi_worker/thread.py,sha256=E7o_iUCIKmgk1tFt7mZAFT7c5q229wVzWj-trmVsxVA,27254
|
|
51
51
|
speedy_utils/scripts/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
52
52
|
speedy_utils/scripts/mpython.py,sha256=uJ-QPG43cyHUptMP4MnyH7IdSTh4mzVQAejy9o1cQoE,5068
|
|
53
53
|
speedy_utils/scripts/openapi_client_codegen.py,sha256=GModmmhkvGnxljK4KczyixKDrk-VEcLaW5I0XT6tzWo,9657
|
|
@@ -55,7 +55,7 @@ vision_utils/README.md,sha256=AIDZZj8jo_QNrEjFyHwd00iOO431s-js-M2dLtVTn3I,5740
|
|
|
55
55
|
vision_utils/__init__.py,sha256=hF54sT6FAxby8kDVhOvruy4yot8O-Ateey5n96O1pQM,284
|
|
56
56
|
vision_utils/io_utils.py,sha256=pI0Va6miesBysJcllK6NXCay8HpGZsaMWwlsKB2DMgA,26510
|
|
57
57
|
vision_utils/plot.py,sha256=HkNj3osA3moPuupP1VguXfPPOW614dZO5tvC-EFKpKM,12028
|
|
58
|
-
speedy_utils-1.1.
|
|
59
|
-
speedy_utils-1.1.
|
|
60
|
-
speedy_utils-1.1.
|
|
61
|
-
speedy_utils-1.1.
|
|
58
|
+
speedy_utils-1.1.44.dist-info/METADATA,sha256=Y5wQ_VbeiPTcqUkOCEbEvOKZ-wnwKCF2nvGhSnU3AJs,13067
|
|
59
|
+
speedy_utils-1.1.44.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
|
|
60
|
+
speedy_utils-1.1.44.dist-info/entry_points.txt,sha256=rwn89AYfBUh9SRJtFbpp-u2JIKiqmZ2sczvqyO6s9cI,289
|
|
61
|
+
speedy_utils-1.1.44.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|