PyPI - speedy-utils - Versions diffs - 1.1.42__tar.gz → 1.1.43__tar.gz - Mend

speedy-utils 1.1.42__tar.gz → 1.1.43__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (147) hide show

speedy_utils-1.1.43/.github/prompts/improveParallelErrorHandling.prompt.md ADDED Viewed

@@ -0,0 +1,64 @@
+---
+name: improveParallelErrorHandling
+description: Enhance error tracebacks in parallel execution with rich formatting and context
+argument-hint: the parallel execution function and backend type
+---
+Improve error handling for the specified parallel execution function to provide clean, user-focused tracebacks similar to direct function calls.
+## Requirements
+1. **Filter Internal Frames**: Remove framework/library internal frames from tracebacks, showing only user code
+2. **Add Context Lines**: Display 3 lines before and after each error location with line numbers
+3. **Include Caller Frame**: Show where the parallel execution function was called, not just where the error occurred
+4. **Rich Formatting**: Use rich library's Panel/formatting for clean, readable output
+5. **Suppress Noise**: Set environment variables or flags to suppress verbose framework error logs
+## Implementation Steps
+1. **Capture Caller Context**: Use `inspect.currentframe().f_back` to capture where the parallel function was called (filename, line number, function name)
+2. **Wrap Error Handling**: Catch framework-specific exceptions (e.g., `RayTaskError`, thread exceptions) in the execution loop
+3. **Parse/Extract Original Exception**: Get the underlying user exception from the framework wrapper
+   - Extract exception type, message, and traceback information
+   - Parse from string representation if traceback objects aren't preserved
+4. **Filter Frames**: Skip frames matching internal paths:
+   - Framework internals (e.g., `ray/_private`, `concurrent/futures`)
+   - Library worker implementations (e.g., `speedy_utils/multi_worker`)
+   - Site-packages for the framework
+5. **Format with Context**:
+   - For each user frame, show: `filepath:lineno in function_name`
+   - Use `linecache.getline()` to retrieve surrounding lines
+   - Highlight the error line with `❱` marker
+   - Number all lines (e.g., `   4 │ code here` or `   5 ❱ error here`)
+6. **Display Caller Frame First**: Show where the parallel function was invoked before showing the actual error location
+7. **Clean Exit**: Flush output streams before exiting to ensure traceback displays
+## Example Output Format
+```
+╭─────────────── Traceback (most recent call last) ───────────────╮
+│ /path/to/user/script.py:42 in main                              │
+│                                                                  │
+│   40 │ data = load_data()                                        │
+│   41 │ # Process in parallel                                     │
+│   42 ❱ results = multi_process(process_item, data, workers=8)   │
+│   43 │                                                           │
+│                                                                  │
+│ /path/to/user/module.py:15 in process_item                      │
+│                                                                  │
+│   12 │ def process_item(item):                                   │
+│   13 │     value = item['key']                                   │
+│   14 │     denominator = value - 100                             │
+│   15 ❱     return 1 / denominator                                │
+│   16 │                                                           │
+╰──────────────────────────────────────────────────────────────────╯
+ZeroDivisionError: division by zero
+```
+Apply these improvements to the specified parallel execution function, ensuring error messages are as clear as direct function calls while maintaining all performance benefits of parallel execution.

{speedy_utils-1.1.42 → speedy_utils-1.1.43}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: speedy-utils
-Version: 1.1.42
+Version: 1.1.43
 Summary: Fast and easy-to-use package for data science
 Project-URL: Homepage, https://github.com/anhvth/speedy
 Project-URL: Repository, https://github.com/anhvth/speedy
@@ -17,7 +17,7 @@ Classifier: Programming Language :: Python :: 3.11
 Classifier: Programming Language :: Python :: 3.12
 Classifier: Programming Language :: Python :: 3.13
 Classifier: Programming Language :: Python :: 3.14
-Requires-Python: >=3.8
+Requires-Python: >=3.9
 Requires-Dist: aiohttp
 Requires-Dist: bump2version
 Requires-Dist: cachetools
@@ -39,6 +39,7 @@ Requires-Dist: pydantic
 Requires-Dist: pytest
 Requires-Dist: ray
 Requires-Dist: requests
+Requires-Dist: rich>=14.3.1
 Requires-Dist: ruff
 Requires-Dist: scikit-learn
 Requires-Dist: tabulate

{speedy_utils-1.1.42 → speedy_utils-1.1.43}/pyproject.toml RENAMED Viewed

@@ -1,11 +1,11 @@
 [project]
 name = "speedy-utils"
-version = "1.1.42"
+version = "1.1.43"
 description = "Fast and easy-to-use package for data science"
 authors = [{ name = "AnhVTH", email = "anhvth.226@gmail.com" }]
 readme = "README.md"
 license = { text = "MIT" }
-requires-python = ">=3.8"
+requires-python = ">=3.9"
 dependencies = [
     "numpy",
     "requests",
@@ -33,6 +33,7 @@ dependencies = [
     "ray",
     "aiohttp",
     "pytest",
+    "rich>=14.3.1",
 ]
 classifiers = [
     "Development Status :: 4 - Beta",

speedy_utils-1.1.43/scripts/bug.py ADDED Viewed

@@ -0,0 +1,34 @@
+# type: ignore
+from speedy_utils import multi_process, multi_thread
+def do_something(x):
+    if x % 3 == 0:
+        raise ValueError(f'Error at index {x}')
+    return x * 2
+inputs = range(10)
+if __name__ == '__main__':
+    print('Testing error_handler="log" with mp backend:')
+    results = multi_process(
+        do_something,
+        inputs,
+        backend='mp',
+        error_handler='log',
+        max_error_files=5,
+    )
+    print(f'Results: {results}')
+    print()
+    # print('Testing error_handler="log" with multi_thread:')
+    # results = multi_thread(
+    #     do_something,
+    #     inputs,
+    #     error_handler='log',
+    #     max_error_files=5,
+    # )
+    # print(f'Results: {results}')

speedy_utils-1.1.43/scripts/bug_simple.py ADDED Viewed

@@ -0,0 +1,11 @@
+from speedy_utils import *
+def do_something(x):
+    x = 10
+    y = 0
+    x/y
+do_something(1)

{speedy_utils-1.1.42 → speedy_utils-1.1.43}/scripts/debug_import_time.py RENAMED Viewed

@@ -18,21 +18,20 @@ import json
 import re
 import subprocess
 import sys
-from typing import Dict, List, Tuple
+from typing import Dict, List, Optional, Tuple
 DEFAULT_MODULES = ['speedy_utils', 'llm_utils', 'vision_utils']
-def parse_x_importtime(stderr: str) -> List[Tuple[str, float]]:
+def parse_x_importtime(stderr: str) -> List[Tuple[str, float, float]]:
     """Parse -X importtime stderr into per-top-level module seconds.
-    We use the first column (self-time) aggregated per top-level module
-    as a good approximation of which third-party or heavy packages cost
-    time during import.
+    We aggregate both self-time and cumulative-time per top-level module
+    as a quick proxy for heavy imports.
     """
-    times: Dict[str, float] = {}
+    times: Dict[str, Tuple[float, float]] = {}
     pattern = re.compile(r'^\s*import time:\s*(\d+)\s*\|\s*(\d+)\s*\|\s*(.+)$')
     for line in stderr.splitlines():
         match = pattern.match(line)
@@ -40,27 +39,43 @@ def parse_x_importtime(stderr: str) -> List[Tuple[str, float]]:
             continue
         try:
             self_us = int(match.group(1))
+            cum_us = int(match.group(2))
             mod_name = match.group(3).strip()
         except Exception:
             continue
         top = mod_name.split('.', 1)[0]
-        times[top] = times.get(top, 0.0) + (self_us / 1_000_000.0)
+        self_sec = self_us / 1_000_000.0
+        cum_sec = cum_us / 1_000_000.0
+        prev_self, prev_cum = times.get(top, (0.0, 0.0))
+        times[top] = (prev_self + self_sec, prev_cum + cum_sec)
     # return sorted list (desc)
-    return sorted(times.items(), key=lambda it: it[1], reverse=True)
+    return sorted(
+        [(name, vals[0], vals[1]) for name, vals in times.items()],
+        key=lambda it: it[1],
+        reverse=True,
+    )
-def run_importtime(module: str) -> Tuple[bool, str]:
+def run_importtime(module: str, star_import: bool) -> Tuple[bool, str]:
     exe = sys.executable
-    cmd = [exe, '-X', 'importtime', '-c', f'from {module} import *']
+    if star_import:
+        code = f'from {module} import *'
+    else:
+        code = f'import {module}'
+    cmd = [exe, '-X', 'importtime', '-c', code]
     p = subprocess.run(cmd, capture_output=True, text=True, check=False)
     ok = p.returncode == 0 and bool(p.stderr.strip())
     out = p.stderr if p.stderr else p.stdout
     return ok, out
-def run_timed_import(module: str) -> Tuple[bool, str]:
+def run_timed_import(module: str, star_import: bool) -> Tuple[bool, str]:
+    if star_import:
+        import_stmt = f'from {module} import *'
+    else:
+        import_stmt = f'import {module}'
     code = (
         'import builtins, time, json\n'
         'orig = builtins.__import__\n'
@@ -74,7 +89,7 @@ def run_timed_import(module: str) -> Tuple[bool, str]:
         "        key = name.split('.',1)[0]\n"
         '        times[key] = times.get(key, 0.0) + elapsed\n'
         'builtins.__import__ = timed\n'
-        f'from {module} import *\n'
+        f'{import_stmt}\n'
         'builtins.__import__ = orig\n'
         'print(json.dumps(sorted(times.items(), key=lambda it: it[1], reverse=True)))\n'
     )
@@ -86,9 +101,30 @@ def run_timed_import(module: str) -> Tuple[bool, str]:
     return True, p.stdout.strip()
-def pretty_print_list(items: List[Tuple[str, float]]) -> None:
-    for name, sec in items:
-        print(f'{sec:6.3f}s  {name}')
+def pretty_print_list(items: List[Tuple[str, float, Optional[float]]]) -> None:
+    for name, self_sec, cum_sec in items:
+        if cum_sec is None:
+            print(f'{self_sec:6.3f}s  {name}')
+        else:
+            print(f'{self_sec:6.3f}s  {cum_sec:6.3f}s  {name}')
+def is_stdlib(name: str) -> bool:
+    if name == 'builtins':
+        return True
+    stdlib_names = getattr(sys, 'stdlib_module_names', None)
+    if stdlib_names is None:
+        return False
+    return name in stdlib_names
+def filter_stdlib(
+    items: List[Tuple[str, float, Optional[float]]],
+    show_stdlib: Optional[bool],
+) -> List[Tuple[str, float, Optional[float]]]:
+    if show_stdlib is None:
+        return items
+    return [it for it in items if is_stdlib(it[0]) is show_stdlib]
 def main(argv: List[str] | None = None) -> int:
@@ -98,11 +134,28 @@ def main(argv: List[str] | None = None) -> int:
         '--min-sec', type=float, default=0.2, help='Minimum seconds to show'
     )
     parser.add_argument('--no-x', action='store_true', help="Don't try -X importtime")
+    parser.add_argument(
+        '--star-import',
+        action='store_true',
+        help='Use "from module import *" instead of plain import',
+    )
     parser.add_argument(
         '--raw', action='store_true', help='Show raw -X output in addition'
     )
     parser.add_argument('-n', '--top', type=int, default=20)
+    stdlib_group = parser.add_mutually_exclusive_group()
+    stdlib_group.add_argument(
+        '--stdlib',
+        action='store_true',
+        help='Show only standard library modules',
+    )
+    stdlib_group.add_argument(
+        '--no-stdlib',
+        action='store_true',
+        help='Exclude standard library modules',
+    )
     args = parser.parse_args(argv)
+    show_stdlib = True if args.stdlib else False if args.no_stdlib else None
     for module in args.modules:
         print('=' * 60)
@@ -110,12 +163,16 @@ def main(argv: List[str] | None = None) -> int:
         print('=' * 60)
         if not args.no_x:
-            ok, out = run_importtime(module)
+            ok, out = run_importtime(module, args.star_import)
             if ok:
                 parsed = parse_x_importtime(out)
                 filtered = [it for it in parsed if it[1] >= args.min_sec]
+                filtered = filter_stdlib(
+                    [(n, s, c) for n, s, c in filtered], show_stdlib
+                )
                 if filtered:
                     print('Top heavy imports (from -X importtime):')
+                    print(' self    cum    module')
                     pretty_print_list(filtered[: args.top])
                 else:
                     print(
@@ -125,15 +182,20 @@ def main(argv: List[str] | None = None) -> int:
                     print('\nRaw -X importtime output:\n')
                     print(out)
                 continue
+            print(
+                f'Failed to measure imports for {module!r} with -X importtime. '
+                'Retry with --raw for details.'
+            )
         # Fallback instrumentation
-        ok, out = run_timed_import(module)
+        ok, out = run_timed_import(module, args.star_import)
         if not ok:
-            print('Failed to measure imports:\n', out)
+            print(f'Failed to measure imports for {module!r}:\n', out)
             continue
         items = json.loads(out)
         filtered = [it for it in items if it[1] >= args.min_sec]
+        filtered = filter_stdlib([(n, s, None) for n, s in filtered], show_stdlib)
         if not filtered:
             print(f'No imports >= {args.min_sec:.3f}s (fallback)')
             continue

speedy_utils-1.1.43/scripts/test.py ADDED Viewed

@@ -0,0 +1,26 @@
+from speedy_utils import multi_thread
+def demo_1_simple_error():
+    """Demo 1: Simple function error."""
+    print('\n' + '=' * 70)
+    print('DEMO 1: Simple TypeError in user function')
+    print('=' * 70)
+    def process_item(x):
+        # Intentional error - calling a list
+        my_list = [1, 2, 3]
+        return my_list(x)
+    # try:
+    multi_thread(process_item, range(300), workers=100, progress=True)
+    # except TypeError as e:
+    #     pass
+        # import traceback; traceback.print_exc()
+        # print(f'\nCaught: {type(e).__name__}')
+        # print('\nError message focuses on YOUR code:')
+        # print(f'{e}')
+demo_1_simple_error()

speedy_utils-1.1.43/scripts/test_both_backends.py ADDED Viewed

@@ -0,0 +1,25 @@
+from speedy_utils import *
+def do_something(x):
+    x = 10
+    y = 0
+    x/y
+if __name__ == '__main__':
+    inputs = range(10)
+    print("=" * 80)
+    print("Testing MP backend:")
+    print("=" * 80)
+    try:
+        multi_process(do_something, inputs, backend='mp')
+    except SystemExit:
+        pass
+    print("\n" + "=" * 80)
+    print("Testing RAY backend:")
+    print("=" * 80)
+    try:
+        multi_process(do_something, inputs, backend='ray')
+    except SystemExit:
+        pass

speedy_utils-1.1.43/scripts/test_error_handling.py ADDED Viewed

@@ -0,0 +1,37 @@
+"""Test improved error handling in multi_thread."""
+from speedy_utils import multi_thread
+def process_data(item):
+    """Process a data item - may raise errors."""
+    # Simulate some processing logic
+    value = item['value']
+    threshold = item.get('threshold', 100)
+    # This will error when value == threshold
+    result = 1 / (value - threshold)
+    return result * 2
+def main():
+    """Run test cases for error handling."""
+    # Test case 1: Error in the middle of processing
+    print("Test 1: Error with division by zero")
+    print("-" * 60)
+    data = [
+        {'value': 50, 'threshold': 100},
+        {'value': 75, 'threshold': 100},
+        {'value': 100, 'threshold': 100},  # This will error
+        {'value': 125, 'threshold': 100},
+    ]
+    try:
+        results = multi_thread(process_data, data, workers=4)
+        print(f"Results: {results}")
+    except SystemExit:
+        print("\nTest completed - error was properly reported")
+if __name__ == '__main__':
+    main()

speedy_utils-1.1.43/scripts/test_locals.py ADDED Viewed

@@ -0,0 +1,19 @@
+from speedy_utils import *
+import json
+import sys
+def process_data(item):
+    # Mix of user variables and imported modules
+    data = {'value': item}
+    multiplier = 2
+    result_list = [1, 2, 3]
+    # This will cause an error
+    denominator = 0
+    final = data['value'] * multiplier / denominator
+    return final
+if __name__ == '__main__':
+    inputs = range(5)
+    ret = multi_process(process_data, inputs, backend='mp', error_handler='log')
+    print(ret)

speedy_utils-1.1.43/scripts/test_ray_locals.py ADDED Viewed

@@ -0,0 +1,11 @@
+from speedy_utils import *
+def do_something(x):
+    x = 10
+    y = 0
+    # Intentionally cause division by zero error for testing
+    _ = x/y
+if __name__ == '__main__':
+    inputs = range(10)
+    multi_process(do_something, inputs, backend='ray')

{speedy_utils-1.1.42 → speedy_utils-1.1.43}/src/speedy_utils/__init__.py RENAMED Viewed

@@ -7,6 +7,13 @@ t = time.time()
 from .__imports import *
+# Install rich traceback for better error messages
+try:
+    from rich.traceback import install
+    install(show_locals=os.getenv('TRACEBACK_SHOW_LOCALS', 'True') == 'True')
+except ImportError:
+    pass  # rich is optional
 # Clock module
 from .common.clock import Clock, speedy_timer, timef

speedy-utils 1.1.42__tar.gz → 1.1.43__tar.gz

speedy-utils 1.1.42__tar.gz → 1.1.43__tar.gz