speedy-utils 1.0.21__py3-none-any.whl → 1.0.22__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- llm_utils/lm/async_lm.py +74 -13
- llm_utils/lm/sync_lm.py +83 -6
- llm_utils/scripts/vllm_load_balancer.py +81 -7
- llm_utils/scripts/vllm_serve.py +69 -19
- speedy_utils/__init__.py +100 -47
- speedy_utils/all.py +43 -0
- speedy_utils/multi_worker/thread.py +78 -1
- {speedy_utils-1.0.21.dist-info → speedy_utils-1.0.22.dist-info}/METADATA +1 -1
- {speedy_utils-1.0.21.dist-info → speedy_utils-1.0.22.dist-info}/RECORD +11 -11
- {speedy_utils-1.0.21.dist-info → speedy_utils-1.0.22.dist-info}/WHEEL +0 -0
- {speedy_utils-1.0.21.dist-info → speedy_utils-1.0.22.dist-info}/entry_points.txt +0 -0
llm_utils/lm/async_lm.py
CHANGED
@@ -1,16 +1,76 @@
-"""
-
-
-
-
-
-
-
-
-
-
-
-
+"""
+# ============================================================================= #
+# ASYNCHRONOUS LANGUAGE MODEL WRAPPER WITH CONCURRENT EXECUTION SUPPORT
+# ============================================================================= #
+#
+# Title & Intent:
+# High-performance asynchronous language model interface for concurrent LLM operations
+#
+# High-level Summary:
+# This module provides an async drop-in replacement for the synchronous LM class, designed
+# for high-throughput applications requiring concurrent language model operations. It maintains
+# full API compatibility while adding async/await semantics, connection pooling, and efficient
+# resource management. The AsyncLM class supports batch processing, concurrent request handling,
+# and maintains the same caching and type safety guarantees as the synchronous version.
+#
+# Public API / Data Contracts:
+# • AsyncLM(model, temperature=0.0, max_tokens=2000, host="localhost", port=None, **kwargs) - Async wrapper class
+# • async AsyncLM.__call__(prompt=None, messages=None, response_format=str, cache=None, **kwargs) -> str | BaseModel
+# • async AsyncLM.list_models(port=None) -> List[str] - Enumerate available models
+# • async AsyncLM.count_tokens(messages, model=None) -> int - Token counting utility
+# • async AsyncLM.price(messages, model=None, response_tokens=0) -> float - Cost estimation
+# • AsyncLM.set_model(model_name) -> None - Runtime model switching (sync method)
+# • async AsyncLM.batch_call(requests) -> List[Union[str, BaseModel]] - Concurrent batch processing
+# • TModel = TypeVar("TModel", bound=BaseModel) - Generic type for structured responses
+# • Messages = List[ChatCompletionMessageParam] - Typed message format
+#
+# Invariants / Constraints:
+# • MUST be used within async context (asyncio event loop required)
+# • MUST provide either 'prompt' or 'messages' parameter, but not both
+# • MUST properly await all async method calls
+# • Connection pooling MUST handle concurrent requests efficiently
+# • MUST maintain thread safety across concurrent operations
+# • Rate limit handling MUST use async backoff without blocking event loop
+# • MUST preserve all synchronous LM class behaviors and constraints
+# • Resource cleanup MUST occur on context manager exit or explicit close
+#
+# Usage Example:
+# ```python
+# import asyncio
+# from llm_utils.lm.async_lm import AsyncLM
+# from pydantic import BaseModel
+#
+# class SummaryResponse(BaseModel):
+#     summary: str
+#     key_points: List[str]
+#     confidence: float
+#
+# async def main():
+#     # Single async call
+#     lm = AsyncLM(model="gpt-4o-mini", temperature=0.1)
+#     response = await lm(prompt="Summarize quantum computing")
+#     print(response)
+#
+#     # Concurrent batch processing
+#     texts = ["Text 1 to summarize", "Text 2 to summarize", "Text 3 to summarize"]
+#     tasks = [lm(prompt=f"Summarize: {text}", response_format=SummaryResponse) for text in texts]
+#     summaries = await asyncio.gather(*tasks)
+#
+#     for summary in summaries:
+#         print(f"Summary: {summary.summary}")
+#         print(f"Key points: {summary.key_points}")
+#
+# asyncio.run(main())
+# ```
+#
+# TODO & Future Work:
+# • Add async context manager support for automatic resource cleanup
+# • Implement connection pool size optimization based on usage patterns
+# • Add async streaming response support with async generators
+# • Optimize memory usage for large-scale concurrent operations
+# • Add async rate limiting with priority queuing
+#
+# ============================================================================= #
 """
 
 import base64

@@ -857,3 +917,4 @@ class AsyncLLMTask(ABC):
             system_msg=system_prompt, user_msg=user_msg, assistant_msg=assistant_msg
         )
         return {"messages": messages}
+    arun = __call__  # alias for compatibility with other LLMTask implementations
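The only functional addition in this file is the `arun` alias; the rest of the hunk is the new header docstring. A minimal sketch of the concurrency pattern that header advertises, assuming the `AsyncLM` constructor and call signature listed in its Public API section (model name and prompts are placeholders):

```python
import asyncio

from llm_utils.lm.async_lm import AsyncLM


async def main() -> None:
    # Constructor arguments per the header's Public API section (assumed, not verified).
    lm = AsyncLM(model="gpt-4o-mini", temperature=0.0)

    # Each call returns a coroutine, so gather() runs the requests concurrently.
    prompts = [f"Define: {term}" for term in ("latency", "throughput", "QPS")]
    answers = await asyncio.gather(*(lm(prompt=p) for p in prompts))
    for prompt, answer in zip(prompts, answers):
        print(prompt, "->", answer)


asyncio.run(main())
```

Because `arun` is bound directly to `__call__`, runners that expect an `await task.arun(...)` interface can now drive an `AsyncLLMTask` without an adapter.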
llm_utils/lm/sync_lm.py
CHANGED
@@ -1,3 +1,79 @@
+"""
+# ============================================================================= #
+# SYNCHRONOUS LANGUAGE MODEL WRAPPER WITH OPENAI COMPATIBILITY
+# ============================================================================= #
+#
+# Title & Intent:
+# Unified synchronous language model interface with caching, type safety, and OpenAI API compatibility
+#
+# High-level Summary:
+# This module provides a comprehensive synchronous wrapper for language models that supports both
+# string prompts and structured Pydantic model responses. It includes intelligent caching with
+# content-based hashing, automatic retry logic for rate limits, and seamless integration with
+# OpenAI-compatible APIs. The LM class handles message formatting, response parsing, token counting,
+# and provides detailed logging and debugging capabilities for production use.
+#
+# Public API / Data Contracts:
+# • LM(model, temperature=0.0, max_tokens=2000, host="localhost", port=None, **kwargs) - Main wrapper class
+# • LM.__call__(prompt=None, messages=None, response_format=str, cache=None, **kwargs) -> str | BaseModel
+# • LM.list_models(port=None) -> List[str] - Enumerate available models
+# • LM.count_tokens(messages, model=None) -> int - Token counting utility
+# • LM.price(messages, model=None, response_tokens=0) -> float - Cost estimation
+# • LM.set_model(model_name) -> None - Runtime model switching
+# • TModel = TypeVar("TModel", bound=BaseModel) - Generic type for structured responses
+# • Messages = List[ChatCompletionMessageParam] - Typed message format
+# • RawMsgs = Union[Messages, LegacyMsgs] - Flexible input format
+#
+# Invariants / Constraints:
+# • MUST provide either 'prompt' or 'messages' parameter, but not both
+# • MUST set model name before making API calls (auto-detection available)
+# • response_format=str MUST return string; response_format=PydanticModel MUST return model instance
+# • Caching MUST use content-based hashing for reproducible results
+# • MUST handle OpenAI rate limits with exponential backoff (up to 3 retries)
+# • MUST preserve message order and format during transformations
+# • Token counting SHOULD use tiktoken when available, fall back to character estimation
+# • MUST validate Pydantic responses and retry on parsing failures
+#
+# Usage Example:
+# ```python
+# from llm_utils.lm.sync_lm import LM
+# from pydantic import BaseModel
+#
+# class CodeResponse(BaseModel):
+#     language: str
+#     code: str
+#     explanation: str
+#
+# # String response
+# lm = LM(model="gpt-4o-mini", temperature=0.1)
+# response = lm(prompt="Write a Python hello world")
+# print(response)  # Returns string
+#
+# # Structured response
+# code_response = lm(
+#     prompt="Write a Python function to calculate fibonacci",
+#     response_format=CodeResponse
+# )
+# print(f"Language: {code_response.language}")  # Returns CodeResponse instance
+#
+# # Message-based conversation
+# messages = [
+#     {"role": "system", "content": "You are a helpful coding assistant"},
+#     {"role": "user", "content": "Explain async/await in Python"}
+# ]
+# response = lm(messages=messages, max_tokens=1000)
+# ```
+#
+# TODO & Future Work:
+# • Add streaming response support for long-form generation
+# • Implement fine-grained token usage tracking per conversation
+# • Add support for function calling and tool use
+# • Optimize caching strategy for conversation contexts
+# • Add async context manager support for resource cleanup
+#
+# ============================================================================= #
+"""
+
 from __future__ import annotations
 
 import base64

@@ -551,9 +627,9 @@ class LM:
 
         assert isinstance(messages, list), "Messages must be a list."
         assert len(messages) > 0, "Messages cannot be empty."
-        assert (
-            messages[0]["role"] == "system"
-        ), "First message must be a system message with instruction."
+        assert messages[0]["role"] == "system", (
+            "First message must be a system message with instruction."
+        )
         messages[0]["content"] += post_fix  # type: ignore
 
         model_kwargs = {}

@@ -674,14 +750,13 @@ class LM:
 
 @lru_cache(maxsize=10)
 def get_tokenizer(model_name: str) -> Any:
-    from transformers import AutoTokenizer
+    from transformers import AutoTokenizer  # type: ignore
 
     tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
     return tokenizer
 
 
 def inspect_word_probs(lm, tokenizer, messages):
-
     import numpy as np
 
     def compute_word_log_probs(

@@ -819,7 +894,7 @@ class LLMTask(ABC):
 
     temperature = 0.6
     think=False
-
+
 demo_task = DemoTask()
 demo_task({'text_to_translate': 'Translate from english to vietnamese: Hello how are you'})
 ```

@@ -864,3 +939,5 @@ class LLMTask(ABC):
         return get_conversation_one_turn(
             system_msg=system_prompt, user_msg=user_msg, assistant_msg=assistant_msg
         )
+
+    run = __call__  # alias for compatibility with other LLMTask implementations
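The matching sync-side change is `run = __call__` on `LLMTask`. The idiom is plain Python; shown here with a stub class rather than the library's actual one:

```python
class Task:
    def __call__(self, payload: dict) -> dict:
        # Stand-in for the real task body; just echoes its input.
        return {"echo": payload}

    # Assigning the function object in the class body creates a second method
    # name bound to the same code, so `.run(...)` and direct invocation are
    # interchangeable for callers written against either interface.
    run = __call__


task = Task()
assert task({"x": 1}) == task.run({"x": 1})
```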
llm_utils/scripts/vllm_load_balancer.py
CHANGED

@@ -1,11 +1,85 @@
+"""
+# ============================================================================= #
+# VLLM LOAD BALANCER WITH HEALTH MONITORING AND DYNAMIC ROUTING
+# ============================================================================= #
+#
+# Title & Intent:
+# Production-ready TCP load balancer for vLLM model servers with health checks and connection management
+#
+# High-level Summary:
+# This module implements a high-performance load balancer specifically designed for vLLM model
+# serving infrastructure. It provides intelligent routing across multiple vLLM server instances,
+# continuous health monitoring, automatic failover, and connection pooling. The load balancer
+# uses async TCP proxying to handle concurrent requests efficiently while maintaining session
+# affinity and providing detailed metrics for monitoring and debugging.
+#
+# Public API / Data Contracts:
+# • LOAD_BALANCER_HOST = "0.0.0.0" - Load balancer bind address
+# • LOAD_BALANCER_PORT = 8008 - Load balancer listening port
+# • SCAN_TARGET_HOST = "localhost" - Target server host for health checks
+# • SCAN_PORT_START = 8140, SCAN_PORT_END = 8170 - Port range for server discovery
+# • start_load_balancer() -> None - Main entry point to start the service
+# • scan_for_healthy_servers() -> None - Background health monitoring task
+# • handle_client(reader, writer) -> None - Client connection handler
+# • relay_data(reader, writer, direction) -> None - Bidirectional data relay
+# • get_next_server() -> Optional[Tuple[str, int]] - Round-robin server selection
+#
+# Invariants / Constraints:
+# • MUST continuously monitor server health every SCAN_INTERVAL seconds
+# • MUST handle connection failures gracefully with automatic failover
+# • Health checks MUST complete within HEALTH_CHECK_TIMEOUT seconds
+# • MUST maintain connection counts for load balancing decisions
+# • Server availability MUST be updated atomically using async locks
+# • TCP connections MUST be properly closed on errors or completion
+# • MUST log all connection events and health status changes
+# • Round-robin selection MUST distribute load evenly across healthy servers
+#
+# Usage Example:
+# ```python
+# # Start the load balancer (blocking operation)
+# import asyncio
+# from llm_utils.scripts.vllm_load_balancer import start_load_balancer
+#
+# # Configure environment or modify constants as needed
+# LOAD_BALANCER_HOST = "0.0.0.0"
+# LOAD_BALANCER_PORT = 8008
+# SCAN_TARGET_HOST = "localhost"
+# SCAN_PORT_START = 8140
+# SCAN_PORT_END = 8150
+#
+# # Start the load balancer service
+# asyncio.run(start_load_balancer())
+#
+# # The service will:
+# # 1. Scan for healthy vLLM servers on ports 8140-8150
+# # 2. Accept client connections on port 8008
+# # 3. Route requests to healthy backend servers
+# # 4. Monitor server health continuously
+# # 5. Provide connection statistics
+# ```
+#
+# TODO & Future Work:
+# • Add weighted round-robin based on server capacity metrics
+# • Implement session affinity for stateful model interactions
+# • Add HTTP health check endpoints for better monitoring integration
+# • Support dynamic server registration and deregistration
+# • Add metrics export for Prometheus/Grafana monitoring
+# • Implement graceful shutdown with connection draining
+#
+# ============================================================================= #
+"""
+
 import asyncio
+import contextlib
 import random
 from collections import defaultdict
-
-import contextlib
+
 import aiohttp  # <-- Import aiohttp
-from speedy_utils import setup_logger
 from loguru import logger
+from tabulate import tabulate
+
+from speedy_utils import setup_logger
+
 setup_logger(min_interval=5)
 # --- Configuration ---
 LOAD_BALANCER_HOST = "0.0.0.0"

@@ -180,7 +254,9 @@ async def scan_and_update_servers():
             if server not in connection_counts:
                 connection_counts[server] = 0
 
-        logger.debug(f"[{LOAD_BALANCER_PORT=}]Scan complete. Active servers: {available_servers}")
+        logger.debug(
+            f"[{LOAD_BALANCER_PORT=}]Scan complete. Active servers: {available_servers}"
+        )
 
     except asyncio.CancelledError:
         logger.info("Server scan task cancelled.")

@@ -219,9 +295,7 @@ async def handle_client(client_reader, client_writer):
 
     min_connections = float("inf")
     least_used_available_servers = []
-    for (
-        server
-    ) in (
+    for server in (
         available_servers
     ):  # Iterate only over servers that passed health check
         count = connection_counts.get(server, 0)
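The reflowed loop in `handle_client` is the least-connections selection step. A standalone sketch of that logic, assuming the module's globals hold `(host, port)` tuples in `available_servers` and per-server open-connection counts in `connection_counts` (values here are illustrative):

```python
import random

available_servers = [("localhost", 8140), ("localhost", 8141), ("localhost", 8142)]
connection_counts = {
    ("localhost", 8140): 2,
    ("localhost", 8141): 0,
    ("localhost", 8142): 0,
}


def pick_server():
    # Only servers that passed the last health scan are candidates.
    if not available_servers:
        return None
    # Find the minimum open-connection count, then break ties randomly so
    # equally loaded servers share incoming traffic.
    min_count = min(connection_counts.get(s, 0) for s in available_servers)
    least_used = [s for s in available_servers if connection_counts.get(s, 0) == min_count]
    return random.choice(least_used)


print(pick_server())  # one of the two idle servers
```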
llm_utils/scripts/vllm_serve.py
CHANGED
@@ -1,29 +1,79 @@
-"""
-
-
-
-
-
-
-
-
-
-
-
-
+"""
+# ============================================================================= #
+# VLLM MODEL SERVING AND LORA MANAGEMENT UTILITIES
+# ============================================================================= #
+#
+# Title & Intent:
+# Command-line interface for serving language models and managing LoRA adapters with vLLM
+#
+# High-level Summary:
+# This module provides a comprehensive CLI tool for deploying and managing vLLM model servers
+# with support for base models, LoRA adapters, and dynamic adapter loading. It handles GPU
+# allocation, process management, model discovery, and provides utilities for adding/removing
+# LoRA adapters to running servers. The tool simplifies the deployment of production-ready
+# language model serving infrastructure with fine-tuned model support.
+#
+# Public API / Data Contracts:
+# • serve_model(model_name, gpus, **kwargs) -> subprocess.Popen - Start vLLM server for base model
+# • serve_lora(lora_name_or_path, gpus, **kwargs) -> subprocess.Popen - Start vLLM server with LoRA
+# • add_lora(lora_name_or_path, host_port, **kwargs) -> dict - Add LoRA to running server
+# • list_loras(host_port, api_key="abc") -> None - List available LoRA adapters
+# • model_list(host_port, api_key="abc") -> None - List available models
+# • remove_lora(lora_name, host_port, api_key="abc") -> dict - Remove LoRA adapter
+# • get_lora_path(lora_name_or_path) -> str - Resolve LoRA adapter path
+# • LORA_DIR: str - Environment-configurable LoRA storage directory
+# • HF_HOME: str - Hugging Face cache directory
+#
+# Invariants / Constraints:
+# • GPU groups MUST be specified as comma-separated integers (e.g., "0,1,2,3")
+# • LoRA paths MUST exist and contain valid adapter files
+# • Server endpoints MUST be reachable for dynamic LoRA operations
+# • MUST validate model and LoRA compatibility before serving
+# • Process management MUST handle graceful shutdown on interruption
+# • MUST respect CUDA device visibility and memory constraints
+# • LoRA operations MUST verify server API compatibility
+# • MUST log all serving operations and adapter changes
+#
+# Usage Example:
+# ```bash
+# # Serve a base model on GPUs 0,1
+# svllm serve --model meta-llama/Llama-2-7b-hf --gpus 0,1
+#
+# # Serve a model with LoRA adapter
+# svllm serve --lora my-adapter /path/to/adapter --gpus 0,1,2,3
+#
+# # Add LoRA to running server
+# svllm add-lora --lora new-adapter /path/to/new-adapter --host_port localhost:8000
+#
+# # List available models
+# svllm list-models --host_port localhost:8000
+#
+# # Remove LoRA adapter
+# svllm remove-lora --lora adapter-name --host_port localhost:8000
+# ```
+#
+# TODO & Future Work:
+# • Add support for multi-node distributed serving
+# • Implement automatic model quantization options
+# • Add configuration validation before server startup
+# • Support for custom tokenizer and chat templates
+# • Add health check endpoints for load balancer integration
+# • Implement rolling updates for zero-downtime deployments
+#
+# ============================================================================= #
 """
 
+import argparse
 import os
 import subprocess
 from typing import List, Optional
-
-import requests
+
 import openai
+import requests
 from loguru import logger
 
 from speedy_utils.common.utils_io import load_by_ext
 
-
 LORA_DIR: str = os.environ.get("LORA_DIR", "/loras")
 LORA_DIR = os.path.abspath(LORA_DIR)
 HF_HOME: str = os.environ.get("HF_HOME", os.path.expanduser("~/.cache/huggingface"))

@@ -181,9 +231,9 @@ def get_vllm() -> str:
     vllm_binary = subprocess.check_output("which vllm", shell=True, text=True).strip()
     vllm_binary = os.getenv("VLLM_BINARY", vllm_binary)
     logger.info(f"vLLM binary: {vllm_binary}")
-    assert os.path.exists(
-        vllm_binary
-    ), f"vLLM binary not found at {vllm_binary}, please set VLLM_BINARY env variable"
+    assert os.path.exists(vllm_binary), (
+        f"vLLM binary not found at {vllm_binary}, please set VLLM_BINARY env variable"
+    )
     return vllm_binary
 
 
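The `get_vllm` hunk only reflows the assert and keeps its actionable message. A self-contained sketch of the same resolution order (PATH lookup, `VLLM_BINARY` override, existence check); `shutil.which` stands in for the module's `which vllm` subprocess call:

```python
import os
import shutil


def resolve_vllm_binary() -> str:
    # 1. Look up `vllm` on PATH (portable equivalent of `which vllm`).
    vllm_binary = shutil.which("vllm") or ""
    # 2. An explicit VLLM_BINARY environment variable wins over the PATH hit.
    vllm_binary = os.getenv("VLLM_BINARY", vllm_binary)
    # 3. Fail fast with the same actionable message the diff keeps.
    assert os.path.exists(vllm_binary), (
        f"vLLM binary not found at {vllm_binary}, please set VLLM_BINARY env variable"
    )
    return vllm_binary
```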
speedy_utils/__init__.py
CHANGED
@@ -1,5 +1,97 @@
+# ----------------------------------------------------------------------------
+# speedy_utils/__init__.py
+#
+# Main entry point and public API for the Speedy Utils library
+#
+# This module exports the primary utilities for enhanced Python development
+# productivity including caching mechanisms, parallel processing, file I/O,
+# timing utilities, and data manipulation functions. It provides a convenient
+# single-import interface for the most commonly used functionality.
+#
+# Public API / Data Contracts:
+# • setup_logger(min_interval: int = 5) -> None - Configure logging system
+# • log(*args, **kwargs) -> None - Rate-limited logging function
+# • Clock() - Timing and performance measurement utility
+# • speedy_timer: Clock - Pre-configured global timer instance
+# • timef(func) -> Callable - Function execution time decorator
+# • retry_runtime(sleep_seconds: int, max_retry: int, exceptions) -> Callable
+# • memoize(func) -> Callable - Function result caching decorator
+# • identify(obj: Any) -> str - Generate unique object identifier
+# • identify_uuid(obj: Any) -> str - Generate UUID-based object identifier
+# • load_by_ext(fname: str | list[str]) -> Any - Auto-detect file format loader
+# • dump_json_or_pickle(obj: Any, fname: str) -> None - Smart file serializer
+# • load_json_or_pickle(fname: str) -> Any - Smart file deserializer
+# • multi_thread(func, items, **kwargs) -> list - Parallel thread execution
+# • multi_process(func, items, **kwargs) -> list - Parallel process execution
+#
+# Invariants / Constraints:
+# • MUST import only stable, tested utilities into public namespace
+# • SHOULD maintain backward compatibility across minor versions
+# • MUST provide consistent error handling across all public functions
+# • SHOULD use lazy imports for heavy dependencies when possible
+#
+# Usage Example:
+# ```python
+# from speedy_utils import Clock, memoize, multi_thread, load_by_ext
+#
+# @memoize
+# def expensive_computation(x):
+#     return x ** 2
+#
+# timer = Clock()
+# timer.start()
+# results = multi_thread(expensive_computation, range(100))
+# timer.end()
+# data = load_by_ext("config.json")
+# ```
+#
+# TODO & Future Work:
+# • Add async variants for I/O operations
+# • Implement distributed caching backend
+# • Add GPU acceleration utilities
+# ----------------------------------------------------------------------------
+
 # Import specific functions and classes from modules
 # Logger
+# Standard library imports
+import copy
+import functools
+import gc
+import inspect
+import json
+import multiprocessing
+import os
+import os.path as osp
+import pickle
+import pprint
+import random
+import re
+import sys
+import textwrap
+import threading
+import time
+import traceback
+import uuid
+from collections import Counter, defaultdict
+from collections.abc import Callable
+from concurrent.futures import ThreadPoolExecutor, as_completed
+from glob import glob
+from multiprocessing import Pool
+from pathlib import Path
+from threading import Lock
+from typing import Any, Dict, Generic, List, Literal, Optional, TypeVar, Union
+
+# Third-party imports
+import numpy as np
+import pandas as pd
+import xxhash
+from IPython.core.getipython import get_ipython
+from IPython.display import HTML, display
+from loguru import logger
+from pydantic import BaseModel
+from tabulate import tabulate
+from tqdm import tqdm
+
 from speedy_utils.common.logger import log, setup_logger
 
 # Clock module

@@ -8,6 +100,13 @@ from .common.clock import Clock, speedy_timer, timef
 # Function decorators
 from .common.function_decorator import retry_runtime
 
+# notebook
+from .common.notebook_utils import (
+    change_dir,
+    display_pretty_table_html,
+    print_table,
+)
+
 # Cache utilities
 from .common.utils_cache import identify, identify_uuid, memoize
 

@@ -36,57 +135,11 @@ from .common.utils_print import (
     flatten_dict,
     fprint,
 )
-from .common.notebook_utils import (
-    display_pretty_table_html,
-    print_table,
-)
 
 # Multi-worker processing
 from .multi_worker.process import multi_process
 from .multi_worker.thread import multi_thread
 
-# notebook
-from .common.notebook_utils import change_dir
-
-# Standard library imports
-import copy
-import functools
-import gc
-import inspect
-import json
-import multiprocessing
-import os
-import os.path as osp
-import pickle
-import pprint
-import random
-import re
-import sys
-import textwrap
-import threading
-import time
-import traceback
-import uuid
-from collections import Counter, defaultdict
-from collections.abc import Callable
-from concurrent.futures import ThreadPoolExecutor, as_completed
-from glob import glob
-from multiprocessing import Pool
-from pathlib import Path
-from threading import Lock
-from typing import Any, Dict, Generic, List, Literal, Optional, TypeVar, Union
-
-# Third-party imports
-import numpy as np
-import pandas as pd
-import xxhash
-from IPython.core.getipython import get_ipython
-from IPython.display import HTML, display
-from loguru import logger
-from pydantic import BaseModel
-from tabulate import tabulate
-from tqdm import tqdm
-
 # Define __all__ explicitly
 __all__ = [
     # Standard library

@@ -173,4 +226,4 @@ __all__ = [
     "multi_thread",
     # Notebook utilities
     "change_dir",
-]
+]
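Moving every stdlib and third-party import to the top of `__init__.py` does not widen the package's star-import surface, because `__all__` is still defined explicitly. A quick self-contained demonstration of that semantics with a throwaway module (`fake_speedy` is invented for the demo):

```python
import sys
import types

# A stand-in module whose body mirrors the new __init__.py shape:
# eager helper imports plus an explicit __all__.
pkg = types.ModuleType("fake_speedy")
exec(
    "import json                      # eager helper import, not exported\n"
    "def multi_thread(*a, **kw): ...\n"
    "__all__ = ['multi_thread']\n",
    pkg.__dict__,
)
sys.modules["fake_speedy"] = pkg

ns: dict = {}
exec("from fake_speedy import *", ns)
assert "multi_thread" in ns and "json" not in ns  # only __all__ names leak
```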
speedy_utils/all.py
CHANGED
@@ -1,3 +1,46 @@
+# ----------------------------------------------------------------------------
+# speedy_utils/all.py
+#
+# Consolidated import collection for comprehensive library access
+#
+# This module provides a unified collection of standard library, third-party,
+# and internal imports commonly used across data science and development
+# workflows. It serves as a convenience module for interactive environments
+# and rapid prototyping by reducing boilerplate import statements.
+#
+# Public API / Data Contracts:
+# • All standard library modules: collections, concurrent.futures, pathlib, etc.
+# • Third-party dependencies: loguru.logger, pydantic.BaseModel, tqdm, tabulate
+# • Core utilities: Counter, defaultdict, ThreadPoolExecutor, as_completed
+# • Development tools: IPython.display.HTML, get_ipython for notebook detection
+# • Type system: Any, Dict, List, Optional, Union, TypeVar, Generic, Literal
+#
+# Invariants / Constraints:
+# • MUST only import stable, widely-used packages
+# • SHOULD handle import failures gracefully for optional dependencies
+# • MUST maintain consistent import aliases across the library
+# • SHOULD group imports by category (stdlib, third-party, internal)
+#
+# Usage Example:
+# ```python
+# from speedy_utils.all import *
+#
+# # Now have access to common utilities without individual imports
+# data = defaultdict(list)
+# results = []
+# for item in tqdm(items):
+#     results.append(process(item))
+#
+# df = tabulate(results, headers=['Item', 'Result'])
+# display(HTML(df))
+# ```
+#
+# TODO & Future Work:
+# • Add conditional imports for ML libraries (torch, numpy, pandas)
+# • Implement import health checking
+# • Add version compatibility warnings
+# ----------------------------------------------------------------------------
+
 # speedy_utils/all.py
 
 # Provide a consolidated set of imports for convenience
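For tooling, the convenience module has one practical consequence worth noting: linters flag wildcard imports, so interactive users typically silence them explicitly. A minimal sketch (the module path is the one in this diff; availability of each re-exported name depends on the package's optional dependencies being installed):

```python
# Convenience wildcard import for notebooks / REPL prototyping.
from speedy_utils.all import *  # noqa: F401,F403

# Names re-exported per the new header comment:
counts = Counter("abracadabra")  # stdlib Counter, via all.py
print(tabulate(counts.most_common(), headers=["char", "n"]))  # third-party
```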
speedy_utils/multi_worker/thread.py
CHANGED

@@ -1,4 +1,81 @@
-"""
+"""
+# ============================================================================= #
+# THREAD-BASED PARALLEL EXECUTION WITH PROGRESS TRACKING AND ERROR HANDLING
+# ============================================================================= #
+#
+# Title & Intent:
+# High-performance thread pool utilities for parallel processing with comprehensive error handling
+#
+# High-level Summary:
+# This module provides robust thread-based parallel execution utilities designed for CPU-bound
+# and I/O-bound tasks requiring concurrent processing. It features intelligent worker management,
+# comprehensive error handling with detailed tracebacks, progress tracking with tqdm integration,
+# and flexible batching strategies. The module optimizes for both throughput and reliability,
+# making it suitable for data processing pipelines, batch operations, and concurrent API calls.
+#
+# Public API / Data Contracts:
+# • multi_thread(func, inputs, num_workers=None, progress=True, **kwargs) -> List[Any] - Main parallel execution
+# • multi_thread_batch(func, inputs, batch_size=10, num_workers=None, **kwargs) -> List[Any] - Batched processing
+# • DEFAULT_WORKERS = (cpu_count * 2) - Default worker thread count
+# • T = TypeVar("T"), R = TypeVar("R") - Generic type variables for input/output typing
+# • _group_iter(src, size) -> Iterable[List[T]] - Utility for chunking iterables
+# • _worker(item, func, fixed_kwargs) -> R - Individual worker function wrapper
+# • _short_tb() -> str - Shortened traceback formatter for cleaner error logs
+#
+# Invariants / Constraints:
+# • Worker count MUST be positive integer, defaults to (CPU cores * 2)
+# • Input iterables MUST be finite and non-empty for meaningful processing
+# • Functions MUST be thread-safe when used with multiple workers
+# • Error handling MUST capture and log detailed tracebacks for debugging
+# • Progress tracking MUST be optional and gracefully handle tqdm unavailability
+# • Batch processing MUST maintain input order in results
+# • MUST handle keyboard interruption gracefully with resource cleanup
+# • Thread pool MUST be properly closed and joined after completion
+#
+# Usage Example:
+# ```python
+# from speedy_utils.multi_worker.thread import multi_thread, multi_thread_batch
+# import requests
+#
+# # Simple parallel processing
+# def square(x):
+#     return x ** 2
+#
+# numbers = list(range(100))
+# results = multi_thread(square, numbers, num_workers=8)
+# print(f"Processed {len(results)} items")
+#
+# # Parallel API calls with error handling
+# def fetch_url(url):
+#     response = requests.get(url, timeout=10)
+#     return response.status_code, len(response.content)
+#
+# urls = ["http://example.com", "http://google.com", "http://github.com"]
+# results = multi_thread(fetch_url, urls, num_workers=3, progress=True)
+#
+# # Batched processing for memory efficiency
+# def process_batch(items):
+#     return [item.upper() for item in items]
+#
+# large_dataset = ["item" + str(i) for i in range(10000)]
+# batched_results = multi_thread_batch(
+#     process_batch,
+#     large_dataset,
+#     batch_size=100,
+#     num_workers=4
+# )
+# ```
+#
+# TODO & Future Work:
+# • Add adaptive worker count based on task characteristics
+# • Implement priority queuing for time-sensitive tasks
+# • Add memory usage monitoring and automatic batch size adjustment
+# • Support for async function execution within thread pool
+# • Add detailed performance metrics and timing analysis
+# • Implement graceful degradation for resource-constrained environments
+#
+# ============================================================================= #
+"""
 
 import os
 import time
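Of the private helpers named in the new header, `_group_iter` is fully described by its signature. A minimal compatible implementation, assuming only that signature (the module's actual body may differ):

```python
from itertools import islice
from typing import Iterable, Iterator, List, TypeVar

T = TypeVar("T")


def group_iter(src: Iterable[T], size: int) -> Iterator[List[T]]:
    # Yield consecutive chunks of at most `size` items until `src` runs dry;
    # the final chunk may be shorter, and input order is preserved.
    it = iter(src)
    while chunk := list(islice(it, size)):
        yield chunk


assert list(group_iter(range(7), 3)) == [[0, 1, 2], [3, 4, 5], [6]]
```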
{speedy_utils-1.0.21.dist-info → speedy_utils-1.0.22.dist-info}/RECORD
CHANGED

@@ -5,16 +5,16 @@ llm_utils/chat_format/transform.py,sha256=8TZhvUS5DrjUeMNtDIuWY54B_QZ7jjpXEL9c8F
 llm_utils/chat_format/utils.py,sha256=xTxN4HrLHcRO2PfCTR43nH1M5zCa7v0kTTdzAcGkZg0,1229
 llm_utils/group_messages.py,sha256=8CU9nKOja3xeuhdrX5CvYVveSqSKb2zQ0eeNzA88aTQ,3621
 llm_utils/lm/__init__.py,sha256=rX36_MsnekM5GHwWS56XELbm4W5x2TDwnPERDTfo0eU,194
-llm_utils/lm/async_lm.py,sha256=
+llm_utils/lm/async_lm.py,sha256=_NmWEp_jCbD6soexXo489L40KS8xJPgtY5QxXLDYsis,34174
 llm_utils/lm/chat_html.py,sha256=FkGo0Dv_nAHYBMZzXfMu_bGQKaCx302goh3XaT-_ETc,8674
 llm_utils/lm/lm_json.py,sha256=fMt42phzFV2f6ulrtWcDXsWHi8WcG7gGkCzpIq8VSSM,1975
-llm_utils/lm/sync_lm.py,sha256=
+llm_utils/lm/sync_lm.py,sha256=ANw_m5KiWcRwwoeQ5no6dzPFLc6j9o2oEcJtkMKqrn8,34640
 llm_utils/lm/utils.py,sha256=GMvs64DRzVnXAki4SZ-A6mx2Fi9IgeF11BA-5FB-CYg,4777
 llm_utils/scripts/README.md,sha256=yuOLnLa2od2jp4wVy3rV0rESeiV3o8zol5MNMsZx0DY,999
-llm_utils/scripts/vllm_load_balancer.py,sha256=
-llm_utils/scripts/vllm_serve.py,sha256=
-speedy_utils/__init__.py,sha256=
-speedy_utils/all.py,sha256=
+llm_utils/scripts/vllm_load_balancer.py,sha256=GjMdoZrdT9cSLos0qSdkLg2dwZgW1enAMsD3aTZAfNs,20845
+llm_utils/scripts/vllm_serve.py,sha256=4NaqpVs7LBvxtvTCMPsNCAOfqiWkKRttxWMmWY7SitA,14729
+speedy_utils/__init__.py,sha256=YCpiReW22zG4KkQXQe6V9BQ8bn7PtiXolOaW_iL8T4M,5734
+speedy_utils/all.py,sha256=t-HKzDmhF1MTFnmq7xRnPs5nFG_aZaLH9Ua0RM6nQ9Y,4855
 speedy_utils/common/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 speedy_utils/common/clock.py,sha256=3n4FkCW0dz46O8By09V5Pve1DSMgpLDRbWEVRryryeQ,7423
 speedy_utils/common/function_decorator.py,sha256=BspJ0YuGL6elS7lWBAgELZ-sCfED_1N2P5fgH-fCRUQ,2132

@@ -27,11 +27,11 @@ speedy_utils/common/utils_misc.py,sha256=cdEuBBpiB1xpuzj0UBDHDuTIerqsMIw37ENq6EX
 speedy_utils/common/utils_print.py,sha256=iQqnOYw2EFC8TqeSDbrcnIQAUKT7FbB8Mec8b2aGAzw,4833
 speedy_utils/multi_worker/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 speedy_utils/multi_worker/process.py,sha256=BI-sgzzQ0_N8kOfaS_3ZAGZ3d6panYzJ3-BGZthY4dQ,6824
-speedy_utils/multi_worker/thread.py,sha256=
+speedy_utils/multi_worker/thread.py,sha256=u_hTwXh7_FciMa5EukdEA1fDCY_vUC4moDceBXk2b6w,16326
 speedy_utils/scripts/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 speedy_utils/scripts/mpython.py,sha256=73PHm1jqbCt2APN4xuNjD0VDKwzOj4EZsViEMQiZU2g,3853
 speedy_utils/scripts/openapi_client_codegen.py,sha256=f2125S_q0PILgH5dyzoKRz7pIvNEjCkzpi4Q4pPFRZE,9683
-speedy_utils-1.0.
-speedy_utils-1.0.
-speedy_utils-1.0.
-speedy_utils-1.0.
+speedy_utils-1.0.22.dist-info/METADATA,sha256=Ll1EUWmXjsgvn2nK2NZ-uSrf6SbkTY1mLaHRKWfgR2Q,7442
+speedy_utils-1.0.22.dist-info/WHEEL,sha256=b4K_helf-jlQoXBBETfwnf4B04YC67LOev0jo4fX5m8,88
+speedy_utils-1.0.22.dist-info/entry_points.txt,sha256=T1t85jwx8fK6m5msdkBGIXH5R5Kd0zSL0S6erXERPzg,237
+speedy_utils-1.0.22.dist-info/RECORD,,
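Each RECORD row is `path,sha256=<digest>,<size>`, where the digest is an unpadded URL-safe base64 SHA-256 (per PEP 376 and the wheel spec). A short sketch for checking an installed file against its row:

```python
import base64
import hashlib
from pathlib import Path


def record_digest(path: str) -> str:
    # Wheel RECORD files store sha256 digests as unpadded URL-safe base64.
    raw = hashlib.sha256(Path(path).read_bytes()).digest()
    return base64.urlsafe_b64encode(raw).rstrip(b"=").decode("ascii")


# e.g. the new thread.py entry above should satisfy:
# record_digest(".../speedy_utils/multi_worker/thread.py")
#   == "u_hTwXh7_FciMa5EukdEA1fDCY_vUC4moDceBXk2b6w"
```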
File without changes
|
|
File without changes
|