speedy-utils 1.1.18__py3-none-any.whl → 1.1.19__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- llm_utils/__init__.py +3 -2
- llm_utils/lm/async_lm/async_llm_task.py +1 -0
- llm_utils/lm/llm_task.py +303 -10
- llm_utils/lm/openai_memoize.py +2 -2
- llm_utils/vector_cache/core.py +3 -3
- speedy_utils/__init__.py +2 -1
- speedy_utils/common/utils_cache.py +1 -1
- speedy_utils/common/utils_io.py +9 -5
- speedy_utils/multi_worker/process.py +63 -6
- speedy_utils/multi_worker/thread.py +94 -2
- {speedy_utils-1.1.18.dist-info → speedy_utils-1.1.19.dist-info}/METADATA +34 -13
- {speedy_utils-1.1.18.dist-info → speedy_utils-1.1.19.dist-info}/RECORD +19 -19
- {speedy_utils-1.1.18.dist-info → speedy_utils-1.1.19.dist-info}/WHEEL +1 -1
- speedy_utils-1.1.19.dist-info/entry_points.txt +5 -0
- speedy_utils-1.1.18.dist-info/entry_points.txt +0 -6
llm_utils/__init__.py
CHANGED
@@ -4,7 +4,7 @@ from llm_utils.vector_cache import VectorCache
 from llm_utils.lm.lm_base import get_model_name
 from llm_utils.lm.base_prompt_builder import BasePromptBuilder
 
-
+LLM = LLMTask
 
 from .chat_format import (
     build_chatml_input,
@@ -34,5 +34,6 @@ __all__ = [
     "MOpenAI",
     "get_model_name",
     "VectorCache",
-    "BasePromptBuilder"
+    "BasePromptBuilder",
+    "LLM"
 ]
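The only functional change here is the new module-level alias `LLM = LLMTask` and its export in `__all__`. A minimal sketch of what this enables (the port below is illustrative; per the `LLMTask` docstring later in this diff, `client` may also be a base URL string or an `OpenAI` instance):

```python
# Sketch only: assumes an OpenAI-compatible server listening on localhost:8000.
from llm_utils import LLM, LLMTask

assert LLM is LLMTask  # new in 1.1.19: LLM is just an alias for LLMTask

task = LLM(client=8000, cache=True)
print(task.model_kwargs["model"])  # falls back to the first model the server reports
```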
llm_utils/lm/llm_task.py
CHANGED
@@ -4,10 +4,12 @@
 Simplified LLM Task module for handling language model interactions with structured input/output.
 """
 
+import os
 from typing import Any, Dict, List, Optional, Type, Union, cast
 
+import requests
 from loguru import logger
-from openai import OpenAI
+from openai import OpenAI, AuthenticationError, BadRequestError, RateLimitError
 from openai.types.chat import ChatCompletionMessageParam
 from pydantic import BaseModel
 
@@ -38,6 +40,90 @@ def get_base_client(
     )
 
 
+def _is_lora_path(path: str) -> bool:
+    """Check if the given path is a LoRA adapter directory.
+
+    Args:
+        path: Path to check
+
+    Returns:
+        True if the path contains adapter_config.json, False otherwise
+    """
+    if not os.path.isdir(path):
+        return False
+    adapter_config_path = os.path.join(path, 'adapter_config.json')
+    return os.path.isfile(adapter_config_path)
+
+
+def _get_port_from_client(client: OpenAI) -> Optional[int]:
+    """Extract port number from OpenAI client base_url.
+
+    Args:
+        client: OpenAI client instance
+
+    Returns:
+        Port number if found, None otherwise
+    """
+    if hasattr(client, 'base_url') and client.base_url:
+        base_url = str(client.base_url)
+        if 'localhost:' in base_url:
+            try:
+                # Extract port from localhost:PORT/v1 format
+                port_part = base_url.split('localhost:')[1].split('/')[0]
+                return int(port_part)
+            except (IndexError, ValueError):
+                pass
+    return None
+
+
+def _load_lora_adapter(lora_path: str, port: int) -> str:
+    """Load a LoRA adapter from the specified path.
+
+    Args:
+        lora_path: Path to the LoRA adapter directory
+        port: Port number for the API endpoint
+
+    Returns:
+        Name of the loaded LoRA adapter
+
+    Raises:
+        requests.RequestException: If the API call fails
+    """
+    lora_name = os.path.basename(lora_path.rstrip('/\\'))
+    if not lora_name:  # Handle edge case of empty basename
+        lora_name = os.path.basename(os.path.dirname(lora_path))
+
+    response = requests.post(
+        f'http://localhost:{port}/v1/load_lora_adapter',
+        headers={'accept': 'application/json', 'Content-Type': 'application/json'},
+        json={"lora_name": lora_name, "lora_path": os.path.abspath(lora_path)}
+    )
+    response.raise_for_status()
+    return lora_name
+
+
+def _unload_lora_adapter(lora_path: str, port: int) -> None:
+    """Unload the current LoRA adapter.
+
+    Args:
+        lora_path: Path to the LoRA adapter directory
+        port: Port number for the API endpoint
+    """
+    try:
+        lora_name = os.path.basename(lora_path.rstrip('/\\'))
+        if not lora_name:  # Handle edge case of empty basename
+            lora_name = os.path.basename(os.path.dirname(lora_path))
+
+        response = requests.post(
+            f'http://localhost:{port}/v1/unload_lora_adapter',
+            headers={'accept': 'application/json', 'Content-Type': 'application/json'},
+            json={"lora_name": lora_name, "lora_int_id": 0}
+        )
+        response.raise_for_status()
+    except requests.RequestException as e:
+        logger.warning(f"Error unloading LoRA adapter: {str(e)[:100]}")
+
+
 class LLMTask:
     """
     Language model task with structured input/output and optional system instruction.
@@ -106,6 +192,9 @@ class LLMTask:
         output_model: Type[BaseModel] | Type[str] = None,
         client: Union[OpenAI, int, str, None] = None,
         cache=True,
+        is_reasoning_model: bool = False,
+        force_lora_unload: bool = False,
+        lora_path: Optional[str] = None,
         **model_kwargs,
     ):
         """
@@ -117,6 +206,12 @@ class LLMTask:
             output_model: Output BaseModel type
             client: OpenAI client, port number, or base_url string
             cache: Whether to use cached responses (default True)
+            is_reasoning_model: Whether the model is a reasoning model (o1-preview, o1-mini, etc.)
+                that outputs reasoning_content separately from content (default False)
+            force_lora_unload: If True, forces unloading of any existing LoRA adapter before loading
+                a new one when lora_path is provided (default False)
+            lora_path: Optional path to LoRA adapter directory. If provided, will load the LoRA
+                and use it as the model. Takes precedence over model parameter.
             **model_kwargs: Additional model parameters including:
                 - temperature: Controls randomness (0.0 to 2.0)
                 - n: Number of responses to generate (when n > 1, returns list)
@@ -127,6 +222,10 @@ class LLMTask:
         self.input_model = input_model
         self.output_model = output_model
         self.model_kwargs = model_kwargs
+        self.is_reasoning_model = is_reasoning_model
+        self.force_lora_unload = force_lora_unload
+        self.lora_path = lora_path
+        self.last_ai_response = None  # Store raw response from client
 
         # if cache:
         #     print("Caching is enabled will use llm_utils.MOpenAI")
@@ -135,11 +234,152 @@ class LLMTask:
         # else:
         #     self.client = OpenAI(base_url=base_url, api_key=api_key)
         self.client = get_base_client(client, cache=cache)
+        # check connection of client
+        try:
+            self.client.models.list()
+        except Exception as e:
+            logger.error(f"Failed to connect to OpenAI client: {str(e)}, base_url={self.client.base_url}")
+            raise e
 
         if not self.model_kwargs.get("model", ""):
             self.model_kwargs["model"] = self.client.models.list().data[0].id
+
+        # Handle LoRA loading if lora_path is provided
+        if self.lora_path:
+            self._load_lora_adapter()
+
         print(self.model_kwargs)
 
+    def _load_lora_adapter(self) -> None:
+        """
+        Load LoRA adapter from the specified lora_path.
+
+        This method:
+        1. Validates that lora_path is a valid LoRA directory
+        2. Checks if LoRA is already loaded (unless force_lora_unload is True)
+        3. Loads the LoRA adapter and updates the model name
+        """
+        if not self.lora_path:
+            return
+
+        if not _is_lora_path(self.lora_path):
+            raise ValueError(
+                f"Invalid LoRA path '{self.lora_path}': "
+                "Directory must contain 'adapter_config.json'"
+            )
+
+        logger.info(f"Loading LoRA adapter from: {self.lora_path}")
+
+        # Get the expected LoRA name (basename of the path)
+        lora_name = os.path.basename(self.lora_path.rstrip('/\\'))
+        if not lora_name:  # Handle edge case of empty basename
+            lora_name = os.path.basename(os.path.dirname(self.lora_path))
+
+        # Get list of available models to check if LoRA is already loaded
+        try:
+            available_models = [m.id for m in self.client.models.list().data]
+        except Exception as e:
+            logger.warning(f"Failed to list models, proceeding with LoRA load: {str(e)[:100]}")
+            available_models = []
+
+        # Check if LoRA is already loaded
+        if lora_name in available_models and not self.force_lora_unload:
+            logger.info(f"LoRA adapter '{lora_name}' is already loaded, using existing model")
+            self.model_kwargs["model"] = lora_name
+            return
+
+        # Force unload if requested
+        if self.force_lora_unload and lora_name in available_models:
+            logger.info(f"Force unloading LoRA adapter '{lora_name}' before reloading")
+            port = _get_port_from_client(self.client)
+            if port is not None:
+                try:
+                    LLMTask.unload_lora(port, lora_name)
+                    logger.info(f"Successfully unloaded LoRA adapter: {lora_name}")
+                except Exception as e:
+                    logger.warning(f"Failed to unload LoRA adapter: {str(e)[:100]}")
+
+        # Get port from client for API calls
+        port = _get_port_from_client(self.client)
+        if port is None:
+            raise ValueError(
+                f"Cannot load LoRA adapter '{self.lora_path}': "
+                "Unable to determine port from client base_url. "
+                "LoRA loading requires a client initialized with port number."
+            )
+
+        try:
+            # Load the LoRA adapter
+            loaded_lora_name = _load_lora_adapter(self.lora_path, port)
+            logger.info(f"Successfully loaded LoRA adapter: {loaded_lora_name}")
+
+            # Update model name to the loaded LoRA name
+            self.model_kwargs["model"] = loaded_lora_name
+
+        except requests.RequestException as e:
+            # Check if the error is due to LoRA already being loaded
+            error_msg = str(e)
+            if "400" in error_msg or "Bad Request" in error_msg:
+                logger.info(f"LoRA adapter may already be loaded, attempting to use '{lora_name}'")
+                # Refresh the model list to check if it's now available
+                try:
+                    updated_models = [m.id for m in self.client.models.list().data]
+                    if lora_name in updated_models:
+                        logger.info(f"Found LoRA adapter '{lora_name}' in updated model list")
+                        self.model_kwargs["model"] = lora_name
+                        return
+                except Exception:
+                    pass  # Fall through to original error
+
+            raise ValueError(
+                f"Failed to load LoRA adapter from '{self.lora_path}': {error_msg[:100]}"
+            )
+
+    def unload_lora_adapter(self, lora_path: str) -> None:
+        """
+        Unload a LoRA adapter.
+
+        Args:
+            lora_path: Path to the LoRA adapter directory to unload
+
+        Raises:
+            ValueError: If unable to determine port from client
+        """
+        port = _get_port_from_client(self.client)
+        if port is None:
+            raise ValueError(
+                "Cannot unload LoRA adapter: "
+                "Unable to determine port from client base_url. "
+                "LoRA operations require a client initialized with port number."
+            )
+
+        _unload_lora_adapter(lora_path, port)
+        lora_name = os.path.basename(lora_path.rstrip('/\\'))
+        logger.info(f"Unloaded LoRA adapter: {lora_name}")
+
+    @staticmethod
+    def unload_lora(port: int, lora_name: str) -> None:
+        """Static method to unload a LoRA adapter by name.
+
+        Args:
+            port: Port number for the API endpoint
+            lora_name: Name of the LoRA adapter to unload
+
+        Raises:
+            requests.RequestException: If the API call fails
+        """
+        try:
+            response = requests.post(
+                f'http://localhost:{port}/v1/unload_lora_adapter',
+                headers={'accept': 'application/json', 'Content-Type': 'application/json'},
+                json={"lora_name": lora_name, "lora_int_id": 0}
+            )
+            response.raise_for_status()
+            logger.info(f"Successfully unloaded LoRA adapter: {lora_name}")
+        except requests.RequestException as e:
+            logger.error(f"Error unloading LoRA adapter '{lora_name}': {str(e)[:100]}")
+            raise
+
     def _prepare_input(self, input_data: Union[str, BaseModel, List[Dict]]) -> Messages:
         """Convert input to messages format."""
         if isinstance(input_data, list):
@@ -200,9 +440,24 @@ class LLMTask:
         # Extract model name from kwargs for API call
         api_kwargs = {k: v for k, v in effective_kwargs.items() if k != "model"}
 
-
-
-
+        try:
+            completion = self.client.chat.completions.create(
+                model=model_name, messages=messages, **api_kwargs
+            )
+            # Store raw response from client
+            self.last_ai_response = completion
+        except (AuthenticationError, RateLimitError, BadRequestError) as exc:
+            error_msg = f"OpenAI API error ({type(exc).__name__}): {exc}"
+            logger.error(error_msg)
+            raise
+        except Exception as e:
+            is_length_error = "Length" in str(e) or "maximum context length" in str(e)
+            if is_length_error:
+                raise ValueError(
+                    f"Input too long for model {model_name}. Error: {str(e)[:100]}..."
+                )
+            # Re-raise all other exceptions
+            raise
         # print(completion)
 
         results: List[Dict[str, Any]] = []
@@ -211,9 +466,13 @@ class LLMTask:
                 Messages,
                 messages + [{"role": "assistant", "content": choice.message.content}],
             )
-
-
-
+            result_dict = {"parsed": choice.message.content, "messages": choice_messages}
+
+            # Add reasoning content if this is a reasoning model
+            if self.is_reasoning_model and hasattr(choice.message, 'reasoning_content'):
+                result_dict["reasoning_content"] = choice.message.reasoning_content
+
+            results.append(result_dict)
         return results
 
     def pydantic_parse(
@@ -239,6 +498,11 @@ class LLMTask:
             List of dicts [{'parsed': parsed_model, 'messages': messages}, ...]
             When n=1: List contains one dict
             When n>1: List contains multiple dicts
+
+        Note:
+            This method ensures consistent Pydantic model output for both fresh and cached responses.
+            When responses are cached and loaded back, the parsed content is re-validated to maintain
+            type consistency between first-time and subsequent calls.
         """
         # Prepare messages
         messages = self._prepare_input(input_data)
@@ -265,12 +529,20 @@ class LLMTask:
                 response_format=pydantic_model_to_use,
                 **api_kwargs,
             )
+            # Store raw response from client
+            self.last_ai_response = completion
+        except (AuthenticationError, RateLimitError, BadRequestError) as exc:
+            error_msg = f"OpenAI API error ({type(exc).__name__}): {exc}"
+            logger.error(error_msg)
+            raise
         except Exception as e:
            is_length_error = "Length" in str(e) or "maximum context length" in str(e)
            if is_length_error:
                raise ValueError(
                    f"Input too long for model {model_name}. Error: {str(e)[:100]}..."
                )
+            # Re-raise all other exceptions
+            raise
 
         results: List[Dict[str, Any]] = []
         for choice in completion.choices:  # type: ignore[attr-defined]
@@ -278,9 +550,23 @@ class LLMTask:
                 Messages,
                 messages + [{"role": "assistant", "content": choice.message.content}],
             )
-
-
-
+
+            # Ensure consistent Pydantic model output for both fresh and cached responses
+            parsed_content = choice.message.parsed  # type: ignore[attr-defined]
+            if isinstance(parsed_content, dict):
+                # Cached response: validate dict back to Pydantic model
+                parsed_content = pydantic_model_to_use.model_validate(parsed_content)
+            elif not isinstance(parsed_content, pydantic_model_to_use):
+                # Fallback: ensure it's the correct type
+                parsed_content = pydantic_model_to_use.model_validate(parsed_content)
+
+            result_dict = {"parsed": parsed_content, "messages": choice_messages}
+
+            # Add reasoning content if this is a reasoning model
+            if self.is_reasoning_model and hasattr(choice.message, 'reasoning_content'):
+                result_dict["reasoning_content"] = choice.message.reasoning_content
+
+            results.append(result_dict)
         return results
 
     def __call__(
@@ -364,6 +650,8 @@ class LLMTask:
         builder: BasePromptBuilder,
         client: Union[OpenAI, int, str, None] = None,
         cache=True,
+        is_reasoning_model: bool = False,
+        lora_path: Optional[str] = None,
         **model_kwargs,
     ) -> "LLMTask":
         """
@@ -382,6 +670,10 @@ class LLMTask:
             input_model=input_model,
            output_model=output_model,
            client=client,
+            cache=cache,
+            is_reasoning_model=is_reasoning_model,
+            lora_path=lora_path,
+            **model_kwargs,
        )
 
     @staticmethod
@@ -398,3 +690,4 @@ class LLMTask:
         client = get_base_client(client, cache=False)
         models = client.models.list().data
         return [m.id for m in models]
+
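Taken together, the LoRA additions wire `LLMTask` to vLLM-style dynamic adapter endpoints (`/v1/load_lora_adapter` and `/v1/unload_lora_adapter`) on the same localhost port the client was created from. A hedged usage sketch follows; the adapter path, the port, and the requirement that the server was started with runtime LoRA updates enabled are assumptions, not taken from this diff:

```python
from llm_utils import LLMTask

# Sketch: "./outputs/my_adapter" must contain adapter_config.json; 8000 is an example port.
task = LLMTask(
    client=8000,                       # an int client lets _get_port_from_client find the LoRA API
    lora_path="./outputs/my_adapter",  # loaded at __init__ time; its basename becomes the model name
    force_lora_unload=True,            # unload a same-named adapter first, then reload it
    is_reasoning_model=True,           # also surface choice.message.reasoning_content in results
)

# Later, the adapter can be removed by name without constructing a task:
LLMTask.unload_lora(port=8000, lora_name="my_adapter")
```

Note that `is_reasoning_model` only adds a `reasoning_content` key to each result dict when the server actually returns that attribute; it does not change the request itself.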
llm_utils/lm/openai_memoize.py
CHANGED
@@ -40,7 +40,7 @@ class MOpenAI(OpenAI):
     def __init__(self, *args, cache=True, **kwargs):
         super().__init__(*args, **kwargs)
         if cache:
-            self.post = memoize(self.post)
+            self.post = memoize(self.post)  # type: ignore
 
 
 class MAsyncOpenAI(AsyncOpenAI):
@@ -69,4 +69,4 @@ class MAsyncOpenAI(AsyncOpenAI):
     def __init__(self, *args, cache=True, **kwargs):
         super().__init__(*args, **kwargs)
         if cache:
-            self.post = memoize(self.post)
+            self.post = memoize(self.post)  # type: ignore
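The change here is annotation-only: rebinding the instance's `post` method to a memoized wrapper is what makes `MOpenAI` and `MAsyncOpenAI` cache responses, and the `# type: ignore` just silences type-checker complaints about assigning over a bound method. A small, hedged sketch of the resulting behaviour (the base URL and API key below are placeholders):

```python
from llm_utils import MOpenAI

# With cache=True (the default), every POST goes through the memoized wrapper,
# so repeating an identical request is served from cache instead of hitting the server.
client = MOpenAI(base_url="http://localhost:8000/v1", api_key="EMPTY", cache=True)
```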
llm_utils/vector_cache/core.py
CHANGED
@@ -535,9 +535,9 @@ class VectorCache:
         if self.verbose:
             print(f"Computing embeddings for {total_items} missing texts in batches of {batch_size}...")
             if self.backend in ["vllm", "transformers"] and self._model is None:
-                print(
+                print("⚠️ Model will be loaded on first batch (lazy loading enabled)")
             elif self.backend in ["vllm", "transformers"]:
-                print(
+                print("✓ Model already loaded, ready for efficient batch processing")
 
         # Create progress bar
         pbar = None
@@ -571,7 +571,7 @@ class VectorCache:
             # Update progress
             batch_size_actual = len(batch_items)
             if use_tqdm:
-                pbar.update(batch_size_actual)
+                pbar.update(batch_size_actual)  # type: ignore
             else:
                 processed_count += batch_size_actual
                 if self.verbose:
speedy_utils/__init__.py
CHANGED
@@ -138,7 +138,7 @@ from .common.utils_print import (
 
 # Multi-worker processing
 from .multi_worker.process import multi_process
-from .multi_worker.thread import multi_thread
+from .multi_worker.thread import kill_all_thread, multi_thread
 
 # Define __all__ explicitly
 __all__ = [
@@ -224,6 +224,7 @@ __all__ = [
     # Multi-worker processing
     "multi_process",
     "multi_thread",
+    "kill_all_thread",
     # Notebook utilities
     "change_dir",
 ]
speedy_utils/common/utils_io.py
CHANGED
@@ -1,13 +1,18 @@
 # utils/utils_io.py
 
+import bz2
+import gzip
+import io
 import json
+import lzma
 import os
 import os.path as osp
 import pickle
 import time
+import warnings
 from glob import glob
 from pathlib import Path
-from typing import Any, Union
+from typing import IO, Any, Iterable, Optional, Union, cast
 
 from json_repair import loads as jloads
 from pydantic import BaseModel
@@ -53,7 +58,7 @@ def dump_json_or_pickle(
     except Exception as e:
         if isinstance(obj, BaseModel):
             data = obj.model_dump()
-            from fastcore.all import
+            from fastcore.all import dict2obj, obj2dict
             obj2 = dict2obj(data)
             with open(fname, "wb") as f:
                 pickle.dump(obj2, f)
@@ -87,8 +92,7 @@ def load_json_or_pickle(fname: str, counter=0) -> Any:
         raise ValueError(f"Error {e} while loading {fname}") from e
 
 
-
-from typing import Iterable, Union, IO, Any, Optional, cast
+
 
 try:
     import orjson  # type: ignore[import-not-found]  # fastest JSON parser when available
@@ -212,7 +216,7 @@ def fast_load_jsonl(
     if line_count > multiworker_threshold:
         # Use multi-worker processing
         from ..multi_worker.thread import multi_thread
-
+
         # Read all lines into chunks
         f = _open_auto(path_or_file)
         all_lines = list(f)
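The new `bz2`/`gzip`/`lzma`/`io` imports accompany the `_open_auto` helper that `fast_load_jsonl` calls. The diff does not show `_open_auto` itself, so the following is only an illustration of the usual suffix-based dispatch such a helper performs, not the package's actual implementation:

```python
import bz2
import gzip
import lzma
from typing import IO, Any


def open_auto_sketch(path: str, mode: str = "rt") -> IO[Any]:
    """Open plain, .gz, .bz2, or .xz/.lzma files transparently based on the suffix."""
    if path.endswith(".gz"):
        return gzip.open(path, mode)
    if path.endswith(".bz2"):
        return bz2.open(path, mode)
    if path.endswith((".xz", ".lzma")):
        return lzma.open(path, mode)
    return open(path, mode)
```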
speedy_utils/multi_worker/process.py
CHANGED
@@ -1,11 +1,20 @@
 # ray_multi_process.py
-import time, os, pickle, uuid, datetime
+import time, os, pickle, uuid, datetime, multiprocessing
 from pathlib import Path
 from typing import Any, Callable
 from tqdm import tqdm
-import
+import psutil
+import threading
+ray: Any
+try:
+    import ray as ray  # type: ignore
+    _HAS_RAY = True
+except Exception:  # pragma: no cover
+    ray = None  # type: ignore
+    _HAS_RAY = False
 from fastcore.parallel import parallel
 
+
 # ─── cache helpers ──────────────────────────────────────────
 
 def _build_cache_dir(func: Callable, items: list[Any]) -> Path:
@@ -61,7 +70,7 @@ def multi_process(
     lazy_output: bool = False,
     progress: bool = True,
     # backend: str = "ray",  # "seq", "ray", or "fastcore"
-    backend: Literal["seq", "ray", "mp", "threadpool"] =
+    backend: Literal["seq", "ray", "mp", "threadpool", "safe"] | None = None,
     # Additional optional knobs (accepted for compatibility)
     batch: int | None = None,
     ordered: bool | None = None,
@@ -75,12 +84,18 @@ def multi_process(
     backend:
         - "seq": run sequentially
         - "ray": run in parallel with Ray
-        - "
+        - "mp": run in parallel with multiprocessing (uses threadpool to avoid fork warnings)
+        - "threadpool": run in parallel with thread pool
+        - "safe": run in parallel with thread pool (explicitly safe for tests)
 
     If lazy_output=True, every result is saved to .pkl and
     the returned list contains file paths.
     """
 
+    # default backend selection
+    if backend is None:
+        backend = "ray" if _HAS_RAY else "mp"
+
     # unify items
     if items is None and inputs is not None:
         items = list(inputs)
@@ -108,6 +123,13 @@ def multi_process(
 
     # ---- ray backend ----
     if backend == "ray":
+        if not _HAS_RAY:
+            msg = (
+                "Ray backend requested but 'ray' is not installed. "
+                "Install extra: pip install 'speedy-utils[ray]' or "
+                "poetry install -E ray."
+            )
+            raise RuntimeError(msg)
         pbar.set_postfix_str("backend=ray")
         ensure_ray(workers, pbar)
 
@@ -125,10 +147,45 @@ def multi_process(
 
     # ---- fastcore backend ----
     if backend == "mp":
-
+        # Use threadpool instead of multiprocessing to avoid fork warnings
+        # in multi-threaded environments like pytest
+        results = parallel(f_wrapped, items, n_workers=workers, progress=progress, threadpool=True)
         return list(results)
     if backend == "threadpool":
         results = parallel(f_wrapped, items, n_workers=workers, progress=progress, threadpool=True)
         return list(results)
-
+    if backend == "safe":
+        # Completely safe backend for tests - no multiprocessing, no external progress bars
+        import concurrent.futures
+        with concurrent.futures.ThreadPoolExecutor(max_workers=workers) as executor:
+            results = list(executor.map(f_wrapped, items))
     raise ValueError(f"Unsupported backend: {backend!r}")
+
+
+
+def cleanup_phantom_workers():
+    """
+    Kill all child processes (phantom workers) without killing the Jupyter kernel itself.
+    Also lists non-daemon threads that remain.
+    """
+    parent = psutil.Process(os.getpid())
+
+    # Kill only children, never the current process
+    for child in parent.children(recursive=True):
+        try:
+            print(f"🔪 Killing child process {child.pid} ({child.name()})")
+            child.kill()
+        except psutil.NoSuchProcess:
+            pass
+
+    # Report stray threads (can't hard-kill them in Python)
+    for t in threading.enumerate():
+        if t is threading.current_thread():
+            continue
+        if not t.daemon:
+            print(f"⚠️ Thread {t.name} is still running (cannot be force-killed).")
+
+    print("✅ Cleaned up child processes (kernel untouched).")
+
+    # Usage: run this anytime after cancelling a cell
+
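With this change `ray` becomes optional: `backend=None` (the new default) resolves to `"ray"` only when the import succeeded and otherwise falls back to `"mp"`, which itself now runs on a thread pool. A hedged call sketch follows; the positional function argument and the worker function are illustrative, and only the keyword names visible in this diff are assumed to exist:

```python
from speedy_utils import multi_process

def square(x: int) -> int:
    return x * x

# backend=None picks "ray" if ray is importable, otherwise "mp" (threadpool under the hood).
print(multi_process(square, items=list(range(8))))

# Requesting "ray" without the optional dependency now raises RuntimeError with install hints;
# "threadpool" is always available.
print(multi_process(square, items=list(range(8)), backend="threadpool"))
```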
speedy_utils/multi_worker/thread.py
CHANGED
@@ -77,7 +77,9 @@
 # ============================================================================= #
 """
 
+import ctypes
 import os
+import threading
 import time
 import traceback
 from collections.abc import Callable, Iterable
@@ -98,6 +100,42 @@ DEFAULT_WORKERS = (os.cpu_count() or 4) * 2
 T = TypeVar("T")
 R = TypeVar("R")
 
+SPEEDY_RUNNING_THREADS: list[threading.Thread] = []
+_SPEEDY_THREADS_LOCK = threading.Lock()
+
+_PY_SET_ASYNC_EXC = ctypes.pythonapi.PyThreadState_SetAsyncExc
+try:
+    _PY_SET_ASYNC_EXC.argtypes = (ctypes.c_ulong, ctypes.py_object)  # type: ignore[attr-defined]
+    _PY_SET_ASYNC_EXC.restype = ctypes.c_int  # type: ignore[attr-defined]
+except AttributeError:  # pragma: no cover - platform specific
+    pass
+
+
+def _prune_dead_threads() -> None:
+    with _SPEEDY_THREADS_LOCK:
+        SPEEDY_RUNNING_THREADS[:] = [t for t in SPEEDY_RUNNING_THREADS if t.is_alive()]
+
+
+def _track_threads(threads: Iterable[threading.Thread]) -> None:
+    if not threads:
+        return
+    with _SPEEDY_THREADS_LOCK:
+        living = [t for t in SPEEDY_RUNNING_THREADS if t.is_alive()]
+        for candidate in threads:
+            if not candidate.is_alive():
+                continue
+            if any(existing is candidate for existing in living):
+                continue
+            living.append(candidate)
+        SPEEDY_RUNNING_THREADS[:] = living
+
+
+def _track_executor_threads(pool: ThreadPoolExecutor) -> None:
+    thread_set = getattr(pool, "_threads", None)
+    if not thread_set:
+        return
+    _track_threads(tuple(thread_set))
+
 
 def _group_iter(src: Iterable[T], size: int) -> Iterable[list[T]]:
     """Yield successive chunks from iterable of specified size."""
@@ -273,11 +311,13 @@ def multi_thread(
             fut.idx = next_logical_idx  # type: ignore[attr-defined]
             inflight.add(fut)
             next_logical_idx += len(arg)
+            _track_executor_threads(pool)
         else:
             fut = pool.submit(_worker, arg, func, fixed_kwargs)
             fut.idx = next_logical_idx  # type: ignore[attr-defined]
             inflight.add(fut)
             next_logical_idx += 1
+            _track_executor_threads(pool)
 
     try:
         # Process futures as they complete and add new ones to keep the pool busy
@@ -347,11 +387,13 @@ def multi_thread(
                     fut2.idx = next_logical_idx  # type: ignore[attr-defined]
                     inflight.add(fut2)
                     next_logical_idx += len(arg)
+                    _track_executor_threads(pool)
                 else:
                     fut2 = pool.submit(_worker, arg, func, fixed_kwargs)
                     fut2.idx = next_logical_idx  # type: ignore[attr-defined]
                     inflight.add(fut2)
                     next_logical_idx += 1
+                    _track_executor_threads(pool)
             except StopIteration:
                 pass
 
@@ -370,6 +412,7 @@ def multi_thread(
         bar.close()
     if store_output_pkl_file:
         dump_json_or_pickle(results, store_output_pkl_file)
+    _prune_dead_threads()
     return results
 
 
@@ -396,9 +439,58 @@ def multi_thread_standard(
         Results in same order as input items.
     """
     with ThreadPoolExecutor(max_workers=workers) as executor:
-        futures = [
+        futures = []
+        for item in items:
+            futures.append(executor.submit(fn, item))
+            _track_executor_threads(executor)
         results = [fut.result() for fut in futures]
+    _prune_dead_threads()
     return results
 
 
-
+def _async_raise(thread_id: int, exc_type: type[BaseException]) -> bool:
+    if thread_id <= 0:
+        return False
+    if not issubclass(exc_type, BaseException):
+        raise TypeError("exc_type must derive from BaseException")
+    res = _PY_SET_ASYNC_EXC(ctypes.c_ulong(thread_id), ctypes.py_object(exc_type))
+    if res == 0:
+        return False
+    if res > 1:  # pragma: no cover - defensive branch
+        _PY_SET_ASYNC_EXC(ctypes.c_ulong(thread_id), None)
+        raise SystemError("PyThreadState_SetAsyncExc failed")
+    return True
+
+
+def kill_all_thread(exc_type: type[BaseException] = SystemExit, join_timeout: float = 0.1) -> int:
+    """Forcefully stop tracked worker threads. Returns number of threads signalled."""
+    _prune_dead_threads()
+    current = threading.current_thread()
+    with _SPEEDY_THREADS_LOCK:
+        targets = [t for t in SPEEDY_RUNNING_THREADS if t.is_alive()]
+
+    terminated = 0
+    for thread in targets:
+        if thread is current:
+            continue
+        ident = thread.ident
+        if ident is None:
+            continue
+        try:
+            if _async_raise(ident, exc_type):
+                terminated += 1
+                thread.join(timeout=join_timeout)
+            else:
+                logger.warning("Unable to signal thread %s", thread.name)
+        except Exception as exc:  # pragma: no cover - defensive
+            logger.error("Failed to stop thread %s: %s", thread.name, exc)
+    _prune_dead_threads()
+    return terminated
+
+
+__all__ = [
+    "SPEEDY_RUNNING_THREADS",
+    "multi_thread",
+    "multi_thread_standard",
+    "kill_all_thread",
+]
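The thread-tracking additions exist so that `kill_all_thread` has something to signal: `multi_thread` and `multi_thread_standard` record their pool threads in `SPEEDY_RUNNING_THREADS`, and `kill_all_thread` asks CPython (via `PyThreadState_SetAsyncExc`) to raise `SystemExit` inside each live tracked thread. A hedged sketch of the intended escape hatch, for example after interrupting a notebook cell:

```python
from speedy_utils import kill_all_thread

# Signal every tracked worker thread that is still alive; returns how many were signalled.
stopped = kill_all_thread()  # defaults shown in this diff: exc_type=SystemExit, join_timeout=0.1
print(f"signalled {stopped} worker threads")

# Caveat (CPython behaviour, not specific to this package): the injected exception is only
# delivered when the target thread re-enters Python bytecode, so threads blocked inside
# long C calls may not stop immediately.
```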
{speedy_utils-1.1.18.dist-info → speedy_utils-1.1.19.dist-info}/METADATA
CHANGED
@@ -1,10 +1,14 @@
 Metadata-Version: 2.4
 Name: speedy-utils
-Version: 1.1.18
+Version: 1.1.19
 Summary: Fast and easy-to-use package for data science
-
-
-
+Project-URL: Homepage, https://github.com/anhvth/speedy
+Project-URL: Repository, https://github.com/anhvth/speedy
+Author-email: AnhVTH <anhvth.226@gmail.com>
+License: MIT
+Classifier: Development Status :: 4 - Beta
+Classifier: Intended Audience :: Developers
+Classifier: License :: OSI Approved :: MIT License
 Classifier: Programming Language :: Python :: 3
 Classifier: Programming Language :: Python :: 3.8
 Classifier: Programming Language :: Python :: 3.9
@@ -13,29 +17,34 @@ Classifier: Programming Language :: Python :: 3.11
 Classifier: Programming Language :: Python :: 3.12
 Classifier: Programming Language :: Python :: 3.13
 Classifier: Programming Language :: Python :: 3.14
+Requires-Python: >=3.8
+Requires-Dist: aiohttp>=3.10.11
 Requires-Dist: bump2version
 Requires-Dist: cachetools
 Requires-Dist: debugpy
 Requires-Dist: fastcore
 Requires-Dist: fastprogress
-Requires-Dist: freezegun
+Requires-Dist: freezegun>=1.5.1
 Requires-Dist: ipdb
 Requires-Dist: ipywidgets
-Requires-Dist: json-repair
+Requires-Dist: json-repair<0.31.0,>=0.25.0
 Requires-Dist: jupyterlab
 Requires-Dist: loguru
 Requires-Dist: matplotlib
 Requires-Dist: numpy
-Requires-Dist: openai
-Requires-Dist: packaging
+Requires-Dist: openai>=1.106.0
+Requires-Dist: packaging<25,>=23.2
 Requires-Dist: pandas
 Requires-Dist: pydantic
+Requires-Dist: pytest>=8.3.5
+Requires-Dist: ray>=2.36.1
 Requires-Dist: requests
 Requires-Dist: scikit-learn
 Requires-Dist: tabulate
 Requires-Dist: tqdm
 Requires-Dist: xxhash
-
+Provides-Extra: ray
+Requires-Dist: ray>=2.49.1; (python_version >= '3.9') and extra == 'ray'
 Description-Content-Type: text/markdown
 
 # Speedy Utils
@@ -84,6 +93,19 @@ cd speedy-utils
 pip install .
 ```
 
+### Extras
+
+Optional dependencies can be installed via extras. For the `ray` backend
+support (requires Python >= 3.9):
+
+```bash
+# pip
+pip install 'speedy-utils[ray]'
+
+# Poetry (for developing this repo)
+poetry install -E ray
+```
+
 ## Updating from previous versions
 
 To update from previous versions or switch to v1.x, first uninstall any old
@@ -282,9 +304,8 @@ python speedy_utils/common/dataclass_parser.py
 
 Example output:
 
-| Field
-
-| from_peft
+| Field     | Value                                 |
+| --------- | ------------------------------------- |
+| from_peft | ./outputs/llm_hn_qw32b/hn_results_r3/ |
 
 Please ensure your code adheres to the project's coding standards and includes appropriate tests.
-
{speedy_utils-1.1.18.dist-info → speedy_utils-1.1.19.dist-info}/RECORD
CHANGED
@@ -1,31 +1,31 @@
-llm_utils/__init__.py,sha256=
+llm_utils/__init__.py,sha256=n9m0iB82oygFThbDEdI5hpmozmTNhQgwX148QZulfCE,940
+llm_utils/group_messages.py,sha256=Oe2tlhg-zRodG1-hodYebddrR77j9UdE05LzJw0EvYI,3622
 llm_utils/chat_format/__init__.py,sha256=8dBIUqFJvkgQYedxBtcyxt-4tt8JxAKVap2JlTXmgaM,737
 llm_utils/chat_format/display.py,sha256=3jKDm4OTrvytK1qBhSOjRLltUIObHsYFdBLgm8SVDE8,14159
 llm_utils/chat_format/transform.py,sha256=eU0c3PdAHCNLuGP1UqPwln0B34Lv3bt_uV9v9BrlCN4,5402
 llm_utils/chat_format/utils.py,sha256=xTxN4HrLHcRO2PfCTR43nH1M5zCa7v0kTTdzAcGkZg0,1229
-llm_utils/group_messages.py,sha256=Oe2tlhg-zRodG1-hodYebddrR77j9UdE05LzJw0EvYI,3622
 llm_utils/lm/__init__.py,sha256=totIZnq1P8eNlfVco0OfdGdTNt1-wSXDSRReRRzYYxw,319
+llm_utils/lm/base_prompt_builder.py,sha256=OLqyxbA8QeYIVFzB9EqxUiE_P2p4_MD_Lq4WSwxFtKU,12136
+llm_utils/lm/llm_task.py,sha256=kyBeMDJwW9ZWq5A_OMgE-ou9GQ0bk5c9lxXOvfo31R4,27915
+llm_utils/lm/lm.py,sha256=8TaLuU7naPQbOFmiS2NQyWVLG0jUUzRRBQsR0In7GVo,7249
+llm_utils/lm/lm_base.py,sha256=pqbHZOdR7yUMpvwt8uBG1dZnt76SY_Wk8BkXQQ-mpWs,9557
+llm_utils/lm/openai_memoize.py,sha256=q1cj5tZOSEpvx4QhRNs37pVaFMpMViCdVtwRsoaXgeU,3054
+llm_utils/lm/utils.py,sha256=a0KJj8vjT2fHKb7GKGNJjJHhKLThwpxIL7vnV9Fr3ZY,4584
 llm_utils/lm/async_lm/__init__.py,sha256=PUBbCuf5u6-0GBUu-2PI6YAguzsyXj-LPkU6vccqT6E,121
 llm_utils/lm/async_lm/_utils.py,sha256=P1-pUDf_0pDmo8WTIi43t5ARlyGA1RIJfpAhz-gfA5g,6105
-llm_utils/lm/async_lm/async_llm_task.py,sha256
+llm_utils/lm/async_lm/async_llm_task.py,sha256=-BVOk18ZD8eC2obTLgiPq39f2PP3cji17Ku-Gb7c7Xo,18683
 llm_utils/lm/async_lm/async_lm.py,sha256=e3o9cyMbkVz_jQDTjJv2ybET_5mY012zdZGjNwi4Qk4,13719
 llm_utils/lm/async_lm/async_lm_base.py,sha256=iJgtzI6pVJzWtlXGqVLwgCIb-FzZAa3E5xW8yhyHUmM,8426
 llm_utils/lm/async_lm/lm_specific.py,sha256=KmqdCm3SJ5MqN-dRJd6S5tq5-ve1X2eNWf2CMFtc_3s,3926
-llm_utils/lm/base_prompt_builder.py,sha256=OLqyxbA8QeYIVFzB9EqxUiE_P2p4_MD_Lq4WSwxFtKU,12136
-llm_utils/lm/llm_task.py,sha256=K5c27iYM9etAbdDM1WiO3-GjTvl1dkzt2sIaW3N1YA0,15483
-llm_utils/lm/lm.py,sha256=8TaLuU7naPQbOFmiS2NQyWVLG0jUUzRRBQsR0In7GVo,7249
-llm_utils/lm/lm_base.py,sha256=pqbHZOdR7yUMpvwt8uBG1dZnt76SY_Wk8BkXQQ-mpWs,9557
-llm_utils/lm/openai_memoize.py,sha256=DdMl31cV9AqLlkARajZrqAKCyhvH8JQk2SAHMSzO3mk,3024
-llm_utils/lm/utils.py,sha256=a0KJj8vjT2fHKb7GKGNJjJHhKLThwpxIL7vnV9Fr3ZY,4584
 llm_utils/scripts/README.md,sha256=yuOLnLa2od2jp4wVy3rV0rESeiV3o8zol5MNMsZx0DY,999
 llm_utils/scripts/vllm_load_balancer.py,sha256=TT5Ypq7gUcl52gRFp--ORFFjzhfGlcaX2rkRv8NxlxU,37259
 llm_utils/scripts/vllm_serve.py,sha256=gJ0-y4kybMfSt8qzye1pJqGMY3x9JLRi6Tu7RjJMnss,14771
 llm_utils/vector_cache/__init__.py,sha256=i1KQuC4OhPewYpFl9X6HlWFBuASCTx2qgGizhpZhmn0,862
 llm_utils/vector_cache/cli.py,sha256=DMXTj8nZ2_LRjprbYPb4uzq04qZtOfBbmblmaqDcCuM,6251
-llm_utils/vector_cache/core.py,sha256=
+llm_utils/vector_cache/core.py,sha256=222LcmVJR0bFo0jRAJEG6e5ceWFfySmVbCxywScE6E4,33595
 llm_utils/vector_cache/types.py,sha256=ru8qmUZ8_lNd3_oYpjCMtpXTsqmwsSBe56Z4hTWm3xI,435
 llm_utils/vector_cache/utils.py,sha256=dwbbXlRrARrpmS4YqSlYQqrTURg0UWe8XvaAWcX05MM,1458
-speedy_utils/__init__.py,sha256=
+speedy_utils/__init__.py,sha256=QBvGIbrC5yczQwh4T8iu9KQx6w9u-v_JdoQfA67hLUg,5780
 speedy_utils/all.py,sha256=t-HKzDmhF1MTFnmq7xRnPs5nFG_aZaLH9Ua0RM6nQ9Y,4855
 speedy_utils/common/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 speedy_utils/common/clock.py,sha256=3n4FkCW0dz46O8By09V5Pve1DSMgpLDRbWEVRryryeQ,7423
@@ -34,17 +34,17 @@ speedy_utils/common/logger.py,sha256=a2iZx0eWyfi2-2X_H2QmfuA3tfR7_XSM7Nd0GdUnUOs
 speedy_utils/common/notebook_utils.py,sha256=-97kehJ_Gg3TzDLubsLIYJcykqX1NXhbvBO6nniZSYM,2063
 speedy_utils/common/patcher.py,sha256=VCmdxyTF87qroggQkQklRPhAOPJbeBqhcJoTsLcDxNw,2303
 speedy_utils/common/report_manager.py,sha256=eBiw5KY6bWUhwki3B4lK5o8bFsp7L5x28X9GCI-Sd1w,3899
-speedy_utils/common/utils_cache.py,sha256=
-speedy_utils/common/utils_io.py,sha256
+speedy_utils/common/utils_cache.py,sha256=8KPCWPUCm91HCH9kvV_gcshlxJl6m4tZ8yAKHhJCfUc,22445
+speedy_utils/common/utils_io.py,sha256=-RkQjYGa3zVqpgVInsdp8dbS5oLwdJdUsRz1XIUSJzg,14257
 speedy_utils/common/utils_misc.py,sha256=cdEuBBpiB1xpuzj0UBDHDuTIerqsMIw37ENq6EXliOw,1795
 speedy_utils/common/utils_print.py,sha256=syRrnSFtguxrV-elx6DDVcSGu4Qy7D_xVNZhPwbUY4A,4864
 speedy_utils/multi_worker/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-speedy_utils/multi_worker/process.py,sha256=
-speedy_utils/multi_worker/thread.py,sha256=
+speedy_utils/multi_worker/process.py,sha256=ouN65PbOhg0rOGUK7ATB7zXkRA993w9iiPDZ7nZ9g0w,6881
+speedy_utils/multi_worker/thread.py,sha256=xhCPgJokCDjjPrWh6vUtCBlZgs3E6mM81WCAEKvZea0,19522
 speedy_utils/scripts/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 speedy_utils/scripts/mpython.py,sha256=IvywP7Y0_V6tWfMP-4MjPvN5_KfxWF21xaLJsCIayCk,3821
 speedy_utils/scripts/openapi_client_codegen.py,sha256=f2125S_q0PILgH5dyzoKRz7pIvNEjCkzpi4Q4pPFRZE,9683
-speedy_utils-1.1.
-speedy_utils-1.1.
-speedy_utils-1.1.
-speedy_utils-1.1.
+speedy_utils-1.1.19.dist-info/METADATA,sha256=AHlhLIK3CLwi6f_-_qJDS1lEfXYvvacZ1RHiV_Gfnb4,8094
+speedy_utils-1.1.19.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
+speedy_utils-1.1.19.dist-info/entry_points.txt,sha256=1rrFMfqvaMUE9hvwGiD6vnVh98kmgy0TARBj-v0Lfhs,244
+speedy_utils-1.1.19.dist-info/RECORD,,