speedy-utils 1.1.27__py3-none-any.whl → 1.1.28__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- llm_utils/__init__.py +16 -4
- llm_utils/chat_format/__init__.py +10 -10
- llm_utils/chat_format/display.py +33 -21
- llm_utils/chat_format/transform.py +17 -19
- llm_utils/chat_format/utils.py +6 -4
- llm_utils/group_messages.py +17 -14
- llm_utils/lm/__init__.py +6 -5
- llm_utils/lm/async_lm/__init__.py +1 -0
- llm_utils/lm/async_lm/_utils.py +10 -9
- llm_utils/lm/async_lm/async_llm_task.py +141 -137
- llm_utils/lm/async_lm/async_lm.py +48 -42
- llm_utils/lm/async_lm/async_lm_base.py +59 -60
- llm_utils/lm/async_lm/lm_specific.py +4 -3
- llm_utils/lm/base_prompt_builder.py +93 -70
- llm_utils/lm/llm.py +126 -108
- llm_utils/lm/llm_signature.py +4 -2
- llm_utils/lm/lm_base.py +72 -73
- llm_utils/lm/mixins.py +102 -62
- llm_utils/lm/openai_memoize.py +124 -87
- llm_utils/lm/signature.py +105 -92
- llm_utils/lm/utils.py +42 -23
- llm_utils/scripts/vllm_load_balancer.py +23 -30
- llm_utils/scripts/vllm_serve.py +8 -7
- llm_utils/vector_cache/__init__.py +9 -3
- llm_utils/vector_cache/cli.py +1 -1
- llm_utils/vector_cache/core.py +59 -63
- llm_utils/vector_cache/types.py +7 -5
- llm_utils/vector_cache/utils.py +12 -8
- speedy_utils/__imports.py +244 -0
- speedy_utils/__init__.py +90 -194
- speedy_utils/all.py +125 -227
- speedy_utils/common/clock.py +37 -42
- speedy_utils/common/function_decorator.py +6 -12
- speedy_utils/common/logger.py +43 -52
- speedy_utils/common/notebook_utils.py +13 -21
- speedy_utils/common/patcher.py +21 -17
- speedy_utils/common/report_manager.py +42 -44
- speedy_utils/common/utils_cache.py +152 -169
- speedy_utils/common/utils_io.py +137 -103
- speedy_utils/common/utils_misc.py +15 -21
- speedy_utils/common/utils_print.py +22 -28
- speedy_utils/multi_worker/process.py +66 -79
- speedy_utils/multi_worker/thread.py +78 -155
- speedy_utils/scripts/mpython.py +38 -36
- speedy_utils/scripts/openapi_client_codegen.py +10 -10
- {speedy_utils-1.1.27.dist-info → speedy_utils-1.1.28.dist-info}/METADATA +1 -1
- speedy_utils-1.1.28.dist-info/RECORD +57 -0
- vision_utils/README.md +202 -0
- vision_utils/__init__.py +5 -0
- vision_utils/io_utils.py +470 -0
- vision_utils/plot.py +345 -0
- speedy_utils-1.1.27.dist-info/RECORD +0 -52
- {speedy_utils-1.1.27.dist-info → speedy_utils-1.1.28.dist-info}/WHEEL +0 -0
- {speedy_utils-1.1.27.dist-info → speedy_utils-1.1.28.dist-info}/entry_points.txt +0 -0
llm_utils/lm/utils.py
CHANGED
@@ -1,17 +1,17 @@
 import os
-import signal
-import time
-from typing import Any, List, Optional, cast
-
-from loguru import logger
-
 
 # Additional imports for VLLM utilities
 import re
+import signal
 import subprocess
+import time
+from typing import Any, List, Optional, cast
+
 import requests
+from loguru import logger
 from openai import OpenAI
 
+
 try:
     import psutil
 
@@ -19,10 +19,12 @@ try:
 except ImportError:
     HAS_PSUTIL = False
     psutil = cast(Any, None)
-    logger.warning(
+    logger.warning(
+        "psutil not available. Some VLLM process management features may be limited."
+    )
 
 # Global tracking of VLLM processes
-_VLLM_PROCESSES:
+_VLLM_PROCESSES: list[subprocess.Popen] = []
 
 
 def _extract_port_from_vllm_cmd(vllm_cmd: str) -> int:
@@ -97,7 +99,12 @@ def _start_vllm_server(vllm_cmd: str, timeout: int = 120) -> subprocess.Popen:
 
     with open(f"/tmp/vllm_{port}.txt", "a") as log_file:
         process = subprocess.Popen(
-            cleaned_cmd.split(),
+            cleaned_cmd.split(),
+            stdout=log_file,
+            stderr=subprocess.STDOUT,
+            text=True,
+            preexec_fn=os.setsid,
+            env=env,
         )
 
     _VLLM_PROCESSES.append(process)
@@ -147,7 +154,9 @@ def _kill_vllm_on_port(port: int) -> bool:
            proc = psutil.Process(process.pid)
            cmdline = " ".join(proc.cmdline())
            if f"--port {port}" in cmdline or f"--port={port}" in cmdline:
-                logger.info(
+                logger.info(
+                    f"Killing tracked VLLM process {process.pid} on port {port}"
+                )
                os.killpg(os.getpgid(process.pid), signal.SIGTERM)
                try:
                    process.wait(timeout=5)
@@ -187,8 +196,12 @@ def _kill_vllm_on_port(port: int) -> bool:
        for proc in psutil.process_iter(["pid", "cmdline"]):
            try:
                cmdline = " ".join(proc.info["cmdline"] or [])
-                if "vllm" in cmdline.lower() and (
-
+                if "vllm" in cmdline.lower() and (
+                    f"--port {port}" in cmdline or f"--port={port}" in cmdline
+                ):
+                    logger.info(
+                        f"Killing untracked VLLM process {proc.info['pid']} on port {port}"
+                    )
                    proc.terminate()
                    try:
                        proc.wait(timeout=5)
@@ -255,7 +268,9 @@ def _is_server_running(port: int) -> bool:
         return False
 
 
-def get_base_client(
+def get_base_client(
+    client=None, cache: bool = True, api_key="abc", vllm_cmd=None, vllm_process=None
+) -> OpenAI:
     """Get OpenAI client from various inputs."""
     from llm_utils import MOpenAI
 
@@ -264,17 +279,21 @@ def get_base_client(client=None, cache: bool = True, api_key="abc", vllm_cmd=Non
            # Parse environment variables from command to get clean command for port extraction
            _, cleaned_cmd = _parse_env_vars_from_cmd(vllm_cmd)
            port = _extract_port_from_vllm_cmd(cleaned_cmd)
-            return MOpenAI(
-
-
-
-
-
+            return MOpenAI(
+                base_url=f"http://localhost:{port}/v1", api_key=api_key, cache=cache
+            )
+        raise ValueError("Either client or vllm_cmd must be provided.")
+    if isinstance(client, int):
+        return MOpenAI(
+            base_url=f"http://localhost:{client}/v1", api_key=api_key, cache=cache
+        )
+    if isinstance(client, str):
         return MOpenAI(base_url=client, api_key=api_key, cache=cache)
-
+    if isinstance(client, OpenAI):
         return MOpenAI(base_url=client.base_url, api_key=api_key, cache=cache)
-
-
+    raise ValueError(
+        "Invalid client type. Must be OpenAI, port (int), base_url (str), or None."
+    )
 
 
 def _is_lora_path(path: str) -> bool:
@@ -285,7 +304,7 @@ def _is_lora_path(path: str) -> bool:
     return os.path.isfile(adapter_config_path)
 
 
-def _get_port_from_client(client: OpenAI) ->
+def _get_port_from_client(client: OpenAI) -> int | None:
     """Extract port from OpenAI client base_url."""
     if hasattr(client, "base_url") and client.base_url:
         base_url = str(client.base_url)
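The reworked get_base_client above dispatches on the type of its first argument: an int is treated as a local port, a str as a base URL, an existing OpenAI client is re-wrapped as a caching MOpenAI, and None requires a vllm_cmd. A minimal usage sketch under that reading (the port, URL, and API key below are illustrative, not defaults shipped by the package):

from openai import OpenAI

from llm_utils.lm.utils import get_base_client

# Assumed: an OpenAI-compatible (e.g. vLLM) server already listening on localhost:8000.
client_from_port = get_base_client(8000)
client_from_url = get_base_client("http://localhost:8000/v1")
client_rewrapped = get_base_client(
    OpenAI(base_url="http://localhost:8000/v1", api_key="abc")
)
# Any other client type raises ValueError, as does client=None without a vllm_cmd.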
llm_utils/scripts/vllm_load_balancer.py
CHANGED
@@ -10,9 +10,11 @@ from datetime import datetime
 
 import aiohttp
 from loguru import logger
-from speedy_utils import setup_logger
 from tabulate import tabulate
 
+from speedy_utils import setup_logger
+
+
 setup_logger(min_interval=5)
 
 
@@ -132,10 +134,9 @@ def format_uptime(start_time):
 
     if hours > 0:
         return f"{hours}h {minutes}m {seconds}s"
-
+    if minutes > 0:
         return f"{minutes}m {seconds}s"
-
-        return f"{seconds}s"
+    return f"{seconds}s"
 
 
 def print_banner():
@@ -247,13 +248,12 @@ async def check_server_health(session, host, port):
                )
                await response.release()
                return True
-
-
-
-
-
-
-    except asyncio.TimeoutError:
+            logger.debug(
+                f"[{LOAD_BALANCER_PORT=}] Health check failed for {url} (Status: {response.status})"
+            )
+            await response.release()
+            return False
+    except TimeoutError:
        logger.debug(f"Health check HTTP request timeout for {url}")
        return False
    except aiohttp.ClientConnectorError as e:
@@ -311,11 +311,11 @@ async def scan_and_update_servers():
 
     if added:
         logger.info(
-            f"Servers added (passed /health check): {sorted(
+            f"Servers added (passed /health check): {sorted(added)}"
         )
     if removed:
         logger.info(
-            f"Servers removed (failed /health check or stopped): {sorted(
+            f"Servers removed (failed /health check or stopped): {sorted(removed)}"
         )
         for server in removed:
             if server in connection_counts:
@@ -329,7 +329,7 @@ async def scan_and_update_servers():
                    f"Removed throttling timestamp for unavailable server {server}"
                )
 
-    available_servers = sorted(
+    available_servers = sorted(current_set)
     for server in available_servers:
         if server not in connection_counts:
             connection_counts[server] = 0
@@ -375,7 +375,9 @@ async def handle_client(client_reader, client_writer):
 
     min_connections = float("inf")
     least_used_available_servers = []
-    for
+    for (
+        server
+    ) in (
         available_servers
     ):  # Iterate only over servers that passed health check
         count = connection_counts.get(server, 0)
@@ -705,9 +707,9 @@ async def stats_json(request):
            {
                "host": BACKEND_HOST,
                "port": port,
-                "active_connections":
-
-
+                "active_connections": (
+                    connection_counts.get(server, 0) if is_online else 0
+                ),
                "status": "ONLINE" if is_online else "OFFLINE",
            }
        )
@@ -929,23 +931,14 @@ async def main():
        logger.info("Cancelling background tasks...")
        scan_task.cancel()
        status_task.cancel()
-
+        with contextlib.suppress(asyncio.CancelledError):
            await asyncio.gather(scan_task, status_task, return_exceptions=True)
-        except asyncio.CancelledError:
-            pass
        print(f"{Colors.BRIGHT_GREEN}✅ Shutdown complete. Goodbye!{Colors.RESET}")
        logger.info("Background tasks finished.")
 
 
 def run_load_balancer():
-    global
-        LOAD_BALANCER_PORT, \
-        BACKEND_PORTS, \
-        BACKEND_HOST, \
-        STATUS_PRINT_INTERVAL, \
-        HEALTH_CHECK_TIMEOUT, \
-        THROTTLE_MS, \
-        STATS_PORT
+    global LOAD_BALANCER_PORT, BACKEND_PORTS, BACKEND_HOST, STATUS_PRINT_INTERVAL, HEALTH_CHECK_TIMEOUT, THROTTLE_MS, STATS_PORT
     args = parse_args()
     LOAD_BALANCER_PORT = args.port
     BACKEND_HOST = args.host
@@ -976,4 +969,4 @@ def run_load_balancer():
 
 
 if __name__ == "__main__":
-    run_load_balancer()
+    run_load_balancer()
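For orientation, the handle_client hunk above belongs to a least-connections selection step: among the backends that passed the /health scan, pick one with the fewest active proxied connections. A standalone sketch of that pattern (the function name and the random tie-break are illustrative, not taken from the script):

import random


def pick_least_loaded(
    available_servers: list[str], connection_counts: dict[str, int]
) -> str | None:
    # Iterate only over servers that passed the health check.
    if not available_servers:
        return None
    min_connections = float("inf")
    least_used: list[str] = []
    for server in available_servers:
        count = connection_counts.get(server, 0)
        if count < min_connections:
            min_connections = count
            least_used = [server]
        elif count == min_connections:
            least_used.append(server)
    # Break ties arbitrarily among equally loaded backends.
    return random.choice(least_used)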
llm_utils/scripts/vllm_serve.py
CHANGED
@@ -75,6 +75,7 @@ from loguru import logger
 from llm_utils.lm.openai_memoize import MOpenAI
 from speedy_utils.common.utils_io import load_by_ext
 
+
 LORA_DIR: str = os.environ.get("LORA_DIR", "/loras")
 LORA_DIR = os.path.abspath(LORA_DIR)
 HF_HOME: str = os.environ.get("HF_HOME", os.path.expanduser("~/.cache/huggingface"))
@@ -93,8 +94,8 @@ def add_lora(
     lora_name_or_path: str,
     host_port: str,
     url: str = "http://HOST:PORT/v1/load_lora_adapter",
-    served_model_name:
-    lora_module:
+    served_model_name: str | None = None,
+    lora_module: str | None = None,
 ) -> dict:
     """Add a LoRA adapter to a running vLLM server."""
     url = url.replace("HOST:PORT", host_port)
@@ -126,7 +127,7 @@ def add_lora(
         return {"error": f"Request failed: {str(e)}"}
 
 
-def unload_lora(lora_name: str, host_port: str) ->
+def unload_lora(lora_name: str, host_port: str) -> dict | None:
     """Unload a LoRA adapter from a running vLLM server."""
     try:
         url = f"http://{host_port}/v1/unload_lora_adapter"
@@ -144,7 +145,7 @@ def unload_lora(lora_name: str, host_port: str) -> Optional[dict]:
 def serve(args) -> None:
     """Start vLLM containers with dynamic args."""
     print("Starting vLLM containers...,")
-    gpu_groups_arr:
+    gpu_groups_arr: list[str] = args.gpu_groups.split(",")
     vllm_binary: str = get_vllm()
     if args.enable_lora:
         vllm_binary = "VLLM_ALLOW_RUNTIME_LORA_UPDATING=True " + vllm_binary
@@ -232,9 +233,9 @@ def get_vllm() -> str:
     vllm_binary = subprocess.check_output("which vllm", shell=True, text=True).strip()
     vllm_binary = os.getenv("VLLM_BINARY", vllm_binary)
     logger.info(f"vLLM binary: {vllm_binary}")
-    assert os.path.exists(
-
-    )
+    assert os.path.exists(
+        vllm_binary
+    ), f"vLLM binary not found at {vllm_binary}, please set VLLM_BINARY env variable"
     return vllm_binary
 
 
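The add_lora/unload_lora signatures above take a LoRA adapter name or path plus the host:port of a vLLM server that was started with runtime LoRA updating enabled. A hedged usage sketch (the adapter path, served name, and port are hypothetical placeholders):

from llm_utils.scripts.vllm_serve import add_lora, unload_lora

# Hypothetical adapter directory and server address; both must already exist.
result = add_lora(
    lora_name_or_path="/loras/my-adapter",
    host_port="localhost:8000",
    served_model_name="my-adapter",
)
print(result)  # dict response, or {"error": ...} if the request failed

# Later, detach the adapter again.
unload_lora("my-adapter", "localhost:8000")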
llm_utils/vector_cache/__init__.py
CHANGED
@@ -11,15 +11,21 @@ Example:
     # Using local model
     cache = VectorCache("Qwen/Qwen3-Embedding-0.6B")
     embeddings = cache.embeds(["Hello world", "How are you?"])
-
+
     # Using OpenAI API
     cache = VectorCache("https://api.openai.com/v1")
     embeddings = cache.embeds(["Hello world", "How are you?"])
 """
 
 from .core import VectorCache
-from .utils import get_default_cache_path, validate_model_name
+from .utils import estimate_cache_size, get_default_cache_path, validate_model_name
+
 
 __version__ = "0.1.0"
 __author__ = "AnhVTH <anhvth.226@gmail.com>"
-__all__ = [
+__all__ = [
+    "VectorCache",
+    "get_default_cache_path",
+    "validate_model_name",
+    "estimate_cache_size",
+]
llm_utils/vector_cache/cli.py
CHANGED
@@ -106,7 +106,7 @@ def handle_embed(args):
         if not file_path.exists():
             raise FileNotFoundError(f"File not found: {args.file}")
 
-        with open(file_path,
+        with open(file_path, encoding="utf-8") as f:
             texts.extend([line.strip() for line in f if line.strip()])
 
     if not texts:
llm_utils/vector_cache/core.py
CHANGED
@@ -61,18 +61,18 @@ class VectorCache:
     def __init__(
         self,
         url_or_model: str,
-        backend:
-        embed_size:
-        db_path:
+        backend: Literal["vllm", "transformers", "openai"] | None = None,
+        embed_size: int | None = None,
+        db_path: str | None = None,
         # OpenAI API parameters
-        api_key:
-        model_name:
+        api_key: str | None = "abc",
+        model_name: str | None = None,
         # vLLM parameters
         vllm_gpu_memory_utilization: float = 0.5,
         vllm_tensor_parallel_size: int = 1,
         vllm_dtype: str = "auto",
         vllm_trust_remote_code: bool = False,
-        vllm_max_model_len:
+        vllm_max_model_len: int | None = None,
         # Transformers parameters
         transformers_device: str = "auto",
         transformers_batch_size: int = 32,
@@ -149,7 +149,6 @@ class VectorCache:
            if self.verbose:
                print(f"Model auto-detection failed: {e}, using default model")
            # Fallback to default if auto-detection fails
-            pass
 
        # Set default db_path if not provided
        if db_path is None:
@@ -185,7 +184,7 @@ class VectorCache:
            print(f"✓ {self.backend.upper()} model/client loaded successfully")
 
    def _determine_backend(
-        self, backend:
+        self, backend: Literal["vllm", "transformers", "openai"] | None
    ) -> str:
        """Determine the appropriate backend based on url_or_model and user preference."""
        if backend is not None:
@@ -202,7 +201,7 @@ class VectorCache:
        # Default to vllm for local models
        return "vllm"
 
-    def _try_infer_model_name(self, model_name:
+    def _try_infer_model_name(self, model_name: str | None) -> str | None:
        """Infer model name for OpenAI backend if not explicitly provided."""
        if model_name:
            return model_name
@@ -243,17 +242,21 @@ class VectorCache:
        )  # Checkpoint WAL every 1000 pages
 
    def _ensure_schema(self) -> None:
-        self.conn.execute(
+        self.conn.execute(
+            """
        CREATE TABLE IF NOT EXISTS cache (
            hash TEXT PRIMARY KEY,
            text TEXT,
            embedding BLOB
        )
-        """
+        """
+        )
        # Add index for faster lookups if it doesn't exist
-        self.conn.execute(
+        self.conn.execute(
+            """
        CREATE INDEX IF NOT EXISTS idx_cache_hash ON cache(hash)
-        """
+        """
+        )
        self.conn.commit()
 
    def _load_openai_client(self) -> None:
@@ -275,7 +278,7 @@ class VectorCache:
        tensor_parallel_size = cast(int, self.config["vllm_tensor_parallel_size"])
        dtype = cast(str, self.config["vllm_dtype"])
        trust_remote_code = cast(bool, self.config["vllm_trust_remote_code"])
-        max_model_len = cast(
+        max_model_len = cast(int | None, self.config["vllm_max_model_len"])
 
        vllm_kwargs = {
            "model": self.url_or_model,
@@ -312,8 +315,7 @@ class VectorCache:
                f"4. Ensure no other processes are using GPU memory during initialization\n"
                f"Original error: {e}"
            ) from e
-
-            raise
+            raise
        elif self.backend == "transformers":
            import torch  # type: ignore[import-not-found]  # noqa: F401
            from transformers import (  # type: ignore[import-not-found]
@@ -345,29 +347,28 @@ class VectorCache:
    def _get_embeddings(self, texts: list[str]) -> list[list[float]]:
        """Get embeddings using the configured backend."""
        assert isinstance(texts, list), "texts must be a list"
-        assert all(
-
-        )
+        assert all(
+            isinstance(t, str) for t in texts
+        ), "all elements in texts must be strings"
        if self.backend == "openai":
            return self._get_openai_embeddings(texts)
-
+        if self.backend == "vllm":
            return self._get_vllm_embeddings(texts)
-
+        if self.backend == "transformers":
            return self._get_transformers_embeddings(texts)
-
-        raise ValueError(f"Unsupported backend: {self.backend}")
+        raise ValueError(f"Unsupported backend: {self.backend}")
 
    def _get_openai_embeddings(self, texts: list[str]) -> list[list[float]]:
        """Get embeddings using OpenAI API."""
        assert isinstance(texts, list), "texts must be a list"
-        assert all(
-
-        )
+        assert all(
+            isinstance(t, str) for t in texts
+        ), "all elements in texts must be strings"
        # Assert valid model_name for OpenAI backend
        model_name = self.config["model_name"]
-        assert
-
-        )
+        assert (
+            model_name is not None and model_name.strip()
+        ), f"Invalid model_name for OpenAI backend: {model_name}. Model name must be provided and non-empty."
 
        if self._client is None:
            if self.verbose:
@@ -385,9 +386,9 @@ class VectorCache:
    def _get_vllm_embeddings(self, texts: list[str]) -> list[list[float]]:
        """Get embeddings using vLLM."""
        assert isinstance(texts, list), "texts must be a list"
-        assert all(
-
-        )
+        assert all(
+            isinstance(t, str) for t in texts
+        ), "all elements in texts must be strings"
        if self._model is None:
            if self.verbose:
                print("🔧 Loading vLLM model...")
@@ -402,9 +403,9 @@ class VectorCache:
    def _get_transformers_embeddings(self, texts: list[str]) -> list[list[float]]:
        """Get embeddings using transformers directly."""
        assert isinstance(texts, list), "texts must be a list"
-        assert all(
-
-        )
+        assert all(
+            isinstance(t, str) for t in texts
+        ), "all elements in texts must be strings"
        if self._model is None:
            if self.verbose:
                print("🔧 Loading Transformers model...")
@@ -464,13 +465,12 @@ class VectorCache:
        left_padding = attention_mask[:, -1].sum() == attention_mask.shape[0]
        if left_padding:
            return last_hidden_states[:, -1]
-
-
-
-
-
-
-        ]
+        sequence_lengths = attention_mask.sum(dim=1) - 1
+        batch_size = last_hidden_states.shape[0]
+        return last_hidden_states[
+            torch.arange(batch_size, device=last_hidden_states.device),
+            sequence_lengths,
+        ]
 
    def _hash_text(self, text: str) -> str:
        return hashlib.sha1(text.encode("utf-8")).hexdigest()
@@ -486,8 +486,7 @@ class VectorCache:
        try:
            if params is None:
                return self.conn.execute(query)
-
-                return self.conn.execute(query, params)
+            return self.conn.execute(query, params)
 
        except sqlite3.OperationalError as e:
            last_exception = e
@@ -502,9 +501,8 @@ class VectorCache:
 
                time.sleep(delay)
                continue
-
-
-            raise
+            # Re-raise if not a lock error or max retries exceeded
+            raise
        except Exception:
            # Re-raise any other exceptions
            raise
@@ -524,9 +522,9 @@ class VectorCache:
            computing missing embeddings.
        """
        assert isinstance(texts, list), "texts must be a list"
-        assert all(
-
-        )
+        assert all(
+            isinstance(t, str) for t in texts
+        ), "all elements in texts must be strings"
        if not texts:
            return np.empty((0, 0), dtype=np.float32)
        t = time()
@@ -554,11 +552,11 @@ class VectorCache:
        # Determine which texts are missing
        if cache:
            missing_items: list[tuple[str, str]] = [
-                (t, h) for t, h in zip(texts, hashes) if h not in hit_map
+                (t, h) for t, h in zip(texts, hashes, strict=False) if h not in hit_map
            ]
        else:
            missing_items: list[tuple[str, str]] = [
-                (t, h) for t, h in zip(texts, hashes)
+                (t, h) for t, h in zip(texts, hashes, strict=False)
            ]
 
        if missing_items:
@@ -608,7 +606,7 @@ class VectorCache:
 
            # Prepare batch data for immediate insert
            batch_data: list[tuple[str, str, bytes]] = []
-            for (text, h), vec in zip(batch_items, batch_embeds):
+            for (text, h), vec in zip(batch_items, batch_embeds, strict=False):
                arr = np.asarray(vec, dtype=np.float32)
                batch_data.append((h, text, arr.tobytes()))
                hit_map[h] = arr
@@ -640,9 +638,9 @@ class VectorCache:
 
    def __call__(self, texts: list[str], cache: bool = True) -> np.ndarray:
        assert isinstance(texts, list), "texts must be a list"
-        assert all(
-
-        )
+        assert all(
+            isinstance(t, str) for t in texts
+        ), "all elements in texts must be strings"
        return self.embeds(texts, cache)
 
    def _bulk_insert(self, data: list[tuple[str, str, bytes]]) -> None:
@@ -662,7 +660,7 @@ class VectorCache:
 
        for attempt in range(max_retries + 1):
            try:
-
+                self.conn.executemany(
                    "INSERT OR IGNORE INTO cache (hash, text, embedding) VALUES (?, ?, ?)",
                    data,
                )
@@ -688,9 +686,8 @@ class VectorCache:
 
                time.sleep(delay)
                continue
-
-
-            raise
+            # Re-raise if not a lock error or max retries exceeded
+            raise
        except Exception:
            # Re-raise any other exceptions
            raise
@@ -723,12 +720,11 @@ class VectorCache:
 
                time.sleep(delay)
                continue
-
-            raise
+            raise
        except Exception:
            raise
 
-    def get_config(self) ->
+    def get_config(self) -> dict[str, Any]:
        """Get current configuration."""
        return {
            "url_or_model": self.url_or_model,
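The core.py changes above leave VectorCache's storage format as-is: each text is keyed by its SHA-1 hex digest, its embedding is stored as raw float32 bytes, and INSERT OR IGNORE keeps re-inserts idempotent. A self-contained sketch of that round trip (the in-memory database and random vector are placeholders for illustration, not part of the package):

import hashlib
import sqlite3

import numpy as np

conn = sqlite3.connect(":memory:")
conn.execute(
    "CREATE TABLE IF NOT EXISTS cache (hash TEXT PRIMARY KEY, text TEXT, embedding BLOB)"
)

text = "Hello world"
key = hashlib.sha1(text.encode("utf-8")).hexdigest()  # same keying as _hash_text
vec = np.random.rand(8).astype(np.float32)  # stand-in for a real embedding

conn.execute(
    "INSERT OR IGNORE INTO cache (hash, text, embedding) VALUES (?, ?, ?)",
    (key, text, vec.tobytes()),
)

row = conn.execute("SELECT embedding FROM cache WHERE hash = ?", (key,)).fetchone()
restored = np.frombuffer(row[0], dtype=np.float32)  # decode exactly as stored
assert np.array_equal(vec, restored)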
llm_utils/vector_cache/types.py
CHANGED
@@ -1,15 +1,17 @@
 """Type definitions for the embed_cache package."""
 
-from typing import
+from typing import Any, Dict, List, Optional, Tuple, Union
+
 import numpy as np
 from numpy.typing import NDArray
 
+
 # Type aliases
-TextList =
+TextList = list[str]
 EmbeddingArray = NDArray[np.float32]
-EmbeddingList =
-CacheStats =
+EmbeddingList = list[list[float]]
+CacheStats = dict[str, int]
 ModelIdentifier = str  # Either URL or model name/path
 
 # For backwards compatibility
-Embeddings = Union[EmbeddingArray, EmbeddingList]
+Embeddings = Union[EmbeddingArray, EmbeddingList]