speedy-utils 1.1.15__tar.gz → 1.1.17__tar.gz
This diff shows the changes between two publicly released versions of the package, as published to a supported registry, and is provided for informational purposes only.
- {speedy_utils-1.1.15 → speedy_utils-1.1.17}/PKG-INFO +1 -1
- {speedy_utils-1.1.15 → speedy_utils-1.1.17}/pyproject.toml +1 -1
- {speedy_utils-1.1.15 → speedy_utils-1.1.17}/src/llm_utils/lm/async_lm/async_llm_task.py +0 -2
- {speedy_utils-1.1.15 → speedy_utils-1.1.17}/src/llm_utils/vector_cache/core.py +15 -2
- {speedy_utils-1.1.15 → speedy_utils-1.1.17}/README.md +0 -0
- {speedy_utils-1.1.15 → speedy_utils-1.1.17}/src/llm_utils/__init__.py +0 -0
- {speedy_utils-1.1.15 → speedy_utils-1.1.17}/src/llm_utils/chat_format/__init__.py +0 -0
- {speedy_utils-1.1.15 → speedy_utils-1.1.17}/src/llm_utils/chat_format/display.py +0 -0
- {speedy_utils-1.1.15 → speedy_utils-1.1.17}/src/llm_utils/chat_format/transform.py +0 -0
- {speedy_utils-1.1.15 → speedy_utils-1.1.17}/src/llm_utils/chat_format/utils.py +0 -0
- {speedy_utils-1.1.15 → speedy_utils-1.1.17}/src/llm_utils/group_messages.py +0 -0
- {speedy_utils-1.1.15 → speedy_utils-1.1.17}/src/llm_utils/lm/__init__.py +0 -0
- {speedy_utils-1.1.15 → speedy_utils-1.1.17}/src/llm_utils/lm/async_lm/__init__.py +0 -0
- {speedy_utils-1.1.15 → speedy_utils-1.1.17}/src/llm_utils/lm/async_lm/_utils.py +0 -0
- {speedy_utils-1.1.15 → speedy_utils-1.1.17}/src/llm_utils/lm/async_lm/async_lm.py +0 -0
- {speedy_utils-1.1.15 → speedy_utils-1.1.17}/src/llm_utils/lm/async_lm/async_lm_base.py +0 -0
- {speedy_utils-1.1.15 → speedy_utils-1.1.17}/src/llm_utils/lm/async_lm/lm_specific.py +0 -0
- {speedy_utils-1.1.15 → speedy_utils-1.1.17}/src/llm_utils/lm/openai_memoize.py +0 -0
- {speedy_utils-1.1.15 → speedy_utils-1.1.17}/src/llm_utils/lm/utils.py +0 -0
- {speedy_utils-1.1.15 → speedy_utils-1.1.17}/src/llm_utils/scripts/README.md +0 -0
- {speedy_utils-1.1.15 → speedy_utils-1.1.17}/src/llm_utils/scripts/vllm_load_balancer.py +0 -0
- {speedy_utils-1.1.15 → speedy_utils-1.1.17}/src/llm_utils/scripts/vllm_serve.py +0 -0
- {speedy_utils-1.1.15 → speedy_utils-1.1.17}/src/llm_utils/vector_cache/__init__.py +0 -0
- {speedy_utils-1.1.15 → speedy_utils-1.1.17}/src/llm_utils/vector_cache/cli.py +0 -0
- {speedy_utils-1.1.15 → speedy_utils-1.1.17}/src/llm_utils/vector_cache/types.py +0 -0
- {speedy_utils-1.1.15 → speedy_utils-1.1.17}/src/llm_utils/vector_cache/utils.py +0 -0
- {speedy_utils-1.1.15 → speedy_utils-1.1.17}/src/speedy_utils/__init__.py +0 -0
- {speedy_utils-1.1.15 → speedy_utils-1.1.17}/src/speedy_utils/all.py +0 -0
- {speedy_utils-1.1.15 → speedy_utils-1.1.17}/src/speedy_utils/common/__init__.py +0 -0
- {speedy_utils-1.1.15 → speedy_utils-1.1.17}/src/speedy_utils/common/clock.py +0 -0
- {speedy_utils-1.1.15 → speedy_utils-1.1.17}/src/speedy_utils/common/function_decorator.py +0 -0
- {speedy_utils-1.1.15 → speedy_utils-1.1.17}/src/speedy_utils/common/logger.py +0 -0
- {speedy_utils-1.1.15 → speedy_utils-1.1.17}/src/speedy_utils/common/notebook_utils.py +0 -0
- {speedy_utils-1.1.15 → speedy_utils-1.1.17}/src/speedy_utils/common/report_manager.py +0 -0
- {speedy_utils-1.1.15 → speedy_utils-1.1.17}/src/speedy_utils/common/utils_cache.py +0 -0
- {speedy_utils-1.1.15 → speedy_utils-1.1.17}/src/speedy_utils/common/utils_io.py +0 -0
- {speedy_utils-1.1.15 → speedy_utils-1.1.17}/src/speedy_utils/common/utils_misc.py +0 -0
- {speedy_utils-1.1.15 → speedy_utils-1.1.17}/src/speedy_utils/common/utils_print.py +0 -0
- {speedy_utils-1.1.15 → speedy_utils-1.1.17}/src/speedy_utils/multi_worker/__init__.py +0 -0
- {speedy_utils-1.1.15 → speedy_utils-1.1.17}/src/speedy_utils/multi_worker/process.py +0 -0
- {speedy_utils-1.1.15 → speedy_utils-1.1.17}/src/speedy_utils/multi_worker/thread.py +0 -0
- {speedy_utils-1.1.15 → speedy_utils-1.1.17}/src/speedy_utils/scripts/__init__.py +0 -0
- {speedy_utils-1.1.15 → speedy_utils-1.1.17}/src/speedy_utils/scripts/mpython.py +0 -0
- {speedy_utils-1.1.15 → speedy_utils-1.1.17}/src/speedy_utils/scripts/openapi_client_codegen.py +0 -0
src/llm_utils/lm/async_lm/async_llm_task.py (+0 -2):

```diff
@@ -11,8 +11,6 @@ from venv import logger
 
 from openai.types.chat import ChatCompletionMessageParam
 from pydantic import BaseModel
-from pytest import Cache
-from speedy_utils import jdumps
 from speedy_utils.all import dump_json_or_pickle, identify
 
 from llm_utils.chat_format.display import get_conversation_one_turn
```
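The only change to `async_llm_task.py` is the removal of two module-level imports that appear unused; `from pytest import Cache` in particular made pytest an implicit runtime dependency of the library. A minimal sketch of why that matters (the try/except wrapper is illustrative, not part of the package):

```python
# Before 1.1.17, importing the task module pulled in pytest at module load,
# so environments without pytest installed failed before any library code ran.
try:
    import llm_utils.lm.async_lm.async_llm_task  # noqa: F401
except ModuleNotFoundError as exc:
    print(f"import failed, missing module: {exc.name}")  # e.g. "pytest"
```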
src/llm_utils/vector_cache/core.py (+15 -2):

```diff
@@ -167,12 +167,11 @@ class VectorCache:
         return "vllm"
     def _try_infer_model_name(self, model_name: Optional[str]) -> Optional[str]:
         """Infer model name for OpenAI backend if not explicitly provided."""
-        # if self.backend != "openai":
-        #     return model_name
         if model_name:
             return model_name
         if 'https://' in self.url_or_model:
             model_name = "text-embedding-3-small"
+
         if 'http://localhost' in self.url_or_model:
             from openai import OpenAI
             client = OpenAI(base_url=self.url_or_model, api_key='abc')
```
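The commented-out early return for non-OpenAI backends is dropped, so inference now runs regardless of backend. The hunk ends just after the localhost client is constructed, so the final lookup step is not visible here; the sketch below reconstructs the visible rules as a standalone function, with the `models.list()` call marked as an assumption:

```python
from typing import Optional

from openai import OpenAI


def infer_model_name(url_or_model: str, model_name: Optional[str]) -> Optional[str]:
    """Hedged stand-in for VectorCache._try_infer_model_name."""
    if model_name:                    # an explicit name always wins
        return model_name
    if "https://" in url_or_model:    # hosted endpoint: default to OpenAI's small model
        model_name = "text-embedding-3-small"
    if "http://localhost" in url_or_model:
        client = OpenAI(base_url=url_or_model, api_key="abc")
        # Assumed continuation (outside the hunk): ask the local server
        # which model it serves and take the first one.
        model_name = client.models.list().data[0].id
    return model_name
```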
```diff
@@ -277,6 +276,8 @@ class VectorCache:
 
     def _get_embeddings(self, texts: list[str]) -> list[list[float]]:
         """Get embeddings using the configured backend."""
+        assert isinstance(texts, list), "texts must be a list"
+        assert all(isinstance(t, str) for t in texts), "all elements in texts must be strings"
         if self.backend == "openai":
             return self._get_openai_embeddings(texts)
         elif self.backend == "vllm":
```
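The same two-line type guard is added at the top of `_get_embeddings` here and of every other embedding entry point in the hunks below. Extracted into a standalone helper (the helper name is illustrative), its behavior is:

```python
def validate_texts(texts: list[str]) -> None:
    # Verbatim guard from the 1.1.17 diff, wrapped in a named helper.
    assert isinstance(texts, list), "texts must be a list"
    assert all(isinstance(t, str) for t in texts), "all elements in texts must be strings"


validate_texts(["hello", "world"])  # passes
validate_texts([])                  # passes: an empty list is vacuously all-strings
# validate_texts("hello")           # AssertionError: texts must be a list
# validate_texts(["a", 1])          # AssertionError: all elements in texts must be strings
```

The list check matters because a bare string would satisfy the all-strings check on its own (each character is a string), so without it a misuse like `embeds("hello")` could silently embed five one-character texts.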
```diff
@@ -288,6 +289,8 @@ class VectorCache:
 
     def _get_openai_embeddings(self, texts: list[str]) -> list[list[float]]:
         """Get embeddings using OpenAI API."""
+        assert isinstance(texts, list), "texts must be a list"
+        assert all(isinstance(t, str) for t in texts), "all elements in texts must be strings"
         # Assert valid model_name for OpenAI backend
         model_name = self.config["model_name"]
         assert model_name is not None and model_name.strip(), f"Invalid model_name for OpenAI backend: {model_name}. Model name must be provided and non-empty."
```
```diff
@@ -304,6 +307,8 @@ class VectorCache:
 
     def _get_vllm_embeddings(self, texts: list[str]) -> list[list[float]]:
         """Get embeddings using vLLM."""
+        assert isinstance(texts, list), "texts must be a list"
+        assert all(isinstance(t, str) for t in texts), "all elements in texts must be strings"
         if self._model is None:
             self._load_model()
 
```
```diff
@@ -313,6 +318,8 @@ class VectorCache:
 
     def _get_transformers_embeddings(self, texts: list[str]) -> list[list[float]]:
         """Get embeddings using transformers directly."""
+        assert isinstance(texts, list), "texts must be a list"
+        assert all(isinstance(t, str) for t in texts), "all elements in texts must be strings"
         if self._model is None:
             self._load_model()
 
```
```diff
@@ -380,6 +387,8 @@ class VectorCache:
         handle very large input lists. A tqdm progress bar is shown while
         computing missing embeddings.
         """
+        assert isinstance(texts, list), "texts must be a list"
+        assert all(isinstance(t, str) for t in texts), "all elements in texts must be strings"
         if not texts:
             return np.empty((0, 0), dtype=np.float32)
         t = time()
```
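One wrinkle worth noting in the `embeds` hunk: the empty-input fast path returns a 0 x 0 array rather than a 0 x d one, so callers cannot read the embedding dimension off an empty result:

```python
import numpy as np

# Mirrors the fast path above: no texts means no second-axis dimension either.
empty = np.empty((0, 0), dtype=np.float32)
print(empty.shape)  # (0, 0)
```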
```diff
@@ -436,6 +445,8 @@ class VectorCache:
         return np.vstack([hit_map[h] for h in hashes])
 
     def __call__(self, texts: list[str], cache: bool = True) -> np.ndarray:
+        assert isinstance(texts, list), "texts must be a list"
+        assert all(isinstance(t, str) for t in texts), "all elements in texts must be strings"
         return self.embeds(texts, cache)
 
     def _bulk_insert(self, data: list[tuple[str, str, bytes]]) -> None:
```
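Since `__call__` simply validates and forwards to `embeds`, a cache instance can be used as a plain function. A hypothetical usage sketch; the constructor argument mirrors the `url_or_model` attribute seen above, but the real signature is not shown in this diff:

```python
from llm_utils.vector_cache import VectorCache  # import path from the file list

cache = VectorCache("http://localhost:8000/v1")  # constructor signature assumed
vectors = cache(["first sentence", "second sentence"])
# equivalent to: cache.embeds(["first sentence", "second sentence"], cache=True)
```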
```diff
@@ -454,6 +465,8 @@ class VectorCache:
         Precompute embeddings for a large list of texts efficiently.
         This is optimized for bulk operations when you know all texts upfront.
         """
+        assert isinstance(texts, list), "texts must be a list"
+        assert all(isinstance(t, str) for t in texts), "all elements in texts must be strings"
         if not texts:
             return
```