PyPI - speedy-utils - Versions diffs - 1.1.13__tar.gz → 1.1.15__tar.gz - Mend

speedy-utils 1.1.13 (tar.gz) → 1.1.15 (tar.gz)

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (44) hide show

{speedy_utils-1.1.13 → speedy_utils-1.1.15}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.3
 Name: speedy-utils
-Version: 1.1.13
+Version: 1.1.15
 Summary: Fast and easy-to-use package for data science
 Author: AnhVTH
 Author-email: anhvth.226@gmail.com
@@ -25,6 +25,7 @@ Requires-Dist: jupyterlab
 Requires-Dist: loguru
 Requires-Dist: matplotlib
 Requires-Dist: numpy
+Requires-Dist: openai (>=1.106.0,<2.0.0)
 Requires-Dist: packaging (>=23.2,<25)
 Requires-Dist: pandas
 Requires-Dist: pydantic

{speedy_utils-1.1.13 → speedy_utils-1.1.15}/pyproject.toml RENAMED Viewed

@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "speedy-utils"
-version = "1.1.13"
+version = "1.1.15"
 description = "Fast and easy-to-use package for data science"
 authors = ["AnhVTH <anhvth.226@gmail.com>"]
 readme = "README.md"
@@ -58,6 +58,7 @@ json-repair = ">=0.25.0,<0.31.0"
 fastprogress = "*"
 freezegun = "^1.5.1"
 packaging = ">=23.2,<25"
+openai = "^1.106.0"
 [tool.poetry.scripts]
 mpython = "speedy_utils.scripts.mpython:main"

{speedy_utils-1.1.13 → speedy_utils-1.1.15}/src/llm_utils/vector_cache/core.py RENAMED Viewed

@@ -4,6 +4,7 @@ import hashlib
 import os
 import sqlite3
 from pathlib import Path
+from time import time
 from typing import Any, Dict, Literal, Optional, cast
 import numpy as np
@@ -78,7 +79,7 @@ class VectorCache:
         self.config = {
             # OpenAI
             "api_key": api_key or os.getenv("OPENAI_API_KEY"),
-            "model_name": model_name,
+            "model_name": self._try_infer_model_name(model_name),
             # vLLM
             "vllm_gpu_memory_utilization": vllm_gpu_memory_utilization,
             "vllm_tensor_parallel_size": vllm_tensor_parallel_size,
@@ -164,7 +165,22 @@ class VectorCache:
         # Default to vllm for local models
         return "vllm"
+    def _try_infer_model_name(self, model_name: Optional[str]) -> Optional[str]:
+        """Infer model name for OpenAI backend if not explicitly provided."""
+        # if self.backend != "openai":
+            # return model_name
+        if model_name:
+            return model_name
+        if 'https://' in self.url_or_model:
+            model_name =  "text-embedding-3-small"
+        if 'http://localhost' in self.url_or_model:
+            from openai import OpenAI
+            client = OpenAI(base_url=self.url_or_model, api_key='abc')
+            model_name =  client.models.list().data[0].id
+        # Default model name
+        print('Infer model name:', model_name)
+        return model_name
     def _optimize_connection(self) -> None:
         """Optimize SQLite connection for bulk operations."""
         # Performance optimizations for bulk operations
@@ -366,7 +382,7 @@ class VectorCache:
         """
         if not texts:
             return np.empty((0, 0), dtype=np.float32)
+        t = time()
         hashes = [self._hash_text(t) for t in texts]
         # Helper to yield chunks
@@ -414,6 +430,9 @@ class VectorCache:
             self._bulk_insert(bulk_insert_data)
         # Return embeddings in the original order
+        elapsed = time() - t
+        if self.verbose:
+            print(f"Retrieved {len(texts)} embeddings in {elapsed:.2f} seconds")
         return np.vstack([hit_map[h] for h in hashes])
     def __call__(self, texts: list[str], cache: bool = True) -> np.ndarray:

{speedy_utils-1.1.13 → speedy_utils-1.1.15}/src/speedy_utils/__init__.py RENAMED Viewed

@@ -18,7 +18,7 @@
 # • memoize(func) -> Callable - Function result caching decorator
 # • identify(obj: Any) -> str - Generate unique object identifier
 # • identify_uuid(obj: Any) -> str - Generate UUID-based object identifier
-# • load_by_ext(fname: str | list[str]) -> Any - Auto-detect file format loader
+# • load_by_ext(fname: Union[str, list[str]]) -> Any - Auto-detect file format loader
 # • dump_json_or_pickle(obj: Any, fname: str) -> None - Smart file serializer
 # • load_json_or_pickle(fname: str) -> Any - Smart file deserializer
 # • multi_thread(func, items, **kwargs) -> list - Parallel thread execution

{speedy_utils-1.1.13 → speedy_utils-1.1.15}/src/speedy_utils/common/utils_io.py RENAMED Viewed

@@ -92,7 +92,7 @@ def load_jsonl(path):
     return [json.loads(line) for line in lines]
-def load_by_ext(fname: str | list[str], do_memoize: bool = False) -> Any:
+def load_by_ext(fname: Union[str, list[str]], do_memoize: bool = False) -> Any:
     """
     Load data based on file extension.
     """

{speedy_utils-1.1.13 → speedy_utils-1.1.15}/src/speedy_utils/common/utils_print.py RENAMED Viewed

@@ -3,7 +3,7 @@
 import copy
 import pprint
 import textwrap
-from typing import Any
+from typing import Any, Union
 from tabulate import tabulate
@@ -24,17 +24,17 @@ def flatten_dict(d, parent_key="", sep="."):
 def fprint(
     input_data: Any,
-    key_ignore: list[str] | None = None,
-    key_keep: list[str] | None = None,
+    key_ignore: Union[list[str], None] = None,
+    key_keep: Union[list[str], None] = None,
     max_width: int = 100,
     indent: int = 2,
-    depth: int | None = None,
+    depth: Union[int, None] = None,
     table_format: str = "grid",
     str_wrap_width: int = 80,
     grep=None,
     is_notebook=None,
     f=print,
-) -> None | str:
+) -> Union[None, str]:
     """
     Pretty print structured data.
     """