speedy-utils 1.1.13__py3-none-any.whl → 1.1.15__py3-none-any.whl

This diff shows the changes between two publicly available versions of a package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
@@ -4,6 +4,7 @@ import hashlib
4
4
  import os
5
5
  import sqlite3
6
6
  from pathlib import Path
7
+ from time import time
7
8
  from typing import Any, Dict, Literal, Optional, cast
8
9
 
9
10
  import numpy as np
@@ -78,7 +79,7 @@ class VectorCache:
78
79
  self.config = {
79
80
  # OpenAI
80
81
  "api_key": api_key or os.getenv("OPENAI_API_KEY"),
81
- "model_name": model_name,
82
+ "model_name": self._try_infer_model_name(model_name),
82
83
  # vLLM
83
84
  "vllm_gpu_memory_utilization": vllm_gpu_memory_utilization,
84
85
  "vllm_tensor_parallel_size": vllm_tensor_parallel_size,
@@ -164,7 +165,22 @@ class VectorCache:
164
165
 
165
166
  # Default to vllm for local models
166
167
  return "vllm"
167
-
168
+ def _try_infer_model_name(self, model_name: Optional[str]) -> Optional[str]:
169
+ """Infer model name for OpenAI backend if not explicitly provided."""
170
+ # if self.backend != "openai":
171
+ # return model_name
172
+ if model_name:
173
+ return model_name
174
+ if 'https://' in self.url_or_model:
175
+ model_name = "text-embedding-3-small"
176
+ if 'http://localhost' in self.url_or_model:
177
+ from openai import OpenAI
178
+ client = OpenAI(base_url=self.url_or_model, api_key='abc')
179
+ model_name = client.models.list().data[0].id
180
+
181
+ # Default model name
182
+ print('Infer model name:', model_name)
183
+ return model_name
168
184
  def _optimize_connection(self) -> None:
169
185
  """Optimize SQLite connection for bulk operations."""
170
186
  # Performance optimizations for bulk operations
@@ -366,7 +382,7 @@ class VectorCache:
366
382
  """
367
383
  if not texts:
368
384
  return np.empty((0, 0), dtype=np.float32)
369
-
385
+ t = time()
370
386
  hashes = [self._hash_text(t) for t in texts]
371
387
 
372
388
  # Helper to yield chunks
@@ -414,6 +430,9 @@ class VectorCache:
414
430
  self._bulk_insert(bulk_insert_data)
415
431
 
416
432
  # Return embeddings in the original order
433
+ elapsed = time() - t
434
+ if self.verbose:
435
+ print(f"Retrieved {len(texts)} embeddings in {elapsed:.2f} seconds")
417
436
  return np.vstack([hit_map[h] for h in hashes])
418
437
 
419
438
  def __call__(self, texts: list[str], cache: bool = True) -> np.ndarray:
speedy_utils/__init__.py CHANGED
@@ -18,7 +18,7 @@
18
18
  # • memoize(func) -> Callable - Function result caching decorator
19
19
  # • identify(obj: Any) -> str - Generate unique object identifier
20
20
  # • identify_uuid(obj: Any) -> str - Generate UUID-based object identifier
21
- # • load_by_ext(fname: str | list[str]) -> Any - Auto-detect file format loader
21
+ # • load_by_ext(fname: Union[str, list[str]]) -> Any - Auto-detect file format loader
22
22
  # • dump_json_or_pickle(obj: Any, fname: str) -> None - Smart file serializer
23
23
  # • load_json_or_pickle(fname: str) -> Any - Smart file deserializer
24
24
  # • multi_thread(func, items, **kwargs) -> list - Parallel thread execution
@@ -92,7 +92,7 @@ def load_jsonl(path):
92
92
  return [json.loads(line) for line in lines]
93
93
 
94
94
 
95
- def load_by_ext(fname: str | list[str], do_memoize: bool = False) -> Any:
95
+ def load_by_ext(fname: Union[str, list[str]], do_memoize: bool = False) -> Any:
96
96
  """
97
97
  Load data based on file extension.
98
98
  """
@@ -3,7 +3,7 @@
3
3
  import copy
4
4
  import pprint
5
5
  import textwrap
6
- from typing import Any
6
+ from typing import Any, Union
7
7
 
8
8
  from tabulate import tabulate
9
9
 
@@ -24,17 +24,17 @@ def flatten_dict(d, parent_key="", sep="."):
24
24
 
25
25
  def fprint(
26
26
  input_data: Any,
27
- key_ignore: list[str] | None = None,
28
- key_keep: list[str] | None = None,
27
+ key_ignore: Union[list[str], None] = None,
28
+ key_keep: Union[list[str], None] = None,
29
29
  max_width: int = 100,
30
30
  indent: int = 2,
31
- depth: int | None = None,
31
+ depth: Union[int, None] = None,
32
32
  table_format: str = "grid",
33
33
  str_wrap_width: int = 80,
34
34
  grep=None,
35
35
  is_notebook=None,
36
36
  f=print,
37
- ) -> None | str:
37
+ ) -> Union[None, str]:
38
38
  """
39
39
  Pretty print structured data.
40
40
  """
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: speedy-utils
3
- Version: 1.1.13
3
+ Version: 1.1.15
4
4
  Summary: Fast and easy-to-use package for data science
5
5
  Author: AnhVTH
6
6
  Author-email: anhvth.226@gmail.com
@@ -25,6 +25,7 @@ Requires-Dist: jupyterlab
25
25
  Requires-Dist: loguru
26
26
  Requires-Dist: matplotlib
27
27
  Requires-Dist: numpy
28
+ Requires-Dist: openai (>=1.106.0,<2.0.0)
28
29
  Requires-Dist: packaging (>=23.2,<25)
29
30
  Requires-Dist: pandas
30
31
  Requires-Dist: pydantic
@@ -18,10 +18,10 @@ llm_utils/scripts/vllm_load_balancer.py,sha256=TT5Ypq7gUcl52gRFp--ORFFjzhfGlcaX2
18
18
  llm_utils/scripts/vllm_serve.py,sha256=gJ0-y4kybMfSt8qzye1pJqGMY3x9JLRi6Tu7RjJMnss,14771
19
19
  llm_utils/vector_cache/__init__.py,sha256=i1KQuC4OhPewYpFl9X6HlWFBuASCTx2qgGizhpZhmn0,862
20
20
  llm_utils/vector_cache/cli.py,sha256=DMXTj8nZ2_LRjprbYPb4uzq04qZtOfBbmblmaqDcCuM,6251
21
- llm_utils/vector_cache/core.py,sha256=UqAgsFFaDQtXBFBB22YnoScGpImVfv4k2P4EONQrh50,21823
21
+ llm_utils/vector_cache/core.py,sha256=-g_y3U4hdJltHMVi8oYJTsW8p7lR973LlDqcLZ9kECk,22725
22
22
  llm_utils/vector_cache/types.py,sha256=ru8qmUZ8_lNd3_oYpjCMtpXTsqmwsSBe56Z4hTWm3xI,435
23
23
  llm_utils/vector_cache/utils.py,sha256=dwbbXlRrARrpmS4YqSlYQqrTURg0UWe8XvaAWcX05MM,1458
24
- speedy_utils/__init__.py,sha256=YCpiReW22zG4KkQXQe6V9BQ8bn7PtiXolOaW_iL8T4M,5734
24
+ speedy_utils/__init__.py,sha256=nJpUb5Oa3STDbqPSiWXoI-IvKntyRYzYxkYW4GM2i_Q,5740
25
25
  speedy_utils/all.py,sha256=t-HKzDmhF1MTFnmq7xRnPs5nFG_aZaLH9Ua0RM6nQ9Y,4855
26
26
  speedy_utils/common/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
27
27
  speedy_utils/common/clock.py,sha256=3n4FkCW0dz46O8By09V5Pve1DSMgpLDRbWEVRryryeQ,7423
@@ -30,16 +30,16 @@ speedy_utils/common/logger.py,sha256=a2iZx0eWyfi2-2X_H2QmfuA3tfR7_XSM7Nd0GdUnUOs
30
30
  speedy_utils/common/notebook_utils.py,sha256=-97kehJ_Gg3TzDLubsLIYJcykqX1NXhbvBO6nniZSYM,2063
31
31
  speedy_utils/common/report_manager.py,sha256=eBiw5KY6bWUhwki3B4lK5o8bFsp7L5x28X9GCI-Sd1w,3899
32
32
  speedy_utils/common/utils_cache.py,sha256=0cQJm0nnD9755pWMdAkhDn2qIbGvJMTMUl2gibkX05E,22376
33
- speedy_utils/common/utils_io.py,sha256=qaqbZpopfcDDwsrqNOX-pZpUw7EXRTDweXywRyV-2vo,5244
33
+ speedy_utils/common/utils_io.py,sha256=76ZVgJwgjOznq5L_i2oyWuBnuwymjcktqSvB8VWTKsc,5250
34
34
  speedy_utils/common/utils_misc.py,sha256=cdEuBBpiB1xpuzj0UBDHDuTIerqsMIw37ENq6EXliOw,1795
35
- speedy_utils/common/utils_print.py,sha256=iQqnOYw2EFC8TqeSDbrcnIQAUKT7FbB8Mec8b2aGAzw,4833
35
+ speedy_utils/common/utils_print.py,sha256=syRrnSFtguxrV-elx6DDVcSGu4Qy7D_xVNZhPwbUY4A,4864
36
36
  speedy_utils/multi_worker/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
37
37
  speedy_utils/multi_worker/process.py,sha256=MJ5njqjY9nMo-Z1oXMbzkppuYVJCcCtFsLo8lbCh5zs,6849
38
38
  speedy_utils/multi_worker/thread.py,sha256=f02VjJV8nudg0eA_AcfPEX7tHY4-czesuzthKZs_Hdc,16351
39
39
  speedy_utils/scripts/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
40
40
  speedy_utils/scripts/mpython.py,sha256=IvywP7Y0_V6tWfMP-4MjPvN5_KfxWF21xaLJsCIayCk,3821
41
41
  speedy_utils/scripts/openapi_client_codegen.py,sha256=f2125S_q0PILgH5dyzoKRz7pIvNEjCkzpi4Q4pPFRZE,9683
42
- speedy_utils-1.1.13.dist-info/METADATA,sha256=EYIm4tBWfKI94vxyTP0i8zn8--btk4BvCBCB4K3AAjk,7442
43
- speedy_utils-1.1.13.dist-info/WHEEL,sha256=b4K_helf-jlQoXBBETfwnf4B04YC67LOev0jo4fX5m8,88
44
- speedy_utils-1.1.13.dist-info/entry_points.txt,sha256=T1t85jwx8fK6m5msdkBGIXH5R5Kd0zSL0S6erXERPzg,237
45
- speedy_utils-1.1.13.dist-info/RECORD,,
42
+ speedy_utils-1.1.15.dist-info/METADATA,sha256=GOftJglyMxP320bZFW8XX-UurB8vr_01jDNGV7ecKh8,7483
43
+ speedy_utils-1.1.15.dist-info/WHEEL,sha256=b4K_helf-jlQoXBBETfwnf4B04YC67LOev0jo4fX5m8,88
44
+ speedy_utils-1.1.15.dist-info/entry_points.txt,sha256=T1t85jwx8fK6m5msdkBGIXH5R5Kd0zSL0S6erXERPzg,237
45
+ speedy_utils-1.1.15.dist-info/RECORD,,