speedy-utils 1.1.15__py3-none-any.whl → 1.1.17__py3-none-any.whl

This diff shows the differences between the contents of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -11,8 +11,6 @@ from venv import logger
11
11
 
12
12
  from openai.types.chat import ChatCompletionMessageParam
13
13
  from pydantic import BaseModel
14
- from pytest import Cache
15
- from speedy_utils import jdumps
16
14
  from speedy_utils.all import dump_json_or_pickle, identify
17
15
 
18
16
  from llm_utils.chat_format.display import get_conversation_one_turn
@@ -167,12 +167,11 @@ class VectorCache:
167
167
  return "vllm"
168
168
  def _try_infer_model_name(self, model_name: Optional[str]) -> Optional[str]:
169
169
  """Infer model name for OpenAI backend if not explicitly provided."""
170
- # if self.backend != "openai":
171
- # return model_name
172
170
  if model_name:
173
171
  return model_name
174
172
  if 'https://' in self.url_or_model:
175
173
  model_name = "text-embedding-3-small"
174
+
176
175
  if 'http://localhost' in self.url_or_model:
177
176
  from openai import OpenAI
178
177
  client = OpenAI(base_url=self.url_or_model, api_key='abc')
@@ -277,6 +276,8 @@ class VectorCache:
277
276
 
278
277
  def _get_embeddings(self, texts: list[str]) -> list[list[float]]:
279
278
  """Get embeddings using the configured backend."""
279
+ assert isinstance(texts, list), "texts must be a list"
280
+ assert all(isinstance(t, str) for t in texts), "all elements in texts must be strings"
280
281
  if self.backend == "openai":
281
282
  return self._get_openai_embeddings(texts)
282
283
  elif self.backend == "vllm":
@@ -288,6 +289,8 @@ class VectorCache:
288
289
 
289
290
  def _get_openai_embeddings(self, texts: list[str]) -> list[list[float]]:
290
291
  """Get embeddings using OpenAI API."""
292
+ assert isinstance(texts, list), "texts must be a list"
293
+ assert all(isinstance(t, str) for t in texts), "all elements in texts must be strings"
291
294
  # Assert valid model_name for OpenAI backend
292
295
  model_name = self.config["model_name"]
293
296
  assert model_name is not None and model_name.strip(), f"Invalid model_name for OpenAI backend: {model_name}. Model name must be provided and non-empty."
@@ -304,6 +307,8 @@ class VectorCache:
304
307
 
305
308
  def _get_vllm_embeddings(self, texts: list[str]) -> list[list[float]]:
306
309
  """Get embeddings using vLLM."""
310
+ assert isinstance(texts, list), "texts must be a list"
311
+ assert all(isinstance(t, str) for t in texts), "all elements in texts must be strings"
307
312
  if self._model is None:
308
313
  self._load_model()
309
314
 
@@ -313,6 +318,8 @@ class VectorCache:
313
318
 
314
319
  def _get_transformers_embeddings(self, texts: list[str]) -> list[list[float]]:
315
320
  """Get embeddings using transformers directly."""
321
+ assert isinstance(texts, list), "texts must be a list"
322
+ assert all(isinstance(t, str) for t in texts), "all elements in texts must be strings"
316
323
  if self._model is None:
317
324
  self._load_model()
318
325
 
@@ -380,6 +387,8 @@ class VectorCache:
380
387
  handle very large input lists. A tqdm progress bar is shown while
381
388
  computing missing embeddings.
382
389
  """
390
+ assert isinstance(texts, list), "texts must be a list"
391
+ assert all(isinstance(t, str) for t in texts), "all elements in texts must be strings"
383
392
  if not texts:
384
393
  return np.empty((0, 0), dtype=np.float32)
385
394
  t = time()
@@ -436,6 +445,8 @@ class VectorCache:
436
445
  return np.vstack([hit_map[h] for h in hashes])
437
446
 
438
447
  def __call__(self, texts: list[str], cache: bool = True) -> np.ndarray:
448
+ assert isinstance(texts, list), "texts must be a list"
449
+ assert all(isinstance(t, str) for t in texts), "all elements in texts must be strings"
439
450
  return self.embeds(texts, cache)
440
451
 
441
452
  def _bulk_insert(self, data: list[tuple[str, str, bytes]]) -> None:
@@ -454,6 +465,8 @@ class VectorCache:
454
465
  Precompute embeddings for a large list of texts efficiently.
455
466
  This is optimized for bulk operations when you know all texts upfront.
456
467
  """
468
+ assert isinstance(texts, list), "texts must be a list"
469
+ assert all(isinstance(t, str) for t in texts), "all elements in texts must be strings"
457
470
  if not texts:
458
471
  return
459
472
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: speedy-utils
3
- Version: 1.1.15
3
+ Version: 1.1.17
4
4
  Summary: Fast and easy-to-use package for data science
5
5
  Author: AnhVTH
6
6
  Author-email: anhvth.226@gmail.com
@@ -7,7 +7,7 @@ llm_utils/group_messages.py,sha256=Oe2tlhg-zRodG1-hodYebddrR77j9UdE05LzJw0EvYI,3
7
7
  llm_utils/lm/__init__.py,sha256=rX36_MsnekM5GHwWS56XELbm4W5x2TDwnPERDTfo0eU,194
8
8
  llm_utils/lm/async_lm/__init__.py,sha256=PUBbCuf5u6-0GBUu-2PI6YAguzsyXj-LPkU6vccqT6E,121
9
9
  llm_utils/lm/async_lm/_utils.py,sha256=P1-pUDf_0pDmo8WTIi43t5ARlyGA1RIJfpAhz-gfA5g,6105
10
- llm_utils/lm/async_lm/async_llm_task.py,sha256=iXSTbf-KekXncVVnic-v4dTq5HBDjbyLwhgo0Y-wp7Q,19167
10
+ llm_utils/lm/async_lm/async_llm_task.py,sha256=IT63_qtKWs1Svk6tic12ALxHA9du2WnAjX5xXuYH9jA,19110
11
11
  llm_utils/lm/async_lm/async_lm.py,sha256=slGOUXFeWAy3Ak7Xj7Z4JNqCgAUBo21Hjg3RD75Ul2Q,13396
12
12
  llm_utils/lm/async_lm/async_lm_base.py,sha256=Qh9Bx_S-FboO7huUi6TqP3KiTVHDH0C-Tfbd_UJC7Cc,8122
13
13
  llm_utils/lm/async_lm/lm_specific.py,sha256=KmqdCm3SJ5MqN-dRJd6S5tq5-ve1X2eNWf2CMFtc_3s,3926
@@ -18,7 +18,7 @@ llm_utils/scripts/vllm_load_balancer.py,sha256=TT5Ypq7gUcl52gRFp--ORFFjzhfGlcaX2
18
18
  llm_utils/scripts/vllm_serve.py,sha256=gJ0-y4kybMfSt8qzye1pJqGMY3x9JLRi6Tu7RjJMnss,14771
19
19
  llm_utils/vector_cache/__init__.py,sha256=i1KQuC4OhPewYpFl9X6HlWFBuASCTx2qgGizhpZhmn0,862
20
20
  llm_utils/vector_cache/cli.py,sha256=DMXTj8nZ2_LRjprbYPb4uzq04qZtOfBbmblmaqDcCuM,6251
21
- llm_utils/vector_cache/core.py,sha256=-g_y3U4hdJltHMVi8oYJTsW8p7lR973LlDqcLZ9kECk,22725
21
+ llm_utils/vector_cache/core.py,sha256=O1C3azjPT7wyp8NOGPSSd3BPacM0fo-oSc3BdaISr6I,23769
22
22
  llm_utils/vector_cache/types.py,sha256=ru8qmUZ8_lNd3_oYpjCMtpXTsqmwsSBe56Z4hTWm3xI,435
23
23
  llm_utils/vector_cache/utils.py,sha256=dwbbXlRrARrpmS4YqSlYQqrTURg0UWe8XvaAWcX05MM,1458
24
24
  speedy_utils/__init__.py,sha256=nJpUb5Oa3STDbqPSiWXoI-IvKntyRYzYxkYW4GM2i_Q,5740
@@ -39,7 +39,7 @@ speedy_utils/multi_worker/thread.py,sha256=f02VjJV8nudg0eA_AcfPEX7tHY4-czesuzthK
39
39
  speedy_utils/scripts/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
40
40
  speedy_utils/scripts/mpython.py,sha256=IvywP7Y0_V6tWfMP-4MjPvN5_KfxWF21xaLJsCIayCk,3821
41
41
  speedy_utils/scripts/openapi_client_codegen.py,sha256=f2125S_q0PILgH5dyzoKRz7pIvNEjCkzpi4Q4pPFRZE,9683
42
- speedy_utils-1.1.15.dist-info/METADATA,sha256=GOftJglyMxP320bZFW8XX-UurB8vr_01jDNGV7ecKh8,7483
43
- speedy_utils-1.1.15.dist-info/WHEEL,sha256=b4K_helf-jlQoXBBETfwnf4B04YC67LOev0jo4fX5m8,88
44
- speedy_utils-1.1.15.dist-info/entry_points.txt,sha256=T1t85jwx8fK6m5msdkBGIXH5R5Kd0zSL0S6erXERPzg,237
45
- speedy_utils-1.1.15.dist-info/RECORD,,
42
+ speedy_utils-1.1.17.dist-info/METADATA,sha256=3o7AJX8v5MS-d9j-FgQWZHKikyVPmlzkm4p1-gj2ISM,7483
43
+ speedy_utils-1.1.17.dist-info/WHEEL,sha256=b4K_helf-jlQoXBBETfwnf4B04YC67LOev0jo4fX5m8,88
44
+ speedy_utils-1.1.17.dist-info/entry_points.txt,sha256=T1t85jwx8fK6m5msdkBGIXH5R5Kd0zSL0S6erXERPzg,237
45
+ speedy_utils-1.1.17.dist-info/RECORD,,