speedy-utils 1.1.16__py3-none-any.whl → 1.1.17__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -11,8 +11,6 @@ from venv import logger
11
11
 
12
12
  from openai.types.chat import ChatCompletionMessageParam
13
13
  from pydantic import BaseModel
14
- from pytest import Cache
15
- from speedy_utils import jdumps
16
14
  from speedy_utils.all import dump_json_or_pickle, identify
17
15
 
18
16
  from llm_utils.chat_format.display import get_conversation_one_turn
@@ -276,6 +276,8 @@ class VectorCache:
276
276
 
277
277
  def _get_embeddings(self, texts: list[str]) -> list[list[float]]:
278
278
  """Get embeddings using the configured backend."""
279
+ assert isinstance(texts, list), "texts must be a list"
280
+ assert all(isinstance(t, str) for t in texts), "all elements in texts must be strings"
279
281
  if self.backend == "openai":
280
282
  return self._get_openai_embeddings(texts)
281
283
  elif self.backend == "vllm":
@@ -287,6 +289,8 @@ class VectorCache:
287
289
 
288
290
  def _get_openai_embeddings(self, texts: list[str]) -> list[list[float]]:
289
291
  """Get embeddings using OpenAI API."""
292
+ assert isinstance(texts, list), "texts must be a list"
293
+ assert all(isinstance(t, str) for t in texts), "all elements in texts must be strings"
290
294
  # Assert valid model_name for OpenAI backend
291
295
  model_name = self.config["model_name"]
292
296
  assert model_name is not None and model_name.strip(), f"Invalid model_name for OpenAI backend: {model_name}. Model name must be provided and non-empty."
@@ -303,6 +307,8 @@ class VectorCache:
303
307
 
304
308
  def _get_vllm_embeddings(self, texts: list[str]) -> list[list[float]]:
305
309
  """Get embeddings using vLLM."""
310
+ assert isinstance(texts, list), "texts must be a list"
311
+ assert all(isinstance(t, str) for t in texts), "all elements in texts must be strings"
306
312
  if self._model is None:
307
313
  self._load_model()
308
314
 
@@ -312,6 +318,8 @@ class VectorCache:
312
318
 
313
319
  def _get_transformers_embeddings(self, texts: list[str]) -> list[list[float]]:
314
320
  """Get embeddings using transformers directly."""
321
+ assert isinstance(texts, list), "texts must be a list"
322
+ assert all(isinstance(t, str) for t in texts), "all elements in texts must be strings"
315
323
  if self._model is None:
316
324
  self._load_model()
317
325
 
@@ -379,6 +387,8 @@ class VectorCache:
379
387
  handle very large input lists. A tqdm progress bar is shown while
380
388
  computing missing embeddings.
381
389
  """
390
+ assert isinstance(texts, list), "texts must be a list"
391
+ assert all(isinstance(t, str) for t in texts), "all elements in texts must be strings"
382
392
  if not texts:
383
393
  return np.empty((0, 0), dtype=np.float32)
384
394
  t = time()
@@ -435,6 +445,8 @@ class VectorCache:
435
445
  return np.vstack([hit_map[h] for h in hashes])
436
446
 
437
447
  def __call__(self, texts: list[str], cache: bool = True) -> np.ndarray:
448
+ assert isinstance(texts, list), "texts must be a list"
449
+ assert all(isinstance(t, str) for t in texts), "all elements in texts must be strings"
438
450
  return self.embeds(texts, cache)
439
451
 
440
452
  def _bulk_insert(self, data: list[tuple[str, str, bytes]]) -> None:
@@ -453,6 +465,8 @@ class VectorCache:
453
465
  Precompute embeddings for a large list of texts efficiently.
454
466
  This is optimized for bulk operations when you know all texts upfront.
455
467
  """
468
+ assert isinstance(texts, list), "texts must be a list"
469
+ assert all(isinstance(t, str) for t in texts), "all elements in texts must be strings"
456
470
  if not texts:
457
471
  return
458
472
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: speedy-utils
3
- Version: 1.1.16
3
+ Version: 1.1.17
4
4
  Summary: Fast and easy-to-use package for data science
5
5
  Author: AnhVTH
6
6
  Author-email: anhvth.226@gmail.com
@@ -7,7 +7,7 @@ llm_utils/group_messages.py,sha256=Oe2tlhg-zRodG1-hodYebddrR77j9UdE05LzJw0EvYI,3
7
7
  llm_utils/lm/__init__.py,sha256=rX36_MsnekM5GHwWS56XELbm4W5x2TDwnPERDTfo0eU,194
8
8
  llm_utils/lm/async_lm/__init__.py,sha256=PUBbCuf5u6-0GBUu-2PI6YAguzsyXj-LPkU6vccqT6E,121
9
9
  llm_utils/lm/async_lm/_utils.py,sha256=P1-pUDf_0pDmo8WTIi43t5ARlyGA1RIJfpAhz-gfA5g,6105
10
- llm_utils/lm/async_lm/async_llm_task.py,sha256=iXSTbf-KekXncVVnic-v4dTq5HBDjbyLwhgo0Y-wp7Q,19167
10
+ llm_utils/lm/async_lm/async_llm_task.py,sha256=IT63_qtKWs1Svk6tic12ALxHA9du2WnAjX5xXuYH9jA,19110
11
11
  llm_utils/lm/async_lm/async_lm.py,sha256=slGOUXFeWAy3Ak7Xj7Z4JNqCgAUBo21Hjg3RD75Ul2Q,13396
12
12
  llm_utils/lm/async_lm/async_lm_base.py,sha256=Qh9Bx_S-FboO7huUi6TqP3KiTVHDH0C-Tfbd_UJC7Cc,8122
13
13
  llm_utils/lm/async_lm/lm_specific.py,sha256=KmqdCm3SJ5MqN-dRJd6S5tq5-ve1X2eNWf2CMFtc_3s,3926
@@ -18,7 +18,7 @@ llm_utils/scripts/vllm_load_balancer.py,sha256=TT5Ypq7gUcl52gRFp--ORFFjzhfGlcaX2
18
18
  llm_utils/scripts/vllm_serve.py,sha256=gJ0-y4kybMfSt8qzye1pJqGMY3x9JLRi6Tu7RjJMnss,14771
19
19
  llm_utils/vector_cache/__init__.py,sha256=i1KQuC4OhPewYpFl9X6HlWFBuASCTx2qgGizhpZhmn0,862
20
20
  llm_utils/vector_cache/cli.py,sha256=DMXTj8nZ2_LRjprbYPb4uzq04qZtOfBbmblmaqDcCuM,6251
21
- llm_utils/vector_cache/core.py,sha256=rsfZbaUk8ZbAKHcStbmxeZbk8LfTvO_prmqof-WVvC0,22663
21
+ llm_utils/vector_cache/core.py,sha256=O1C3azjPT7wyp8NOGPSSd3BPacM0fo-oSc3BdaISr6I,23769
22
22
  llm_utils/vector_cache/types.py,sha256=ru8qmUZ8_lNd3_oYpjCMtpXTsqmwsSBe56Z4hTWm3xI,435
23
23
  llm_utils/vector_cache/utils.py,sha256=dwbbXlRrARrpmS4YqSlYQqrTURg0UWe8XvaAWcX05MM,1458
24
24
  speedy_utils/__init__.py,sha256=nJpUb5Oa3STDbqPSiWXoI-IvKntyRYzYxkYW4GM2i_Q,5740
@@ -39,7 +39,7 @@ speedy_utils/multi_worker/thread.py,sha256=f02VjJV8nudg0eA_AcfPEX7tHY4-czesuzthK
39
39
  speedy_utils/scripts/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
40
40
  speedy_utils/scripts/mpython.py,sha256=IvywP7Y0_V6tWfMP-4MjPvN5_KfxWF21xaLJsCIayCk,3821
41
41
  speedy_utils/scripts/openapi_client_codegen.py,sha256=f2125S_q0PILgH5dyzoKRz7pIvNEjCkzpi4Q4pPFRZE,9683
42
- speedy_utils-1.1.16.dist-info/METADATA,sha256=euFPmJ3wunhuo_2aUpUGxfV-KlAjF8wFaEzHJqkQ6dM,7483
43
- speedy_utils-1.1.16.dist-info/WHEEL,sha256=b4K_helf-jlQoXBBETfwnf4B04YC67LOev0jo4fX5m8,88
44
- speedy_utils-1.1.16.dist-info/entry_points.txt,sha256=T1t85jwx8fK6m5msdkBGIXH5R5Kd0zSL0S6erXERPzg,237
45
- speedy_utils-1.1.16.dist-info/RECORD,,
42
+ speedy_utils-1.1.17.dist-info/METADATA,sha256=3o7AJX8v5MS-d9j-FgQWZHKikyVPmlzkm4p1-gj2ISM,7483
43
+ speedy_utils-1.1.17.dist-info/WHEEL,sha256=b4K_helf-jlQoXBBETfwnf4B04YC67LOev0jo4fX5m8,88
44
+ speedy_utils-1.1.17.dist-info/entry_points.txt,sha256=T1t85jwx8fK6m5msdkBGIXH5R5Kd0zSL0S6erXERPzg,237
45
+ speedy_utils-1.1.17.dist-info/RECORD,,