speedy-utils 1.1.13__tar.gz → 1.1.15__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {speedy_utils-1.1.13 → speedy_utils-1.1.15}/PKG-INFO +2 -1
- {speedy_utils-1.1.13 → speedy_utils-1.1.15}/pyproject.toml +2 -1
- {speedy_utils-1.1.13 → speedy_utils-1.1.15}/src/llm_utils/vector_cache/core.py +22 -3
- {speedy_utils-1.1.13 → speedy_utils-1.1.15}/src/speedy_utils/__init__.py +1 -1
- {speedy_utils-1.1.13 → speedy_utils-1.1.15}/src/speedy_utils/common/utils_io.py +1 -1
- {speedy_utils-1.1.13 → speedy_utils-1.1.15}/src/speedy_utils/common/utils_print.py +5 -5
- {speedy_utils-1.1.13 → speedy_utils-1.1.15}/README.md +0 -0
- {speedy_utils-1.1.13 → speedy_utils-1.1.15}/src/llm_utils/__init__.py +0 -0
- {speedy_utils-1.1.13 → speedy_utils-1.1.15}/src/llm_utils/chat_format/__init__.py +0 -0
- {speedy_utils-1.1.13 → speedy_utils-1.1.15}/src/llm_utils/chat_format/display.py +0 -0
- {speedy_utils-1.1.13 → speedy_utils-1.1.15}/src/llm_utils/chat_format/transform.py +0 -0
- {speedy_utils-1.1.13 → speedy_utils-1.1.15}/src/llm_utils/chat_format/utils.py +0 -0
- {speedy_utils-1.1.13 → speedy_utils-1.1.15}/src/llm_utils/group_messages.py +0 -0
- {speedy_utils-1.1.13 → speedy_utils-1.1.15}/src/llm_utils/lm/__init__.py +0 -0
- {speedy_utils-1.1.13 → speedy_utils-1.1.15}/src/llm_utils/lm/async_lm/__init__.py +0 -0
- {speedy_utils-1.1.13 → speedy_utils-1.1.15}/src/llm_utils/lm/async_lm/_utils.py +0 -0
- {speedy_utils-1.1.13 → speedy_utils-1.1.15}/src/llm_utils/lm/async_lm/async_llm_task.py +0 -0
- {speedy_utils-1.1.13 → speedy_utils-1.1.15}/src/llm_utils/lm/async_lm/async_lm.py +0 -0
- {speedy_utils-1.1.13 → speedy_utils-1.1.15}/src/llm_utils/lm/async_lm/async_lm_base.py +0 -0
- {speedy_utils-1.1.13 → speedy_utils-1.1.15}/src/llm_utils/lm/async_lm/lm_specific.py +0 -0
- {speedy_utils-1.1.13 → speedy_utils-1.1.15}/src/llm_utils/lm/openai_memoize.py +0 -0
- {speedy_utils-1.1.13 → speedy_utils-1.1.15}/src/llm_utils/lm/utils.py +0 -0
- {speedy_utils-1.1.13 → speedy_utils-1.1.15}/src/llm_utils/scripts/README.md +0 -0
- {speedy_utils-1.1.13 → speedy_utils-1.1.15}/src/llm_utils/scripts/vllm_load_balancer.py +0 -0
- {speedy_utils-1.1.13 → speedy_utils-1.1.15}/src/llm_utils/scripts/vllm_serve.py +0 -0
- {speedy_utils-1.1.13 → speedy_utils-1.1.15}/src/llm_utils/vector_cache/__init__.py +0 -0
- {speedy_utils-1.1.13 → speedy_utils-1.1.15}/src/llm_utils/vector_cache/cli.py +0 -0
- {speedy_utils-1.1.13 → speedy_utils-1.1.15}/src/llm_utils/vector_cache/types.py +0 -0
- {speedy_utils-1.1.13 → speedy_utils-1.1.15}/src/llm_utils/vector_cache/utils.py +0 -0
- {speedy_utils-1.1.13 → speedy_utils-1.1.15}/src/speedy_utils/all.py +0 -0
- {speedy_utils-1.1.13 → speedy_utils-1.1.15}/src/speedy_utils/common/__init__.py +0 -0
- {speedy_utils-1.1.13 → speedy_utils-1.1.15}/src/speedy_utils/common/clock.py +0 -0
- {speedy_utils-1.1.13 → speedy_utils-1.1.15}/src/speedy_utils/common/function_decorator.py +0 -0
- {speedy_utils-1.1.13 → speedy_utils-1.1.15}/src/speedy_utils/common/logger.py +0 -0
- {speedy_utils-1.1.13 → speedy_utils-1.1.15}/src/speedy_utils/common/notebook_utils.py +0 -0
- {speedy_utils-1.1.13 → speedy_utils-1.1.15}/src/speedy_utils/common/report_manager.py +0 -0
- {speedy_utils-1.1.13 → speedy_utils-1.1.15}/src/speedy_utils/common/utils_cache.py +0 -0
- {speedy_utils-1.1.13 → speedy_utils-1.1.15}/src/speedy_utils/common/utils_misc.py +0 -0
- {speedy_utils-1.1.13 → speedy_utils-1.1.15}/src/speedy_utils/multi_worker/__init__.py +0 -0
- {speedy_utils-1.1.13 → speedy_utils-1.1.15}/src/speedy_utils/multi_worker/process.py +0 -0
- {speedy_utils-1.1.13 → speedy_utils-1.1.15}/src/speedy_utils/multi_worker/thread.py +0 -0
- {speedy_utils-1.1.13 → speedy_utils-1.1.15}/src/speedy_utils/scripts/__init__.py +0 -0
- {speedy_utils-1.1.13 → speedy_utils-1.1.15}/src/speedy_utils/scripts/mpython.py +0 -0
- {speedy_utils-1.1.13 → speedy_utils-1.1.15}/src/speedy_utils/scripts/openapi_client_codegen.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.3
|
|
2
2
|
Name: speedy-utils
|
|
3
|
-
Version: 1.1.13
|
|
3
|
+
Version: 1.1.15
|
|
4
4
|
Summary: Fast and easy-to-use package for data science
|
|
5
5
|
Author: AnhVTH
|
|
6
6
|
Author-email: anhvth.226@gmail.com
|
|
@@ -25,6 +25,7 @@ Requires-Dist: jupyterlab
|
|
|
25
25
|
Requires-Dist: loguru
|
|
26
26
|
Requires-Dist: matplotlib
|
|
27
27
|
Requires-Dist: numpy
|
|
28
|
+
Requires-Dist: openai (>=1.106.0,<2.0.0)
|
|
28
29
|
Requires-Dist: packaging (>=23.2,<25)
|
|
29
30
|
Requires-Dist: pandas
|
|
30
31
|
Requires-Dist: pydantic
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
[tool.poetry]
|
|
2
2
|
name = "speedy-utils"
|
|
3
|
-
version = "1.1.13"
|
|
3
|
+
version = "1.1.15"
|
|
4
4
|
description = "Fast and easy-to-use package for data science"
|
|
5
5
|
authors = ["AnhVTH <anhvth.226@gmail.com>"]
|
|
6
6
|
readme = "README.md"
|
|
@@ -58,6 +58,7 @@ json-repair = ">=0.25.0,<0.31.0"
|
|
|
58
58
|
fastprogress = "*"
|
|
59
59
|
freezegun = "^1.5.1"
|
|
60
60
|
packaging = ">=23.2,<25"
|
|
61
|
+
openai = "^1.106.0"
|
|
61
62
|
|
|
62
63
|
[tool.poetry.scripts]
|
|
63
64
|
mpython = "speedy_utils.scripts.mpython:main"
|
|
@@ -4,6 +4,7 @@ import hashlib
|
|
|
4
4
|
import os
|
|
5
5
|
import sqlite3
|
|
6
6
|
from pathlib import Path
|
|
7
|
+
from time import time
|
|
7
8
|
from typing import Any, Dict, Literal, Optional, cast
|
|
8
9
|
|
|
9
10
|
import numpy as np
|
|
@@ -78,7 +79,7 @@ class VectorCache:
|
|
|
78
79
|
self.config = {
|
|
79
80
|
# OpenAI
|
|
80
81
|
"api_key": api_key or os.getenv("OPENAI_API_KEY"),
|
|
81
|
-
"model_name": model_name,
|
|
82
|
+
"model_name": self._try_infer_model_name(model_name),
|
|
82
83
|
# vLLM
|
|
83
84
|
"vllm_gpu_memory_utilization": vllm_gpu_memory_utilization,
|
|
84
85
|
"vllm_tensor_parallel_size": vllm_tensor_parallel_size,
|
|
@@ -164,7 +165,22 @@ class VectorCache:
|
|
|
164
165
|
|
|
165
166
|
# Default to vllm for local models
|
|
166
167
|
return "vllm"
|
|
167
|
-
|
|
168
|
+
def _try_infer_model_name(self, model_name: Optional[str]) -> Optional[str]:
|
|
169
|
+
"""Infer model name for OpenAI backend if not explicitly provided."""
|
|
170
|
+
# if self.backend != "openai":
|
|
171
|
+
# return model_name
|
|
172
|
+
if model_name:
|
|
173
|
+
return model_name
|
|
174
|
+
if 'https://' in self.url_or_model:
|
|
175
|
+
model_name = "text-embedding-3-small"
|
|
176
|
+
if 'http://localhost' in self.url_or_model:
|
|
177
|
+
from openai import OpenAI
|
|
178
|
+
client = OpenAI(base_url=self.url_or_model, api_key='abc')
|
|
179
|
+
model_name = client.models.list().data[0].id
|
|
180
|
+
|
|
181
|
+
# Default model name
|
|
182
|
+
print('Infer model name:', model_name)
|
|
183
|
+
return model_name
|
|
168
184
|
def _optimize_connection(self) -> None:
|
|
169
185
|
"""Optimize SQLite connection for bulk operations."""
|
|
170
186
|
# Performance optimizations for bulk operations
|
|
@@ -366,7 +382,7 @@ class VectorCache:
|
|
|
366
382
|
"""
|
|
367
383
|
if not texts:
|
|
368
384
|
return np.empty((0, 0), dtype=np.float32)
|
|
369
|
-
|
|
385
|
+
t = time()
|
|
370
386
|
hashes = [self._hash_text(t) for t in texts]
|
|
371
387
|
|
|
372
388
|
# Helper to yield chunks
|
|
@@ -414,6 +430,9 @@ class VectorCache:
|
|
|
414
430
|
self._bulk_insert(bulk_insert_data)
|
|
415
431
|
|
|
416
432
|
# Return embeddings in the original order
|
|
433
|
+
elapsed = time() - t
|
|
434
|
+
if self.verbose:
|
|
435
|
+
print(f"Retrieved {len(texts)} embeddings in {elapsed:.2f} seconds")
|
|
417
436
|
return np.vstack([hit_map[h] for h in hashes])
|
|
418
437
|
|
|
419
438
|
def __call__(self, texts: list[str], cache: bool = True) -> np.ndarray:
|
|
@@ -18,7 +18,7 @@
|
|
|
18
18
|
# • memoize(func) -> Callable - Function result caching decorator
|
|
19
19
|
# • identify(obj: Any) -> str - Generate unique object identifier
|
|
20
20
|
# • identify_uuid(obj: Any) -> str - Generate UUID-based object identifier
|
|
21
|
-
# • load_by_ext(fname: str
|
|
21
|
+
# • load_by_ext(fname: Union[str, list[str]]) -> Any - Auto-detect file format loader
|
|
22
22
|
# • dump_json_or_pickle(obj: Any, fname: str) -> None - Smart file serializer
|
|
23
23
|
# • load_json_or_pickle(fname: str) -> Any - Smart file deserializer
|
|
24
24
|
# • multi_thread(func, items, **kwargs) -> list - Parallel thread execution
|
|
@@ -92,7 +92,7 @@ def load_jsonl(path):
|
|
|
92
92
|
return [json.loads(line) for line in lines]
|
|
93
93
|
|
|
94
94
|
|
|
95
|
-
def load_by_ext(fname: str
|
|
95
|
+
def load_by_ext(fname: Union[str, list[str]], do_memoize: bool = False) -> Any:
|
|
96
96
|
"""
|
|
97
97
|
Load data based on file extension.
|
|
98
98
|
"""
|
|
@@ -3,7 +3,7 @@
|
|
|
3
3
|
import copy
|
|
4
4
|
import pprint
|
|
5
5
|
import textwrap
|
|
6
|
-
from typing import Any
|
|
6
|
+
from typing import Any, Union
|
|
7
7
|
|
|
8
8
|
from tabulate import tabulate
|
|
9
9
|
|
|
@@ -24,17 +24,17 @@ def flatten_dict(d, parent_key="", sep="."):
|
|
|
24
24
|
|
|
25
25
|
def fprint(
|
|
26
26
|
input_data: Any,
|
|
27
|
-
key_ignore: list[str]
|
|
28
|
-
key_keep: list[str]
|
|
27
|
+
key_ignore: Union[list[str], None] = None,
|
|
28
|
+
key_keep: Union[list[str], None] = None,
|
|
29
29
|
max_width: int = 100,
|
|
30
30
|
indent: int = 2,
|
|
31
|
-
depth: int
|
|
31
|
+
depth: Union[int, None] = None,
|
|
32
32
|
table_format: str = "grid",
|
|
33
33
|
str_wrap_width: int = 80,
|
|
34
34
|
grep=None,
|
|
35
35
|
is_notebook=None,
|
|
36
36
|
f=print,
|
|
37
|
-
) -> None:
|
|
37
|
+
) -> Union[None, str]:
|
|
38
38
|
"""
|
|
39
39
|
Pretty print structured data.
|
|
40
40
|
"""
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{speedy_utils-1.1.13 → speedy_utils-1.1.15}/src/speedy_utils/scripts/openapi_client_codegen.py
RENAMED
|
File without changes
|