speedy-utils 1.0.3__py3-none-any.whl → 1.0.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- llm_utils/__init__.py +29 -0
- llm_utils/chat_format.py +427 -0
- llm_utils/group_messages.py +120 -0
- llm_utils/lm/__init__.py +8 -0
- llm_utils/lm/base_lm.py +304 -0
- llm_utils/lm/utils.py +130 -0
- llm_utils/scripts/vllm_load_balancer.py +353 -0
- llm_utils/scripts/vllm_serve.py +416 -0
- speedy_utils/__init__.py +85 -0
- speedy_utils/all.py +159 -0
- {speedy → speedy_utils}/common/__init__.py +0 -0
- speedy_utils/common/clock.py +215 -0
- speedy_utils/common/function_decorator.py +66 -0
- speedy_utils/common/logger.py +207 -0
- speedy_utils/common/report_manager.py +112 -0
- speedy_utils/common/utils_cache.py +264 -0
- {speedy → speedy_utils}/common/utils_io.py +66 -19
- {speedy → speedy_utils}/common/utils_misc.py +25 -11
- speedy_utils/common/utils_print.py +216 -0
- speedy_utils/multi_worker/__init__.py +0 -0
- speedy_utils/multi_worker/process.py +198 -0
- speedy_utils/multi_worker/thread.py +327 -0
- speedy_utils/scripts/mpython.py +108 -0
- speedy_utils-1.0.5.dist-info/METADATA +279 -0
- speedy_utils-1.0.5.dist-info/RECORD +27 -0
- {speedy_utils-1.0.3.dist-info → speedy_utils-1.0.5.dist-info}/WHEEL +1 -2
- speedy_utils-1.0.5.dist-info/entry_points.txt +3 -0
- speedy/__init__.py +0 -53
- speedy/common/clock.py +0 -68
- speedy/common/utils_cache.py +0 -170
- speedy/common/utils_print.py +0 -138
- speedy/multi_worker.py +0 -121
- speedy_utils-1.0.3.dist-info/METADATA +0 -22
- speedy_utils-1.0.3.dist-info/RECORD +0 -12
- speedy_utils-1.0.3.dist-info/top_level.txt +0 -1
speedy/common/utils_print.py
DELETED
|
@@ -1,138 +0,0 @@
|
|
|
1
|
-
# utils/utils_print.py
|
|
2
|
-
|
|
3
|
-
import copy
|
|
4
|
-
import json
|
|
5
|
-
import pprint
|
|
6
|
-
import textwrap
|
|
7
|
-
from typing import Any, Dict, List, Optional
|
|
8
|
-
|
|
9
|
-
from IPython.display import HTML, display
|
|
10
|
-
from pandas import get_option
|
|
11
|
-
from tabulate import tabulate
|
|
12
|
-
|
|
13
|
-
# from .utils_cache import is_interactive_env # Adjust based on actual implementation
|
|
14
|
-
# from .utils_misc import is_interactive
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
def display_pretty_table_html(data: Dict) -> None:
    """
    Display a two-column HTML table of ``data`` in a Jupyter notebook.

    Every key/value pair becomes one ``<tr>`` row. Keys and values are
    converted with ``str()`` and HTML-escaped, so text such as
    ``<class 'int'>`` or markup embedded in the data is shown literally
    instead of being interpreted (or hidden) by the browser.

    Args:
        data: Mapping whose items are rendered as ``key | value`` rows.
    """
    # Local import so the block does not depend on a new file-level import.
    from html import escape

    # Build all rows in one pass instead of repeated string concatenation.
    rows = "".join(
        f"<tr><td>{escape(str(key))}</td><td>{escape(str(value))}</td></tr>"
        for key, value in data.items()
    )
    display(HTML(f"<table>{rows}</table>"))
|
|
26
|
-
|
|
27
|
-
def fprint(
    input_data: Any,
    key_ignore: Optional[List[str]] = None,
    key_keep: Optional[List[str]] = None,
    max_width: int = 100,
    indent: int = 2,
    depth: Optional[int] = None,
    table_format: str = "grid",
    str_wrap_width: int = 80,
) -> None:
    """
    Pretty print structured data.

    Objects exposing ``to_dict()`` are converted first. Dictionaries are
    optionally filtered and then shown as a key/value table (HTML inside a
    Jupyter kernel, ``tabulate`` text otherwise); strings are wrapped with
    ``textwrap.fill``; everything else goes through ``pprint``.

    Args:
        input_data: The value to print. It is deep-copied, never mutated.
        key_ignore: Dotted key paths (``"a.b"``) to drop from a dict. Only
            used when ``key_keep`` is ``None``.
        key_keep: Dotted key paths to keep; takes precedence over
            ``key_ignore``.
        max_width: Maximum width of the value column (tables) or of the
            output (``pprint``).
        indent: Indentation passed to ``pprint.PrettyPrinter``.
        depth: Maximum nesting depth passed to ``pprint.PrettyPrinter``.
        table_format: ``tabulate`` table format name.
        str_wrap_width: Wrap width used for plain strings.
    """

    def is_interactive_env() -> bool:
        """Return True when running inside a Jupyter (ZMQ) kernel."""
        try:
            from IPython import get_ipython
        except ImportError:
            return False
        shell = get_ipython()  # None outside of any IPython session
        return shell is not None and shell.__class__.__name__ == "ZMQInteractiveShell"

    def remove_keys(d: Dict, keys: List[str]) -> Dict:
        """Remove the given dotted key paths from ``d`` in place."""
        for key in keys:
            *parents, leaf = key.split(".")
            node = d
            for part in parents:
                if not isinstance(node, dict):
                    break
                node = node.get(part)
            # Paths that run through a missing or non-dict value are ignored.
            if isinstance(node, dict):
                node.pop(leaf, None)
        return d

    def keep_keys(d: Dict, keys: List[str]) -> Dict:
        """Return a new dict holding only the given dotted key paths."""
        result: Dict = {}
        for key in keys:
            *parents, leaf = key.split(".")
            node = d
            for part in parents:
                if not isinstance(node, dict) or part not in node:
                    node = None
                    break
                node = node[part]
            if not isinstance(node, dict):
                # Unreachable path: skip it without leaving empty parents.
                continue
            target = result
            for part in parents:
                target = target.setdefault(part, {})
            # A missing leaf is kept as None, as before.
            target[leaf] = copy.deepcopy(node.get(leaf))
        return result

    if hasattr(input_data, "to_dict"):
        input_data = input_data.to_dict()

    # Work on a copy so key filtering never touches the caller's object.
    processed_data = copy.deepcopy(input_data)

    if isinstance(processed_data, dict):
        if key_keep is not None:
            processed_data = keep_keys(processed_data, key_keep)
        elif key_ignore is not None:
            processed_data = remove_keys(processed_data, key_ignore)

        # The HTML table only makes sense for mappings; other types always
        # use the text paths below, even inside a notebook.
        if is_interactive_env():
            display_pretty_table_html(processed_data)
            return

        table = [[k, v] for k, v in processed_data.items()]
        print(
            tabulate(
                table,
                headers=["Key", "Value"],
                tablefmt=table_format,
                maxcolwidths=[None, max_width],
            )
        )
    elif isinstance(processed_data, str):
        print(textwrap.fill(processed_data, width=str_wrap_width))
    else:
        printer = pprint.PrettyPrinter(width=max_width, indent=indent, depth=depth)
        printer.pprint(processed_data)
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
def print_table(data: Any) -> None:
    """
    Print data as a table.

    Args:
        data: A dictionary (printed as ``Key``/``Value`` rows), a list of
            dictionaries (one row per item, one column per key), or a JSON
            string that decodes to one of those.

    Raises:
        ValueError: If a string is not valid JSON, or a list contains
            something other than dictionaries.
        TypeError: If the (decoded) data is neither a list nor a dict.
    """

    def __get_table(data: Any) -> str:
        if isinstance(data, str):
            try:
                data = json.loads(data)
            except json.JSONDecodeError as exc:
                raise ValueError("String input could not be decoded as JSON") from exc

        if isinstance(data, list):
            if not all(isinstance(item, dict) for item in data):
                raise ValueError("List must contain dictionaries")
            if not data:
                # Nothing to tabulate; avoid indexing into an empty list.
                return ""
            # Union of keys in first-seen order, so rows stay aligned even
            # when the dictionaries differ in keys or key order.
            headers = list(dict.fromkeys(key for item in data for key in item))
            rows = [[item.get(header) for header in headers] for item in data]
            return tabulate(rows, headers=headers)

        if isinstance(data, dict):
            return tabulate(list(data.items()), headers=["Key", "Value"])

        raise TypeError(
            "Input data must be a list of dictionaries, a dictionary, or a JSON string"
        )

    print(__get_table(data))
|
speedy/multi_worker.py
DELETED
|
@@ -1,121 +0,0 @@
|
|
|
1
|
-
import inspect
|
|
2
|
-
import os
|
|
3
|
-
from concurrent.futures import ThreadPoolExecutor
|
|
4
|
-
from multiprocessing import Pool
|
|
5
|
-
from typing import Any, Callable, List
|
|
6
|
-
import asyncio
|
|
7
|
-
from loguru import logger
|
|
8
|
-
from tqdm import tqdm
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
def multi_thread(
|
|
12
|
-
func: Callable,
|
|
13
|
-
inputs: List[Any],
|
|
14
|
-
workers: int = 4,
|
|
15
|
-
verbose: bool = True,
|
|
16
|
-
desc: str | None = None,
|
|
17
|
-
) -> List[Any]:
|
|
18
|
-
if desc is None:
|
|
19
|
-
fn_name = func.__name__
|
|
20
|
-
try:
|
|
21
|
-
source_file = inspect.getsourcefile(func) or "<string>"
|
|
22
|
-
source_line = inspect.getsourcelines(func)[1]
|
|
23
|
-
file_line = f"{source_file}:{source_line}"
|
|
24
|
-
except (TypeError, OSError):
|
|
25
|
-
file_line = "Unknown location"
|
|
26
|
-
desc = f"{fn_name} at {file_line}"
|
|
27
|
-
|
|
28
|
-
with ThreadPoolExecutor(max_workers=workers) as executor:
|
|
29
|
-
# Use executor.map to apply func to inputs in order
|
|
30
|
-
map_func = executor.map(func, inputs)
|
|
31
|
-
if verbose:
|
|
32
|
-
results = list(tqdm(map_func, total=len(inputs), desc=desc))
|
|
33
|
-
else:
|
|
34
|
-
results = list(map_func)
|
|
35
|
-
return results
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
def _init_pool_processes(func):
    """Store ``func`` in the module-level ``_func`` global.

    Used as the ``initializer`` of the process pool, so that
    ``_pool_process_executor`` can look the callable up by name.
    """
    globals()["_func"] = func
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
def _pool_process_executor(args):
    """Call the module-level ``_func`` with one work item.

    A tuple is unpacked into positional arguments; any other value is
    passed as the single argument.
    """
    positional = args if isinstance(args, tuple) else (args,)
    return _func(*positional)
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
def multi_process(
    func: Callable,
    inputs: List[Any],
    workers: int = 16,
    verbose: bool = True,
    desc: str = "",
) -> List[Any]:
    """
    Apply ``func`` to every item of ``inputs`` using a process pool.

    Tuple items are unpacked into positional arguments by
    ``_pool_process_executor``; other items are passed as one argument.
    Setting the environment variable ``DEBUG=1`` forces a single worker.

    Error handling is best-effort: an exception raised while collecting
    results is logged and not re-raised. With ``verbose=True`` the results
    gathered before the failure are returned; with ``verbose=False`` an
    empty list is returned.

    Args:
        func: Callable to run in the worker processes. Callables without a
            ``__name__`` (e.g. ``functools.partial`` objects) are accepted.
        inputs: Items to process.
        workers: Number of worker processes.
        verbose: When True, show a ``tqdm`` progress bar.
        desc: Label for the log line and progress bar. When empty it
            defaults to ``"<function name> at <file>:<line>"``.

    Returns:
        The results in input order (possibly partial, see above).
    """
    if not desc:
        # Not every callable has __name__; fall back to its type name.
        fn_name = getattr(func, "__name__", None) or type(func).__name__
        try:
            source_file = inspect.getsourcefile(func) or "<string>"
            source_line = inspect.getsourcelines(func)[1]
            file_line = f"{source_file}:{source_line}"
        except (TypeError, OSError):
            file_line = "Unknown location"
        desc = f"{fn_name} at {file_line}"

    if os.environ.get("DEBUG", "0") == "1":
        logger.info("DEBUGGING set num workers to 1")
        workers = 1

    logger.info("Multi-processing {} | Num samples: {}", desc, len(inputs))

    results = []
    with Pool(
        processes=workers, initializer=_init_pool_processes, initargs=(func,)
    ) as pool:
        try:
            if verbose:
                # Append one by one so partial results survive a failure.
                for result in tqdm(
                    pool.imap(_pool_process_executor, inputs),
                    total=len(inputs),
                    desc=desc,
                ):
                    results.append(result)
            else:
                results = pool.map(_pool_process_executor, inputs)
        except Exception as e:
            logger.error("[multiprocess] Error {}", e)

    return results
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
async def async_multi_thread(f, inputs, desc="", user_tqdm=True):
    """
    Run ``f`` over ``inputs`` in worker threads and await all results.

    Each call is dispatched with ``asyncio.to_thread``; results are placed
    back at the index of their input, so the output order matches
    ``inputs`` regardless of completion order.

    Usage:
        inputs = list(range(1, 11))
        def function(i):
            time.sleep(1)
            return 1/i
        results = await async_multi_thread(function, inputs)

    Args:
        f: Callable invoked with one item at a time.
        inputs: Items to process.
        desc: Progress bar label; defaults to the name of ``f``.
        user_tqdm: When False, the progress bar is disabled.

    Returns:
        A list of results aligned with ``inputs``.
    """

    def ensure_output_idx(idx_i):
        # Carry the input index along so results can be re-ordered.
        idx, i = idx_i
        return idx, f(i)

    tasks = [asyncio.to_thread(ensure_output_idx, i) for i in enumerate(inputs)]
    if not desc:
        # Not every callable has __name__; fall back to its type name.
        desc = getattr(f, "__name__", None) or type(f).__name__

    results = [None] * len(inputs)
    # The context manager closes the bar even if a task raises.
    with tqdm(total=len(inputs), desc=desc, disable=not user_tqdm) as pbar:
        for task in asyncio.as_completed(tasks):
            idx, result = await task
            results[idx] = result
            pbar.update(1)
    return results
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
__all__ = ["multi_thread", "multi_process", "async_multi_thread"]
|
|
@@ -1,22 +0,0 @@
|
|
|
1
|
-
Metadata-Version: 2.1
|
|
2
|
-
Name: speedy-utils
|
|
3
|
-
Version: 1.0.3
|
|
4
|
-
Summary: Fast and easy-to-use package for data science
|
|
5
|
-
Home-page: https://github.com/anhvth/speedy
|
|
6
|
-
Author: AnhVTH
|
|
7
|
-
Author-email: anhvth.226@gmail.com
|
|
8
|
-
Requires-Dist: numpy
|
|
9
|
-
Requires-Dist: requests
|
|
10
|
-
Requires-Dist: xxhash
|
|
11
|
-
Requires-Dist: loguru
|
|
12
|
-
Requires-Dist: fastcore
|
|
13
|
-
Requires-Dist: debugpy
|
|
14
|
-
Requires-Dist: ipywidgets
|
|
15
|
-
Requires-Dist: jupyterlab
|
|
16
|
-
Requires-Dist: ipdb
|
|
17
|
-
Requires-Dist: scikit-learn
|
|
18
|
-
Requires-Dist: matplotlib
|
|
19
|
-
Requires-Dist: pandas
|
|
20
|
-
Requires-Dist: tabulate
|
|
21
|
-
Requires-Dist: pydantic
|
|
22
|
-
|
|
@@ -1,12 +0,0 @@
|
|
|
1
|
-
speedy/__init__.py,sha256=GYw4FDnK1hy8_WpJxJyJEBCYh4IU5joxwG_s7uqIKKg,1147
|
|
2
|
-
speedy/multi_worker.py,sha256=kmk_Km6LkOUVntxmPKSYubMXFJBPoLoZW9NVi4UA9kc,3394
|
|
3
|
-
speedy/common/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
4
|
-
speedy/common/clock.py,sha256=q92SvdJLZyCtuq8C6m7GjS6_foI8yyspsKzeR_M1owo,2033
|
|
5
|
-
speedy/common/utils_cache.py,sha256=_qxGyISJBM-r0NDSUCYSEWnEsqprORdw3yXmIkns1D4,5656
|
|
6
|
-
speedy/common/utils_io.py,sha256=X2zFgVzfYM094auApGjYOO43NgnG5P_g7rOObzPYTrQ,3472
|
|
7
|
-
speedy/common/utils_misc.py,sha256=ooMb0xEjC-HrQSQQCTRlEqcrGTJptmA43azSi6BhiD4,1479
|
|
8
|
-
speedy/common/utils_print.py,sha256=jh_ihzWTEiPpFRiLoj6It8Zo_3tPxYi8MU1J3Nw3vwk,4273
|
|
9
|
-
speedy_utils-1.0.3.dist-info/METADATA,sha256=6rSrp8BSLNGKxb7jvIO4LnK90575ycvZqBr8EFW_Tzw,538
|
|
10
|
-
speedy_utils-1.0.3.dist-info/WHEEL,sha256=GV9aMThwP_4oNCtvEC2ec3qUYutgWeAzklro_0m4WJQ,91
|
|
11
|
-
speedy_utils-1.0.3.dist-info/top_level.txt,sha256=eJxFW_gum7StgovqwA4v-9UndgnnWr4kUqcozY-aBmI,7
|
|
12
|
-
speedy_utils-1.0.3.dist-info/RECORD,,
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
speedy
|