speedy-utils 1.1.17__py3-none-any.whl → 1.1.19__py3-none-any.whl
This diff shows the content differences between publicly released versions of the package as they appear in their public registries. It is provided for informational purposes only.
- llm_utils/__init__.py +9 -1
- llm_utils/chat_format/display.py +109 -14
- llm_utils/lm/__init__.py +12 -11
- llm_utils/lm/async_lm/async_llm_task.py +1 -10
- llm_utils/lm/async_lm/async_lm.py +13 -4
- llm_utils/lm/async_lm/async_lm_base.py +24 -14
- llm_utils/lm/base_prompt_builder.py +288 -0
- llm_utils/lm/llm_task.py +693 -0
- llm_utils/lm/lm.py +207 -0
- llm_utils/lm/lm_base.py +285 -0
- llm_utils/lm/openai_memoize.py +2 -2
- llm_utils/vector_cache/core.py +285 -89
- speedy_utils/__init__.py +2 -1
- speedy_utils/common/patcher.py +68 -0
- speedy_utils/common/utils_cache.py +6 -6
- speedy_utils/common/utils_io.py +238 -8
- speedy_utils/multi_worker/process.py +180 -192
- speedy_utils/multi_worker/thread.py +94 -2
- {speedy_utils-1.1.17.dist-info → speedy_utils-1.1.19.dist-info}/METADATA +36 -14
- {speedy_utils-1.1.17.dist-info → speedy_utils-1.1.19.dist-info}/RECORD +24 -19
- {speedy_utils-1.1.17.dist-info → speedy_utils-1.1.19.dist-info}/WHEEL +1 -1
- speedy_utils-1.1.19.dist-info/entry_points.txt +5 -0
- speedy_utils-1.1.17.dist-info/entry_points.txt +0 -6

Note: in the hunks below, lines removed from 1.1.17 are only partially preserved by the source view; surviving fragments are reproduced as-is and collapsed runs are marked with bracketed placeholders.

--- speedy_utils/multi_worker/process.py (1.1.17)
+++ speedy_utils/multi_worker/process.py (1.1.19)
@@ -1,203 +1,191 @@
-import os
-[… further removed imports and module setup not preserved in the source view …]
-T = TypeVar("T")
-
-if hasattr(multiprocessing, "set_start_method"):
-    try:
-        multiprocessing.set_start_method("spawn", force=True)
-    except RuntimeError:
-        pass
-
+# ray_multi_process.py
+import time, os, pickle, uuid, datetime, multiprocessing
+from pathlib import Path
+from typing import Any, Callable
+from tqdm import tqdm
+import psutil
+import threading
+ray: Any
 try:
-[… removed lines 18-46 not preserved in the source view …]
+    import ray as ray  # type: ignore
+    _HAS_RAY = True
+except Exception:  # pragma: no cover
+    ray = None  # type: ignore
+    _HAS_RAY = False
+from fastcore.parallel import parallel
+
+
+# ─── cache helpers ──────────────────────────────────────────
+
+def _build_cache_dir(func: Callable, items: list[Any]) -> Path:
+    """Build cache dir with function name + timestamp."""
+    func_name = getattr(func, "__name__", "func")
+    now = datetime.datetime.now()
+    stamp = now.strftime("%m%d_%Hh%Mm%Ss")
+    run_id = f"{func_name}_{stamp}_{uuid.uuid4().hex[:6]}"
+    path = Path(".cache") / run_id
+    path.mkdir(parents=True, exist_ok=True)
+    return path
+
+def wrap_dump(func: Callable, cache_dir: Path | None):
+    """Wrap a function so results are dumped to .pkl when cache_dir is set."""
+    if cache_dir is None:
+        return func
+
+    def wrapped(x, *args, **kwargs):
+        res = func(x, *args, **kwargs)
+        p = cache_dir / f"{uuid.uuid4().hex}.pkl"
+        with open(p, "wb") as fh:
+            pickle.dump(res, fh)
+        return str(p)
+    return wrapped
+
+# ─── ray management ─────────────────────────────────────────
+
+RAY_WORKER = None
+
+def ensure_ray(workers: int, pbar: tqdm | None = None):
+    """Initialize or reinitialize Ray with a given worker count, log to bar postfix."""
+    global RAY_WORKER
+    if not ray.is_initialized() or RAY_WORKER != workers:
+        if ray.is_initialized() and pbar:
+            pbar.set_postfix_str(f"Restarting Ray {workers} workers")
+        ray.shutdown()
+        t0 = time.time()
+        ray.init(num_cpus=workers, ignore_reinit_error=True)
+        took = time.time() - t0
+        if pbar:
+            pbar.set_postfix_str(f"ray.init {workers} took {took:.2f}s")
+        RAY_WORKER = workers
+
+# ─── main API ───────────────────────────────────────────────
+from typing import Literal

-def _worker_process(
-    func: Callable, item_batch: Any, fixed_kwargs: dict, batch_size: int
-):
-    """Worker function executed in each process."""
-    if batch_size > 1:
-        results = []
-        for itm in item_batch:
-            try:
-                results.append(_safe_call(func, itm, fixed_kwargs))
-            except Exception:
-                results.append(None)
-        return results
-    return _safe_call(func, item_batch, fixed_kwargs)
-
-
-# ──── public API ──────────────────────────────────────────────────────────
 def multi_process(
     func: Callable[[Any], Any],
-[… removed parameter line not preserved in the source view …]
+    items: list[Any] | None = None,
     *,
-    progress: bool =
-[… further removed parameter lines not preserved in the source view …]
+    inputs: list[Any] | None = None,
+    workers: int | None = None,
+    lazy_output: bool = False,
+    progress: bool = True,
+    # backend: str = "ray", # "seq", "ray", or "fastcore"
+    backend: Literal["seq", "ray", "mp", "threadpool", "safe"] | None = None,
+    # Additional optional knobs (accepted for compatibility)
+    batch: int | None = None,
+    ordered: bool | None = None,
+    process_update_interval: int | None = None,
+    stop_on_error: bool | None = None,
+    **func_kwargs: Any,
 ) -> list[Any]:
     """
-[… removed docstring lines only partially preserved; surviving fragments follow …]
-        *(default: ``workers × 4``)*.
-    timeout – overall timeout for the mapping (seconds).
-    stop_on_error – raise immediately on first exception (default) or
-        substitute failing result with ``None``.
-    **fixed_kwargs – static keyword args forwarded to every ``func()`` call.
+    Multi-process map with selectable backend.
+
+    backend:
+      - "seq": run sequentially
+      - "ray": run in parallel with Ray
+      - "mp": run in parallel with multiprocessing (uses threadpool to avoid fork warnings)
+      - "threadpool": run in parallel with thread pool
+      - "safe": run in parallel with thread pool (explicitly safe for tests)
+
+    If lazy_output=True, every result is saved to .pkl and
+    the returned list contains file paths.
     """
+
+    # default backend selection
+    if backend is None:
+        backend = "ray" if _HAS_RAY else "mp"
+
+    # unify items
+    if items is None and inputs is not None:
+        items = list(inputs)
+    if items is None:
+        raise ValueError("'items' or 'inputs' must be provided")

     if workers is None:
         workers = os.cpu_count() or 1
-[… removed executor/batching implementation mostly not preserved; surviving fragments follow …]
-                futures.add(fut2)
-                next_idx += len(arg) if batch > 1 else 1
-            except StopIteration:
-                pass
-            break
-
-    if bar:
-        bar.update(completed - last_bar)
-        bar.close()
-
-    return results
-
-
-__all__ = ["multi_process"]
+
+    # build cache dir + wrap func
+    cache_dir = _build_cache_dir(func, items) if lazy_output else None
+    f_wrapped = wrap_dump(func, cache_dir)
+
+    total = len(items)
+    with tqdm(total=total, desc=f"multi_process [{backend}]", disable=not progress) as pbar:
+
+        # ---- sequential backend ----
+        if backend == "seq":
+            pbar.set_postfix_str("backend=seq")
+            results = []
+            for x in items:
+                results.append(f_wrapped(x, **func_kwargs))
+                pbar.update(1)
+            return results
+
+        # ---- ray backend ----
+        if backend == "ray":
+            if not _HAS_RAY:
+                msg = (
+                    "Ray backend requested but 'ray' is not installed. "
+                    "Install extra: pip install 'speedy-utils[ray]' or "
+                    "poetry install -E ray."
+                )
+                raise RuntimeError(msg)
+            pbar.set_postfix_str("backend=ray")
+            ensure_ray(workers, pbar)
+
+            @ray.remote
+            def _task(x):
+                return f_wrapped(x, **func_kwargs)
+
+            refs = [_task.remote(x) for x in items]
+
+            results = []
+            for r in refs:
+                results.append(ray.get(r))
+                pbar.update(1)
+            return results
+
+        # ---- fastcore backend ----
+        if backend == "mp":
+            # Use threadpool instead of multiprocessing to avoid fork warnings
+            # in multi-threaded environments like pytest
+            results = parallel(f_wrapped, items, n_workers=workers, progress=progress, threadpool=True)
+            return list(results)
+        if backend == "threadpool":
+            results = parallel(f_wrapped, items, n_workers=workers, progress=progress, threadpool=True)
+            return list(results)
+        if backend == "safe":
+            # Completely safe backend for tests - no multiprocessing, no external progress bars
+            import concurrent.futures
+            with concurrent.futures.ThreadPoolExecutor(max_workers=workers) as executor:
+                results = list(executor.map(f_wrapped, items))
+        raise ValueError(f"Unsupported backend: {backend!r}")
+
+
+def cleanup_phantom_workers():
+    """
+    Kill all child processes (phantom workers) without killing the Jupyter kernel itself.
+    Also lists non-daemon threads that remain.
+    """
+    parent = psutil.Process(os.getpid())
+
+    # Kill only children, never the current process
+    for child in parent.children(recursive=True):
+        try:
+            print(f"🔪 Killing child process {child.pid} ({child.name()})")
+            child.kill()
+        except psutil.NoSuchProcess:
+            pass
+
+    # Report stray threads (can't hard-kill them in Python)
+    for t in threading.enumerate():
+        if t is threading.current_thread():
+            continue
+        if not t.daemon:
+            print(f"⚠️ Thread {t.name} is still running (cannot be force-killed).")
+
+    print("✅ Cleaned up child processes (kernel untouched).")
+
+# Usage: run this anytime after cancelling a cell
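
For orientation, a minimal usage sketch of the backend-selectable `multi_process` added above. It assumes `multi_process` is re-exported from the top-level package (otherwise import it from `speedy_utils.multi_worker.process`); the example function and data are illustrative and not part of the diff.

```python
import pickle

from speedy_utils import multi_process  # assumed re-export; the module itself is speedy_utils.multi_worker.process


def square(x: int) -> int:
    return x * x


# Parallel map; per the code above, backend defaults to "ray" when ray imports cleanly, else "mp".
results = multi_process(square, items=list(range(100)), workers=4)

# Force the thread-pool backend and keep results on disk: with lazy_output=True
# each entry of the returned list is the path of a pickled result under .cache/.
paths = multi_process(square, items=list(range(100)), backend="threadpool", lazy_output=True)
with open(paths[0], "rb") as fh:
    first = pickle.load(fh)
```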

--- speedy_utils/multi_worker/thread.py (1.1.17)
+++ speedy_utils/multi_worker/thread.py (1.1.19)
@@ -77,7 +77,9 @@
 # ============================================================================= #
 """

+import ctypes
 import os
+import threading
 import time
 import traceback
 from collections.abc import Callable, Iterable
@@ -98,6 +100,42 @@ DEFAULT_WORKERS = (os.cpu_count() or 4) * 2
 T = TypeVar("T")
 R = TypeVar("R")

+SPEEDY_RUNNING_THREADS: list[threading.Thread] = []
+_SPEEDY_THREADS_LOCK = threading.Lock()
+
+_PY_SET_ASYNC_EXC = ctypes.pythonapi.PyThreadState_SetAsyncExc
+try:
+    _PY_SET_ASYNC_EXC.argtypes = (ctypes.c_ulong, ctypes.py_object)  # type: ignore[attr-defined]
+    _PY_SET_ASYNC_EXC.restype = ctypes.c_int  # type: ignore[attr-defined]
+except AttributeError:  # pragma: no cover - platform specific
+    pass
+
+
+def _prune_dead_threads() -> None:
+    with _SPEEDY_THREADS_LOCK:
+        SPEEDY_RUNNING_THREADS[:] = [t for t in SPEEDY_RUNNING_THREADS if t.is_alive()]
+
+
+def _track_threads(threads: Iterable[threading.Thread]) -> None:
+    if not threads:
+        return
+    with _SPEEDY_THREADS_LOCK:
+        living = [t for t in SPEEDY_RUNNING_THREADS if t.is_alive()]
+        for candidate in threads:
+            if not candidate.is_alive():
+                continue
+            if any(existing is candidate for existing in living):
+                continue
+            living.append(candidate)
+        SPEEDY_RUNNING_THREADS[:] = living
+
+
+def _track_executor_threads(pool: ThreadPoolExecutor) -> None:
+    thread_set = getattr(pool, "_threads", None)
+    if not thread_set:
+        return
+    _track_threads(tuple(thread_set))
+

 def _group_iter(src: Iterable[T], size: int) -> Iterable[list[T]]:
     """Yield successive chunks from iterable of specified size."""
@@ -273,11 +311,13 @@ def multi_thread(
             fut.idx = next_logical_idx  # type: ignore[attr-defined]
             inflight.add(fut)
             next_logical_idx += len(arg)
+            _track_executor_threads(pool)
         else:
             fut = pool.submit(_worker, arg, func, fixed_kwargs)
             fut.idx = next_logical_idx  # type: ignore[attr-defined]
             inflight.add(fut)
             next_logical_idx += 1
+            _track_executor_threads(pool)

     try:
         # Process futures as they complete and add new ones to keep the pool busy
@@ -347,11 +387,13 @@ def multi_thread(
                     fut2.idx = next_logical_idx  # type: ignore[attr-defined]
                     inflight.add(fut2)
                     next_logical_idx += len(arg)
+                    _track_executor_threads(pool)
                 else:
                     fut2 = pool.submit(_worker, arg, func, fixed_kwargs)
                     fut2.idx = next_logical_idx  # type: ignore[attr-defined]
                     inflight.add(fut2)
                     next_logical_idx += 1
+                    _track_executor_threads(pool)
             except StopIteration:
                 pass

@@ -370,6 +412,7 @@ def multi_thread(
     bar.close()
     if store_output_pkl_file:
         dump_json_or_pickle(results, store_output_pkl_file)
+    _prune_dead_threads()
     return results


@@ -396,9 +439,58 @@ def multi_thread_standard(
         Results in same order as input items.
     """
     with ThreadPoolExecutor(max_workers=workers) as executor:
-        futures = [
+        futures = []
+        for item in items:
+            futures.append(executor.submit(fn, item))
+            _track_executor_threads(executor)
         results = [fut.result() for fut in futures]
+    _prune_dead_threads()
     return results


-
+def _async_raise(thread_id: int, exc_type: type[BaseException]) -> bool:
+    if thread_id <= 0:
+        return False
+    if not issubclass(exc_type, BaseException):
+        raise TypeError("exc_type must derive from BaseException")
+    res = _PY_SET_ASYNC_EXC(ctypes.c_ulong(thread_id), ctypes.py_object(exc_type))
+    if res == 0:
+        return False
+    if res > 1:  # pragma: no cover - defensive branch
+        _PY_SET_ASYNC_EXC(ctypes.c_ulong(thread_id), None)
+        raise SystemError("PyThreadState_SetAsyncExc failed")
+    return True
+
+
+def kill_all_thread(exc_type: type[BaseException] = SystemExit, join_timeout: float = 0.1) -> int:
+    """Forcefully stop tracked worker threads. Returns number of threads signalled."""
+    _prune_dead_threads()
+    current = threading.current_thread()
+    with _SPEEDY_THREADS_LOCK:
+        targets = [t for t in SPEEDY_RUNNING_THREADS if t.is_alive()]
+
+    terminated = 0
+    for thread in targets:
+        if thread is current:
+            continue
+        ident = thread.ident
+        if ident is None:
+            continue
+        try:
+            if _async_raise(ident, exc_type):
+                terminated += 1
+                thread.join(timeout=join_timeout)
+            else:
+                logger.warning("Unable to signal thread %s", thread.name)
+        except Exception as exc:  # pragma: no cover - defensive
+            logger.error("Failed to stop thread %s: %s", thread.name, exc)
+    _prune_dead_threads()
+    return terminated
+
+
+__all__ = [
+    "SPEEDY_RUNNING_THREADS",
+    "multi_thread",
+    "multi_thread_standard",
+    "kill_all_thread",
+]
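
A sketch of how the new thread tracking and `kill_all_thread` could be used, for example from a notebook. The import path follows the module shown above, but `multi_thread_standard`'s full signature is not part of these hunks, so that call is illustrative; also note the asynchronously raised exception is only delivered when a worker next executes Python bytecode, so threads blocked in C calls may not stop immediately.

```python
import threading
import time

# Module path as listed in this diff; these names are module-level in thread.py.
from speedy_utils.multi_worker.thread import (
    SPEEDY_RUNNING_THREADS,
    kill_all_thread,
    multi_thread_standard,
)


def spin(x: int):
    # Busy loop standing in for a hung worker; pure-Python bytecode keeps running,
    # so an asynchronously raised SystemExit can actually be delivered.
    while True:
        x = (x + 1) % 1_000_003


# Run the threaded map in the background so this "cell" stays responsive.
runner = threading.Thread(
    target=lambda: multi_thread_standard(spin, list(range(8)), workers=8),  # illustrative call
    daemon=True,
)
runner.start()
time.sleep(1)

print(len(SPEEDY_RUNNING_THREADS))  # pool threads registered via _track_executor_threads
stopped = kill_all_thread()         # default exc_type=SystemExit, per the signature above
print(f"signalled {stopped} thread(s)")
```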

--- speedy_utils-1.1.17.dist-info/METADATA
+++ speedy_utils-1.1.19.dist-info/METADATA
@@ -1,10 +1,14 @@
-Metadata-Version: 2.
+Metadata-Version: 2.4
 Name: speedy-utils
-Version: 1.1.17
+Version: 1.1.19
 Summary: Fast and easy-to-use package for data science
-[… three removed metadata fields not preserved in the source view …]
+Project-URL: Homepage, https://github.com/anhvth/speedy
+Project-URL: Repository, https://github.com/anhvth/speedy
+Author-email: AnhVTH <anhvth.226@gmail.com>
+License: MIT
+Classifier: Development Status :: 4 - Beta
+Classifier: Intended Audience :: Developers
+Classifier: License :: OSI Approved :: MIT License
 Classifier: Programming Language :: Python :: 3
 Classifier: Programming Language :: Python :: 3.8
 Classifier: Programming Language :: Python :: 3.9
@@ -12,29 +16,35 @@ Classifier: Programming Language :: Python :: 3.10
 Classifier: Programming Language :: Python :: 3.11
 Classifier: Programming Language :: Python :: 3.12
 Classifier: Programming Language :: Python :: 3.13
+Classifier: Programming Language :: Python :: 3.14
+Requires-Python: >=3.8
+Requires-Dist: aiohttp>=3.10.11
 Requires-Dist: bump2version
 Requires-Dist: cachetools
 Requires-Dist: debugpy
 Requires-Dist: fastcore
 Requires-Dist: fastprogress
-Requires-Dist: freezegun
+Requires-Dist: freezegun>=1.5.1
 Requires-Dist: ipdb
 Requires-Dist: ipywidgets
-Requires-Dist: json-repair
+Requires-Dist: json-repair<0.31.0,>=0.25.0
 Requires-Dist: jupyterlab
 Requires-Dist: loguru
 Requires-Dist: matplotlib
 Requires-Dist: numpy
-Requires-Dist: openai
-Requires-Dist: packaging
+Requires-Dist: openai>=1.106.0
+Requires-Dist: packaging<25,>=23.2
 Requires-Dist: pandas
 Requires-Dist: pydantic
+Requires-Dist: pytest>=8.3.5
+Requires-Dist: ray>=2.36.1
 Requires-Dist: requests
 Requires-Dist: scikit-learn
 Requires-Dist: tabulate
 Requires-Dist: tqdm
 Requires-Dist: xxhash
-
+Provides-Extra: ray
+Requires-Dist: ray>=2.49.1; (python_version >= '3.9') and extra == 'ray'
 Description-Content-Type: text/markdown

 # Speedy Utils
@@ -83,6 +93,19 @@ cd speedy-utils
 pip install .
 ```

+### Extras
+
+Optional dependencies can be installed via extras. For the `ray` backend
+support (requires Python >= 3.9):
+
+```bash
+# pip
+pip install 'speedy-utils[ray]'
+
+# Poetry (for developing this repo)
+poetry install -E ray
+```
+
 ## Updating from previous versions

 To update from previous versions or switch to v1.x, first uninstall any old
@@ -281,9 +304,8 @@ python speedy_utils/common/dataclass_parser.py

 Example output:

-| Field
-[… removed table line not preserved in the source view …]
-| from_peft
+| Field     | Value                                 |
+| --------- | ------------------------------------- |
+| from_peft | ./outputs/llm_hn_qw32b/hn_results_r3/ |

 Please ensure your code adheres to the project's coding standards and includes appropriate tests.
-
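
Since the new metadata adds a `ray` extra guarded by an environment marker, a quick way to confirm what the installed wheel actually declares, assuming `speedy-utils` 1.1.19 is installed in the current environment:

```python
from importlib.metadata import metadata, requires

# Inspect the installed distribution's metadata (values per the METADATA diff above).
md = metadata("speedy-utils")
print(md["Version"])                 # expected: 1.1.19
print(md.get_all("Provides-Extra"))  # expected to include 'ray'

# Requirements gated behind the 'ray' extra carry the python_version marker shown above.
for req in requires("speedy-utils") or []:
    if "extra == 'ray'" in req:
        print(req)
```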