speedy-utils 1.1.18__py3-none-any.whl → 1.1.20__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
speedy_utils/__init__.py CHANGED
@@ -138,7 +138,7 @@ from .common.utils_print import (
 
 # Multi-worker processing
 from .multi_worker.process import multi_process
-from .multi_worker.thread import multi_thread
+from .multi_worker.thread import kill_all_thread, multi_thread
 
 # Define __all__ explicitly
 __all__ = [
@@ -224,6 +224,7 @@ __all__ = [
     # Multi-worker processing
     "multi_process",
     "multi_thread",
+    "kill_all_thread",
     # Notebook utilities
     "change_dir",
 ]
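These hunks re-export `kill_all_thread` next to `multi_thread`, so both are importable from the package root. A minimal sketch of that import surface (assuming speedy-utils 1.1.20 is installed; `multi_thread`'s positional `(func, items)` order is inferred from the thread-pool hunks further down, not verified here):

```python
# Hedged sketch of the new top-level export; assumes speedy-utils 1.1.20.
from speedy_utils import kill_all_thread, multi_thread

def square(x: int) -> int:
    return x * x

print(multi_thread(square, [1, 2, 3, 4]))       # expected: [1, 4, 9, 16]
print("workers signalled:", kill_all_thread())  # stops any tracked leftover threads
```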
@@ -258,13 +258,13 @@ def _memory_memoize(
 
         with mem_lock:
             if name in mem_cache:
-                return mem_cache[name]  # type: ignore[return-value]
+                return mem_cache[name]
 
         result = func(*args, **kwargs)
 
         with mem_lock:
             if name not in mem_cache:
-                mem_cache[name] = result  # type: ignore[index]
+                mem_cache[name] = result
         return result
 
     return wrapper
@@ -292,7 +292,7 @@ def _async_memory_memoize(
 
         async with alock:
             if name in mem_cache:
-                return mem_cache[name]  # type: ignore[return-value]
+                return mem_cache[name]
             task = inflight.get(name)
             if task is None:
                 task = asyncio.create_task(func(*args, **kwargs))  # type: ignore[arg-type]
@@ -305,7 +305,7 @@ def _async_memory_memoize(
             inflight.pop(name, None)
 
         with mem_lock:
-            mem_cache[name] = result  # type: ignore[index]
+            mem_cache[name] = result
         return result
 
     return wrapper
@@ -447,7 +447,7 @@ def both_memoize(
         # Memory first
         with mem_lock:
             if mem_key in mem_cache:
-                return mem_cache[mem_key]  # type: ignore[return-value]
+                return mem_cache[mem_key]
 
         # Disk next
         if sub_dir == "funcs":
@@ -468,7 +468,7 @@ def both_memoize(
 
         if disk_result is not None:
             with mem_lock:
-                mem_cache[mem_key] = disk_result  # type: ignore[index]
+                mem_cache[mem_key] = disk_result
             return disk_result
 
         # Miss: compute, then write both
@@ -477,7 +477,7 @@ def both_memoize(
         if not osp.exists(cache_path):
             dump_json_or_pickle(result, cache_path)
         with mem_lock:
-            mem_cache[mem_key] = result  # type: ignore[index]
+            mem_cache[mem_key] = result
         return result
 
     return wrapper
@@ -506,7 +506,7 @@ def _async_both_memoize(
         # Memory
        async with alock:
             if mem_key in mem_cache:
-                return mem_cache[mem_key]  # type: ignore[return-value]
+                return mem_cache[mem_key]
 
         # Disk
         if sub_dir == "funcs":
@@ -526,7 +526,7 @@ def _async_both_memoize(
 
         if disk_result is not None:
             with mem_lock:
-                mem_cache[mem_key] = disk_result  # type: ignore[index]
+                mem_cache[mem_key] = disk_result
             return disk_result
 
         # Avoid duplicate async work for same key
@@ -550,7 +550,7 @@ def _async_both_memoize(
             await loop.run_in_executor(None, write_disk_cache)
 
         with mem_lock:
-            mem_cache[mem_key] = result  # type: ignore[index]
+            mem_cache[mem_key] = result
         return result
 
     return wrapper
@@ -561,9 +561,10 @@ def _async_both_memoize(
 
 # --------------------------------------------------------------------------------------
 
+# Define overloads to preserve exact type information
 @overload
 def memoize(
-    _func: Callable[P, R | Awaitable[R]],
+    _func: Callable[P, R],
     *,
     keys: Optional[list[str]] = ...,
     key: Optional[Callable[..., Any]] = ...,
@@ -572,7 +573,23 @@ def memoize(
     size: int = ...,
     ignore_self: bool = ...,
     verbose: bool = ...,
-) -> Callable[P, R | Awaitable[R]]: ...
+) -> Callable[P, R]: ...
+
+
+@overload
+def memoize(
+    _func: Callable[P, Awaitable[R]],
+    *,
+    keys: Optional[list[str]] = ...,
+    key: Optional[Callable[..., Any]] = ...,
+    cache_dir: str = ...,
+    cache_type: Literal["memory", "disk", "both"] = ...,
+    size: int = ...,
+    ignore_self: bool = ...,
+    verbose: bool = ...,
+) -> Callable[P, Awaitable[R]]: ...
+
+
 @overload
 def memoize(
     _func: None = ...,
@@ -585,8 +602,10 @@ def memoize(
     ignore_self: bool = ...,
     verbose: bool = ...,
 ) -> Callable[[Callable[P, R]], Callable[P, R]]: ...
+
+
 @overload
-def memoize(
+def memoize(  # type: ignore
     _func: None = ...,
     *,
     keys: Optional[list[str]] = ...,
@@ -635,24 +654,24 @@ def memoize(
 
         if cache_type == "memory":
            if is_async:
-                return _async_memory_memoize(target_func, size, keys, ignore_self, key)  # type: ignore[return-value]
-            return _memory_memoize(target_func, size, keys, ignore_self, key)  # type: ignore[return-value]
+                return _async_memory_memoize(target_func, size, keys, ignore_self, key)
+            return _memory_memoize(target_func, size, keys, ignore_self, key)
 
         if cache_type == "disk":
             if is_async:
                 return _async_disk_memoize(
                     target_func, keys, cache_dir, ignore_self, verbose, key
-                )  # type: ignore[return-value]
+                )
             return _disk_memoize(
                 target_func, keys, cache_dir, ignore_self, verbose, key
-            )  # type: ignore[return-value]
+            )
 
         # cache_type == "both"
         if is_async:
             return _async_both_memoize(
                 target_func, keys, cache_dir, ignore_self, size, key
-            )  # type: ignore[return-value]
-        return both_memoize(target_func, keys, cache_dir, ignore_self, size, key)  # type: ignore[return-value]
+            )
+        return both_memoize(target_func, keys, cache_dir, ignore_self, size, key)
 
     # Support both @memoize and @memoize(...)
     if _func is None:
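The overload changes above split the single sync-or-async signature into separate sync and async overloads, so a decorated coroutine keeps its `Awaitable[R]` return type and the `# type: ignore` comments on the cache lookups can be dropped. A hedged usage sketch (the package-level import of `memoize` is an assumption; the keyword arguments are taken from the overload signatures above):

```python
# Hedged sketch: `memoize` import path is assumed; parameters come from the overloads.
import asyncio

from speedy_utils import memoize  # assumed re-export

@memoize(cache_type="memory", size=128)
def fib(n: int) -> int:
    return n if n < 2 else fib(n - 1) + fib(n - 2)

@memoize(cache_type="memory")
async def double(n: int) -> int:
    await asyncio.sleep(0.01)
    return n * 2

print(fib(30))                  # sync overload: Callable[P, int] stays Callable[P, int]
print(asyncio.run(double(21)))  # async overload keeps Awaitable[int]
```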
@@ -1,13 +1,18 @@
 # utils/utils_io.py
 
+import bz2
+import gzip
+import io
 import json
+import lzma
 import os
 import os.path as osp
 import pickle
 import time
+import warnings
 from glob import glob
 from pathlib import Path
-from typing import Any, Union
+from typing import IO, Any, Iterable, Optional, Union, cast
 
 from json_repair import loads as jloads
 from pydantic import BaseModel
@@ -53,7 +58,7 @@ def dump_json_or_pickle(
     except Exception as e:
         if isinstance(obj, BaseModel):
             data = obj.model_dump()
-            from fastcore.all import obj2dict, dict2obj
+            from fastcore.all import dict2obj, obj2dict
             obj2 = dict2obj(data)
             with open(fname, "wb") as f:
                 pickle.dump(obj2, f)
@@ -87,8 +92,7 @@ def load_json_or_pickle(fname: str, counter=0) -> Any:
         raise ValueError(f"Error {e} while loading {fname}") from e
 
 
-import os, io, json, gzip, bz2, lzma, warnings
-from typing import Iterable, Union, IO, Any, Optional, cast
+
 
 
 try:
@@ -212,7 +216,7 @@ def fast_load_jsonl(
     if line_count > multiworker_threshold:
         # Use multi-worker processing
         from ..multi_worker.thread import multi_thread
-
+
         # Read all lines into chunks
         f = _open_auto(path_or_file)
         all_lines = list(f)
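The mid-module imports move to the header, and `fast_load_jsonl` keeps its `_open_auto` plus `multi_thread` path for large inputs. A hedged sketch of the intended call pattern (that `fast_load_jsonl` is exported at package level and accepts a path to a gzip-compressed `.jsonl` file is an assumption suggested, but not confirmed, by the new `gzip`/`bz2`/`lzma` imports above):

```python
# Hedged sketch: fast_load_jsonl's export and exact signature are assumptions.
import gzip
import json

from speedy_utils import fast_load_jsonl  # assumed export

# Write a small compressed JSONL file to load back.
with gzip.open("sample.jsonl.gz", "wt", encoding="utf-8") as fh:
    for i in range(1000):
        fh.write(json.dumps({"id": i, "text": f"row {i}"}) + "\n")

rows = fast_load_jsonl("sample.jsonl.gz")
print(len(rows), rows[0])
```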
@@ -1,13 +1,32 @@
 # ray_multi_process.py
-import time, os, pickle, uuid, datetime
+import time, os, pickle, uuid, datetime, multiprocessing
+import datetime
+import os
+import pickle
+import time
+import uuid
 from pathlib import Path
 from typing import Any, Callable
 from tqdm import tqdm
+import psutil
+import threading
+ray: Any
+try:
+    import ray as ray  # type: ignore
+    _HAS_RAY = True
+except Exception:  # pragma: no cover
+    ray = None  # type: ignore
+    _HAS_RAY = False
+from typing import Any, Callable, Iterable
+
 import ray
 from fastcore.parallel import parallel
+from tqdm import tqdm
+
 
 # ─── cache helpers ──────────────────────────────────────────
 
+
 def _build_cache_dir(func: Callable, items: list[Any]) -> Path:
     """Build cache dir with function name + timestamp."""
     func_name = getattr(func, "__name__", "func")
@@ -18,6 +37,7 @@ def _build_cache_dir(func: Callable, items: list[Any]) -> Path:
     path.mkdir(parents=True, exist_ok=True)
     return path
 
+
 def wrap_dump(func: Callable, cache_dir: Path | None):
     """Wrap a function so results are dumped to .pkl when cache_dir is set."""
     if cache_dir is None:
@@ -29,12 +49,15 @@ def wrap_dump(func: Callable, cache_dir: Path | None):
         with open(p, "wb") as fh:
             pickle.dump(res, fh)
         return str(p)
+
     return wrapped
 
+
 # ─── ray management ─────────────────────────────────────────
 
 RAY_WORKER = None
 
+
 def ensure_ray(workers: int, pbar: tqdm | None = None):
     """Initialize or reinitialize Ray with a given worker count, log to bar postfix."""
     global RAY_WORKER
@@ -49,19 +72,22 @@ def ensure_ray(workers: int, pbar: tqdm | None = None):
         pbar.set_postfix_str(f"ray.init {workers} took {took:.2f}s")
     RAY_WORKER = workers
 
+
 # ─── main API ───────────────────────────────────────────────
 from typing import Literal
 
+
 def multi_process(
     func: Callable[[Any], Any],
-    items: list[Any] | None = None,
+    items: Iterable[Any] | None = None,
     *,
-    inputs: list[Any] | None = None,
+    inputs: Iterable[Any] | None = None,
     workers: int | None = None,
     lazy_output: bool = False,
     progress: bool = True,
     # backend: str = "ray", # "seq", "ray", or "fastcore"
-    backend: Literal["seq", "ray", "mp", "threadpool"] = "ray",
+    backend: Literal["seq", "ray", "mp", "threadpool", "safe"] | None = None,
+    backend: Literal["seq", "ray", "mp", "threadpool"] = "mp",
     # Additional optional knobs (accepted for compatibility)
     batch: int | None = None,
     ordered: bool | None = None,
@@ -75,15 +101,25 @@ def multi_process(
     backend:
       - "seq": run sequentially
      - "ray": run in parallel with Ray
-      - "fastcore": run in parallel with fastcore.parallel
+      - "mp": run in parallel with multiprocessing (uses threadpool to avoid fork warnings)
+      - "threadpool": run in parallel with thread pool
+      - "safe": run in parallel with thread pool (explicitly safe for tests)
 
     If lazy_output=True, every result is saved to .pkl and
     the returned list contains file paths.
     """
 
+    # default backend selection
+    if backend is None:
+        backend = "ray" if _HAS_RAY else "mp"
+
     # unify items
+    # unify items and coerce to concrete list so we can use len() and
+    # iterate multiple times. This accepts ranges and other iterables.
     if items is None and inputs is not None:
         items = list(inputs)
+    if items is not None and not isinstance(items, list):
+        items = list(items)
     if items is None:
         raise ValueError("'items' or 'inputs' must be provided")
 
@@ -95,8 +131,9 @@ def multi_process(
     f_wrapped = wrap_dump(func, cache_dir)
 
     total = len(items)
-    with tqdm(total=total, desc=f"multi_process [{backend}]", disable=not progress) as pbar:
-
+    with tqdm(
+        total=total, desc=f"multi_process [{backend}]", disable=not progress
+    ) as pbar:
         # ---- sequential backend ----
         if backend == "seq":
             pbar.set_postfix_str("backend=seq")
@@ -108,6 +145,13 @@ def multi_process(
 
         # ---- ray backend ----
         if backend == "ray":
+            if not _HAS_RAY:
+                msg = (
+                    "Ray backend requested but 'ray' is not installed. "
+                    "Install extra: pip install 'speedy-utils[ray]' or "
+                    "poetry install -E ray."
+                )
+                raise RuntimeError(msg)
             pbar.set_postfix_str("backend=ray")
             ensure_ray(workers, pbar)
 
@@ -125,10 +169,47 @@ def multi_process(
 
         # ---- fastcore backend ----
         if backend == "mp":
-            results = parallel(f_wrapped, items, n_workers=workers, progress=progress, threadpool=False)
+            results = parallel(
+                f_wrapped, items, n_workers=workers, progress=progress, threadpool=False
+            )
             return list(results)
         if backend == "threadpool":
-            results = parallel(f_wrapped, items, n_workers=workers, progress=progress, threadpool=True)
+            results = parallel(
+                f_wrapped, items, n_workers=workers, progress=progress, threadpool=True
+            )
             return list(results)
-
+        if backend == "safe":
+            # Completely safe backend for tests - no multiprocessing, no external progress bars
+            import concurrent.futures
+            with concurrent.futures.ThreadPoolExecutor(max_workers=workers) as executor:
+                results = list(executor.map(f_wrapped, items))
         raise ValueError(f"Unsupported backend: {backend!r}")
+
+
+
+def cleanup_phantom_workers():
+    """
+    Kill all child processes (phantom workers) without killing the Jupyter kernel itself.
+    Also lists non-daemon threads that remain.
+    """
+    parent = psutil.Process(os.getpid())
+
+    # Kill only children, never the current process
+    for child in parent.children(recursive=True):
+        try:
+            print(f"🔪 Killing child process {child.pid} ({child.name()})")
+            child.kill()
+        except psutil.NoSuchProcess:
+            pass
+
+    # Report stray threads (can't hard-kill them in Python)
+    for t in threading.enumerate():
+        if t is threading.current_thread():
+            continue
+        if not t.daemon:
+            print(f"⚠️ Thread {t.name} is still running (cannot be force-killed).")
+
+    print("✅ Cleaned up child processes (kernel untouched).")
+
+# Usage: run this anytime after cancelling a cell
+
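With Ray now optional, the hunks above make `backend=None` pick `"ray"` when the extra is installed and fall back to `"mp"` otherwise, while requesting `"ray"` without it raises a `RuntimeError` pointing at the `speedy-utils[ray]` extra. A hedged sketch using the dependency-free backends (parameter names are taken from the signature above; the fallback behaviour is what the new default-selection block suggests, not something verified against the wheel):

```python
# Hedged sketch: shows the widened `items: Iterable` parameter and explicit backends.
from speedy_utils import multi_process

def work(x: int) -> int:
    return x + 1

# `items` may now be any iterable; it is coerced to a list internally.
print(multi_process(work, items=range(10), backend="seq"))
print(multi_process(work, items=range(10), workers=4, backend="threadpool"))
```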
@@ -77,7 +77,9 @@
 # ============================================================================= #
 """
 
+import ctypes
 import os
+import threading
 import time
 import traceback
 from collections.abc import Callable, Iterable
@@ -98,6 +100,42 @@ DEFAULT_WORKERS = (os.cpu_count() or 4) * 2
 T = TypeVar("T")
 R = TypeVar("R")
 
+SPEEDY_RUNNING_THREADS: list[threading.Thread] = []
+_SPEEDY_THREADS_LOCK = threading.Lock()
+
+_PY_SET_ASYNC_EXC = ctypes.pythonapi.PyThreadState_SetAsyncExc
+try:
+    _PY_SET_ASYNC_EXC.argtypes = (ctypes.c_ulong, ctypes.py_object)  # type: ignore[attr-defined]
+    _PY_SET_ASYNC_EXC.restype = ctypes.c_int  # type: ignore[attr-defined]
+except AttributeError:  # pragma: no cover - platform specific
+    pass
+
+
+def _prune_dead_threads() -> None:
+    with _SPEEDY_THREADS_LOCK:
+        SPEEDY_RUNNING_THREADS[:] = [t for t in SPEEDY_RUNNING_THREADS if t.is_alive()]
+
+
+def _track_threads(threads: Iterable[threading.Thread]) -> None:
+    if not threads:
+        return
+    with _SPEEDY_THREADS_LOCK:
+        living = [t for t in SPEEDY_RUNNING_THREADS if t.is_alive()]
+        for candidate in threads:
+            if not candidate.is_alive():
+                continue
+            if any(existing is candidate for existing in living):
+                continue
+            living.append(candidate)
+        SPEEDY_RUNNING_THREADS[:] = living
+
+
+def _track_executor_threads(pool: ThreadPoolExecutor) -> None:
+    thread_set = getattr(pool, "_threads", None)
+    if not thread_set:
+        return
+    _track_threads(tuple(thread_set))
+
 
 def _group_iter(src: Iterable[T], size: int) -> Iterable[list[T]]:
     """Yield successive chunks from iterable of specified size."""
@@ -273,11 +311,13 @@ def multi_thread(
                 fut.idx = next_logical_idx  # type: ignore[attr-defined]
                 inflight.add(fut)
                 next_logical_idx += len(arg)
+                _track_executor_threads(pool)
             else:
                 fut = pool.submit(_worker, arg, func, fixed_kwargs)
                 fut.idx = next_logical_idx  # type: ignore[attr-defined]
                 inflight.add(fut)
                 next_logical_idx += 1
+                _track_executor_threads(pool)
 
         try:
             # Process futures as they complete and add new ones to keep the pool busy
@@ -347,11 +387,13 @@ def multi_thread(
                     fut2.idx = next_logical_idx  # type: ignore[attr-defined]
                     inflight.add(fut2)
                     next_logical_idx += len(arg)
+                    _track_executor_threads(pool)
                 else:
                     fut2 = pool.submit(_worker, arg, func, fixed_kwargs)
                     fut2.idx = next_logical_idx  # type: ignore[attr-defined]
                     inflight.add(fut2)
                     next_logical_idx += 1
+                    _track_executor_threads(pool)
             except StopIteration:
                 pass
 
@@ -370,6 +412,7 @@ def multi_thread(
     bar.close()
     if store_output_pkl_file:
         dump_json_or_pickle(results, store_output_pkl_file)
+    _prune_dead_threads()
     return results
 
 
@@ -396,9 +439,58 @@ def multi_thread_standard(
         Results in same order as input items.
     """
     with ThreadPoolExecutor(max_workers=workers) as executor:
-        futures = [executor.submit(fn, item) for item in items]
+        futures = []
+        for item in items:
+            futures.append(executor.submit(fn, item))
+            _track_executor_threads(executor)
         results = [fut.result() for fut in futures]
+    _prune_dead_threads()
     return results
 
 
-__all__ = ["multi_thread", "multi_thread_standard"]
+def _async_raise(thread_id: int, exc_type: type[BaseException]) -> bool:
+    if thread_id <= 0:
+        return False
+    if not issubclass(exc_type, BaseException):
+        raise TypeError("exc_type must derive from BaseException")
+    res = _PY_SET_ASYNC_EXC(ctypes.c_ulong(thread_id), ctypes.py_object(exc_type))
+    if res == 0:
+        return False
+    if res > 1:  # pragma: no cover - defensive branch
+        _PY_SET_ASYNC_EXC(ctypes.c_ulong(thread_id), None)
+        raise SystemError("PyThreadState_SetAsyncExc failed")
+    return True
+
+
+def kill_all_thread(exc_type: type[BaseException] = SystemExit, join_timeout: float = 0.1) -> int:
+    """Forcefully stop tracked worker threads. Returns number of threads signalled."""
+    _prune_dead_threads()
+    current = threading.current_thread()
+    with _SPEEDY_THREADS_LOCK:
+        targets = [t for t in SPEEDY_RUNNING_THREADS if t.is_alive()]
+
+    terminated = 0
+    for thread in targets:
+        if thread is current:
+            continue
+        ident = thread.ident
+        if ident is None:
+            continue
+        try:
+            if _async_raise(ident, exc_type):
+                terminated += 1
+                thread.join(timeout=join_timeout)
+            else:
+                logger.warning("Unable to signal thread %s", thread.name)
+        except Exception as exc:  # pragma: no cover - defensive
+            logger.error("Failed to stop thread %s: %s", thread.name, exc)
+    _prune_dead_threads()
+    return terminated
+
+
+__all__ = [
+    "SPEEDY_RUNNING_THREADS",
+    "multi_thread",
+    "multi_thread_standard",
+    "kill_all_thread",
+]
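Per the hunks above, `multi_thread` and `multi_thread_standard` now register their executor threads in `SPEEDY_RUNNING_THREADS`, and `kill_all_thread` injects `SystemExit` into each tracked thread via `PyThreadState_SetAsyncExc`, returning the count signalled. A hedged sketch (module path comes from the `__init__` hunk at the top of this diff; CPython-only; the `(fn, items)` argument order of `multi_thread_standard` is inferred from its body above; a worker blocked in a C call such as `time.sleep` only sees the injected exception once it returns to Python bytecode):

```python
# Hedged sketch of the thread-tracking and kill_all_thread additions.
import threading
import time

from speedy_utils.multi_worker.thread import (
    SPEEDY_RUNNING_THREADS,
    kill_all_thread,
    multi_thread_standard,
)

def slow(x: int) -> int:
    time.sleep(5)  # stands in for a worker you want to abandon
    return x

# Run the pool in a background thread so this script can give up on it early.
threading.Thread(
    target=multi_thread_standard, args=(slow, [1, 2, 3]), daemon=True
).start()
time.sleep(0.5)  # give the executor time to start and register its workers

print("tracked threads:", len(SPEEDY_RUNNING_THREADS))
print("threads signalled:", kill_all_thread(join_timeout=0.2))
```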
@@ -1,10 +1,14 @@
 Metadata-Version: 2.4
 Name: speedy-utils
-Version: 1.1.18
+Version: 1.1.20
 Summary: Fast and easy-to-use package for data science
-Author: AnhVTH
-Author-email: anhvth.226@gmail.com
-Requires-Python: >=3.8
+Project-URL: Homepage, https://github.com/anhvth/speedy
+Project-URL: Repository, https://github.com/anhvth/speedy
+Author-email: AnhVTH <anhvth.226@gmail.com>
+License: MIT
+Classifier: Development Status :: 4 - Beta
+Classifier: Intended Audience :: Developers
+Classifier: License :: OSI Approved :: MIT License
 Classifier: Programming Language :: Python :: 3
 Classifier: Programming Language :: Python :: 3.8
 Classifier: Programming Language :: Python :: 3.9
@@ -13,29 +17,34 @@ Classifier: Programming Language :: Python :: 3.11
 Classifier: Programming Language :: Python :: 3.12
 Classifier: Programming Language :: Python :: 3.13
 Classifier: Programming Language :: Python :: 3.14
+Requires-Python: >=3.8
+Requires-Dist: aiohttp
 Requires-Dist: bump2version
 Requires-Dist: cachetools
 Requires-Dist: debugpy
 Requires-Dist: fastcore
 Requires-Dist: fastprogress
-Requires-Dist: freezegun (>=1.5.1,<2.0.0)
+Requires-Dist: freezegun
 Requires-Dist: ipdb
 Requires-Dist: ipywidgets
-Requires-Dist: json-repair (>=0.25.0,<0.31.0)
+Requires-Dist: json-repair
 Requires-Dist: jupyterlab
 Requires-Dist: loguru
 Requires-Dist: matplotlib
 Requires-Dist: numpy
-Requires-Dist: openai (>=1.106.0,<2.0.0)
-Requires-Dist: packaging (>=23.2,<25)
+Requires-Dist: openai
+Requires-Dist: packaging
 Requires-Dist: pandas
 Requires-Dist: pydantic
+Requires-Dist: pytest
+Requires-Dist: ray
 Requires-Dist: requests
 Requires-Dist: scikit-learn
 Requires-Dist: tabulate
 Requires-Dist: tqdm
 Requires-Dist: xxhash
-Project-URL: Homepage, https://github.com/anhvth/speedy
+Provides-Extra: ray
+Requires-Dist: ray>=2.49.1; (python_version >= '3.9') and extra == 'ray'
 Description-Content-Type: text/markdown
 
 # Speedy Utils
@@ -84,6 +93,19 @@ cd speedy-utils
 pip install .
 ```
 
+### Extras
+
+Optional dependencies can be installed via extras. For the `ray` backend
+support (requires Python >= 3.9):
+
+```bash
+# pip
+pip install 'speedy-utils[ray]'
+
+# Poetry (for developing this repo)
+poetry install -E ray
+```
+
 ## Updating from previous versions
 
 To update from previous versions or switch to v1.x, first uninstall any old
@@ -282,9 +304,8 @@ python speedy_utils/common/dataclass_parser.py
 
 Example output:
 
-| Field              | Value                                 |
-|--------------------|---------------------------------------|
-| from_peft          | ./outputs/llm_hn_qw32b/hn_results_r3/ |
+| Field     | Value                                 |
+| --------- | ------------------------------------- |
+| from_peft | ./outputs/llm_hn_qw32b/hn_results_r3/ |
 
 Please ensure your code adheres to the project's coding standards and includes appropriate tests.
-