speedy-utils 1.1.16__py3-none-any.whl → 1.1.18__py3-none-any.whl

This diff shows the changes between two publicly released versions of the package as they appear in their public registry. It is provided for informational purposes only.
@@ -87,9 +87,236 @@ def load_json_or_pickle(fname: str, counter=0) -> Any:
         raise ValueError(f"Error {e} while loading {fname}") from e
 
 
-def load_jsonl(path):
-    lines = open(path, encoding="utf-8").read().splitlines()
-    return [json.loads(line) for line in lines]
+import os, io, json, gzip, bz2, lzma, warnings
+from typing import Iterable, Union, IO, Any, Optional, cast
+
+try:
+    import orjson  # type: ignore[import-not-found]  # fastest JSON parser when available
+except Exception:
+    orjson = None
+
+try:
+    import zstandard as zstd  # type: ignore[import-not-found]  # optional .zst support
+except Exception:
+    zstd = None
+
+
+def fast_load_jsonl(
+    path_or_file: Union[str, os.PathLike, IO],
+    *,
+    progress: bool = False,
+    desc: str = "Reading JSONL",
+    use_orjson: bool = True,
+    encoding: str = "utf-8",
+    errors: str = "strict",
+    on_error: str = "raise",  # 'raise' | 'warn' | 'skip'
+    skip_empty: bool = True,
+    max_lines: Optional[int] = None,
+    use_multiworker: bool = True,
+    multiworker_threshold: int = 50000,
+    workers: Optional[int] = None,
+) -> Iterable[Any]:
+    """
+    Lazily iterate objects from a JSON Lines file.
+
+    - Streams line-by-line (constant memory).
+    - Optional tqdm progress over bytes (compressed size if gz/bz2/xz/zst).
+    - Auto-detects compression by extension: .gz, .bz2, .xz/.lzma, .zst/.zstd.
+    - Uses orjson if available (use_orjson=True), falls back to json.
+    - Automatically uses multi-worker processing for large files (>50k lines).
+
+    Args:
+        path_or_file: Path-like or file-like object. File-like can be binary or text.
+        progress: Show a tqdm progress bar (bytes). Requires `tqdm` if True.
+        desc: tqdm description if progress=True.
+        use_orjson: Prefer orjson for speed if installed.
+        encoding, errors: Used when decoding text or when falling back to `json`.
+        on_error: What to do on a malformed line: 'raise', 'warn', or 'skip'.
+        skip_empty: Skip blank/whitespace-only lines.
+        max_lines: Stop after reading this many lines (useful for sampling).
+        use_multiworker: Enable multi-worker processing for large files.
+        multiworker_threshold: Line count threshold to trigger multi-worker processing.
+        workers: Number of worker threads (defaults to CPU count).
+
+    Yields:
+        Parsed Python objects per line.
+    """
+    def _open_auto(pth_or_f) -> IO[Any]:
+        if hasattr(pth_or_f, "read"):
+            # ensure binary buffer for consistent byte-length progress
+            fobj = pth_or_f
+            # If it's text, wrap it to binary via encoding; else just return
+            if isinstance(fobj, io.TextIOBase):
+                # TextIO -> re-encode to bytes on the fly
+                return io.BufferedReader(io.BytesIO(fobj.read().encode(encoding, errors)))
+            return pth_or_f  # assume binary
+        s = str(pth_or_f).lower()
+        if s.endswith(".gz"):
+            return gzip.open(pth_or_f, "rb")  # type: ignore
+        if s.endswith(".bz2"):
+            return bz2.open(pth_or_f, "rb")  # type: ignore
+        if s.endswith((".xz", ".lzma")):
+            return lzma.open(pth_or_f, "rb")  # type: ignore
+        if s.endswith((".zst", ".zstd")) and zstd is not None:
+            fh = open(pth_or_f, "rb")
+            dctx = zstd.ZstdDecompressor()
+            stream = dctx.stream_reader(fh)
+            return io.BufferedReader(stream)  # type: ignore
+        # plain
+        return open(pth_or_f, "rb", buffering=1024 * 1024)
+
+    def _count_lines_fast(file_path: Union[str, os.PathLike]) -> int:
+        """Quickly count lines in a file, handling compression."""
+        try:
+            f = _open_auto(file_path)
+            count = 0
+            for _ in f:
+                count += 1
+            f.close()
+            return count
+        except Exception:
+            # If we can't count lines, assume it's small
+            return 0
+
+    def _process_chunk(chunk_lines: list[bytes]) -> list[Any]:
+        """Process a chunk of lines and return parsed objects."""
+        results = []
+        for line_bytes in chunk_lines:
+            if skip_empty and not line_bytes.strip():
+                continue
+            line_bytes = line_bytes.rstrip(b"\r\n")
+            try:
+                if use_orjson and orjson is not None:
+                    obj = orjson.loads(line_bytes)
+                else:
+                    obj = json.loads(line_bytes.decode(encoding, errors))
+                results.append(obj)
+            except Exception as e:
+                if on_error == "raise":
+                    raise
+                if on_error == "warn":
+                    warnings.warn(f"Skipping malformed line: {e}")
+                # 'skip' and 'warn' both skip the line
+                continue
+        return results
+
+    # Check if we should use multi-worker processing
+    should_use_multiworker = (
+        use_multiworker
+        and not hasattr(path_or_file, "read")  # Only for file paths, not file objects
+        and max_lines is None  # Don't use multiworker if we're limiting lines
+    )
+
+    if should_use_multiworker:
+        line_count = _count_lines_fast(cast(Union[str, os.PathLike], path_or_file))
+        if line_count > multiworker_threshold:
+            # Use multi-worker processing
+            from ..multi_worker.thread import multi_thread
+
+            # Read all lines into chunks
+            f = _open_auto(path_or_file)
+            all_lines = list(f)
+            f.close()
+
+            # Split into chunks for workers
+            num_workers = workers or os.cpu_count() or 4
+            chunk_size = max(len(all_lines) // num_workers, 1000)
+            chunks = []
+            for i in range(0, len(all_lines), chunk_size):
+                chunks.append(all_lines[i:i + chunk_size])
+
+            # Process chunks in parallel
+            if progress:
+                print(f"Processing {line_count} lines with {num_workers} workers...")
+
+            chunk_results = multi_thread(_process_chunk, chunks, workers=num_workers, progress=progress)
+
+            # Flatten results and yield
+            for chunk_result in chunk_results:
+                for obj in chunk_result:
+                    yield obj
+            return
+
+    # Single-threaded processing (original logic)
+
+    f = _open_auto(path_or_file)
+
+    pbar = None
+    if progress:
+        try:
+            from tqdm import tqdm  # type: ignore
+        except Exception as e:
+            raise ImportError("tqdm is required when progress=True") from e
+        total = None
+        if not hasattr(path_or_file, "read"):
+            try:
+                path_for_size = cast(Union[str, os.PathLike], path_or_file)
+                total = os.path.getsize(path_for_size)  # compressed size if any
+            except Exception:
+                total = None
+        pbar = tqdm(total=total, unit="B", unit_scale=True, desc=desc)
+
+    line_no = 0
+    try:
+        for raw_line in f:
+            line_no += 1
+            if pbar is not None:
+                # raw_line is bytes here; if not, compute byte length
+                nbytes = len(raw_line) if isinstance(raw_line, (bytes, bytearray)) else len(str(raw_line).encode(encoding, errors))
+                pbar.update(nbytes)
+
+            # Normalize to bytes -> str only if needed
+            if isinstance(raw_line, (bytes, bytearray)):
+                if skip_empty and not raw_line.strip():
+                    if max_lines and line_no >= max_lines:
+                        break
+                    continue
+                line_bytes = raw_line.rstrip(b"\r\n")
+                # Parse
+                try:
+                    if use_orjson and orjson is not None:
+                        obj = orjson.loads(line_bytes)
+                    else:
+                        obj = json.loads(line_bytes.decode(encoding, errors))
+                except Exception as e:
+                    if on_error == "raise":
+                        raise
+                    if on_error == "warn":
+                        warnings.warn(f"Skipping malformed line {line_no}: {e}")
+                    # 'skip' and 'warn' both skip the line
+                    if max_lines and line_no >= max_lines:
+                        break
+                    continue
+            else:
+                # Text line path (unlikely)
+                if skip_empty and not raw_line.strip():
+                    if max_lines and line_no >= max_lines:
+                        break
+                    continue
+                try:
+                    obj = json.loads(raw_line)
+                except Exception as e:
+                    if on_error == "raise":
+                        raise
+                    if on_error == "warn":
+                        warnings.warn(f"Skipping malformed line {line_no}: {e}")
+                    if max_lines and line_no >= max_lines:
+                        break
+                    continue
+
+            yield obj
+            if max_lines and line_no >= max_lines:
+                break
+    finally:
+        if pbar is not None:
+            pbar.close()
+        # Close only if we opened it (i.e., not an external stream)
+        if not hasattr(path_or_file, "read"):
+            try:
+                f.close()
+            except Exception:
+                pass
+
 
 
 def load_by_ext(fname: Union[str, list[str]], do_memoize: bool = False) -> Any:
@@ -124,7 +351,7 @@ def load_by_ext(fname: Union[str, list[str]], do_memoize: bool = False) -> Any:
 
     def load_default(path: str) -> Any:
         if path.endswith(".jsonl"):
-            return load_jsonl(path)
+            return list(fast_load_jsonl(path, progress=True))
         elif path.endswith(".json"):
            try:
                return load_json_or_pickle(path)
@@ -159,14 +386,13 @@ def jdumps(obj, ensure_ascii=False, indent=2, **kwargs):
     return json.dumps(obj, ensure_ascii=ensure_ascii, indent=indent, **kwargs)
 
 
-
+load_jsonl = lambda path: list(fast_load_jsonl(path))
 
 __all__ = [
     "dump_json_or_pickle",
     "dump_jsonl",
     "load_by_ext",
     "load_json_or_pickle",
-    "load_jsonl",
     "jdumps",
     "jloads",
 ]
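
Reader's note: judging by the size change recorded for speedy_utils/common/utils_io.py in the RECORD section further down, the hunks above appear to belong to that module. Below is a minimal usage sketch of the new fast_load_jsonl generator, based only on the signature and docstring added above; the file name and the per-record handler are hypothetical, and orjson/zstandard/tqdm are optional dependencies.

    from speedy_utils.common.utils_io import fast_load_jsonl

    # Stream a compressed JSONL file with a byte-level progress bar,
    # skipping malformed lines instead of raising.
    for record in fast_load_jsonl("events.jsonl.gz", progress=True, on_error="skip"):
        handle(record)  # hypothetical per-record handler

    # Or eagerly materialize a small sample.
    sample = list(fast_load_jsonl("events.jsonl.gz", max_lines=100))

One design point visible in the diff: for files above multiworker_threshold the function reads all lines into memory and parses chunks via multi_thread, so the constant-memory behaviour only applies to the single-threaded path.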
@@ -1,203 +1,134 @@
-import multiprocessing
-import os
-import traceback
-from collections.abc import Callable, Iterable, Iterator
-from concurrent.futures import ProcessPoolExecutor, as_completed
-from itertools import islice
-from typing import Any, TypeVar, Union, cast
+# ray_multi_process.py
+import time, os, pickle, uuid, datetime
+from pathlib import Path
+from typing import Any, Callable
+from tqdm import tqdm
+import ray
+from fastcore.parallel import parallel
+
+# ─── cache helpers ──────────────────────────────────────────
+
+def _build_cache_dir(func: Callable, items: list[Any]) -> Path:
+    """Build cache dir with function name + timestamp."""
+    func_name = getattr(func, "__name__", "func")
+    now = datetime.datetime.now()
+    stamp = now.strftime("%m%d_%Hh%Mm%Ss")
+    run_id = f"{func_name}_{stamp}_{uuid.uuid4().hex[:6]}"
+    path = Path(".cache") / run_id
+    path.mkdir(parents=True, exist_ok=True)
+    return path
+
+def wrap_dump(func: Callable, cache_dir: Path | None):
+    """Wrap a function so results are dumped to .pkl when cache_dir is set."""
+    if cache_dir is None:
+        return func
+
+    def wrapped(x, *args, **kwargs):
+        res = func(x, *args, **kwargs)
+        p = cache_dir / f"{uuid.uuid4().hex}.pkl"
+        with open(p, "wb") as fh:
+            pickle.dump(res, fh)
+        return str(p)
+    return wrapped
+
+# ─── ray management ─────────────────────────────────────────
+
+RAY_WORKER = None
+
+def ensure_ray(workers: int, pbar: tqdm | None = None):
+    """Initialize or reinitialize Ray with a given worker count, log to bar postfix."""
+    global RAY_WORKER
+    if not ray.is_initialized() or RAY_WORKER != workers:
+        if ray.is_initialized() and pbar:
+            pbar.set_postfix_str(f"Restarting Ray {workers} workers")
+        ray.shutdown()
+        t0 = time.time()
+        ray.init(num_cpus=workers, ignore_reinit_error=True)
+        took = time.time() - t0
+        if pbar:
+            pbar.set_postfix_str(f"ray.init {workers} took {took:.2f}s")
+        RAY_WORKER = workers
+
+# ─── main API ───────────────────────────────────────────────
+from typing import Literal
 
-T = TypeVar("T")
-
-if hasattr(multiprocessing, "set_start_method"):
-    try:
-        multiprocessing.set_start_method("spawn", force=True)
-    except RuntimeError:
-        pass
-
-try:
-    from tqdm import tqdm
-except ImportError:  # pragma: no cover
-    tqdm = None  # type: ignore[assignment]
-
-
-# ──── internal helpers ────────────────────────────────────────────────────
-
-
-def _group_iter(src: Iterable[Any], size: int) -> Iterable[list[Any]]:
-    "Yield *size*-sized chunks from *src*."
-    it = iter(src)
-    while chunk := list(islice(it, size)):
-        yield chunk
-
-
-def _short_tb() -> str:
-    tb = "".join(traceback.format_exc())
-    return "\n".join(ln for ln in tb.splitlines() if "multi_process" not in ln)
-
-
-def _safe_call(func: Callable, obj, fixed):
-    try:
-        return func(obj, **fixed)
-    except Exception as exc:
-        func_name = getattr(func, "__name__", str(func))
-        raise RuntimeError(
-            f"{func_name}({obj!r}) failed: {exc}\n{_short_tb()}"
-        ) from exc
-
-
-def _worker_process(
-    func: Callable, item_batch: Any, fixed_kwargs: dict, batch_size: int
-):
-    """Worker function executed in each process."""
-    if batch_size > 1:
-        results = []
-        for itm in item_batch:
-            try:
-                results.append(_safe_call(func, itm, fixed_kwargs))
-            except Exception:
-                results.append(None)
-        return results
-    return _safe_call(func, item_batch, fixed_kwargs)
-
-
-# ──── public API ──────────────────────────────────────────────────────────
 def multi_process(
     func: Callable[[Any], Any],
-    inputs: Iterable[Any],
+    items: list[Any] | None = None,
     *,
-    workers: Union[int, None] = None,
-    batch: int = 1,
-    ordered: bool = True,
-    progress: bool = False,
-    inflight: Union[int, None] = None,
-    timeout: Union[float, None] = None,
-    stop_on_error: bool = True,
-    process_update_interval=10,
-    for_loop: bool = False,
-    **fixed_kwargs,
+    inputs: list[Any] | None = None,
+    workers: int | None = None,
+    lazy_output: bool = False,
+    progress: bool = True,
+    # backend: str = "ray",  # "seq", "ray", or "fastcore"
+    backend: Literal["seq", "ray", "mp", "threadpool"] = "ray",
+    # Additional optional knobs (accepted for compatibility)
+    batch: int | None = None,
+    ordered: bool | None = None,
+    process_update_interval: int | None = None,
+    stop_on_error: bool | None = None,
+    **func_kwargs: Any,
 ) -> list[Any]:
     """
-    Simple multi‑processing parallel map that returns a *list*.
-
-    Parameters
-    ----------
-    func – target callable executed in separate processes, must be of the form f(obj, ...).
-    inputs – iterable with the objects.
-    workers – process pool size (defaults to :pyfunc:`os.cpu_count()`).
-    batch – package *batch* inputs into one call to reduce IPC overhead.
-    ordered – keep original order; if ``False`` results stream as finished.
-    progress – show a tqdm bar (requires *tqdm*).
-    inflight – max logical items concurrently submitted
-               *(default: ``workers × 4``)*.
-    timeout – overall timeout for the mapping (seconds).
-    stop_on_error – raise immediately on first exception (default) or
-                    substitute failing result with ``None``.
-    **fixed_kwargs – static keyword args forwarded to every ``func()`` call.
+    Multi-process map with selectable backend.
+
+    backend:
+      - "seq": run sequentially
+      - "ray": run in parallel with Ray
+      - "fastcore": run in parallel with fastcore.parallel
+
+    If lazy_output=True, every result is saved to .pkl and
+    the returned list contains file paths.
     """
-    if for_loop:
-        ret = []
-        for arg in inputs:
-            ret.append(func(arg, **fixed_kwargs))
-        return ret
+
+    # unify items
+    if items is None and inputs is not None:
+        items = list(inputs)
+    if items is None:
+        raise ValueError("'items' or 'inputs' must be provided")
 
     if workers is None:
         workers = os.cpu_count() or 1
-    if inflight is None:
-        inflight = workers * 4
-    if batch < 1:
-        raise ValueError("batch must be ≥ 1")
-
-    try:
-        n_inputs = len(inputs)  # type: ignore[arg-type]
-    except Exception:
-        n_inputs = None
-
-    src_iter: Iterator[Any] = iter(inputs)
-    if batch > 1:
-        src_iter = cast(Iterator[Any], _group_iter(src_iter, batch))
-
-    logical_total = n_inputs
-    bar = None
-    last_bar = 0
-    if progress and tqdm is not None and logical_total is not None:
-        bar = tqdm(
-            total=logical_total,
-            ncols=80,
-            colour="green",
-            bar_format="{l_bar}{bar}| {n_fmt}/{total_fmt}"
-            " [{elapsed}<{remaining}, {rate_fmt}{postfix}]",
-        )
-
-    if ordered and logical_total is not None:
-        results: list[Any] = [None] * logical_total
-    else:
-        results = []
-
-    completed = 0
-    next_idx = 0
-
-    with ProcessPoolExecutor(max_workers=workers) as pool:
-        futures = set()
-
-        for _ in range(min(inflight, workers)):
-            try:
-                arg = next(src_iter)
-            except StopIteration:
-                break
-            fut = pool.submit(_worker_process, func, arg, fixed_kwargs, batch)
-            fut.idx = next_idx  # type: ignore[attr-defined]
-            futures.add(fut)
-            next_idx += len(arg) if batch > 1 else 1
-
-        while futures:
-            for fut in as_completed(futures, timeout=timeout):
-                futures.remove(fut)
-                idx = fut.idx  # type: ignore[attr-defined]
-                try:
-                    res = fut.result()
-                except Exception:
-                    if stop_on_error:
-                        raise
-                    num_items = batch if batch > 1 else 1
-                    res = [None] * num_items if batch > 1 else None
-
-                out_items = res if batch > 1 else [res]
-                if out_items is None:
-                    out_items = []
-
-                if ordered and logical_total is not None:
-                    if isinstance(out_items, list) and len(out_items) > 0:
-                        for i, item in enumerate(out_items):
-                            if idx + i < len(results):
-                                results[idx + i] = item
-                else:
-                    if isinstance(out_items, list):
-                        results.extend(out_items)
-
-                completed += len(out_items)
-
-                if bar and completed - last_bar >= process_update_interval:
-                    bar.update(completed - last_bar)
-                    last_bar = completed
-
-                try:
-                    while next_idx - completed < inflight:
-                        arg = next(src_iter)
-                        fut2 = pool.submit(
-                            _worker_process, func, arg, fixed_kwargs, batch
-                        )
-                        fut2.idx = next_idx  # type: ignore[attr-defined]
-                        futures.add(fut2)
-                        next_idx += len(arg) if batch > 1 else 1
-                except StopIteration:
-                    pass
-                break
-
-    if bar:
-        bar.update(completed - last_bar)
-        bar.close()
-
-    return results
-
-
-__all__ = ["multi_process"]
+
+    # build cache dir + wrap func
+    cache_dir = _build_cache_dir(func, items) if lazy_output else None
+    f_wrapped = wrap_dump(func, cache_dir)
+
+    total = len(items)
+    with tqdm(total=total, desc=f"multi_process [{backend}]", disable=not progress) as pbar:
+
+        # ---- sequential backend ----
+        if backend == "seq":
+            pbar.set_postfix_str("backend=seq")
+            results = []
+            for x in items:
+                results.append(f_wrapped(x, **func_kwargs))
+                pbar.update(1)
+            return results
+
+        # ---- ray backend ----
+        if backend == "ray":
+            pbar.set_postfix_str("backend=ray")
+            ensure_ray(workers, pbar)
+
+            @ray.remote
+            def _task(x):
+                return f_wrapped(x, **func_kwargs)
+
+            refs = [_task.remote(x) for x in items]
+
+            results = []
+            for r in refs:
+                results.append(ray.get(r))
+                pbar.update(1)
+            return results
+
+        # ---- fastcore backend ----
+        if backend == "mp":
+            results = parallel(f_wrapped, items, n_workers=workers, progress=progress, threadpool=False)
+            return list(results)
+        if backend == "threadpool":
+            results = parallel(f_wrapped, items, n_workers=workers, progress=progress, threadpool=True)
+            return list(results)
+
+        raise ValueError(f"Unsupported backend: {backend!r}")
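Reader's note: the RECORD section below shows speedy_utils/multi_worker/process.py shrinking from 6849 to 4738 bytes, which matches this rewrite. A minimal sketch of how the new multi_process signature might be called, based only on the code above; the worker function is hypothetical, the default "ray" backend requires the ray package, and "mp"/"threadpool" delegate to fastcore.parallel.

    from speedy_utils.multi_worker.process import multi_process

    def square(x):  # hypothetical worker
        return x * x

    # Sequential backend: no extra dependencies, handy for debugging.
    results = multi_process(square, items=list(range(100)), backend="seq")

    # Ray backend with lazy output: each returned entry is the path of a
    # pickled result under .cache/<func>_<timestamp>_<id>/.
    paths = multi_process(square, items=list(range(1000)), workers=8, backend="ray", lazy_output=True)

Note that the docstring above still names "fastcore" as a backend, while the Literal type and the dispatch code accept "mp" and "threadpool"; the sketch uses the values the code actually dispatches on.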
@@ -1,6 +1,6 @@
-Metadata-Version: 2.3
+Metadata-Version: 2.4
 Name: speedy-utils
-Version: 1.1.16
+Version: 1.1.18
 Summary: Fast and easy-to-use package for data science
 Author: AnhVTH
 Author-email: anhvth.226@gmail.com
@@ -12,6 +12,7 @@ Classifier: Programming Language :: Python :: 3.10
 Classifier: Programming Language :: Python :: 3.11
 Classifier: Programming Language :: Python :: 3.12
 Classifier: Programming Language :: Python :: 3.13
+Classifier: Programming Language :: Python :: 3.14
 Requires-Dist: bump2version
 Requires-Dist: cachetools
 Requires-Dist: debugpy
@@ -1,16 +1,20 @@
-llm_utils/__init__.py,sha256=Ap_X4yvwCroQvTveBx7_LkGMpVnw_rBoY2NrqAI-SQ4,732
+llm_utils/__init__.py,sha256=KjgorCpl2YbAGaqaOvFDDlE7V2GUzxFx_Xyz5ROnWZc,916
 llm_utils/chat_format/__init__.py,sha256=8dBIUqFJvkgQYedxBtcyxt-4tt8JxAKVap2JlTXmgaM,737
-llm_utils/chat_format/display.py,sha256=M-__JpcJSqjqeP4LiW7-yF8fVL37yUEUdaNC4VEgIo8,10181
+llm_utils/chat_format/display.py,sha256=3jKDm4OTrvytK1qBhSOjRLltUIObHsYFdBLgm8SVDE8,14159
 llm_utils/chat_format/transform.py,sha256=eU0c3PdAHCNLuGP1UqPwln0B34Lv3bt_uV9v9BrlCN4,5402
 llm_utils/chat_format/utils.py,sha256=xTxN4HrLHcRO2PfCTR43nH1M5zCa7v0kTTdzAcGkZg0,1229
 llm_utils/group_messages.py,sha256=Oe2tlhg-zRodG1-hodYebddrR77j9UdE05LzJw0EvYI,3622
-llm_utils/lm/__init__.py,sha256=rX36_MsnekM5GHwWS56XELbm4W5x2TDwnPERDTfo0eU,194
+llm_utils/lm/__init__.py,sha256=totIZnq1P8eNlfVco0OfdGdTNt1-wSXDSRReRRzYYxw,319
 llm_utils/lm/async_lm/__init__.py,sha256=PUBbCuf5u6-0GBUu-2PI6YAguzsyXj-LPkU6vccqT6E,121
 llm_utils/lm/async_lm/_utils.py,sha256=P1-pUDf_0pDmo8WTIi43t5ARlyGA1RIJfpAhz-gfA5g,6105
-llm_utils/lm/async_lm/async_llm_task.py,sha256=iXSTbf-KekXncVVnic-v4dTq5HBDjbyLwhgo0Y-wp7Q,19167
-llm_utils/lm/async_lm/async_lm.py,sha256=slGOUXFeWAy3Ak7Xj7Z4JNqCgAUBo21Hjg3RD75Ul2Q,13396
-llm_utils/lm/async_lm/async_lm_base.py,sha256=Qh9Bx_S-FboO7huUi6TqP3KiTVHDH0C-Tfbd_UJC7Cc,8122
+llm_utils/lm/async_lm/async_llm_task.py,sha256=A5WLIN3v-zpl-sJGiykyo8wOCYEpA8ja70MJcn5t7O4,18668
+llm_utils/lm/async_lm/async_lm.py,sha256=e3o9cyMbkVz_jQDTjJv2ybET_5mY012zdZGjNwi4Qk4,13719
+llm_utils/lm/async_lm/async_lm_base.py,sha256=iJgtzI6pVJzWtlXGqVLwgCIb-FzZAa3E5xW8yhyHUmM,8426
 llm_utils/lm/async_lm/lm_specific.py,sha256=KmqdCm3SJ5MqN-dRJd6S5tq5-ve1X2eNWf2CMFtc_3s,3926
+llm_utils/lm/base_prompt_builder.py,sha256=OLqyxbA8QeYIVFzB9EqxUiE_P2p4_MD_Lq4WSwxFtKU,12136
+llm_utils/lm/llm_task.py,sha256=K5c27iYM9etAbdDM1WiO3-GjTvl1dkzt2sIaW3N1YA0,15483
+llm_utils/lm/lm.py,sha256=8TaLuU7naPQbOFmiS2NQyWVLG0jUUzRRBQsR0In7GVo,7249
+llm_utils/lm/lm_base.py,sha256=pqbHZOdR7yUMpvwt8uBG1dZnt76SY_Wk8BkXQQ-mpWs,9557
 llm_utils/lm/openai_memoize.py,sha256=DdMl31cV9AqLlkARajZrqAKCyhvH8JQk2SAHMSzO3mk,3024
 llm_utils/lm/utils.py,sha256=a0KJj8vjT2fHKb7GKGNJjJHhKLThwpxIL7vnV9Fr3ZY,4584
 llm_utils/scripts/README.md,sha256=yuOLnLa2od2jp4wVy3rV0rESeiV3o8zol5MNMsZx0DY,999
@@ -18,7 +22,7 @@ llm_utils/scripts/vllm_load_balancer.py,sha256=TT5Ypq7gUcl52gRFp--ORFFjzhfGlcaX2
 llm_utils/scripts/vllm_serve.py,sha256=gJ0-y4kybMfSt8qzye1pJqGMY3x9JLRi6Tu7RjJMnss,14771
 llm_utils/vector_cache/__init__.py,sha256=i1KQuC4OhPewYpFl9X6HlWFBuASCTx2qgGizhpZhmn0,862
 llm_utils/vector_cache/cli.py,sha256=DMXTj8nZ2_LRjprbYPb4uzq04qZtOfBbmblmaqDcCuM,6251
-llm_utils/vector_cache/core.py,sha256=rsfZbaUk8ZbAKHcStbmxeZbk8LfTvO_prmqof-WVvC0,22663
+llm_utils/vector_cache/core.py,sha256=P0VopzMmfnGaYTTEiccXprsyjruje3QT0_AFXF1lZC0,33582
 llm_utils/vector_cache/types.py,sha256=ru8qmUZ8_lNd3_oYpjCMtpXTsqmwsSBe56Z4hTWm3xI,435
 llm_utils/vector_cache/utils.py,sha256=dwbbXlRrARrpmS4YqSlYQqrTURg0UWe8XvaAWcX05MM,1458
 speedy_utils/__init__.py,sha256=nJpUb5Oa3STDbqPSiWXoI-IvKntyRYzYxkYW4GM2i_Q,5740
@@ -28,18 +32,19 @@ speedy_utils/common/clock.py,sha256=3n4FkCW0dz46O8By09V5Pve1DSMgpLDRbWEVRryryeQ,
 speedy_utils/common/function_decorator.py,sha256=BspJ0YuGL6elS7lWBAgELZ-sCfED_1N2P5fgH-fCRUQ,2132
 speedy_utils/common/logger.py,sha256=a2iZx0eWyfi2-2X_H2QmfuA3tfR7_XSM7Nd0GdUnUOs,6435
 speedy_utils/common/notebook_utils.py,sha256=-97kehJ_Gg3TzDLubsLIYJcykqX1NXhbvBO6nniZSYM,2063
+speedy_utils/common/patcher.py,sha256=VCmdxyTF87qroggQkQklRPhAOPJbeBqhcJoTsLcDxNw,2303
 speedy_utils/common/report_manager.py,sha256=eBiw5KY6bWUhwki3B4lK5o8bFsp7L5x28X9GCI-Sd1w,3899
-speedy_utils/common/utils_cache.py,sha256=0cQJm0nnD9755pWMdAkhDn2qIbGvJMTMUl2gibkX05E,22376
-speedy_utils/common/utils_io.py,sha256=76ZVgJwgjOznq5L_i2oyWuBnuwymjcktqSvB8VWTKsc,5250
+speedy_utils/common/utils_cache.py,sha256=BCYbtu8lWkLjrPRQnOWHr41IBOLrpOvXIOI4Sg389nc,22430
+speedy_utils/common/utils_io.py,sha256=gv5YN4tYvxsUcCXwaY_hv2g6a9HNltxBC0kNE0iZLak,14284
 speedy_utils/common/utils_misc.py,sha256=cdEuBBpiB1xpuzj0UBDHDuTIerqsMIw37ENq6EXliOw,1795
 speedy_utils/common/utils_print.py,sha256=syRrnSFtguxrV-elx6DDVcSGu4Qy7D_xVNZhPwbUY4A,4864
 speedy_utils/multi_worker/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-speedy_utils/multi_worker/process.py,sha256=MJ5njqjY9nMo-Z1oXMbzkppuYVJCcCtFsLo8lbCh5zs,6849
+speedy_utils/multi_worker/process.py,sha256=LmNfV8tfdsf6PFTNzu12C_QWNfEUhgi1MeAJGeMTs1k,4738
 speedy_utils/multi_worker/thread.py,sha256=f02VjJV8nudg0eA_AcfPEX7tHY4-czesuzthKZs_Hdc,16351
 speedy_utils/scripts/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 speedy_utils/scripts/mpython.py,sha256=IvywP7Y0_V6tWfMP-4MjPvN5_KfxWF21xaLJsCIayCk,3821
 speedy_utils/scripts/openapi_client_codegen.py,sha256=f2125S_q0PILgH5dyzoKRz7pIvNEjCkzpi4Q4pPFRZE,9683
-speedy_utils-1.1.16.dist-info/METADATA,sha256=euFPmJ3wunhuo_2aUpUGxfV-KlAjF8wFaEzHJqkQ6dM,7483
-speedy_utils-1.1.16.dist-info/WHEEL,sha256=b4K_helf-jlQoXBBETfwnf4B04YC67LOev0jo4fX5m8,88
-speedy_utils-1.1.16.dist-info/entry_points.txt,sha256=T1t85jwx8fK6m5msdkBGIXH5R5Kd0zSL0S6erXERPzg,237
-speedy_utils-1.1.16.dist-info/RECORD,,
+speedy_utils-1.1.18.dist-info/METADATA,sha256=dqAnyKYkHVF3HHvzhopXo6huQE16OhFMuGcQUwH6xE0,7534
+speedy_utils-1.1.18.dist-info/WHEEL,sha256=M5asmiAlL6HEcOq52Yi5mmk9KmTVjY2RDPtO4p9DMrc,88
+speedy_utils-1.1.18.dist-info/entry_points.txt,sha256=T1t85jwx8fK6m5msdkBGIXH5R5Kd0zSL0S6erXERPzg,237
+speedy_utils-1.1.18.dist-info/RECORD,,