speedy-utils 1.1.5__py3-none-any.whl → 1.1.7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -8,12 +8,12 @@ from IPython.display import HTML, display
8
8
  from tabulate import tabulate
9
9
 
10
10
 
11
- def change_dir(target_directory: str = 'POLY') -> None:
11
+ def change_dir(target_directory: str = "POLY") -> None:
12
12
  """Change directory to the first occurrence of x in the current path."""
13
- cur_dir = pathlib.Path('./')
13
+ cur_dir = pathlib.Path("./")
14
14
  target_dir = str(cur_dir.absolute()).split(target_directory)[0] + target_directory
15
15
  os.chdir(target_dir)
16
- print(f'Current dir: {target_dir}')
16
+ print(f"Current dir: {target_dir}")
17
17
 
18
18
 
19
19
  def display_pretty_table_html(data: dict) -> None:
@@ -60,4 +60,4 @@ def print_table(data: Any, use_html: bool = True) -> None:
60
60
  if use_html:
61
61
  display(HTML(table))
62
62
  else:
63
- print(table)
63
+ print(table)
@@ -3,7 +3,6 @@ from collections import defaultdict
3
3
  from datetime import datetime
4
4
 
5
5
 
6
-
7
6
  class ReportManager:
8
7
  def __init__(self):
9
8
  self.cache_dir = os.path.expanduser("~/.cache/speedy_utils")
@@ -41,7 +40,7 @@ class ReportManager:
41
40
  [
42
41
  "\n### Results Overview",
43
42
  f"- Total items processed: {len(results)}",
44
- f"- Success rate: {(len(results) - len(errors))/len(results)*100:.1f}%",
43
+ f"- Success rate: {(len(results) - len(errors)) / len(results) * 100:.1f}%",
45
44
  f"- Total errors: {len(errors)}",
46
45
  ]
47
46
  )
@@ -49,7 +48,7 @@ class ReportManager:
49
48
  if execution_time:
50
49
  md_content.append(f"- Execution time: {execution_time:.2f}s")
51
50
  md_content.append(
52
- f"- Average speed: {len(results)/execution_time:.1f} items/second"
51
+ f"- Average speed: {len(results) / execution_time:.1f} items/second"
53
52
  )
54
53
 
55
54
  if error_groups:
@@ -1,3 +1,4 @@
1
+ import asyncio
1
2
  import functools
2
3
  import inspect
3
4
  import json
@@ -6,7 +7,7 @@ import os.path as osp
6
7
  import pickle
7
8
  import uuid
8
9
  from threading import Lock
9
- from typing import Any, Literal
10
+ from typing import Any, Awaitable, Callable, Literal, TypeVar
10
11
 
11
12
  import cachetools
12
13
  import pandas as pd
@@ -26,6 +27,10 @@ thread_locker = Lock()
26
27
  disk_lock = Lock()
27
28
  mem_lock = Lock()
28
29
 
30
+ # Add async-specific types
31
+ T = TypeVar('T')
32
+ AsyncFunc = Callable[..., Awaitable[T]]
33
+
29
34
 
30
35
  def compute_func_id(func, args, kwargs, ignore_self, keys):
31
36
  func_source = get_source(func)
@@ -144,6 +149,61 @@ def _disk_memoize(func, keys, cache_dir, ignore_self, verbose):
144
149
  return wrapper
145
150
 
146
151
 
152
+ def _async_disk_memoize(func, keys, cache_dir, ignore_self, verbose):
153
+ @functools.wraps(func)
154
+ async def wrapper(*args, **kwargs):
155
+ try:
156
+ # Compute cache path as before
157
+ func_source, sub_dir, key_id = compute_func_id(
158
+ func, args, kwargs, ignore_self, keys
159
+ )
160
+ if func_source is None:
161
+ return await func(*args, **kwargs)
162
+ if sub_dir == "funcs":
163
+ cache_path = osp.join(cache_dir, sub_dir, func.__name__, key_id)
164
+ else:
165
+ cache_path = osp.join(cache_dir, sub_dir, key_id)
166
+ mkdir_or_exist(osp.dirname(cache_path))
167
+
168
+ # First check with disk lock (run in thread to avoid blocking)
169
+ def check_cache():
170
+ with disk_lock:
171
+ if osp.exists(cache_path):
172
+ try:
173
+ return load_json_or_pickle(cache_path)
174
+ except Exception as e:
175
+ if osp.exists(cache_path):
176
+ os.remove(cache_path)
177
+ logger.opt(depth=1).warning(
178
+ f"Error loading cache: {str(e)[:100]}, continue to recompute"
179
+ )
180
+ return None
181
+
182
+ # Run cache check in thread pool to avoid blocking
183
+ loop = asyncio.get_event_loop()
184
+ cached_result = await loop.run_in_executor(None, check_cache)
185
+ if cached_result is not None:
186
+ return cached_result
187
+
188
+ result = await func(*args, **kwargs)
189
+
190
+ # Write result under disk lock (run in thread to avoid blocking)
191
+ def write_cache():
192
+ with disk_lock:
193
+ if not osp.exists(cache_path):
194
+ dump_json_or_pickle(result, cache_path)
195
+
196
+ await loop.run_in_executor(None, write_cache)
197
+ return result
198
+ except Exception as e:
199
+ logger.opt(depth=1).warning(
200
+ f"Failed to cache {func.__name__}: {e}, continue to recompute without cache"
201
+ )
202
+ return await func(*args, **kwargs)
203
+
204
+ return wrapper
205
+
206
+
147
207
  def _memory_memoize(func, size, keys, ignore_self):
148
208
  global LRU_MEM_CACHE
149
209
  if LRU_MEM_CACHE.maxsize != size:
@@ -176,6 +236,38 @@ def _memory_memoize(func, size, keys, ignore_self):
176
236
  return wrapper
177
237
 
178
238
 
239
+ def _async_memory_memoize(func, size, keys, ignore_self):
240
+ global LRU_MEM_CACHE
241
+ if LRU_MEM_CACHE.maxsize != size:
242
+ LRU_MEM_CACHE = cachetools.LRUCache(maxsize=size)
243
+
244
+ @functools.wraps(func)
245
+ async def wrapper(*args, **kwargs):
246
+ func_source, sub_dir, key_id = compute_func_id(
247
+ func, args, kwargs, ignore_self, keys
248
+ )
249
+ if func_source is None:
250
+ return await func(*args, **kwargs)
251
+ name = identify((func_source, sub_dir, key_id))
252
+
253
+ if not hasattr(func, "_mem_cache"):
254
+ func._mem_cache = LRU_MEM_CACHE
255
+
256
+ with mem_lock:
257
+ if name in func._mem_cache:
258
+ # logger.debug(f"Cache HIT (memory) for {func.__name__}, key={name}")
259
+ return func._mem_cache[name]
260
+
261
+ result = await func(*args, **kwargs)
262
+
263
+ with mem_lock:
264
+ if name not in func._mem_cache:
265
+ func._mem_cache[name] = result
266
+ return result
267
+
268
+ return wrapper
269
+
270
+
179
271
  def both_memoize(func, keys, cache_dir, ignore_self):
180
272
  @functools.wraps(func)
181
273
  def wrapper(*args, **kwargs):
@@ -220,6 +312,63 @@ def both_memoize(func, keys, cache_dir, ignore_self):
220
312
  return wrapper
221
313
 
222
314
 
315
+ def _async_both_memoize(func, keys, cache_dir, ignore_self):
316
+ @functools.wraps(func)
317
+ async def wrapper(*args, **kwargs):
318
+ func_source, sub_dir, key_id = compute_func_id(
319
+ func, args, kwargs, ignore_self, keys
320
+ )
321
+ if func_source is None:
322
+ return await func(*args, **kwargs)
323
+
324
+ mem_key = identify((func_source, sub_dir, key_id))
325
+ if not hasattr(func, "_mem_cache"):
326
+ func._mem_cache = LRU_MEM_CACHE
327
+
328
+ with mem_lock:
329
+ if mem_key in func._mem_cache:
330
+ # logger.debug(f"Cache HIT (memory) for {func.__name__}, key={mem_key}")
331
+ return func._mem_cache[mem_key]
332
+
333
+ if sub_dir == "funcs":
334
+ cache_path = osp.join(cache_dir, sub_dir, func.__name__, key_id)
335
+ else:
336
+ cache_path = osp.join(cache_dir, sub_dir, key_id)
337
+ mkdir_or_exist(osp.dirname(cache_path))
338
+
339
+ # Check disk cache in thread pool to avoid blocking
340
+ def check_disk_cache():
341
+ with disk_lock:
342
+ if osp.exists(cache_path):
343
+ return load_json_or_pickle(cache_path)
344
+ return None
345
+
346
+ loop = asyncio.get_event_loop()
347
+ disk_result = await loop.run_in_executor(None, check_disk_cache)
348
+
349
+ if disk_result is not None:
350
+ with mem_lock:
351
+ func._mem_cache[mem_key] = disk_result
352
+ return disk_result
353
+
354
+ # logger.debug(f"Cache MISS for {func.__name__}, key={cache_path}")
355
+ result = await func(*args, **kwargs)
356
+
357
+ # Write to disk in thread pool to avoid blocking
358
+ def write_disk_cache():
359
+ with disk_lock:
360
+ if not osp.exists(cache_path):
361
+ dump_json_or_pickle(result, cache_path)
362
+
363
+ await loop.run_in_executor(None, write_disk_cache)
364
+
365
+ with mem_lock:
366
+ func._mem_cache[mem_key] = result
367
+ return result
368
+
369
+ return wrapper
370
+
371
+
223
372
  def memoize(
224
373
  _func=None,
225
374
  *,
@@ -234,7 +383,17 @@ def memoize(
234
383
  cache_dir = osp.expanduser(cache_dir)
235
384
 
236
385
  def decorator(func):
386
+ # Check if function is async
387
+ is_async = inspect.iscoroutinefunction(func)
388
+
237
389
  if cache_type == "memory":
390
+ if is_async:
391
+ return _async_memory_memoize(
392
+ func,
393
+ size,
394
+ keys,
395
+ ignore_self,
396
+ )
238
397
  return _memory_memoize(
239
398
  func,
240
399
  size,
@@ -242,6 +401,14 @@ def memoize(
242
401
  ignore_self,
243
402
  )
244
403
  elif cache_type == "disk":
404
+ if is_async:
405
+ return _async_disk_memoize(
406
+ func,
407
+ keys,
408
+ cache_dir,
409
+ ignore_self,
410
+ verbose,
411
+ )
245
412
  return _disk_memoize(
246
413
  func,
247
414
  keys,
@@ -249,6 +416,15 @@ def memoize(
249
416
  ignore_self,
250
417
  verbose,
251
418
  )
419
+
420
+ # cache_type == "both"
421
+ if is_async:
422
+ return _async_both_memoize(
423
+ func,
424
+ keys,
425
+ cache_dir,
426
+ ignore_self,
427
+ )
252
428
  return both_memoize(
253
429
  func,
254
430
  keys,
@@ -256,9 +432,63 @@ def memoize(
256
432
  verbose,
257
433
  )
258
434
 
435
+ # Handle both @memoize and @memoize() usage patterns
259
436
  if _func is None:
260
437
  return decorator
261
- return decorator(_func)
438
+ else:
439
+ return decorator(_func)
440
+
441
+
442
+ def amemoize(
443
+ _func=None,
444
+ *,
445
+ keys: list[str] | None = None,
446
+ cache_dir: str = SPEED_CACHE_DIR,
447
+ cache_type: Literal["memory", "disk", "both"] = "disk",
448
+ size: int = 10240,
449
+ ignore_self: bool = True,
450
+ verbose: bool = False,
451
+ ):
452
+ """
453
+ Async-specific memoization decorator for coroutine functions.
454
+
455
+ Args:
456
+ _func: The async function to memoize (when used without parentheses)
457
+ keys: Specific argument keys to use for cache key generation
458
+ cache_dir: Directory for disk cache storage
459
+ cache_type: Type of caching - "memory", "disk", or "both"
460
+ size: Size of memory cache (for memory/both types)
461
+ ignore_self: Whether to ignore 'self' parameter in cache key
462
+ verbose: Enable verbose logging
463
+
464
+ Returns:
465
+ Decorated async function with memoization
466
+
467
+ Example:
468
+ @amemoize(cache_type="both")
469
+ async def my_async_func(x: int) -> str:
470
+ return str(x)
471
+ """
472
+ if "~/" in cache_dir:
473
+ cache_dir = osp.expanduser(cache_dir)
474
+
475
+ def decorator(func):
476
+ # Ensure the function is actually async
477
+ if not inspect.iscoroutinefunction(func):
478
+ raise ValueError(f"amemoize can only be used with async functions. {func.__name__} is not async.")
479
+
480
+ if cache_type == "memory":
481
+ return _async_memory_memoize(func, size, keys, ignore_self)
482
+ elif cache_type == "disk":
483
+ return _async_disk_memoize(func, keys, cache_dir, ignore_self, verbose)
484
+ else: # cache_type == "both"
485
+ return _async_both_memoize(func, keys, cache_dir, ignore_self)
486
+
487
+ # Handle both @amemoize and @amemoize() usage patterns
488
+ if _func is None:
489
+ return decorator
490
+ else:
491
+ return decorator(_func)
262
492
 
263
493
 
264
- __all__ = ["memoize", "identify", "identify_uuid"]
494
+ __all__ = ["memoize", "identify", "identify_uuid", "amemoize"]
@@ -147,6 +147,8 @@ def jdumps(obj, ensure_ascii=False, indent=2, **kwargs):
147
147
  return json.dumps(obj, ensure_ascii=ensure_ascii, indent=indent, **kwargs)
148
148
 
149
149
 
150
+
151
+
150
152
  __all__ = [
151
153
  "dump_json_or_pickle",
152
154
  "dump_jsonl",
@@ -91,9 +91,7 @@ def main():
91
91
  cpu_end = ((i + 1) * cpu_per_process - 1) % args.total_cpu
92
92
  ENV = f"CUDA_VISIBLE_DEVICES={gpu} MP_ID={i} MP_TOTAL={args.total_fold}"
93
93
  if taskset_path:
94
- fold_cmd = (
95
- f"{ENV} {taskset_path} -c {cpu_start}-{cpu_end} {path_python} {cmd_str}"
96
- )
94
+ fold_cmd = f"{ENV} {taskset_path} -c {cpu_start}-{cpu_end} {path_python} {cmd_str}"
97
95
  else:
98
96
  fold_cmd = f"{ENV} {path_python} {cmd_str}"
99
97
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: speedy-utils
3
- Version: 1.1.5
3
+ Version: 1.1.7
4
4
  Summary: Fast and easy-to-use package for data science
5
5
  Author: AnhVTH
6
6
  Author-email: anhvth.226@gmail.com
@@ -0,0 +1,39 @@
1
+ llm_utils/__init__.py,sha256=AYmJ297r0OjYmV1uNWFwznbqfuLTjCV7L2Ee12jxhpw,602
2
+ llm_utils/chat_format/__init__.py,sha256=8dBIUqFJvkgQYedxBtcyxt-4tt8JxAKVap2JlTXmgaM,737
3
+ llm_utils/chat_format/display.py,sha256=M-__JpcJSqjqeP4LiW7-yF8fVL37yUEUdaNC4VEgIo8,10181
4
+ llm_utils/chat_format/transform.py,sha256=eU0c3PdAHCNLuGP1UqPwln0B34Lv3bt_uV9v9BrlCN4,5402
5
+ llm_utils/chat_format/utils.py,sha256=xTxN4HrLHcRO2PfCTR43nH1M5zCa7v0kTTdzAcGkZg0,1229
6
+ llm_utils/group_messages.py,sha256=Oe2tlhg-zRodG1-hodYebddrR77j9UdE05LzJw0EvYI,3622
7
+ llm_utils/lm/__init__.py,sha256=rX36_MsnekM5GHwWS56XELbm4W5x2TDwnPERDTfo0eU,194
8
+ llm_utils/lm/async_lm/__init__.py,sha256=PUBbCuf5u6-0GBUu-2PI6YAguzsyXj-LPkU6vccqT6E,121
9
+ llm_utils/lm/async_lm/_utils.py,sha256=P1-pUDf_0pDmo8WTIi43t5ARlyGA1RIJfpAhz-gfA5g,6105
10
+ llm_utils/lm/async_lm/async_llm_task.py,sha256=i5Z9yLBnxFK_937JHFMu7A-tImQnRuGAsnWDcC70syg,18648
11
+ llm_utils/lm/async_lm/async_lm.py,sha256=J1KC7qCpG_CyJMWca4q71la7JHoANiLLSNQrQH44-z0,14045
12
+ llm_utils/lm/async_lm/async_lm_base.py,sha256=BIIKAgmSZ9S5gLIf4AKGWg8N1eAr0l_fRfgG6UnKCK4,14839
13
+ llm_utils/lm/async_lm/lm_specific.py,sha256=KmqdCm3SJ5MqN-dRJd6S5tq5-ve1X2eNWf2CMFtc_3s,3926
14
+ llm_utils/lm/utils.py,sha256=a0KJj8vjT2fHKb7GKGNJjJHhKLThwpxIL7vnV9Fr3ZY,4584
15
+ llm_utils/scripts/README.md,sha256=yuOLnLa2od2jp4wVy3rV0rESeiV3o8zol5MNMsZx0DY,999
16
+ llm_utils/scripts/vllm_load_balancer.py,sha256=TT5Ypq7gUcl52gRFp--ORFFjzhfGlcaX2rkRv8NxlxU,37259
17
+ llm_utils/scripts/vllm_serve.py,sha256=4NaqpVs7LBvxtvTCMPsNCAOfqiWkKRttxWMmWY7SitA,14729
18
+ speedy_utils/__init__.py,sha256=ZtnitBT13OS3xjmsVoVHjmL5RIWaH12PMcp6UDHQjaE,5776
19
+ speedy_utils/all.py,sha256=t-HKzDmhF1MTFnmq7xRnPs5nFG_aZaLH9Ua0RM6nQ9Y,4855
20
+ speedy_utils/common/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
21
+ speedy_utils/common/clock.py,sha256=3n4FkCW0dz46O8By09V5Pve1DSMgpLDRbWEVRryryeQ,7423
22
+ speedy_utils/common/function_decorator.py,sha256=BspJ0YuGL6elS7lWBAgELZ-sCfED_1N2P5fgH-fCRUQ,2132
23
+ speedy_utils/common/logger.py,sha256=JqW9gG4ujfq4RldNeYP2p52BYgCwjkYeGGYyzLn6mfY,6422
24
+ speedy_utils/common/notebook_utils.py,sha256=-97kehJ_Gg3TzDLubsLIYJcykqX1NXhbvBO6nniZSYM,2063
25
+ speedy_utils/common/report_manager.py,sha256=eBiw5KY6bWUhwki3B4lK5o8bFsp7L5x28X9GCI-Sd1w,3899
26
+ speedy_utils/common/utils_cache.py,sha256=G0M_iv3T8QqbBNNiS1LDz6MrRycQjiYLMzmHYpDUCjU,16348
27
+ speedy_utils/common/utils_io.py,sha256=tfptex3pbmhXOftr__V-3DbhuDVSm01j4vg39R5jbwI,4792
28
+ speedy_utils/common/utils_misc.py,sha256=cdEuBBpiB1xpuzj0UBDHDuTIerqsMIw37ENq6EXliOw,1795
29
+ speedy_utils/common/utils_print.py,sha256=iQqnOYw2EFC8TqeSDbrcnIQAUKT7FbB8Mec8b2aGAzw,4833
30
+ speedy_utils/multi_worker/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
31
+ speedy_utils/multi_worker/process.py,sha256=BI-sgzzQ0_N8kOfaS_3ZAGZ3d6panYzJ3-BGZthY4dQ,6824
32
+ speedy_utils/multi_worker/thread.py,sha256=u_hTwXh7_FciMa5EukdEA1fDCY_vUC4moDceBXk2b6w,16326
33
+ speedy_utils/scripts/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
34
+ speedy_utils/scripts/mpython.py,sha256=IvywP7Y0_V6tWfMP-4MjPvN5_KfxWF21xaLJsCIayCk,3821
35
+ speedy_utils/scripts/openapi_client_codegen.py,sha256=f2125S_q0PILgH5dyzoKRz7pIvNEjCkzpi4Q4pPFRZE,9683
36
+ speedy_utils-1.1.7.dist-info/METADATA,sha256=EV1Jj7hpGp5Zf9hkj0chArF9iVmJUCUFeSFW2VkXzvI,7441
37
+ speedy_utils-1.1.7.dist-info/WHEEL,sha256=b4K_helf-jlQoXBBETfwnf4B04YC67LOev0jo4fX5m8,88
38
+ speedy_utils-1.1.7.dist-info/entry_points.txt,sha256=T1t85jwx8fK6m5msdkBGIXH5R5Kd0zSL0S6erXERPzg,237
39
+ speedy_utils-1.1.7.dist-info/RECORD,,