speedy-utils 1.0.4__py3-none-any.whl → 1.0.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,138 +0,0 @@
1
- # speedy/common/utils_print.py
2
-
3
- import copy
4
- import json
5
- import pprint
6
- import textwrap
7
- from typing import Any, Dict, List, Optional
8
-
9
- from IPython.display import HTML, display
10
- from pandas import get_option
11
- from tabulate import tabulate
12
-
13
- # from .utils_cache import is_interactive_env # Adjust based on actual implementation
14
- # from .utils_misc import is_interactive
15
-
16
-
17
def display_pretty_table_html(data: Dict) -> None:
    """
    Display a two-column HTML table (key, value) in Jupyter notebooks.

    Args:
        data: Mapping whose items become the table rows. Keys and values are
            converted with ``str`` and HTML-escaped before rendering.
    """
    # Local import keeps this block self-contained; without escaping, any
    # "<", ">" or "&" in the data would be interpreted as markup.
    from html import escape

    rows = "".join(
        f"<tr><td>{escape(str(key))}</td><td>{escape(str(value))}</td></tr>"
        for key, value in data.items()
    )
    display(HTML(f"<table>{rows}</table>"))
26
-
27
def fprint(
    input_data: Any,
    key_ignore: Optional[List[str]] = None,
    key_keep: Optional[List[str]] = None,
    max_width: int = 100,
    indent: int = 2,
    depth: Optional[int] = None,
    table_format: str = "grid",
    str_wrap_width: int = 80,
) -> None:
    """
    Pretty print structured data.

    Objects exposing ``to_dict`` are converted first, and the data is
    deep-copied so the caller's object is never mutated. Dictionaries are
    rendered as a key/value table (HTML inside a Jupyter kernel, ``tabulate``
    otherwise), strings are wrapped, and everything else goes through
    ``pprint``.

    Args:
        input_data: The object to print.
        key_ignore: Dotted key paths (``"a.b"``) to drop from a dict. Ignored
            when ``key_keep`` is given.
        key_keep: Dotted key paths to keep; all other keys are dropped.
        max_width: Maximum width of the value column / ``pprint`` output.
        indent: Indentation passed to ``pprint.PrettyPrinter``.
        depth: Maximum nesting depth passed to ``pprint.PrettyPrinter``.
        table_format: ``tabulate`` table format used for dictionaries.
        str_wrap_width: Wrap width used for plain strings.
    """

    def is_interactive_env() -> bool:
        """Check if the environment is interactive (e.g., Jupyter notebook)."""
        try:
            from IPython import get_ipython
        except ImportError:
            return False
        # get_ipython() returns None outside IPython; its class name is then
        # "NoneType", which simply compares unequal.
        return get_ipython().__class__.__name__ == "ZMQInteractiveShell"

    def remove_keys(d: Dict, keys: List[str]) -> Dict:
        """Remove specified (possibly dotted) keys from a dictionary in place."""
        for key in keys:
            *parents, leaf = key.split(".")
            sub_dict: Any = d
            for part in parents:
                if not isinstance(sub_dict, dict):
                    break
                sub_dict = sub_dict.get(part, {})
            # A non-dict value along the path means there is nothing to remove.
            if isinstance(sub_dict, dict):
                sub_dict.pop(leaf, None)
        return d

    def keep_keys(d: Dict, keys: List[str]) -> Dict:
        """Keep only specified (possibly dotted) keys in a dictionary."""
        result: Dict = {}
        for key in keys:
            *parents, leaf = key.split(".")
            sub_source: Any = d
            sub_result = result
            for part in parents:
                if not isinstance(sub_source, dict) or part not in sub_source:
                    break
                sub_result = sub_result.setdefault(part, {})
                sub_source = sub_source[part]
            else:
                # A missing leaf is kept with value None, as before.
                if isinstance(sub_source, dict):
                    sub_result[leaf] = copy.deepcopy(sub_source.get(leaf))
        return result

    if hasattr(input_data, "to_dict"):
        input_data = input_data.to_dict()

    processed_data = copy.deepcopy(input_data)
    is_mapping = isinstance(processed_data, dict)

    if is_mapping:
        if key_keep is not None:
            processed_data = keep_keys(processed_data, key_keep)
        elif key_ignore is not None:
            processed_data = remove_keys(processed_data, key_ignore)

    # The HTML table helper iterates .items(), so only dicts may go there;
    # other types fall through to the text renderers below.
    if is_mapping and is_interactive_env():
        display_pretty_table_html(processed_data)
        return

    if is_mapping:
        table = [[k, v] for k, v in processed_data.items()]
        print(
            tabulate(
                table,
                headers=["Key", "Value"],
                tablefmt=table_format,
                maxcolwidths=[None, max_width],
            )
        )
    elif isinstance(processed_data, str):
        print(textwrap.fill(processed_data, width=str_wrap_width))
    else:
        printer = pprint.PrettyPrinter(width=max_width, indent=indent, depth=depth)
        printer.pprint(processed_data)
106
-
107
-
108
def print_table(data: Any) -> None:
    """
    Print data as a table.

    Args:
        data: A list of dictionaries (one row per dictionary), a dictionary
            (one ``Key``/``Value`` row per item), or a JSON string decoding
            to one of those.

    Raises:
        ValueError: If a string is not valid JSON, or a list contains
            non-dictionary items.
        TypeError: If the (decoded) data is neither a list nor a dictionary.
    """

    def _build_table(payload: Any) -> str:
        if isinstance(payload, str):
            try:
                payload = json.loads(payload)
            except json.JSONDecodeError as exc:
                raise ValueError("String input could not be decoded as JSON") from exc

        if isinstance(payload, list):
            if not all(isinstance(item, dict) for item in payload):
                raise ValueError("List must contain dictionaries")
            # An empty list has no first row to take headers from.
            if not payload:
                return ""
            # Collect headers across all rows in first-seen order, then look
            # each cell up by key so rows stay aligned even when dictionaries
            # differ in key order or are missing keys.
            headers = list(dict.fromkeys(key for item in payload for key in item))
            rows = [[item.get(header) for header in headers] for item in payload]
            return tabulate(rows, headers=headers)

        if isinstance(payload, dict):
            return tabulate(list(payload.items()), headers=["Key", "Value"])

        raise TypeError(
            "Input data must be a list of dictionaries, a dictionary, or a JSON string"
        )

    print(_build_table(data))
speedy/multi_worker.py DELETED
@@ -1,121 +0,0 @@
1
- import inspect
2
- import os
3
- from concurrent.futures import ThreadPoolExecutor
4
- from multiprocessing import Pool
5
- from typing import Any, Callable, List
6
- import asyncio
7
- from loguru import logger
8
- from tqdm import tqdm
9
-
10
-
11
- def multi_thread(
12
- func: Callable,
13
- inputs: List[Any],
14
- workers: int = 4,
15
- verbose: bool = True,
16
- desc: str | None = None,
17
- ) -> List[Any]:
18
- if desc is None:
19
- fn_name = func.__name__
20
- try:
21
- source_file = inspect.getsourcefile(func) or "<string>"
22
- source_line = inspect.getsourcelines(func)[1]
23
- file_line = f"{source_file}:{source_line}"
24
- except (TypeError, OSError):
25
- file_line = "Unknown location"
26
- desc = f"{fn_name} at {file_line}"
27
-
28
- with ThreadPoolExecutor(max_workers=workers) as executor:
29
- # Use executor.map to apply func to inputs in order
30
- map_func = executor.map(func, inputs)
31
- if verbose:
32
- results = list(tqdm(map_func, total=len(inputs), desc=desc))
33
- else:
34
- results = list(map_func)
35
- return results
36
-
37
-
38
def _init_pool_processes(func):
    """Store ``func`` in the module-level global ``_func``."""
    globals()["_func"] = func
41
-
42
-
43
def _pool_process_executor(args):
    """Call the module-global ``_func`` on ``args``.

    A tuple is spread into positional arguments; any other value is passed
    as the single argument.
    """
    return _func(*args) if isinstance(args, tuple) else _func(args)
49
-
50
-
51
def multi_process(
    func: Callable,
    inputs: List[Any],
    workers: int = 16,
    verbose: bool = True,
    desc: str = "",
) -> List[Any]:
    """
    Apply ``func`` to every item of ``inputs`` using a process pool.

    Tuple items are spread into positional arguments by
    ``_pool_process_executor``; any other item is passed as a single argument.
    Setting the environment variable ``DEBUG=1`` forces a single worker.

    Args:
        func: Callable to run in the worker processes.
        inputs: Items to process.
        workers: Number of worker processes.
        verbose: Show a ``tqdm`` progress bar when True.
        desc: Label for the log line and progress bar. Defaults to the
            callable's name and source location.

    Returns:
        The results in input order. Errors are logged rather than raised, so
        after a failure the list holds only the results collected so far
        (possibly none).
    """
    if not desc:
        # functools.partial objects and callable instances have no __name__.
        fn_name = getattr(func, "__name__", None) or repr(func)
        try:
            source_file = inspect.getsourcefile(func) or "<string>"
            source_line = inspect.getsourcelines(func)[1]
            file_line = f"{source_file}:{source_line}"
        except (TypeError, OSError):
            file_line = "Unknown location"
        desc = f"{fn_name} at {file_line}"

    if os.environ.get("DEBUG", "0") == "1":
        logger.info("DEBUGGING set num workers to 1")
        workers = 1

    logger.info("Multi-processing {} | Num samples: {}", desc, len(inputs))

    # Nothing to do: avoid spawning worker processes for an empty workload.
    if len(inputs) == 0:
        return []

    results: List[Any] = []
    with Pool(
        processes=workers, initializer=_init_pool_processes, initargs=(func,)
    ) as pool:
        try:
            if verbose:
                for result in tqdm(
                    pool.imap(_pool_process_executor, inputs),
                    total=len(inputs),
                    desc=desc,
                ):
                    results.append(result)
            else:
                results = pool.map(_pool_process_executor, inputs)
        except Exception as e:
            # Best-effort by design: keep partial results, but record the
            # traceback so the failure can be diagnosed.
            logger.exception(f"[multiprocess] Error {e}")

    return results
92
-
93
-
94
async def async_multi_thread(f, inputs, desc="", user_tqdm=True):
    """
    Run ``f`` over ``inputs`` in threads via ``asyncio.to_thread``.

    Args:
        f: Callable taking a single item.
        inputs: Sized sequence of items to process.
        desc: Progress-bar label; defaults to the callable's name.
        user_tqdm: Show a ``tqdm`` progress bar when True.

    Returns:
        A list of results in the same order as ``inputs``.

    Usage:
        inputs = list(range(1, 11))
        def function(i):
            time.sleep(1)
            return 1 / i
        results = await async_multi_thread(function, inputs)
    """
    # Resolve the label before creating any coroutine, so a failure here
    # cannot leave un-awaited coroutines behind. partial objects and callable
    # instances have no __name__.
    if not desc:
        desc = getattr(f, "__name__", None) or repr(f)

    def ensure_output_idx(idx_i):
        # Carry the index along so out-of-order completions can be re-ordered.
        idx, item = idx_i
        return idx, f(item)

    tasks = [asyncio.to_thread(ensure_output_idx, pair) for pair in enumerate(inputs)]
    results = [None] * len(inputs)

    # Only build a bar when one is wanted, and always close it.
    pbar = tqdm(total=len(inputs), desc=desc) if user_tqdm else None
    try:
        for task in asyncio.as_completed(tasks):
            idx, result = await task
            results[idx] = result
            if pbar is not None:
                pbar.update(1)
    finally:
        if pbar is not None:
            pbar.close()
    return results
119
-
120
-
121
- __all__ = ["multi_thread", "multi_process", "async_multi_thread"]
@@ -1,22 +0,0 @@
1
- Metadata-Version: 2.1
2
- Name: speedy-utils
3
- Version: 1.0.4
4
- Summary: Fast and easy-to-use package for data science
5
- Home-page: https://github.com/anhvth/speedy
6
- Author: AnhVTH
7
- Author-email: anhvth.226@gmail.com
8
- Requires-Dist: numpy
9
- Requires-Dist: requests
10
- Requires-Dist: xxhash
11
- Requires-Dist: loguru
12
- Requires-Dist: fastcore
13
- Requires-Dist: debugpy
14
- Requires-Dist: ipywidgets
15
- Requires-Dist: jupyterlab
16
- Requires-Dist: ipdb
17
- Requires-Dist: scikit-learn
18
- Requires-Dist: matplotlib
19
- Requires-Dist: pandas
20
- Requires-Dist: tabulate
21
- Requires-Dist: pydantic
22
-
@@ -1,12 +0,0 @@
1
- speedy/__init__.py,sha256=GYw4FDnK1hy8_WpJxJyJEBCYh4IU5joxwG_s7uqIKKg,1147
2
- speedy/multi_worker.py,sha256=kmk_Km6LkOUVntxmPKSYubMXFJBPoLoZW9NVi4UA9kc,3394
3
- speedy/common/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
4
- speedy/common/clock.py,sha256=q92SvdJLZyCtuq8C6m7GjS6_foI8yyspsKzeR_M1owo,2033
5
- speedy/common/utils_cache.py,sha256=_qxGyISJBM-r0NDSUCYSEWnEsqprORdw3yXmIkns1D4,5656
6
- speedy/common/utils_io.py,sha256=X2zFgVzfYM094auApGjYOO43NgnG5P_g7rOObzPYTrQ,3472
7
- speedy/common/utils_misc.py,sha256=ooMb0xEjC-HrQSQQCTRlEqcrGTJptmA43azSi6BhiD4,1479
8
- speedy/common/utils_print.py,sha256=jh_ihzWTEiPpFRiLoj6It8Zo_3tPxYi8MU1J3Nw3vwk,4273
9
- speedy_utils-1.0.4.dist-info/METADATA,sha256=1riv8kebeRccR2fzPDyv02T0E-vus86HjgzxZvaFF9U,538
10
- speedy_utils-1.0.4.dist-info/WHEEL,sha256=GV9aMThwP_4oNCtvEC2ec3qUYutgWeAzklro_0m4WJQ,91
11
- speedy_utils-1.0.4.dist-info/top_level.txt,sha256=eJxFW_gum7StgovqwA4v-9UndgnnWr4kUqcozY-aBmI,7
12
- speedy_utils-1.0.4.dist-info/RECORD,,
@@ -1 +0,0 @@
1
- speedy