speedy-utils 1.0.4__py3-none-any.whl → 1.0.9__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- llm_utils/__init__.py +31 -0
- llm_utils/chat_format/__init__.py +34 -0
- llm_utils/chat_format/display.py +274 -0
- llm_utils/chat_format/transform.py +149 -0
- llm_utils/chat_format/utils.py +43 -0
- llm_utils/group_messages.py +120 -0
- llm_utils/lm/__init__.py +8 -0
- llm_utils/lm/lm.py +304 -0
- llm_utils/lm/utils.py +130 -0
- llm_utils/scripts/vllm_load_balancer.py +435 -0
- llm_utils/scripts/vllm_serve.py +416 -0
- speedy_utils/__init__.py +85 -0
- speedy_utils/all.py +159 -0
- {speedy → speedy_utils}/common/__init__.py +0 -0
- speedy_utils/common/clock.py +215 -0
- speedy_utils/common/function_decorator.py +66 -0
- speedy_utils/common/logger.py +207 -0
- speedy_utils/common/report_manager.py +112 -0
- speedy_utils/common/utils_cache.py +264 -0
- {speedy → speedy_utils}/common/utils_io.py +66 -19
- {speedy → speedy_utils}/common/utils_misc.py +25 -11
- speedy_utils/common/utils_print.py +216 -0
- speedy_utils/multi_worker/__init__.py +0 -0
- speedy_utils/multi_worker/process.py +198 -0
- speedy_utils/multi_worker/thread.py +327 -0
- speedy_utils/scripts/mpython.py +108 -0
- speedy_utils-1.0.9.dist-info/METADATA +287 -0
- speedy_utils-1.0.9.dist-info/RECORD +30 -0
- {speedy_utils-1.0.4.dist-info → speedy_utils-1.0.9.dist-info}/WHEEL +1 -2
- speedy_utils-1.0.9.dist-info/entry_points.txt +5 -0
- speedy/__init__.py +0 -53
- speedy/common/clock.py +0 -68
- speedy/common/utils_cache.py +0 -170
- speedy/common/utils_print.py +0 -138
- speedy/multi_worker.py +0 -121
- speedy_utils-1.0.4.dist-info/METADATA +0 -22
- speedy_utils-1.0.4.dist-info/RECORD +0 -12
- speedy_utils-1.0.4.dist-info/top_level.txt +0 -1
|
@@ -0,0 +1,287 @@
|
|
|
1
|
+
Metadata-Version: 2.3
|
|
2
|
+
Name: speedy-utils
|
|
3
|
+
Version: 1.0.9
|
|
4
|
+
Summary: Fast and easy-to-use package for data science
|
|
5
|
+
Author: AnhVTH
|
|
6
|
+
Author-email: anhvth.226@gmail.com
|
|
7
|
+
Requires-Python: >=3.9
|
|
8
|
+
Classifier: Programming Language :: Python :: 3
|
|
9
|
+
Classifier: Programming Language :: Python :: 3.9
|
|
10
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
11
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
12
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
13
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
14
|
+
Requires-Dist: bump2version
|
|
15
|
+
Requires-Dist: cachetools
|
|
16
|
+
Requires-Dist: debugpy
|
|
17
|
+
Requires-Dist: fastcore
|
|
18
|
+
Requires-Dist: fastprogress
|
|
19
|
+
Requires-Dist: freezegun (>=1.5.1,<2.0.0)
|
|
20
|
+
Requires-Dist: ipdb
|
|
21
|
+
Requires-Dist: ipywidgets
|
|
22
|
+
Requires-Dist: json-repair (>=0.40.0,<0.41.0)
|
|
23
|
+
Requires-Dist: jupyterlab
|
|
24
|
+
Requires-Dist: loguru
|
|
25
|
+
Requires-Dist: matplotlib
|
|
26
|
+
Requires-Dist: numpy
|
|
27
|
+
Requires-Dist: packaging (>=23.2,<25)
|
|
28
|
+
Requires-Dist: pandas
|
|
29
|
+
Requires-Dist: pydantic
|
|
30
|
+
Requires-Dist: requests
|
|
31
|
+
Requires-Dist: scikit-learn
|
|
32
|
+
Requires-Dist: tabulate
|
|
33
|
+
Requires-Dist: tqdm
|
|
34
|
+
Requires-Dist: xxhash
|
|
35
|
+
Project-URL: Homepage, https://github.com/anhvth/speedy
|
|
36
|
+
Description-Content-Type: text/markdown
|
|
37
|
+
|
|
38
|
+
# Speedy Utils
|
|
39
|
+
|
|
40
|
+

|
|
41
|
+

|
|
42
|
+

|
|
43
|
+
|
|
44
|
+
**Speedy Utils** is a Python utility library designed to streamline common programming tasks such as caching, parallel processing, file I/O, and data manipulation. It provides a collection of decorators, functions, and classes to enhance productivity and performance in your Python projects.
|
|
45
|
+
|
|
46
|
+
## Table of Contents
|
|
47
|
+
|
|
48
|
+
- [Features](#features)
|
|
49
|
+
- [Installation](#installation)
|
|
50
|
+
- [Usage](#usage)
|
|
51
|
+
- [Caching](#caching)
|
|
52
|
+
- [Parallel Processing](#parallel-processing)
|
|
53
|
+
- [File I/O](#file-io)
|
|
54
|
+
- [Data Manipulation](#data-manipulation)
|
|
55
|
+
- [Utility Functions](#utility-functions)
|
|
56
|
+
- [Testing](#testing)
|
|
57
|
+
|
|
58
|
+
## Features
|
|
59
|
+
|
|
60
|
+
- **Caching Mechanisms**: Disk-based and in-memory caching to optimize function calls.
|
|
61
|
+
- **Parallel Processing**: Multi-threading, multi-processing, and asynchronous multi-threading utilities.
|
|
62
|
+
- **File I/O**: Simplified JSON, JSONL, and pickle file handling with support for various file extensions.
|
|
63
|
+
- **Data Manipulation**: Utilities for flattening lists and dictionaries, converting data types, and more.
|
|
64
|
+
- **Timing Utilities**: Tools to measure and log execution time of functions and processes.
|
|
65
|
+
- **Pretty Printing**: Enhanced printing functions for structured data, including HTML tables for Jupyter notebooks.
|
|
66
|
+
|
|
67
|
+
## Installation
|
|
68
|
+
|
|
69
|
+
You can install **Speedy Utils** from [PyPI](https://pypi.org/project/speedy-utils/) using `uv pip` (or plain `pip`):
|
|
70
|
+
|
|
71
|
+
```bash
|
|
72
|
+
uv pip install speedy-utils
|
|
73
|
+
|
|
74
|
+
```
|
|
75
|
+
|
|
76
|
+
Alternatively, install directly from the repository:
|
|
77
|
+
|
|
78
|
+
```bash
|
|
79
|
+
git clone https://github.com/anhvth/speedy speedy-utils
|
|
80
|
+
cd speedy-utils
|
|
81
|
+
pip install .
|
|
82
|
+
```
|
|
83
|
+
|
|
84
|
+
## Updating from previous versions
|
|
85
|
+
|
|
86
|
+
To update from previous versions or switch to v1.x, first uninstall any old
|
|
87
|
+
packages, then install the latest version:
|
|
88
|
+
|
|
89
|
+
```bash
|
|
90
|
+
pip uninstall speedy_llm_utils speedy_utils
|
|
91
|
+
pip install -e ./ # for local development
|
|
92
|
+
# or
|
|
93
|
+
pip install speedy_utils -U # for PyPI upgrade
|
|
94
|
+
```
|
|
95
|
+
|
|
96
|
+
## Usage
|
|
97
|
+
|
|
98
|
+
Below are examples demonstrating how to utilize various features of **Speedy Utils**.
|
|
99
|
+
|
|
100
|
+
### Caching
|
|
101
|
+
|
|
102
|
+
#### Memoize Decorator
|
|
103
|
+
|
|
104
|
+
Cache the results of function calls to disk to avoid redundant computations.
|
|
105
|
+
|
|
106
|
+
```python
|
|
107
|
+
from speedy_utils import memoize
|
|
108
|
+
|
|
109
|
+
@memoize
|
|
110
|
+
def expensive_function(x):
|
|
111
|
+
# Simulate an expensive computation
|
|
112
|
+
import time
|
|
113
|
+
time.sleep(2)
|
|
114
|
+
return x * x
|
|
115
|
+
|
|
116
|
+
result = expensive_function(4) # Takes ~2 seconds
|
|
117
|
+
result = expensive_function(4) # Retrieved from cache instantly
|
|
118
|
+
```
|
|
119
|
+
|
|
120
|
+
#### In-Memory Memoization
|
|
121
|
+
|
|
122
|
+
Cache function results in memory for faster access within the same runtime.
|
|
123
|
+
|
|
124
|
+
```python
|
|
125
|
+
from speedy_utils import imemoize
|
|
126
|
+
|
|
127
|
+
@imemoize
|
|
128
|
+
def compute_sum(a, b):
|
|
129
|
+
return a + b
|
|
130
|
+
|
|
131
|
+
result = compute_sum(5, 7) # Computed and cached
|
|
132
|
+
result = compute_sum(5, 7) # Retrieved from in-memory cache
|
|
133
|
+
```
|
|
134
|
+
|
|
135
|
+
### Parallel Processing
|
|
136
|
+
|
|
137
|
+
#### Multi-threading
|
|
138
|
+
|
|
139
|
+
Run a function over many inputs concurrently using multiple threads. `multi_thread` is called the same way from a notebook and from a plain Python script. In a notebook, the work is delegated to a separate process, so an interrupt stops that process immediately instead of leaving worker threads running until every remaining task has completed.
|
|
140
|
+
|
|
141
|
+
```python
|
|
142
|
+
from speedy_utils import multi_thread
|
|
143
|
+
|
|
144
|
+
def process_item(item):
|
|
145
|
+
# Your processing logic
|
|
146
|
+
return item * 2
|
|
147
|
+
|
|
148
|
+
items = [1, 2, 3, 4, 5]
|
|
149
|
+
results = multi_thread(process_item, items, workers=3)
|
|
150
|
+
print(results) # [2, 4, 6, 8, 10]
|
|
151
|
+
```
|
|
152
|
+
|
|
153
|
+
### File I/O
|
|
154
|
+
|
|
155
|
+
#### Dumping Data
|
|
156
|
+
|
|
157
|
+
Save data in JSON, JSONL, or pickle formats.
|
|
158
|
+
|
|
159
|
+
```python
|
|
160
|
+
from speedy_utils import dump_json_or_pickle, dump_jsonl
|
|
161
|
+
|
|
162
|
+
data = {"name": "Alice", "age": 30}
|
|
163
|
+
|
|
164
|
+
# Save as JSON
|
|
165
|
+
dump_json_or_pickle(data, "data.json")
|
|
166
|
+
|
|
167
|
+
# Save as JSONL
|
|
168
|
+
dump_jsonl([data, {"name": "Bob", "age": 25}], "data.jsonl")
|
|
169
|
+
|
|
170
|
+
# Save as Pickle
|
|
171
|
+
dump_json_or_pickle(data, "data.pkl")
|
|
172
|
+
```
|
|
173
|
+
|
|
174
|
+
#### Loading Data
|
|
175
|
+
|
|
176
|
+
Load data based on file extensions.
|
|
177
|
+
|
|
178
|
+
```python
|
|
179
|
+
from speedy_utils import load_json_or_pickle, load_by_ext
|
|
180
|
+
|
|
181
|
+
# Load JSON
|
|
182
|
+
data = load_json_or_pickle("data.json")
|
|
183
|
+
|
|
184
|
+
# Load JSONL
|
|
185
|
+
data_list = load_json_or_pickle("data.jsonl")
|
|
186
|
+
|
|
187
|
+
# Load Pickle
|
|
188
|
+
data = load_json_or_pickle("data.pkl")
|
|
189
|
+
|
|
190
|
+
# Load based on extension with parallel processing
|
|
191
|
+
loaded_data = load_by_ext(["data.json", "data.pkl"])
|
|
192
|
+
```
|
|
193
|
+
|
|
194
|
+
### Data Manipulation
|
|
195
|
+
|
|
196
|
+
#### Flattening Lists and Dictionaries
|
|
197
|
+
|
|
198
|
+
```python
|
|
199
|
+
from speedy_utils import flatten_list, flatten_dict
|
|
200
|
+
|
|
201
|
+
nested_list = [[1, 2], [3, 4], [5]]
|
|
202
|
+
flat_list = flatten_list(nested_list)
|
|
203
|
+
print(flat_list) # [1, 2, 3, 4, 5]
|
|
204
|
+
|
|
205
|
+
nested_dict = {"a": {"b": 1, "c": 2}, "d": 3}
|
|
206
|
+
flat_dict = flatten_dict(nested_dict)
|
|
207
|
+
print(flat_dict) # {'a.b': 1, 'a.c': 2, 'd': 3}
|
|
208
|
+
```
|
|
209
|
+
|
|
210
|
+
#### Converting to Built-in Python Types
|
|
211
|
+
|
|
212
|
+
```python
|
|
213
|
+
from speedy_utils import convert_to_builtin_python
|
|
214
|
+
from pydantic import BaseModel
|
|
215
|
+
|
|
216
|
+
class User(BaseModel):
|
|
217
|
+
name: str
|
|
218
|
+
age: int
|
|
219
|
+
|
|
220
|
+
user = User(name="Charlie", age=28)
|
|
221
|
+
builtin_user = convert_to_builtin_python(user)
|
|
222
|
+
print(builtin_user) # {'name': 'Charlie', 'age': 28}
|
|
223
|
+
```
|
|
224
|
+
|
|
225
|
+
### Utility Functions
|
|
226
|
+
|
|
227
|
+
#### Pretty Printing
|
|
228
|
+
|
|
229
|
+
```python
|
|
230
|
+
from speedy_utils import fprint, print_table
|
|
231
|
+
|
|
232
|
+
data = {"name": "Dana", "age": 22, "city": "New York"}
|
|
233
|
+
|
|
234
|
+
# Pretty print as table
|
|
235
|
+
fprint(data)
|
|
236
|
+
|
|
237
|
+
# Print as table using tabulate
|
|
238
|
+
print_table(data)
|
|
239
|
+
```
|
|
240
|
+
|
|
241
|
+
#### Timing Utilities
|
|
242
|
+
|
|
243
|
+
```python
|
|
244
|
+
from speedy_utils import timef, Clock
|
|
245
|
+
|
|
246
|
+
@timef
|
|
247
|
+
def slow_function():
|
|
248
|
+
import time
|
|
249
|
+
time.sleep(3)
|
|
250
|
+
return "Done"
|
|
251
|
+
|
|
252
|
+
result = slow_function() # Prints execution time
|
|
253
|
+
|
|
254
|
+
# Using Clock
|
|
255
|
+
clock = Clock()
|
|
256
|
+
# ... your code ...
|
|
257
|
+
clock.log()
|
|
258
|
+
```
|
|
259
|
+
|
|
260
|
+
## Testing
|
|
261
|
+
|
|
262
|
+
The project includes a comprehensive test suite using `unittest`. To run the tests, execute the following command in the project root directory:
|
|
263
|
+
|
|
264
|
+
```bash
|
|
265
|
+
python test.py
|
|
266
|
+
```
|
|
267
|
+
|
|
268
|
+
Ensure all dependencies are installed before running tests:
|
|
269
|
+
|
|
270
|
+
```bash
|
|
271
|
+
pip install -r requirements.txt
|
|
272
|
+
```
|
|
273
|
+
|
|
274
|
+
Run the script to parse and display the arguments:
|
|
275
|
+
|
|
276
|
+
```bash
|
|
277
|
+
python speedy_utils/common/dataclass_parser.py
|
|
278
|
+
```
|
|
279
|
+
|
|
280
|
+
Example output:
|
|
281
|
+
|
|
282
|
+
| Field | Value |
|
|
283
|
+
|--------------------|---------------------------------------|
|
|
284
|
+
| from_peft | ./outputs/llm_hn_qw32b/hn_results_r3/ |
|
|
285
|
+
|
|
286
|
+
Please ensure your code adheres to the project's coding standards and includes appropriate tests.
|
|
287
|
+
|
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
llm_utils/__init__.py,sha256=ibEVUPkL11M4htL-3uXkSyyUZiIO-TZD6IzWVmi8QYw,697
|
|
2
|
+
llm_utils/chat_format/__init__.py,sha256=8dBIUqFJvkgQYedxBtcyxt-4tt8JxAKVap2JlTXmgaM,737
|
|
3
|
+
llm_utils/chat_format/display.py,sha256=a3zWzo47SUf4i-uic-dwf-vxtu6gZWLbnJrszjjZjQ8,9801
|
|
4
|
+
llm_utils/chat_format/transform.py,sha256=328V18FOgRQzljAl9Mh8NF4Tl-N3cZZIPmAwHQspXCY,5461
|
|
5
|
+
llm_utils/chat_format/utils.py,sha256=xTxN4HrLHcRO2PfCTR43nH1M5zCa7v0kTTdzAcGkZg0,1229
|
|
6
|
+
llm_utils/group_messages.py,sha256=wyiZzs7O8yK2lyIakV2x-1CrrWVT12sjnP1vVnmPet4,3606
|
|
7
|
+
llm_utils/lm/__init__.py,sha256=vXFILZLBmmpg39cy5XniQPSMzoFQCE3wdfz39EtqDKU,71
|
|
8
|
+
llm_utils/lm/lm.py,sha256=wLVX7-kE8odyS-mLew0rihswxWxoEn7smxFGHoPKv_4,10739
|
|
9
|
+
llm_utils/lm/utils.py,sha256=-fDNueiXKQI6RDoNHJYNyORomf2XlCf2doJZ3GEV2Io,4762
|
|
10
|
+
llm_utils/scripts/vllm_load_balancer.py,sha256=MgMnnoKWJQc-l2fspUSkyA9wxL1RkXd7wdBLJNQBlr4,17384
|
|
11
|
+
llm_utils/scripts/vllm_serve.py,sha256=uFS5kNXZ7kZ9rQms63LnliGEVV3rATT6dEppGTgoR0s,13910
|
|
12
|
+
speedy_utils/__init__.py,sha256=I2bSfDIE9yRF77tnHW0vqfExDA2m1gUx4AH8C9XmGtg,1707
|
|
13
|
+
speedy_utils/all.py,sha256=A9jiKGjo950eg1pscS9x38OWAjKGyusoAN5mrfweY4E,3090
|
|
14
|
+
speedy_utils/common/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
15
|
+
speedy_utils/common/clock.py,sha256=3n4FkCW0dz46O8By09V5Pve1DSMgpLDRbWEVRryryeQ,7423
|
|
16
|
+
speedy_utils/common/function_decorator.py,sha256=r_r42qCWuNcu0_aH7musf2BWvcJfgZrD81G28mDcolw,2226
|
|
17
|
+
speedy_utils/common/logger.py,sha256=NIOlhhACpcc0BUTSJ8oDYrLp23J2gW_KJFyRVdLN2tY,6432
|
|
18
|
+
speedy_utils/common/report_manager.py,sha256=dgGfS_fHbZiQMsLzkgnj0OfB758t1x6B4MhjsetSl9A,3930
|
|
19
|
+
speedy_utils/common/utils_cache.py,sha256=gXX5qTXpCG3qDgjnOsSvxM4LkQurmcsg4QRv_zOBG1E,8378
|
|
20
|
+
speedy_utils/common/utils_io.py,sha256=d7PKz5tOPrwHDr7GXuYmILvjXJOFEwfzAEIuUcYaI60,4790
|
|
21
|
+
speedy_utils/common/utils_misc.py,sha256=nsQOu2jcplcFHVQ1CnOjEpNcdxIINveGxB493Cqo63U,1812
|
|
22
|
+
speedy_utils/common/utils_print.py,sha256=QRaL2QPbks4Mtol_gJy3ZdahgUfzUEtcOp4--lBlzYI,6709
|
|
23
|
+
speedy_utils/multi_worker/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
24
|
+
speedy_utils/multi_worker/process.py,sha256=XwQlffxzRFnCVeKjDNBZDwFfUQHiJiuFA12MRGJVru8,6708
|
|
25
|
+
speedy_utils/multi_worker/thread.py,sha256=9pXjvgjD0s0Hp0cZ6I3M0ndp1OlYZ1yvqbs_bcun_Kw,12775
|
|
26
|
+
speedy_utils/scripts/mpython.py,sha256=ZzkBWI5Xw3vPoMx8xQt2x4mOFRjtwWqfvAJ5_ngyWgw,3816
|
|
27
|
+
speedy_utils-1.0.9.dist-info/METADATA,sha256=bLbYeo_uDW0hasjtbmuZoslF-S6qkO-gURfbZhnEuIo,7391
|
|
28
|
+
speedy_utils-1.0.9.dist-info/WHEEL,sha256=b4K_helf-jlQoXBBETfwnf4B04YC67LOev0jo4fX5m8,88
|
|
29
|
+
speedy_utils-1.0.9.dist-info/entry_points.txt,sha256=rP43satgw1uHcKUAlmVxS-MTAQImL-03-WwLIB5a300,165
|
|
30
|
+
speedy_utils-1.0.9.dist-info/RECORD,,
|
speedy/__init__.py
DELETED
|
@@ -1,53 +0,0 @@
|
|
|
1
|
-
from .common.clock import Clock, timef
|
|
2
|
-
from .common.utils_cache import (
|
|
3
|
-
ICACHE,
|
|
4
|
-
SPEED_CACHE_DIR,
|
|
5
|
-
identify,
|
|
6
|
-
imemoize,
|
|
7
|
-
imemoize_v2,
|
|
8
|
-
memoize,
|
|
9
|
-
memoize_method,
|
|
10
|
-
memoize_v2,
|
|
11
|
-
)
|
|
12
|
-
from .common.utils_io import (
|
|
13
|
-
dump_json_or_pickle,
|
|
14
|
-
dump_jsonl,
|
|
15
|
-
load_by_ext,
|
|
16
|
-
load_json_or_pickle,
|
|
17
|
-
)
|
|
18
|
-
from .common.utils_misc import (
|
|
19
|
-
convert_to_builtin_python,
|
|
20
|
-
flatten_list,
|
|
21
|
-
get_arg_names,
|
|
22
|
-
is_interactive,
|
|
23
|
-
mkdir_or_exist,
|
|
24
|
-
)
|
|
25
|
-
from .common.utils_print import fprint, print_table
|
|
26
|
-
from .multi_worker import async_multi_thread, multi_process, multi_thread
|
|
27
|
-
|
|
28
|
-
__all__ = [
|
|
29
|
-
"SPEED_CACHE_DIR",
|
|
30
|
-
"ICACHE",
|
|
31
|
-
"mkdir_or_exist",
|
|
32
|
-
"dump_jsonl",
|
|
33
|
-
"dump_json_or_pickle",
|
|
34
|
-
"timef", # Ensure timef is moved to an appropriate module or included here
|
|
35
|
-
"load_json_or_pickle",
|
|
36
|
-
"load_by_ext",
|
|
37
|
-
"identify",
|
|
38
|
-
"memoize",
|
|
39
|
-
"imemoize",
|
|
40
|
-
"imemoize_v2",
|
|
41
|
-
"flatten_list",
|
|
42
|
-
"fprint",
|
|
43
|
-
"get_arg_names",
|
|
44
|
-
"memoize_v2",
|
|
45
|
-
"is_interactive",
|
|
46
|
-
"print_table",
|
|
47
|
-
"convert_to_builtin_python",
|
|
48
|
-
"Clock",
|
|
49
|
-
"multi_thread",
|
|
50
|
-
"multi_process",
|
|
51
|
-
"async_multi_thread",
|
|
52
|
-
"memoize_method",
|
|
53
|
-
]
|
speedy/common/clock.py
DELETED
|
@@ -1,68 +0,0 @@
|
|
|
1
|
-
import time
|
|
2
|
-
from loguru import logger
|
|
3
|
-
|
|
4
|
-
# Public API of this module: both the Clock stopwatch and the timef decorator
# are meant to be imported by users (the package __init__ imports both).
__all__ = ["Clock", "timef"]
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
def timef(func):
    """Decorate ``func`` so that every call prints its wall-clock duration.

    Args:
        func: The callable to time.

    Returns:
        A wrapper that forwards all arguments to ``func``, prints
        ``"<name> took <seconds> seconds to execute."`` and returns the
        result of ``func`` unchanged. Nothing is printed if ``func`` raises.
    """
    # Local import: this module's header only imports ``time`` and ``loguru``.
    import functools

    @functools.wraps(func)  # keep the wrapped function's __name__/__doc__
    def wrapper(*args, **kwargs):
        start_time = time.time()
        result = func(*args, **kwargs)
        execution_time = time.time() - start_time
        print(f"{func.__name__} took {execution_time:0.2f} seconds to execute.")
        return result

    return wrapper
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
class Clock:
    """Simple stopwatch that also accumulates time into named buckets.

    Attributes are plain and public:
        start_time:   timestamp of the first ``start()`` call, or None.
        last_check:   timestamp of the most recent ``since_last_check()``.
        time_table:   mapping ``name -> accumulated seconds`` fed by ``update``.
        pbar_counter: number of times ``print_table`` actually emitted a line.
        last_print:   timestamp of the last emitted table (rate limiting).
    """

    def __init__(self, start_now=True):
        """Create the clock; start it immediately unless ``start_now`` is False."""
        self.start_time = None
        self.time_table = {}
        self.last_check = None
        if start_now:
            self.start()
        self.pbar_counter = 0
        self.last_print = time.time()

    def start(self):
        """Start the clock (first call only) and reset the last-check marker."""
        if self.start_time is None:
            self.start_time = time.time()
        self.last_check = self.start_time

    def since_start(self):
        """Return seconds elapsed since ``start()``.

        Raises:
            ValueError: If the clock has not been started.
        """
        if self.start_time is None:
            raise ValueError("Clock has not been started.")
        return time.time() - self.start_time

    def log(self, custom_logger=None):
        """Report the elapsed time via ``custom_logger`` or ``logger.info``."""
        msg = f"Time elapsed: {self.since_start():.2f} seconds."
        if custom_logger:
            custom_logger(msg)
        else:
            logger.info(msg)

    def since_last_check(self):
        """Return seconds since the previous check and move the marker to now.

        Raises:
            ValueError: If the clock has not been started (same error as
                ``since_start`` instead of an opaque ``TypeError``).
        """
        if self.last_check is None:
            raise ValueError("Clock has not been started.")
        now = time.time()
        elapsed = now - self.last_check
        self.last_check = now
        return elapsed

    def update(self, name):
        """Add the time elapsed since the last check to bucket ``name``."""
        if name not in self.time_table:
            self.time_table[name] = 0
        self.time_table[name] += self.since_last_check()

    def print_table(self, every=1, custom_logger=None):
        """Emit a one-line summary of all buckets, at most once per ``every`` seconds.

        Args:
            every: Minimum number of seconds between two emitted summaries.
            custom_logger: Optional callable receiving the summary string;
                defaults to ``logger.info`` (mirrors ``log``).
        """
        now = time.time()
        if now - self.last_print <= every:
            return

        self.pbar_counter += 1
        total_time = sum(self.time_table.values())
        entries = []
        for name, t in self.time_table.items():
            # Guard against a zero total (e.g. all buckets still at 0).
            percentage = (t / total_time) * 100 if total_time else 0.0
            entries.append(
                "{}: avg_time: {:.2f} s ({:.2f}%), total: {} s".format(
                    name, t, percentage, total_time
                )
            )
        # Separate entries so consecutive buckets do not run together.
        desc = "Time table: " + "; ".join(entries)
        if custom_logger:
            custom_logger(desc)
        else:
            logger.info(desc)
        self.last_print = now
|
speedy/common/utils_cache.py
DELETED
|
@@ -1,170 +0,0 @@
|
|
|
1
|
-
# speedy/common/utils_cache.py
|
|
2
|
-
|
|
3
|
-
import functools
|
|
4
|
-
import inspect
|
|
5
|
-
import os
|
|
6
|
-
import os.path as osp
|
|
7
|
-
import pickle
|
|
8
|
-
import traceback
|
|
9
|
-
from typing import Any, Callable, Dict, List, Optional
|
|
10
|
-
|
|
11
|
-
import xxhash
|
|
12
|
-
from loguru import logger
|
|
13
|
-
|
|
14
|
-
from .utils_io import dump_json_or_pickle, load_json_or_pickle
|
|
15
|
-
from .utils_misc import mkdir_or_exist
|
|
16
|
-
|
|
17
|
-
SPEED_CACHE_DIR = osp.join(osp.expanduser("~"), ".cache/av")
|
|
18
|
-
ICACHE: Dict[str, Any] = {}
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
def identify(x: Any) -> str:
    """Return a hex digest of the input.

    The object is pickled and the resulting bytes are hashed with xxh64
    (seed 0), so ``x`` must be picklable.
    """
    payload = pickle.dumps(x)
    hasher = xxhash.xxh64(payload, seed=0)
    return hasher.hexdigest()
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
def memoize(
    func: Callable,
    ignore_self: bool = True,
    cache_dir: str = SPEED_CACHE_DIR,
    cache_type: str = ".pkl",
    verbose: bool = False,
    cache_key: Optional[str] = None,
) -> Callable:
    """Cache the result of ``func`` on disk, keyed by its source and arguments.

    Args:
        func: Function to wrap.
        ignore_self: When the first declared parameter is named ``self``,
            leave the first positional argument out of the cache key.
        cache_dir: Root directory; entries are stored under
            ``<cache_dir>/funcs/<function name>/<digest><cache_type>``.
        cache_type: Cache file extension, ``".pkl"`` or ``".json"``.
        verbose: Log the cache file path on a cache hit.
        cache_key: Name of a keyword argument. When given, only that
            argument's value (plus the function source) forms the key; the
            argument must be passed by keyword.

    Returns:
        The wrapped function, or ``func`` itself when the environment
        variable ``AV_MEMOIZE_DISABLE`` is ``"1"``.

    Notes:
        Failures of the caching machinery (key computation, cache load,
        cache write) are logged and never prevent the call. ``func`` is
        executed at most once per call, and exceptions raised by ``func``
        itself propagate to the caller unchanged.
    """
    assert cache_type in [".pkl", ".json"]
    if os.environ.get("AV_MEMOIZE_DISABLE", "0") == "1":
        logger.info("Memoize is disabled")
        return func

    def _cache_path_for(args, kwargs):
        # Build the cache file path for one call and ensure its directory exists.
        arg_names = inspect.getfullargspec(func).args
        func_source = inspect.getsource(func).replace(" ", "")
        if cache_key is not None:
            logger.info(f"Use cache_key={kwargs[cache_key]}")
            func_id = identify([func_source, kwargs[cache_key]])
        elif len(arg_names) > 0 and arg_names[0] == "self" and ignore_self:
            func_id = identify((func_source, args[1:], kwargs))
        else:
            func_id = identify((func_source, args, kwargs))
        cache_path = osp.join(
            cache_dir, "funcs", func.__name__, f"{func_id}{cache_type}"
        )
        mkdir_or_exist(os.path.dirname(cache_path))
        return cache_path

    @functools.wraps(func)
    def memoized_func(*args, **kwargs):
        # Only the cache lookup is guarded; the call to ``func`` is kept
        # outside the ``try`` so an error in ``func`` does not trigger a
        # second execution of it.
        try:
            cache_path = _cache_path_for(args, kwargs)
            if osp.exists(cache_path):
                if verbose:
                    logger.info(f"Load from cache file: {cache_path}")
                return load_json_or_pickle(cache_path)
        except Exception as e:
            traceback.print_exc()
            logger.warning(f"Exception: {e}, using default function call")
            return func(*args, **kwargs)

        result = func(*args, **kwargs)
        try:
            dump_json_or_pickle(result, cache_path)
        except Exception as e:
            # The result is still valid; only persisting it failed.
            traceback.print_exc()
            logger.warning(f"Exception: {e}, result was not cached")
        return result

    return memoized_func
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
def imemoize(func: Callable) -> Callable:
    """Cache results of ``func`` in the module-level in-memory ``ICACHE``.

    The key is a digest of the function's source together with the
    positional and keyword arguments of the call.
    """
    missing = object()  # sentinel so that cached ``None`` results count as hits

    @functools.wraps(func)
    def _f(*args, **kwargs):
        source = inspect.getsource(func)
        ident_name = identify((source, args, kwargs))
        cached = ICACHE.get(ident_name, missing)
        if cached is not missing:
            return cached
        result = func(*args, **kwargs)
        ICACHE[ident_name] = result
        return result

    return _f
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
def imemoize_v2(keys: List[str]) -> Callable:
    """Memoize a function in memory, keyed only by the arguments named in ``keys``.

    Args:
        keys: Names of the parameters whose values form the cache key.
            Arguments not listed here are ignored for caching purposes.

    Returns:
        A decorator. The decorated function stores its results in the
        module-level ``ICACHE``; calls in which none of ``keys`` is supplied
        bypass the cache entirely.
    """

    def decorator(func: Callable) -> Callable:
        # Introspect once at decoration time instead of on every call.
        arg_names = inspect.getfullargspec(func).args
        # Qualify the key with module and qualified name: ICACHE is shared by
        # every decorated function, so the bare ``__name__`` alone would let
        # same-named functions from different modules/classes collide.
        func_ref = (
            getattr(func, "__module__", None),
            getattr(func, "__qualname__", func.__name__),
        )

        @functools.wraps(func)
        def wrapper(*args, **kwargs):
            all_args = {**dict(zip(arg_names, args)), **kwargs}
            key_values = {key: all_args[key] for key in keys if key in all_args}
            if not key_values:
                return func(*args, **kwargs)

            ident_name = identify((func_ref, tuple(sorted(key_values.items()))))
            try:
                return ICACHE[ident_name]
            except KeyError:
                result = func(*args, **kwargs)
                ICACHE[ident_name] = result
                return result

        return wrapper

    return decorator
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
def memoize_v2(keys: List[str], cache_dir: str = SPEED_CACHE_DIR) -> Callable:
    """Memoize function results on disk, keyed only by the arguments in ``keys``.

    Args:
        keys: Names of the parameters whose values form the cache key.
        cache_dir: Root directory; entries are stored as
            ``<cache_dir>/<func name>_<source digest>/<keys>_<values digest>.pkl``.

    Returns:
        A decorator. Calls in which none of ``keys`` is supplied bypass the
        cache entirely.
    """

    def decorator(func: Callable) -> Callable:
        # Introspect once; the original looked this up again for every
        # positional argument of every call.
        arg_names = inspect.getfullargspec(func).args

        @functools.wraps(func)
        def wrapper(*args, **kwargs):
            # zip() stops at the shorter sequence, so extra ``*args`` of a
            # variadic function no longer raise IndexError.
            args_key_values = dict(zip(arg_names, args))
            args_key_values.update(kwargs)

            values = [args_key_values[key] for key in keys if key in args_key_values]
            if not values:
                return func(*args, **kwargs)

            key_id = identify(values)
            func_source = inspect.getsource(func).replace(" ", "")
            func_id = identify(func_source)
            key_names = "_".join(keys)
            cache_path = osp.join(
                cache_dir, f"{func.__name__}_{func_id}", f"{key_names}_{key_id}.pkl"
            )
            if osp.exists(cache_path):
                return load_json_or_pickle(cache_path)
            result = func(*args, **kwargs)
            # Create the per-function directory first, as ``memoize`` does.
            mkdir_or_exist(osp.dirname(cache_path))
            dump_json_or_pickle(result, cache_path)
            return result

        return wrapper

    return decorator
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
def memoize_method(method):
    """Memoize the results of a method in an in-memory cache.

    The cache key is a digest of the positional and keyword arguments only.
    The first argument (the instance or class the method is bound to) is NOT
    part of the key, and one cache is shared by every object the decorated
    method is called on. Use it only where the result does not depend on
    per-instance state.

    Args:
        method (Callable): The method whose results will be memoized.

    Returns:
        Callable: A wrapper that returns the cached result when the method is
        called again with the same arguments. Each call logs ``"HIT"`` or
        ``"MISS"`` at debug level.
    """
    cache = {}

    @functools.wraps(method)  # keep the method's __name__/__doc__
    def cached_method(cls, *args, **kwargs):
        cache_key = identify([args, kwargs])
        hit = cache_key in cache
        logger.debug("HIT" if hit else "MISS")
        if not hit:
            cache[cache_key] = method(cls, *args, **kwargs)
        return cache[cache_key]

    return cached_method
|