speedy-utils 1.0.3__tar.gz → 1.0.5__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- speedy_utils-1.0.5/PKG-INFO +279 -0
- speedy_utils-1.0.5/README.md +241 -0
- speedy_utils-1.0.5/pyproject.toml +63 -0
- speedy_utils-1.0.5/src/llm_utils/__init__.py +29 -0
- speedy_utils-1.0.5/src/llm_utils/chat_format.py +427 -0
- speedy_utils-1.0.5/src/llm_utils/group_messages.py +120 -0
- speedy_utils-1.0.5/src/llm_utils/lm/__init__.py +8 -0
- speedy_utils-1.0.5/src/llm_utils/lm/base_lm.py +304 -0
- speedy_utils-1.0.5/src/llm_utils/lm/utils.py +130 -0
- speedy_utils-1.0.5/src/llm_utils/scripts/vllm_load_balancer.py +353 -0
- speedy_utils-1.0.5/src/llm_utils/scripts/vllm_serve.py +416 -0
- speedy_utils-1.0.5/src/speedy_utils/__init__.py +85 -0
- speedy_utils-1.0.5/src/speedy_utils/all.py +159 -0
- {speedy_utils-1.0.3/speedy → speedy_utils-1.0.5/src/speedy_utils}/common/__init__.py +0 -0
- speedy_utils-1.0.5/src/speedy_utils/common/clock.py +215 -0
- speedy_utils-1.0.5/src/speedy_utils/common/function_decorator.py +66 -0
- speedy_utils-1.0.5/src/speedy_utils/common/logger.py +207 -0
- speedy_utils-1.0.5/src/speedy_utils/common/report_manager.py +112 -0
- speedy_utils-1.0.5/src/speedy_utils/common/utils_cache.py +264 -0
- {speedy_utils-1.0.3/speedy → speedy_utils-1.0.5/src/speedy_utils}/common/utils_io.py +66 -19
- {speedy_utils-1.0.3/speedy → speedy_utils-1.0.5/src/speedy_utils}/common/utils_misc.py +25 -11
- speedy_utils-1.0.5/src/speedy_utils/common/utils_print.py +216 -0
- speedy_utils-1.0.5/src/speedy_utils/multi_worker/__init__.py +0 -0
- speedy_utils-1.0.5/src/speedy_utils/multi_worker/process.py +198 -0
- speedy_utils-1.0.5/src/speedy_utils/multi_worker/thread.py +327 -0
- speedy_utils-1.0.5/src/speedy_utils/scripts/mpython.py +108 -0
- speedy_utils-1.0.3/PKG-INFO +0 -21
- speedy_utils-1.0.3/pyproject.toml +0 -3
- speedy_utils-1.0.3/setup.cfg +0 -4
- speedy_utils-1.0.3/setup.py +0 -28
- speedy_utils-1.0.3/speedy/__init__.py +0 -53
- speedy_utils-1.0.3/speedy/common/clock.py +0 -68
- speedy_utils-1.0.3/speedy/common/utils_cache.py +0 -170
- speedy_utils-1.0.3/speedy/common/utils_print.py +0 -138
- speedy_utils-1.0.3/speedy/multi_worker.py +0 -121
- speedy_utils-1.0.3/speedy_utils.egg-info/PKG-INFO +0 -21
- speedy_utils-1.0.3/speedy_utils.egg-info/SOURCES.txt +0 -15
- speedy_utils-1.0.3/speedy_utils.egg-info/dependency_links.txt +0 -1
- speedy_utils-1.0.3/speedy_utils.egg-info/requires.txt +0 -14
- speedy_utils-1.0.3/speedy_utils.egg-info/top_level.txt +0 -1
|
@@ -0,0 +1,279 @@
|
|
|
1
|
+
Metadata-Version: 2.3
|
|
2
|
+
Name: speedy-utils
|
|
3
|
+
Version: 1.0.5
|
|
4
|
+
Summary: Fast and easy-to-use package for data science
|
|
5
|
+
Author: AnhVTH
|
|
6
|
+
Author-email: anhvth.226@gmail.com
|
|
7
|
+
Requires-Python: >=3.9
|
|
8
|
+
Classifier: Programming Language :: Python :: 3
|
|
9
|
+
Classifier: Programming Language :: Python :: 3.9
|
|
10
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
11
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
12
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
13
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
14
|
+
Requires-Dist: bump2version
|
|
15
|
+
Requires-Dist: cachetools
|
|
16
|
+
Requires-Dist: debugpy
|
|
17
|
+
Requires-Dist: fastcore
|
|
18
|
+
Requires-Dist: fastprogress
|
|
19
|
+
Requires-Dist: freezegun (>=1.5.1,<2.0.0)
|
|
20
|
+
Requires-Dist: ipdb
|
|
21
|
+
Requires-Dist: ipywidgets
|
|
22
|
+
Requires-Dist: json-repair (>=0.40.0,<0.41.0)
|
|
23
|
+
Requires-Dist: jupyterlab
|
|
24
|
+
Requires-Dist: loguru
|
|
25
|
+
Requires-Dist: matplotlib
|
|
26
|
+
Requires-Dist: numpy
|
|
27
|
+
Requires-Dist: packaging (>=23.2,<25)
|
|
28
|
+
Requires-Dist: pandas
|
|
29
|
+
Requires-Dist: pydantic
|
|
30
|
+
Requires-Dist: requests
|
|
31
|
+
Requires-Dist: scikit-learn
|
|
32
|
+
Requires-Dist: tabulate
|
|
33
|
+
Requires-Dist: tqdm
|
|
34
|
+
Requires-Dist: xxhash
|
|
35
|
+
Project-URL: Homepage, https://github.com/anhvth/speedy
|
|
36
|
+
Description-Content-Type: text/markdown
|
|
37
|
+
|
|
38
|
+
# Speedy Utils
|
|
39
|
+
|
|
40
|
+

|
|
41
|
+

|
|
42
|
+

|
|
43
|
+
|
|
44
|
+
**Speedy Utils** is a Python utility library designed to streamline common programming tasks such as caching, parallel processing, file I/O, and data manipulation. It provides a collection of decorators, functions, and classes to enhance productivity and performance in your Python projects.
|
|
45
|
+
|
|
46
|
+
## Table of Contents
|
|
47
|
+
|
|
48
|
+
- [Features](#features)
|
|
49
|
+
- [Installation](#installation)
|
|
50
|
+
- [Usage](#usage)
|
|
51
|
+
- [Caching](#caching)
|
|
52
|
+
- [Parallel Processing](#parallel-processing)
|
|
53
|
+
- [File I/O](#file-io)
|
|
54
|
+
- [Data Manipulation](#data-manipulation)
|
|
55
|
+
- [Utility Functions](#utility-functions)
|
|
56
|
+
- [Testing](#testing)
|
|
57
|
+
- [Deployment](#deployment)
|
|
58
|
+
- [Contributing](#contributing)
|
|
59
|
+
- [License](#license)
|
|
60
|
+
|
|
61
|
+
## Features
|
|
62
|
+
|
|
63
|
+
- **Caching Mechanisms**: Disk-based and in-memory caching to optimize function calls.
|
|
64
|
+
- **Parallel Processing**: Multi-threading, multi-processing, and asynchronous multi-threading utilities.
|
|
65
|
+
- **File I/O**: Simplified JSON, JSONL, and pickle file handling with support for various file extensions.
|
|
66
|
+
- **Data Manipulation**: Utilities for flattening lists and dictionaries, converting data types, and more.
|
|
67
|
+
- **Timing Utilities**: Tools to measure and log execution time of functions and processes.
|
|
68
|
+
- **Pretty Printing**: Enhanced printing functions for structured data, including HTML tables for Jupyter notebooks.
|
|
69
|
+
|
|
70
|
+
## Installation
|
|
71
|
+
|
|
72
|
+
You can install **Speedy Utils** from [PyPI](https://pypi.org/project/speedy-utils/) using `uv pip` (plain `pip install speedy-utils` works as well):
|
|
73
|
+
|
|
74
|
+
```bash
|
|
75
|
+
uv pip install speedy-utils
|
|
76
|
+
|
|
77
|
+
```
|
|
78
|
+
|
|
79
|
+
Alternatively, install directly from the repository:
|
|
80
|
+
|
|
81
|
+
```bash
|
|
82
|
+
git clone https://github.com/anhvth/speedy speedy-utils
|
|
83
|
+
cd speedy-utils
|
|
84
|
+
pip install .
|
|
85
|
+
```
|
|
86
|
+
|
|
87
|
+
## Usage
|
|
88
|
+
|
|
89
|
+
Below are examples demonstrating how to utilize various features of **Speedy Utils**.
|
|
90
|
+
|
|
91
|
+
### Caching
|
|
92
|
+
|
|
93
|
+
#### Memoize Decorator
|
|
94
|
+
|
|
95
|
+
Cache the results of function calls to disk to avoid redundant computations.
|
|
96
|
+
|
|
97
|
+
```python
|
|
98
|
+
from speedy_utils import memoize
|
|
99
|
+
|
|
100
|
+
@memoize
|
|
101
|
+
def expensive_function(x):
|
|
102
|
+
# Simulate an expensive computation
|
|
103
|
+
import time
|
|
104
|
+
time.sleep(2)
|
|
105
|
+
return x * x
|
|
106
|
+
|
|
107
|
+
result = expensive_function(4) # Takes ~2 seconds
|
|
108
|
+
result = expensive_function(4) # Retrieved from cache instantly
|
|
109
|
+
```
|
|
110
|
+
|
|
111
|
+
#### In-Memory Memoization
|
|
112
|
+
|
|
113
|
+
Cache function results in memory for faster access within the same runtime.
|
|
114
|
+
|
|
115
|
+
```python
|
|
116
|
+
from speedy_utils import imemoize
|
|
117
|
+
|
|
118
|
+
@imemoize
|
|
119
|
+
def compute_sum(a, b):
|
|
120
|
+
return a + b
|
|
121
|
+
|
|
122
|
+
result = compute_sum(5, 7) # Computed and cached
|
|
123
|
+
result = compute_sum(5, 7) # Retrieved from in-memory cache
|
|
124
|
+
```
|
|
125
|
+
|
|
126
|
+
### Parallel Processing
|
|
127
|
+
|
|
128
|
+
#### Multi-threading
|
|
129
|
+
|
|
130
|
+
Execute a function over many inputs concurrently using multiple threads. `multi_thread` works the same way in plain Python scripts and in notebooks. In a notebook environment, it delegates the work to a separate process; if execution is interrupted, that process is stopped immediately, which avoids the usual problem of worker threads continuing to run until all remaining tasks are completed.
|
|
131
|
+
|
|
132
|
+
```python
|
|
133
|
+
from speedy_utils import multi_thread
|
|
134
|
+
|
|
135
|
+
def process_item(item):
|
|
136
|
+
# Your processing logic
|
|
137
|
+
return item * 2
|
|
138
|
+
|
|
139
|
+
items = [1, 2, 3, 4, 5]
|
|
140
|
+
results = multi_thread(process_item, items, workers=3)
|
|
141
|
+
print(results) # [2, 4, 6, 8, 10]
|
|
142
|
+
```
|
|
143
|
+
|
|
144
|
+
### File I/O
|
|
145
|
+
|
|
146
|
+
#### Dumping Data
|
|
147
|
+
|
|
148
|
+
Save data in JSON, JSONL, or pickle formats.
|
|
149
|
+
|
|
150
|
+
```python
|
|
151
|
+
from speedy_utils import dump_json_or_pickle, dump_jsonl
|
|
152
|
+
|
|
153
|
+
data = {"name": "Alice", "age": 30}
|
|
154
|
+
|
|
155
|
+
# Save as JSON
|
|
156
|
+
dump_json_or_pickle(data, "data.json")
|
|
157
|
+
|
|
158
|
+
# Save as JSONL
|
|
159
|
+
dump_jsonl([data, {"name": "Bob", "age": 25}], "data.jsonl")
|
|
160
|
+
|
|
161
|
+
# Save as Pickle
|
|
162
|
+
dump_json_or_pickle(data, "data.pkl")
|
|
163
|
+
```
|
|
164
|
+
|
|
165
|
+
#### Loading Data
|
|
166
|
+
|
|
167
|
+
Load data based on file extensions.
|
|
168
|
+
|
|
169
|
+
```python
|
|
170
|
+
from speedy_utils import load_json_or_pickle, load_by_ext
|
|
171
|
+
|
|
172
|
+
# Load JSON
|
|
173
|
+
data = load_json_or_pickle("data.json")
|
|
174
|
+
|
|
175
|
+
# Load JSONL
|
|
176
|
+
data_list = load_json_or_pickle("data.jsonl")
|
|
177
|
+
|
|
178
|
+
# Load Pickle
|
|
179
|
+
data = load_json_or_pickle("data.pkl")
|
|
180
|
+
|
|
181
|
+
# Load based on extension with parallel processing
|
|
182
|
+
loaded_data = load_by_ext(["data.json", "data.pkl"])
|
|
183
|
+
```
|
|
184
|
+
|
|
185
|
+
### Data Manipulation
|
|
186
|
+
|
|
187
|
+
#### Flattening Lists and Dictionaries
|
|
188
|
+
|
|
189
|
+
```python
|
|
190
|
+
from speedy_utils import flatten_list, flatten_dict
|
|
191
|
+
|
|
192
|
+
nested_list = [[1, 2], [3, 4], [5]]
|
|
193
|
+
flat_list = flatten_list(nested_list)
|
|
194
|
+
print(flat_list) # [1, 2, 3, 4, 5]
|
|
195
|
+
|
|
196
|
+
nested_dict = {"a": {"b": 1, "c": 2}, "d": 3}
|
|
197
|
+
flat_dict = flatten_dict(nested_dict)
|
|
198
|
+
print(flat_dict) # {'a.b': 1, 'a.c': 2, 'd': 3}
|
|
199
|
+
```
|
|
200
|
+
|
|
201
|
+
#### Converting to Built-in Python Types
|
|
202
|
+
|
|
203
|
+
```python
|
|
204
|
+
from speedy_utils import convert_to_builtin_python
|
|
205
|
+
from pydantic import BaseModel
|
|
206
|
+
|
|
207
|
+
class User(BaseModel):
|
|
208
|
+
name: str
|
|
209
|
+
age: int
|
|
210
|
+
|
|
211
|
+
user = User(name="Charlie", age=28)
|
|
212
|
+
builtin_user = convert_to_builtin_python(user)
|
|
213
|
+
print(builtin_user) # {'name': 'Charlie', 'age': 28}
|
|
214
|
+
```
|
|
215
|
+
|
|
216
|
+
### Utility Functions
|
|
217
|
+
|
|
218
|
+
#### Pretty Printing
|
|
219
|
+
|
|
220
|
+
```python
|
|
221
|
+
from speedy_utils import fprint, print_table
|
|
222
|
+
|
|
223
|
+
data = {"name": "Dana", "age": 22, "city": "New York"}
|
|
224
|
+
|
|
225
|
+
# Pretty print as table
|
|
226
|
+
fprint(data)
|
|
227
|
+
|
|
228
|
+
# Print as table using tabulate
|
|
229
|
+
print_table(data)
|
|
230
|
+
```
|
|
231
|
+
|
|
232
|
+
#### Timing Utilities
|
|
233
|
+
|
|
234
|
+
```python
|
|
235
|
+
from speedy_utils import timef, Clock
|
|
236
|
+
|
|
237
|
+
@timef
|
|
238
|
+
def slow_function():
|
|
239
|
+
import time
|
|
240
|
+
time.sleep(3)
|
|
241
|
+
return "Done"
|
|
242
|
+
|
|
243
|
+
result = slow_function() # Prints execution time
|
|
244
|
+
|
|
245
|
+
# Using Clock
|
|
246
|
+
clock = Clock()
|
|
247
|
+
# ... your code ...
|
|
248
|
+
clock.log()
|
|
249
|
+
```
|
|
250
|
+
|
|
251
|
+
## Testing
|
|
252
|
+
|
|
253
|
+
The project includes a comprehensive test suite using `unittest`. To run the tests, execute the following command in the project root directory:
|
|
254
|
+
|
|
255
|
+
```bash
|
|
256
|
+
python test.py
|
|
257
|
+
```
|
|
258
|
+
|
|
259
|
+
Ensure all dependencies are installed before running tests:
|
|
260
|
+
|
|
261
|
+
```bash
|
|
262
|
+
pip install -r requirements.txt
|
|
263
|
+
```
|
|
264
|
+
|
|
265
|
+
|
|
266
|
+
Run the script to parse and display the arguments:
|
|
267
|
+
|
|
268
|
+
```bash
|
|
269
|
+
python speedy_utils/common/dataclass_parser.py
|
|
270
|
+
```
|
|
271
|
+
|
|
272
|
+
Example output:
|
|
273
|
+
|
|
274
|
+
| Field | Value |
|
|
275
|
+
|--------------------|---------------------------------------|
|
|
276
|
+
| from_peft | ./outputs/llm_hn_qw32b/hn_results_r3/ |
|
|
277
|
+
|
|
278
|
+
When contributing, please ensure your code adheres to the project's coding standards and includes appropriate tests.
|
|
279
|
+
|
|
@@ -0,0 +1,241 @@
|
|
|
1
|
+
# Speedy Utils
|
|
2
|
+
|
|
3
|
+

|
|
4
|
+

|
|
5
|
+

|
|
6
|
+
|
|
7
|
+
**Speedy Utils** is a Python utility library designed to streamline common programming tasks such as caching, parallel processing, file I/O, and data manipulation. It provides a collection of decorators, functions, and classes to enhance productivity and performance in your Python projects.
|
|
8
|
+
|
|
9
|
+
## Table of Contents
|
|
10
|
+
|
|
11
|
+
- [Features](#features)
|
|
12
|
+
- [Installation](#installation)
|
|
13
|
+
- [Usage](#usage)
|
|
14
|
+
- [Caching](#caching)
|
|
15
|
+
- [Parallel Processing](#parallel-processing)
|
|
16
|
+
- [File I/O](#file-io)
|
|
17
|
+
- [Data Manipulation](#data-manipulation)
|
|
18
|
+
- [Utility Functions](#utility-functions)
|
|
19
|
+
- [Testing](#testing)
|
|
20
|
+
- [Deployment](#deployment)
|
|
21
|
+
- [Contributing](#contributing)
|
|
22
|
+
- [License](#license)
|
|
23
|
+
|
|
24
|
+
## Features
|
|
25
|
+
|
|
26
|
+
- **Caching Mechanisms**: Disk-based and in-memory caching to optimize function calls.
|
|
27
|
+
- **Parallel Processing**: Multi-threading, multi-processing, and asynchronous multi-threading utilities.
|
|
28
|
+
- **File I/O**: Simplified JSON, JSONL, and pickle file handling with support for various file extensions.
|
|
29
|
+
- **Data Manipulation**: Utilities for flattening lists and dictionaries, converting data types, and more.
|
|
30
|
+
- **Timing Utilities**: Tools to measure and log execution time of functions and processes.
|
|
31
|
+
- **Pretty Printing**: Enhanced printing functions for structured data, including HTML tables for Jupyter notebooks.
|
|
32
|
+
|
|
33
|
+
## Installation
|
|
34
|
+
|
|
35
|
+
You can install **Speedy Utils** from [PyPI](https://pypi.org/project/speedy-utils/) using `uv pip` (plain `pip install speedy-utils` works as well):
|
|
36
|
+
|
|
37
|
+
```bash
|
|
38
|
+
uv pip install speedy-utils
|
|
39
|
+
|
|
40
|
+
```
|
|
41
|
+
|
|
42
|
+
Alternatively, install directly from the repository:
|
|
43
|
+
|
|
44
|
+
```bash
|
|
45
|
+
git clone https://github.com/anhvth/speedy speedy-utils
|
|
46
|
+
cd speedy-utils
|
|
47
|
+
pip install .
|
|
48
|
+
```
|
|
49
|
+
|
|
50
|
+
## Usage
|
|
51
|
+
|
|
52
|
+
Below are examples demonstrating how to utilize various features of **Speedy Utils**.
|
|
53
|
+
|
|
54
|
+
### Caching
|
|
55
|
+
|
|
56
|
+
#### Memoize Decorator
|
|
57
|
+
|
|
58
|
+
Cache the results of function calls to disk to avoid redundant computations.
|
|
59
|
+
|
|
60
|
+
```python
|
|
61
|
+
from speedy_utils import memoize
|
|
62
|
+
|
|
63
|
+
@memoize
|
|
64
|
+
def expensive_function(x):
|
|
65
|
+
# Simulate an expensive computation
|
|
66
|
+
import time
|
|
67
|
+
time.sleep(2)
|
|
68
|
+
return x * x
|
|
69
|
+
|
|
70
|
+
result = expensive_function(4) # Takes ~2 seconds
|
|
71
|
+
result = expensive_function(4) # Retrieved from cache instantly
|
|
72
|
+
```
|
|
73
|
+
|
|
74
|
+
#### In-Memory Memoization
|
|
75
|
+
|
|
76
|
+
Cache function results in memory for faster access within the same runtime.
|
|
77
|
+
|
|
78
|
+
```python
|
|
79
|
+
from speedy_utils import imemoize
|
|
80
|
+
|
|
81
|
+
@imemoize
|
|
82
|
+
def compute_sum(a, b):
|
|
83
|
+
return a + b
|
|
84
|
+
|
|
85
|
+
result = compute_sum(5, 7) # Computed and cached
|
|
86
|
+
result = compute_sum(5, 7) # Retrieved from in-memory cache
|
|
87
|
+
```
|
|
88
|
+
|
|
89
|
+
### Parallel Processing
|
|
90
|
+
|
|
91
|
+
#### Multi-threading
|
|
92
|
+
|
|
93
|
+
Execute a function over many inputs concurrently using multiple threads. `multi_thread` works the same way in plain Python scripts and in notebooks. In a notebook environment, it delegates the work to a separate process; if execution is interrupted, that process is stopped immediately, which avoids the usual problem of worker threads continuing to run until all remaining tasks are completed.
|
|
94
|
+
|
|
95
|
+
```python
|
|
96
|
+
from speedy_utils import multi_thread
|
|
97
|
+
|
|
98
|
+
def process_item(item):
|
|
99
|
+
# Your processing logic
|
|
100
|
+
return item * 2
|
|
101
|
+
|
|
102
|
+
items = [1, 2, 3, 4, 5]
|
|
103
|
+
results = multi_thread(process_item, items, workers=3)
|
|
104
|
+
print(results) # [2, 4, 6, 8, 10]
|
|
105
|
+
```
|
|
106
|
+
|
|
107
|
+
### File I/O
|
|
108
|
+
|
|
109
|
+
#### Dumping Data
|
|
110
|
+
|
|
111
|
+
Save data in JSON, JSONL, or pickle formats.
|
|
112
|
+
|
|
113
|
+
```python
|
|
114
|
+
from speedy_utils import dump_json_or_pickle, dump_jsonl
|
|
115
|
+
|
|
116
|
+
data = {"name": "Alice", "age": 30}
|
|
117
|
+
|
|
118
|
+
# Save as JSON
|
|
119
|
+
dump_json_or_pickle(data, "data.json")
|
|
120
|
+
|
|
121
|
+
# Save as JSONL
|
|
122
|
+
dump_jsonl([data, {"name": "Bob", "age": 25}], "data.jsonl")
|
|
123
|
+
|
|
124
|
+
# Save as Pickle
|
|
125
|
+
dump_json_or_pickle(data, "data.pkl")
|
|
126
|
+
```
|
|
127
|
+
|
|
128
|
+
#### Loading Data
|
|
129
|
+
|
|
130
|
+
Load data based on file extensions.
|
|
131
|
+
|
|
132
|
+
```python
|
|
133
|
+
from speedy_utils import load_json_or_pickle, load_by_ext
|
|
134
|
+
|
|
135
|
+
# Load JSON
|
|
136
|
+
data = load_json_or_pickle("data.json")
|
|
137
|
+
|
|
138
|
+
# Load JSONL
|
|
139
|
+
data_list = load_json_or_pickle("data.jsonl")
|
|
140
|
+
|
|
141
|
+
# Load Pickle
|
|
142
|
+
data = load_json_or_pickle("data.pkl")
|
|
143
|
+
|
|
144
|
+
# Load based on extension with parallel processing
|
|
145
|
+
loaded_data = load_by_ext(["data.json", "data.pkl"])
|
|
146
|
+
```
|
|
147
|
+
|
|
148
|
+
### Data Manipulation
|
|
149
|
+
|
|
150
|
+
#### Flattening Lists and Dictionaries
|
|
151
|
+
|
|
152
|
+
```python
|
|
153
|
+
from speedy_utils import flatten_list, flatten_dict
|
|
154
|
+
|
|
155
|
+
nested_list = [[1, 2], [3, 4], [5]]
|
|
156
|
+
flat_list = flatten_list(nested_list)
|
|
157
|
+
print(flat_list) # [1, 2, 3, 4, 5]
|
|
158
|
+
|
|
159
|
+
nested_dict = {"a": {"b": 1, "c": 2}, "d": 3}
|
|
160
|
+
flat_dict = flatten_dict(nested_dict)
|
|
161
|
+
print(flat_dict) # {'a.b': 1, 'a.c': 2, 'd': 3}
|
|
162
|
+
```
|
|
163
|
+
|
|
164
|
+
#### Converting to Built-in Python Types
|
|
165
|
+
|
|
166
|
+
```python
|
|
167
|
+
from speedy_utils import convert_to_builtin_python
|
|
168
|
+
from pydantic import BaseModel
|
|
169
|
+
|
|
170
|
+
class User(BaseModel):
|
|
171
|
+
name: str
|
|
172
|
+
age: int
|
|
173
|
+
|
|
174
|
+
user = User(name="Charlie", age=28)
|
|
175
|
+
builtin_user = convert_to_builtin_python(user)
|
|
176
|
+
print(builtin_user) # {'name': 'Charlie', 'age': 28}
|
|
177
|
+
```
|
|
178
|
+
|
|
179
|
+
### Utility Functions
|
|
180
|
+
|
|
181
|
+
#### Pretty Printing
|
|
182
|
+
|
|
183
|
+
```python
|
|
184
|
+
from speedy_utils import fprint, print_table
|
|
185
|
+
|
|
186
|
+
data = {"name": "Dana", "age": 22, "city": "New York"}
|
|
187
|
+
|
|
188
|
+
# Pretty print as table
|
|
189
|
+
fprint(data)
|
|
190
|
+
|
|
191
|
+
# Print as table using tabulate
|
|
192
|
+
print_table(data)
|
|
193
|
+
```
|
|
194
|
+
|
|
195
|
+
#### Timing Utilities
|
|
196
|
+
|
|
197
|
+
```python
|
|
198
|
+
from speedy_utils import timef, Clock
|
|
199
|
+
|
|
200
|
+
@timef
|
|
201
|
+
def slow_function():
|
|
202
|
+
import time
|
|
203
|
+
time.sleep(3)
|
|
204
|
+
return "Done"
|
|
205
|
+
|
|
206
|
+
result = slow_function() # Prints execution time
|
|
207
|
+
|
|
208
|
+
# Using Clock
|
|
209
|
+
clock = Clock()
|
|
210
|
+
# ... your code ...
|
|
211
|
+
clock.log()
|
|
212
|
+
```
|
|
213
|
+
|
|
214
|
+
## Testing
|
|
215
|
+
|
|
216
|
+
The project includes a comprehensive test suite using `unittest`. To run the tests, execute the following command in the project root directory:
|
|
217
|
+
|
|
218
|
+
```bash
|
|
219
|
+
python test.py
|
|
220
|
+
```
|
|
221
|
+
|
|
222
|
+
Ensure all dependencies are installed before running tests:
|
|
223
|
+
|
|
224
|
+
```bash
|
|
225
|
+
pip install -r requirements.txt
|
|
226
|
+
```
|
|
227
|
+
|
|
228
|
+
|
|
229
|
+
Run the script to parse and display the arguments:
|
|
230
|
+
|
|
231
|
+
```bash
|
|
232
|
+
python speedy_utils/common/dataclass_parser.py
|
|
233
|
+
```
|
|
234
|
+
|
|
235
|
+
Example output:
|
|
236
|
+
|
|
237
|
+
| Field | Value |
|
|
238
|
+
|--------------------|---------------------------------------|
|
|
239
|
+
| from_peft | ./outputs/llm_hn_qw32b/hn_results_r3/ |
|
|
240
|
+
|
|
241
|
+
When contributing, please ensure your code adheres to the project's coding standards and includes appropriate tests.
|
|
@@ -0,0 +1,63 @@
|
|
|
1
|
+
[tool.poetry]
|
|
2
|
+
name = "speedy-utils"
|
|
3
|
+
version = "1.0.5"
|
|
4
|
+
description = "Fast and easy-to-use package for data science"
|
|
5
|
+
authors = ["AnhVTH <anhvth.226@gmail.com>"]
|
|
6
|
+
readme = "README.md"
|
|
7
|
+
homepage = "https://github.com/anhvth/speedy"
|
|
8
|
+
packages = [
|
|
9
|
+
{ include = "speedy_utils", from = "src" },
|
|
10
|
+
{ include = "llm_utils", from = "src" },
|
|
11
|
+
]
|
|
12
|
+
|
|
13
|
+
[build-system]
|
|
14
|
+
requires = ["poetry-core>=1.0.5"]
|
|
15
|
+
build-backend = "poetry.core.masonry.api"
|
|
16
|
+
|
|
17
|
+
[tool.black]
|
|
18
|
+
line-length = 88
|
|
19
|
+
target-version = ["py310"]
|
|
20
|
+
include = '\.pyi?$'
|
|
21
|
+
|
|
22
|
+
[tool.isort]
|
|
23
|
+
profile = "black"
|
|
24
|
+
line_length = 88
|
|
25
|
+
multi_line_output = 3
|
|
26
|
+
|
|
27
|
+
[tool.mypy]
|
|
28
|
+
python_version = "3.10"
|
|
29
|
+
warn_return_any = true
|
|
30
|
+
warn_unused_configs = true
|
|
31
|
+
disallow_untyped_defs = true
|
|
32
|
+
disallow_incomplete_defs = true
|
|
33
|
+
check_untyped_defs = true
|
|
34
|
+
disallow_untyped_decorators = true
|
|
35
|
+
no_implicit_optional = true
|
|
36
|
+
strict_optional = true
|
|
37
|
+
|
|
38
|
+
[tool.poetry.dependencies]
|
|
39
|
+
python = ">=3.9"
|
|
40
|
+
numpy = "*"
|
|
41
|
+
requests = "*"
|
|
42
|
+
xxhash = "*"
|
|
43
|
+
loguru = "*"
|
|
44
|
+
fastcore = "*"
|
|
45
|
+
debugpy = "*"
|
|
46
|
+
ipywidgets = "*"
|
|
47
|
+
jupyterlab = "*"
|
|
48
|
+
ipdb = "*"
|
|
49
|
+
scikit-learn = "*"
|
|
50
|
+
matplotlib = "*"
|
|
51
|
+
pandas = "*"
|
|
52
|
+
tabulate = "*"
|
|
53
|
+
pydantic = "*"
|
|
54
|
+
tqdm = "*"
|
|
55
|
+
cachetools = "*"
|
|
56
|
+
bump2version = "*"
|
|
57
|
+
json-repair = ">=0.40.0,<0.41.0"
|
|
58
|
+
fastprogress = "*"
|
|
59
|
+
freezegun = "^1.5.1"
|
|
60
|
+
packaging = ">=23.2,<25"
|
|
61
|
+
|
|
62
|
+
[tool.poetry.scripts]
|
|
63
|
+
mpython = "speedy_utils.scripts.mpython:main"
|
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
from .chat_format import (
|
|
2
|
+
transform_messages,
|
|
3
|
+
transform_messages_to_chatml,
|
|
4
|
+
display_chat_messages_as_html,
|
|
5
|
+
get_conversation_one_turn,
|
|
6
|
+
display_diff_two_string,
|
|
7
|
+
display_conversations,
|
|
8
|
+
build_chatml_input,
|
|
9
|
+
format_msgs,
|
|
10
|
+
)
|
|
11
|
+
from .lm import LM
|
|
12
|
+
from .group_messages import (
|
|
13
|
+
split_indices_by_length,
|
|
14
|
+
group_messages_by_len,
|
|
15
|
+
)
|
|
16
|
+
|
|
17
|
+
__all__ = [
|
|
18
|
+
"transform_messages",
|
|
19
|
+
"transform_messages_to_chatml",
|
|
20
|
+
"display_chat_messages_as_html",
|
|
21
|
+
"get_conversation_one_turn",
|
|
22
|
+
"display_diff_two_string",
|
|
23
|
+
"display_conversations",
|
|
24
|
+
"build_chatml_input",
|
|
25
|
+
"format_msgs",
|
|
26
|
+
"split_indices_by_length",
|
|
27
|
+
"group_messages_by_len",
|
|
28
|
+
"LM",
|
|
29
|
+
]
|