structai 0.1.6__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- structai/__init__.py +586 -0
- structai/io.py +227 -0
- structai/llm_api.py +713 -0
- structai/mp.py +96 -0
- structai/openai_server.py +79 -0
- structai/utils.py +209 -0
- structai-0.1.6.dist-info/METADATA +601 -0
- structai-0.1.6.dist-info/RECORD +11 -0
- structai-0.1.6.dist-info/WHEEL +5 -0
- structai-0.1.6.dist-info/licenses/LICENSE +21 -0
- structai-0.1.6.dist-info/top_level.txt +1 -0
structai/io.py
ADDED
|
@@ -0,0 +1,227 @@
|
|
|
1
|
+
import os
|
|
2
|
+
import json
|
|
3
|
+
import pickle
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
def load_file(path):
    """
    Read a file, choosing the reader from the file extension.

    Supported formats: json, jsonl, csv, txt, md, pkl, parquet, xlsx, py,
    npy, pt, png, jpg, jpeg. The extension is compared case-insensitively.

    Third-party readers (pandas, numpy, torch, PIL) are imported lazily inside
    the branch that needs them, so they are only required for those formats.

    Args:
        path: Path of the file to read.

    Returns:
        - .json: the object produced by ``json.load``.
        - .jsonl: a list with one parsed object per non-blank line.
        - .csv / .parquet / .xlsx: the result of the matching
          ``pandas.read_*`` call.
        - .txt / .md / .py: the file content as a ``str`` (UTF-8).
        - .pkl: the object produced by ``pickle.load``.
        - .npy: the result of ``numpy.load``.
        - .pt: the result of ``torch.load``.
        - .png / .jpg / .jpeg: the result of ``PIL.Image.open``.

    Raises:
        ValueError: If the extension is not one of the supported formats.
    """
    ext = os.path.splitext(path)[1].lower()

    if ext == ".json":
        with open(path, "r", encoding="utf-8") as f:
            return json.load(f)

    elif ext == ".jsonl":
        with open(path, "r", encoding="utf-8") as f:
            # Skip blank lines (e.g. a trailing empty line) instead of
            # letting json.loads fail on them.
            return [json.loads(line) for line in f if line.strip()]

    elif ext == ".csv":
        import pandas as pd
        return pd.read_csv(path)

    elif ext in (".txt", ".md", ".py"):
        # Plain-text formats are all returned verbatim as one string.
        with open(path, "r", encoding="utf-8") as f:
            return f.read()

    elif ext == ".pkl":
        # NOTE: unpickling can execute arbitrary code; only load trusted files.
        with open(path, "rb") as f:
            return pickle.load(f)

    elif ext == ".parquet":
        import pandas as pd
        return pd.read_parquet(path)

    elif ext == ".xlsx":
        import pandas as pd
        return pd.read_excel(path)

    elif ext == ".npy":
        import numpy as np
        return np.load(path)

    elif ext == ".pt":
        import torch
        # NOTE: torch.load may unpickle data; only load trusted files.
        return torch.load(path)

    elif ext in [".png", ".jpg", ".jpeg"]:
        from PIL import Image
        return Image.open(path)

    else:
        raise ValueError(f"Unsupported file format: {ext}")
|
|
63
|
+
|
|
64
|
+
|
|
65
|
+
def save_file(data, path):
    """
    Write ``data`` to ``path``, choosing the writer from the file extension.

    Supported formats: json, jsonl, csv, txt, md, pkl, parquet, xlsx, py,
    npy, pt, png, jpg, jpeg. The extension is compared case-insensitively.
    The parent directory of ``path`` is created if it does not exist.

    Args:
        data: Object to save. For .jsonl it is iterated and each item is
            written as one JSON line; for .csv / .parquet / .xlsx it must be
            a pandas DataFrame; for .txt / .md / .py it is converted with
            ``str``; for images it is either a numpy array or an object with
            a ``save(path)`` method (presumably a PIL image).
        path: Destination file path.

    Returns:
        None.

    Raises:
        ValueError: If the extension is unsupported, or if a DataFrame is
            required for the format and ``data`` is not one.
        TypeError: From ``json.dumps`` when ``data`` is not JSON serializable
            (.json / .jsonl).
    """
    # Ensure directory exists
    os.makedirs(os.path.dirname(os.path.abspath(path)), exist_ok=True)

    ext = os.path.splitext(path)[1].lower()

    # JSON
    if ext == ".json":
        # Serialize before opening the file: opening with "w" truncates it,
        # so a serialization error must not leave a clobbered/partial file.
        text = json.dumps(data, ensure_ascii=False, indent=4)
        with open(path, "w", encoding="utf-8") as f:
            f.write(text)

    # JSON Lines
    elif ext == ".jsonl":
        # Same reasoning as JSON: encode every record first, then write.
        lines = [json.dumps(item, ensure_ascii=False) + "\n" for item in data]
        with open(path, "w", encoding="utf-8") as f:
            f.writelines(lines)

    # CSV
    elif ext == ".csv":
        import pandas as pd
        if isinstance(data, pd.DataFrame):
            data.to_csv(path, index=False)
        else:
            raise ValueError("Saving CSV requires a pandas DataFrame.")

    # TXT / Markdown / Python script: written as the str() of the data
    elif ext in (".txt", ".md", ".py"):
        with open(path, "w", encoding="utf-8") as f:
            f.write(str(data))

    # Pickle
    elif ext == ".pkl":
        with open(path, "wb") as f:
            pickle.dump(data, f)

    # Parquet
    elif ext == ".parquet":
        import pandas as pd
        if isinstance(data, pd.DataFrame):
            data.to_parquet(path, index=False)
        else:
            raise ValueError("Saving parquet requires a pandas DataFrame.")

    # Excel
    elif ext == ".xlsx":
        import pandas as pd
        if isinstance(data, pd.DataFrame):
            data.to_excel(path, index=False)
        else:
            raise ValueError("Saving Excel requires a pandas DataFrame.")

    # Numpy array
    elif ext == ".npy":
        import numpy as np
        np.save(path, data)

    # PyTorch tensor/model
    elif ext == ".pt":
        import torch
        torch.save(data, path)

    # Image
    elif ext in [".png", ".jpg", ".jpeg"]:
        import numpy as np
        from PIL import Image
        if isinstance(data, np.ndarray):
            Image.fromarray(data).save(path)
        else:
            # Anything else is expected to provide its own save(path).
            data.save(path)

    else:
        raise ValueError(f"Unsupported file format: {ext}")
|
|
151
|
+
|
|
152
|
+
|
|
153
|
+
def print_once(msg):
    """
    Print ``msg`` only on the first call made in this process.

    The "already printed" marker is stored as the ``_printed`` attribute on
    the function object itself, so it is shared by every caller: after the
    first message has been printed, all later calls (with any message) are
    silently ignored.
    """
    if hasattr(print_once, "_printed"):
        # A previous call already printed; nothing to do.
        return
    print(msg)
    print_once._printed = True
|
|
157
|
+
|
|
158
|
+
|
|
159
|
+
def make_print_once():
    """
    Build an independent "print only once" function.

    Returns:
        A callable ``inner(msg)`` that prints ``msg`` the first time it is
        called and does nothing on later calls. Each call to
        ``make_print_once`` returns a callable with its own private state.
    """
    state = {"printed": False}

    def inner(msg):
        if state["printed"]:
            return
        print(msg)
        state["printed"] = True

    return inner
|
|
168
|
+
|
|
169
|
+
|
|
170
|
+
if __name__ == "__main__":
    # python -m structai.io
    # Self-test: round-trips a few formats through save_file/load_file and
    # exercises the print-once helpers.
    import tempfile

    print("Testing io.py...")

    # Test data
    test_dict = {"key": "value 🌍", "num": 123}
    test_list = [{"a": 1}, {"b": 2}]
    test_str = "Hello World"

    # (label used in messages, file extension, payload to round-trip)
    round_trip_cases = [
        ("JSON", ".json", test_dict),
        ("JSONL", ".jsonl", test_list),
        ("TXT", ".txt", test_str),
        ("Pickle", ".pkl", test_dict),
    ]

    # Work inside a throwaway directory so the self-test can never overwrite
    # or delete same-named files in the current working directory; the
    # directory and its contents are removed when the block exits, even if
    # an assertion fails.
    with tempfile.TemporaryDirectory() as tmp_dir:
        base_path = os.path.join(tmp_dir, "test_io_temp")

        for label, suffix, payload in round_trip_cases:
            target = f"{base_path}{suffix}"
            save_file(payload, target)
            loaded = load_file(target)
            assert loaded == payload, f"[===ERROR===][structai][io.py][main] {label} mismatch: {loaded} != {payload}"
            print(f"{label} test passed")

        # Test print_once
        print("Testing print_once (should see 'Hello Once' only once):")
        print_once("Hello Once")
        print_once("Hello Once")

        po = make_print_once()
        print("Testing make_print_once (should see 'Hello Again' only once):")
        po("Hello Again")
        po("Hello Again")

    print("io.py tests completed.")
|