structai 0.1.6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
structai/io.py ADDED
@@ -0,0 +1,227 @@
1
+ import os
2
+ import json
3
+ import pickle
4
+
5
+
6
def load_file(path):
    """
    Read a file, picking the loader from its (case-insensitive) extension.

    Supported formats: json, jsonl, csv, txt, md, pkl, parquet, xlsx, py,
    npy, pt, png, jpg, jpeg

    Args:
        path: Location of the file to read.

    Returns:
        - .json: the decoded JSON document.
        - .jsonl: a list holding one decoded value per non-blank line.
        - .txt / .md / .py: the whole file as a single UTF-8 decoded string.
        - .pkl: the unpickled object.
        - .csv / .parquet / .xlsx: whatever pandas' ``read_csv`` /
          ``read_parquet`` / ``read_excel`` returns.
        - .npy: the result of ``numpy.load``.
        - .pt: the result of ``torch.load``.
        - .png / .jpg / .jpeg: the result of ``PIL.Image.open``.

    Raises:
        ValueError: If the extension is not one of the supported formats.
    """
    ext = os.path.splitext(path)[1].lower()

    # Plain-text formats are all read the same way: the whole file as one str.
    if ext in (".txt", ".md", ".py"):
        with open(path, "r", encoding="utf-8") as f:
            return f.read()

    if ext == ".json":
        with open(path, "r", encoding="utf-8") as f:
            return json.load(f)

    if ext == ".jsonl":
        with open(path, "r", encoding="utf-8") as f:
            # Blank / whitespace-only lines (e.g. an extra newline at the end
            # of the file) are not JSON documents; skip them instead of
            # letting json.loads fail on them.
            return [json.loads(line) for line in f if line.strip()]

    if ext == ".pkl":
        # SECURITY: unpickling can execute arbitrary code. Only load pickle
        # files that come from a trusted source.
        with open(path, "rb") as f:
            return pickle.load(f)

    # Third-party libraries are imported inside their own branch, so each one
    # is only needed when the corresponding format is actually requested.
    if ext == ".csv":
        import pandas as pd
        return pd.read_csv(path)

    if ext == ".parquet":
        import pandas as pd
        return pd.read_parquet(path)

    if ext == ".xlsx":
        import pandas as pd
        return pd.read_excel(path)

    if ext == ".npy":
        import numpy as np
        return np.load(path)

    if ext == ".pt":
        import torch
        # NOTE(review): depending on the installed torch version and its
        # `weights_only` default, torch.load may unpickle arbitrary objects;
        # treat untrusted .pt files with the same care as .pkl files.
        return torch.load(path)

    if ext in (".png", ".jpg", ".jpeg"):
        from PIL import Image
        return Image.open(path)

    raise ValueError(f"Unsupported file format: {ext}")
63
+
64
+
65
def save_file(data, path):
    """
    Write ``data`` to ``path``, picking the writer from the file extension.

    Supported formats: json, jsonl, csv, txt, md, pkl, parquet, xlsx, py,
    npy, pt, png, jpg, jpeg

    The extension is matched case-insensitively. All argument validation
    happens before anything is touched on disk; only then is the parent
    directory created (if missing) and the file written.

    Args:
        data: Object to store. What is accepted depends on the format:
            - .json: anything ``json.dumps`` can serialize.
            - .jsonl: an iterable of JSON-serializable items, one per line.
            - .csv / .parquet / .xlsx: a pandas DataFrame (written without
              the index).
            - .txt / .md / .py: any object; ``str(data)`` is written.
            - .pkl: anything ``pickle.dump`` accepts.
            - .npy: passed to ``numpy.save``.
            - .pt: passed to ``torch.save``.
            - .png / .jpg / .jpeg: a numpy array (converted with
              ``PIL.Image.fromarray``) or any object with a ``save(path)``
              method.
        path: Destination file path.

    Raises:
        ValueError: If the extension is unsupported, or if a tabular format
            (csv, parquet, xlsx) is requested with a non-DataFrame ``data``.
    """
    ext = os.path.splitext(path)[1].lower()

    supported = (
        ".json", ".jsonl", ".csv", ".txt", ".md", ".pkl", ".parquet",
        ".xlsx", ".py", ".npy", ".pt", ".png", ".jpg", ".jpeg",
    )
    # Reject unknown formats before creating any directory, so a failed call
    # does not leave an empty directory tree behind.
    if ext not in supported:
        raise ValueError(f"Unsupported file format: {ext}")

    # Tabular formats need a DataFrame; check that up front as well, for the
    # same reason (no filesystem side effects on invalid input).
    frame_errors = {
        ".csv": "Saving CSV requires a pandas DataFrame.",
        ".parquet": "Saving parquet requires a pandas DataFrame.",
        ".xlsx": "Saving Excel requires a pandas DataFrame.",
    }
    if ext in frame_errors:
        import pandas as pd
        if not isinstance(data, pd.DataFrame):
            raise ValueError(frame_errors[ext])

    # Ensure directory exists (abspath makes this work for bare file names,
    # whose dirname would otherwise be an empty string).
    os.makedirs(os.path.dirname(os.path.abspath(path)), exist_ok=True)

    # JSON
    if ext == ".json":
        # Serialize first: if `data` is not JSON-serializable the error is
        # raised before the target is opened in "w" mode, so an existing file
        # is not truncated by a failed save.
        text = json.dumps(data, ensure_ascii=False, indent=4)
        with open(path, "w", encoding="utf-8") as f:
            f.write(text)

    # JSON Lines (items are written one by one as they are iterated)
    elif ext == ".jsonl":
        with open(path, "w", encoding="utf-8") as f:
            for item in data:
                f.write(json.dumps(item, ensure_ascii=False) + "\n")

    # CSV
    elif ext == ".csv":
        data.to_csv(path, index=False)

    # TXT / Markdown / Python script: plain text
    elif ext in (".txt", ".md", ".py"):
        with open(path, "w", encoding="utf-8") as f:
            f.write(str(data))

    # Pickle
    elif ext == ".pkl":
        with open(path, "wb") as f:
            pickle.dump(data, f)

    # Parquet
    elif ext == ".parquet":
        data.to_parquet(path, index=False)

    # Excel
    elif ext == ".xlsx":
        data.to_excel(path, index=False)

    # Numpy array
    elif ext == ".npy":
        import numpy as np
        np.save(path, data)

    # PyTorch tensor/model
    elif ext == ".pt":
        import torch
        torch.save(data, path)

    # Image
    else:
        import numpy as np
        from PIL import Image
        if isinstance(data, np.ndarray):
            Image.fromarray(data).save(path)
        else:
            data.save(path)
151
+
152
+
153
def print_once(msg):
    """
    Print ``msg`` only on the very first call of this function.

    The "already printed" marker is stored as the ``_printed`` attribute on
    the function object itself, so it is shared by every caller: once any
    message has been printed, all later calls (with the same or a different
    message) print nothing.
    """
    if hasattr(print_once, "_printed"):
        # The marker attribute already exists: something was printed earlier.
        return
    print(msg)
    setattr(print_once, "_printed", True)
157
+
158
+
159
def make_print_once():
    """
    Build an independent print-once function.

    Returns:
        A callable taking a single ``msg`` argument. It prints ``msg`` the
        first time it is called and does nothing on every later call. Each
        call to ``make_print_once`` returns a callable with its own state.
    """
    already_shown = False

    def inner(msg):
        nonlocal already_shown
        if already_shown:
            return
        print(msg)
        # Set the flag only after print() has returned.
        already_shown = True

    return inner
168
+
169
+
170
if __name__ == "__main__":
    # Self-test entry point; run with: python -m structai.io
    import tempfile

    print("Testing io.py...")

    # Test data
    test_dict = {"key": "value 🌍", "num": 123}
    test_list = [{"a": 1}, {"b": 2}]
    test_str = "Hello World"

    # Work inside a private temporary directory instead of the current
    # working directory: the self-test can then never overwrite (or, during
    # cleanup, delete) a user's own "test_io_temp.*" files. The directory and
    # everything in it is removed when the `with` block exits, including when
    # an assertion fails.
    with tempfile.TemporaryDirectory() as tmp_dir:
        # Define paths
        base_path = os.path.join(tmp_dir, "test_io_temp")
        json_path = f"{base_path}.json"
        jsonl_path = f"{base_path}.jsonl"
        txt_path = f"{base_path}.txt"
        pkl_path = f"{base_path}.pkl"

        # Test JSON
        save_file(test_dict, json_path)
        loaded_dict = load_file(json_path)
        assert loaded_dict == test_dict, f"[===ERROR===][structai][io.py][main] JSON mismatch: {loaded_dict} != {test_dict}"
        print("JSON test passed")

        # Test JSONL
        save_file(test_list, jsonl_path)
        loaded_list = load_file(jsonl_path)
        assert loaded_list == test_list, f"[===ERROR===][structai][io.py][main] JSONL mismatch: {loaded_list} != {test_list}"
        print("JSONL test passed")

        # Test TXT
        save_file(test_str, txt_path)
        loaded_str = load_file(txt_path)
        assert loaded_str == test_str, f"[===ERROR===][structai][io.py][main] TXT mismatch: {loaded_str} != {test_str}"
        print("TXT test passed")

        # Test Pickle
        save_file(test_dict, pkl_path)
        loaded_pkl = load_file(pkl_path)
        assert loaded_pkl == test_dict, f"[===ERROR===][structai][io.py][main] Pickle mismatch: {loaded_pkl} != {test_dict}"
        print("Pickle test passed")

        # Test print_once
        print("Testing print_once (should see 'Hello Once' only once):")
        print_once("Hello Once")
        print_once("Hello Once")

        po = make_print_once()
        print("Testing make_print_once (should see 'Hello Again' only once):")
        po("Hello Again")
        po("Hello Again")

    print("io.py tests completed.")