xlin 0.1.8__py2.py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
xlin/__init__.py ADDED
File without changes
xlin/ischinese.py ADDED
@@ -0,0 +1,13 @@
+ def text_is_all_chinese(test: str):
+     for ch in test:
+         if '\u4e00' <= ch <= '\u9fff':
+             continue
+         return False
+     return True
+
+
+ def text_contains_chinese(test: str):
+     for ch in test:
+         if '\u4e00' <= ch <= '\u9fff':
+             return True
+     return False
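A minimal usage sketch for the two helpers above; the sample strings are made up for illustration:

from xlin.ischinese import text_is_all_chinese, text_contains_chinese

assert text_is_all_chinese("你好")              # every character is in the CJK range U+4E00..U+9FFF
assert not text_is_all_chinese("你好, world")   # punctuation and ASCII fail the all-Chinese check
assert text_contains_chinese("hello 世界")      # a single CJK character is enough
assert not text_contains_chinese("hello")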
xlin/jsonl.py ADDED
@@ -0,0 +1,223 @@
+ import json
+ from typing import *
+
+ from pathlib import Path
+ from loguru import logger
+ import pandas as pd
+
+
+ def dataframe_to_json_list(df: pd.DataFrame):
+     """
+     Args:
+         df (pd.DataFrame): df
+
+     Returns:
+         List[Dict[str, str]]: json list: [{"col1": "xxx", "col2": "xxx", ...}, ...]
+     """
+     json_list = []
+     for i, line in df.iterrows():
+         json_list.append(dict(line))
+     return json_list
+
+
+ def transform_dataframe_to_json_list(df: pd.DataFrame, row_transform):
+     """
+     Args:
+         df (pd.DataFrame): df
+         row_transform: row -> (instruction, input, output), e.g. lambda row: (prompt_template.format(row['query']), "", row['label'])
+
+     Returns:
+         List[Dict[str, str]]: json list: [{"instruction": "xxx", "input": "xxx", "output": "xxx"}, ...]
+     """
+     out_list = list()
+     for _, row in df.iterrows():
+         instruction, input, output = row_transform(row)
+         out_list.append({"instruction": instruction, "input": input, "output": output})
+     return out_list
+
+
+ def jsonlist_to_dataframe(json_list: List[Dict[str, str]]):
+     """
+     Args:
+         json_list (List[Dict[str, str]]): json list: [{"col1": "xxx", "col2": "xxx", ...}, ...]
+
+     Returns:
+         pd.DataFrame: df
+     """
+     return pd.DataFrame(json_list)
+
+
+ def is_jsonl(filepath: str):
+     with open(filepath) as f:
+         try:
+             l = next(f)  # read one line to decide whether the file is json or jsonl
+             f.seek(0)
+         except:
+             return False
+
+     try:
+         _ = json.loads(l)
+     except ValueError:
+         return False  # the first line is not valid JSON on its own, so the file is json
+     else:
+         return True  # the first line is valid JSON on its own, so the file is jsonl
+
+ def load_text(filename):
+     with open(filename, 'r') as f:
+         return f.read()
+
+
+ def load_json_or_jsonl(filepath: str):
+     if is_jsonl(filepath):
+         return load_json_list(filepath)
+     return load_json(filepath)
+
+
+ def load_json(filename: str):
+     with open(filename, "r", encoding="utf-8") as f:
+         return json.load(f)
+
+
+ def save_json(json_list: Union[Dict[str, str], List[Dict[str, str]]], filename: str):
+     Path(filename).parent.mkdir(parents=True, exist_ok=True)
+     with open(filename, "w", encoding="utf-8") as f:
+         return json.dump(json_list, f, ensure_ascii=False, separators=(",", ":"), indent=2)
+
+
+ def load_json_list(filename: str):
+     with open(filename, "r", encoding="utf-8") as f:
+         lines = f.readlines()
+     json_list = []
+     for i in lines:
+         try:
+             obj = json.loads(i.strip())
+         except:
+             print("Corrupted line, cannot be loaded as JSON")
+             print(i)
+             continue
+         json_list.append(obj)
+     return json_list
+
+
+ def save_json_list(json_list: List[Dict[str, str]], filename: str):
+     Path(filename).parent.mkdir(parents=True, exist_ok=True)
+     with open(filename, "w", encoding="utf-8") as f:
+         f.write("\n".join([json.dumps(line, ensure_ascii=False, separators=(",", ":")) for line in json_list]))
+
+
+ def merge_json_list(filenames: List[str], output_filename: str):
+     json_list = []
+     for filename in filenames:
+         json_list.extend(load_json_list(filename))
+     save_json_list(json_list, output_filename)
+
+
+ def jsonlist_dict_summary(jsonlist_dict: Dict[str, List[dict]]):
+     rows = []
+     for k, jsonlist in jsonlist_dict.items():
+         if len(jsonlist) == 0:
+             continue
+         row = {
+             "sheet_name": k,
+             "length": len(jsonlist),
+             "columns": str(list(jsonlist[0].keys())),
+         }
+         rows.append(row)
+     df = pd.DataFrame(rows)
+     return df
+
+
+ def print_in_json(text: str):
+     print(json.dumps({"text": text}, indent=2, ensure_ascii=False))
+
+
+ def apply_changes_to_jsonlist(
+     jsonlist: List[Dict[str, str]],
+     changes: Dict[str, Callable[[Dict[str, str]], Tuple[Literal["deleted", "updated", "unchanged"], Dict[str, str]]]],
+     verbose=False,
+     **kwargs,
+ ):
+     rows = jsonlist
+     total_updated = 0
+     total_deleted = 0
+     for name, change in changes.items():
+         new_rows = []
+         updated = 0
+         deleted = 0
+         for row in rows:
+             status, new_row = change(row, **kwargs)
+             if status == "deleted":
+                 deleted += 1
+                 continue
+             if status == "updated":
+                 updated += 1
+             new_rows.append(new_row)
+         rows = new_rows
+         msgs = []
+         if updated > 0:
+             total_updated += updated
+             msgs += [f"updated {updated}"]
+         if deleted > 0:
+             total_deleted += deleted
+             msgs += [f"deleted {deleted}"]
+         if verbose and (updated > 0 or deleted > 0):
+             logger.info(f"{name}: {', '.join(msgs)}, remained {len(new_rows)} rows.")
+     return rows, total_updated, total_deleted
+
+
+ def apply_changes_to_paths(
+     paths: List[Path],
+     changes: Dict[str, Callable[[Dict[str, str]], Tuple[Literal["deleted", "updated", "unchanged"], Dict[str, str]]]],
+     verbose=False,
+     save=False,
+     load_json=load_json,
+     save_json=save_json,
+     **kwargs,
+ ):
+     total_updated = 0
+     total_deleted = 0
+     for path in paths:
+         if verbose:
+             print("checking", path)
+         jsonlist = load_json(path)
+         kwargs["path"] = path
+         new_jsonlist, updated, deleted = apply_changes_to_jsonlist(jsonlist, changes, verbose, **kwargs)
+         msgs = [f"total {len(jsonlist)} -> {len(new_jsonlist)}"]
+         if updated > 0:
+             total_updated += updated
+             msgs += [f"updated {updated}"]
+         if deleted > 0:
+             msgs += [f"deleted {deleted}"]
+             total_deleted += deleted
+         if updated > 0 or deleted > 0:
+             print(f"{path}: {', '.join(msgs)}")
+             if save:
+                 if len(new_jsonlist) > 0:
+                     save_json(new_jsonlist, path)
+                 else:
+                     path.unlink()
+     print(f"total: updated {total_updated}, deleted {total_deleted}")
+
+
+ def backup_current_output(row: Dict[str, str], output_key="output"):
+     if "old_output" in row:
+         for i in range(1, 10):
+             if f"old_output{i}" not in row:
+                 row[f"old_output{i}"] = row[output_key]
+                 break
+     else:
+         row["old_output"] = row[output_key]
+     return row
+
+
+ def backup_and_set_output(row: Dict[str, str], output: str):
+     backup_current_output(row)
+     row["output"] = output
+     return row
+
+
+ def generator_from_paths(paths: List[Path], load_data: Callable[[Path], List[Dict[str, Any]]] = load_json):
+     for path in paths:
+         jsonlist: List[Dict[str, Any]] = load_data(path)
+         for row in jsonlist:
+             yield path, row
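A short, hypothetical round-trip showing how the jsonl helpers above fit together; the file path and the `drop_empty_output` change are illustrative only:

from xlin.jsonl import save_json_list, load_json_list, apply_changes_to_jsonlist

rows = [{"query": "1+1", "output": "2"}, {"query": "2+2", "output": ""}]
save_json_list(rows, "data/example.jsonl")   # one compact JSON object per line
rows = load_json_list("data/example.jsonl")

# each change maps a row to (status, new_row); rows with status "deleted" are dropped
def drop_empty_output(row, **kwargs):
    if not row.get("output"):
        return "deleted", row
    return "unchanged", row

rows, updated, deleted = apply_changes_to_jsonlist(rows, {"drop_empty_output": drop_empty_output}, verbose=True)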
xlin/multiprocess_mapping.py ADDED
@@ -0,0 +1,214 @@
+ import time
+ import os
+ import multiprocessing
+ from multiprocessing.pool import ThreadPool
+ from typing import *
+
+ import pandas as pd
+ from pathlib import Path
+ from tqdm import tqdm
+ from loguru import logger
+
+ from xlin.jsonl import load_json_list, save_json_list, load_json, save_json
+ from xlin.util import ls
+
+
+ def element_mapping(
+     iterator: List[Any],
+     mapping_func: Callable[[Any], Tuple[bool, Any]],
+     use_multiprocessing=True,
+     thread_pool_size=int(os.getenv("THREAD_POOL_SIZE", 5)),
+ ):
+     rows = []
+     if use_multiprocessing:
+         pool = ThreadPool(thread_pool_size)
+         results = pool.map(mapping_func, iterator)
+         pool.close()
+         for ok, row in results:
+             if ok:
+                 rows.append(row)
+     else:
+         for row in tqdm(iterator):
+             ok, row = mapping_func(row)
+             if ok:
+                 rows.append(row)
+     return rows
+
+
+ def batch_mapping(
+     iterator: List[Any],
+     mapping_func: Callable[[List[Any]], Tuple[bool, List[Any]]],
+     use_multiprocessing=True,
+     thread_pool_size=int(os.getenv("THREAD_POOL_SIZE", 5)),
+     batch_size=4,
+ ):
+     batch_iterator = []
+     batch = []
+     for i, item in enumerate(iterator):
+         batch.append(item)
+         if len(batch) == batch_size:
+             batch_iterator.append(batch)
+             batch = []
+     if len(batch) > 0:
+         batch_iterator.append(batch)
+     rows = element_mapping(batch_iterator, mapping_func, use_multiprocessing, thread_pool_size)
+     rows = [row for batch in rows for row in batch]
+     return rows
+
+
+ def dataframe_with_row_mapping(
+     df: pd.DataFrame,
+     mapping_func: Callable[[dict], Tuple[bool, dict]],
+     use_multiprocessing=True,
+     thread_pool_size=int(os.getenv("THREAD_POOL_SIZE", 5)),
+ ):
+     rows = element_mapping(df.iterrows(), lambda x: mapping_func(x[1]), use_multiprocessing, thread_pool_size)
+     df = pd.DataFrame(rows)
+     return df
+
+
+ def multiprocessing_mapping_jsonlist(
+     jsonlist: List[Any],
+     output_path: Optional[Union[str, Path]],
+     partial_func,
+     batch_size=multiprocessing.cpu_count(),
+     cache_batch_num=1,
+     thread_pool_size=int(os.getenv("THREAD_POOL_SIZE", 5)),
+ ):
+     """mapping a column to another column
+
+     Args:
+         df (DataFrame): [description]
+         output_path (Path): cache path; caching is needed when the dataset is large
+         partial_func (function): (Dict[str, str]) -> Dict[str, str]
+     """
+     need_caching = output_path is not None
+     tmp_list, output_list = list(), list()
+     start_idx = 0
+     if need_caching:
+         output_path = Path(output_path)
+         if output_path.exists():
+             output_list = load_json_list(output_path)
+             start_idx = len(output_list)
+             logger.warning(f"Cache found {output_path} has {start_idx} rows. This process will continue at row index {start_idx}.")
+             logger.warning(f"缓存 {output_path} 存在 {start_idx} 行. 本次处理将从第 {start_idx} 行开始.")
+         else:
+             output_path.parent.mkdir(parents=True, exist_ok=True)
+     pool = ThreadPool(thread_pool_size)
+     logger.debug(f"pool size: {thread_pool_size}, cpu count: {multiprocessing.cpu_count()}")
+     start_time = time.time()
+     last_save_time = start_time
+     for i, line in tqdm(list(enumerate(jsonlist))):
+         if i < start_idx:
+             continue
+         tmp_list.append(line)
+         if len(tmp_list) == batch_size:
+             results = pool.map(partial_func, tmp_list)
+             output_list.extend([x for x in results])
+             tmp_list = list()
+             if need_caching and (i // batch_size) % cache_batch_num == 0:
+                 current_time = time.time()
+                 if current_time - last_save_time < 3:
+                     # if processing is fast, skip caching for batches that finish within 3 seconds so that IO does not become the bottleneck
+                     last_save_time = current_time
+                     continue
+                 save_json_list(output_list, output_path)
+                 last_save_time = time.time()
+     if len(tmp_list) > 0:
+         results = pool.map(partial_func, tmp_list)
+         output_list.extend([x for x in results])
+     pool.close()
+     if need_caching:
+         save_json_list(output_list, output_path)
+     return output_list
+
+
+ def multiprocessing_mapping(
+     df: pd.DataFrame,
+     output_path: Optional[Union[str, Path]],
+     partial_func,
+     batch_size=multiprocessing.cpu_count(),
+     cache_batch_num=1,
+     thread_pool_size=int(os.getenv("THREAD_POOL_SIZE", 5)),
+ ):
+     """mapping a column to another column
+
+     Args:
+         df (DataFrame): [description]
+         output_path (Path): cache path; caching is needed when the dataset is large
+         partial_func (function): (Dict[str, str]) -> Dict[str, str]
+     """
+     need_caching = output_path is not None
+     tmp_list, output_list = list(), list()
+     start_idx = 0
+     if need_caching:
+         output_path = Path(output_path)
+         if output_path.exists():
+             # existed_df = read_as_dataframe(output_path)
+             # start_idx = len(existed_df)
+             # output_list = dataframe_to_json_list(existed_df)
+             # logger.warning(f"Cache found {output_path} has {start_idx} rows. This process will continue at row index {start_idx}.")
+             # logger.warning(f"缓存 {output_path} 存在 {start_idx} 行. 本次处理将从第 {start_idx} 行开始.")
+             pass
+         else:
+             output_path.parent.mkdir(parents=True, exist_ok=True)
+     pool = ThreadPool(thread_pool_size)
+     logger.debug(f"pool size: {thread_pool_size}, cpu count: {multiprocessing.cpu_count()}")
+     start_time = time.time()
+     last_save_time = start_time
+     for i, line in tqdm(list(df.iterrows())):
+         if i < start_idx:
+             continue
+         line_info: dict = line.to_dict()
+         line_info: Dict[str, str] = {str(k): str(v) for k, v in line_info.items()}
+         tmp_list.append(line_info)
+         if len(tmp_list) == batch_size:
+             results = pool.map(partial_func, tmp_list)
+             output_list.extend([x for x in results])
+             tmp_list = list()
+             if need_caching and (i // batch_size) % cache_batch_num == 0:
+                 current_time = time.time()
+                 if current_time - last_save_time < 3:
+                     # if processing is fast, skip caching for batches that finish within 3 seconds so that IO does not become the bottleneck
+                     last_save_time = current_time
+                     continue
+                 output_df = pd.DataFrame(output_list)
+                 output_df.to_excel(output_path, index=False)
+                 last_save_time = time.time()
+     if len(tmp_list) > 0:
+         results = pool.map(partial_func, tmp_list)
+         output_list.extend([x for x in results])
+     pool.close()
+     output_df = pd.DataFrame(output_list)
+     if need_caching:
+         output_df.to_excel(output_path, index=False)
+     return output_df, output_list
+
+
+ def continue_run(
+     jsonfiles: List[str],
+     save_dir: str,
+     mapping_func,
+     load_func=load_json,
+     save_func=save_json,
+     batch_size=1024,
+     cache_size=8,
+ ):
+     save_dir: Path = Path(save_dir)
+     save_dir.mkdir(parents=True, exist_ok=True)
+     new_jsonfiles = []
+     for jsonfile in ls(jsonfiles):
+         jsonlist = load_func(jsonfile)
+         output_filepath = save_dir / jsonfile.name
+         for row in jsonlist:
+             row["来源"] = jsonfile.name
+         new_jsonlist = multiprocessing_mapping_jsonlist(
+             jsonlist,
+             output_filepath,
+             mapping_func,
+             batch_size,
+             cache_size,
+         )
+         save_func(new_jsonlist, output_filepath)
+         new_jsonfiles.append(output_filepath)
+     return new_jsonfiles
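An illustrative call of `multiprocessing_mapping_jsonlist` from the file above; `add_length` and the cache path are hypothetical:

from xlin.multiprocess_mapping import multiprocessing_mapping_jsonlist

def add_length(row):
    row["length"] = len(row.get("query", ""))
    return row

jsonlist = [{"query": q} for q in ["a", "bb", "ccc"]]
# results are cached to output/cache.jsonl, so an interrupted run resumes at the cached row index
output = multiprocessing_mapping_jsonlist(jsonlist, "output/cache.jsonl", add_length)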
xlin/read_as_dataframe.py ADDED
@@ -0,0 +1,233 @@
+ from collections import defaultdict
+ import os
+ from typing import Callable, Dict, List, Optional, Tuple, Union
+ from pathlib import Path
+ from loguru import logger
+
+ import pandas as pd
+ import pyexcel
+
+ from xlin.util import ls
+ from xlin.jsonl import dataframe_to_json_list, load_json, load_json_list, save_json_list
+ from xlin.xls2xlsx import is_xslx
+
+
+ def valid_to_read_as_dataframe(filename: str) -> bool:
+     suffix_list = [".json", ".jsonl", ".xlsx", ".xls", ".csv"]
+     return any([filename.endswith(suffix) for suffix in suffix_list])
+
+
+ def read_as_dataframe(filepath: Union[str, Path], sheet_name: Optional[str] = None, fill_empty_str_to_na=True) -> pd.DataFrame:
+     """
+     Read a file (or every file under a directory) as a DataFrame.
+     """
+     filepath = Path(filepath)
+     if filepath.is_dir():
+         paths = ls(filepath, expand_all_subdir=True)
+         df_list = []
+         for path in paths:
+             try:
+                 df = read_as_dataframe(path, sheet_name, fill_empty_str_to_na)
+                 df['数据来源'] = path.name
+             except:
+                 df = pd.DataFrame()
+             df_list.append(df)
+         df = pd.concat(df_list)
+         if fill_empty_str_to_na:
+             df.fillna("", inplace=True)
+         return df
+     filename = filepath.name
+     if filename.endswith(".json") or filename.endswith(".jsonl"):
+         try:
+             json_list = load_json(filepath)
+         except:
+             json_list = load_json_list(filepath)
+         df = pd.DataFrame(json_list)
+     elif filename.endswith(".xlsx"):
+         df = pd.read_excel(filepath) if sheet_name is None else pd.read_excel(filepath, sheet_name)
+     elif filename.endswith(".xls"):
+         if is_xslx(filepath):
+             df = pd.read_excel(filepath) if sheet_name is None else pd.read_excel(filepath, sheet_name)
+         else:
+             df = pyexcel.get_sheet(file_name=filepath)
+     elif filename.endswith(".csv"):
+         df = pd.read_csv(filepath)
+     else:
+         raise ValueError(f"Unsupported filetype {filepath}. filetype not in [json, jsonl, xlsx, xls, csv]")
+     if fill_empty_str_to_na:
+         df.fillna("", inplace=True)
+     return df
+
+
+ def read_maybe_dir_as_dataframe(filepath: Union[str, Path], sheet_name: Optional[str] = None) -> pd.DataFrame:
+     """
+     The input path may be a directory; in that case all tables under it are concatenated (they must share the same header).
+     Otherwise it is a single file and is read as a DataFrame directly.
+     """
+     out_list = list()
+     if not isinstance(filepath, Path):
+         filepath = Path(filepath)
+     if not filepath.exists():
+         raise ValueError(f"Path Not Exist: {filepath}")
+     if not filepath.is_dir():
+         return read_as_dataframe(filepath, sheet_name)
+
+     files = os.listdir(filepath)
+     for file_name in files:
+         if not valid_to_read_as_dataframe(file_name):
+             continue
+         input_file = filepath / file_name
+         df = read_as_dataframe(input_file, sheet_name)
+         df.fillna("", inplace=True)
+         for _, line in df.iterrows():
+             line = line.to_dict()
+             out_list.append(line)
+     df = pd.DataFrame(out_list)
+     return df
+
+
+ def read_as_dataframe_dict(filepath: Union[str, Path], fill_empty_str_to_na=True):
+     filepath = Path(filepath)
+     if filepath.is_dir():
+         paths = ls(filepath, expand_all_subdir=True)
+         df_dict_list = []
+         for path in paths:
+             try:
+                 df_dict = read_as_dataframe_dict(path, fill_empty_str_to_na)
+             except:
+                 df_dict = {}
+             df_dict_list.append(df_dict)
+         df_dict = merge_multiple_df_dict(df_dict_list)
+         return df_dict
+     df_dict: Dict[str, pd.DataFrame] = pd.read_excel(filepath, sheet_name=None)
+     if isinstance(df_dict, dict):
+         for name, df in df_dict.items():
+             if fill_empty_str_to_na:
+                 df.fillna("", inplace=True)
+             df['数据来源'] = filepath.name
+     elif isinstance(df_dict, pd.DataFrame):
+         if fill_empty_str_to_na:
+             df_dict.fillna("", inplace=True)
+         df_dict['数据来源'] = filepath.name
+     return df_dict
+
+
+ def df_dict_summary(df_dict: Dict[str, pd.DataFrame]):
+     rows = []
+     for k, df in df_dict.items():
+         row = {
+             "sheet_name": k,
+             "length": len(df),
+             "columns": str(list(df.columns)),
+         }
+         rows.append(row)
+     df = pd.DataFrame(rows)
+     return df
+
+
+ def save_df_dict(df_dict: Dict[str, pd.DataFrame], output_filepath: Union[str, Path]):
+     if not isinstance(output_filepath, Path):
+         output_filepath = Path(output_filepath)
+     output_filepath.parent.mkdir(parents=True, exist_ok=True)
+     with pd.ExcelWriter(output_filepath, engine="xlsxwriter") as writer:
+         for k, df in df_dict.items():
+             if len(k) > 31:
+                 logger.warning(f"Sheet name too long and was truncated: [{k}] has length {len(k)}")
+             df.to_excel(writer, sheet_name=k[:31], index=False)
+     return output_filepath
+
+
+ def save_df_from_jsonlist(jsonlist: List[Dict[str, str]], output_filepath: Union[str, Path]):
+     df = pd.DataFrame(jsonlist)
+     return save_df(df, output_filepath)
+
+
+ def save_df(df: pd.DataFrame, output_filepath: Union[str, Path]):
+     if not isinstance(output_filepath, Path):
+         output_filepath = Path(output_filepath)
+     output_filepath.parent.mkdir(parents=True, exist_ok=True)
+     df.to_excel(output_filepath, index=False)
+     return output_filepath
+
+
+ def lazy_build_dataframe(name: str, output_filepath: Path, func, filetype: str = "xlsx"):
+     logger.info(name)
+     output_filepath.parent.mkdir(parents=True, exist_ok=True)
+     if output_filepath.exists():
+         df = read_as_dataframe(output_filepath)
+     else:
+         df: pd.DataFrame = func()
+         filename = output_filepath.name.split(".")[0]
+         if filetype == "xlsx":
+             df.to_excel(output_filepath.parent / f"{filename}.xlsx", index=False)
+         elif filetype == "json":
+             save_json_list(dataframe_to_json_list(df), output_filepath.parent / f"{filename}.json")
+         elif filetype == "jsonl":
+             save_json_list(dataframe_to_json_list(df), output_filepath.parent / f"{filename}.jsonl")
+         else:
+             logger.warning(f"Unknown filetype {filetype}, saving as xlsx by default")
+             df.to_excel(output_filepath.parent / f"{filename}.xlsx", index=False)
+     logger.info(f"{name} results saved to {output_filepath}")
+     return df
+
+
+ def lazy_build_dataframe_dict(name: str, output_filepath: Path, df_dict: Dict[str, pd.DataFrame], func, skip_sheets: List[str] = list()):
+     logger.info(name)
+     output_filepath.parent.mkdir(parents=True, exist_ok=True)
+     if output_filepath.exists():
+         new_df_dict = read_as_dataframe_dict(output_filepath)
+     else:
+         new_df_dict = {}
+         for sheet_name, df in df_dict.items():
+             if sheet_name in skip_sheets:
+                 continue
+             df = func(sheet_name, df)
+             new_df_dict[sheet_name] = df
+         save_df_dict(new_df_dict, output_filepath)
+     logger.info(f"{name} results saved to {output_filepath}")
+     return new_df_dict
+
+
+ def merge_multiple_df_dict(list_of_df_dict: List[Dict[str, pd.DataFrame]], sort=True):
+     df_dict_merged = defaultdict(list)
+     for df_dict in list_of_df_dict:
+         for k, df in df_dict.items():
+             df_dict_merged[k].append(df)
+     df_dict_merged: Dict[str, pd.DataFrame] = {k: pd.concat(v) for k, v in df_dict_merged.items()}
+     if sort:
+         df_dict_merged: Dict[str, pd.DataFrame] = {k: df_dict_merged[k] for k in sorted(df_dict_merged)}
+     return df_dict_merged
+
+
+ def remove_duplicate_and_sort(df: pd.DataFrame, key_col="query", sort_by='label'):
+     query_to_rows = {}
+     for i, row in df.iterrows():
+         query_to_rows[row[key_col]] = row
+     rows = sorted(list(query_to_rows.values()), key=lambda row: row[sort_by])
+     df_filtered = pd.DataFrame(rows)
+     return df_filtered
+
+
+ def color_negative_red(x):
+     color = "red" if x < 0 else ""
+     return f"color: {color}"
+
+
+ def highlight_max(x):
+     is_max = x == x.max()
+     return [("background-color: yellow" if m else "") for m in is_max]
+
+
+ def split_dataframe(df: pd.DataFrame, output_dir: Union[str, Path], tag: str, split_count=6):
+     output_dir = Path(output_dir)
+     output_dir.mkdir(parents=True, exist_ok=True)
+     rows = dataframe_to_json_list(df)
+     split_step = len(rows) // split_count + 1
+     df_list = []
+     for i in range(0, len(rows), split_step):
+         filepath = output_dir / f"{tag}_{i // split_step}.xlsx"
+         df_i = pd.DataFrame(rows[i:i+split_step])
+         df_i.to_excel(filepath, index=False)
+         df_list.append(df_i)
+     return df_list
+
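A sketch of the typical read/save cycle with the helpers above; the file names are placeholders:

from xlin.read_as_dataframe import read_as_dataframe, read_as_dataframe_dict, save_df_dict

df = read_as_dataframe("data/examples.jsonl")           # json/jsonl/xlsx/xls/csv all land in one DataFrame
sheets = read_as_dataframe_dict("data/workbook.xlsx")   # {sheet_name: DataFrame}
save_df_dict(sheets, "output/merged.xlsx")              # one sheet per key, sheet names truncated to 31 chars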
xlin/statistic.py ADDED
@@ -0,0 +1,33 @@
+ from typing import List
+
+ import pandas as pd
+
+
+
+ def bucket_count(length: List[int], step=50, skip_zero_count=False):
+     grouped_count = []
+     j = 0
+     for i in range(0, max(length) + step, step):
+         grouped_count.append(0)
+         while j < len(length) and length[j] < i:
+             grouped_count[i // step] += 1
+             j += 1
+     x, y = [], []
+     for i, j in enumerate(grouped_count):
+         if i == 0:
+             continue
+         if skip_zero_count and j == 0:
+             continue
+         print(f"[{(i-1)*step}, {i*step}) {j} {sum(grouped_count[:i+1])/len(length)*100:.2f}%")
+         x.append((i - 1) * step)
+         y.append(j)
+     return x, y
+
+
+ def statistic_char_length(df: pd.DataFrame, instruction_key="instruction"):
+     length = []
+     for i, row in df.iterrows():
+         length.append(len(row[instruction_key]))
+     length.sort()
+     return length
+
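For example, `bucket_count` expects a sorted list of lengths (as produced by `statistic_char_length`) and prints one bucket per line; the sample values are made up:

from xlin.statistic import bucket_count

lengths = sorted([3, 12, 48, 55, 90, 130])
x, y = bucket_count(lengths, step=50)   # prints "[0, 50) 3 ...", "[50, 100) 2 ...", "[100, 150) 1 ..."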
xlin/terminal_color.py ADDED
@@ -0,0 +1,10 @@
+
+ blue = "\x1b[34m"
+ cyan = "\x1b[36;21m"
+ green = "\x1b[32;21m"
+ orange = "\x1b[33;21m"
+ grey = "\x1b[38;21m"
+ yellow = "\x1b[33;21m"
+ red = "\x1b[31;21m"
+ bold_red = "\x1b[31;1m"
+ reset = "\x1b[0m"
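These are raw ANSI escape sequences, so usage is plain string concatenation, for example:

from xlin.terminal_color import red, green, reset

print(f"{green}ok{reset} / {red}failed{reset}")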
xlin/util.py ADDED
@@ -0,0 +1,321 @@
+ from typing import *
+ from collections import Counter, defaultdict
+ from pathlib import Path
+ import pandas as pd
+ import os
+ import asyncio
+ import datetime
+ from loguru import logger
+ import shutil
+ import random
+
+
+ date_str = datetime.datetime.now().strftime("%Y%m%d")
+ datetime_str = datetime.datetime.now().strftime("%Y%m%d_%Hh%Mm%Ss")
+
+
+ def random_timestamp(start_timestamp=None, end_timestamp=None):
+     if start_timestamp is None:
+         start_timestamp = datetime.datetime(2024, 1, 1).timestamp()
+     if end_timestamp is None:
+         end_timestamp = datetime.datetime.now().timestamp()
+     return random.uniform(start_timestamp, end_timestamp)
+
+
+ def random_timestamp_str(start_timestamp=None, end_timestamp=None, format="%Y年%m月%d日%H时%M分"):
+     return datetime.datetime.fromtimestamp(random_timestamp(start_timestamp, end_timestamp)).strftime(format)
+
+
+ def auto_retry_to_get_data(retry_times, request, data_key="data", *args, **kwargs):
+     if retry_times == 0:
+         return {}
+     resp = request(*args, **kwargs)
+     if resp is not None:
+         if data_key is None:
+             return resp
+         elif data_key in resp and resp[data_key] is not None:
+             return resp[data_key]
+     logger.debug("[error! retrying...]", resp)
+     return auto_retry_to_get_data(retry_times - 1, request, data_key, *args, **kwargs)
+
+
+ def append_column(df: pd.DataFrame, query_column: str, output_column: str, transform):
+     query = df[query_column].tolist()
+     loop = asyncio.get_event_loop()
+     result = loop.run_until_complete(transform(query))
+     df[output_column] = [str(r) for r in result]
+     return df
+
+
+ def request_wrapper(request_num=10):
+     def request_wrapper_body(func):
+         def wrapper(*args, **kwargs):
+             c = request_num
+             execute_num = 0
+             while c > 0:
+                 c -= 1
+                 res = func(*args, **kwargs)
+                 execute_num += 1
+                 if res != "-1":
+                     logger.debug("{} execute_num: {}".format(func.__name__, execute_num))
+                     return res
+             logger.debug("{} execute_num: {}".format(func.__name__, execute_num))
+             return ""
+
+         return wrapper
+
+     return request_wrapper_body
+
+
+ def copy_file(input_filepath, output_filepath, force_overwrite=False, verbose=False):
+     if verbose:
+         logger.info(f"Copying {input_filepath} to {output_filepath}")
+     if not isinstance(output_filepath, Path):
+         output_filepath = Path(output_filepath)
+     if output_filepath.exists() and not force_overwrite:
+         if verbose:
+             logger.warning(f"File already exists, skipping copy: {output_filepath}")
+         return output_filepath
+     shutil.copy(input_filepath, output_filepath, follow_symlinks=True)
+     return output_filepath
+
+
+ def rm(dir_path: Union[str, Path, List[str], List[Path]], filter: Callable[[Path], bool] = lambda filepath: True, expand_all_subdir=True, debug=False):
+     if isinstance(dir_path, str) and "," in dir_path:
+         for path in dir_path.split(","):
+             rm(path, filter, expand_all_subdir)
+         return
+     if isinstance(dir_path, list):
+         for path in dir_path:
+             rm(path, filter, expand_all_subdir)
+         return
+     dir_path = Path(dir_path)
+     if not dir_path.exists():
+         if debug:
+             print(f"Path does not exist: {dir_path}")
+         return
+     if not dir_path.is_dir():
+         if filter(dir_path):
+             dir_path.unlink()
+             if debug:
+                 print(f"Deleted file {dir_path}")
+         return
+     filenames = os.listdir(dir_path)
+     for filename in sorted(filenames):
+         filepath = dir_path / filename
+         if debug:
+             print("checking", filepath)
+         if filepath.is_dir():
+             paths = ls(filepath, filter, expand_all_subdir)
+             if len(paths) > 0:
+                 rm(paths, filter, expand_all_subdir)
+             child = filepath
+             while child.exists() and len(os.listdir(child)) > 0:
+                 child = child / os.listdir(child)[0]
+             while child != filepath:
+                 if child.exists() and len(os.listdir(child)) == 0:
+                     child.rmdir()
+                     if debug:
+                         print(f"Deleted empty folder {child}")
+                 else:
+                     break
+             if filepath.exists() and len(os.listdir(filepath)) == 0:
+                 filepath.rmdir()
+                 if debug:
+                     print(f"Deleted empty folder {filepath}")
+         elif filter(filepath):
+             rm(filepath, filter, expand_all_subdir)
+     if dir_path.exists() and len(os.listdir(dir_path)) == 0:
+         dir_path.rmdir()
+         if debug:
+             print(f"Deleted empty folder {dir_path}")
+
+
+ def cp(
+     input_dir_path: Union[str, Path, List[str], List[Path]],
+     output_dir_path: Union[str, Path],
+     base_input_dir: Optional[Union[Path, str]] = None,
+     force_overwrite: bool = False,
+     filter: Callable[[Path], bool] = lambda filepath: True,
+     expand_all_subdir=True,
+     verbose=False,
+ ):
+     input_paths = ls(input_dir_path, filter, expand_all_subdir)
+     if len(input_paths) == 0:
+         if verbose:
+             logger.warning(f"no files in {input_dir_path}")
+         return
+     if base_input_dir is None:
+         # compute the longest common path of all inputs
+         if len(input_paths) > 1:
+             base_input_dir = os.path.commonpath([str(p) for p in input_paths])
+         else:
+             base_input_dir = input_paths[0].parent
+     base_input_dir = Path(base_input_dir)
+     output_dir_path = Path(output_dir_path)
+     for input_path in input_paths:
+         relative_path = input_path.relative_to(base_input_dir)
+         output_path = output_dir_path / relative_path
+         output_path.parent.mkdir(parents=True, exist_ok=True)
+         copy_file(input_path, output_path, force_overwrite, verbose)
+
+
+ def ls(dir_path: Union[str, Path, List[str], List[Path]], filter: Callable[[Path], bool] = lambda filepath: True, expand_all_subdir=True):
+     """list all files, return a list of filepaths
+
+     Args:
+         dir_path (Union[str, Path]): dir
+         filter ((Path) -> bool, optional): filter. Defaults to lambda filepath: True.
+         expand_all_subdir (bool, optional): whether to recurse into subdirectories. Defaults to True.
+
+     Returns:
+         List[Path]: not null, may be empty list []
+     """
+     filepaths: List[Path] = []
+     if isinstance(dir_path, str) and "," in dir_path:
+         for path in dir_path.split(","):
+             filepaths.extend(ls(path, filter, expand_all_subdir))
+         return filepaths
+     if isinstance(dir_path, list):
+         for path in dir_path:
+             filepaths.extend(ls(path, filter, expand_all_subdir))
+         return filepaths
+     dir_path = Path(dir_path)
+     if not dir_path.exists():
+         return filepaths
+     if not dir_path.is_dir():
+         if filter(dir_path):
+             return [dir_path]
+         else:
+             return filepaths
+     filenames = os.listdir(dir_path)
+     for filename in sorted(filenames):
+         filepath = dir_path / filename
+         if filepath.is_dir():
+             if expand_all_subdir:
+                 filepaths.extend(ls(filepath, filter, expand_all_subdir))
+         elif filter(filepath):
+             filepaths.append(filepath)
+     return filepaths
+
+
+ def clean_empty_folder(dir_path):
+     dir_path = Path(dir_path)
+     sub_names = os.listdir(dir_path)
+     if not sub_names or len(sub_names) == 0:
+         print(f"clean empty folder: {dir_path}")
+         dir_path.rmdir()
+         clean_empty_folder(dir_path.parent)
+     else:
+         for sub_name in sub_names:
+             path = dir_path / sub_name
+             if path.is_dir():
+                 clean_empty_folder(path)
+
+
+ def grouped_col_list(df: pd.DataFrame, key_col="query", value_col="output"):
+     grouped = defaultdict(list)
+     if key_col not in df.columns:
+         logger.warning(f"`{key_col}` not in columns: {list(df.columns)}")
+         return grouped
+     for i, row in df.iterrows():
+         grouped[row[key_col]].append(row[value_col])
+     return grouped
+
+
+ def grouped_col(df: pd.DataFrame, key_col="query", value_col="output"):
+     grouped = {}
+     if key_col not in df.columns:
+         logger.warning(f"`{key_col}` not in columns: {list(df.columns)}")
+         return grouped
+     for i, row in df.iterrows():
+         grouped[row[key_col]] = row[value_col]
+     return grouped
+
+
+ def grouped_row(df: pd.DataFrame, key_col="query"):
+     grouped = defaultdict(list)
+     if key_col not in df.columns:
+         logger.warning(f"`{key_col}` not in columns: {list(df.columns)}")
+         return grouped
+     for i, row in df.iterrows():
+         grouped[row[key_col]].append(row)
+     return grouped
+
+
+ def grouped_row_in_jsonlist(jsonlist: List[Dict[str, Any]], key_col="query"):
+     grouped = defaultdict(list)
+     for i, row in enumerate(jsonlist):
+         if key_col not in row:
+             logger.warning(f"`{key_col}` not in row: {row}")
+             notfound_key = f"NotFound:{key_col}"
+             grouped[notfound_key].append(row)
+             continue
+         grouped[row[key_col]].append(row)
+     return grouped
+
+
+ def submit_file(path: Union[str, Path], target_dir: Union[str, Path]):
+     p = Path(path).absolute()
+     target_dir = Path(target_dir).absolute()
+     logger.info(f"Copying to target directory {target_dir}")
+     if p.is_dir():
+         logger.info(f"Directory {p}")
+         filenames = os.listdir(path)
+         for filename in filenames:
+             src_file = p / filename
+             tgt_file = target_dir / filename
+             copy_file(src_file, tgt_file)
+             logger.info(f"Copied {filename} to {tgt_file}")
+     else:
+         filename = p.name
+         logger.info(f"File {filename}")
+         src_file = p
+         tgt_file = target_dir / filename
+         copy_file(src_file, tgt_file)
+         logger.info(f"Copied {filename} to {tgt_file}")
+     filenames = os.listdir(target_dir)
+     logger.info("Files now in the target directory:\n" + "\n".join(filenames))
+
+
+ def pretty_limited_text(text: str, limited_length: int = 300, language="zh"):
+     text = str(text).strip()
+     if len(text) > limited_length:
+         # if language == "zh":
+         #     tail = f"...(共{len(text)}字)"
+         # else:
+         #     tail = f"...({len(text)} words in total)"
+         # return text[: limited_length - len(tail)] + tail
+         return text[: limited_length // 2] + text[-limited_length // 2 :]
+     return text
+
+
+ def bucket_count(length):
+     grouped_count = []
+     j = 0
+     for i in range(0, max(length), 50):
+         grouped_count.append(0)
+         while j < len(length) and length[j] < i:
+             grouped_count[i // 50] += 1
+             j += 1
+     for i, j in enumerate(grouped_count):
+         if i == 0 or j == 0:
+             continue
+         print(f"[{(i-1)*50}, {i*50}) {j} {sum(grouped_count[:i+1])/len(length)*100:.2f}%")
+
+
+ def sortedCounter(obj, by="key", reverse=False, return_list=False):
+     c = Counter(obj)
+     c_list = [(k, c[k]) for k in c]
+     if by == "key":
+         c_list = sorted(c_list, key=lambda x: x[0], reverse=reverse)
+     elif by in ["value", "count"]:
+         c_list = sorted(c_list, key=lambda x: x[1], reverse=reverse)
+     else:
+         raise Exception(f"unsupported by: {by}")
+     c = Counter()
+     for k, v in c_list:
+         c[k] = v
+     if return_list:
+         return c, c_list
+     return c
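A hedged sketch of the most common calls in `xlin.util`; the directory names are placeholders:

from xlin.util import ls, cp, sortedCounter

paths = ls("data/a,data/b", filter=lambda p: p.suffix == ".json")  # comma-separated dirs, recursive by default
cp(paths, "backup/")                                               # copies while preserving the relative layout
counts = sortedCounter(["a", "b", "a"], by="count", reverse=True)  # Counter({'a': 2, 'b': 1})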
xlin/uuid.py ADDED
@@ -0,0 +1,13 @@
+ from typing import *
+ import uuid
+
+ import pandas as pd
+
+
+ def append_uuid_column(df: pd.DataFrame, uuid_key="uuid"):
+     rows = []
+     for i, row in df.iterrows():
+         row[uuid_key] = str(uuid.uuid4())
+         rows.append(row)
+     df = pd.DataFrame(rows)
+     return df
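Usage is a single call; a fresh UUID4 string is written into every row, for example:

import pandas as pd
from xlin.uuid import append_uuid_column

df = append_uuid_column(pd.DataFrame({"query": ["a", "b"]}))
print(df.columns.tolist())  # ['query', 'uuid']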
xlin/xls2xlsx.py ADDED
@@ -0,0 +1,33 @@
+ # pip install pyexcel pyexcel-xls pyexcel-xlsx
+ import os
+
+ import pyexcel as p
+
+
+ def convert_xls_dir_to_xlsx(data_dir):
+     filenames = os.listdir(data_dir)
+     for filename in filenames:
+         if filename.endswith(".xls"):
+             convert_xls_to_xlsx(os.path.join(data_dir, filename))
+
+ def convert_xls_to_xlsx(file_name):
+     converted_filename = file_name + 'x'
+     if is_xslx(file_name):
+         # rename to .xlsx
+         with open(file_name, 'rb') as f:
+             with open(converted_filename, 'wb') as f2:
+                 f2.write(f.read())
+         return converted_filename
+     sheet = p.get_sheet(file_name=file_name)
+     sheet.save_as(converted_filename)
+     return converted_filename
+
+
+ def is_xslx(filename):
+     with open(filename, 'rb') as f:
+         first_four_bytes = f.read()[:4]
+     return first_four_bytes == b'PK\x03\x04'
+
+ if __name__ == "__main__":
+     import sys
+     convert_xls_to_xlsx(sys.argv[1])
xlin/yaml.py ADDED
@@ -0,0 +1,8 @@
+ import yaml
+
+
+ def load_yaml(yaml_file: str):
+     with open(yaml_file, "r", encoding="utf-8") as f:
+         file_str = f.read()
+     schema = yaml.safe_load(file_str)
+     return schema
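Usage, with a placeholder config path:

from xlin.yaml import load_yaml

config = load_yaml("config.yaml")   # returns whatever yaml.safe_load produces (dict, list, or scalar)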
xlin-0.1.8.dist-info/LICENSE ADDED
@@ -0,0 +1,21 @@
+ MIT License
+
+ Copyright (c) 2024 兮尘
+
+ Permission is hereby granted, free of charge, to any person obtaining a copy
+ of this software and associated documentation files (the "Software"), to deal
+ in the Software without restriction, including without limitation the rights
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ copies of the Software, and to permit persons to whom the Software is
+ furnished to do so, subject to the following conditions:
+
+ The above copyright notice and this permission notice shall be included in all
+ copies or substantial portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ SOFTWARE.
xlin-0.1.8.dist-info/METADATA ADDED
@@ -0,0 +1,31 @@
+ Metadata-Version: 2.1
+ Name: xlin
+ Version: 0.1.8
+ Summary: toolbox for LinXueyuan
+ License: MIT
+ Author: XiChen
+ Author-email: 23211526+LinXueyuanStdio@users.noreply.github.com
+ Classifier: License :: OSI Approved :: MIT License
+ Classifier: Programming Language :: Python :: 2
+ Classifier: Programming Language :: Python :: 2.7
+ Classifier: Programming Language :: Python :: 3
+ Classifier: Programming Language :: Python :: 3.4
+ Classifier: Programming Language :: Python :: 3.5
+ Classifier: Programming Language :: Python :: 3.6
+ Classifier: Programming Language :: Python :: 3.7
+ Classifier: Programming Language :: Python :: 3.8
+ Classifier: Programming Language :: Python :: 3.9
+ Classifier: Programming Language :: Python :: 3.10
+ Classifier: Programming Language :: Python :: 3.11
+ Classifier: Programming Language :: Python :: 3.12
+ Requires-Dist: loguru
+ Requires-Dist: pandas
+ Requires-Dist: pyexcel
+ Requires-Dist: pyexcel-xls
+ Requires-Dist: pyexcel-xlsx
+ Requires-Dist: xlsxwriter
+ Description-Content-Type: text/markdown
+
+ # xlin
+ Personal Python utility code
+
xlin-0.1.8.dist-info/RECORD ADDED
@@ -0,0 +1,15 @@
+ xlin/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+ xlin/ischinese.py,sha256=Ia9IMQ6q-UHkdLwqS70L1fTnfSPbluFrv_I1UqsKquo,293
+ xlin/jsonl.py,sha256=DvVM241a9VgQlp5WIMPRv-JIolT0RdSxw47IG_fc7xE,6690
+ xlin/multiprocess_mapping.py,sha256=pmzyEUYpbpIZ_ezyvWWWRpr7D7n4t3E3jW1nGXBbVck,7652
+ xlin/read_as_dataframe.py,sha256=ir3HUT6dt3crqa3xnlcNn8j3wqjSIGJgiIVLP3KkBaQ,8678
+ xlin/statistic.py,sha256=BLj8hszlbBT5xDIfd70_YtOb8QgZEvYXiFJDGXBwCfw,881
+ xlin/terminal_color.py,sha256=nfE-CY2BzjY2eZbm9yk8r-AuyJ-hchmLXhASCb4HAIA,191
+ xlin/util.py,sha256=SOQUh506GQlljJYLYuI6nScSTOrgRQnMq2xfxSvKIlI,11303
+ xlin/uuid.py,sha256=gouvm7_DL22sIhXl-g4e6S2qzIZtmE3SEp00xy1upyg,271
+ xlin/xls2xlsx.py,sha256=5zfcM0gmunFQOcOj9nYd9Dj0HMhU7-cPKnPIy6Ot9iU,930
+ xlin/yaml.py,sha256=kICi7G3Td5q2MaSXXt85qNTWoHMgjzt7pvn7r3C4dME,183
+ xlin-0.1.8.dist-info/LICENSE,sha256=KX0dDCYlO4DskqMZY8qeY94EZMrDRNnNqlGLkXVlKyM,1063
+ xlin-0.1.8.dist-info/METADATA,sha256=XRXH6JMQMlIhnRbalOxAze0QmkZRIy6lunDVXp2jyXg,1046
+ xlin-0.1.8.dist-info/WHEEL,sha256=IrRNNNJ-uuL1ggO5qMvT1GGhQVdQU54d6ZpYqEZfEWo,92
+ xlin-0.1.8.dist-info/RECORD,,
xlin-0.1.8.dist-info/WHEEL ADDED
@@ -0,0 +1,4 @@
+ Wheel-Version: 1.0
+ Generator: poetry-core 1.9.0
+ Root-Is-Purelib: true
+ Tag: py2.py3-none-any