xlin 0.1.26__tar.gz → 0.1.27__tar.gz
This diff compares the contents of two publicly available versions of a package, each of which has been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.
- {xlin-0.1.26 → xlin-0.1.27}/PKG-INFO +1 -1
- {xlin-0.1.26 → xlin-0.1.27}/pyproject.toml +1 -1
- {xlin-0.1.26 → xlin-0.1.27}/xlin/multiprocess_mapping.py +20 -11
- {xlin-0.1.26 → xlin-0.1.27}/LICENSE +0 -0
- {xlin-0.1.26 → xlin-0.1.27}/README.md +0 -0
- {xlin-0.1.26 → xlin-0.1.27}/xlin/__init__.py +0 -0
- {xlin-0.1.26 → xlin-0.1.27}/xlin/ischinese.py +0 -0
- {xlin-0.1.26 → xlin-0.1.27}/xlin/jsonl.py +0 -0
- {xlin-0.1.26 → xlin-0.1.27}/xlin/metric.py +0 -0
- {xlin-0.1.26 → xlin-0.1.27}/xlin/read_as_dataframe.py +0 -0
- {xlin-0.1.26 → xlin-0.1.27}/xlin/statistic.py +0 -0
- {xlin-0.1.26 → xlin-0.1.27}/xlin/timing.py +0 -0
- {xlin-0.1.26 → xlin-0.1.27}/xlin/util.py +0 -0
- {xlin-0.1.26 → xlin-0.1.27}/xlin/xls2xlsx.py +0 -0
- {xlin-0.1.26 → xlin-0.1.27}/xlin/yaml.py +0 -0
@@ -85,6 +85,7 @@ def xmap(
|
|
85
85
|
retry_count=0, # 失败重试次数
|
86
86
|
force_overwrite=False, # 是否强制覆盖输出文件
|
87
87
|
is_batch_work_func=False, # 是否批量处理函数
|
88
|
+
verbose=False, # 是否打印详细信息
|
88
89
|
):
|
89
90
|
"""高效处理JSON列表,支持多进程/多线程
|
90
91
|
|
@@ -113,25 +114,30 @@ def xmap(
|
|
113
114
|
output_path = Path(output_path)
|
114
115
|
if output_path.exists():
|
115
116
|
if force_overwrite:
|
116
|
-
|
117
|
+
if verbose:
|
118
|
+
logger.warning(f"强制覆盖输出文件: {output_path}")
|
117
119
|
output_path.unlink()
|
118
120
|
else:
|
119
121
|
output_list = load_json_list(output_path)
|
120
122
|
start_idx = len(output_list)
|
121
|
-
|
123
|
+
if verbose:
|
124
|
+
logger.info(f"继续处理: 已有{start_idx}条记录,共{len(jsonlist)}条")
|
122
125
|
else:
|
123
126
|
output_path.parent.mkdir(parents=True, exist_ok=True)
|
124
127
|
|
125
128
|
# 选择线程池或进程池
|
126
129
|
if use_process_pool:
|
127
130
|
pool_cls = multiprocessing.Pool
|
128
|
-
|
131
|
+
if verbose:
|
132
|
+
logger.info(f"使用进程池(ProcessPool),适用于CPU密集型任务")
|
129
133
|
else:
|
130
134
|
pool_cls = ThreadPool
|
131
|
-
|
135
|
+
if verbose:
|
136
|
+
logger.info(f"使用线程池(ThreadPool),适用于IO密集型任务")
|
132
137
|
|
133
138
|
with pool_cls(thread_pool_size) as pool:
|
134
|
-
|
139
|
+
if verbose:
|
140
|
+
logger.info(f"池大小: {thread_pool_size}, 批处理大小: {batch_size}, 分块大小: {chunksize}")
|
135
141
|
|
136
142
|
# 准备要处理的数据
|
137
143
|
remaining_items = jsonlist[start_idx:]
|
@@ -149,10 +155,12 @@ def xmap(
|
|
149
155
|
return list(map_func(work_func, items_batch, chunksize))
|
150
156
|
except Exception as e:
|
151
157
|
if retry_remaining > 0:
|
152
|
-
|
158
|
+
if verbose:
|
159
|
+
logger.warning(f"批处理失败,重试中 ({retry_count-retry_remaining+1}/{retry_count}): {e}")
|
153
160
|
return process_batch(items_batch, retry_remaining - 1)
|
154
161
|
else:
|
155
|
-
|
162
|
+
if verbose:
|
163
|
+
logger.error(f"批处理失败: {e}")
|
156
164
|
raise
|
157
165
|
|
158
166
|
# 处理数据
|
@@ -175,20 +183,21 @@ def xmap(
|
|
175
183
|
|
176
184
|
# 性能统计
|
177
185
|
items_per_second = len(batch) / batch_time if batch_time > 0 else 0
|
178
|
-
pbar.set_postfix_str(f"速率: {items_per_second:.1f}项/秒")
|
186
|
+
pbar.set_postfix_str(f"速率: {items_per_second:.1f} 项/秒")
|
179
187
|
|
180
188
|
# 缓存逻辑
|
181
189
|
if need_caching and (i // batch_size) % cache_batch_num == 0:
|
182
190
|
# 仅当处理速度足够慢时才保存缓存,避免IO成为瓶颈
|
183
191
|
if batch_time > 3 or i + batch_size >= total_items:
|
184
192
|
save_json_list(output_list, output_path)
|
185
|
-
logger.debug(f"已保存{len(output_list)}条记录到{output_path}")
|
193
|
+
logger.debug(f"已保存 {len(output_list)} 条记录到 {output_path}")
|
186
194
|
|
187
195
|
# 最终保存
|
188
196
|
if need_caching:
|
189
197
|
save_json_list(output_list, output_path)
|
190
|
-
|
191
|
-
|
198
|
+
if verbose:
|
199
|
+
drop_count = len(jsonlist) - len(output_list)
|
200
|
+
logger.info(f"处理完成,共处理 {len(jsonlist)} 条记录" + f", 丢弃 {drop_count} 条记录" if drop_count > 0 else "")
|
192
201
|
|
193
202
|
return output_list
|
194
203
|
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|