xlin 0.1.26__tar.gz → 0.1.27__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: xlin
3
- Version: 0.1.26
3
+ Version: 0.1.27
4
4
  Summary: toolbox for LinXueyuan
5
5
  License: MIT
6
6
  Author: LinXueyuanStdio
@@ -1,6 +1,6 @@
1
1
  [tool.poetry]
2
2
  name = "xlin"
3
- version = "0.1.26"
3
+ version = "0.1.27"
4
4
  description = "toolbox for LinXueyuan"
5
5
  authors = ["LinXueyuanStdio <23211526+LinXueyuanStdio@users.noreply.github.com>"]
6
6
  license = "MIT"
@@ -85,6 +85,7 @@ def xmap(
85
85
  retry_count=0, # 失败重试次数
86
86
  force_overwrite=False, # 是否强制覆盖输出文件
87
87
  is_batch_work_func=False, # 是否批量处理函数
88
+ verbose=False, # 是否打印详细信息
88
89
  ):
89
90
  """高效处理JSON列表,支持多进程/多线程
90
91
 
@@ -113,25 +114,30 @@ def xmap(
113
114
  output_path = Path(output_path)
114
115
  if output_path.exists():
115
116
  if force_overwrite:
116
- logger.warning(f"强制覆盖输出文件: {output_path}")
117
+ if verbose:
118
+ logger.warning(f"强制覆盖输出文件: {output_path}")
117
119
  output_path.unlink()
118
120
  else:
119
121
  output_list = load_json_list(output_path)
120
122
  start_idx = len(output_list)
121
- logger.info(f"继续处理: 已有{start_idx}条记录,共{len(jsonlist)}条")
123
+ if verbose:
124
+ logger.info(f"继续处理: 已有{start_idx}条记录,共{len(jsonlist)}条")
122
125
  else:
123
126
  output_path.parent.mkdir(parents=True, exist_ok=True)
124
127
 
125
128
  # 选择线程池或进程池
126
129
  if use_process_pool:
127
130
  pool_cls = multiprocessing.Pool
128
- logger.info(f"使用进程池(ProcessPool),适用于CPU密集型任务")
131
+ if verbose:
132
+ logger.info(f"使用进程池(ProcessPool),适用于CPU密集型任务")
129
133
  else:
130
134
  pool_cls = ThreadPool
131
- logger.info(f"使用线程池(ThreadPool),适用于IO密集型任务")
135
+ if verbose:
136
+ logger.info(f"使用线程池(ThreadPool),适用于IO密集型任务")
132
137
 
133
138
  with pool_cls(thread_pool_size) as pool:
134
- logger.info(f"池大小: {thread_pool_size}, 批处理大小: {batch_size}, 分块大小: {chunksize}")
139
+ if verbose:
140
+ logger.info(f"池大小: {thread_pool_size}, 批处理大小: {batch_size}, 分块大小: {chunksize}")
135
141
 
136
142
  # 准备要处理的数据
137
143
  remaining_items = jsonlist[start_idx:]
@@ -149,10 +155,12 @@ def xmap(
149
155
  return list(map_func(work_func, items_batch, chunksize))
150
156
  except Exception as e:
151
157
  if retry_remaining > 0:
152
- logger.warning(f"批处理失败,重试中 ({retry_count-retry_remaining+1}/{retry_count}): {e}")
158
+ if verbose:
159
+ logger.warning(f"批处理失败,重试中 ({retry_count-retry_remaining+1}/{retry_count}): {e}")
153
160
  return process_batch(items_batch, retry_remaining - 1)
154
161
  else:
155
- logger.error(f"批处理失败: {e}")
162
+ if verbose:
163
+ logger.error(f"批处理失败: {e}")
156
164
  raise
157
165
 
158
166
  # 处理数据
@@ -175,20 +183,21 @@ def xmap(
175
183
 
176
184
  # 性能统计
177
185
  items_per_second = len(batch) / batch_time if batch_time > 0 else 0
178
- pbar.set_postfix_str(f"速率: {items_per_second:.1f}项/秒")
186
+ pbar.set_postfix_str(f"速率: {items_per_second:.1f} 项/秒")
179
187
 
180
188
  # 缓存逻辑
181
189
  if need_caching and (i // batch_size) % cache_batch_num == 0:
182
190
  # 仅当处理速度足够慢时才保存缓存,避免IO成为瓶颈
183
191
  if batch_time > 3 or i + batch_size >= total_items:
184
192
  save_json_list(output_list, output_path)
185
- logger.debug(f"已保存{len(output_list)}条记录到{output_path}")
193
+ logger.debug(f"已保存 {len(output_list)} 条记录到 {output_path}")
186
194
 
187
195
  # 最终保存
188
196
  if need_caching:
189
197
  save_json_list(output_list, output_path)
190
- drop_count = len(jsonlist) - len(output_list)
191
- logger.info(f"处理完成,共处理{len(jsonlist)}条记录" + ", 丢弃{len(jsonlist) - len(output_list)}条记录" if drop_count > 0 else "")
198
+ if verbose:
199
+ drop_count = len(jsonlist) - len(output_list)
200
+ logger.info(f"处理完成,共处理 {len(jsonlist)} 条记录" + f", 丢弃 {drop_count} 条记录" if drop_count > 0 else "")
192
201
 
193
202
  return output_list
194
203
 
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes