tretool 0.3.0__py3-none-any.whl → 1.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- tretool/__init__.py +2 -0
- tretool/transform/docx.py +544 -0
- tretool/ziplib.py +664 -0
- {tretool-0.3.0.dist-info → tretool-1.0.0.dist-info}/METADATA +4 -2
- {tretool-0.3.0.dist-info → tretool-1.0.0.dist-info}/RECORD +8 -6
- {tretool-0.3.0.dist-info → tretool-1.0.0.dist-info}/WHEEL +0 -0
- {tretool-0.3.0.dist-info → tretool-1.0.0.dist-info}/licenses/LICENSE +0 -0
- {tretool-0.3.0.dist-info → tretool-1.0.0.dist-info}/top_level.txt +0 -0
tretool/__init__.py
CHANGED
@@ -34,6 +34,8 @@ if sys.version_info >= MIN_PY_VERSION:
     from . import timelib
     from . import transform
 
+    from . import ziplib
+
 else:
     current_version = f"{sys.version_info.major}.{sys.version_info.minor}"
     required_version = f"{MIN_PY_VERSION[0]}.{MIN_PY_VERSION[1]}"
tretool/transform/docx.py
ADDED
@@ -0,0 +1,544 @@
+import os
+import sys
+import logging
+import time
+from abc import ABC, abstractmethod
+from pathlib import Path
+from typing import Union, List, Optional, Dict, Callable
+from concurrent.futures import ThreadPoolExecutor
+from functools import wraps, lru_cache
+import argparse
+
+# Third-party imports
+try:
+    from docx import Document
+    from docx.shared import Pt, RGBColor
+    from docx.enum.text import WD_PARAGRAPH_ALIGNMENT
+    from PIL import Image
+    import pandas as pd
+except ImportError as e:
+    print(f"Missing dependency: {e}")
+    sys.exit(1)
+
+# Logging configuration
+logging.basicConfig(
+    level=logging.INFO,
+    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
+    handlers=[
+        logging.StreamHandler(),
+        logging.FileHandler('docx_processor.log')
+    ]
+)
+
+# Custom exceptions
+class DocxProcessingError(Exception):
+    """Base processing error."""
+    pass
+
+class DocxPermissionError(DocxProcessingError):
+    """DOCX permission error."""
+    pass
+
+class DocxCorruptedError(DocxProcessingError):
+    """Corrupted DOCX file."""
+    pass
+
+class UnsupportedFormatError(DocxProcessingError):
+    """Unsupported format."""
+    pass
+
+class DocxProcessor(ABC):
+    """
+    Abstract base class for DOCX document processors.
+
+    Provides the basic building blocks for DOCX processing, including:
+    - input file validation
+    - output path handling
+    - basic error handling
+
+    Subclasses should implement:
+    - process(): perform the actual processing logic
+    - supported_operations() classmethod: return the list of supported operations
+    """
+
+    def __init__(self, docx_path: Union[str, Path]):
+        """
+        Initialize the DOCX processor.
+
+        Args:
+            docx_path: path to the DOCX file
+        """
+        self.docx_path = Path(docx_path)
+        self.logger = logging.getLogger(self.__class__.__name__)
+        self._progress_callback = None
+
+        # Validate the input file
+        if not self.docx_path.exists():
+            raise FileNotFoundError(f"DOCX file not found: {docx_path}")
+        if self.docx_path.suffix.lower() != '.docx':
+            raise ValueError("Input file must be in DOCX format")
+
+        self.logger.info(f"Initialized processor for file: {docx_path}")
+
+    @abstractmethod
+    def process(self, output_path: Union[str, Path], **kwargs) -> Union[str, List[str]]:
+        """
+        Process the DOCX document.
+
+        Args:
+            output_path: output file path
+            **kwargs: processing options
+
+        Returns:
+            the resulting file path, or a list of paths
+        """
+        pass
+
+    @classmethod
+    @abstractmethod
+    def supported_operations(cls) -> List[str]:
+        """Return the list of supported operations."""
+        return []
+
+    def set_progress_callback(self, callback: Callable[[int, int], None]):
+        """Set the progress callback."""
+        self._progress_callback = callback
+
+    def _update_progress(self, current: int, total: int):
+        """Report progress."""
+        if self._progress_callback:
+            self._progress_callback(current, total)
+
+    def get_metadata(self) -> Dict[str, str]:
+        """Return the DOCX metadata."""
+        try:
+            doc = Document(self.docx_path)
+            return {
+                'title': doc.core_properties.title,
+                'author': doc.core_properties.author,
+                'created': str(doc.core_properties.created),
+                'modified': str(doc.core_properties.modified),
+                'paragraphs': len(doc.paragraphs),
+                'tables': len(doc.tables)
+            }
+        except Exception as e:
+            self.logger.warning(f"Failed to read metadata: {str(e)}")
+            return {}
+
+    def _prepare_output_path(self, output_path: Union[str, Path],
+                             default_extension: str) -> Path:
+        """
+        Prepare the output path.
+
+        Args:
+            output_path: output path
+            default_extension: default file extension
+
+        Returns:
+            the normalized Path object
+        """
+        output_path = Path(output_path)
+
+        # If a directory is given, derive a file name automatically
+        if output_path.is_dir():
+            output_path = output_path / f"{self.docx_path.stem}.{default_extension}"
+        # If there is no extension, append the default one
+        elif not output_path.suffix:
+            output_path = output_path.with_suffix(f".{default_extension}")
+
+        # Create the parent directory if it does not exist
+        output_path.parent.mkdir(parents=True, exist_ok=True)
+
+        return output_path
+
+# Retry decorator
+def retry(max_attempts=3, delay=1, exceptions=(Exception,)):
+    def decorator(f):
+        @wraps(f)
+        def wrapper(*args, **kwargs):
+            last_error = None
+            for attempt in range(1, max_attempts+1):
+                try:
+                    return f(*args, **kwargs)
+                except exceptions as e:
+                    last_error = e
+                    if attempt < max_attempts:
+                        time.sleep(delay)
+            raise last_error
+        return wrapper
+    return decorator
+
+# Concrete processor implementations
+class DocxToPDFConverter(DocxProcessor):
+    @retry(max_attempts=3, delay=0.5)
+    def process(self, output_path: Union[str, Path], **kwargs) -> str:
+        """
+        Convert DOCX to PDF.
+
+        Args:
+            output_path: output file path
+            **kwargs:
+                - pages: page range to convert (e.g. '1-3,5')
+                - quality: output quality (high/medium/low)
+
+        Returns:
+            path of the converted file
+        """
+        try:
+            output_path = self._prepare_output_path(output_path, 'pdf')
+
+            # Try the docx2pdf library first
+            try:
+                from docx2pdf import convert
+                convert(str(self.docx_path), str(output_path))
+                return str(output_path)
+            except ImportError:
+                self.logger.warning("docx2pdf is not installed, falling back to unoconv")
+                # Fall back to unoconv
+                try:
+                    import subprocess
+                    subprocess.run(['unoconv', '-f', 'pdf', '-o', str(output_path), str(self.docx_path)], check=True)
+                    return str(output_path)
+                except Exception as e:
+                    raise DocxProcessingError(f"PDF conversion failed, please install docx2pdf or unoconv: {str(e)}")
+        except Exception as e:
+            raise DocxProcessingError(f"PDF conversion failed: {str(e)}")
+
+    @classmethod
+    def supported_operations(cls) -> List[str]:
+        return ['pdf']
+
+class DocxToTextConverter(DocxProcessor):
+    @retry(max_attempts=3, delay=0.5)
+    def process(self, output_path: Union[str, Path], **kwargs) -> str:
+        """
+        Convert DOCX to plain text.
+
+        Args:
+            output_path: output file path
+            **kwargs:
+                - include_tables: whether to include table content (True/False)
+                - encoding: text encoding (default utf-8)
+
+        Returns:
+            path of the converted file
+        """
+        try:
+            output_path = self._prepare_output_path(output_path, 'txt')
+            include_tables = kwargs.get('include_tables', False)
+            encoding = kwargs.get('encoding', 'utf-8')
+
+            doc = Document(self.docx_path)
+            text = []
+
+            for para in doc.paragraphs:
+                text.append(para.text)
+
+            if include_tables:
+                for table in doc.tables:
+                    for row in table.rows:
+                        for cell in row.cells:
+                            text.append(cell.text)
+
+            with open(output_path, 'w', encoding=encoding) as f:
+                f.write('\n'.join(text))
+
+            return str(output_path)
+        except Exception as e:
+            raise DocxProcessingError(f"Text conversion failed: {str(e)}")
+
+    @classmethod
+    def supported_operations(cls) -> List[str]:
+        return ['txt']
+
+class DocxToHTMLConverter(DocxProcessor):
+    @retry(max_attempts=3, delay=0.5)
+    def process(self, output_path: Union[str, Path], **kwargs) -> str:
+        """
+        Convert DOCX to HTML.
+
+        Args:
+            output_path: output file path
+            **kwargs:
+                - css: custom CSS styles
+                - include_images: whether to include images (True/False)
+
+        Returns:
+            path of the converted HTML file
+        """
+        try:
+            output_path = self._prepare_output_path(output_path, 'html')
+
+            # Try pandoc first
+            try:
+                import subprocess
+                subprocess.run([
+                    'pandoc', '-s', str(self.docx_path),
+                    '-o', str(output_path),
+                    '--css', kwargs.get('css', '')
+                ], check=True)
+                return str(output_path)
+            except Exception:
+                self.logger.warning("pandoc is unavailable, using the basic converter")
+                # Basic fallback implementation
+                doc = Document(self.docx_path)
+                html_content = [
+                    '<!DOCTYPE html>',
+                    '<html>',
+                    '<head>',
+                    '<meta charset="UTF-8">',
+                    f'<title>{self.docx_path.stem}</title>',
+                    f'<style>{kwargs.get("css", "body { font-family: Arial; }")}</style>',
+                    '</head>',
+                    '<body>'
+                ]
+
+                for para in doc.paragraphs:
+                    html_content.append(f'<p>{para.text}</p>')
+
+                html_content.extend(['</body>', '</html>'])
+
+                with open(output_path, 'w', encoding='utf-8') as f:
+                    f.write('\n'.join(html_content))
+
+                return str(output_path)
+        except Exception as e:
+            raise DocxProcessingError(f"HTML conversion failed: {str(e)}")
+
+    @classmethod
+    def supported_operations(cls) -> List[str]:
+        return ['html']
+
+class DocxToCSVConverter(DocxProcessor):
+    @retry(max_attempts=3, delay=0.5)
+    def process(self, output_path: Union[str, Path], **kwargs) -> Union[str, List[str]]:
+        """
+        Extract the tables in a DOCX into CSV.
+
+        Args:
+            output_path: output file path or directory
+            **kwargs:
+                - table_indexes: list of table indexes to extract (e.g. [0,2])
+                - encoding: CSV file encoding (default utf-8)
+
+        Returns:
+            a single CSV file path, or a list of CSV file paths
+        """
+        try:
+            table_indexes = kwargs.get('table_indexes', None)
+            encoding = kwargs.get('encoding', 'utf-8')
+
+            doc = Document(self.docx_path)
+            tables = doc.tables
+
+            if not tables:
+                raise DocxProcessingError("No table data found")
+
+            if table_indexes is None:
+                table_indexes = range(len(tables))
+
+            output_path = Path(output_path)
+            if len(table_indexes) == 1:
+                output_path = self._prepare_output_path(output_path, 'csv')
+                df = self._table_to_dataframe(tables[table_indexes[0]])
+                df.to_csv(output_path, index=False, encoding=encoding)
+                return str(output_path)
+            else:
+                output_path.mkdir(parents=True, exist_ok=True)
+                output_files = []
+                for i in table_indexes:
+                    if i < len(tables):
+                        table_path = output_path / f"table_{i}.csv"
+                        df = self._table_to_dataframe(tables[i])
+                        df.to_csv(table_path, index=False, encoding=encoding)
+                        output_files.append(str(table_path))
+                return output_files
+        except Exception as e:
+            raise DocxProcessingError(f"Table extraction failed: {str(e)}")
+
+    def _table_to_dataframe(self, table) -> pd.DataFrame:
+        """Convert a Word table into a DataFrame."""
+        data = []
+        for row in table.rows:
+            row_data = []
+            for cell in row.cells:
+                row_data.append(cell.text)
+            data.append(row_data)
+        return pd.DataFrame(data)
+
+    @classmethod
+    def supported_operations(cls) -> List[str]:
+        return ['csv']
+
+class DocxEditor(DocxProcessor):
+    """DOCX document editor."""
+    @retry(max_attempts=3, delay=0.5)
+    def process(self, output_path: Union[str, Path], **kwargs) -> str:
+        """
+        Edit a DOCX document.
+
+        Args:
+            output_path: output file path
+            **kwargs:
+                - replace: replacement mapping {old text: new text}
+                - add_watermark: watermark text
+                - font_size: font size (default 12)
+                - font_color: font color (hex, e.g. #FF0000)
+
+        Returns:
+            path of the edited file
+        """
+        try:
+            output_path = self._prepare_output_path(output_path, 'docx')
+
+            doc = Document(self.docx_path)
+
+            # Text replacement
+            if 'replace' in kwargs:
+                for old_text, new_text in kwargs['replace'].items():
+                    for para in doc.paragraphs:
+                        if old_text in para.text:
+                            para.text = para.text.replace(old_text, new_text)
+
+            # Add a watermark
+            if 'add_watermark' in kwargs:
+                watermark = kwargs['add_watermark']
+                font_size = kwargs.get('font_size', 12)
+                font_color = kwargs.get('font_color', '#C0C0C0')
+
+                for section in doc.sections:
+                    header = section.header
+                    paragraph = header.paragraphs[0] if header.paragraphs else header.add_paragraph()
+                    paragraph.alignment = WD_PARAGRAPH_ALIGNMENT.CENTER
+
+                    run = paragraph.add_run(watermark)
+                    run.font.size = Pt(font_size)
+                    run.font.color.rgb = RGBColor(*self._hex_to_rgb(font_color))
+
+            doc.save(output_path)
+            return str(output_path)
+        except Exception as e:
+            raise DocxProcessingError(f"Document edit failed: {str(e)}")
+
+    def _hex_to_rgb(self, hex_color: str) -> tuple:
+        """Convert a hex color string to RGB."""
+        hex_color = hex_color.lstrip('#')
+        return tuple(int(hex_color[i:i+2], 16) for i in (0, 2, 4))
+
+    @classmethod
+    def supported_operations(cls) -> List[str]:
+        return ['edit']
+
+# Factory class
+class DocxProcessorFactory:
+    _operation_map = {
+        'pdf': (DocxToPDFConverter, 'PDF document'),
+        'txt': (DocxToTextConverter, 'plain text file'),
+        'html': (DocxToHTMLConverter, 'HTML page'),
+        'csv': (DocxToCSVConverter, 'CSV table data'),
+        'edit': (DocxEditor, 'document editing')
+    }
+
+    @staticmethod
+    @lru_cache(maxsize=32)
+    def get_processor(operation: str, docx_path: str) -> DocxProcessor:
+        """
+        Get the processor for the given operation.
+
+        Args:
+            operation: operation type
+            docx_path: path to the DOCX file
+
+        Returns:
+            a DocxProcessor instance
+
+        Raises:
+            UnsupportedFormatError: if the operation is not supported
+        """
+        operation = operation.lower()
+        if operation not in DocxProcessorFactory._operation_map:
+            raise UnsupportedFormatError(f"Unsupported operation: {operation}")
+
+        return DocxProcessorFactory._operation_map[operation][0](docx_path)
+
+    @staticmethod
+    def get_supported_operations() -> Dict[str, str]:
+        """Return all supported operations and their descriptions."""
+        return {op: desc for op, (_, desc) in DocxProcessorFactory._operation_map.items()}
+
+# Command-line interface
+def parse_args():
+    parser = argparse.ArgumentParser(
+        description='DOCX processing tool - supports multiple conversions and editing',
+        formatter_class=argparse.ArgumentDefaultsHelpFormatter
+    )
+    parser.add_argument('input', help='input DOCX file path')
+    parser.add_argument('output', help='output file path or directory')
+    parser.add_argument('-o', '--operation', required=True,
+                        choices=DocxProcessorFactory.get_supported_operations().keys(),
+                        help='operation type')
+    parser.add_argument('--pages', help='page range (PDF conversion only)')
+    parser.add_argument('--quality', choices=['high', 'medium', 'low'],
+                        default='medium', help='output quality (PDF conversion only)')
+    parser.add_argument('--include-tables', action='store_true',
+                        help='include table content (text conversion only)')
+    parser.add_argument('--table-indexes', help='table indexes to extract (e.g. 0,2,3)')
+    parser.add_argument('--replace', nargs=2, action='append',
+                        metavar=('OLD', 'NEW'), help='text replacement (edit operation)')
+    parser.add_argument('--watermark', help='watermark text (edit operation)')
+    parser.add_argument('--font-size', type=int, default=12,
+                        help='watermark font size (edit operation)')
+    parser.add_argument('--font-color', default='#C0C0C0',
+                        help='watermark font color (hex, edit operation)')
+    return parser.parse_args()
+
+def main():
+    args = parse_args()
+
+    try:
+        # Get the processor
+        processor = DocxProcessorFactory.get_processor(args.operation, args.input)
+
+        # Build the processing options
+        kwargs = {}
+        if args.operation == 'pdf':
+            kwargs.update({
+                'pages': args.pages,
+                'quality': args.quality
+            })
+        elif args.operation == 'txt':
+            kwargs['include_tables'] = args.include_tables
+        elif args.operation == 'csv':
+            if args.table_indexes:
+                kwargs['table_indexes'] = [int(i) for i in args.table_indexes.split(',')]
+        elif args.operation == 'edit':
+            if args.replace:
+                kwargs['replace'] = dict(args.replace)
+            if args.watermark:
+                kwargs['add_watermark'] = args.watermark
+                kwargs['font_size'] = args.font_size
+                kwargs['font_color'] = args.font_color
+
+        # Set up the progress callback
+        def progress_callback(current, total):
+            print(f"\rProgress: {current}/{total} ({current/total:.1%})", end='', flush=True)
+        processor.set_progress_callback(progress_callback)
+
+        # Run the processing
+        print(f"Processing: {args.input} -> {args.output} ({args.operation})")
+        result = processor.process(args.output, **kwargs)
+
+        print("\nDone!")
+        if isinstance(result, list):
+            print(f"Generated {len(result)} files:")
+            for file in result:
+                print(f"  - {file}")
+        else:
+            print(f"Output file: {result}")
+
+    except Exception as e:
+        print(f"\nProcessing failed: {e}", file=sys.stderr)
+        sys.exit(1)
+
+if __name__ == "__main__":
+    main()
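For orientation while reviewing the new module, here is a minimal usage sketch of the factory API defined above. It is illustrative only: it assumes tretool 1.0.0 is installed along with the python-docx, Pillow and pandas packages the module imports, and the sample.docx / sample.txt names are hypothetical.

```python
# Illustrative sketch only; assumes python-docx, Pillow and pandas are installed.
from docx import Document
from tretool.transform.docx import DocxProcessorFactory

# Hypothetical sample document created just for this example.
doc = Document()
doc.add_paragraph('Hello from tretool')
doc.save('sample.docx')

# Look up the converter registered for the 'txt' operation and run it.
processor = DocxProcessorFactory.get_processor('txt', 'sample.docx')
print(processor.process('sample.txt', include_tables=True))  # prints the output path
```

The same conversion should also be reachable through the module's argparse entry point (roughly `python -m tretool.transform.docx sample.docx sample.txt -o txt --include-tables`), since the file defines parse_args(), main() and a `__main__` guard.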
tretool/ziplib.py
ADDED
@@ -0,0 +1,664 @@
+"""
+secure_zip.py - a ZIP file library with encryption support.
+
+Features:
+1. AES encryption (128/192/256-bit) and legacy PKWARE encryption
+2. Create/extract encrypted ZIP files
+3. List the contents of encrypted ZIP files
+4. Integrity checking
+"""
+
+import os
+import struct
+import zlib
+import shutil
+import hashlib
+import random
+from typing import List, Union, Optional, Dict, BinaryIO, Tuple
+from pathlib import Path
+from dataclasses import dataclass
+from datetime import datetime
+from Crypto.Cipher import AES
+from Crypto.Util.Padding import pad, unpad
+from Crypto.Random import get_random_bytes
+
+# ZIP file format constants
+ZIP_SIGNATURE = b'PK\x03\x04'
+ZIP_CENTRAL_DIR_SIGNATURE = b'PK\x01\x02'
+ZIP_END_OF_CENTRAL_DIR_SIGNATURE = b'PK\x05\x06'
+
+# Encryption-related constants
+COMPRESSION_STORED = 0
+COMPRESSION_DEFLATED = 8
+ENCRYPTION_TRADITIONAL = 0x01
+ENCRYPTION_AES128 = 0x02
+ENCRYPTION_AES192 = 0x03
+ENCRYPTION_AES256 = 0x04
+AES_BLOCK_SIZE = 16
+AES_SALT_SIZE = 16
+AES_PASSWORD_VERIFIER_SIZE = 2
+AES_MAC_SIZE = 10
+AES_KEY_LENGTHS = {
+    ENCRYPTION_AES128: 16,
+    ENCRYPTION_AES192: 24,
+    ENCRYPTION_AES256: 32
+}
+
+@dataclass
+class ZipFileHeader:
+    version: int
+    flags: int
+    compression: int
+    mod_time: int
+    mod_date: int
+    crc32: int
+    compressed_size: int
+    uncompressed_size: int
+    filename: str
+    extra: bytes
+    file_offset: int
+    is_encrypted: bool = False
+    encryption_method: int = 0
+    aes_strength: int = 0
+    salt: bytes = b''
+    password_verifier: bytes = b''
+
+class SecureZipFile:
+    def __init__(self, filename: Union[str, Path], mode: str = 'r', password: Optional[str] = None):
+        """
+        Initialize an encrypted ZIP file object.
+
+        Args:
+            filename: path to the ZIP file
+            mode: open mode ('r' read, 'w' write, 'a' append)
+            password: encryption password (optional)
+        """
+        self.filename = Path(filename)
+        self.mode = mode
+        self.password = password
+        self.file_headers: Dict[str, ZipFileHeader] = {}
+        self.fp: Optional[BinaryIO] = None
+
+        if mode == 'r':
+            self._read_zip_file()
+        elif mode == 'w':
+            self.fp = open(self.filename, 'wb')
+        elif mode == 'a':
+            if self.filename.exists():
+                self._read_zip_file()
+                self.fp = open(self.filename, 'r+b')
+                # Position just before the central directory
+                self.fp.seek(self.end_of_central_dir_offset)
+            else:
+                self.fp = open(self.filename, 'wb')
+        else:
+            raise ValueError("Invalid mode, must be 'r', 'w' or 'a'")
+
+    def _read_zip_file(self):
+        """Read the ZIP file and parse its file headers."""
+        if not self.filename.exists():
+            raise FileNotFoundError(f"ZIP file not found: {self.filename}")
+
+        self.fp = open(self.filename, 'rb')
+        self._find_end_of_central_dir()
+        self._read_central_directory()
+
+    def _find_end_of_central_dir(self):
+        """Locate and read the end-of-central-directory record at the tail of the ZIP file."""
+        file_size = self.filename.stat().st_size
+        max_comment_len = 65535
+        search_size = min(file_size, max_comment_len + 22)
+
+        self.fp.seek(file_size - search_size)
+        data = self.fp.read()
+
+        pos = data.rfind(ZIP_END_OF_CENTRAL_DIR_SIGNATURE)
+        if pos < 0:
+            raise ValueError("Not a valid ZIP file (end of central directory signature not found)")
+
+        end_record = data[pos:pos+22]
+        (
+            self.disk_number,
+            self.central_dir_disk,
+            self.disk_entries,
+            self.total_entries,
+            self.central_dir_size,
+            self.central_dir_offset,
+            self.comment_length
+        ) = struct.unpack('<HHHHIIH', end_record[4:22])
+
+        self.end_of_central_dir_offset = file_size - (search_size - pos)
+
+    def _read_central_directory(self):
+        """Read the central directory and parse the file headers."""
+        self.fp.seek(self.central_dir_offset)
+
+        while True:
+            signature = self.fp.read(4)
+            if signature != ZIP_CENTRAL_DIR_SIGNATURE:
+                break
+
+            header_data = self.fp.read(42)
+            (
+                version_made_by, version_needed, flags, compression,
+                mod_time, mod_date, crc32, compressed_size, uncompressed_size,
+                filename_len, extra_len, comment_len, disk_num_start,
+                internal_attrs, external_attrs, local_header_offset
+            ) = struct.unpack('<HHHHHHIIIHHHHHII', header_data)
+
+            filename = self.fp.read(filename_len).decode('utf-8')
+            extra = self.fp.read(extra_len)
+            comment = self.fp.read(comment_len)
+
+            is_encrypted = (flags & 0x1) != 0
+            encryption_method = 0
+            aes_strength = 0
+            salt = b''
+            password_verifier = b''
+
+            if is_encrypted:
+                # Check the AES encryption flag
+                if (flags & 0x40) != 0:
+                    # AES encryption
+                    encryption_method = (flags >> 8) & 0xFF
+                    aes_strength = AES_KEY_LENGTHS.get(encryption_method, 0)
+
+                    # Read the salt and password verifier from the extra field
+                    extra_pos = 0
+                    while extra_pos < len(extra):
+                        header_id, data_size = struct.unpack_from('<HH', extra, extra_pos)
+                        extra_pos += 4
+                        if header_id == 0x9901:  # AE-x ID
+                            aes_data = extra[extra_pos:extra_pos+data_size]
+                            if len(aes_data) >= 7:
+                                aes_version, vendor_id, strength = struct.unpack_from('<HBB', aes_data, 0)
+                                salt = aes_data[7:7+AES_SALT_SIZE]
+                                password_verifier = aes_data[7+AES_SALT_SIZE:7+AES_SALT_SIZE+AES_PASSWORD_VERIFIER_SIZE]
+                            break
+                        extra_pos += data_size
+                else:
+                    # Legacy PKWARE encryption
+                    encryption_method = ENCRYPTION_TRADITIONAL
+
+            # Store the file header
+            self.file_headers[filename] = ZipFileHeader(
+                version=version_needed,
+                flags=flags,
+                compression=compression,
+                mod_time=mod_time,
+                mod_date=mod_date,
+                crc32=crc32,
+                compressed_size=compressed_size,
+                uncompressed_size=uncompressed_size,
+                filename=filename,
+                extra=extra,
+                file_offset=local_header_offset,
+                is_encrypted=is_encrypted,
+                encryption_method=encryption_method,
+                aes_strength=aes_strength,
+                salt=salt,
+                password_verifier=password_verifier
+            )
+
+    def close(self):
+        """Close the ZIP file."""
+        if self.fp is not None:
+            if self.mode in ('w', 'a'):
+                self._write_central_directory()
+            self.fp.close()
+            self.fp = None
+
+    def __enter__(self):
+        return self
+
+    def __exit__(self, exc_type, exc_val, exc_tb):
+        self.close()
+
+    def _generate_aes_key(self, salt: bytes, key_length: int) -> Tuple[bytes, bytes, bytes]:
+        """Derive the AES key, MAC key, and password verifier."""
+        if not self.password:
+            raise ValueError("Password is required for AES encryption")
+
+        # Derive key material with PBKDF2
+        key = hashlib.pbkdf2_hmac(
+            'sha1',
+            self.password.encode('utf-8'),
+            salt,
+            1000,  # iteration count
+            key_length * 2 + 2  # key + MAC key + verifier
+        )
+
+        encryption_key = key[:key_length]
+        mac_key = key[key_length:key_length*2]
+        password_verifier = key[key_length*2:key_length*2+2]
+
+        return encryption_key, mac_key, password_verifier
+
+    def _traditional_encrypt(self, data: bytes) -> bytes:
+        """Legacy PKWARE encryption."""
+        if not self.password:
+            raise ValueError("Password is required for traditional encryption")
+
+        # Initialize the encryption keys
+        keys = [0x12345678, 0x23456789, 0x34567890]
+        for c in self.password.encode('utf-8'):
+            keys = self._update_keys(keys, c)
+
+        # Encrypt the data
+        encrypted_data = bytearray()
+        for i, b in enumerate(data):
+            c = b ^ self._crc32_crypt_byte(keys[2])
+            encrypted_data.append(c)
+            keys = self._update_keys(keys, c)
+
+        return bytes(encrypted_data)
+
+    def _traditional_decrypt(self, data: bytes) -> bytes:
+        """Legacy PKWARE decryption."""
+        if not self.password:
+            raise ValueError("Password is required for traditional decryption")
+
+        # Initialize the encryption keys
+        keys = [0x12345678, 0x23456789, 0x34567890]
+        for c in self.password.encode('utf-8'):
+            keys = self._update_keys(keys, c)
+
+        # Decrypt the data
+        decrypted_data = bytearray()
+        for i, b in enumerate(data):
+            c = b ^ self._crc32_crypt_byte(keys[2])
+            decrypted_data.append(c)
+            keys = self._update_keys(keys, c)
+
+        return bytes(decrypted_data)
+
+    def _update_keys(self, keys: List[int], c: int) -> List[int]:
+        """Update the legacy encryption key state."""
+        keys[0] = zlib.crc32(bytes([c]), keys[0]) & 0xFFFFFFFF
+        keys[1] = (keys[1] + (keys[0] & 0xFF)) & 0xFFFFFFFF
+        keys[1] = (keys[1] * 134775813 + 1) & 0xFFFFFFFF
+        keys[2] = zlib.crc32(bytes([keys[1] >> 24]), keys[2]) & 0xFFFFFFFF
+        return keys
+
+    def _crc32_crypt_byte(self, key: int) -> int:
+        """Keystream byte for the legacy cipher."""
+        temp = (key | 2) & 0xFFFF
+        return ((temp * (temp ^ 1)) >> 8) & 0xFF
+
+    def _write_central_directory(self):
+        """Write the central directory and the end record."""
+        if self.fp is None:
+            raise ValueError("ZIP file not open")
+
+        central_dir_start = self.fp.tell()
+
+        for header in self.file_headers.values():
+            self.fp.write(ZIP_CENTRAL_DIR_SIGNATURE)
+            self.fp.write(struct.pack(
+                '<HHHHHHIIIHHHHHII',
+                20,  # version made by
+                20,  # version needed to extract
+                header.flags,
+                header.compression,
+                header.mod_time,
+                header.mod_date,
+                header.crc32,
+                header.compressed_size,
+                header.uncompressed_size,
+                len(header.filename.encode('utf-8')),
+                len(header.extra),
+                0,  # file comment length
+                0,  # disk number start
+                0,  # internal file attributes
+                0o644 << 16,  # external file attributes
+                header.file_offset
+            ))
+            self.fp.write(header.filename.encode('utf-8'))
+            self.fp.write(header.extra)
+
+        central_dir_end = self.fp.tell()
+        central_dir_size = central_dir_end - central_dir_start
+
+        self.fp.write(ZIP_END_OF_CENTRAL_DIR_SIGNATURE)
+        self.fp.write(struct.pack(
+            '<HHHHIIH',
+            0,  # number of this disk
+            0,  # disk where central directory starts
+            len(self.file_headers),
+            len(self.file_headers),
+            central_dir_size,
+            central_dir_start,
+            0  # ZIP file comment length
+        ))
+
+    def write(self, filename: str, data: bytes, compress: bool = True,
+              encryption_method: int = 0) -> None:
+        """
+        Write a single file into the ZIP archive.
+
+        Args:
+            filename: name of the file inside the ZIP
+            data: file data
+            compress: whether to compress the data
+            encryption_method: encryption method (0=none, 1=traditional, 2=AES128, 3=AES192, 4=AES256)
+        """
+        if self.fp is None or self.mode not in ('w', 'a'):
+            raise ValueError("ZIP file not open for writing")
+
+        # Compute the CRC32 checksum
+        crc32 = zlib.crc32(data) & 0xFFFFFFFF
+
+        if compress:
+            compressed_data = zlib.compress(data)
+            compression = COMPRESSION_DEFLATED
+        else:
+            compressed_data = data
+            compression = COMPRESSION_STORED
+
+        # Encrypt the data
+        is_encrypted = encryption_method != 0
+        salt = b''
+        password_verifier = b''
+        extra = b''
+
+        if is_encrypted:
+            if not self.password:
+                raise ValueError("Password is required for encryption")
+
+            if encryption_method == ENCRYPTION_TRADITIONAL:
+                # Legacy PKWARE encryption
+                encrypted_data = self._traditional_encrypt(compressed_data)
+                flags = 0x1  # encryption flag
+            elif encryption_method in (ENCRYPTION_AES128, ENCRYPTION_AES192, ENCRYPTION_AES256):
+                # AES encryption
+                key_length = AES_KEY_LENGTHS[encryption_method]
+                salt = get_random_bytes(AES_SALT_SIZE)
+                encryption_key, mac_key, password_verifier = self._generate_aes_key(salt, key_length)
+
+                # Create the AES cipher
+                cipher = AES.new(encryption_key, AES.MODE_CBC, iv=salt)
+                padded_data = pad(compressed_data, AES_BLOCK_SIZE)
+                encrypted_data = cipher.encrypt(padded_data)
+
+                # Append a SHA-1 based MAC (simplified implementation)
+                mac = hashlib.sha1(encrypted_data).digest()[:AES_MAC_SIZE]
+                encrypted_data += mac
+
+                # Build the AES extra field
+                aes_extra = struct.pack('<HBB', 0x9901, 7, encryption_method - 1)
+                extra = aes_extra + salt + password_verifier
+                flags = 0x41  # encryption flag + AES flag
+            else:
+                raise ValueError("Unsupported encryption method")
+        else:
+            encrypted_data = compressed_data
+            flags = 0
+
+        # Record the local file header offset
+        file_offset = self.fp.tell()
+
+        # Write the local file header
+        self.fp.write(ZIP_SIGNATURE)
+        self.fp.write(struct.pack(
+            '<HHHHHIII',
+            20,  # version needed to extract
+            flags,  # general purpose bit flag
+            compression,
+            0,  # last mod time (simplified)
+            0,  # last mod date (simplified)
+            crc32,
+            len(encrypted_data),
+            len(data)
+        ))
+
+        # Write the filename and extra field
+        self.fp.write(filename.encode('utf-8'))
+        if extra:
+            self.fp.write(extra)
+
+        # Write the (possibly encrypted) data
+        self.fp.write(encrypted_data)
+
+        # Store the file header
+        self.file_headers[filename] = ZipFileHeader(
+            version=20,
+            flags=flags,
+            compression=compression,
+            mod_time=0,
+            mod_date=0,
+            crc32=crc32,
+            compressed_size=len(encrypted_data),
+            uncompressed_size=len(data),
+            filename=filename,
+            extra=extra,
+            file_offset=file_offset,
+            is_encrypted=is_encrypted,
+            encryption_method=encryption_method,
+            aes_strength=AES_KEY_LENGTHS.get(encryption_method, 0),
+            salt=salt,
+            password_verifier=password_verifier
+        )
+
+    def extract(self, member: str, path: Optional[Union[str, Path]] = None) -> None:
+        """
+        Extract a single file from the ZIP archive.
+
+        Args:
+            member: name of the file to extract
+            path: extraction target path (defaults to the current directory)
+        """
+        if self.fp is None or self.mode != 'r':
+            raise ValueError("ZIP file not open for reading")
+
+        if member not in self.file_headers:
+            raise KeyError(f"File not found in ZIP: {member}")
+
+        header = self.file_headers[member]
+        target_path = Path(path or '.') / member
+
+        # Make sure the target directory exists
+        target_path.parent.mkdir(parents=True, exist_ok=True)
+
+        # Seek to the file data
+        self.fp.seek(header.file_offset)
+        signature = self.fp.read(4)
+        if signature != ZIP_SIGNATURE:
+            raise ValueError("Invalid local file header signature")
+
+        # Read the local file header
+        local_header = self.fp.read(26)
+        (
+            version, flags, compression, mod_time, mod_date,
+            crc32, compressed_size, uncompressed_size,
+            filename_len, extra_len
+        ) = struct.unpack('<HHHHHIIIHH', local_header)
+
+        # Skip the filename and extra field
+        filename = self.fp.read(filename_len).decode('utf-8')
+        extra = self.fp.read(extra_len)
+
+        # Read the (possibly encrypted) data
+        encrypted_data = self.fp.read(compressed_size)
+
+        # Decrypt the data
+        if header.is_encrypted:
+            if not self.password:
+                raise ValueError("Password is required for encrypted file")
+
+            if header.encryption_method == ENCRYPTION_TRADITIONAL:
+                # Legacy PKWARE decryption
+                decrypted_data = self._traditional_decrypt(encrypted_data)
+            elif header.encryption_method in (ENCRYPTION_AES128, ENCRYPTION_AES192, ENCRYPTION_AES256):
+                # AES decryption
+                key_length = header.aes_strength
+                encryption_key, mac_key, password_verifier = self._generate_aes_key(header.salt, key_length)
+
+                # Verify the password
+                if header.password_verifier != password_verifier:
+                    raise ValueError("Incorrect password")
+
+                # Split the payload into data and MAC
+                if len(encrypted_data) < AES_MAC_SIZE:
+                    raise ValueError("Invalid encrypted data length")
+
+                data_part = encrypted_data[:-AES_MAC_SIZE]
+                mac = encrypted_data[-AES_MAC_SIZE:]
+
+                # Verify the MAC (simplified implementation)
+                computed_mac = hashlib.sha1(data_part).digest()[:AES_MAC_SIZE]
+                if mac != computed_mac:
+                    raise ValueError("MAC verification failed")
+
+                # Decrypt
+                cipher = AES.new(encryption_key, AES.MODE_CBC, iv=header.salt)
+                decrypted_padded_data = cipher.decrypt(data_part)
+                decrypted_data = unpad(decrypted_padded_data, AES_BLOCK_SIZE)
+            else:
+                raise ValueError("Unsupported encryption method")
+        else:
+            decrypted_data = encrypted_data
+
+        # Decompress the data
+        if compression == COMPRESSION_STORED:
+            data = decrypted_data
+        elif compression == COMPRESSION_DEFLATED:
+            data = zlib.decompress(decrypted_data)
+        else:
+            raise ValueError(f"Unsupported compression method: {compression}")
+
+        # Verify the CRC32
+        if zlib.crc32(data) & 0xFFFFFFFF != header.crc32:
+            raise ValueError("CRC32 checksum failed")
+
+        # Write the target file
+        with open(target_path, 'wb') as f:
+            f.write(data)
+
+    def namelist(self) -> List[str]:
+        """Return the names of all files in the ZIP."""
+        return list(self.file_headers.keys())
+
+    def testzip(self) -> Optional[str]:
+        """
+        Test the integrity of every file in the ZIP.
+
+        Returns:
+            the name of the first corrupted file, or None if all files are intact
+        """
+        if self.fp is None or self.mode != 'r':
+            raise ValueError("ZIP file not open for reading")
+
+        for filename, header in self.file_headers.items():
+            try:
+                self.extract(filename, '/dev/null')  # try extracting to a throwaway location
+            except:
+                return filename
+
+        return None
+
+# High-level API functions
+def create_secure_zip(
+    zip_path: Union[str, Path],
+    files_to_zip: List[Union[str, Path]],
+    password: Optional[str] = None,
+    encryption_method: int = 0,
+    compression: bool = True,
+    overwrite: bool = False
+) -> None:
+    """
+    Create an encrypted ZIP file.
+
+    Args:
+        zip_path: path of the ZIP file to create
+        files_to_zip: list of files/directories to compress
+        password: encryption password
+        encryption_method: encryption method (0=none, 1=traditional, 2=AES128, 3=AES192, 4=AES256)
+        compression: whether to compress the data
+        overwrite: whether to overwrite an existing ZIP file
+
+    Raises:
+        FileExistsError: if the ZIP file already exists and overwriting is not allowed
+        FileNotFoundError: if a file to compress does not exist
+        ValueError: if a password is required but not provided
+    """
+    zip_path = Path(zip_path)
+    if zip_path.exists() and not overwrite:
+        raise FileExistsError(f"ZIP file already exists: {zip_path}")
+
+    with SecureZipFile(zip_path, 'w', password) as zipf:
+        for item in files_to_zip:
+            item = Path(item)
+            if not item.exists():
+                raise FileNotFoundError(f"File not found: {item}")
+
+            if item.is_file():
+                with open(item, 'rb') as f:
+                    data = f.read()
+                zipf.write(str(item.name), data, compress=compression, encryption_method=encryption_method)
+            elif item.is_dir():
+                for root, _, files in os.walk(item):
+                    for file in files:
+                        file_path = Path(root) / file
+                        rel_path = str(file_path.relative_to(item.parent))
+                        with open(file_path, 'rb') as f:
+                            data = f.read()
+                        zipf.write(rel_path, data, compress=compression, encryption_method=encryption_method)
+
+def extract_secure_zip(
+    zip_path: Union[str, Path],
+    extract_to: Union[str, Path],
+    password: Optional[str] = None,
+    members: Optional[List[str]] = None,
+    overwrite: bool = False
+) -> None:
+    """
+    Extract an encrypted ZIP file.
+
+    Args:
+        zip_path: path to the ZIP file
+        extract_to: extraction target directory
+        password: decryption password
+        members: optional, only extract the listed files
+        overwrite: whether to overwrite existing files
+
+    Raises:
+        FileNotFoundError: if the ZIP file does not exist
+        ValueError: if the ZIP file is corrupted or the password is wrong
+    """
+    zip_path = Path(zip_path)
+    extract_to = Path(extract_to)
+
+    if not zip_path.exists():
+        raise FileNotFoundError(f"ZIP file not found: {zip_path}")
+
+    if not overwrite:
+        # Check whether existing files would be overwritten
+        with SecureZipFile(zip_path, 'r', password) as zipf:
+            for member in members or zipf.namelist():
+                dest = extract_to / member
+                if dest.exists():
+                    raise FileExistsError(f"File exists and overwrite=False: {dest}")
+
+    with SecureZipFile(zip_path, 'r', password) as zipf:
+        for member in members or zipf.namelist():
+            zipf.extract(member, extract_to)
+
+def is_encrypted_zip(zip_path: Union[str, Path]) -> bool:
+    """
+    Check whether a ZIP file is encrypted.
+
+    Args:
+        zip_path: path to the ZIP file
+
+    Returns:
+        True if the ZIP file contains encrypted entries, otherwise False
+
+    Raises:
+        FileNotFoundError: if the ZIP file does not exist
+    """
+    zip_path = Path(zip_path)
+    if not zip_path.exists():
+        raise FileNotFoundError(f"ZIP file not found: {zip_path}")
+
+    with SecureZipFile(zip_path, 'r') as zipf:
+        return any(header.is_encrypted for header in zipf.file_headers.values())
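Likewise, a minimal usage sketch of the high-level API that tretool/ziplib.py adds. It is illustrative only: it assumes tretool 1.0.0 and the pycryptodome package (the Crypto imports above) are installed, the notes.txt / notes.zip names are hypothetical, and it only demonstrates creating and inspecting an archive.

```python
# Illustrative sketch only; assumes pycryptodome (Crypto.*) is installed.
from tretool.ziplib import (
    ENCRYPTION_AES256,
    SecureZipFile,
    create_secure_zip,
    is_encrypted_zip,
)

# Hypothetical input file created just for this example.
with open('notes.txt', 'w', encoding='utf-8') as f:
    f.write('hello from tretool\n')

# Create an AES-256 encrypted archive with the high-level helper.
create_secure_zip('notes.zip', ['notes.txt'], password='s3cret',
                  encryption_method=ENCRYPTION_AES256, overwrite=True)

# Inspect the result with the helper and the lower-level class.
print(is_encrypted_zip('notes.zip'))   # True
with SecureZipFile('notes.zip', 'r', password='s3cret') as zf:
    print(zf.namelist())               # ['notes.txt']
```

Note that the module writes ZIP records with struct directly rather than building on the standard-library zipfile module.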
{tretool-0.3.0.dist-info → tretool-1.0.0.dist-info}/METADATA
CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: tretool
-Version: 0.3.0
+Version: 1.0.0
 Summary: A Python utility library with many features
 Author-email: Jemy <sh_ljr_2013@163.com>
 License-Expression: MIT
@@ -29,4 +29,6 @@ Dynamic: license-file
 
 ### Using pip
 bash
-```
+```
+pip install tretool
+```
{tretool-0.3.0.dist-info → tretool-1.0.0.dist-info}/RECORD
CHANGED
@@ -1,4 +1,4 @@
-tretool/__init__.py,sha256=
+tretool/__init__.py,sha256=l0ufgWz4BFP1WSrnDh80T7GCFOIudoJgXBuwwZYBz54,1434
 tretool/config.py,sha256=Lmu3w5RRjGkX3T36EWdLonztvcKQq0mRvjZ2XU0R01A,16601
 tretool/decoratorlib.py,sha256=Vz3275svoHd28yPZyJFF-uF_oVoKuHPsU-_skMohF0Q,15460
 tretool/encoding.py,sha256=pysRjF-RDw1iVlM6J8yME6EEZTHcB9SnWofNk_B1cog,15341
@@ -12,11 +12,13 @@ tretool/plugin.py,sha256=CacUi_1iapnBVejfmf5vj5oE26-NIAPajVZ2I3QIQt4,20048
 tretool/smartCache.py,sha256=OuH98BsrWC_F-bqhCRomBvFcuVWvfVnc_iyQfZpo_40,18513
 tretool/tasklib.py,sha256=rEME3kt5K2yXAr9gYdl5keBA15Wchm2POluFHBhUwUM,25867
 tretool/timelib.py,sha256=XH1o8WDoj4W41vO-AuRV782d6IuqvmcCuKEkdabHsjg,16559
+tretool/ziplib.py,sha256=tRlaOQDEGETBjl6M4tnQ7Wz-GzQL2bfryLj1rKHa_9s,24742
 tretool/plugin/plu.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 tretool/transform/__init__.py,sha256=-0UKgEwky5AAV_UNGSRdKhYbrkAjUHZIO4Lj0z-ITPE,17
+tretool/transform/docx.py,sha256=zWYJxYuHvy4YUcUkuoJHx78qqnWwR7spP1KDltKYFek,19863
 tretool/transform/pdf.py,sha256=rYgXbZDSCx_405KK_FP2ZfiBKVr-EI8wwi_S9-ImHNk,20990
-tretool-0.
-tretool-0.
-tretool-0.
-tretool-0.
-tretool-0.
+tretool-1.0.0.dist-info/licenses/LICENSE,sha256=6kbiFSfobTZ7beWiKnHpN902HgBx-Jzgcme0SvKqhKY,1091
+tretool-1.0.0.dist-info/METADATA,sha256=QgRN5n9vyfakFhBtIkWQZ5VfX-LCxGlDKf6vr7Cfnl4,953
+tretool-1.0.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+tretool-1.0.0.dist-info/top_level.txt,sha256=0kbUVnSHjYxSRD1ziCdpw73ECdl7QysxeNHlRuY9xtc,8
+tretool-1.0.0.dist-info/RECORD,,
{tretool-0.3.0.dist-info → tretool-1.0.0.dist-info}/WHEEL
File without changes
{tretool-0.3.0.dist-info → tretool-1.0.0.dist-info}/licenses/LICENSE
File without changes
{tretool-0.3.0.dist-info → tretool-1.0.0.dist-info}/top_level.txt
File without changes