zippathlib 0.6.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- zippathlib/__init__.py +11 -0
- zippathlib/__main__.py +249 -0
- zippathlib/zip_pathlib.py +769 -0
- zippathlib-0.6.0.dist-info/METADATA +249 -0
- zippathlib-0.6.0.dist-info/RECORD +9 -0
- zippathlib-0.6.0.dist-info/WHEEL +5 -0
- zippathlib-0.6.0.dist-info/entry_points.txt +2 -0
- zippathlib-0.6.0.dist-info/licenses/LICENSE +21 -0
- zippathlib-0.6.0.dist-info/top_level.txt +1 -0
zippathlib/__init__.py
ADDED
zippathlib/__main__.py
ADDED
|
@@ -0,0 +1,249 @@
|
|
|
1
|
+
import argparse
|
|
2
|
+
import sys
|
|
3
|
+
import zipfile
|
|
4
|
+
from pathlib import Path
|
|
5
|
+
|
|
6
|
+
from rich import print as rprint
|
|
7
|
+
from rich.tree import Tree as RichTree
|
|
8
|
+
|
|
9
|
+
from zippathlib import ZipPath
|
|
10
|
+
|
|
11
|
+
DEFAULT_SIZE_LIMIT = 2 * 1024**3  # 2 GiB in bytes; rendered as the --limit default in the CLI help text
|
|
12
|
+
|
|
13
|
+
def get_version() -> str:
    """Return the ``__version__`` string exported by the enclosing package.

    The import happens inside the function, so the package attribute is only
    looked up when a caller actually asks for the version.
    """
    from . import __version__ as package_version

    return package_version
|
|
16
|
+
|
|
17
|
+
def make_parser() -> argparse.ArgumentParser:
    """Build the argument parser for the ``zippathlib`` command line.

    Positional arguments:
        zip_file: the ZIP archive to explore.
        path_within_zip: optional path inside the archive (default ``""``).

    Options: ``-V/--version``, ``--tree``, ``-x/--extract [DIR]`` (stored as
    ``outputdir``; ``"."`` when given without a value), ``--limit``,
    ``--check``, ``--purge`` and ``--new`` (stored as ``create_new_zip``).

    Returns:
        The configured ``argparse.ArgumentParser``.
    """
    parser = argparse.ArgumentParser(prog='zippathlib')
    parser.add_argument("zip_file", help="Zip file to explore")
    parser.add_argument(
        "path_within_zip",
        nargs='?',
        default="",
        help="Path within the zip file (optional)",
    )

    # options
    parser.add_argument(
        "-V", "--version",
        action="version",
        version=f"%(prog)s {get_version()}",
    )
    parser.add_argument(
        "--tree",
        action="store_true",
        help="list all files in a tree-like format",
    )
    parser.add_argument(
        "-x", "--extract",
        nargs="?", const=".", default=None,
        dest="outputdir",
        help="extract files from zip file to a directory or '-' for stdout, default is '.'",
    )
    parser.add_argument(
        "--limit",
        # Use the shared constant so the effective default and the default
        # advertised in the help text below can never drift apart.
        default=DEFAULT_SIZE_LIMIT,
        type=_h2i,
        help="guard value against malicious ZIP files that uncompress"
             " to excessive sizes; specify as an integer or float value"
             " optionally followed by a multiplier suffix K,M,G,T,P,E, or Z;"
             f" default is {_i2h(DEFAULT_SIZE_LIMIT).rstrip('B')}",
    )
    parser.add_argument(
        "--check",
        choices=["duplicates", "limit", "d", "l"],
        help="check ZIP file for duplicates, or for files larger than LIMIT",
    )
    parser.add_argument(
        "--purge",
        action="store_true",
        help="purge ZIP file of duplicate file entries",
    )
    parser.add_argument(
        "--new",
        dest="create_new_zip",
        action="store_true",
        help="create new ZIP file",
    )

    return parser
|
|
63
|
+
|
|
64
|
+
def _i2h(n: int) -> str:
|
|
65
|
+
if n < 1024 * 1024:
|
|
66
|
+
return f"{n:,} bytes"
|
|
67
|
+
n /= 1024
|
|
68
|
+
for prefix in "MGTPEZ":
|
|
69
|
+
n /= 1024
|
|
70
|
+
if n < 1024:
|
|
71
|
+
break
|
|
72
|
+
return f"{n:,.2f}{prefix}B"
|
|
73
|
+
|
|
74
|
+
def _h2i(s: str) -> int:
|
|
75
|
+
if not s:
|
|
76
|
+
return 0
|
|
77
|
+
|
|
78
|
+
s = s.rstrip("B").replace(",", "").removesuffix(" bytes")
|
|
79
|
+
|
|
80
|
+
if s.isdigit():
|
|
81
|
+
return int(s)
|
|
82
|
+
|
|
83
|
+
n = 1
|
|
84
|
+
for prefix in "KMGTPEZ":
|
|
85
|
+
n *= 1024
|
|
86
|
+
if prefix == s[-1]:
|
|
87
|
+
break
|
|
88
|
+
return int(n * float(s[:-1]))
|
|
89
|
+
|
|
90
|
+
def _extract_file(zippath:ZipPath, outputdir: Path | str | None = None):
|
|
91
|
+
"""Extract a file from the zip archive."""
|
|
92
|
+
if not zippath.exists():
|
|
93
|
+
raise FileNotFoundError(f"File {str(zippath)!r} not found in zip archive.")
|
|
94
|
+
|
|
95
|
+
data = zippath.read_bytes()
|
|
96
|
+
path, _, name = zippath._path.rpartition("/")
|
|
97
|
+
(outputdir / path).mkdir(parents=True, exist_ok=True)
|
|
98
|
+
outputpath = outputdir / path / name
|
|
99
|
+
outputpath.write_bytes(data)
|
|
100
|
+
|
|
101
|
+
|
|
102
|
+
def _construct_tree(zippath:ZipPath) -> RichTree:
    """Build a rich ``Tree`` mirroring everything found below ``zippath``.

    Entries come from ``zippath.riterdir()``.  A stack of
    ``(branch, depth)`` pairs tracks the chain of ancestors of the entry being
    placed, so each entry is attached to the nearest shallower branch.
    Entries with an empty ``name`` are skipped.
    """
    root = RichTree("")
    # Seed the stack one level above the starting path so that the first
    # named entry is attached directly to the (unlabelled) root.
    ancestors: list[tuple[RichTree, int]] = [(root, zippath._depth - 1)]

    for entry in zippath.riterdir():
        if entry.name:
            # Drop branches that are not strictly shallower than this entry;
            # what remains on top is the entry's parent.
            while ancestors[-1][1] >= entry._depth:
                ancestors.pop()

            branch = ancestors[-1][0].add(entry.name)
            ancestors.append((branch, entry._depth))

    return root
|
|
122
|
+
|
|
123
|
+
|
|
124
|
+
def main() -> int:
    """Run the ``zippathlib`` command-line interface.

    Depending on the parsed options this lists glob matches, prints a tree,
    checks for duplicate or oversized entries, purges duplicates, extracts
    entries (to a directory or to stdout), creates a new archive, or simply
    browses a file or directory inside the archive.

    Returns:
        0 on success; 1 when a check finds something, when ``--new`` is used
        on an archive that already exists, or when any exception is raised
        (the exception is printed as ``Error: <type>: <message>``).
    """
    args = make_parser().parse_args()

    # unpack positional args into locals
    zip_filename = args.zip_file
    path_within_zip = args.path_within_zip
    NL = "\n"
    # Set only when this invocation actually created the archive, so that the
    # --new branch below can tell "just created" apart from "already there".
    created_new_zip = False

    try:
        # See if zip_filename is actually a ZIP file - if not, this
        # will raise an exception and we'll either create a new
        # archive or just reraise
        try:
            # Open as a context manager so the probe handle is closed.
            with zipfile.ZipFile(zip_filename):
                pass
        except FileNotFoundError:
            if not args.create_new_zip:
                raise
            ZipPath.create(Path(zip_filename))
            created_new_zip = True

        zip_path: ZipPath = ZipPath(zip_filename)

        if "*" in path_within_zip:
            # Handle * wildcard for path_within_zip
            files = list(zip_path.glob(path_within_zip))
            print("Files:", *map(str, files), sep=NL)
        else:

            if path_within_zip:
                zip_path = zip_path / path_within_zip

                if not zip_path.exists():
                    raise ValueError(f"{path_within_zip!r} does not exist in {zip_filename}")

            if args.tree:
                # print pretty tree of ZIP contents
                rprint(_construct_tree(zip_path))

            elif args.check:
                if args.check[:1] == "d":
                    duplicates = zip_path.scan_for_duplicates()
                    if duplicates:
                        print("Duplicate files found")
                        for fname, count in duplicates:
                            print(f"{fname} ({count})")
                        return 1

                elif args.check[:1] == "l":
                    large_files = zip_path.scan_for_large_files(args.limit)
                    if large_files:
                        print("Large files found")
                        for fname, size in large_files:
                            print(f"{fname} - {_i2h(size)}")
                        return 1

            elif args.purge:
                # guard against files > args.limit in size
                duplicates: list[tuple[str, int]] = zip_path.scan_for_duplicates()
                if duplicates:
                    deduped: list[zipfile.ZipInfo] = zip_path.get_deduplicated_entries()
                    if any(entry.file_size > args.limit for entry in deduped):
                        raise ValueError(f"Files larger than {_i2h(args.limit)} were found.")

                    zip_path.purge_duplicates(replace=True)
                    print("Rebuilt ZIP file with duplicate entries removed")
                else:
                    print("No duplicate file entries found")

            elif args.outputdir:
                # extracting one or more files/directories
                if args.outputdir == "-":
                    if zip_path.is_file():
                        # dump to stdout
                        print(zip_path.read_text())
                    else:
                        raise ValueError("Cannot dump directory to stdout")
                else:
                    # extract files to given directory
                    zip_file_path = Path(zip_filename)
                    outputdir = Path(args.outputdir) / zip_file_path.stem
                    total_size = zip_path.total_size()
                    if total_size > args.limit:
                        raise ValueError(f"Total file size {_i2h(total_size)} exceeds extract limit {_i2h(args.limit)}")

                    for file in zip_path.riterdir():
                        if file.is_file():
                            print(f"extracting {file}")
                            _extract_file(file, outputdir)
                        else:
                            # make directory, in case it is an empty dir
                            (outputdir / file._path).mkdir(parents=True, exist_ok=True)

            elif args.create_new_zip:
                if created_new_zip:
                    # The archive was created above: this is the success path.
                    print(f"Created ZIP archive {zip_filename!r}")
                else:
                    print(f"ZIP archive {zip_filename!r} exists")
                    return 1

            else:
                # just browsing
                if zip_path.is_file():
                    print(f"File: {zip_path} ({_i2h(zip_path.size())})")
                    content = zip_path.read_text()
                    print(
                        f"Content:{NL}{content[:100]}"
                        f"{'...' if len(content) > 100 else ''}"
                    )

                elif zip_path.is_dir():
                    print(f"Directory: {zip_path} (total size {_i2h(zip_path.total_size())})")
                    print("Contents:")
                    for item in zip_path.iterdir():
                        type_indicator = "FD"[item.is_dir()]
                        print(f"  [{type_indicator}] {item.name} ({_i2h(item.size()) if item.is_file() else _i2h(item.total_size())})")
                else:
                    print(f"Path does not exist: {zip_path}")

    except Exception as e:
        # Top-level CLI boundary: report the failure and exit non-zero
        # instead of showing a traceback.
        print(f"Error: {type(e).__name__}: {e}")
        return 1

    return 0
|
|
246
|
+
|
|
247
|
+
|
|
248
|
+
# Script entry point: the integer returned by main() becomes the exit status.
if __name__ == '__main__':
    sys.exit(main())
|
|
@@ -0,0 +1,769 @@
|
|
|
1
|
+
#!/usr/bin/env python
|
|
2
|
+
"""
|
|
3
|
+
ZipPath - A pathlib.Path subclass for accessing files in ZIP archives
|
|
4
|
+
|
|
5
|
+
This module provides a standalone utility class for working with files inside
|
|
6
|
+
ZIP archives using a familiar pathlib-like interface.
|
|
7
|
+
"""
|
|
8
|
+
from __future__ import annotations
|
|
9
|
+
|
|
10
|
+
import contextlib
|
|
11
|
+
import fnmatch
|
|
12
|
+
import functools
|
|
13
|
+
import io
|
|
14
|
+
import os
|
|
15
|
+
import warnings
|
|
16
|
+
from pathlib import Path, PurePosixPath, PurePath
|
|
17
|
+
import stat
|
|
18
|
+
from typing import BinaryIO, TextIO, Any, NamedTuple
|
|
19
|
+
|
|
20
|
+
from collections.abc import Iterator
|
|
21
|
+
import zipfile
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
class ZipPathDuplicateFileWarning(UserWarning):
    """
    Warning issued when an existing file entry in a ZIP archive is written
    again: the earlier contents are not overwritten, a second entry with the
    same name is added instead.
    """
    def __init__(self, file_path: str):
        super().__init__(
            "File overwrite creates duplicate ZIP entry: " + repr(file_path)
        )
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
class _ZipWriteFile:
|
|
35
|
+
"""
|
|
36
|
+
A file-like object for writing to a file within a ZIP archive.
|
|
37
|
+
|
|
38
|
+
This class buffers written data and updates the ZIP file when closed.
|
|
39
|
+
"""
|
|
40
|
+
|
|
41
|
+
def __init__(self, zip_path: ZipPath, mode: str, encoding: str | None = None):
|
|
42
|
+
"""
|
|
43
|
+
Initialize a _ZipWriteFile object.
|
|
44
|
+
|
|
45
|
+
Args:
|
|
46
|
+
zip_path: The ZipPath object representing the file to write
|
|
47
|
+
mode: The file mode ('w', 'wb', 'a', 'ab')
|
|
48
|
+
encoding: Text encoding for text modes
|
|
49
|
+
"""
|
|
50
|
+
self.zip_path = zip_path
|
|
51
|
+
self.mode = mode
|
|
52
|
+
self.encoding = encoding
|
|
53
|
+
self.closed = False
|
|
54
|
+
|
|
55
|
+
# Create the appropriate buffer based on mode
|
|
56
|
+
if 'b' in mode: # Binary mode
|
|
57
|
+
self.buffer = io.BytesIO()
|
|
58
|
+
else: # Text mode
|
|
59
|
+
self.buffer = io.StringIO()
|
|
60
|
+
|
|
61
|
+
# Check if we're appending and the file exists
|
|
62
|
+
if 'a' in mode and zip_path.is_file():
|
|
63
|
+
# Read existing content
|
|
64
|
+
if 'b' in mode:
|
|
65
|
+
existing_data = zip_path.read_bytes()
|
|
66
|
+
self.buffer.write(existing_data)
|
|
67
|
+
else:
|
|
68
|
+
existing_text = zip_path.read_text(encoding=encoding or 'utf-8')
|
|
69
|
+
self.buffer.write(existing_text)
|
|
70
|
+
|
|
71
|
+
def write(self, data: str | bytes) -> int:
|
|
72
|
+
"""
|
|
73
|
+
Write data to the buffer.
|
|
74
|
+
|
|
75
|
+
Args:
|
|
76
|
+
data: The data to write
|
|
77
|
+
|
|
78
|
+
Returns:
|
|
79
|
+
Number of characters/bytes written
|
|
80
|
+
|
|
81
|
+
Raises:
|
|
82
|
+
ValueError: If the file is closed
|
|
83
|
+
"""
|
|
84
|
+
if self.closed:
|
|
85
|
+
raise ValueError("I/O operation on closed file")
|
|
86
|
+
return self.buffer.write(data)
|
|
87
|
+
|
|
88
|
+
def close(self) -> None:
|
|
89
|
+
"""
|
|
90
|
+
Close the file and update the ZIP archive.
|
|
91
|
+
"""
|
|
92
|
+
if self.closed:
|
|
93
|
+
return
|
|
94
|
+
|
|
95
|
+
self.buffer.seek(0)
|
|
96
|
+
|
|
97
|
+
# Get the data from the buffer
|
|
98
|
+
if 'b' in self.mode: # Binary mode
|
|
99
|
+
data = self.buffer.read()
|
|
100
|
+
else: # Text mode
|
|
101
|
+
text = self.buffer.read()
|
|
102
|
+
data = text.encode(self.encoding or 'utf-8')
|
|
103
|
+
|
|
104
|
+
# Update the ZIP file
|
|
105
|
+
normalized_path = self.zip_path._normalize_path(self.zip_path._path)
|
|
106
|
+
|
|
107
|
+
# Open the ZIP file in append mode
|
|
108
|
+
with zipfile.ZipFile(
|
|
109
|
+
self.zip_path.zip_filename,
|
|
110
|
+
mode='a',
|
|
111
|
+
compression=zipfile.ZIP_DEFLATED,
|
|
112
|
+
compresslevel=9,
|
|
113
|
+
) as zf:
|
|
114
|
+
zf.writestr(normalized_path, data)
|
|
115
|
+
|
|
116
|
+
self.closed = True
|
|
117
|
+
self.buffer.close()
|
|
118
|
+
|
|
119
|
+
def __enter__(self) -> _ZipWriteFile:
|
|
120
|
+
"""Context manager enter method."""
|
|
121
|
+
return self
|
|
122
|
+
|
|
123
|
+
def __exit__(self, exc_type: Any, exc_val: Any, exc_tb: Any) -> None:
|
|
124
|
+
"""Context manager exit method."""
|
|
125
|
+
self.close()
|
|
126
|
+
|
|
127
|
+
|
|
128
|
+
class _ZipStat(NamedTuple):
    """
    Ten-field record from which ``ZipPath.stat()`` builds an ``os.stat_result``.

    The tuple is passed positionally to ``os.stat_result``, so the declaration
    order of the fields is significant. Fields that the caller does not
    supply keep the defaults below.
    """
    st_mode: int = 0
    st_ino: int = 0
    st_dev: int = 0
    st_nlink: int = 1
    st_uid: int = 65534  # nobody
    st_gid: int = 65534  # nogroup
    st_size: int = 0
    st_atime: int = 0
    st_mtime: int = 0
    st_ctime: int = 0
|
|
139
|
+
|
|
140
|
+
|
|
141
|
+
class ZipPath(PurePosixPath):
|
|
142
|
+
"""
|
|
143
|
+
A pathlib.Path-like interface to files within a ZIP archive.
|
|
144
|
+
|
|
145
|
+
This class allows you to navigate and access files within a ZIP archive
|
|
146
|
+
using a familiar pathlib-like interface. It handles the details of opening
|
|
147
|
+
and closing the ZIP file as needed.
|
|
148
|
+
|
|
149
|
+
Examples:
|
|
150
|
+
# Open a file from a ZIP archive
|
|
151
|
+
zip_path = ZipPath('archive.zip', 'path/to/file.txt')
|
|
152
|
+
with zip_path.open() as f:
|
|
153
|
+
content = f.read()
|
|
154
|
+
|
|
155
|
+
# List all files in a directory within the ZIP
|
|
156
|
+
zip_path = ZipPath('archive.zip', 'some/directory')
|
|
157
|
+
for file in zip_path.iterdir():
|
|
158
|
+
print(file)
|
|
159
|
+
|
|
160
|
+
# Check if a file exists in the ZIP
|
|
161
|
+
if ZipPath('archive.zip', 'path/to/file.txt').exists():
|
|
162
|
+
print("File exists!")
|
|
163
|
+
"""
|
|
164
|
+
|
|
165
|
+
def __init__(self, zip_filename: str | PurePath, path: str = '', mode="r") -> None:
|
|
166
|
+
"""
|
|
167
|
+
Initialize a ZipPath object.
|
|
168
|
+
|
|
169
|
+
Args:
|
|
170
|
+
zip_filename: Path to the ZIP file
|
|
171
|
+
path: Path within the ZIP file (default: root of the ZIP)
|
|
172
|
+
"""
|
|
173
|
+
super().__init__(path)
|
|
174
|
+
self.zip_filename = Path(zip_filename)
|
|
175
|
+
self._path = path
|
|
176
|
+
self._mode = mode
|
|
177
|
+
self._zipfile_stat = os.stat(self.zip_filename)
|
|
178
|
+
|
|
179
|
+
@classmethod
|
|
180
|
+
def at_path(cls, source_path: Path | None, dest_path: Path = None) -> ZipPath:
|
|
181
|
+
"""
|
|
182
|
+
ZIP archive creator, returning a ZipPath object for the newly-created ZIP archive
|
|
183
|
+
"""
|
|
184
|
+
if dest_path is None:
|
|
185
|
+
dest = source_path.parent / f"{source_path.stem}.zip"
|
|
186
|
+
else:
|
|
187
|
+
dest = dest_path
|
|
188
|
+
|
|
189
|
+
if dest.exists():
|
|
190
|
+
return ZipPath(dest)
|
|
191
|
+
else:
|
|
192
|
+
# print(f"Creating {dest}")
|
|
193
|
+
with zipfile.ZipFile(
|
|
194
|
+
dest,
|
|
195
|
+
mode="w",
|
|
196
|
+
compression=zipfile.ZIP_DEFLATED,
|
|
197
|
+
compresslevel=9,
|
|
198
|
+
) as new_zf:
|
|
199
|
+
if source_path is not None:
|
|
200
|
+
for file in source_path.rglob("*"):
|
|
201
|
+
if file.is_file():
|
|
202
|
+
# print(f"adding {file}")
|
|
203
|
+
new_zf.write(
|
|
204
|
+
file,
|
|
205
|
+
file.relative_to(source_path.parent),
|
|
206
|
+
)
|
|
207
|
+
ret = ZipPath(dest)
|
|
208
|
+
return ret
|
|
209
|
+
|
|
210
|
+
@classmethod
|
|
211
|
+
def create(cls, new_zip_path: Path) -> ZipPath:
|
|
212
|
+
return cls.at_path(None, new_zip_path)
|
|
213
|
+
|
|
214
|
+
@property
|
|
215
|
+
def _depth(self):
|
|
216
|
+
"""Internal property for recursion and tree formatting"""
|
|
217
|
+
return self._path.count("/")
|
|
218
|
+
|
|
219
|
+
def is_valid(self) -> bool:
|
|
220
|
+
"""
|
|
221
|
+
Validation function to confirm that a ZipPath object is valid and the
|
|
222
|
+
zip file exists.
|
|
223
|
+
:return:
|
|
224
|
+
"""
|
|
225
|
+
try:
|
|
226
|
+
self._get_zipfile()
|
|
227
|
+
except Exception:
|
|
228
|
+
return False
|
|
229
|
+
else:
|
|
230
|
+
return True
|
|
231
|
+
|
|
232
|
+
def __str__(self) -> str:
|
|
233
|
+
"""Return a string representation of the path."""
|
|
234
|
+
return f"{self.zip_filename}::{self._path}"
|
|
235
|
+
|
|
236
|
+
def __repr__(self) -> str:
|
|
237
|
+
"""Return a detailed string representation of the path."""
|
|
238
|
+
return f"ZipPath('{self.zip_filename}', '{self._path}')"
|
|
239
|
+
|
|
240
|
+
def _get_zipfile(self) -> zipfile.ZipFile:
|
|
241
|
+
"""
|
|
242
|
+
Internal method to return the ZIP file that underlies the ZipPath object's file model.
|
|
243
|
+
|
|
244
|
+
Returns:
|
|
245
|
+
An open zipfile.ZipFile object
|
|
246
|
+
|
|
247
|
+
Raises:
|
|
248
|
+
FileNotFoundError: If the ZIP file doesn't exist
|
|
249
|
+
zipfile.BadZipFile: If the file is not a valid ZIP file
|
|
250
|
+
"""
|
|
251
|
+
if not self.zip_filename.exists():
|
|
252
|
+
raise FileNotFoundError(f"ZIP file not found: '{self.zip_filename}'")
|
|
253
|
+
return zipfile.ZipFile(self.zip_filename, mode=self._mode)
|
|
254
|
+
|
|
255
|
+
def _normalize_path(self, path: str) -> str:
|
|
256
|
+
"""
|
|
257
|
+
Normalize a path within the ZIP file.
|
|
258
|
+
|
|
259
|
+
Args:
|
|
260
|
+
path: Path to normalize
|
|
261
|
+
|
|
262
|
+
Returns:
|
|
263
|
+
Normalized path
|
|
264
|
+
"""
|
|
265
|
+
# Remove leading slash if present
|
|
266
|
+
return path.removeprefix("/")
|
|
267
|
+
|
|
268
|
+
def joinpath(self, *paths: str) -> ZipPath:
|
|
269
|
+
"""
|
|
270
|
+
Join this path with one or more path components.
|
|
271
|
+
|
|
272
|
+
Args:
|
|
273
|
+
*paths: Path components to join
|
|
274
|
+
|
|
275
|
+
Returns:
|
|
276
|
+
A new ZipPath object with the joined path
|
|
277
|
+
"""
|
|
278
|
+
new_path = PurePosixPath(self._path).joinpath(*paths)
|
|
279
|
+
return ZipPath(self.zip_filename, str(new_path), mode=self._mode)
|
|
280
|
+
|
|
281
|
+
def __truediv__(self, key: str) -> ZipPath:
|
|
282
|
+
"""
|
|
283
|
+
Join this path with another using the / operator.
|
|
284
|
+
|
|
285
|
+
Args:
|
|
286
|
+
key: Path component to join
|
|
287
|
+
|
|
288
|
+
Returns:
|
|
289
|
+
A new ZipPath object with the joined path
|
|
290
|
+
"""
|
|
291
|
+
return self.joinpath(key)
|
|
292
|
+
|
|
293
|
+
def exists(self) -> bool:
|
|
294
|
+
"""
|
|
295
|
+
Check if this path exists in the ZIP file.
|
|
296
|
+
|
|
297
|
+
Returns:
|
|
298
|
+
True if the path exists, False otherwise
|
|
299
|
+
"""
|
|
300
|
+
if not self.zip_filename.exists():
|
|
301
|
+
return False
|
|
302
|
+
|
|
303
|
+
try:
|
|
304
|
+
with self._get_zipfile() as zf:
|
|
305
|
+
# For the root directory
|
|
306
|
+
if not self._path:
|
|
307
|
+
return True
|
|
308
|
+
|
|
309
|
+
# For files
|
|
310
|
+
normalized_path = self._normalize_path(self._path)
|
|
311
|
+
if normalized_path in zf.namelist():
|
|
312
|
+
return True
|
|
313
|
+
|
|
314
|
+
# For directories (check if any file starts with this path)
|
|
315
|
+
if not normalized_path.endswith('/'):
|
|
316
|
+
normalized_path += '/'
|
|
317
|
+
|
|
318
|
+
for name in zf.namelist():
|
|
319
|
+
if name.startswith(normalized_path):
|
|
320
|
+
return True
|
|
321
|
+
|
|
322
|
+
return False
|
|
323
|
+
except zipfile.BadZipFile:
|
|
324
|
+
return False
|
|
325
|
+
|
|
326
|
+
def is_file(self) -> bool:
|
|
327
|
+
"""
|
|
328
|
+
Check if this path is a file in the ZIP archive.
|
|
329
|
+
|
|
330
|
+
Returns:
|
|
331
|
+
True if the path is a file, False otherwise
|
|
332
|
+
"""
|
|
333
|
+
if not self.zip_filename.exists():
|
|
334
|
+
return False
|
|
335
|
+
|
|
336
|
+
try:
|
|
337
|
+
with self._get_zipfile() as zf:
|
|
338
|
+
normalized_path = self._normalize_path(self._path)
|
|
339
|
+
return normalized_path in zf.namelist() and not normalized_path.endswith('/')
|
|
340
|
+
except zipfile.BadZipFile:
|
|
341
|
+
return False
|
|
342
|
+
|
|
343
|
+
def is_dir(self) -> bool:
|
|
344
|
+
"""
|
|
345
|
+
Check if this path is a directory in the ZIP archive.
|
|
346
|
+
|
|
347
|
+
Returns:
|
|
348
|
+
True if the path is a directory, False otherwise
|
|
349
|
+
"""
|
|
350
|
+
if not self.zip_filename.exists():
|
|
351
|
+
return False
|
|
352
|
+
|
|
353
|
+
if self.is_root():
|
|
354
|
+
return True
|
|
355
|
+
|
|
356
|
+
try:
|
|
357
|
+
with self._get_zipfile() as zf:
|
|
358
|
+
# Root directory
|
|
359
|
+
if not self._path:
|
|
360
|
+
return True
|
|
361
|
+
|
|
362
|
+
normalized_path = self._normalize_path(self._path)
|
|
363
|
+
|
|
364
|
+
# Explicit directory entry
|
|
365
|
+
if normalized_path.endswith('/'):
|
|
366
|
+
return normalized_path in zf.namelist()
|
|
367
|
+
else:
|
|
368
|
+
dir_path = normalized_path + '/'
|
|
369
|
+
if dir_path in zf.namelist():
|
|
370
|
+
return True
|
|
371
|
+
|
|
372
|
+
# Implicit directory (contains files)
|
|
373
|
+
if any(name.startswith(dir_path) for name in zf.namelist()):
|
|
374
|
+
return True
|
|
375
|
+
|
|
376
|
+
return False
|
|
377
|
+
except zipfile.BadZipFile:
|
|
378
|
+
return False
|
|
379
|
+
|
|
380
|
+
def is_root(self) -> bool:
|
|
381
|
+
"""
|
|
382
|
+
Boolean function for determining if the ZipPath object is at the root of
|
|
383
|
+
the ZIP archive.
|
|
384
|
+
"""
|
|
385
|
+
return self._path == ''
|
|
386
|
+
|
|
387
|
+
def iterdir(self) -> Iterator[ZipPath]:
|
|
388
|
+
"""
|
|
389
|
+
Iterate over the files and directories in this directory.
|
|
390
|
+
|
|
391
|
+
Returns:
|
|
392
|
+
An iterator of ZipPath objects
|
|
393
|
+
|
|
394
|
+
Raises:
|
|
395
|
+
NotADirectoryError: If this path is not a directory
|
|
396
|
+
"""
|
|
397
|
+
if not self.is_dir():
|
|
398
|
+
raise NotADirectoryError(f"Not a directory: {self}")
|
|
399
|
+
|
|
400
|
+
with self._get_zipfile() as zf:
|
|
401
|
+
prefix = self._normalize_path(self._path)
|
|
402
|
+
if prefix and not prefix.endswith('/'):
|
|
403
|
+
prefix += '/'
|
|
404
|
+
|
|
405
|
+
# Track directories we've seen to avoid duplicates
|
|
406
|
+
seen_dirs = set()
|
|
407
|
+
|
|
408
|
+
for name in zf.namelist():
|
|
409
|
+
if name.startswith(prefix):
|
|
410
|
+
# Get the relative path from the current directory
|
|
411
|
+
rel_path = name[len(prefix):]
|
|
412
|
+
if not rel_path:
|
|
413
|
+
continue
|
|
414
|
+
|
|
415
|
+
# Get the first component of the relative path
|
|
416
|
+
parts = rel_path.split('/', 1)
|
|
417
|
+
first_part = parts[0]
|
|
418
|
+
|
|
419
|
+
if len(parts) > 1: # This is a subdirectory
|
|
420
|
+
if first_part not in seen_dirs:
|
|
421
|
+
seen_dirs.add(first_part)
|
|
422
|
+
yield ZipPath(self.zip_filename, f"{prefix}{first_part}", mode=self._mode)
|
|
423
|
+
else: # This is a file
|
|
424
|
+
yield ZipPath(self.zip_filename, f"{prefix}{first_part}", mode=self._mode)
|
|
425
|
+
|
|
426
|
+
def glob(self, pattern: str) -> Iterator[ZipPath]:
|
|
427
|
+
"""
|
|
428
|
+
Iterate over paths matching a glob pattern.
|
|
429
|
+
|
|
430
|
+
Args:
|
|
431
|
+
pattern: Glob pattern to match
|
|
432
|
+
|
|
433
|
+
Returns:
|
|
434
|
+
An iterator of ZipPath objects matching the pattern
|
|
435
|
+
"""
|
|
436
|
+
with self._get_zipfile() as zf:
|
|
437
|
+
prefix = self._normalize_path(self._path)
|
|
438
|
+
if prefix and not prefix.endswith('/'):
|
|
439
|
+
prefix += '/'
|
|
440
|
+
|
|
441
|
+
if pattern.startswith("**/"):
|
|
442
|
+
list_src = zf.namelist()
|
|
443
|
+
pattern = pattern.removeprefix("**/")
|
|
444
|
+
else:
|
|
445
|
+
list_src = [str(p).partition("::")[-1] for p in self.iterdir()]
|
|
446
|
+
|
|
447
|
+
for name in list_src:
|
|
448
|
+
if name.startswith(prefix):
|
|
449
|
+
# Get the relative path from the current directory
|
|
450
|
+
rel_path = name[len(prefix):]
|
|
451
|
+
if not rel_path:
|
|
452
|
+
continue
|
|
453
|
+
|
|
454
|
+
# Check if it matches the pattern
|
|
455
|
+
if fnmatch.fnmatch(rel_path, pattern):
|
|
456
|
+
yield ZipPath(self.zip_filename, f"{prefix}{rel_path}")
|
|
457
|
+
|
|
458
|
+
def riterdir(self) -> Iterator[ZipPath]:
|
|
459
|
+
"""
|
|
460
|
+
Recursive iterator listing all files and directories in this directory and
|
|
461
|
+
its subdirectories (depth first).
|
|
462
|
+
"""
|
|
463
|
+
to_visit: list[ZipPath] = [self]
|
|
464
|
+
while to_visit:
|
|
465
|
+
current = to_visit.pop()
|
|
466
|
+
yield current
|
|
467
|
+
if current.is_dir():
|
|
468
|
+
for entry in current.iterdir():
|
|
469
|
+
to_visit.append(entry)
|
|
470
|
+
|
|
471
|
+
def rglob(self, pattern: str) -> Iterator[ZipPath]:
|
|
472
|
+
"""
|
|
473
|
+
Recursive glob iterator listing all files and directories in this directory and its subdirectories
|
|
474
|
+
that match the given pattern.
|
|
475
|
+
|
|
476
|
+
Args:
|
|
477
|
+
pattern: Glob pattern to match
|
|
478
|
+
|
|
479
|
+
Returns:
|
|
480
|
+
An iterator of ZipPath objects matching the pattern
|
|
481
|
+
"""
|
|
482
|
+
for path in self.riterdir():
|
|
483
|
+
if fnmatch.fnmatch(path.name, pattern):
|
|
484
|
+
yield path
|
|
485
|
+
|
|
486
|
+
def __iter__(self):
|
|
487
|
+
return (line for line in self.read_text().splitlines())
|
|
488
|
+
|
|
489
|
+
def open(self, mode: str = 'r', encoding: str | None = None) -> BinaryIO | TextIO:
|
|
490
|
+
"""
|
|
491
|
+
Open the file pointed to by this path.
|
|
492
|
+
|
|
493
|
+
Args:
|
|
494
|
+
mode: Open mode ('r', 'rb', 'w', 'wb', etc.)
|
|
495
|
+
encoding: Text encoding (for text modes)
|
|
496
|
+
|
|
497
|
+
Returns:
|
|
498
|
+
A file-like object
|
|
499
|
+
|
|
500
|
+
Raises:
|
|
501
|
+
FileNotFoundError: If the file doesn't exist in read mode
|
|
502
|
+
IsADirectoryError: If the path points to a directory
|
|
503
|
+
"""
|
|
504
|
+
# Reading modes
|
|
505
|
+
if 'r' in mode and not any(c in mode for c in 'wa'):
|
|
506
|
+
if not self.is_file():
|
|
507
|
+
if self.is_dir():
|
|
508
|
+
raise IsADirectoryError(f"Is a directory: {self}")
|
|
509
|
+
raise FileNotFoundError(f"File not found in ZIP: {self}")
|
|
510
|
+
|
|
511
|
+
with self._get_zipfile() as zf:
|
|
512
|
+
normalized_path = self._normalize_path(self._path)
|
|
513
|
+
file_data = zf.read(normalized_path)
|
|
514
|
+
|
|
515
|
+
if 'b' in mode: # Binary mode
|
|
516
|
+
return io.BytesIO(file_data)
|
|
517
|
+
else: # Text mode
|
|
518
|
+
encoding = encoding or 'utf-8'
|
|
519
|
+
return io.StringIO(file_data.decode(encoding))
|
|
520
|
+
|
|
521
|
+
# Writing modes
|
|
522
|
+
elif any(c in mode for c in 'wa'):
|
|
523
|
+
if self.is_dir():
|
|
524
|
+
raise IsADirectoryError(f"Is a directory: {self}")
|
|
525
|
+
|
|
526
|
+
# For writing, we return a custom file-like object that will update the ZIP when closed
|
|
527
|
+
if 'b' in mode: # Binary mode
|
|
528
|
+
return _ZipWriteFile(self, mode, None)
|
|
529
|
+
else: # Text mode
|
|
530
|
+
encoding = encoding or 'utf-8'
|
|
531
|
+
return _ZipWriteFile(self, mode, encoding)
|
|
532
|
+
|
|
533
|
+
else:
|
|
534
|
+
raise ValueError(f"Unsupported file mode: {mode}")
|
|
535
|
+
|
|
536
|
+
def read_text(self, encoding: str = 'utf-8') -> str:
|
|
537
|
+
"""
|
|
538
|
+
Read the contents of this file as text.
|
|
539
|
+
|
|
540
|
+
Args:
|
|
541
|
+
encoding: Text encoding
|
|
542
|
+
|
|
543
|
+
Returns:
|
|
544
|
+
The file contents as a string
|
|
545
|
+
|
|
546
|
+
Raises:
|
|
547
|
+
FileNotFoundError: If the file doesn't exist
|
|
548
|
+
IsADirectoryError: If the path points to a directory
|
|
549
|
+
"""
|
|
550
|
+
return self.read_bytes().decode(encoding)
|
|
551
|
+
|
|
552
|
+
def write_text(self, data: str, encoding: str = 'utf-8') -> int:
    """
    Store text as this file's contents.

    If the path already exists, a ZipPathDuplicateFileWarning is issued
    before writing. Any UserWarning raised while the entry is being written
    is suppressed.

    Args:
        data: The text to write
        encoding: Text encoding

    Returns:
        The number of characters written

    Raises:
        IsADirectoryError: If the path points to a directory
    """
    # Drop any cached entry info for this path before writing.
    self._clear_cached_info()

    already_present = self.exists()
    if already_present:
        warnings.warn(
            ZipPathDuplicateFileWarning(self._path),
            stacklevel=2,
        )

    with warnings.catch_warnings():
        warnings.simplefilter("ignore", category=UserWarning)
        writer = self.open('wt', encoding=encoding)
        with writer:
            written = writer.write(data)
    return written
|
|
576
|
+
|
|
577
|
+
def read_bytes(self) -> bytes:
|
|
578
|
+
"""
|
|
579
|
+
Read the contents of this file as bytes.
|
|
580
|
+
|
|
581
|
+
Returns:
|
|
582
|
+
The file contents as bytes
|
|
583
|
+
|
|
584
|
+
Raises:
|
|
585
|
+
FileNotFoundError: If the file doesn't exist
|
|
586
|
+
IsADirectoryError: If the path points to a directory
|
|
587
|
+
"""
|
|
588
|
+
with self.open('rb') as f:
|
|
589
|
+
return f.read()
|
|
590
|
+
|
|
591
|
+
def write_bytes(self, data: bytes) -> int:
    """
    Store bytes as this file's contents.

    If the path already exists, a ZipPathDuplicateFileWarning is issued
    before writing. Any UserWarning raised while the entry is being written
    is suppressed.

    Args:
        data: The bytes to write

    Returns:
        The number of bytes written

    Raises:
        IsADirectoryError: If the path points to a directory
    """
    # Drop any cached entry info for this path before writing.
    self._clear_cached_info()

    already_present = self.exists()
    if already_present:
        warnings.warn(
            ZipPathDuplicateFileWarning(self._path),
            stacklevel=2,
        )

    with warnings.catch_warnings():
        warnings.simplefilter("ignore", category=UserWarning)
        writer = self.open('wb')
        with writer:
            written = writer.write(data)
    return written
|
|
614
|
+
|
|
615
|
+
@property
|
|
616
|
+
def parent(self) -> ZipPath:
|
|
617
|
+
"""
|
|
618
|
+
Return the parent directory of this path.
|
|
619
|
+
|
|
620
|
+
Returns:
|
|
621
|
+
A ZipPath object pointing to the parent directory
|
|
622
|
+
"""
|
|
623
|
+
parent_path = str(PurePath(self._path).parent)
|
|
624
|
+
return ZipPath(self.zip_filename, parent_path, mode=self._mode)
|
|
625
|
+
|
|
626
|
+
@functools.cached_property
|
|
627
|
+
def _info(self) -> zipfile.ZipInfo | None:
|
|
628
|
+
if not self.is_file() or not self.exists():
|
|
629
|
+
return None
|
|
630
|
+
|
|
631
|
+
with self._get_zipfile() as zf:
|
|
632
|
+
info: zipfile.ZipInfo = zf.getinfo(self._path)
|
|
633
|
+
|
|
634
|
+
return info
|
|
635
|
+
|
|
636
|
+
def _clear_cached_info(self):
    """Forget the memoized ``_info`` value, if one has been computed."""
    try:
        del self._info
    except AttributeError:
        # Nothing was cached yet, so there is nothing to discard.
        pass
|
|
639
|
+
|
|
640
|
+
def stat(self) -> os.stat_result:
    """
    Build a simulated ``os.stat_result`` for this file or directory.

    Only mode, size and the three timestamps are filled in explicitly.

    Raises:
        FileNotFoundError: If this path does not exist in the ZIP
    """
    if not self.exists():
        raise FileNotFoundError(f"File not found in ZIP: {self}")

    # Mode = file-type bit, always readable, writable only when the
    # mode string of this object contains "w".
    kind_bits = stat.S_IFDIR if self.is_dir() else stat.S_IFREG
    write_bits = stat.S_IWRITE if "w" in self._mode else 0
    ret_st_mode = kind_bits | stat.S_IREAD | write_bits

    # Directories report a size of 0; files report the size recorded
    # in their ZipInfo.
    ret_st_size = self._info.file_size if self.is_file() else 0

    # All three timestamps share one value taken from _zipfile_stat
    # (presumably the stat of the archive file itself -- confirm).
    stamp = int(self._zipfile_stat.st_mtime)

    fields = _ZipStat(
        st_mode=ret_st_mode,
        st_size=ret_st_size,
        st_atime=stamp,
        st_mtime=stamp,
        st_ctime=stamp,
    )
    return os.stat_result(fields)
|
|
669
|
+
|
|
670
|
+
def size(self) -> int:
    """Return the ``st_size`` field of this path's simulated stat result."""
    stat_result = self.stat()
    return stat_result.st_size
|
|
672
|
+
|
|
673
|
+
def total_size(self) -> int:
    """Return the summed ``size()`` of every path yielded by ``riterdir()``."""
    running_total = 0
    for child in self.riterdir():
        running_total += child.size()
    return running_total
|
|
675
|
+
|
|
676
|
+
def rmdir(self) -> None:
    """Unsupported operation: always raises NotImplementedError."""
    message = f"{type(self).__name__} does not support removing directories"
    raise NotImplementedError(message)
|
|
679
|
+
|
|
680
|
+
def unlink(self, *args) -> None:
    """Unsupported operation: always raises NotImplementedError."""
    message = f"{type(self).__name__} does not support removing files"
    raise NotImplementedError(message)
|
|
683
|
+
|
|
684
|
+
def rename(self, *args) -> None:
    """Unsupported operation: always raises NotImplementedError."""
    message = f"{type(self).__name__} does not support renaming files or directories"
    raise NotImplementedError(message)
|
|
687
|
+
|
|
688
|
+
def replace(self, *args) -> None:
    """Unsupported operation: always raises NotImplementedError."""
    message = f"{type(self).__name__} does not support replacing files or directories"
    raise NotImplementedError(message)
|
|
691
|
+
|
|
692
|
+
def chmod(self, *args) -> None:
    """Unsupported operation: always raises NotImplementedError."""
    message = f"{type(self).__name__} does not support changing file permissions"
    raise NotImplementedError(message)
|
|
695
|
+
|
|
696
|
+
def scan_for_large_files(self, cutoff_size: int) -> list[tuple[str, int]]:
    """
    Find files under this path whose size exceeds ``cutoff_size``.

    Every path yielded by ``riterdir()`` is examined; non-file entries
    are skipped.

    Args:
        cutoff_size: Size threshold; only files strictly larger than
            this value are reported

    Returns:
        A list of ``(path, size)`` tuples, in ``riterdir()`` order
    """
    large_files = []
    for f in self.riterdir():
        if not f.is_file():
            continue
        # size() rebuilds a full stat result on every call, so query it
        # once and reuse the value for both the test and the report.
        file_size = f.size()
        if file_size > cutoff_size:
            large_files.append((f._path, file_size))
    return large_files
|
|
702
|
+
|
|
703
|
+
def scan_for_duplicates(self) -> list[tuple[str, int]]:
    """
    Report entry names that occur more than once in the ZIP.

    Returns:
        A list of ``(name, count)`` tuples, one per name that appears
        at least twice in the archive's name list
    """
    from collections import Counter

    with self._get_zipfile() as archive:
        occurrences = Counter(archive.namelist())

    return [(member, seen) for member, seen in occurrences.items() if seen > 1]
|
|
709
|
+
|
|
710
|
+
def get_deduplicated_entries(self) -> list[zipfile.ZipInfo]:
    """
    Return one ZipInfo per distinct entry name in the ZIP.

    When a name occurs several times, the last ZipInfo listed for that
    name is the one kept.
    """
    latest_by_name = {}
    with self._get_zipfile() as archive:
        for record in archive.infolist():
            # Later records overwrite earlier ones with the same name.
            latest_by_name[record.filename] = record
    return list(latest_by_name.values())
|
|
716
|
+
|
|
717
|
+
def purge_duplicates(
    self,
    *,
    workdir: Path | str | None = None,
    replace: bool = False,
    keep: bool = False,
):
    """
    Remove duplicate versions of any files.

    Since ZIP files do not support actual deletion of entries, this requires creating
    a new ZIP archive, and only copying the deduplicated files into it. Directory
    entries are not copied. If the archive contains no duplicates, nothing is written.

    NOTE: each retained entry is read fully into memory with ``read_bytes()``; this
    method itself applies no size limit, so callers concerned about ZIP bombs should
    check the archive first (e.g. with ``scan_for_large_files()``).

    Args:
        workdir: Directory in which the deduplicated archive is built, using the
            same file name as the source archive. Defaults to
            ``tempfile.gettempdir()``.
        replace: If True, the rebuilt archive is written over ``self.zip_filename``.
        keep: If True, the rebuilt archive is left in ``workdir``. If False, it is
            moved over the original (``replace=True``) or deleted
            (``replace=False``).

    Raises:
        ValueError: If ``workdir`` would place the rebuilt archive at the same
            location as the source archive.
    """
    import tempfile
    import shutil

    workdir = Path(tempfile.gettempdir()) if workdir is None else Path(workdir)
    dest = workdir / self.zip_filename.name

    dupes = self.scan_for_duplicates()
    if not dupes:
        return

    # Opening dest in "w" mode truncates it. If dest is the source archive
    # itself, that would destroy the data before it could be copied.
    if dest.resolve() == Path(self.zip_filename).resolve():
        raise ValueError(
            f"workdir {str(workdir)!r} would overwrite the source archive; "
            "choose a different working directory"
        )

    deduped: list[zipfile.ZipInfo] = self.get_deduplicated_entries()
    if not deduped:
        # Nothing to copy, so no archive was created and there is nothing
        # to move, copy or delete.
        return

    with zipfile.ZipFile(
        dest,
        mode="w",
        compression=zipfile.ZIP_DEFLATED,
        compresslevel=9,
    ) as new_zf:
        for entry_info in deduped:
            if entry_info.is_dir():
                continue
            entry = self / entry_info.filename
            print(f"adding {entry._path}")
            new_zf.writestr(entry._path, entry.read_bytes())

    if replace:
        if keep:
            # Overwrite the original but leave the rebuilt copy in workdir.
            shutil.copy2(dest, self.zip_filename)
        else:
            shutil.move(dest, self.zip_filename)
    elif not keep:
        dest.unlink()
|
|
@@ -0,0 +1,249 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: zippathlib
|
|
3
|
+
Version: 0.6.0
|
|
4
|
+
Summary: A `pathlib.Path`-like class for accessing the contents of ZIP archives
|
|
5
|
+
Author-email: Paul McGuire <ptmcg_pm+zippathlib@gmail.com>
|
|
6
|
+
License-Expression: MIT
|
|
7
|
+
Project-URL: Homepage, https://github.com/ptmcg/zippathlib
|
|
8
|
+
Project-URL: Source, https://github.com/ptmcg/zippathlib.git
|
|
9
|
+
Classifier: Development Status :: 4 - Beta
|
|
10
|
+
Classifier: Intended Audience :: Developers
|
|
11
|
+
Classifier: Intended Audience :: Information Technology
|
|
12
|
+
Classifier: Operating System :: OS Independent
|
|
13
|
+
Classifier: Programming Language :: Python
|
|
14
|
+
Classifier: Programming Language :: Python :: 3
|
|
15
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
16
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
17
|
+
Classifier: Programming Language :: Python :: 3.14
|
|
18
|
+
Classifier: Programming Language :: Python :: 3 :: Only
|
|
19
|
+
Classifier: Programming Language :: Python :: Free Threading :: 3 - Stable
|
|
20
|
+
Classifier: Topic :: System :: Archiving
|
|
21
|
+
Classifier: Topic :: System :: Archiving :: Compression
|
|
22
|
+
Classifier: Topic :: System :: Filesystems
|
|
23
|
+
Classifier: Topic :: Utilities
|
|
24
|
+
Classifier: Typing :: Typed
|
|
25
|
+
Requires-Python: >=3.12
|
|
26
|
+
Description-Content-Type: text/markdown
|
|
27
|
+
License-File: LICENSE
|
|
28
|
+
Requires-Dist: rich>=9.7.0
|
|
29
|
+
Dynamic: license-file
|
|
30
|
+
|
|
31
|
+
# zippathlib - Provides a pathlib.Path-like class for accessing files in ZIP archives
|
|
32
|
+
|
|
33
|
+
`zippathlib` is a Python library that provides a standalone `ZipPath` class for working with files
|
|
34
|
+
inside ZIP archives using a familiar pathlib.Path-like interface. This allows you to navigate and
|
|
35
|
+
access files within a ZIP archive without first extracting them. From your Python code, you can access
|
|
36
|
+
the contents using the familiar `pathlib.Path` API, instead of the standard library's `zipfile` module, and
|
|
37
|
+
perform operations like reading, writing, checking existence of files and directories, etc.
|
|
38
|
+
|
|
39
|
+
`zippathlib` is also accessible from the command line, allowing you to list contents of ZIP archives,
|
|
40
|
+
extract individual files or directories, and view the contents of the ZIP archive as a tree.
|
|
41
|
+
|
|
42
|
+
<!-- TOC -->
|
|
43
|
+
* [zippathlib - Provides a pathlib.Path-like class for accessing files in ZIP archives](#zippathlib---provides-a-pathlibpath-like-class-for-accessing-files-in-zip-archives)
|
|
44
|
+
* [Features](#features)
|
|
45
|
+
* [Doesn't Python's `zipfile` module already have a `Path` class?](#doesnt-pythons-zipfile-module-already-have-a-path-class)
|
|
46
|
+
* [Usage - Command line](#usage---command-line)
|
|
47
|
+
* [List the root directory of a ZIP archive](#list-the-root-directory-of-a-zip-archive)
|
|
48
|
+
* [List the files in a directory](#list-the-files-in-a-directory)
|
|
49
|
+
* [List the first few lines of a file](#list-the-first-few-lines-of-a-file)
|
|
50
|
+
* [Extract a file from a ZIP archive to the local filesystem](#extract-a-file-from-a-zip-archive-to-the-local-filesystem)
|
|
51
|
+
* [Extract a file from a ZIP archive to stdout](#extract-a-file-from-a-zip-archive-to-stdout)
|
|
52
|
+
* [View the contents of a ZIP archive (or a subdirectory within the ZIP archive) as a tree](#view-the-contents-of-a-zip-archive-or-a-subdirectory-within-the-zip-archive-as-a-tree)
|
|
53
|
+
* [Usage - `zippathlib.ZipPath` API](#usage---zippathlibzippath-api)
|
|
54
|
+
* [Open a file from a ZIP archive for reading](#open-a-file-from-a-zip-archive-for-reading)
|
|
55
|
+
* [Store a file into a ZIP archive, and use the '/' operator to navigate or create directories](#store-a-file-into-a-zip-archive-and-use-the--operator-to-navigate-or-create-directories)
|
|
56
|
+
* [List all files in a directory within the ZIP](#list-all-files-in-a-directory-within-the-zip)
|
|
57
|
+
* [Recursively list all directories and files in the ZIP](#recursively-list-all-directories-and-files-in-the-zip)
|
|
58
|
+
* [Check if a file exists in the ZIP](#check-if-a-file-exists-in-the-zip)
|
|
59
|
+
* [Installation](#installation)
|
|
60
|
+
* [Testing](#testing)
|
|
61
|
+
* [Contributing](#contributing)
|
|
62
|
+
* [License](#license)
|
|
63
|
+
<!-- TOC -->
|
|
64
|
+
|
|
65
|
+
## Features
|
|
66
|
+
- `ZipPath` class provides a pathlib-like API to access ZIP archive files and directories.
|
|
67
|
+
- Supports path composition using the '/' operator.
|
|
68
|
+
- Supports basic read/write operations on files within a ZIP archive, including opening files for reading, writing data, etc.
|
|
69
|
+
- Allows navigating the directory structure within a ZIP archive using familiar `pathlib` methods like `iterdir()`, `joinpath()`, etc.
|
|
70
|
+
`riterdir()` provides a recursive listing of the directories and files in the archive.
|
|
71
|
+
- Command-line interface for browsing a ZIP archive's contents, or for extracting files to the local filesystem.
|
|
72
|
+
|
|
73
|
+
|
|
74
|
+
## Doesn't Python's `zipfile` module already have a `Path` class?
|
|
75
|
+
|
|
76
|
+
The Python standard library module `zipfile` does provide a `Path` [class](https://docs.python.org/3/library/zipfile.html#zipfile.Path),
|
|
77
|
+
but it is primarily for read support in existing ZIP files. It doesn't provide a complete `pathlib`-like API for working with files and directories.
|
|
78
|
+
|
|
79
|
+
|
|
80
|
+
## Usage - Command line
|
|
81
|
+
After installing it, the `zippathlib` module can be run from the command line with `python -m zippathlib` or just `zippathlib`.
|
|
82
|
+
|
|
83
|
+
$ zippathlib -h
|
|
84
|
+
usage: zippathlib [-h] [-V] [--tree] [-x [OUTPUTDIR]] [--limit LIMIT] [--check {duplicates,limit,d,l}] [--purge]
|
|
85
|
+
zip_file [path_within_zip]
|
|
86
|
+
|
|
87
|
+
positional arguments:
|
|
88
|
+
zip_file Zip file to explore
|
|
89
|
+
path_within_zip Path within the zip file (optional)
|
|
90
|
+
|
|
91
|
+
options:
|
|
92
|
+
-h, --help show this help message and exit
|
|
93
|
+
-V, --version show program's version number and exit
|
|
94
|
+
--tree list all files in a tree-like format
|
|
95
|
+
-x, --extract [OUTPUTDIR]
|
|
96
|
+
extract files from zip file to a directory or '-' for stdout,
|
|
97
|
+
default is '.'
|
|
98
|
+
--limit LIMIT guard value against malicious ZIP files that uncompress to excessive
|
|
99
|
+
sizes; specify as an integer or float value optionally followed by a
|
|
100
|
+
multiplier suffix K,M,G,T,P,E, or Z; default is 2.00G
|
|
101
|
+
--check {duplicates,limit,d,l}
|
|
102
|
+
check ZIP file for duplicates, or for files larger than LIMIT
|
|
103
|
+
--purge purge ZIP file of duplicate file entries
|
|
104
|
+
|
|
105
|
+
### List the root directory of a ZIP archive
|
|
106
|
+
|
|
107
|
+
$ zippathlib .\dist\zippathlib-0.4.0-py3-none-any.whl
|
|
108
|
+
Directory: dist\zippathlib-0.4.0-py3-none-any.whl:: (total size 39.76KB)
|
|
109
|
+
Contents:
|
|
110
|
+
[D] zippathlib (28.32KB)
|
|
111
|
+
[D] zippathlib-0.4.0.dist-info (11.44KB)
|
|
112
|
+
|
|
113
|
+
### List the files in a directory
|
|
114
|
+
|
|
115
|
+
$ zippathlib .\dist\zippathlib-0.4.0-py3-none-any.whl zippathlib-0.4.0.dist-info
|
|
116
|
+
Directory: dist\zippathlib-0.4.0-py3-none-any.whl::zippathlib-0.4.0.dist-info (total size 11.44KB)
|
|
117
|
+
Contents:
|
|
118
|
+
[D] licenses (1.06KB)
|
|
119
|
+
[F] METADATA (9.49KB)
|
|
120
|
+
[F] WHEEL (91 bytes)
|
|
121
|
+
[F] entry_points.txt (56 bytes)
|
|
122
|
+
[F] top_level.txt (11 bytes)
|
|
123
|
+
[F] RECORD (748 bytes)
|
|
124
|
+
|
|
125
|
+
### List the first few lines of a file
|
|
126
|
+
|
|
127
|
+
$ zippathlib .\dist\zippathlib-0.4.0-py3-none-any.whl zippathlib-0.4.0.dist-info/licenses/LICENSE
|
|
128
|
+
File: dist\zippathlib-0.4.0-py3-none-any.whl::zippathlib-0.4.0.dist-info/licenses/LICENSE (1.06KB)
|
|
129
|
+
Content:
|
|
130
|
+
MIT License
|
|
131
|
+
|
|
132
|
+
Copyright (c) 2025 Paul McGuire
|
|
133
|
+
|
|
134
|
+
Permission is hereby granted, free of charge, to a...
|
|
135
|
+
|
|
136
|
+
### Extract a file from a ZIP archive to the local filesystem
|
|
137
|
+
|
|
138
|
+
# if outputdir is omitted, file is extracted to the current directory
|
|
139
|
+
$ zippathlib .\dist\zippathlib-0.4.0-py3-none-any.whl zippathlib-0.4.0.dist-info/licenses/LICENSE --extract tmp
|
|
140
|
+
extracting dist\zippathlib-0.4.0-py3-none-any.whl::zippathlib-0.4.0.dist-info/licenses/LICENSE
|
|
141
|
+
|
|
142
|
+
### Extract a file from a ZIP archive to stdout
|
|
143
|
+
|
|
144
|
+
# if outputdir is "-", file is extracted and printed to stdout
|
|
145
|
+
$ zippathlib .\dist\zippathlib-0.4.0-py3-none-any.whl zippathlib-0.4.0.dist-info/licenses/LICENSE --extract -
|
|
146
|
+
MIT License
|
|
147
|
+
|
|
148
|
+
Copyright (c) 2025 Paul McGuire
|
|
149
|
+
|
|
150
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
151
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
152
|
+
...
|
|
153
|
+
|
|
154
|
+
### View the contents of a ZIP archive (or a subdirectory within the ZIP archive) as a tree
|
|
155
|
+
|
|
156
|
+
$ zippathlib .\dist\zippathlib-0.4.0-py3-none-any.whl --tree
|
|
157
|
+
|
|
158
|
+
├── zippathlib-0.4.0.dist-info
|
|
159
|
+
│ ├── RECORD
|
|
160
|
+
│ ├── top_level.txt
|
|
161
|
+
│ ├── entry_points.txt
|
|
162
|
+
│ ├── WHEEL
|
|
163
|
+
│ ├── METADATA
|
|
164
|
+
│ └── licenses
|
|
165
|
+
│ └── LICENSE
|
|
166
|
+
└── zippathlib
|
|
167
|
+
├── zip_pathlib.py
|
|
168
|
+
├── __main__.py
|
|
169
|
+
└── __init__.py
|
|
170
|
+
|
|
171
|
+
(Thanks to Will McGugan's `rich` library for making the tree output so easy.)
|
|
172
|
+
|
|
173
|
+
## Usage - `zippathlib.ZipPath` API
|
|
174
|
+
|
|
175
|
+
Here are some examples on how to use `ZipPath` in your Python code:
|
|
176
|
+
|
|
177
|
+
### Open a file from a ZIP archive for reading
|
|
178
|
+
|
|
179
|
+
```python
|
|
180
|
+
from zippathlib import ZipPath
|
|
181
|
+
|
|
182
|
+
zip_path = ZipPath('archive.zip', 'path/to/file.txt')
|
|
183
|
+
with zip_path.open() as f:
|
|
184
|
+
content = f.read()
|
|
185
|
+
```
|
|
186
|
+
|
|
187
|
+
### Store a file into a ZIP archive, and use the '/' operator to navigate or create directories
|
|
188
|
+
|
|
189
|
+
```python
|
|
190
|
+
from zippathlib import ZipPath
|
|
191
|
+
|
|
192
|
+
zp = ZipPath('archive.zip')
|
|
193
|
+
content_file = zp / 'some/directory' / 'file.txt'
|
|
194
|
+
content_file.write_text("This is too easy!")
|
|
195
|
+
```
|
|
196
|
+
|
|
197
|
+
|
|
198
|
+
### List all files in a directory within the ZIP
|
|
199
|
+
|
|
200
|
+
```python
|
|
201
|
+
from zippathlib import ZipPath
|
|
202
|
+
|
|
203
|
+
zip_path = ZipPath('archive.zip')
|
|
204
|
+
for file in (zip_path / 'some' / 'directory').iterdir():
|
|
205
|
+
print(file)
|
|
206
|
+
```
|
|
207
|
+
|
|
208
|
+
### Recursively list all directories and files in the ZIP
|
|
209
|
+
|
|
210
|
+
```python
|
|
211
|
+
from zippathlib import ZipPath
|
|
212
|
+
|
|
213
|
+
zip_path = ZipPath('archive.zip')
|
|
214
|
+
|
|
215
|
+
# list all contents, in depth first search
|
|
216
|
+
for file in zip_path.riterdir():
|
|
217
|
+
print(file)
|
|
218
|
+
```
|
|
219
|
+
|
|
220
|
+
### Check if a file exists in the ZIP
|
|
221
|
+
|
|
222
|
+
```python
|
|
223
|
+
from zippathlib import ZipPath
|
|
224
|
+
|
|
225
|
+
if ZipPath('archive.zip', 'path/to/file.txt').exists():
|
|
226
|
+
print("File exists")
|
|
227
|
+
```
|
|
228
|
+
|
|
229
|
+
## Installation
|
|
230
|
+
You can install `zippathlib` using `pip` or `pipx`:
|
|
231
|
+
|
|
232
|
+
```bash
|
|
233
|
+
pip install zippathlib
|
|
234
|
+
```
|
|
235
|
+
|
|
236
|
+
## Testing
|
|
237
|
+
Tests are located in the tests directory and can be run using pytest:
|
|
238
|
+
|
|
239
|
+
```bash
|
|
240
|
+
pytest tests
|
|
241
|
+
```
|
|
242
|
+
|
|
243
|
+
## Contributing
|
|
244
|
+
Contributions are welcome. Please open an issue or submit a pull request on [GitHub](https://github.com/ptmcg/zippathlib).
|
|
245
|
+
|
|
246
|
+
If you have ideas or suggestions, feel free to drop a note in the GitHub repo [Discussions](https://github.com/ptmcg/zippathlib/discussions).
|
|
247
|
+
|
|
248
|
+
## License
|
|
249
|
+
This project is licensed under the MIT License - see the LICENSE file for details.
|
|
@@ -0,0 +1,9 @@
|
|
|
1
|
+
zippathlib/__init__.py,sha256=qxC_vTSj8evxfsgf1x-k6sMbGeW5hAWfnTgYatt7nZA,228
|
|
2
|
+
zippathlib/__main__.py,sha256=7ynPcm2FBM5CFhEJEzNk1xbjQ4EH_f2UjO-OdL04lTQ,8568
|
|
3
|
+
zippathlib/zip_pathlib.py,sha256=EbFY1IHRiMB5-txpLdFIa2sq01KWgehOAghnMEHfo-o,24334
|
|
4
|
+
zippathlib-0.6.0.dist-info/licenses/LICENSE,sha256=B0BZBr37IGE4EPrbiOE93PnBCVyrKLXJqBXC8EFHzgE,1069
|
|
5
|
+
zippathlib-0.6.0.dist-info/METADATA,sha256=NblnxGFP2daBwLYwCL7fTrwPFTRjR5_vEWo3VLGOrnE,10458
|
|
6
|
+
zippathlib-0.6.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
7
|
+
zippathlib-0.6.0.dist-info/entry_points.txt,sha256=oGQt99IKEGPX1DBW3L7boQxFDl2L9a67XQw5LyUYrZk,56
|
|
8
|
+
zippathlib-0.6.0.dist-info/top_level.txt,sha256=uM8DB-MPxHohOJpSSggSc-wro5y_nw-VNtkcE7rSzHI,11
|
|
9
|
+
zippathlib-0.6.0.dist-info/RECORD,,
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2025 Paul McGuire
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
zippathlib
|