zipremove 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
zipremove/__init__.py ADDED
@@ -0,0 +1,647 @@
1
+ import copy
2
+ import io
3
+ import os
4
+ import struct
5
+ from zipfile import *
6
+ from zipfile import ( # noqa: F401
7
+ _DD_SIGNATURE,
8
+ _FH_COMPRESSED_SIZE,
9
+ _FH_COMPRESSION_METHOD,
10
+ _FH_CRC,
11
+ _FH_EXTRA_FIELD_LENGTH,
12
+ _FH_FILENAME_LENGTH,
13
+ _FH_GENERAL_PURPOSE_FLAG_BITS,
14
+ _FH_SIGNATURE,
15
+ _FH_UNCOMPRESSED_SIZE,
16
+ LZMADecompressor,
17
+ _get_compressor,
18
+ _get_decompressor,
19
+ crc32,
20
+ sizeFileHeader,
21
+ stringFileHeader,
22
+ structFileHeader,
23
+ )
24
+
25
+ # polyfills
26
# Compatibility shims: each name is taken from `zipfile` when the running
# Python provides it and defined locally otherwise.
try:
    ZIP_ZSTANDARD
except NameError:
    # Not provided by `from zipfile import *` on Python < 3.14.
    ZIP_ZSTANDARD = 93

try:
    from zipfile import _MASK_USE_DATA_DESCRIPTOR
except ImportError:
    # Python < 3.11: general purpose flag bit 3 (data descriptor in use).
    _MASK_USE_DATA_DESCRIPTOR = 1 << 3

try:
    from zipfile import _sanitize_filename
except ImportError:
    # Python < 3.11
    def _sanitize_filename(filename):
        """Return *filename* cut at the first NUL, using "/" as separator."""
        # Everything from the first null character onwards is dropped.
        filename = filename.partition(chr(0))[0]
        # Convert the platform separators to the forward slash used in
        # archive member names.
        for separator in (os.sep, os.altsep):
            if separator and separator != "/":
                filename = filename.replace(separator, "/")
        return filename
51
+
52
+
53
+ class _ZipRepacker:
54
+ """Class for ZipFile repacking."""
55
+ def __init__(self, *, strict_descriptor=False, chunk_size=2**20, debug=0):
56
+ self.debug = debug # Level of printing: 0 through 3
57
+ self.chunk_size = chunk_size
58
+ self.strict_descriptor = strict_descriptor
59
+
60
+ def _debug(self, level, *msg):
61
+ if self.debug >= level:
62
+ print(*msg)
63
+
64
+ def copy(self, zfile, zinfo, filename):
65
+ # make a copy of zinfo
66
+ zinfo2 = copy.deepcopy(zinfo)
67
+
68
+ # apply sanitized new filename as in `ZipInfo.__init__`
69
+ zinfo2.orig_filename = filename
70
+ zinfo2.filename = _sanitize_filename(filename)
71
+
72
+ zinfo2.header_offset = zfile.start_dir
73
+
74
+ # polyfill: update zinfo2._end_offset if exists
75
+ # (Python >= 3.8 with fix #109858)
76
+ if hasattr(zinfo2, '_end_offset'):
77
+ zinfo2._end_offset = None
78
+
79
+ # write to a new local file header
80
+ fp = zfile.fp
81
+ sizes = self._calc_local_file_entry_size(fp, zinfo)
82
+ fp.seek(zinfo2.header_offset)
83
+ fp.write(zinfo2.FileHeader())
84
+ self._copy_bytes(fp, zinfo.header_offset + sum(sizes[:3]), fp.tell(), sum(sizes[3:]))
85
+ zfile.start_dir = fp.tell()
86
+
87
+ # add to filelist
88
+ zfile.filelist.append(zinfo2)
89
+ zfile.NameToInfo[zinfo2.filename] = zinfo2
90
+
91
+ zfile._didModify = True
92
+
93
+ def repack(self, zfile, removed=None):
94
+ """
95
+ Repack the ZIP file, stripping unreferenced local file entries.
96
+
97
+ Assumes that local file entries are stored consecutively, with no gaps
98
+ or overlaps.
99
+
100
+ Behavior:
101
+
102
+ 1. If any referenced entry overlaps with another, a `BadZipFile` error
103
+ is raised since safe repacking cannot be guaranteed.
104
+
105
+ 2. Data before the first referenced entry is stripped only when it
106
+ appears to be a sequence of consecutive entries with no extra
107
+ following bytes; extra preceeding bytes are preserved.
108
+
109
+ 3. Data between referenced entries is stripped only when it appears to
110
+ be a sequence of consecutive entries with no extra preceding bytes;
111
+ extra following bytes are preserved.
112
+
113
+ 4. This is to prevent an unexpected data removal (false positive),
114
+ though a false negative may happen in certain rare cases.
115
+
116
+ Examples:
117
+
118
+ Stripping before the first referenced entry:
119
+
120
+ [random bytes]
121
+ [unreferenced local file entry]
122
+ [random bytes]
123
+ <-- stripping start
124
+ [unreferenced local file entry]
125
+ [unreferenced local file entry]
126
+ <-- stripping end
127
+ [local file entry 1] (or central directory)
128
+ ...
129
+
130
+ Stripping between referenced entries:
131
+
132
+ ...
133
+ [local file entry]
134
+ <-- stripping start
135
+ [unreferenced local file entry]
136
+ [unreferenced local file entry]
137
+ <-- stripping end
138
+ [random bytes]
139
+ [unreferenced local file entry]
140
+ [random bytes]
141
+ [local file entry] (or central directory)
142
+ ...
143
+
144
+ No stripping:
145
+
146
+ [unreferenced local file entry]
147
+ [random bytes]
148
+ [local file entry 1] (or central directory)
149
+ ...
150
+
151
+ No stripping:
152
+
153
+ ...
154
+ [local file entry]
155
+ [random bytes]
156
+ [unreferenced local file entry]
157
+ [local file entry] (or central directory)
158
+ ...
159
+
160
+ Side effects:
161
+ - Modifies the ZIP file in place.
162
+ - Updates zfile.start_dir to account for removed data.
163
+ - Sets zfile._didModify to True.
164
+ - Updates header_offset and _end_offset of referenced ZipInfo
165
+ instances.
166
+
167
+ Parameters:
168
+ zfile: A ZipFile object representing the archive to repack.
169
+ removed: Optional. A sequence of ZipInfo instances representing
170
+ the previously removed entries. When provided, only their
171
+ corresponding local file entries are stripped.
172
+ """
173
+ removed_zinfos = set(removed or ())
174
+
175
+ fp = zfile.fp
176
+
177
+ # get a sorted filelist by header offset, in case the dir order
178
+ # doesn't match the actual entry order
179
+ filelist = (*zfile.filelist, *removed_zinfos)
180
+ filelist = sorted(filelist, key=lambda x: x.header_offset)
181
+
182
+ # calculate each entry size and validate
183
+ entry_size_list = []
184
+ used_entry_size_list = []
185
+ for i, zinfo in enumerate(filelist):
186
+ try:
187
+ offset = filelist[i + 1].header_offset
188
+ except IndexError:
189
+ offset = zfile.start_dir
190
+ entry_size = offset - zinfo.header_offset
191
+
192
+ # may raise on an invalid local file header
193
+ used_entry_size = sum(self._calc_local_file_entry_size(fp, zinfo))
194
+
195
+ self._debug(3, i, zinfo.orig_filename, zinfo.header_offset, entry_size, used_entry_size)
196
+ if used_entry_size > entry_size:
197
+ raise BadZipFile(
198
+ f"Overlapped entries: {zinfo.orig_filename!r} ")
199
+
200
+ if removed is not None and zinfo not in removed_zinfos:
201
+ used_entry_size = entry_size
202
+
203
+ entry_size_list.append(entry_size)
204
+ used_entry_size_list.append(used_entry_size)
205
+
206
+ # calculate the starting entry offset (bytes to skip)
207
+ if removed is None:
208
+ try:
209
+ offset = filelist[0].header_offset
210
+ except IndexError:
211
+ offset = zfile.start_dir
212
+ entry_offset = self._calc_initial_entry_offset(fp, offset)
213
+ else:
214
+ entry_offset = 0
215
+
216
+ # move file entries
217
+ for i, zinfo in enumerate(filelist):
218
+ entry_size = entry_size_list[i]
219
+ used_entry_size = used_entry_size_list[i]
220
+
221
+ # update the header and move entry data to the new position
222
+ old_header_offset = zinfo.header_offset
223
+ zinfo.header_offset -= entry_offset
224
+
225
+ if zinfo in removed_zinfos:
226
+ self._copy_bytes(
227
+ fp,
228
+ old_header_offset + used_entry_size,
229
+ zinfo.header_offset,
230
+ entry_size - used_entry_size
231
+ )
232
+
233
+ # update entry_offset for subsequent files to follow
234
+ entry_offset += used_entry_size
235
+
236
+ else:
237
+ if entry_offset > 0:
238
+ self._copy_bytes(fp, old_header_offset, zinfo.header_offset, used_entry_size)
239
+
240
+ if used_entry_size < entry_size:
241
+ stale_entry_size = self._validate_local_file_entry_sequence(
242
+ fp,
243
+ old_header_offset + used_entry_size,
244
+ old_header_offset + entry_size,
245
+ )
246
+ else:
247
+ stale_entry_size = 0
248
+
249
+ if stale_entry_size > 0:
250
+ self._copy_bytes(
251
+ fp,
252
+ old_header_offset + used_entry_size + stale_entry_size,
253
+ zinfo.header_offset + used_entry_size,
254
+ entry_size - used_entry_size - stale_entry_size,
255
+ )
256
+
257
+ # update entry_offset for subsequent files to follow
258
+ entry_offset += stale_entry_size
259
+
260
+ # update state
261
+ zfile.start_dir -= entry_offset
262
+ zfile._didModify = True
263
+
264
+ # polyfill: update ZipInfo._end_offset if exists
265
+ # (Python >= 3.8 with fix #109858)
266
+ if hasattr(ZipInfo, '_end_offset'):
267
+ end_offset = zfile.start_dir
268
+ for zinfo in reversed(filelist):
269
+ if zinfo in removed_zinfos:
270
+ zinfo._end_offset = None
271
+ else:
272
+ if zinfo._end_offset is not None:
273
+ zinfo._end_offset = end_offset
274
+ end_offset = zinfo.header_offset
275
+
276
+ def _calc_initial_entry_offset(self, fp, data_offset):
277
+ checked_offsets = {}
278
+ if data_offset > 0:
279
+ self._debug(3, 'scanning file signatures before:', data_offset)
280
+ for pos in self._iter_scan_signature(fp, stringFileHeader, 0, data_offset):
281
+ self._debug(3, 'checking file signature at:', pos)
282
+ entry_size = self._validate_local_file_entry_sequence(fp, pos, data_offset, checked_offsets)
283
+ if entry_size == data_offset - pos:
284
+ return entry_size
285
+ return 0
286
+
287
+ def _iter_scan_signature(self, fp, signature, start_offset, end_offset, chunk_size=4096):
288
+ sig_len = len(signature)
289
+ remainder = b''
290
+ pos = start_offset
291
+
292
+ while pos < end_offset:
293
+ # required for each loop since fp may be changed during each yield
294
+ fp.seek(pos)
295
+
296
+ chunk = remainder + fp.read(min(chunk_size, end_offset - pos))
297
+
298
+ delta = pos - len(remainder)
299
+ idx = 0
300
+ while True:
301
+ idx = chunk.find(signature, idx)
302
+ if idx == -1:
303
+ break
304
+
305
+ yield delta + idx
306
+ idx += 1
307
+
308
+ remainder = chunk[-(sig_len - 1):]
309
+ pos += chunk_size
310
+
311
+ def _validate_local_file_entry_sequence(self, fp, start_offset, end_offset, checked_offsets=None):
312
+ offset = start_offset
313
+
314
+ while offset < end_offset:
315
+ self._debug(3, 'checking local file entry at:', offset)
316
+
317
+ # Cache checked offsets to improve performance.
318
+ try:
319
+ entry_size = checked_offsets[offset]
320
+ except (KeyError, TypeError):
321
+ entry_size = self._validate_local_file_entry(fp, offset, end_offset)
322
+ if checked_offsets is not None:
323
+ checked_offsets[offset] = entry_size
324
+ else:
325
+ self._debug(3, 'read from checked cache:', offset)
326
+
327
+ if entry_size is None:
328
+ break
329
+
330
+ offset += entry_size
331
+
332
+ return offset - start_offset
333
+
334
+ def _validate_local_file_entry(self, fp, offset, end_offset):
335
+ fp.seek(offset)
336
+ try:
337
+ fheader = self._read_local_file_header(fp)
338
+ except BadZipFile:
339
+ return None
340
+
341
+ # Create a dummy ZipInfo to utilize parsing.
342
+ # Flush only the required information.
343
+ zinfo = ZipInfo()
344
+ zinfo.header_offset = offset
345
+ zinfo.flag_bits = fheader[_FH_GENERAL_PURPOSE_FLAG_BITS]
346
+ zinfo.compress_size = fheader[_FH_COMPRESSED_SIZE]
347
+ zinfo.file_size = fheader[_FH_UNCOMPRESSED_SIZE]
348
+ zinfo.CRC = fheader[_FH_CRC]
349
+
350
+ filename = fp.read(fheader[_FH_FILENAME_LENGTH])
351
+ zinfo.extra = fp.read(fheader[_FH_EXTRA_FIELD_LENGTH])
352
+ pos = fp.tell()
353
+
354
+ if pos > end_offset:
355
+ return None
356
+
357
+ try:
358
+ # parse zip64
359
+ try:
360
+ zinfo._decodeExtra(crc32(filename))
361
+ except TypeError:
362
+ # polyfill for Python < 3.12
363
+ zinfo._decodeExtra()
364
+ except BadZipFile:
365
+ return None
366
+
367
+ dd_size = 0
368
+
369
+ if zinfo.flag_bits & _MASK_USE_DATA_DESCRIPTOR:
370
+ # According to the spec, these fields should be zero when data
371
+ # descriptor is used. Otherwise treat as a false positive on
372
+ # random bytes to return early, as scanning for data descriptor
373
+ # is rather expensive.
374
+ if not (zinfo.CRC == zinfo.compress_size == zinfo.file_size == 0):
375
+ return None
376
+
377
+ zip64 = fheader[_FH_UNCOMPRESSED_SIZE] == 0xffffffff
378
+
379
+ dd = self._scan_data_descriptor(fp, pos, end_offset, zip64)
380
+ if dd is None:
381
+ dd = self._scan_data_descriptor_no_sig_by_decompression(
382
+ fp, pos, end_offset, zip64, fheader[_FH_COMPRESSION_METHOD])
383
+ if dd is False:
384
+ if not self.strict_descriptor:
385
+ dd = self._scan_data_descriptor_no_sig(fp, pos, end_offset, zip64)
386
+ else:
387
+ dd = None
388
+ if dd is None:
389
+ return None
390
+
391
+ zinfo.CRC, zinfo.compress_size, zinfo.file_size, dd_size = dd
392
+
393
+ return (
394
+ sizeFileHeader +
395
+ fheader[_FH_FILENAME_LENGTH] + fheader[_FH_EXTRA_FIELD_LENGTH] +
396
+ zinfo.compress_size +
397
+ dd_size
398
+ )
399
+
400
+ def _read_local_file_header(self, fp):
401
+ fheader = fp.read(sizeFileHeader)
402
+ if len(fheader) != sizeFileHeader:
403
+ raise BadZipFile("Truncated file header")
404
+ fheader = struct.unpack(structFileHeader, fheader)
405
+ if fheader[_FH_SIGNATURE] != stringFileHeader:
406
+ raise BadZipFile("Bad magic number for file header")
407
+ return fheader
408
+
409
+ def _scan_data_descriptor(self, fp, offset, end_offset, zip64):
410
+ dd_fmt = '<LLQQ' if zip64 else '<LLLL'
411
+ dd_size = struct.calcsize(dd_fmt)
412
+
413
+ # scan for signature and take the first valid descriptor
414
+ for pos in self._iter_scan_signature(
415
+ fp, struct.pack('<L', _DD_SIGNATURE), offset, end_offset
416
+ ):
417
+ fp.seek(pos)
418
+ dd = fp.read(min(dd_size, end_offset - pos))
419
+ try:
420
+ _, crc, compress_size, file_size = struct.unpack(dd_fmt, dd)
421
+ except struct.error:
422
+ continue
423
+
424
+ # @TODO: also check CRC to better guard from a false positive?
425
+ if pos - offset != compress_size:
426
+ continue
427
+
428
+ return crc, compress_size, file_size, dd_size
429
+
430
+ return None
431
+
432
+ def _scan_data_descriptor_no_sig(self, fp, offset, end_offset, zip64, chunk_size=8192):
433
+ dd_fmt = '<LQQ' if zip64 else '<LLL'
434
+ dd_size = struct.calcsize(dd_fmt)
435
+
436
+ pos = offset
437
+ remainder = b''
438
+
439
+ fp.seek(offset)
440
+ while pos < end_offset:
441
+ chunk = remainder + fp.read(min(chunk_size, end_offset - pos))
442
+
443
+ delta = pos - len(remainder) - offset
444
+ mv = memoryview(chunk)
445
+ for i in range(len(chunk) - dd_size + 1):
446
+ dd = mv[i:i + dd_size]
447
+ try:
448
+ crc, compress_size, file_size = struct.unpack(dd_fmt, dd)
449
+ except struct.error:
450
+ continue
451
+ if delta + i != compress_size:
452
+ continue
453
+
454
+ return crc, compress_size, file_size, dd_size
455
+
456
+ remainder = chunk[-(dd_size - 1):]
457
+ pos += chunk_size
458
+
459
+ return None
460
+
461
+ def _scan_data_descriptor_no_sig_by_decompression(self, fp, offset, end_offset, zip64, method):
462
+ try:
463
+ decompressor = _get_decompressor(method)
464
+ except RuntimeError:
465
+ return False
466
+
467
+ if decompressor is None:
468
+ return False
469
+
470
+ # Current LZMADecompressor is unreliable since it's `.eof` is usually
471
+ # not set as expected.
472
+ if isinstance(decompressor, LZMADecompressor):
473
+ return False
474
+
475
+ dd_fmt = '<LQQ' if zip64 else '<LLL'
476
+ dd_size = struct.calcsize(dd_fmt)
477
+
478
+ if end_offset - dd_size < offset:
479
+ return None
480
+
481
+ try:
482
+ pos = self._trace_compressed_block_end(fp, offset, end_offset - dd_size, decompressor)
483
+ except Exception:
484
+ return None
485
+
486
+ fp.seek(pos)
487
+ dd = fp.read(dd_size)
488
+ try:
489
+ crc, compress_size, file_size = struct.unpack(dd_fmt, dd)
490
+ except struct.error:
491
+ return None
492
+ if pos - offset != compress_size:
493
+ return None
494
+
495
+ return crc, compress_size, file_size, dd_size
496
+
497
+ def _trace_compressed_block_end(self, fp, offset, end_offset, decompressor, chunk_size=4096):
498
+ fp.seek(offset)
499
+ read_size = 0
500
+ while True:
501
+ chunk = fp.read(min(chunk_size, end_offset - offset - read_size))
502
+ if not chunk:
503
+ raise EOFError('Unexpected EOF while decompressing')
504
+
505
+ # may raise on error
506
+ decompressor.decompress(chunk)
507
+
508
+ read_size += len(chunk)
509
+
510
+ if decompressor.eof:
511
+ unused_len = len(decompressor.unused_data)
512
+ return offset + read_size - unused_len
513
+
514
+ def _calc_local_file_entry_size(self, fp, zinfo):
515
+ fp.seek(zinfo.header_offset)
516
+ fheader = self._read_local_file_header(fp)
517
+
518
+ if zinfo.flag_bits & _MASK_USE_DATA_DESCRIPTOR:
519
+ zip64 = fheader[_FH_UNCOMPRESSED_SIZE] == 0xffffffff
520
+ dd_fmt = '<LLQQ' if zip64 else '<LLLL'
521
+ fp.seek(
522
+ fheader[_FH_FILENAME_LENGTH] + fheader[_FH_EXTRA_FIELD_LENGTH] +
523
+ zinfo.compress_size,
524
+ os.SEEK_CUR,
525
+ )
526
+ if fp.read(struct.calcsize('<L')) != struct.pack('<L', _DD_SIGNATURE):
527
+ dd_fmt = '<LQQ' if zip64 else '<LLL'
528
+ dd_size = struct.calcsize(dd_fmt)
529
+ else:
530
+ dd_size = 0
531
+
532
+ return (
533
+ sizeFileHeader,
534
+ fheader[_FH_FILENAME_LENGTH],
535
+ fheader[_FH_EXTRA_FIELD_LENGTH],
536
+ zinfo.compress_size,
537
+ dd_size,
538
+ )
539
+
540
+ def _copy_bytes(self, fp, old_offset, new_offset, size):
541
+ read_size = 0
542
+ while read_size < size:
543
+ fp.seek(old_offset + read_size)
544
+ data = fp.read(min(size - read_size, self.chunk_size))
545
+ fp.seek(new_offset + read_size)
546
+ fp.write(data)
547
+ fp.flush()
548
+ read_size += len(data)
549
+
550
+
551
class ZipFile(ZipFile):
    """`zipfile.ZipFile` extended with `copy`, `remove` and `repack`."""

    def copy(self, zinfo_or_arcname, filename, *, chunk_size=2**20):
        """Copy a member in the archive.

        *zinfo_or_arcname* is a member name or a `ZipInfo` from this archive;
        the copy is stored under *filename*. *chunk_size* is the buffer size
        used while copying the entry data.

        Returns the `ZipInfo` of the source member. Raises `ValueError` on a
        wrong mode, a closed archive or an open writing handle,
        `io.UnsupportedOperation` on an unseekable stream, and `KeyError`
        when the member is not in the archive.
        """
        writable_modes = ('w', 'x', 'a')
        if self.mode not in writable_modes:
            raise ValueError("copy() requires mode 'w', 'x', or 'a'")
        if not self.fp:
            raise ValueError(
                "Attempt to write to ZIP archive that was already closed")
        if self._writing:
            raise ValueError(
                "Can't write to ZIP archive while an open writing handle exists."
            )
        if not self._seekable:
            raise io.UnsupportedOperation("copy() requires a seekable stream.")

        with self._lock:
            # Resolve the source member; a name is looked up with getinfo(),
            # which raises KeyError if it does not exist.
            if not isinstance(zinfo_or_arcname, ZipInfo):
                member = self.getinfo(zinfo_or_arcname)
            elif zinfo_or_arcname in self.filelist:
                member = zinfo_or_arcname
            else:
                raise KeyError('There is no item %r in the archive' % zinfo_or_arcname)

            # Mark the archive as being written for the duration of the copy.
            self._writing = True
            try:
                repacker = _ZipRepacker(chunk_size=chunk_size)
                repacker.copy(self, member, filename)
            finally:
                self._writing = False

        return member

    def remove(self, zinfo_or_arcname):
        """Remove a member from the archive.

        Only the directory bookkeeping (`filelist`, `NameToInfo`) is changed
        here; the file data is not touched by this method.

        Returns the removed `ZipInfo`. Raises `ValueError` on a wrong mode,
        a closed archive or an open writing handle, and `KeyError` when the
        member is not in the archive.
        """
        writable_modes = ('w', 'x', 'a')
        if self.mode not in writable_modes:
            raise ValueError("remove() requires mode 'w', 'x', or 'a'")
        if not self.fp:
            raise ValueError(
                "Attempt to write to ZIP archive that was already closed")
        if self._writing:
            raise ValueError(
                "Can't write to ZIP archive while an open writing handle exists."
            )

        with self._lock:
            # Resolve the member; a name is looked up with getinfo(), which
            # raises KeyError if it does not exist.
            if not isinstance(zinfo_or_arcname, ZipInfo):
                member = self.getinfo(zinfo_or_arcname)
            elif zinfo_or_arcname in self.filelist:
                member = zinfo_or_arcname
            else:
                raise KeyError('There is no item %r in the archive' % zinfo_or_arcname)

            self.filelist.remove(member)
            self.NameToInfo.pop(member.filename, None)

            # If another entry carries the same name, map the name to it so
            # that it is not missing (prevents an error on `testzip()`).
            # Search backwards, as NameToInfo normally stores the last added
            # one.
            same_name = next(
                (zi for zi in reversed(self.filelist)
                 if zi.filename == member.filename),
                None,
            )
            if same_name is not None:
                self.NameToInfo.setdefault(same_name.filename, same_name)

            self._didModify = True

        return member

    def repack(self, removed=None, **opts):
        """Repack a zip file, removing non-referenced file entries.

        *removed* is passed on to `_ZipRepacker.repack`; *opts* are passed
        to the `_ZipRepacker` constructor.

        The archive must be opened with mode 'a', as mode 'w'/'x' do not
        truncate the file when closed. This cannot be simply changed as
        they may be used on an unseekable file buffer, which disallows
        truncation."""
        if self.mode != 'a':
            raise ValueError("repack() requires mode 'a'")
        if not self.fp:
            raise ValueError(
                "Attempt to write to ZIP archive that was already closed")
        if self._writing:
            raise ValueError(
                "Can't write to ZIP archive while an open writing handle exists"
            )

        with self._lock:
            # Mark the archive as being written for the duration of the repack.
            self._writing = True
            try:
                repacker = _ZipRepacker(**opts)
                repacker.repack(self, removed)
            finally:
                self._writing = False
@@ -0,0 +1,176 @@
1
+ Metadata-Version: 2.4
2
+ Name: zipremove
3
+ Version: 0.1.0
4
+ Summary: Extend `zipfile` with `remove`-related functionalities
5
+ Home-page: https://github.com/danny0838/zipremove
6
+ Author: Danny Lin
7
+ Author-email: danny0838@gmail.com
8
+ License: MIT
9
+ Classifier: Development Status :: 4 - Beta
10
+ Classifier: Intended Audience :: Developers
11
+ Classifier: Topic :: System :: Archiving :: Compression
12
+ Classifier: Programming Language :: Python :: 3
13
+ Classifier: Programming Language :: Python :: 3.9
14
+ Classifier: Programming Language :: Python :: 3.10
15
+ Classifier: Programming Language :: Python :: 3.11
16
+ Classifier: Programming Language :: Python :: 3.12
17
+ Classifier: Programming Language :: Python :: 3.13
18
+ Classifier: Programming Language :: Python :: 3.14
19
+ Classifier: Operating System :: OS Independent
20
+ Requires-Python: ~=3.9
21
+ Description-Content-Type: text/markdown
22
+ License-File: LICENSE.txt
23
+ Provides-Extra: dev
24
+ Requires-Dist: tox>=4.0; extra == "dev"
25
+ Requires-Dist: build; extra == "dev"
26
+ Requires-Dist: twine>=4.0; extra == "dev"
27
+ Requires-Dist: flake8>=5.0; extra == "dev"
28
+ Requires-Dist: flake8-comprehensions>=3.12; extra == "dev"
29
+ Requires-Dist: flake8-bugbear>=22.0; extra == "dev"
30
+ Requires-Dist: flake8-isort>=6.0; extra == "dev"
31
+ Requires-Dist: isort>=5.5; extra == "dev"
32
+ Dynamic: license-file
33
+
34
+ This package extends `zipfile` with `remove`-related functionalities.
35
+
36
+ ## API
37
+
38
+ * `ZipFile.remove(zinfo_or_arcname)`
39
+
40
+ Removes a member from the archive. *zinfo_or_arcname* may be the full path
41
+ of the member or a `ZipInfo` instance.
42
+
43
+ If multiple members share the same full path, only one is removed when
44
+ a path is provided.
45
+
46
+ This does not physically remove the local file entry from the archive;
47
+ the ZIP file size remains unchanged. Call `ZipFile.repack` afterwards
48
+ to reclaim space.
49
+
50
+ The archive must be opened with mode ``'w'``, ``'x'`` or ``'a'``.
51
+
52
+ Returns the removed `ZipInfo` instance.
53
+
54
+ Calling `remove` on a closed ZipFile will raise a `ValueError`.
55
+
56
+ * `ZipFile.repack(removed=None, *, strict_descriptor=False[, chunk_size])`
57
+
58
+ Rewrites the archive to remove stale local file entries, shrinking the ZIP
59
+ file size.
60
+
61
+ If *removed* is provided, it must be a sequence of `ZipInfo` objects
62
+ representing removed entries; only their corresponding local file entries
63
+ will be removed.
64
+
65
+ If *removed* is not provided, local file entries no longer referenced in the
66
+ central directory will be removed. The algorithm assumes that local file
67
+ entries are stored consecutively:
68
+
69
+ 1. Data before the first referenced entry is removed only when it appears to
70
+ be a sequence of consecutive entries with no extra following bytes; extra
71
+ preceding bytes are preserved.
72
+ 2. Data between referenced entries is removed only when it appears to
73
+ be a sequence of consecutive entries with no extra preceding bytes; extra
74
+ following bytes are preserved.
75
+
76
+ ``strict_descriptor=True`` can be provided to skip the slower scan for an
77
+ unsigned data descriptor (deprecated in the latest ZIP specification and
78
+ only used by legacy tools) when checking for bytes resembling a valid local
79
+ file entry. This improves performance, but may cause some stale local file
80
+ entries to be preserved, as any entry using an unsigned descriptor cannot
81
+ be detected.
82
+
83
+ *chunk_size* may be specified to control the buffer size when moving
84
+ entry data (default is 1 MiB).
85
+
86
+ The archive must be opened with mode ``'a'``.
87
+
88
+ Calling `repack` on a closed ZipFile will raise a `ValueError`.
89
+
90
+ * `ZipFile.copy(zinfo_or_arcname, filename, *[, chunk_size])`
91
+
92
+ Copies a member *zinfo_or_arcname* to *filename* in the archive.
93
+ *zinfo_or_arcname* may be the full path of the member or a `ZipInfo`
94
+ instance.
95
+
96
+ *chunk_size* may be specified to control the buffer size when copying
97
+ entry data (default is 1 MiB).
98
+
99
+ The archive must be opened with mode ``'w'``, ``'x'`` or ``'a'``, and the
100
+ underlying stream must be seekable.
101
+
102
+ Returns the `ZipInfo` instance of the original (source) member.
103
+
104
+ Calling `copy` on a closed ZipFile will raise a `ValueError`.
105
+
106
+
107
+ ## Examples
108
+
109
+ ### Remove files and reclaim space
110
+
111
+ ```python
112
+ import os
113
+ import zipremove as zipfile
114
+
115
+ with zipfile.ZipFile('archive.zip', 'w') as zh:
116
+ zh.writestr('file1', 'content1')
117
+ zh.writestr('file2', 'content2')
118
+ zh.writestr('file3', 'content3')
119
+ zh.writestr('file4', 'content4')
120
+
121
+ print(os.path.getsize('archive.zip')) # 398
122
+
123
+ with zipfile.ZipFile('archive.zip', 'a') as zh:
124
+ zh.remove('file1')
125
+ zh.remove('file2')
126
+ zh.remove('file3')
127
+ zh.repack()
128
+
129
+ print(os.path.getsize('archive.zip')) # 116
130
+ ```
131
+
132
+ ### Remove files under a directory and reclaim space
133
+
134
+ ```python
135
+ import os
136
+ import zipremove as zipfile
137
+
138
+ with zipfile.ZipFile('archive.zip', 'w') as zh:
139
+ zh.writestr('file0', 'content0')
140
+ zh.writestr('folder/file1', 'content1')
141
+ zh.writestr('folder/file2', 'content2')
142
+ zh.writestr('folder/file3', 'content3')
143
+
144
+ print(os.path.getsize('archive.zip')) # 440
145
+
146
+ with zipfile.ZipFile('archive.zip', 'a') as zh:
147
+ zinfos = [zh.remove(n) for n in zh.namelist() if n.startswith('folder/')]
148
+ zh.repack(zinfos)
149
+
150
+ print(os.path.getsize('archive.zip')) # 116
151
+ ```
152
+
153
+ ### Rename files under a directory and reclaim space
154
+
155
+ ```python
156
+ import os
157
+ import zipremove as zipfile
158
+
159
+ with zipfile.ZipFile('archive.zip', 'w') as zh:
160
+ zh.writestr('file0', 'content0')
161
+ zh.writestr('folder1/file1', 'content1')
162
+ zh.writestr('folder1/file2', 'content2')
163
+ zh.writestr('folder1/file3', 'content3')
164
+
165
+ print(os.path.getsize('archive.zip')) # 446
166
+
167
+ with zipfile.ZipFile('archive.zip', 'a') as zh:
168
+ for n in zh.namelist():
169
+ if n.startswith('folder1/'):
170
+ n2 = 'folder2/' + n[len('folder1/'):]
171
+ zh.copy(n, n2)
172
+ zh.remove(n)
173
+ zh.repack()
174
+
175
+ print(os.path.getsize('archive.zip')) # 446
176
+ ```
@@ -0,0 +1,6 @@
1
+ zipremove/__init__.py,sha256=7Zsq41HokO_OXm8hTOJzLfSQzYmoF9CYgf3855YVq4Q,23190
2
+ zipremove-0.1.0.dist-info/licenses/LICENSE.txt,sha256=DPhKIVISoyY27Og_OjvjwUeaoLmQkV0A5ZooG-0qyU8,1087
3
+ zipremove-0.1.0.dist-info/METADATA,sha256=SchVYUvN_ZzIpw9lV6HgbKvwTTTCBEad8WGrc_Vz7V8,6012
4
+ zipremove-0.1.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
5
+ zipremove-0.1.0.dist-info/top_level.txt,sha256=o5uNDGXkYnXQwCAqW36Y39ETXOC54gNiCohsM8Uzk80,10
6
+ zipremove-0.1.0.dist-info/RECORD,,
@@ -0,0 +1,5 @@
1
+ Wheel-Version: 1.0
2
+ Generator: setuptools (80.9.0)
3
+ Root-Is-Purelib: true
4
+ Tag: py3-none-any
5
+
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2025 Danny Lin
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1 @@
1
+ zipremove