zipremove 0.2.0__tar.gz → 0.4.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: zipremove
3
- Version: 0.2.0
3
+ Version: 0.4.0
4
4
  Summary: Extend `zipfile` with `remove`-related functionalities
5
5
  Home-page: https://github.com/danny0838/zipremove
6
6
  Author: Danny Lin
@@ -10,6 +10,7 @@ Classifier: Development Status :: 4 - Beta
10
10
  Classifier: Intended Audience :: Developers
11
11
  Classifier: Topic :: System :: Archiving :: Compression
12
12
  Classifier: Programming Language :: Python :: 3
13
+ Classifier: Programming Language :: Python :: 3 :: Only
13
14
  Classifier: Programming Language :: Python :: 3.9
14
15
  Classifier: Programming Language :: Python :: 3.10
15
16
  Classifier: Programming Language :: Python :: 3.11
@@ -31,6 +32,13 @@ Requires-Dist: flake8-isort>=6.0; extra == "dev"
31
32
  Requires-Dist: isort>=5.5; extra == "dev"
32
33
  Dynamic: license-file
33
34
 
35
+ ![PyPI version](https://img.shields.io/pypi/v/zipremove.svg)
36
+ ![Python Versions](https://img.shields.io/pypi/pyversions/zipremove)
37
+ ![Status](https://img.shields.io/pypi/status/zipremove)
38
+ ![License](https://img.shields.io/github/license/danny0838/zipremove)
39
+ [![Downloads](https://static.pepy.tech/personalized-badge/zipremove?period=month&left_text=Downloads)](https://pepy.tech/project/zipremove)
40
+ [![Pull request](https://img.shields.io/github/pulls/detail/state/python/cpython/134627)](https://github.com/python/cpython/pull/134627)
41
+
34
42
  This package extends `zipfile` with `remove`-related functionalities.
35
43
 
36
44
  ## API
@@ -54,30 +62,32 @@ This package extends `zipfile` with `remove`-related functionalities.
54
62
 
55
63
  * `ZipFile.repack(removed=None, *, strict_descriptor=False[, chunk_size])`
56
64
 
57
- Rewrites the archive to remove stale local file entries, shrinking the ZIP
58
- file size.
65
+ Rewrites the archive to remove stale local file entries, shrinking its file
66
+ size.
59
67
 
60
68
  If *removed* is provided, it must be a sequence of `ZipInfo` objects
61
69
  representing removed entries; only their corresponding local file entries
62
70
  will be removed.
63
71
 
64
- If *removed* is not provided, local file entries no longer referenced in the
65
- central directory will be removed. The algorithm assumes that local file
66
- entries are stored consecutively:
72
+ If *removed* is not provided, the archive is scanned to identify and remove
73
+ local file entries that are no longer referenced in the central directory.
74
+ The algorithm assumes that local file entries (and the central directory,
75
+ which is mostly treated as the "last entry") are stored consecutively:
67
76
 
68
77
  1. Data before the first referenced entry is removed only when it appears to
69
78
  be a sequence of consecutive entries with no extra following bytes; extra
70
- preceeding bytes are preserved.
79
+ preceding bytes are preserved.
71
80
  2. Data between referenced entries is removed only when it appears to
72
81
  be a sequence of consecutive entries with no extra preceding bytes; extra
73
82
  following bytes are preserved.
74
-
75
- ``strict_descriptor=True`` can be provided to skip the slower scan for an
76
- unsigned data descriptor (deprecated in the latest ZIP specification and is
77
- only used by legacy tools) when checking for bytes resembling a valid local
78
- file entry. This improves performance, but may cause some stale local file
79
- entries to be preserved, as any entry using an unsigned descriptor cannot
80
- be detected.
83
+ 3. Entries must not overlap. If any entry's data overlaps with another, a
84
+ `BadZipFile` error is raised and no changes are made.
85
+
86
+ When scanning, setting `strict_descriptor=True` disables detection of any
87
+ entry using an unsigned data descriptor (deprecated in the ZIP specification
88
+ since version 6.3.0, released on 2006-09-29, and used only by some legacy
89
+ tools). This improves performance, but may cause some stale entries to be
90
+ preserved.
81
91
 
82
92
  *chunk_size* may be specified to control the buffer size when moving
83
93
  entry data (default is 1 MiB).
@@ -1,3 +1,10 @@
1
+ ![PyPI version](https://img.shields.io/pypi/v/zipremove.svg)
2
+ ![Python Versions](https://img.shields.io/pypi/pyversions/zipremove)
3
+ ![Status](https://img.shields.io/pypi/status/zipremove)
4
+ ![License](https://img.shields.io/github/license/danny0838/zipremove)
5
+ [![Downloads](https://static.pepy.tech/personalized-badge/zipremove?period=month&left_text=Downloads)](https://pepy.tech/project/zipremove)
6
+ [![Pull request](https://img.shields.io/github/pulls/detail/state/python/cpython/134627)](https://github.com/python/cpython/pull/134627)
7
+
1
8
  This package extends `zipfile` with `remove`-related functionalities.
2
9
 
3
10
  ## API
@@ -21,30 +28,32 @@ This package extends `zipfile` with `remove`-related functionalities.
21
28
 
22
29
  * `ZipFile.repack(removed=None, *, strict_descriptor=False[, chunk_size])`
23
30
 
24
- Rewrites the archive to remove stale local file entries, shrinking the ZIP
25
- file size.
31
+ Rewrites the archive to remove stale local file entries, shrinking its file
32
+ size.
26
33
 
27
34
  If *removed* is provided, it must be a sequence of `ZipInfo` objects
28
35
  representing removed entries; only their corresponding local file entries
29
36
  will be removed.
30
37
 
31
- If *removed* is not provided, local file entries no longer referenced in the
32
- central directory will be removed. The algorithm assumes that local file
33
- entries are stored consecutively:
38
+ If *removed* is not provided, the archive is scanned to identify and remove
39
+ local file entries that are no longer referenced in the central directory.
40
+ The algorithm assumes that local file entries (and the central directory,
41
+ which is mostly treated as the "last entry") are stored consecutively:
34
42
 
35
43
  1. Data before the first referenced entry is removed only when it appears to
36
44
  be a sequence of consecutive entries with no extra following bytes; extra
37
- preceeding bytes are preserved.
45
+ preceding bytes are preserved.
38
46
  2. Data between referenced entries is removed only when it appears to
39
47
  be a sequence of consecutive entries with no extra preceding bytes; extra
40
48
  following bytes are preserved.
41
-
42
- ``strict_descriptor=True`` can be provided to skip the slower scan for an
43
- unsigned data descriptor (deprecated in the latest ZIP specification and is
44
- only used by legacy tools) when checking for bytes resembling a valid local
45
- file entry. This improves performance, but may cause some stale local file
46
- entries to be preserved, as any entry using an unsigned descriptor cannot
47
- be detected.
49
+ 3. Entries must not overlap. If any entry's data overlaps with another, a
50
+ `BadZipFile` error is raised and no changes are made.
51
+
52
+ When scanning, setting `strict_descriptor=True` disables detection of any
53
+ entry using an unsigned data descriptor (deprecated in the ZIP specification
54
+ since version 6.3.0, released on 2006-09-29, and used only by some legacy
55
+ tools). This improves performance, but may cause some stale entries to be
56
+ preserved.
48
57
 
49
58
  *chunk_size* may be specified to control the buffer size when moving
50
59
  entry data (default is 1 MiB).
@@ -1,6 +1,6 @@
1
1
  [metadata]
2
2
  name = zipremove
3
- version = 0.2.0
3
+ version = 0.4.0
4
4
  author = Danny Lin
5
5
  author_email = danny0838@gmail.com
6
6
  url = https://github.com/danny0838/zipremove
@@ -13,6 +13,7 @@ classifiers =
13
13
  Intended Audience :: Developers
14
14
  Topic :: System :: Archiving :: Compression
15
15
  Programming Language :: Python :: 3
16
+ Programming Language :: Python :: 3 :: Only
16
17
  Programming Language :: Python :: 3.9
17
18
  Programming Language :: Python :: 3.10
18
19
  Programming Language :: Python :: 3.11
@@ -3,7 +3,9 @@ import io
3
3
  import os
4
4
  import struct
5
5
  from zipfile import *
6
- from zipfile import ( # noqa: F401
6
+ from zipfile import __all__ # noqa: F401
7
+ from zipfile import _get_compressor # noqa: F401
8
+ from zipfile import (
7
9
  _DD_SIGNATURE,
8
10
  _FH_COMPRESSED_SIZE,
9
11
  _FH_COMPRESSION_METHOD,
@@ -14,7 +16,6 @@ from zipfile import ( # noqa: F401
14
16
  _FH_SIGNATURE,
15
17
  _FH_UNCOMPRESSED_SIZE,
16
18
  LZMADecompressor,
17
- _get_compressor,
18
19
  _get_decompressor,
19
20
  crc32,
20
21
  sizeFileHeader,
@@ -29,6 +30,12 @@ except NameError:
29
30
  # polyfill for Python < 3.14
30
31
  ZIP_ZSTANDARD = 93
31
32
 
33
+ try:
34
+ from zipfile import _MASK_ENCRYPTED
35
+ except ImportError:
36
+ # polyfill for Python < 3.11
37
+ _MASK_ENCRYPTED = 1 << 0
38
+
32
39
  try:
33
40
  from zipfile import _MASK_USE_DATA_DESCRIPTOR
34
41
  except ImportError:
@@ -49,6 +56,18 @@ except ImportError:
49
56
  filename = filename.replace(os.altsep, "/")
50
57
  return filename
51
58
 
59
+ try:
60
+ LZMADecompressor().unused_data
61
+ except AttributeError:
62
+ # polyfill to support LZMADecompressor().unused_data
63
+ @property
64
+ def unused_data(self):
65
+ try:
66
+ return self._decomp.unused_data
67
+ except AttributeError:
68
+ return b''
69
+ LZMADecompressor.unused_data = unused_data
70
+
52
71
 
53
72
  class _ZipRepacker:
54
73
  """Class for ZipFile repacking."""
@@ -385,8 +404,11 @@ class _ZipRepacker:
385
404
 
386
405
  dd = self._scan_data_descriptor(fp, pos, end_offset, zip64)
387
406
  if dd is None and not self.strict_descriptor:
388
- dd = self._scan_data_descriptor_no_sig_by_decompression(
389
- fp, pos, end_offset, zip64, fheader[_FH_COMPRESSION_METHOD])
407
+ if zinfo.flag_bits & _MASK_ENCRYPTED:
408
+ dd = False
409
+ else:
410
+ dd = self._scan_data_descriptor_no_sig_by_decompression(
411
+ fp, pos, end_offset, zip64, fheader[_FH_COMPRESSION_METHOD])
390
412
  if dd is False:
391
413
  dd = self._scan_data_descriptor_no_sig(fp, pos, end_offset, zip64)
392
414
  if dd is None:
@@ -471,14 +493,10 @@ class _ZipRepacker:
471
493
  if decompressor is None:
472
494
  return False
473
495
 
474
- # Current LZMADecompressor is unreliable since it's `.eof` is usually
475
- # not set as expected.
476
- if isinstance(decompressor, LZMADecompressor):
477
- return False
478
-
479
496
  dd_fmt = '<LQQ' if zip64 else '<LLL'
480
497
  dd_size = struct.calcsize(dd_fmt)
481
498
 
499
+ # early return and prevent potential `fp.read(-1)`
482
500
  if end_offset - dd_size < offset:
483
501
  return None
484
502
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: zipremove
3
- Version: 0.2.0
3
+ Version: 0.4.0
4
4
  Summary: Extend `zipfile` with `remove`-related functionalities
5
5
  Home-page: https://github.com/danny0838/zipremove
6
6
  Author: Danny Lin
@@ -10,6 +10,7 @@ Classifier: Development Status :: 4 - Beta
10
10
  Classifier: Intended Audience :: Developers
11
11
  Classifier: Topic :: System :: Archiving :: Compression
12
12
  Classifier: Programming Language :: Python :: 3
13
+ Classifier: Programming Language :: Python :: 3 :: Only
13
14
  Classifier: Programming Language :: Python :: 3.9
14
15
  Classifier: Programming Language :: Python :: 3.10
15
16
  Classifier: Programming Language :: Python :: 3.11
@@ -31,6 +32,13 @@ Requires-Dist: flake8-isort>=6.0; extra == "dev"
31
32
  Requires-Dist: isort>=5.5; extra == "dev"
32
33
  Dynamic: license-file
33
34
 
35
+ ![PyPI version](https://img.shields.io/pypi/v/zipremove.svg)
36
+ ![Python Versions](https://img.shields.io/pypi/pyversions/zipremove)
37
+ ![Status](https://img.shields.io/pypi/status/zipremove)
38
+ ![License](https://img.shields.io/github/license/danny0838/zipremove)
39
+ [![Downloads](https://static.pepy.tech/personalized-badge/zipremove?period=month&left_text=Downloads)](https://pepy.tech/project/zipremove)
40
+ [![Pull request](https://img.shields.io/github/pulls/detail/state/python/cpython/134627)](https://github.com/python/cpython/pull/134627)
41
+
34
42
  This package extends `zipfile` with `remove`-related functionalities.
35
43
 
36
44
  ## API
@@ -54,30 +62,32 @@ This package extends `zipfile` with `remove`-related functionalities.
54
62
 
55
63
  * `ZipFile.repack(removed=None, *, strict_descriptor=False[, chunk_size])`
56
64
 
57
- Rewrites the archive to remove stale local file entries, shrinking the ZIP
58
- file size.
65
+ Rewrites the archive to remove stale local file entries, shrinking its file
66
+ size.
59
67
 
60
68
  If *removed* is provided, it must be a sequence of `ZipInfo` objects
61
69
  representing removed entries; only their corresponding local file entries
62
70
  will be removed.
63
71
 
64
- If *removed* is not provided, local file entries no longer referenced in the
65
- central directory will be removed. The algorithm assumes that local file
66
- entries are stored consecutively:
72
+ If *removed* is not provided, the archive is scanned to identify and remove
73
+ local file entries that are no longer referenced in the central directory.
74
+ The algorithm assumes that local file entries (and the central directory,
75
+ which is mostly treated as the "last entry") are stored consecutively:
67
76
 
68
77
  1. Data before the first referenced entry is removed only when it appears to
69
78
  be a sequence of consecutive entries with no extra following bytes; extra
70
- preceeding bytes are preserved.
79
+ preceding bytes are preserved.
71
80
  2. Data between referenced entries is removed only when it appears to
72
81
  be a sequence of consecutive entries with no extra preceding bytes; extra
73
82
  following bytes are preserved.
74
-
75
- ``strict_descriptor=True`` can be provided to skip the slower scan for an
76
- unsigned data descriptor (deprecated in the latest ZIP specification and is
77
- only used by legacy tools) when checking for bytes resembling a valid local
78
- file entry. This improves performance, but may cause some stale local file
79
- entries to be preserved, as any entry using an unsigned descriptor cannot
80
- be detected.
83
+ 3. Entries must not overlap. If any entry's data overlaps with another, a
84
+ `BadZipFile` error is raised and no changes are made.
85
+
86
+ When scanning, setting `strict_descriptor=True` disables detection of any
87
+ entry using an unsigned data descriptor (deprecated in the ZIP specification
88
+ since version 6.3.0, released on 2006-09-29, and used only by some legacy
89
+ tools). This improves performance, but may cause some stale entries to be
90
+ preserved.
81
91
 
82
92
  *chunk_size* may be specified to control the buffer size when moving
83
93
  entry data (default is 1 MiB).
@@ -6,6 +6,7 @@ import sys
6
6
  import unittest
7
7
  import unittest.mock as mock
8
8
  import warnings
9
+ from contextlib import nullcontext
9
10
 
10
11
  import zipremove as zipfile
11
12
 
@@ -610,6 +611,32 @@ class AbstractRepackTests(RepackHelperMixin):
610
611
  with zipfile.ZipFile(TESTFN) as zh:
611
612
  self.assertIsNone(zh.testzip())
612
613
 
614
+ def test_repack_propagation(self):
615
+ """Should call internal API with adequate parameters."""
616
+ self._prepare_zip_from_test_files(TESTFN, self.test_files)
617
+
618
+ with zipfile.ZipFile(TESTFN, 'a', self.compression) as zh:
619
+ zi = zh.remove(zh.infolist()[0])
620
+ with mock.patch.object(zipfile._ZipRepacker, 'repack') as m_rp:
621
+ zh.repack()
622
+ m_rp.assert_called_once_with(zh, None)
623
+
624
+ with zipfile.ZipFile(TESTFN, 'a', self.compression) as zh:
625
+ zi = zh.remove(zh.infolist()[0])
626
+ with mock.patch.object(zipfile._ZipRepacker, 'repack') as m_rp:
627
+ zh.repack([zi])
628
+ m_rp.assert_called_once_with(zh, [zi])
629
+
630
+ with zipfile.ZipFile(TESTFN, 'a', self.compression) as zh:
631
+ with mock.patch.object(zipfile, '_ZipRepacker') as m_rp:
632
+ zh.repack()
633
+ m_rp.assert_called_once_with()
634
+
635
+ with zipfile.ZipFile(TESTFN, 'a', self.compression) as zh:
636
+ with mock.patch.object(zipfile, '_ZipRepacker') as m_rp:
637
+ zh.repack(strict_descriptor=True, chunk_size=1024)
638
+ m_rp.assert_called_once_with(strict_descriptor=True, chunk_size=1024)
639
+
613
640
  def test_repack_bytes_before_first_file(self):
614
641
  """Should preserve random bytes before the first recorded local file entry."""
615
642
  for ii in ([], [0], [0, 1], [0, 1, 2]):
@@ -847,222 +874,6 @@ class AbstractRepackTests(RepackHelperMixin):
847
874
  with zipfile.ZipFile(TESTFN) as zh:
848
875
  self.assertIsNone(zh.testzip())
849
876
 
850
- @requires_zip64fix()
851
- def test_repack_zip64(self):
852
- """Should correctly handle file entries with zip64."""
853
- for ii in ([0], [0, 1], [1], [2]):
854
- with self.subTest(remove=ii):
855
- # calculate the expected results
856
- test_files = [data for j, data in enumerate(self.test_files) if j not in ii]
857
- expected_zinfos = self._prepare_zip_from_test_files(TESTFN, test_files, force_zip64=True)
858
- expected_size = os.path.getsize(TESTFN)
859
-
860
- # do the removal and check the result
861
- self._prepare_zip_from_test_files(TESTFN, self.test_files, force_zip64=True)
862
- with zipfile.ZipFile(TESTFN, 'a', self.compression) as zh:
863
- for i in ii:
864
- zh.remove(self.test_files[i][0])
865
- zh.repack()
866
-
867
- # check infolist
868
- self.assertEqual(
869
- [ComparableZipInfo(zi) for zi in zh.infolist()],
870
- expected_zinfos,
871
- )
872
-
873
- # check file size
874
- self.assertEqual(os.path.getsize(TESTFN), expected_size)
875
-
876
- # make sure the zip file is still valid
877
- with zipfile.ZipFile(TESTFN) as zh:
878
- self.assertIsNone(zh.testzip())
879
-
880
- def test_repack_data_descriptor(self):
881
- """Should correctly handle file entries using data descriptor."""
882
- for ii in ([0], [0, 1], [1], [2]):
883
- with self.subTest(remove=ii):
884
- # calculate the expected results
885
- test_files = [data for j, data in enumerate(self.test_files) if j not in ii]
886
- with open(TESTFN, 'wb') as fh:
887
- expected_zinfos = self._prepare_zip_from_test_files(Unseekable(fh), test_files)
888
- expected_size = os.path.getsize(TESTFN)
889
-
890
- # do the removal and check the result
891
- with open(TESTFN, 'wb') as fh:
892
- self._prepare_zip_from_test_files(Unseekable(fh), self.test_files)
893
- with zipfile.ZipFile(TESTFN, 'a', self.compression) as zh:
894
- # make sure data descriptor bit is really set (by making zipfile unseekable)
895
- for zi in zh.infolist():
896
- self.assertTrue(zi.flag_bits & 8, f'data descriptor not used: {zi.filename}')
897
-
898
- for i in ii:
899
- zh.remove(self.test_files[i][0])
900
- zh.repack()
901
-
902
- # check infolist
903
- self.assertEqual(
904
- [ComparableZipInfo(zi) for zi in zh.infolist()],
905
- expected_zinfos,
906
- )
907
-
908
- # check file size
909
- self.assertEqual(os.path.getsize(TESTFN), expected_size)
910
-
911
- # make sure the zip file is still valid
912
- with zipfile.ZipFile(TESTFN) as zh:
913
- self.assertIsNone(zh.testzip())
914
-
915
- @requires_zip64fix()
916
- def test_repack_data_descriptor_and_zip64(self):
917
- """Should correctly handle file entries using data descriptor and zip64."""
918
- for ii in ([0], [0, 1], [1], [2]):
919
- with self.subTest(remove=ii):
920
- # calculate the expected results
921
- test_files = [data for j, data in enumerate(self.test_files) if j not in ii]
922
- with open(TESTFN, 'wb') as fh:
923
- expected_zinfos = self._prepare_zip_from_test_files(Unseekable(fh), test_files, force_zip64=True)
924
- expected_size = os.path.getsize(TESTFN)
925
-
926
- # do the removal and check the result
927
- with open(TESTFN, 'wb') as fh:
928
- self._prepare_zip_from_test_files(Unseekable(fh), self.test_files, force_zip64=True)
929
- with zipfile.ZipFile(TESTFN, 'a', self.compression) as zh:
930
- # make sure data descriptor bit is really set (by making zipfile unseekable)
931
- for zi in zh.infolist():
932
- self.assertTrue(zi.flag_bits & 8, f'data descriptor not used: {zi.filename}')
933
-
934
- for i in ii:
935
- zh.remove(self.test_files[i][0])
936
- zh.repack()
937
-
938
- # check infolist
939
- self.assertEqual(
940
- [ComparableZipInfo(zi) for zi in zh.infolist()],
941
- expected_zinfos,
942
- )
943
-
944
- # check file size
945
- self.assertEqual(os.path.getsize(TESTFN), expected_size)
946
-
947
- # make sure the zip file is still valid
948
- with zipfile.ZipFile(TESTFN) as zh:
949
- self.assertIsNone(zh.testzip())
950
-
951
- def test_repack_data_descriptor_no_sig(self):
952
- """Should correctly handle file entries using data descriptor without signature."""
953
- for ii in ([0], [0, 1], [1], [2]):
954
- with self.subTest(remove=ii):
955
- # calculate the expected results
956
- test_files = [data for j, data in enumerate(self.test_files) if j not in ii]
957
- with open(TESTFN, 'wb') as fh:
958
- with mock.patch.object(struct, 'pack', side_effect=struct_pack_no_dd_sig):
959
- expected_zinfos = self._prepare_zip_from_test_files(Unseekable(fh), test_files)
960
- expected_size = os.path.getsize(TESTFN)
961
-
962
- # do the removal and check the result
963
- with open(TESTFN, 'wb') as fh:
964
- with mock.patch.object(struct, 'pack', side_effect=struct_pack_no_dd_sig):
965
- self._prepare_zip_from_test_files(Unseekable(fh), self.test_files)
966
- with zipfile.ZipFile(TESTFN, 'a', self.compression) as zh:
967
- # make sure data descriptor bit is really set (by making zipfile unseekable)
968
- for zi in zh.infolist():
969
- self.assertTrue(zi.flag_bits & 8, f'data descriptor flag not set: {zi.filename}')
970
-
971
- for i in ii:
972
- zh.remove(self.test_files[i][0])
973
- zh.repack()
974
-
975
- # check infolist
976
- self.assertEqual(
977
- [ComparableZipInfo(zi) for zi in zh.infolist()],
978
- expected_zinfos,
979
- )
980
-
981
- # check file size
982
- self.assertEqual(os.path.getsize(TESTFN), expected_size)
983
-
984
- # make sure the zip file is still valid
985
- with zipfile.ZipFile(TESTFN) as zh:
986
- self.assertIsNone(zh.testzip())
987
-
988
- def test_repack_data_descriptor_no_sig_strict(self):
989
- """Should skip data descriptor without signature when `strict_descriptor` is set."""
990
- for ii in ([0], [0, 1], [1], [2]):
991
- with self.subTest(remove=ii):
992
- # calculate the expected results
993
- with open(TESTFN, 'wb') as fh:
994
- with mock.patch.object(struct, 'pack', side_effect=struct_pack_no_dd_sig):
995
- self._prepare_zip_from_test_files(Unseekable(fh), self.test_files)
996
- with zipfile.ZipFile(TESTFN, 'a') as zh:
997
- for i in ii:
998
- zh.remove(self.test_files[i][0])
999
- expected_zinfos = [ComparableZipInfo(zi) for zi in zh.infolist()]
1000
- expected_size = os.path.getsize(TESTFN)
1001
-
1002
- # do the removal and check the result
1003
- with open(TESTFN, 'wb') as fh:
1004
- with mock.patch.object(struct, 'pack', side_effect=struct_pack_no_dd_sig):
1005
- self._prepare_zip_from_test_files(Unseekable(fh), self.test_files)
1006
- with zipfile.ZipFile(TESTFN, 'a', self.compression) as zh:
1007
- # make sure data descriptor bit is really set (by making zipfile unseekable)
1008
- for zi in zh.infolist():
1009
- self.assertTrue(zi.flag_bits & 8, f'data descriptor flag not set: {zi.filename}')
1010
-
1011
- for i in ii:
1012
- zh.remove(self.test_files[i][0])
1013
- zh.repack(strict_descriptor=True)
1014
-
1015
- # check infolist
1016
- self.assertEqual(
1017
- [ComparableZipInfo(zi) for zi in zh.infolist()],
1018
- expected_zinfos,
1019
- )
1020
-
1021
- # check file size
1022
- self.assertEqual(os.path.getsize(TESTFN), expected_size)
1023
-
1024
- # make sure the zip file is still valid
1025
- with zipfile.ZipFile(TESTFN) as zh:
1026
- self.assertIsNone(zh.testzip())
1027
-
1028
- @requires_zip64fix()
1029
- def test_repack_data_descriptor_no_sig_and_zip64(self):
1030
- """Should correctly handle file entries using data descriptor without signature and zip64."""
1031
- for ii in ([0], [0, 1], [1], [2]):
1032
- with self.subTest(remove=ii):
1033
- # calculate the expected results
1034
- test_files = [data for j, data in enumerate(self.test_files) if j not in ii]
1035
- with open(TESTFN, 'wb') as fh:
1036
- with mock.patch.object(struct, 'pack', side_effect=struct_pack_no_dd_sig):
1037
- expected_zinfos = self._prepare_zip_from_test_files(Unseekable(fh), test_files, force_zip64=True)
1038
- expected_size = os.path.getsize(TESTFN)
1039
-
1040
- # do the removal and check the result
1041
- with open(TESTFN, 'wb') as fh:
1042
- with mock.patch.object(struct, 'pack', side_effect=struct_pack_no_dd_sig):
1043
- self._prepare_zip_from_test_files(Unseekable(fh), self.test_files, force_zip64=True)
1044
- with zipfile.ZipFile(TESTFN, 'a', self.compression) as zh:
1045
- # make sure data descriptor bit is really set (by making zipfile unseekable)
1046
- for zi in zh.infolist():
1047
- self.assertTrue(zi.flag_bits & 8, f'data descriptor flag not set: {zi.filename}')
1048
-
1049
- for i in ii:
1050
- zh.remove(self.test_files[i][0])
1051
- zh.repack()
1052
-
1053
- # check infolist
1054
- self.assertEqual(
1055
- [ComparableZipInfo(zi) for zi in zh.infolist()],
1056
- expected_zinfos,
1057
- )
1058
-
1059
- # check file size
1060
- self.assertEqual(os.path.getsize(TESTFN), expected_size)
1061
-
1062
- # make sure the zip file is still valid
1063
- with zipfile.ZipFile(TESTFN) as zh:
1064
- self.assertIsNone(zh.testzip())
1065
-
1066
877
  def test_repack_prepended_bytes(self):
1067
878
  for ii in ([], [0], [0, 1], [1], [2]):
1068
879
  with self.subTest(remove=ii):
@@ -1575,6 +1386,320 @@ class OtherRepackTests(unittest.TestCase):
1575
1386
  self.assertEqual(fz.read(), expected)
1576
1387
 
1577
1388
  class ZipRepackerTests(unittest.TestCase):
1389
+ def _generate_local_file_entry(self, arcname, raw_bytes,
1390
+ compression=zipfile.ZIP_STORED,
1391
+ force_zip64=False, dd=False, dd_sig=True):
1392
+ fz = io.BytesIO()
1393
+ f = Unseekable(fz) if dd else fz
1394
+ cm = (mock.patch.object(struct, 'pack', side_effect=struct_pack_no_dd_sig)
1395
+ if not dd_sig else nullcontext())
1396
+ with zipfile.ZipFile(f, 'w', compression=compression) as zh:
1397
+ with cm:
1398
+ with zh.open(arcname, 'w', force_zip64=force_zip64) as fh:
1399
+ fh.write(raw_bytes)
1400
+ fz.seek(0)
1401
+ return fz.read()
1402
+
1403
+ def test_validate_local_file_entry_stored(self):
1404
+ self._test_validate_local_file_entry(method=zipfile.ZIP_STORED)
1405
+
1406
+ @requires_zlib()
1407
+ def test_validate_local_file_entry_zlib(self):
1408
+ self._test_validate_local_file_entry(method=zipfile.ZIP_DEFLATED)
1409
+
1410
+ @requires_bz2()
1411
+ def test_validate_local_file_entry_bz2(self):
1412
+ self._test_validate_local_file_entry(method=zipfile.ZIP_BZIP2)
1413
+
1414
+ @requires_lzma()
1415
+ def test_validate_local_file_entry_lzma(self):
1416
+ self._test_validate_local_file_entry(method=zipfile.ZIP_LZMA)
1417
+
1418
+ @requires_zstd()
1419
+ def test_validate_local_file_entry_zstd(self):
1420
+ self._test_validate_local_file_entry(method=zipfile.ZIP_ZSTANDARD)
1421
+
1422
+ def _test_validate_local_file_entry(self, method):
1423
+ repacker = zipfile._ZipRepacker()
1424
+
1425
+ # basic
1426
+ bytes_ = self._generate_local_file_entry(
1427
+ 'file.txt', b'dummy', compression=method)
1428
+ fz = io.BytesIO(bytes_)
1429
+ with mock.patch.object(repacker, '_scan_data_descriptor',
1430
+ wraps=repacker._scan_data_descriptor) as m_sdd, \
1431
+ mock.patch.object(repacker, '_scan_data_descriptor_no_sig_by_decompression',
1432
+ wraps=repacker._scan_data_descriptor_no_sig_by_decompression) as m_sddnsbd, \
1433
+ mock.patch.object(repacker, '_scan_data_descriptor_no_sig',
1434
+ wraps=repacker._scan_data_descriptor_no_sig) as m_sddns:
1435
+ result = repacker._validate_local_file_entry(fz, 0, len(bytes_))
1436
+ self.assertEqual(result, len(bytes_))
1437
+ m_sdd.assert_not_called()
1438
+ m_sddnsbd.assert_not_called()
1439
+ m_sddns.assert_not_called()
1440
+
1441
+ # offset
1442
+ with mock.patch.object(repacker, '_scan_data_descriptor',
1443
+ wraps=repacker._scan_data_descriptor) as m_sdd, \
1444
+ mock.patch.object(repacker, '_scan_data_descriptor_no_sig_by_decompression',
1445
+ wraps=repacker._scan_data_descriptor_no_sig_by_decompression) as m_sddnsbd, \
1446
+ mock.patch.object(repacker, '_scan_data_descriptor_no_sig',
1447
+ wraps=repacker._scan_data_descriptor_no_sig) as m_sddns:
1448
+ result = repacker._validate_local_file_entry(fz, 0, len(bytes_) + 1)
1449
+ self.assertEqual(result, len(bytes_))
1450
+ m_sdd.assert_not_called()
1451
+ m_sddnsbd.assert_not_called()
1452
+ m_sddns.assert_not_called()
1453
+
1454
+ bytes_ = b'pre' + bytes_ + b'post'
1455
+ fz = io.BytesIO(bytes_)
1456
+ with mock.patch.object(repacker, '_scan_data_descriptor',
1457
+ wraps=repacker._scan_data_descriptor) as m_sdd, \
1458
+ mock.patch.object(repacker, '_scan_data_descriptor_no_sig_by_decompression',
1459
+ wraps=repacker._scan_data_descriptor_no_sig_by_decompression) as m_sddnsbd, \
1460
+ mock.patch.object(repacker, '_scan_data_descriptor_no_sig',
1461
+ wraps=repacker._scan_data_descriptor_no_sig) as m_sddns:
1462
+ result = repacker._validate_local_file_entry(fz, 3, len(bytes_) - 4)
1463
+ self.assertEqual(result, len(bytes_) - 7)
1464
+ m_sdd.assert_not_called()
1465
+ m_sddnsbd.assert_not_called()
1466
+ m_sddns.assert_not_called()
1467
+
1468
+ with mock.patch.object(repacker, '_scan_data_descriptor',
1469
+ wraps=repacker._scan_data_descriptor) as m_sdd, \
1470
+ mock.patch.object(repacker, '_scan_data_descriptor_no_sig_by_decompression',
1471
+ wraps=repacker._scan_data_descriptor_no_sig_by_decompression) as m_sddnsbd, \
1472
+ mock.patch.object(repacker, '_scan_data_descriptor_no_sig',
1473
+ wraps=repacker._scan_data_descriptor_no_sig) as m_sddns:
1474
+ result = repacker._validate_local_file_entry(fz, 3, len(bytes_))
1475
+ self.assertEqual(result, len(bytes_) - 7)
1476
+ m_sdd.assert_not_called()
1477
+ m_sddnsbd.assert_not_called()
1478
+ m_sddns.assert_not_called()
1479
+
1480
+ # return None if no match at given offset
1481
+ with mock.patch.object(repacker, '_scan_data_descriptor',
1482
+ wraps=repacker._scan_data_descriptor) as m_sdd, \
1483
+ mock.patch.object(repacker, '_scan_data_descriptor_no_sig_by_decompression',
1484
+ wraps=repacker._scan_data_descriptor_no_sig_by_decompression) as m_sddnsbd, \
1485
+ mock.patch.object(repacker, '_scan_data_descriptor_no_sig',
1486
+ wraps=repacker._scan_data_descriptor_no_sig) as m_sddns:
1487
+ result = repacker._validate_local_file_entry(fz, 2, len(bytes_) - 4)
1488
+ self.assertEqual(result, None)
1489
+ m_sdd.assert_not_called()
1490
+ m_sddnsbd.assert_not_called()
1491
+ m_sddns.assert_not_called()
1492
+
1493
+ with mock.patch.object(repacker, '_scan_data_descriptor',
1494
+ wraps=repacker._scan_data_descriptor) as m_sdd, \
1495
+ mock.patch.object(repacker, '_scan_data_descriptor_no_sig_by_decompression',
1496
+ wraps=repacker._scan_data_descriptor_no_sig_by_decompression) as m_sddnsbd, \
1497
+ mock.patch.object(repacker, '_scan_data_descriptor_no_sig',
1498
+ wraps=repacker._scan_data_descriptor_no_sig) as m_sddns:
1499
+ result = repacker._validate_local_file_entry(fz, 4, len(bytes_) - 4)
1500
+ self.assertEqual(result, None)
1501
+ m_sdd.assert_not_called()
1502
+ m_sddnsbd.assert_not_called()
1503
+ m_sddns.assert_not_called()
1504
+
1505
+ # return None if no sufficient header length
1506
+ bytes_ = self._generate_local_file_entry(
1507
+ 'file.txt', b'dummy', compression=method)
1508
+ bytes_ = bytes_[:29]
1509
+ fz = io.BytesIO(bytes_)
1510
+ with mock.patch.object(repacker, '_scan_data_descriptor',
1511
+ wraps=repacker._scan_data_descriptor) as m_sdd, \
1512
+ mock.patch.object(repacker, '_scan_data_descriptor_no_sig_by_decompression',
1513
+ wraps=repacker._scan_data_descriptor_no_sig_by_decompression) as m_sddnsbd, \
1514
+ mock.patch.object(repacker, '_scan_data_descriptor_no_sig',
1515
+ wraps=repacker._scan_data_descriptor_no_sig) as m_sddns:
1516
+ result = repacker._validate_local_file_entry(fz, 0, len(bytes_))
1517
+ self.assertEqual(result, None)
1518
+ m_sdd.assert_not_called()
1519
+ m_sddnsbd.assert_not_called()
1520
+ m_sddns.assert_not_called()
1521
+
1522
+ # data descriptor
1523
+ bytes_ = self._generate_local_file_entry(
1524
+ 'file.txt', b'dummy', compression=method, dd=True)
1525
+ fz = io.BytesIO(bytes_)
1526
+ with mock.patch.object(repacker, '_scan_data_descriptor',
1527
+ wraps=repacker._scan_data_descriptor) as m_sdd, \
1528
+ mock.patch.object(repacker, '_scan_data_descriptor_no_sig_by_decompression',
1529
+ wraps=repacker._scan_data_descriptor_no_sig_by_decompression) as m_sddnsbd, \
1530
+ mock.patch.object(repacker, '_scan_data_descriptor_no_sig',
1531
+ wraps=repacker._scan_data_descriptor_no_sig) as m_sddns:
1532
+ result = repacker._validate_local_file_entry(fz, 0, len(bytes_))
1533
+ self.assertEqual(result, len(bytes_))
1534
+ m_sdd.assert_called_once_with(fz, 38, len(bytes_), False)
1535
+ m_sddnsbd.assert_not_called()
1536
+ m_sddns.assert_not_called()
1537
+
1538
+ # data descriptor (unsigned)
1539
+ bytes_ = self._generate_local_file_entry(
1540
+ 'file.txt', b'dummy', compression=method, dd=True, dd_sig=False)
1541
+ fz = io.BytesIO(bytes_)
1542
+ with mock.patch.object(repacker, '_scan_data_descriptor',
1543
+ wraps=repacker._scan_data_descriptor) as m_sdd, \
1544
+ mock.patch.object(repacker, '_scan_data_descriptor_no_sig_by_decompression',
1545
+ wraps=repacker._scan_data_descriptor_no_sig_by_decompression) as m_sddnsbd, \
1546
+ mock.patch.object(repacker, '_scan_data_descriptor_no_sig',
1547
+ wraps=repacker._scan_data_descriptor_no_sig) as m_sddns:
1548
+ result = repacker._validate_local_file_entry(fz, 0, len(bytes_))
1549
+ self.assertEqual(result, len(bytes_))
1550
+ m_sdd.assert_called_once_with(fz, 38, len(bytes_), False)
1551
+ m_sddnsbd.assert_called_once_with(fz, 38, len(bytes_), False, method)
1552
+ if repacker._scan_data_descriptor_no_sig_by_decompression(fz, 38, len(bytes_), False, method):
1553
+ m_sddns.assert_not_called()
1554
+ else:
1555
+ m_sddns.assert_called_once_with(fz, 38, len(bytes_), False)
1556
+
1557
+ # return None for data descriptor (unsigned) if `strict_descriptor=True`
1558
+ repacker = zipfile._ZipRepacker(strict_descriptor=True)
1559
+ bytes_ = self._generate_local_file_entry(
1560
+ 'file.txt', b'dummy', compression=method, dd=True, dd_sig=False)
1561
+ fz = io.BytesIO(bytes_)
1562
+ with mock.patch.object(repacker, '_scan_data_descriptor',
1563
+ wraps=repacker._scan_data_descriptor) as m_sdd, \
1564
+ mock.patch.object(repacker, '_scan_data_descriptor_no_sig_by_decompression',
1565
+ wraps=repacker._scan_data_descriptor_no_sig_by_decompression) as m_sddnsbd, \
1566
+ mock.patch.object(repacker, '_scan_data_descriptor_no_sig',
1567
+ wraps=repacker._scan_data_descriptor_no_sig) as m_sddns:
1568
+ result = repacker._validate_local_file_entry(fz, 0, len(bytes_))
1569
+ self.assertEqual(result, None)
1570
+ m_sdd.assert_called_once_with(fz, 38, len(bytes_), False)
1571
+ m_sddnsbd.assert_not_called()
1572
+ m_sddns.assert_not_called()
1573
+
1574
+ @requires_zip64fix()
1575
+ def test_validate_local_file_entry_zip64_stored(self):
1576
+ self._test_validate_local_file_entry_zip64(method=zipfile.ZIP_STORED)
1577
+
1578
+ @requires_zip64fix()
1579
+ @requires_zlib()
1580
+ def test_validate_local_file_entry_zip64_zlib(self):
1581
+ self._test_validate_local_file_entry_zip64(method=zipfile.ZIP_DEFLATED)
1582
+
1583
+ @requires_zip64fix()
1584
+ @requires_bz2()
1585
+ def test_validate_local_file_entry_zip64_bz2(self):
1586
+ self._test_validate_local_file_entry_zip64(method=zipfile.ZIP_BZIP2)
1587
+
1588
+ @requires_zip64fix()
1589
+ @requires_lzma()
1590
+ def test_validate_local_file_entry_zip64_lzma(self):
1591
+ self._test_validate_local_file_entry_zip64(method=zipfile.ZIP_LZMA)
1592
+
1593
+ @requires_zip64fix()
1594
+ @requires_zstd()
1595
+ def test_validate_local_file_entry_zip64_zstd(self):
1596
+ self._test_validate_local_file_entry_zip64(method=zipfile.ZIP_ZSTANDARD)
1597
+
1598
+ def _test_validate_local_file_entry_zip64(self, method):
1599
+ repacker = zipfile._ZipRepacker()
1600
+
1601
+ # zip64
1602
+ bytes_ = self._generate_local_file_entry(
1603
+ 'file.txt', b'dummy', compression=method, force_zip64=True)
1604
+ fz = io.BytesIO(bytes_)
1605
+ with mock.patch.object(repacker, '_scan_data_descriptor',
1606
+ wraps=repacker._scan_data_descriptor) as m_sdd, \
1607
+ mock.patch.object(repacker, '_scan_data_descriptor_no_sig_by_decompression',
1608
+ wraps=repacker._scan_data_descriptor_no_sig_by_decompression) as m_sddnsbd, \
1609
+ mock.patch.object(repacker, '_scan_data_descriptor_no_sig',
1610
+ wraps=repacker._scan_data_descriptor_no_sig) as m_sddns:
1611
+ result = repacker._validate_local_file_entry(fz, 0, len(bytes_))
1612
+ self.assertEqual(result, len(bytes_))
1613
+ m_sdd.assert_not_called()
1614
+ m_sddnsbd.assert_not_called()
1615
+ m_sddns.assert_not_called()
1616
+
1617
+ # data descriptor + zip64
1618
+ bytes_ = self._generate_local_file_entry(
1619
+ 'file.txt', b'dummy', compression=method, force_zip64=True, dd=True)
1620
+ fz = io.BytesIO(bytes_)
1621
+ with mock.patch.object(repacker, '_scan_data_descriptor',
1622
+ wraps=repacker._scan_data_descriptor) as m_sdd, \
1623
+ mock.patch.object(repacker, '_scan_data_descriptor_no_sig_by_decompression',
1624
+ wraps=repacker._scan_data_descriptor_no_sig_by_decompression) as m_sddnsbd, \
1625
+ mock.patch.object(repacker, '_scan_data_descriptor_no_sig',
1626
+ wraps=repacker._scan_data_descriptor_no_sig) as m_sddns:
1627
+ result = repacker._validate_local_file_entry(fz, 0, len(bytes_))
1628
+ self.assertEqual(result, len(bytes_))
1629
+ m_sdd.assert_called_once_with(fz, 58, len(bytes_), True)
1630
+ m_sddnsbd.assert_not_called()
1631
+ m_sddns.assert_not_called()
1632
+
1633
+ # data descriptor (unsigned) + zip64
1634
+ bytes_ = self._generate_local_file_entry(
1635
+ 'file.txt', b'dummy', compression=method, force_zip64=True, dd=True, dd_sig=False)
1636
+ fz = io.BytesIO(bytes_)
1637
+ with mock.patch.object(repacker, '_scan_data_descriptor',
1638
+ wraps=repacker._scan_data_descriptor) as m_sdd, \
1639
+ mock.patch.object(repacker, '_scan_data_descriptor_no_sig_by_decompression',
1640
+ wraps=repacker._scan_data_descriptor_no_sig_by_decompression) as m_sddnsbd, \
1641
+ mock.patch.object(repacker, '_scan_data_descriptor_no_sig',
1642
+ wraps=repacker._scan_data_descriptor_no_sig) as m_sddns:
1643
+ result = repacker._validate_local_file_entry(fz, 0, len(bytes_))
1644
+ self.assertEqual(result, len(bytes_))
1645
+ m_sdd.assert_called_once_with(fz, 58, len(bytes_), True)
1646
+ m_sddnsbd.assert_called_once_with(fz, 58, len(bytes_), True, method)
1647
+ if repacker._scan_data_descriptor_no_sig_by_decompression(fz, 58, len(bytes_), True, method):
1648
+ m_sddns.assert_not_called()
1649
+ else:
1650
+ m_sddns.assert_called_once_with(fz, 58, len(bytes_), True)
1651
+
1652
+ # return None for data descriptor (unsigned) if `strict_descriptor=True`
1653
+ repacker = zipfile._ZipRepacker(strict_descriptor=True)
1654
+ bytes_ = self._generate_local_file_entry(
1655
+ 'file.txt', b'dummy', compression=method, force_zip64=True, dd=True, dd_sig=False)
1656
+ fz = io.BytesIO(bytes_)
1657
+ with mock.patch.object(repacker, '_scan_data_descriptor',
1658
+ wraps=repacker._scan_data_descriptor) as m_sdd, \
1659
+ mock.patch.object(repacker, '_scan_data_descriptor_no_sig_by_decompression',
1660
+ wraps=repacker._scan_data_descriptor_no_sig_by_decompression) as m_sddnsbd, \
1661
+ mock.patch.object(repacker, '_scan_data_descriptor_no_sig',
1662
+ wraps=repacker._scan_data_descriptor_no_sig) as m_sddns:
1663
+ result = repacker._validate_local_file_entry(fz, 0, len(bytes_))
1664
+ self.assertEqual(result, None)
1665
+ m_sdd.assert_called_once_with(fz, 58, len(bytes_), True)
1666
+ m_sddnsbd.assert_not_called()
1667
+ m_sddns.assert_not_called()
1668
+
1669
+ def test_validate_local_file_entry_encrypted(self):
1670
+ repacker = zipfile._ZipRepacker()
1671
+
1672
+ bytes_ = (
1673
+ b'PK\x03\x04'
1674
+ b'\x14\x00'
1675
+ b'\x09\x00'
1676
+ b'\x08\x00'
1677
+ b'\xAB\x28'
1678
+ b'\xD2\x5A'
1679
+ b'\x00\x00\x00\x00'
1680
+ b'\x00\x00\x00\x00'
1681
+ b'\x00\x00\x00\x00'
1682
+ b'\x08\x00'
1683
+ b'\x00\x00'
1684
+ b'file.txt'
1685
+ b'\x97\xF1\x83\x34\x9D\xC4\x8C\xD3\xED\x79\x8C\xA2\xBB\x49\xFF\x1B\x89'
1686
+ b'\x3F\xF2\xF4\x4F'
1687
+ b'\x11\x00\x00\x00'
1688
+ b'\x05\x00\x00\x00'
1689
+ )
1690
+ fz = io.BytesIO(bytes_)
1691
+ with mock.patch.object(repacker, '_scan_data_descriptor',
1692
+ wraps=repacker._scan_data_descriptor) as m_sdd, \
1693
+ mock.patch.object(repacker, '_scan_data_descriptor_no_sig_by_decompression',
1694
+ wraps=repacker._scan_data_descriptor_no_sig_by_decompression) as m_sddnsbd, \
1695
+ mock.patch.object(repacker, '_scan_data_descriptor_no_sig',
1696
+ wraps=repacker._scan_data_descriptor_no_sig) as m_sddns:
1697
+ result = repacker._validate_local_file_entry(fz, 0, len(bytes_))
1698
+ self.assertEqual(result, len(bytes_))
1699
+ m_sdd.assert_called_once_with(fz, 38, len(bytes_), False)
1700
+ m_sddnsbd.assert_not_called()
1701
+ m_sddns.assert_called_once_with(fz, 38, len(bytes_), False)
1702
+
1578
1703
  def test_iter_scan_signature(self):
1579
1704
  bytes_ = b'sig__sig__sig__sig'
1580
1705
  ln = len(bytes_)
@@ -1624,134 +1749,176 @@ class ZipRepackerTests(unittest.TestCase):
1624
1749
  def test_scan_data_descriptor(self):
1625
1750
  repacker = zipfile._ZipRepacker()
1626
1751
 
1752
+ sig = zipfile._DD_SIGNATURE
1753
+ raw_bytes = comp_bytes = b'dummy'
1754
+ raw_len = comp_len = len(raw_bytes)
1755
+ raw_crc = zipfile.crc32(raw_bytes)
1756
+
1627
1757
  # basic
1628
- bytes_ = b'dummyPK\x07\x08\x3f\xf2\xf4\x4f\x05\x00\x00\x00\x05\x00\x00\x00'
1758
+ bytes_ = comp_bytes + struct.pack('<4L', sig, raw_crc, comp_len, raw_len)
1629
1759
  self.assertEqual(
1630
1760
  repacker._scan_data_descriptor(io.BytesIO(bytes_), 0, len(bytes_), False),
1631
- (0x4ff4f23f, 5, 5, 16),
1761
+ (raw_crc, comp_len, raw_len, 16),
1632
1762
  )
1633
1763
 
1634
1764
  # return None if no signature
1635
- bytes_ = b'dummy\x3f\xf2\xf4\x4f\x05\x00\x00\x00\x05\x00\x00\x00'
1765
+ bytes_ = comp_bytes + struct.pack('<3L', raw_crc, comp_len, raw_len)
1636
1766
  self.assertEqual(
1637
1767
  repacker._scan_data_descriptor(io.BytesIO(bytes_), 0, len(bytes_), False),
1638
1768
  None,
1639
1769
  )
1640
1770
 
1641
- # return None if not unpackable
1642
- bytes_ = b'PK\x07\x08'
1771
+ # return None if compressed size not match
1772
+ bytes_ = comp_bytes + struct.pack('<4L', sig, raw_crc, comp_len + 1, raw_len)
1643
1773
  self.assertEqual(
1644
1774
  repacker._scan_data_descriptor(io.BytesIO(bytes_), 0, len(bytes_), False),
1645
1775
  None,
1646
1776
  )
1647
1777
 
1648
- # return None if compressed size not match
1649
- bytes_ = b'dummPK\x07\x08\x3f\xf2\xf4\x4f\x05\x00\x00\x00\x05\x00\x00\x00'
1778
+ bytes_ = comp_bytes + struct.pack('<4L', sig, raw_crc, comp_len - 1, raw_len)
1779
+ self.assertEqual(
1780
+ repacker._scan_data_descriptor(io.BytesIO(bytes_), 0, len(bytes_), False),
1781
+ None,
1782
+ )
1783
+
1784
+ bytes_ = b'1' + comp_bytes + struct.pack('<4L', sig, raw_crc, comp_len, raw_len)
1785
+ self.assertEqual(
1786
+ repacker._scan_data_descriptor(io.BytesIO(bytes_), 0, len(bytes_), False),
1787
+ None,
1788
+ )
1789
+
1790
+ bytes_ = comp_bytes[1:] + struct.pack('<4L', sig, raw_crc, comp_len, raw_len)
1650
1791
  self.assertEqual(
1651
1792
  repacker._scan_data_descriptor(io.BytesIO(bytes_), 0, len(bytes_), False),
1652
1793
  None,
1653
1794
  )
1654
1795
 
1655
1796
  # zip64
1656
- bytes_ = b'dummyPK\x07\x08\x3f\xf2\xf4\x4f\x05\x00\x00\x00\x00\x00\x00\x00\x05\x00\x00\x00\x00\x00\x00\x00'
1797
+ bytes_ = comp_bytes + struct.pack('<2L2Q', sig, raw_crc, comp_len, raw_len)
1657
1798
  self.assertEqual(
1658
1799
  repacker._scan_data_descriptor(io.BytesIO(bytes_), 0, len(bytes_), True),
1659
- (0x4ff4f23f, 5, 5, 24),
1800
+ (raw_crc, comp_len, raw_len, 24),
1660
1801
  )
1661
1802
 
1662
1803
  # offset
1663
- bytes_ = b'dummyPK\x07\x08\x3f\xf2\xf4\x4f\x05\x00\x00\x00\x05\x00\x00\x00'
1804
+ bytes_ = comp_bytes + struct.pack('<4L', sig, raw_crc, comp_len, raw_len)
1664
1805
  self.assertEqual(
1665
1806
  repacker._scan_data_descriptor(io.BytesIO(bytes_), 1, len(bytes_), False),
1666
1807
  None,
1667
1808
  )
1668
1809
 
1669
- bytes_ = b'123dummyPK\x07\x08\x3f\xf2\xf4\x4f\x05\x00\x00\x00\x05\x00\x00\x00'
1810
+ bytes_ = b'123' + comp_bytes + struct.pack('<4L', sig, raw_crc, comp_len, raw_len)
1670
1811
  self.assertEqual(
1671
1812
  repacker._scan_data_descriptor(io.BytesIO(bytes_), 0, len(bytes_), False),
1672
1813
  None,
1673
1814
  )
1674
1815
  self.assertEqual(
1675
1816
  repacker._scan_data_descriptor(io.BytesIO(bytes_), 3, len(bytes_), False),
1676
- (0x4ff4f23f, 5, 5, 16),
1817
+ (raw_crc, comp_len, raw_len, 16),
1677
1818
  )
1678
1819
 
1679
1820
  # end_offset
1680
- bytes_ = b'dummyPK\x07\x08\x3f\xf2\xf4\x4f\x05\x00\x00\x00\x05\x00\x00\x00'
1821
+ bytes_ = comp_bytes + struct.pack('<4L', sig, raw_crc, comp_len, raw_len)
1681
1822
  self.assertEqual(
1682
1823
  repacker._scan_data_descriptor(io.BytesIO(bytes_), 0, len(bytes_) - 1, False),
1683
1824
  None,
1684
1825
  )
1685
1826
 
1686
- bytes_ = b'dummyPK\x07\x08\x3f\xf2\xf4\x4f\x05\x00\x00\x00\x05\x00\x00\x00123'
1827
+ bytes_ = comp_bytes + struct.pack('<4L', sig, raw_crc, comp_len, raw_len) + b'123'
1687
1828
  self.assertEqual(
1688
1829
  repacker._scan_data_descriptor(io.BytesIO(bytes_), 0, len(bytes_) - 3, False),
1689
- (0x4ff4f23f, 5, 5, 16),
1830
+ (raw_crc, comp_len, raw_len, 16),
1831
+ )
1832
+ self.assertEqual(
1833
+ repacker._scan_data_descriptor(io.BytesIO(bytes_), 0, len(bytes_), False),
1834
+ (raw_crc, comp_len, raw_len, 16),
1690
1835
  )
1691
1836
 
1692
1837
  def test_scan_data_descriptor_no_sig(self):
1693
1838
  repacker = zipfile._ZipRepacker()
1694
1839
 
1840
+ raw_bytes = comp_bytes = b'dummy'
1841
+ raw_len = comp_len = len(raw_bytes)
1842
+ raw_crc = zipfile.crc32(raw_bytes)
1843
+
1695
1844
  # basic
1696
- bytes_ = b'dummy\x3f\xf2\xf4\x4f\x05\x00\x00\x00\x05\x00\x00\x00'
1845
+ bytes_ = comp_bytes + struct.pack('<3L', raw_crc, comp_len, raw_len)
1697
1846
  self.assertEqual(
1698
1847
  repacker._scan_data_descriptor_no_sig(io.BytesIO(bytes_), 0, len(bytes_), False),
1699
- (0x4ff4f23f, 5, 5, 12),
1848
+ (raw_crc, comp_len, raw_len, 12),
1700
1849
  )
1701
1850
 
1702
1851
  # return None if compressed size not match
1703
- bytes_ = b'dumm\x3f\xf2\xf4\x4f\x05\x00\x00\x00\x05\x00\x00\x00'
1852
+ bytes_ = comp_bytes + struct.pack('<3L', raw_crc, comp_len + 1, raw_len)
1704
1853
  self.assertEqual(
1705
1854
  repacker._scan_data_descriptor_no_sig(io.BytesIO(bytes_), 0, len(bytes_), False),
1706
1855
  None,
1707
1856
  )
1708
1857
 
1709
- # zip64
1710
- bytes_ = b'dummy\x3f\xf2\xf4\x4f\x05\x00\x00\x00\x00\x00\x00\x00\x05\x00\x00\x00\x00\x00\x00\x00'
1858
+ bytes_ = comp_bytes + struct.pack('<3L', raw_crc, comp_len - 1, raw_len)
1711
1859
  self.assertEqual(
1712
- repacker._scan_data_descriptor_no_sig(io.BytesIO(bytes_), 0, len(bytes_), True),
1713
- (0x4ff4f23f, 5, 5, 20),
1860
+ repacker._scan_data_descriptor_no_sig(io.BytesIO(bytes_), 0, len(bytes_), False),
1861
+ None,
1714
1862
  )
1715
1863
 
1716
- # offset
1717
- bytes_ = b'dummy\x3f\xf2\xf4\x4f\x05\x00\x00\x00\x05\x00\x00\x00'
1864
+ bytes_ = b'1' + comp_bytes + struct.pack('<3L', raw_crc, comp_len, raw_len)
1718
1865
  self.assertEqual(
1719
- repacker._scan_data_descriptor_no_sig(io.BytesIO(bytes_), 1, len(bytes_), False),
1866
+ repacker._scan_data_descriptor_no_sig(io.BytesIO(bytes_), 0, len(bytes_), False),
1720
1867
  None,
1721
1868
  )
1722
1869
 
1723
- bytes_ = b'123dummy\x3f\xf2\xf4\x4f\x05\x00\x00\x00\x05\x00\x00\x00'
1870
+ bytes_ = comp_bytes[1:] + struct.pack('<3L', raw_crc, comp_len, raw_len)
1724
1871
  self.assertEqual(
1725
1872
  repacker._scan_data_descriptor_no_sig(io.BytesIO(bytes_), 0, len(bytes_), False),
1726
1873
  None,
1727
1874
  )
1875
+
1876
+ # zip64
1877
+ bytes_ = comp_bytes + struct.pack('<L2Q', raw_crc, comp_len, raw_len)
1878
+ self.assertEqual(
1879
+ repacker._scan_data_descriptor_no_sig(io.BytesIO(bytes_), 0, len(bytes_), True),
1880
+ (raw_crc, comp_len, raw_len, 20),
1881
+ )
1882
+
1883
+ # offset
1884
+ bytes_ = comp_bytes + struct.pack('<3L', raw_crc, comp_len, raw_len)
1885
+ self.assertEqual(
1886
+ repacker._scan_data_descriptor_no_sig(io.BytesIO(bytes_), 1, len(bytes_), False),
1887
+ None,
1888
+ )
1889
+
1890
+ bytes_ = b'123' + comp_bytes + struct.pack('<3L', raw_crc, comp_len, raw_len)
1728
1891
  self.assertEqual(
1729
1892
  repacker._scan_data_descriptor_no_sig(io.BytesIO(bytes_), 3, len(bytes_), False),
1730
- (0x4ff4f23f, 5, 5, 12),
1893
+ (raw_crc, comp_len, raw_len, 12),
1731
1894
  )
1732
1895
 
1733
1896
  # end_offset
1734
- bytes_ = b'dummy\x3f\xf2\xf4\x4f\x05\x00\x00\x00\x05\x00\x00\x00'
1897
+ bytes_ = comp_bytes + struct.pack('<3L', raw_crc, comp_len, raw_len)
1735
1898
  self.assertEqual(
1736
1899
  repacker._scan_data_descriptor_no_sig(io.BytesIO(bytes_), 0, len(bytes_) - 1, False),
1737
1900
  None,
1738
1901
  )
1739
1902
 
1740
- bytes_ = b'dummy\x3f\xf2\xf4\x4f\x05\x00\x00\x00\x05\x00\x00\x00123'
1903
+ bytes_ = comp_bytes + struct.pack('<3L', raw_crc, comp_len, raw_len) + b'123'
1741
1904
  self.assertEqual(
1742
1905
  repacker._scan_data_descriptor_no_sig(io.BytesIO(bytes_), 0, len(bytes_) - 3, False),
1743
- (0x4ff4f23f, 5, 5, 12),
1906
+ (raw_crc, comp_len, raw_len, 12),
1907
+ )
1908
+ self.assertEqual(
1909
+ repacker._scan_data_descriptor_no_sig(io.BytesIO(bytes_), 0, len(bytes_), False),
1910
+ (raw_crc, comp_len, raw_len, 12),
1744
1911
  )
1745
1912
 
1746
1913
  # chunk_size
1747
- bytes_ = b'dummy\x3f\xf2\xf4\x4f\x05\x00\x00\x00\x05\x00\x00\x00'
1914
+ bytes_ = comp_bytes + struct.pack('<3L', raw_crc, comp_len, raw_len)
1748
1915
  self.assertEqual(
1749
1916
  repacker._scan_data_descriptor_no_sig(io.BytesIO(bytes_), 0, len(bytes_), False, 12),
1750
- (0x4ff4f23f, 5, 5, 12),
1917
+ (raw_crc, comp_len, raw_len, 12),
1751
1918
  )
1752
1919
  self.assertEqual(
1753
1920
  repacker._scan_data_descriptor_no_sig(io.BytesIO(bytes_), 0, len(bytes_), False, 1),
1754
- (0x4ff4f23f, 5, 5, 12),
1921
+ (raw_crc, comp_len, raw_len, 12),
1755
1922
  )
1756
1923
 
1757
1924
  def test_scan_data_descriptor_no_sig_by_decompression_stored(self):
@@ -1767,7 +1934,7 @@ class ZipRepackerTests(unittest.TestCase):
1767
1934
 
1768
1935
  @requires_lzma()
1769
1936
  def test_scan_data_descriptor_no_sig_by_decompression_lzma(self):
1770
- self._test_scan_data_descriptor_no_sig_by_decompression_invalid(zipfile.ZIP_LZMA)
1937
+ self._test_scan_data_descriptor_no_sig_by_decompression(zipfile.ZIP_LZMA)
1771
1938
 
1772
1939
  @requires_zstd()
1773
1940
  def test_scan_data_descriptor_no_sig_by_decompression_zstd(self):
@@ -1780,37 +1947,40 @@ class ZipRepackerTests(unittest.TestCase):
1780
1947
  def _test_scan_data_descriptor_no_sig_by_decompression(self, method):
1781
1948
  repacker = zipfile._ZipRepacker()
1782
1949
 
1783
- compressor = zipfile._get_compressor(method)
1950
+ raw_bytes = b'dummy'
1951
+ raw_len = len(raw_bytes)
1952
+ raw_crc = zipfile.crc32(raw_bytes)
1784
1953
 
1785
- comp_bytes = compressor.compress(b'dummy')
1954
+ compressor = zipfile._get_compressor(method)
1955
+ comp_bytes = compressor.compress(raw_bytes)
1786
1956
  comp_bytes += compressor.flush()
1787
1957
  comp_len = len(comp_bytes)
1788
1958
 
1789
1959
  # basic
1790
- bytes_ = comp_bytes + b'\x3f\xf2\xf4\x4f' + struct.pack('<L', comp_len) + b'\x05\x00\x00\x00'
1960
+ bytes_ = comp_bytes + struct.pack('<3L', raw_crc, comp_len, raw_len)
1791
1961
  self.assertEqual(
1792
1962
  repacker._scan_data_descriptor_no_sig_by_decompression(
1793
1963
  io.BytesIO(bytes_), 0, len(bytes_), False, method),
1794
- (0x4ff4f23f, comp_len, 5, 12),
1964
+ (raw_crc, comp_len, raw_len, 12),
1795
1965
  )
1796
1966
 
1797
- # return None if insufficient data length
1798
- bytes_ = b'\x3f\xf2\xf4\x4f\x05\x00\x00\x00\x05\x00\x00\x00'
1967
+ # return None if data length < DD signature
1968
+ bytes_ = comp_bytes + struct.pack('<3L', raw_crc, comp_len, raw_len)
1799
1969
  self.assertEqual(
1800
1970
  repacker._scan_data_descriptor_no_sig_by_decompression(
1801
- io.BytesIO(bytes_), 0, len(bytes_) - 1, False, method),
1971
+ io.BytesIO(bytes_), 0, 11, False, method),
1802
1972
  None,
1803
1973
  )
1804
1974
 
1805
- bytes_ = b'\x3f\xf2\xf4\x4f\x05\x00\x00\x00\x05\x00\x00'
1975
+ # return None if compressed size not match
1976
+ bytes_ = comp_bytes + struct.pack('<3L', raw_crc, comp_len + 1, raw_len)
1806
1977
  self.assertEqual(
1807
1978
  repacker._scan_data_descriptor_no_sig_by_decompression(
1808
- io.BytesIO(bytes_), 0, len(bytes_) + 1, False, method),
1979
+ io.BytesIO(bytes_), 0, len(bytes_), False, method),
1809
1980
  None,
1810
1981
  )
1811
1982
 
1812
- # return None if compressed size not match
1813
- bytes_ = comp_bytes + b'\x3f\xf2\xf4\x4f' + struct.pack('<L', comp_len - 1) + b'\x05\x00\x00\x00'
1983
+ bytes_ = comp_bytes + struct.pack('<3L', raw_crc, comp_len - 1, raw_len)
1814
1984
  self.assertEqual(
1815
1985
  repacker._scan_data_descriptor_no_sig_by_decompression(
1816
1986
  io.BytesIO(bytes_), 0, len(bytes_), False, method),
@@ -1818,41 +1988,46 @@ class ZipRepackerTests(unittest.TestCase):
1818
1988
  )
1819
1989
 
1820
1990
  # zip64
1821
- bytes_ = comp_bytes + b'\x3f\xf2\xf4\x4f' + struct.pack('<Q', comp_len) + b'\x05\x00\x00\x00\x00\x00\x00\x00'
1991
+ bytes_ = comp_bytes + struct.pack('<L2Q', raw_crc, comp_len, raw_len)
1822
1992
  self.assertEqual(
1823
1993
  repacker._scan_data_descriptor_no_sig_by_decompression(
1824
1994
  io.BytesIO(bytes_), 0, len(bytes_), True, method),
1825
- (0x4ff4f23f, comp_len, 5, 20),
1995
+ (raw_crc, comp_len, raw_len, 20),
1826
1996
  )
1827
1997
 
1828
1998
  # offset
1829
- bytes_ = comp_bytes + b'\x3f\xf2\xf4\x4f' + struct.pack('<L', comp_len) + b'\x05\x00\x00\x00'
1999
+ bytes_ = comp_bytes + struct.pack('<3L', raw_crc, comp_len, raw_len)
1830
2000
  self.assertEqual(
1831
2001
  repacker._scan_data_descriptor_no_sig_by_decompression(
1832
2002
  io.BytesIO(bytes_), 1, len(bytes_), False, method),
1833
2003
  None,
1834
2004
  )
1835
2005
 
1836
- bytes_ = b'123' + comp_bytes + b'\x3f\xf2\xf4\x4f' + struct.pack('<L', comp_len) + b'\x05\x00\x00\x00'
2006
+ bytes_ = b'123' + comp_bytes + struct.pack('<3L', raw_crc, comp_len, raw_len)
1837
2007
  self.assertEqual(
1838
2008
  repacker._scan_data_descriptor_no_sig_by_decompression(
1839
2009
  io.BytesIO(bytes_), 3, len(bytes_), False, method),
1840
- (0x4ff4f23f, comp_len, 5, 12),
2010
+ (raw_crc, comp_len, raw_len, 12),
1841
2011
  )
1842
2012
 
1843
2013
  # end_offset
1844
- bytes_ = comp_bytes + b'\x3f\xf2\xf4\x4f' + struct.pack('<L', comp_len) + b'\x05\x00\x00\x00'
2014
+ bytes_ = comp_bytes + struct.pack('<3L', raw_crc, comp_len, raw_len)
1845
2015
  self.assertEqual(
1846
2016
  repacker._scan_data_descriptor_no_sig_by_decompression(
1847
- io.BytesIO(bytes_), 0, len(bytes_) - 2, False, method),
2017
+ io.BytesIO(bytes_), 0, len(bytes_) - 1, False, method),
1848
2018
  None,
1849
2019
  )
1850
2020
 
1851
- bytes_ = comp_bytes + b'\x3f\xf2\xf4\x4f' + struct.pack('<L', comp_len) + b'\x05\x00\x00\x00123'
2021
+ bytes_ = comp_bytes + struct.pack('<3L', raw_crc, comp_len, raw_len) + b'123'
1852
2022
  self.assertEqual(
1853
2023
  repacker._scan_data_descriptor_no_sig_by_decompression(
1854
- io.BytesIO(bytes_), 0, len(bytes_) - 2, False, method),
1855
- (0x4ff4f23f, comp_len, 5, 12),
2024
+ io.BytesIO(bytes_), 0, len(bytes_) - 3, False, method),
2025
+ (raw_crc, comp_len, raw_len, 12),
2026
+ )
2027
+ self.assertEqual(
2028
+ repacker._scan_data_descriptor_no_sig_by_decompression(
2029
+ io.BytesIO(bytes_), 0, len(bytes_), False, method),
2030
+ (raw_crc, comp_len, raw_len, 12),
1856
2031
  )
1857
2032
 
1858
2033
  def _test_scan_data_descriptor_no_sig_by_decompression_invalid(self, method):
@@ -13,10 +13,10 @@ from .test_zipfile import struct_pack_no_dd_sig
13
13
 
14
14
  # polyfills
15
15
  try:
16
- from test.test_zipfile.test_core import Unseekable
16
+ from test.test_zipfile.test_core import Unseekable, requires_zlib
17
17
  except ImportError:
18
18
  # polyfill for Python < 3.12
19
- from test.test_zipfile import Unseekable
19
+ from test.test_zipfile import Unseekable, requires_zlib
20
20
 
21
21
  ENABLED_RESOURCES = set(os.environ.get("TEST_RESOURCES", "").split(","))
22
22
 
@@ -172,5 +172,39 @@ class TestRepack(unittest.TestCase):
172
172
  zh.repack()
173
173
  self.assertIsNone(zh.testzip())
174
174
 
175
+ @requires_zlib()
176
+ def test_strip_removed_large_file_with_dd_no_sig_by_decompression(self):
177
+ """Should scan for the data descriptor (without signature) of a removed
178
+ large file without causing a memory issue."""
179
+ # Try the temp file. If we do TESTFN2, then it hogs
180
+ # gigabytes of disk space for the duration of the test.
181
+ with TemporaryFile() as f:
182
+ tracemalloc.start()
183
+ self._test_strip_removed_large_file_with_dd_no_sig_by_decompression(
184
+ f, zipfile.ZIP_DEFLATED)
185
+ self.assertFalse(f.closed)
186
+ current, peak = tracemalloc.get_traced_memory()
187
+ tracemalloc.stop()
188
+ self.assertLess(peak, self.allowed_memory)
189
+
190
+ def _test_strip_removed_large_file_with_dd_no_sig_by_decompression(self, f, method):
191
+ file = 'file.txt'
192
+ file1 = 'largefile.txt'
193
+ data = b'Sed ut perspiciatis unde omnis iste natus error sit voluptatem'
194
+ with mock.patch('zipfile.struct.pack', side_effect=struct_pack_no_dd_sig):
195
+ with zipfile.ZipFile(Unseekable(f), 'w', compression=method) as zh:
196
+ with zh.open(file1, 'w', force_zip64=True) as fh:
197
+ self._write_large_file(fh)
198
+ zh.writestr(file, data)
199
+
200
+ with zipfile.ZipFile(f, 'a') as zh:
201
+ # make sure data descriptor bit is really set (by making zip file unseekable)
202
+ for zi in zh.infolist():
203
+ self.assertTrue(zi.flag_bits & 8, f'data descriptor flag not set: {zi.filename}')
204
+
205
+ zh.remove(file1)
206
+ zh.repack()
207
+ self.assertIsNone(zh.testzip())
208
+
175
209
  if __name__ == "__main__":
176
210
  unittest.main()
File without changes
File without changes
File without changes