zipFly64 1.0.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- zipfly64-1.0.0/LICENSE +21 -0
- zipfly64-1.0.0/PKG-INFO +125 -0
- zipfly64-1.0.0/README.md +89 -0
- zipfly64-1.0.0/pyproject.toml +27 -0
- zipfly64-1.0.0/setup.cfg +4 -0
- zipfly64-1.0.0/src/zipFly/BaseFile.py +85 -0
- zipfly64-1.0.0/src/zipFly/Compressor.py +37 -0
- zipfly64-1.0.0/src/zipFly/GenFile.py +43 -0
- zipfly64-1.0.0/src/zipFly/LocalFile.py +59 -0
- zipfly64-1.0.0/src/zipFly/ZipBase.py +256 -0
- zipfly64-1.0.0/src/zipFly/ZipFly.py +110 -0
- zipfly64-1.0.0/src/zipFly/__init__.py +5 -0
- zipfly64-1.0.0/src/zipFly/consts.py +68 -0
- zipfly64-1.0.0/src/zipFly64.egg-info/PKG-INFO +125 -0
- zipfly64-1.0.0/src/zipFly64.egg-info/SOURCES.txt +16 -0
- zipfly64-1.0.0/src/zipFly64.egg-info/dependency_links.txt +1 -0
- zipfly64-1.0.0/src/zipFly64.egg-info/requires.txt +1 -0
- zipfly64-1.0.0/src/zipFly64.egg-info/top_level.txt +1 -0
zipfly64-1.0.0/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2024 Pamparampam
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
zipfly64-1.0.0/PKG-INFO
ADDED
|
@@ -0,0 +1,125 @@
|
|
|
1
|
+
Metadata-Version: 2.1
|
|
2
|
+
Name: zipFly64
|
|
3
|
+
Version: 1.0.0
|
|
4
|
+
Summary: Stream zip64 archives on the fly.
|
|
5
|
+
Author-email: Pamparampampam <reallpamparampam.pl@gmail.com>
|
|
6
|
+
License: MIT License
|
|
7
|
+
|
|
8
|
+
Copyright (c) 2024 Pamparampam
|
|
9
|
+
|
|
10
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
11
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
12
|
+
in the Software without restriction, including without limitation the rights
|
|
13
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
14
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
15
|
+
furnished to do so, subject to the following conditions:
|
|
16
|
+
|
|
17
|
+
The above copyright notice and this permission notice shall be included in all
|
|
18
|
+
copies or substantial portions of the Software.
|
|
19
|
+
|
|
20
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
21
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
22
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
23
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
24
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
25
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
26
|
+
SOFTWARE.
|
|
27
|
+
Project-URL: Homepage, https://github.com/pam-param-pam/ZipFly
|
|
28
|
+
Keywords: zip64,zip,streaming,zipfly
|
|
29
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
30
|
+
Classifier: Programming Language :: Python
|
|
31
|
+
Classifier: Programming Language :: Python :: 3
|
|
32
|
+
Requires-Python: >=3.7
|
|
33
|
+
Description-Content-Type: text/markdown
|
|
34
|
+
License-File: LICENSE
|
|
35
|
+
Requires-Dist: aiofiles>=24.1.0
|
|
36
|
+
|
|
37
|
+
# ZipFly
|
|
38
|
+
|
|
39
|
+
<a href="http://forthebadge.com/"><img src="https://forthebadge.com/images/badges/0-percent-optimized.svg" alt="forthebadge"/></a>
|
|
40
|
+
<a href="http://forthebadge.com/"><img src="https://forthebadge.com/images/badges/gluten-free.png" alt="forthebadge"/></a>
|
|
41
|
+
<a href="http://forthebadge.com/"><img src="https://web.archive.org/web/20230604002050/https://forthebadge.com/images/badges/mom-made-pizza-rolls.svg" alt="forthebadge"/></a>
|
|
42
|
+
|
|
43
|
+
<img src="https://img.shields.io/badge/ZIP64-Certified-lightGreen" alt="Build Status"/>
|
|
44
|
+
<img src="https://img.shields.io/badge/build-failing-red" alt="Build Status"/>
|
|
45
|
+
<img src="https://img.shields.io/badge/made with-hate-orange" alt="Build Status"/>
|
|
46
|
+
<img src="https://img.shields.io/badge/fuck-zip-green" alt="Build Status"/>
|
|
47
|
+
|
|
48
|
+
### ZipFly is a library for creating & streaming ZIP64 archives "on the fly"
|
|
49
|
+
|
|
50
|
+
**It lets you create or fetch file content dynamically while the archive is being streamed.**
|
|
51
|
+
|
|
52
|
+
- No temporary files, data is streamed directly
|
|
53
|
+
- Ability to calculate archive size before streaming even begins
|
|
54
|
+
- Supported `deflate` compression method
|
|
55
|
+
- Small memory usage, streaming is done using yield statement
|
|
56
|
+
- Archive structure is created on the fly, and all data can be created during stream
|
|
57
|
+
- Files included into archive can be generated on the fly using Python generators
|
|
58
|
+
- **Independent of the goofy 🤮🤮 python's standard ZipFile implementation**
|
|
59
|
+
- No dependencies besides `aiofiles` (used for asynchronous reads of local files)
|
|
60
|
+
- Automatic detection and changing of duplicate names
|
|
61
|
+
- `Zip64` format compatible files
|
|
62
|
+
|
|
63
|
+
|
|
64
|
+
This library is based upon [this library](https://github.com/kbbdy/zipstream) <sub>_(this library was a piece of work...)_</sub>
|
|
65
|
+
|
|
66
|
+
## Typical Usage
|
|
67
|
+
|
|
68
|
+
```py
|
|
69
|
+
from zipFly import ZipFly, LocalFile, consts
|
|
70
|
+
# compression_method is optional, defaults to consts.NO_COMPRESSION
|
|
71
|
+
file1 = LocalFile(file_path='files/lqbfa61deebf1.mp4', compression_method=consts.NO_COMPRESSION) # or consts.COMPRESSION_DEFLATE
|
|
72
|
+
file2 = LocalFile(file_path='public/2ae9dcd01a3aa.mp4', name="files/my_file2.mp4") # override the file name
|
|
73
|
+
file3 = LocalFile(file_path='files/4shaw1dax4da.mp4', name="my_file3.mp4") # you control the directory path by specifying it in name
|
|
74
|
+
|
|
75
|
+
files = [file1, file2, file3]
|
|
76
|
+
|
|
77
|
+
zipFly = ZipFly(files)
|
|
78
|
+
|
|
79
|
+
# save to file, or do something else with the stream() generator
|
|
80
|
+
with open("out/file.zip", 'wb') as f_out:
|
|
81
|
+
for chunk in zipFly.stream():
|
|
82
|
+
f_out.write(chunk)
|
|
83
|
+
```
|
|
84
|
+
|
|
85
|
+
### Supports dynamically created files
|
|
86
|
+
```py
|
|
87
|
+
from zipFly import ZipFly, GenFile, LocalFile, consts
|
|
88
|
+
|
|
89
|
+
|
|
90
|
+
def file_generator():
|
|
91
|
+
yield b"uga buga"
|
|
92
|
+
yield b"a29jaGFtIGFsdGVybmF0eXdraQ=="
|
|
93
|
+
yield b"2137"
|
|
94
|
+
|
|
95
|
+
# size is optional, it allows to calculate the total size of the archive before any data is generated
|
|
96
|
+
# modification_time in epoch time, defaults to time.time()
|
|
97
|
+
file1 = GenFile(name="file.txt", generator=file_generator(), modification_time=time.time(), size=size, compression_method=consts.COMPRESSION_DEFLATE)
|
|
98
|
+
file2 = LocalFile(file_path='files/as61aade2ebfd.mp4', compression_method=consts.NO_COMPRESSION) # or consts.COMPRESSION_DEFLATE
|
|
99
|
+
|
|
100
|
+
files = [file1, file2]
|
|
101
|
+
|
|
102
|
+
zipFly = ZipFly(files)
|
|
103
|
+
archive_size = zipFly.calculate_archive_size() # raises ValueError if it can't calculate size
|
|
104
|
+
|
|
105
|
+
# for example you can set as content length in http response
|
|
106
|
+
response['Content-Length'] = archive_size
|
|
107
|
+
|
|
108
|
+
for chunk in zipFly.stream():
|
|
109
|
+
# do something
|
|
110
|
+
|
|
111
|
+
```
|
|
112
|
+
### Other
|
|
113
|
+
I created this library for my I Drive project.
|
|
114
|
+
|
|
115
|
+
If you have a different case scenario, and LocalFile and GenFile are not enough, you can extend BaseFile and everything else should work out of the box.
|
|
116
|
+
|
|
117
|
+
|
|
118
|
+
|
|
119
|
+
### PS
|
|
120
|
+
|
|
121
|
+
I wholeheartedly hope everyone responsible for creating ZIP documentation gets slaughtered in the most gore and painful way 😊 (in game)
|
|
122
|
+
|
|
123
|
+
(pls redo ur [docs](https://pkware.cachefly.net/webdocs/casestudies/APPNOTE.TXT))
|
|
124
|
+
|
|
125
|
+
|
zipfly64-1.0.0/README.md
ADDED
|
@@ -0,0 +1,89 @@
|
|
|
1
|
+
# ZipFly
|
|
2
|
+
|
|
3
|
+
<a href="http://forthebadge.com/"><img src="https://forthebadge.com/images/badges/0-percent-optimized.svg" alt="forthebadge"/></a>
|
|
4
|
+
<a href="http://forthebadge.com/"><img src="https://forthebadge.com/images/badges/gluten-free.png" alt="forthebadge"/></a>
|
|
5
|
+
<a href="http://forthebadge.com/"><img src="https://web.archive.org/web/20230604002050/https://forthebadge.com/images/badges/mom-made-pizza-rolls.svg" alt="forthebadge"/></a>
|
|
6
|
+
|
|
7
|
+
<img src="https://img.shields.io/badge/ZIP64-Certified-lightGreen" alt="Build Status"/>
|
|
8
|
+
<img src="https://img.shields.io/badge/build-failing-red" alt="Build Status"/>
|
|
9
|
+
<img src="https://img.shields.io/badge/made with-hate-orange" alt="Build Status"/>
|
|
10
|
+
<img src="https://img.shields.io/badge/fuck-zip-green" alt="Build Status"/>
|
|
11
|
+
|
|
12
|
+
### ZipFly is a library for creating & streaming ZIP64 archives "on the fly"
|
|
13
|
+
|
|
14
|
+
**It lets you create or fetch file content dynamically while the archive is being streamed.**
|
|
15
|
+
|
|
16
|
+
- No temporary files, data is streamed directly
|
|
17
|
+
- Ability to calculate archive size before streaming even begins
|
|
18
|
+
- Supported `deflate` compression method
|
|
19
|
+
- Small memory usage, streaming is done using yield statement
|
|
20
|
+
- Archive structure is created on the fly, and all data can be created during stream
|
|
21
|
+
- Files included into archive can be generated on the fly using Python generators
|
|
22
|
+
- **Independent of the goofy 🤮🤮 python's standard ZipFile implementation**
|
|
23
|
+
- No dependencies besides `aiofiles` (used for asynchronous reads of local files)
|
|
24
|
+
- Automatic detection and changing of duplicate names
|
|
25
|
+
- `Zip64` format compatible files
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
This library is based upon [this library](https://github.com/kbbdy/zipstream) <sub>_(this library was a piece of work...)_</sub>
|
|
29
|
+
|
|
30
|
+
## Typical Usage
|
|
31
|
+
|
|
32
|
+
```py
|
|
33
|
+
from zipFly import ZipFly, LocalFile, consts
|
|
34
|
+
# compression_method is optional, defaults to consts.NO_COMPRESSION
|
|
35
|
+
file1 = LocalFile(file_path='files/lqbfa61deebf1.mp4', compression_method=consts.NO_COMPRESSION) # or consts.COMPRESSION_DEFLATE
|
|
36
|
+
file2 = LocalFile(file_path='public/2ae9dcd01a3aa.mp4', name="files/my_file2.mp4") # override the file name
|
|
37
|
+
file3 = LocalFile(file_path='files/4shaw1dax4da.mp4', name="my_file3.mp4") # you control the directory path by specifying it in name
|
|
38
|
+
|
|
39
|
+
files = [file1, file2, file3]
|
|
40
|
+
|
|
41
|
+
zipFly = ZipFly(files)
|
|
42
|
+
|
|
43
|
+
# save to file, or do something else with the stream() generator
|
|
44
|
+
with open("out/file.zip", 'wb') as f_out:
|
|
45
|
+
for chunk in zipFly.stream():
|
|
46
|
+
f_out.write(chunk)
|
|
47
|
+
```
|
|
48
|
+
|
|
49
|
+
### Supports dynamically created files
|
|
50
|
+
```py
|
|
51
|
+
from zipFly import ZipFly, GenFile, LocalFile, consts
|
|
52
|
+
|
|
53
|
+
|
|
54
|
+
def file_generator():
|
|
55
|
+
yield b"uga buga"
|
|
56
|
+
yield b"a29jaGFtIGFsdGVybmF0eXdraQ=="
|
|
57
|
+
yield b"2137"
|
|
58
|
+
|
|
59
|
+
# size is optional, it allows to calculate the total size of the archive before any data is generated
|
|
60
|
+
# modification_time in epoch time, defaults to time.time()
|
|
61
|
+
file1 = GenFile(name="file.txt", generator=file_generator(), modification_time=time.time(), size=size, compression_method=consts.COMPRESSION_DEFLATE)
|
|
62
|
+
file2 = LocalFile(file_path='files/as61aade2ebfd.mp4', compression_method=consts.NO_COMPRESSION) # or consts.COMPRESSION_DEFLATE
|
|
63
|
+
|
|
64
|
+
files = [file1, file2]
|
|
65
|
+
|
|
66
|
+
zipFly = ZipFly(files)
|
|
67
|
+
archive_size = zipFly.calculate_archive_size() # raises ValueError if it can't calculate size
|
|
68
|
+
|
|
69
|
+
# for example you can set as content length in http response
|
|
70
|
+
response['Content-Length'] = archive_size
|
|
71
|
+
|
|
72
|
+
for chunk in zipFly.stream():
|
|
73
|
+
# do something
|
|
74
|
+
|
|
75
|
+
```
|
|
76
|
+
### Other
|
|
77
|
+
I created this library for my I Drive project.
|
|
78
|
+
|
|
79
|
+
If you have a different case scenario, and LocalFile and GenFile are not enough, you can extend BaseFile and everything else should work out of the box.
|
|
80
|
+
|
|
81
|
+
|
|
82
|
+
|
|
83
|
+
### PS
|
|
84
|
+
|
|
85
|
+
I wholeheartedly hope everyone responsible for creating ZIP documentation gets slaughtered in the most gore and painful way 😊 (in game)
|
|
86
|
+
|
|
87
|
+
(pls redo ur [docs](https://pkware.cachefly.net/webdocs/casestudies/APPNOTE.TXT))
|
|
88
|
+
|
|
89
|
+
|
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
# pyproject.toml
|
|
2
|
+
|
|
3
|
+
[build-system]
|
|
4
|
+
requires = ["setuptools>=61.0.0", "wheel"]
|
|
5
|
+
build-backend = "setuptools.build_meta"
|
|
6
|
+
|
|
7
|
+
[project]
|
|
8
|
+
name = "zipFly64"
|
|
9
|
+
version = "1.0.0"
|
|
10
|
+
description = "Stream zip64 archives on the fly."
|
|
11
|
+
readme = "README.md"
|
|
12
|
+
authors = [{ name = "Pamparampampam", email = "reallpamparampam.pl@gmail.com" }]
|
|
13
|
+
license = { file = "LICENSE" }
|
|
14
|
+
classifiers = [
|
|
15
|
+
"License :: OSI Approved :: MIT License",
|
|
16
|
+
"Programming Language :: Python",
|
|
17
|
+
"Programming Language :: Python :: 3",
|
|
18
|
+
]
|
|
19
|
+
keywords = ["zip64", "zip", "streaming", "zipfly"]
|
|
20
|
+
dependencies = [
|
|
21
|
+
"aiofiles >= 24.1.0",
|
|
22
|
+
]
|
|
23
|
+
requires-python = ">=3.7"
|
|
24
|
+
|
|
25
|
+
[project.urls]
|
|
26
|
+
Homepage = "https://github.com/pam-param-pam/ZipFly"
|
|
27
|
+
|
zipfly64-1.0.0/setup.cfg
ADDED
|
@@ -0,0 +1,85 @@
|
|
|
1
|
+
from abc import ABC, abstractmethod
|
|
2
|
+
from typing import Generator, AsyncGenerator
|
|
3
|
+
|
|
4
|
+
from zipFly import consts
|
|
5
|
+
from zipFly.Compressor import Compressor
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
class BaseFile(ABC):
    """Abstract base for a single entry of a streamed ZIP archive.

    Subclasses supply the raw bytes (``_generate_file_data`` /
    ``_async_generate_file_data``), the entry ``name``, its ``size`` and its
    ``modification_time``. This class pushes the raw bytes through a
    ``Compressor`` and provides the header helpers used when the archive
    structures are built (packed MS-DOS time/date, encoded file name).
    """

    def __init__(self, compression_method: int):
        # The three counters below are updated by Compressor while data flows.
        self.original_size = 0
        self.compressed_size = 0
        self.offset = 0  # Offset to local file header
        self.crc = 0
        self.flags = 0b00001000  # flag about using data descriptor is always on
        # A falsy value (None or 0) falls back to consts.NO_COMPRESSION.
        self.compression_method = compression_method or consts.NO_COMPRESSION

    def __str__(self):
        return f"FILE[{self.name}]"

    def generate_processed_file_data(self) -> Generator[bytes, None, None]:
        """
        Generates compressed file data

        Every chunk of ``_generate_file_data`` goes through a fresh
        ``Compressor``; empty results are skipped and the compressor tail is
        emitted last.
        """
        compressor = Compressor(self)

        for chunk in self._generate_file_data():
            chunk = compressor.process(chunk)
            if chunk:
                yield chunk
        chunk = compressor.tail()
        if chunk:
            yield chunk

    async def async_generate_processed_file_data(self) -> AsyncGenerator[bytes, None]:
        """
        Generates compressed file data

        Asynchronous twin of ``generate_processed_file_data`` that reads from
        ``_async_generate_file_data``.
        """
        compressor = Compressor(self)

        async for chunk in self._async_generate_file_data():
            chunk = compressor.process(chunk)
            if chunk:
                yield chunk
        chunk = compressor.tail()
        if chunk:
            yield chunk

    def _dos_timestamp_parts(self):
        """Return ``(year, month, day, hour, minute, second)`` in local time.

        The MS-DOS format used by ZIP headers cannot express dates before
        1980, so earlier timestamps are clamped to 1980-01-01 00:00:00.
        """
        import time  # local import: ``time`` is not imported at file level here

        stamp = time.localtime(self.modification_time)
        if stamp.tm_year < 1980:
            return 1980, 1, 1, 0, 0, 0
        # tm_sec may be 60/61 on leap seconds; keep it inside the 5-bit field.
        return (stamp.tm_year, stamp.tm_mon, stamp.tm_mday,
                stamp.tm_hour, stamp.tm_min, min(stamp.tm_sec, 59))

    def get_mod_time(self) -> int:
        """Return the modification time packed as an MS-DOS time field.

        Layout: hour in bits 11-15, minute in bits 5-10, seconds/2 in bits 0-4.
        """
        _, _, _, hour, minute, second = self._dos_timestamp_parts()
        return ((hour << 11) | (minute << 5) | (second // 2)) & 0xFFFF

    def get_mod_date(self) -> int:
        """Return the modification date packed as an MS-DOS date field.

        Layout: (year - 1980) in bits 9-15, month in bits 5-8, day in bits 0-4.
        """
        year, month, day, _, _, _ = self._dos_timestamp_parts()
        return (((year - 1980) << 9) | (month << 5) | day) & 0xFFFF

    @property
    def file_path_bytes(self) -> bytes:
        """Return the entry name encoded for the archive headers.

        Pure ASCII names are encoded as ASCII. For any other name
        ``consts.UTF8_FLAG`` is switched on in ``self.flags`` (a side effect
        of reading this property) and the name is encoded as UTF-8.
        """
        try:
            return self.name.encode("ascii")
        except UnicodeError:
            self.flags |= consts.UTF8_FLAG
            return self.name.encode("utf-8")

    @abstractmethod
    def _generate_file_data(self) -> Generator[bytes, None, None]:
        """Yield the raw (uncompressed) content of the entry."""
        raise NotImplementedError

    @abstractmethod
    async def _async_generate_file_data(self) -> AsyncGenerator[bytes, None]:
        """Asynchronously yield the raw (uncompressed) content of the entry."""
        raise NotImplementedError

    @abstractmethod
    def set_file_name(self, new_name: str) -> None:
        """Replace the name under which the entry is stored."""
        raise NotImplementedError

    @property
    def size(self) -> int:
        """Size of the entry; must be provided by subclasses."""
        raise NotImplementedError

    @property
    def modification_time(self) -> float:
        """Modification time as epoch seconds; must be provided by subclasses."""
        raise NotImplementedError

    @property
    def name(self) -> str:
        """Name of the entry inside the archive; must be provided by subclasses."""
        raise NotImplementedError
|
|
85
|
+
|
|
@@ -0,0 +1,37 @@
|
|
|
1
|
+
import zlib
|
|
2
|
+
|
|
3
|
+
|
|
4
|
+
class Compressor:
    """Chunk processor that keeps a file's size and CRC bookkeeping up to date.

    On construction ``process`` and ``tail`` are bound to the implementation
    matching ``file.compression_method``: ``0`` passes data through untouched,
    ``8`` deflates it as a raw stream (no zlib header). While chunks are
    processed, ``file.original_size``, ``file.compressed_size`` and
    ``file.crc`` are updated in place.

    Raises:
        ValueError: if ``file.compression_method`` is neither ``0`` nor ``8``.
    """

    def __init__(self, file):
        self.file = file

        if file.compression_method == 0:
            self.process = self._process_through
            self.tail = self._no_tail
        elif file.compression_method == 8:  # deflate compression
            # wbits=-15 produces a raw deflate stream, level 5
            self.compr = zlib.compressobj(5, zlib.DEFLATED, -15)
            self.process = self._process_deflate
            self.tail = self._tail_deflate
        else:
            # Fail early instead of leaving the object without process/tail.
            raise ValueError(
                f"Unsupported compression method: {file.compression_method!r}"
            )

    # no compression
    def _process_through(self, chunk):
        """Account for ``chunk`` and return it unchanged."""
        self.file.original_size += len(chunk)
        self.file.compressed_size += len(chunk)
        self.file.crc = zlib.crc32(chunk, self.file.crc)
        return chunk

    def _no_tail(self):
        """Stored data has nothing buffered, so the tail is always empty."""
        return b''

    # deflate compression
    def _process_deflate(self, chunk):
        """Account for ``chunk`` and return whatever deflate output is ready.

        The CRC and original size are computed on the uncompressed input; the
        returned bytes may be empty while the compressor is still buffering.
        """
        self.file.original_size += len(chunk)
        self.file.crc = zlib.crc32(chunk, self.file.crc)
        chunk = self.compr.compress(chunk)
        self.file.compressed_size += len(chunk)
        return chunk

    def _tail_deflate(self):
        """Finish the deflate stream and return the remaining bytes."""
        chunk = self.compr.flush(zlib.Z_FINISH)
        self.file.compressed_size += len(chunk)
        return chunk
|
|
@@ -0,0 +1,43 @@
|
|
|
1
|
+
import time
|
|
2
|
+
from typing import Generator, AsyncGenerator, Union
|
|
3
|
+
from zipFly.BaseFile import BaseFile
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
class GenFile(BaseFile):
    """Archive entry whose content is produced by a (sync or async) generator."""

    def __init__(
        self,
        name: str,
        generator: Union[Generator[bytes, None, None], AsyncGenerator[bytes, None]],
        compression_method: int = None,
        modification_time: float = None,
        size: int = None,
    ):
        super().__init__(compression_method)
        self._name = name
        self.generator = generator
        self._size = size
        # A falsy modification_time (None or 0) falls back to the current time.
        self._modification_time = modification_time or time.time()

    def _generate_file_data(self) -> Generator[bytes, None, None]:
        """Yield the chunks of the wrapped synchronous generator.

        Raises:
            ValueError: if the wrapped object is not a ``Generator``.
        """
        if not isinstance(self.generator, Generator):
            raise ValueError("self.generator must be of type Generator")
        yield from self.generator

    async def _async_generate_file_data(self) -> AsyncGenerator[bytes, None]:
        """Yield the chunks of the wrapped asynchronous generator.

        Raises:
            ValueError: if the wrapped object is not an ``AsyncGenerator``.
        """
        if not isinstance(self.generator, AsyncGenerator):
            raise ValueError("self.generator must be of type AsyncIterator")
        async for piece in self.generator:
            yield piece

    @property
    def name(self) -> str:
        """Name of the entry inside the archive."""
        return self._name

    @property
    def size(self) -> int:
        """Size passed to the constructor.

        Raises:
            ValueError: if no size was given.
        """
        if self._size is None:
            raise ValueError("Archive size not known before streaming. Probably GenFile() is missing size attribute.")
        return self._size

    @property
    def modification_time(self) -> float:
        """Modification time of the entry in epoch seconds."""
        return self._modification_time

    def set_file_name(self, new_name: str) -> None:
        """Store the entry under ``new_name`` instead of its current name."""
        self._name = new_name
|
|
@@ -0,0 +1,59 @@
|
|
|
1
|
+
import os
|
|
2
|
+
import time
|
|
3
|
+
from typing import Generator, AsyncGenerator
|
|
4
|
+
from zipFly.BaseFile import BaseFile
|
|
5
|
+
|
|
6
|
+
import aiofiles
|
|
7
|
+
|
|
8
|
+
class LocalFile(BaseFile):
    """Archive entry backed by a file on the local file system."""

    def __init__(self, file_path: str, name: str = None, compression_method: int = None):
        if not os.path.isfile(file_path):
            raise ValueError(f"{file_path} is not a correct file path.")
        self._file_path = file_path
        self.chunk_size = 1048  # number of bytes requested per read
        # Without an explicit (non-empty) name the entry is stored under file_path.
        self._name = name or file_path
        super().__init__(compression_method)

    async def _async_generate_file_data(self) -> AsyncGenerator[bytes, None]:
        """Asynchronously yield the file content in ``chunk_size`` pieces."""
        async with aiofiles.open(self._file_path, "rb") as handle:
            piece = await handle.read(self.chunk_size)
            while piece:
                yield piece
                piece = await handle.read(self.chunk_size)

    def _generate_file_data(self) -> Generator[bytes, None, None]:
        """Yield the file content in ``chunk_size`` pieces."""
        with open(self._file_path, 'rb') as handle:
            # iter() with a sentinel stops at the first empty read (end of file)
            for piece in iter(lambda: handle.read(self.chunk_size), b""):
                yield piece

    @property
    def name(self) -> str:
        """Name of the entry inside the archive."""
        return self._name

    @property
    def size(self) -> int:
        """Current size of the file on disk, in bytes."""
        return os.path.getsize(self._file_path)

    @property
    def modification_time(self) -> float:
        """Modification time of the file on disk, in epoch seconds."""
        return os.path.getmtime(self._file_path)

    def get_mod_time(self) -> int:
        """Pack the local modification time as hour/minute/(second // 2) bit fields."""
        local = time.localtime(self.modification_time)
        hour_bits = local.tm_hour << 11
        minute_bits = local.tm_min << 5
        second_bits = local.tm_sec // 2
        return (hour_bits | minute_bits | second_bits) & 0xFFFF

    def get_mod_date(self) -> int:
        """Pack the local modification date as (year - 1980)/month/day bit fields."""
        local = time.localtime(self.modification_time)
        year_bits = (local.tm_year - 1980) << 9  # ZIP format years start from 1980
        month_bits = local.tm_mon << 5
        return (year_bits | month_bits | local.tm_mday) & 0xFFFF

    def set_file_name(self, new_name: str) -> None:
        """Store the entry under ``new_name`` instead of its current name."""
        self._name = new_name
|
|
@@ -0,0 +1,256 @@
|
|
|
1
|
+
from collections import defaultdict
|
|
2
|
+
from typing import List
|
|
3
|
+
|
|
4
|
+
from zipFly import consts
|
|
5
|
+
from zipFly.BaseFile import BaseFile
|
|
6
|
+
|
|
7
|
+
"""
|
|
8
|
+
Since the Official ZIP docs are terrible, here's a detailed structure of the zip this library builds. (pretty sure mine's just as bad lol)
|
|
9
|
+
|
|
10
|
+
[local file header 1] |
|
|
11
|
+
[file data 1] |
|
|
12
|
+
[data descriptor 1] |
|
|
13
|
+
. |
|
|
14
|
+
. } - This part of the zip holds the file data. Local file headers are lowkey useless (nevertheless needed for zip to work).
|
|
15
|
+
. | Data descriptors allow to stream the file(and create file headers) without knowing the size of file data. Instead of putting
|
|
16
|
+
[local file header n] | things like: CRC, uncompressed_size, compressed_size etc in file headers, you only put placeholder values there (0xFFFFFFFF),
|
|
17
|
+
[file data n] | and fill them later in data descriptor.
|
|
18
|
+
[data descriptor n] |
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
=================This part is called Central Directory Structure============== <------------ TO HERE -------------------------------------------<
|
|
22
|
+
|
|
|
23
|
+
[central directory header 1] | |
|
|
24
|
+
[extra field 1] | |
|
|
25
|
+
. } - I have no idea why it's called 'central directory header'. It should be called central directory file headers. |
|
|
26
|
+
. | From now on, i will call central directory as just 'cdir'. Cdir headers are structures that again hold the file information |
|
|
27
|
+
. | Cdir headers are retarded, and their compressed_size, uncompressed_size, offset values are in 4 bytes, meaning you can't put |
|
|
28
|
+
[central directory header n] | values > 4GB. Hence the ZIP64 uses a special structure called 'extra field'. Just like before, in cdir headers we put |
|
|
29
|
+
[extra field n] | placeholder values, and fill them later in extra field. |
|
|
30
|
+
|
|
|
31
|
+
An important thing to pay attention in cdir header, is **offset**. This offset is the amount of bytes from the |
|
|
32
|
+
beginning of the file, to the start of local file header. So for 1st file the offset is 0, for the 2nd it's length of |
|
|
33
|
+
'[local file header 1]' + '[file data 1]' + '[data descriptor 1]'. |
|
|
34
|
+
|
|
|
35
|
+
=================This part I call End of Central Directory Structure (It's still a part of cdir structure(I think))=============== <----- TO HERE -----------------|-----------<
|
|
36
|
+
| |
|
|
37
|
+
[zip64 end of central directory record] | | |
|
|
38
|
+
[zip64 end of central directory locator] } - This is the actual end of the file. End of cdir directory record is a legacy(and kinda useless, nevertheless required). | |
|
|
39
|
+
[end of central directory record] | Since we work with files >4GB, the end of cdir directory record, again can't hold values like: compressed_size, | |
|
|
40
|
+
uncompressed_size, offset. We again use placeholder values, and put the actual ones in | |
|
|
41
|
+
'zip64 end of central directory record'. 'zip64 end of cdir locator' is used to locate the 'zip64 end of cdir record'. | |
|
|
42
|
+
| |
|
|
43
|
+
An important thing to pay attention to are **offsets**. There are two distinct ones here. | |
|
|
44
|
+
1st offset is in 'zip64 end of cdir record'. It's the amount of bytes from the start of the file to start of -------------> |
|
|
45
|
+
*Central Directory Structure*. |
|
|
46
|
+
2nd offset is in 'zip64 end of cdir locator'. It's the amount of bytes from the start of the file to start of -------------------------->
|
|
47
|
+
'zip64 end of cdir record'.
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
Other goofy things are:
|
|
51
|
+
'extra_field_len' in 'central directory header' is 28, even tho in the 'extra field' itself, the 'extra_field_size' is 24. That's because the first
|
|
52
|
+
is the full length of the 'extra field' structure, while the second doesn't include 'extra_field_size' and
|
|
53
|
+
'signature' which are each 2 bytes, so together they are the 'missing' 4 bytes.
|
|
54
|
+
|
|
55
|
+
'size_of_zip64_end_of_cdir_record' in zip64 end of cdir record is 44 bytes. Cuz: 'signature' - 4 bytes, 'size_of_zip64_end_of_central_dir_record' - 8 bytes,
|
|
56
|
+
'version_made_by' - 2 bytes, 'version_to_extract' - 2 bytes, 'number_of_this_disk' - 4 bytes, 'cd_start' - 4 bytes, 'cd_entries_this_disk' - 8 bytes,
|
|
57
|
+
'cd_entries_total' - 8 bytes, 'cd_size' - 8 bytes, 'cd_offset' - 8 bytes.
|
|
58
|
+
So, 4 + 8 + 2 + 2 + 4 + 4 + 8 + 8 + 8 + 8 = 56, but we again don't include signature, and 'size_of_zip64_end_of_central_dir_record' so 56 - 4 - 8 = 44
|
|
59
|
+
|
|
60
|
+
I hope, that i made it a bit more clear to anyone reading, including future me.
|
|
61
|
+
"""
|
|
62
|
+
|
|
63
|
+
|
|
64
|
+
def process_file_names(files):
    """Make every entry name unique, renaming duplicates in place.

    The first file keeping a given name is left untouched. Each later file
    whose name is already taken is renamed through ``set_file_name`` to
    ``"<stem> (<k>)<.ext>"``, using the smallest ``k`` not tried yet for that
    name that yields a free name. Only the last path component (after the
    final ``/``) is split into stem and extension, so dots in directory names
    are never mistaken for an extension separator, and a generated name can
    never collide with a name that is already in use.

    Args:
        files: iterable of objects exposing ``name`` and ``set_file_name``.

    Returns:
        The same ``files`` object that was passed in.
    """
    taken = set()
    # last numeric suffix tried per original name, so repeated duplicates do
    # not rescan 1..k every time
    last_suffix = defaultdict(int)

    for file in files:
        original = file.name
        candidate = original

        if candidate in taken:
            directory, slash, leaf = original.rpartition('/')
            stem, dot, ext = leaf.rpartition('.')
            if not stem:
                # no dot at all, or only a leading dot (".bashrc"): no extension
                stem, dot, ext = leaf, '', ''

            while candidate in taken:
                last_suffix[original] += 1
                candidate = f"{directory}{slash}{stem} ({last_suffix[original]}){dot}{ext}"

            file.set_file_name(candidate)

        taken.add(candidate)

    return files
|
|
83
|
+
|
|
84
|
+
|
|
85
|
+
class ZipBase:
|
|
86
|
+
|
|
87
|
+
def __init__(self, files: List[BaseFile]):
    """Store the (de-duplicated) files and reset the archive bookkeeping.

    Args:
        files: entries to put into the archive; their names are passed
            through ``process_file_names`` before being stored.
    """
    # version fields written into the archive structures
    self.__version_to_extract = 45
    self.__version_made_by = 0x0345  # UNIX and ZIP version 45

    # process file names to make sure there are no duplicates
    self.files = process_file_names(files)

    # running positions/sizes, all starting from an empty archive
    self.__offset = 0  # Tracks the current offset within the ZIP archive
    self._cdir_size = 0
    self._offset_to_start_of_central_dir = 0
|
|
98
|
+
|
|
99
|
+
def _make_local_file_header(self, file: BaseFile) -> bytes:
    """
    Create local file header for a ZIP64 archive (4.3.7)

    CRC and both sizes are written as 0xFFFFFFFF placeholders; the real
    values follow the file data in the data descriptor.

    Args:
        file: entry the header is built for.

    Returns:
        The packed header immediately followed by the encoded file name.
    """
    # Encode the name BEFORE reading file.flags: file_path_bytes switches on
    # the UTF-8 flag for non-ASCII names, and that bit has to be part of the
    # flags written into this header.
    name_bytes = file.file_path_bytes

    fields = {
        "signature": consts.LOCAL_FILE_HEADER_SIGNATURE,
        "version_to_extract": self.__version_to_extract,
        "flags": file.flags,
        "compression": file.compression_method,
        "mod_time": file.get_mod_time(),
        "mod_date": file.get_mod_date(),
        "crc": 0xFFFFFFFF,  # Placeholder (will be updated in data descriptor)
        "uncompressed_size": 0xFFFFFFFF,  # Placeholder (will be updated in data descriptor)
        "compressed_size": 0xFFFFFFFF,  # Placeholder (will be updated in data descriptor)
        "file_name_len": len(name_bytes),
        "extra_field_len": 0  # 0 cuz no extra field is used with local file header
    }

    # Pack the local file header structure
    header = consts.LOCAL_FILE_HEADER_TUPLE(**fields)
    header = consts.LOCAL_FILE_HEADER_STRUCT.pack(*header)
    header += name_bytes

    return header
|
|
124
|
+
|
|
125
|
+
def _make_data_descriptor(self, file: BaseFile) -> bytes:
    """
    Create data descriptor. (4.3.9)

    Carries the CRC and the sizes accumulated on ``file`` for the
    placeholders written in the local file header.
    """
    record = consts.ZIP64_DATA_DESCRIPTOR_TUPLE(
        signature=consts.ZIP64_DATA_DESCRIPTOR_SIGNATURE,
        crc=file.crc & 0xffffffff,  # hack for making CRC unsigned long
        uncompressed_size=file.original_size,
        compressed_size=file.compressed_size,
    )
    return consts.ZIP64_DATA_DESCRIPTOR_STRUCT.pack(*record)
|
|
141
|
+
|
|
142
|
+
def _make_cdir_file_header(self, file: BaseFile) -> bytes:
    """
    Build the central directory file header (APPNOTE 4.3.12) for ``file``.

    Both sizes and the local-header offset are written as 0xFFFFFFFF
    placeholders; the real values travel in the ZIP64 extra field, which the
    caller appends right after the bytes returned here. ``extra_field_len``
    already accounts for that extra field.

    :param file: entry supplying flags, compression method, DOS time/date,
        CRC and the encoded path (``file_path_bytes``).
    :return: packed fixed-size header immediately followed by the file name.
    """
    placeholder = 0xFFFFFFFF  # real value lives in the ZIP64 extra field

    record = consts.CENTRAL_DIR_FILE_HEADER_TUPLE(
        signature=consts.CENTRAL_DIR_FILE_HEADER_SIGNATURE,
        version_made_by=self.__version_made_by,
        version_to_extract=self.__version_to_extract,
        flags=file.flags,
        compression=file.compression_method,
        mod_time=file.get_mod_time(),
        mod_date=file.get_mod_date(),
        # Mask to an unsigned 32-bit value, exactly as the data descriptor
        # does; an out-of-range CRC would make struct.pack raise.
        crc=file.crc & 0xffffffff,
        compressed_size=placeholder,
        uncompressed_size=placeholder,
        file_name_len=len(file.file_path_bytes),
        # Length of the ZIP64 extra field that follows the file name (28 bytes).
        extra_field_len=consts.ZIP64_EXTRA_FIELD_STRUCT.size,
        file_comment_len=0,
        disk_start=0,
        internal_file_attr=0,
        external_file_attr=0,
        offset=placeholder,
    )

    return consts.CENTRAL_DIR_FILE_HEADER_STRUCT.pack(*record) + file.file_path_bytes
|
|
171
|
+
|
|
172
|
+
def _make_zip64_extra_field(self, file: BaseFile) -> bytes:
    """
    Build the ZIP64 extended information extra field (APPNOTE 4.5.3).

    Carries the 64-bit values that the central directory header replaces
    with 0xFFFFFFFF placeholders.

    :param file: entry supplying ``original_size``, ``compressed_size``
        and ``offset``.
    :return: packed extra field (header ID, data size, then three 64-bit values).
    """
    record = consts.ZIP64_EXTRA_FIELD_TUPLE(
        signature=consts.ZIP64_EXTRA_FIELD_SIGNATURE,
        extra_field_size=24,  # three 8-byte values follow the 4-byte field header
        size=file.original_size,
        compressed_size=file.compressed_size,
        offset=file.offset,
    )
    return consts.ZIP64_EXTRA_FIELD_STRUCT.pack(*record)
|
|
188
|
+
|
|
189
|
+
def _make_zip64_end_of_cdir_record(self) -> bytes:
    """
    Build the ZIP64 end of central directory record (APPNOTE 4.3.14).

    Reads ``self.files`` for the entry count, ``self._cdir_size`` for the
    central directory length and ``self._offset_to_start_of_central_dir``
    for its position; the archive is described as a single disk (disk 0).

    :return: the packed record.
    """
    entry_count = len(self.files)

    record = consts.ZIP64_END_OF_CENTRAL_DIR_RECORD_TUPLE(
        signature=consts.ZIP64_END_OF_CENTRAL_DIR_RECORD_SIGNATURE,
        # 56-byte record minus the 12 bytes taken by the signature and this field.
        size_of_zip64_end_of_cdir_record=44,
        version_made_by=self.__version_made_by,
        version_to_extract=self.__version_to_extract,
        number_of_this_disk=0,
        cd_start=0,
        cd_entries_this_disk=entry_count,
        cd_entries_total=entry_count,
        cd_size=self._cdir_size,
        cd_offset=self._offset_to_start_of_central_dir,
    )
    return consts.ZIP64_END_OF_CENTRAL_DIR_RECORD_STRUCT.pack(*record)
|
|
210
|
+
|
|
211
|
+
def _make_zip64_end_of_cdir_locator(self) -> bytes:
    """
    Build the ZIP64 end of central directory locator (APPNOTE 4.3.15).

    The current running offset is stored as the position of the ZIP64 end
    of central directory record, so this must be called while the offset
    still points at the start of that record.

    :return: the packed locator for a one-disk archive.
    """
    record = consts.ZIP64_END_OF_CENTRAL_DIR_LOCATOR_TUPLE(
        signature=consts.ZIP64_END_OF_CENTRAL_DIR_LOCATOR_SIGNATURE,
        disk_with_zip64_end=0,
        zip64_end_offset=self.__offset,
        total_disks=1,
    )
    return consts.ZIP64_END_OF_CENTRAL_DIR_LOCATOR_STRUCT.pack(*record)
|
|
227
|
+
|
|
228
|
+
def _make_end_of_cdir_record(self) -> bytes:
    """
    Build the classic end of central directory record (APPNOTE 4.3.16).

    The central directory size and offset are written as 0xFFFFFFFF so that
    readers take the real values from the ZIP64 end of central directory
    record. The two entry-count fields are only 16 bits wide, so counts
    above 0xFFFF are written as 0xFFFF (the "see ZIP64 record" marker)
    instead of overflowing the field and making ``struct.pack`` raise.

    :return: the packed record, with no archive comment.
    """
    # Clamp to the 16-bit field; the ZIP64 record carries the true count.
    entry_count = min(len(self.files), 0xFFFF)

    record = consts.END_OF_CENTRAL_DIR_RECORD_TUPLE(
        signature=consts.END_OF_CENTRAL_DIR_RECORD_SIGNATURE,
        number_of_this_disk=0,
        number_of_disk_with_start_central_dir=0,
        total_entries_on_this_disk=entry_count,
        total_entries_total=entry_count,
        central_directory_size=0xFFFFFFFF,  # real value is in the ZIP64 record
        offset_of_central_directory=0xFFFFFFFF,  # real value is in the ZIP64 record
        comment_length=0,  # no comment
    )
    return consts.END_OF_CENTRAL_DIR_RECORD_STRUCT.pack(*record)
|
|
247
|
+
|
|
248
|
+
def _add_offset(self, value: int) -> None:
|
|
249
|
+
self.__offset += value
|
|
250
|
+
|
|
251
|
+
def _get_offset(self) -> int:
|
|
252
|
+
return self.__offset
|
|
253
|
+
|
|
254
|
+
|
|
255
|
+
|
|
256
|
+
|
|
@@ -0,0 +1,110 @@
|
|
|
1
|
+
from typing import Generator, AsyncGenerator
|
|
2
|
+
|
|
3
|
+
from zipFly import BaseFile
|
|
4
|
+
from zipFly.ZipBase import ZipBase
|
|
5
|
+
|
|
6
|
+
class ZipFly(ZipBase):
    """
    Streams a ZIP64 archive assembled from ``self.files``.

    ``stream`` produces the archive synchronously and ``async_stream``
    asynchronously; both yield the archive as successive ``bytes`` chunks.
    """

    def calculate_archive_size(self) -> int:
        """
        Compute the size in bytes of the archive that streaming would produce.

        For every entry this adds the local header, ``file.size`` bytes of
        data, the data descriptor and the central directory entry with its
        ZIP64 extra field; the three fixed-size trailing records are added
        once at the end.

        Any exception raised while reading ``file.size`` propagates to the
        caller (the project README documents a ``ValueError`` when the size
        cannot be calculated).
        NOTE(review): ``file.size`` is used as the stored data length; confirm
        it matches the compressed length for deflated entries.

        :return: total archive size in bytes.
        """
        LOCAL_FILE_HEADER_SIZE = 30
        DATA_DESCRIPTOR_SIZE = 24
        CENTRAL_DIR_HEADER_SIZE = 46
        ZIP64_EXTRA_FIELD_SIZE = 28
        ZIP64_END_OF_CDIR_RECORD_SIZE = 56
        ZIP64_END_OF_CDIR_LOCATOR_SIZE = 20
        END_OF_CDIR_RECORD_CD_RECORD_SIZE = 22

        total_size = 0

        for file in self.files:
            # The file name is stored twice: in the local header and in the
            # central directory entry.
            name_len = len(file.file_path_bytes)

            total_size += LOCAL_FILE_HEADER_SIZE + name_len
            total_size += file.size
            total_size += DATA_DESCRIPTOR_SIZE
            total_size += CENTRAL_DIR_HEADER_SIZE + name_len + ZIP64_EXTRA_FIELD_SIZE

        total_size += ZIP64_END_OF_CDIR_RECORD_SIZE
        total_size += ZIP64_END_OF_CDIR_LOCATOR_SIZE
        total_size += END_OF_CDIR_RECORD_CD_RECORD_SIZE

        return total_size

    def _make_end_structures(self) -> Generator[bytes, None, None]:
        """
        Make zip64 end structures, which include:
            central directory file header for every file,
            zip64 extra field for every file,
            zip64 end of central dir record,
            zip64 end of central dir locator,
            end of central dir record.

        Records the central directory's start offset and its total size on
        ``self`` so the ZIP64 end of central directory record can report them.
        """
        # Save offset to start of central dir for zip64 end of cdir record
        self._offset_to_start_of_central_dir = self._get_offset()

        # The central directory size is the sum over ALL entries, so start
        # from zero and accumulate (assigning per entry would keep only the
        # size of the last one).
        self._cdir_size = 0

        # Stream central directory entries
        for file in self.files:
            chunk = self._make_cdir_file_header(file)
            chunk += self._make_zip64_extra_field(file)
            self._cdir_size += len(chunk)
            self._add_offset(len(chunk))

            yield chunk

        # From here on the offset is left untouched: the locator needs it to
        # point at the start of the zip64 end of central dir record.
        yield self._make_zip64_end_of_cdir_record()

        yield self._make_zip64_end_of_cdir_locator()

        yield self._make_end_of_cdir_record()

    async def _async_stream_single_file(self, file: BaseFile) -> AsyncGenerator[bytes, None]:
        """
        Asynchronously stream a single entry: local header, processed data,
        then the data descriptor.
        """
        yield self._make_local_file_header(file)

        async for chunk in file.async_generate_processed_file_data():
            yield chunk

        yield self._make_data_descriptor(file)

    async def async_stream(self) -> AsyncGenerator[bytes, None]:
        """
        Asynchronously yield the whole archive as ``bytes`` chunks.

        Each entry's ``offset`` is set to the running offset just before its
        local header is emitted; the offset then advances by every chunk sent.
        """
        # stream files
        for file in self.files:
            file.offset = self._get_offset()
            async for chunk in self._async_stream_single_file(file):
                self._add_offset(len(chunk))
                yield chunk

        # stream zip structures
        for chunk in self._make_end_structures():
            yield chunk

    def stream(self) -> Generator[bytes, None, None]:
        """
        Yield the whole archive as ``bytes`` chunks.

        Each entry's ``offset`` is set to the running offset just before its
        local header is emitted; the offset then advances by every chunk sent.
        NOTE(review): the running offset is not reset here; presumably an
        instance is meant to be streamed once - confirm.
        """
        # stream files
        for file in self.files:
            file.offset = self._get_offset()
            for chunk in self._stream_single_file(file):
                self._add_offset(len(chunk))
                yield chunk

        # stream zip structures
        yield from self._make_end_structures()

    def _stream_single_file(self, file: BaseFile) -> Generator[bytes, None, None]:
        """
        stream single zip file with header and descriptor at the end.
        """
        yield self._make_local_file_header(file)

        yield from file.generate_processed_file_data()

        yield self._make_data_descriptor(file)
|
|
108
|
+
|
|
109
|
+
|
|
110
|
+
|
|
@@ -0,0 +1,68 @@
|
|
|
1
|
+
from collections import namedtuple
|
|
2
|
+
import struct
|
|
3
|
+
|
|
4
|
+
# ZIP CONSTANTS
ZIP64_VERSION = 45
UTF8_FLAG = 0x800  # utf-8 filename encoding flag

# ZIP COMPRESSION METHODS
NO_COMPRESSION = 0
COMPRESSION_DEFLATE = 8
# Method ids kept for reference only; they are not enabled:
# COMPRESSION_BZIP2 = 12
# COMPRESSION_LZMA = 14

# Every *_STRUCT below is little-endian ("<") and describes only the
# fixed-size part of its record; the matching *_TUPLE names the fields in
# exactly the order the struct packs them.

# LOCAL FILE HEADER (30 bytes + file name)
LOCAL_FILE_HEADER_SIGNATURE = b'\x50\x4b\x03\x04'
LOCAL_FILE_HEADER_STRUCT = struct.Struct(b"<4sHHHHHLLLHH")
# On disk the compressed size precedes the uncompressed size (APPNOTE 4.3.7).
LOCAL_FILE_HEADER_TUPLE = namedtuple("fileheader",
                                     ("signature", "version_to_extract", "flags",
                                      "compression", "mod_time", "mod_date",
                                      "crc", "compressed_size", "uncompressed_size",
                                      "file_name_len", "extra_field_len"))


# FILE DESCRIPTOR (24 bytes, 64-bit sizes)
ZIP64_DATA_DESCRIPTOR_SIGNATURE = b'\x50\x4b\x07\x08'
ZIP64_DATA_DESCRIPTOR_STRUCT = struct.Struct(b"<4sLQQ")
ZIP64_DATA_DESCRIPTOR_TUPLE = namedtuple("filecrc", ("signature", "crc", "compressed_size", "uncompressed_size"))


# CENTRAL DIRECTORY FILE HEADER (46 bytes + file name + extra field)
CENTRAL_DIR_FILE_HEADER_SIGNATURE = b'\x50\x4b\x01\x02'
CENTRAL_DIR_FILE_HEADER_STRUCT = struct.Struct(b"<4sHHHHHHLLLHHHHHLL")
CENTRAL_DIR_FILE_HEADER_TUPLE = namedtuple("cdfileheader",
                                           ("signature", "version_made_by", "version_to_extract", "flags",
                                            "compression", "mod_time", "mod_date", "crc",
                                            "compressed_size", "uncompressed_size", "file_name_len", "extra_field_len",
                                            "file_comment_len", "disk_start", "internal_file_attr", "external_file_attr", "offset"))


# ZIP64 EXTRA FIELD (28 bytes: 4-byte field header + three 64-bit values)
ZIP64_EXTRA_FIELD_SIGNATURE = b'\x01\x00'
ZIP64_EXTRA_FIELD_STRUCT = struct.Struct(b"<2sHQQQ")
ZIP64_EXTRA_FIELD_TUPLE = namedtuple("extra", ("signature", "extra_field_size", "size", "compressed_size", "offset"))


# ZIP64 END OF CENTRAL DIRECTORY RECORD (56 bytes)
ZIP64_END_OF_CENTRAL_DIR_RECORD_SIGNATURE = b'\x50\x4b\x06\x06'
ZIP64_END_OF_CENTRAL_DIR_RECORD_STRUCT = struct.Struct(b"<4sQHHIIQQQQ")
ZIP64_END_OF_CENTRAL_DIR_RECORD_TUPLE = namedtuple("zip64end",
                                                   ("signature", "size_of_zip64_end_of_cdir_record", "version_made_by", "version_to_extract",
                                                    "number_of_this_disk", "cd_start", "cd_entries_this_disk", "cd_entries_total",
                                                    "cd_size", "cd_offset"))


# ZIP64 END OF CENTRAL DIRECTORY LOCATOR (20 bytes)
ZIP64_END_OF_CENTRAL_DIR_LOCATOR_SIGNATURE = b'\x50\x4b\x06\x07'
ZIP64_END_OF_CENTRAL_DIR_LOCATOR_STRUCT = struct.Struct(b"<4sLQL")
ZIP64_END_OF_CENTRAL_DIR_LOCATOR_TUPLE = namedtuple("eocdlocator",
                                                    ("signature", "disk_with_zip64_end", "zip64_end_offset", "total_disks"))


# END OF CENTRAL DIRECTORY RECORD (22 bytes, no comment)
END_OF_CENTRAL_DIR_RECORD_SIGNATURE = b'\x50\x4b\x05\x06'
END_OF_CENTRAL_DIR_RECORD_STRUCT = struct.Struct(b"<4sHHHHLLH")
# Own typename, so its repr is not confused with the ZIP64 locator tuple.
END_OF_CENTRAL_DIR_RECORD_TUPLE = namedtuple("eocd", ("signature", "number_of_this_disk", "number_of_disk_with_start_central_dir",
                                                      "total_entries_on_this_disk", "total_entries_total",
                                                      "central_directory_size", "offset_of_central_directory",
                                                      "comment_length"))
|
|
@@ -0,0 +1,125 @@
|
|
|
1
|
+
Metadata-Version: 2.1
|
|
2
|
+
Name: zipFly64
|
|
3
|
+
Version: 1.0.0
|
|
4
|
+
Summary: Stream zip64 archives on the fly.
|
|
5
|
+
Author-email: Pamparampampam <reallpamparampam.pl@gmail.com>
|
|
6
|
+
License: MIT License
|
|
7
|
+
|
|
8
|
+
Copyright (c) 2024 Pamparampam
|
|
9
|
+
|
|
10
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
11
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
12
|
+
in the Software without restriction, including without limitation the rights
|
|
13
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
14
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
15
|
+
furnished to do so, subject to the following conditions:
|
|
16
|
+
|
|
17
|
+
The above copyright notice and this permission notice shall be included in all
|
|
18
|
+
copies or substantial portions of the Software.
|
|
19
|
+
|
|
20
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
21
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
22
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
23
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
24
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
25
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
26
|
+
SOFTWARE.
|
|
27
|
+
Project-URL: Homepage, https://github.com/pam-param-pam/ZipFly
|
|
28
|
+
Keywords: zip64,zip,streaming,zipfly
|
|
29
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
30
|
+
Classifier: Programming Language :: Python
|
|
31
|
+
Classifier: Programming Language :: Python :: 3
|
|
32
|
+
Requires-Python: >=3.7
|
|
33
|
+
Description-Content-Type: text/markdown
|
|
34
|
+
License-File: LICENSE
|
|
35
|
+
Requires-Dist: aiofiles>=24.1.0
|
|
36
|
+
|
|
37
|
+
# ZipFly
|
|
38
|
+
|
|
39
|
+
<a href="http://forthebadge.com/"><img src="https://forthebadge.com/images/badges/0-percent-optimized.svg" alt="forthebadge"/></a>
|
|
40
|
+
<a href="http://forthebadge.com/"><img src="https://forthebadge.com/images/badges/gluten-free.png" alt="forthebadge"/></a>
|
|
41
|
+
<a href="http://forthebadge.com/"><img src="https://web.archive.org/web/20230604002050/https://forthebadge.com/images/badges/mom-made-pizza-rolls.svg" alt="forthebadge"/></a>
|
|
42
|
+
|
|
43
|
+
<img src="https://img.shields.io/badge/ZIP64-Certified-lightGreen" alt="Build Status"/>
|
|
44
|
+
<img src="https://img.shields.io/badge/build-failing-red" alt="Build Status"/>
|
|
45
|
+
<img src="https://img.shields.io/badge/made with-hate-orange" alt="Build Status"/>
|
|
46
|
+
<img src="https://img.shields.io/badge/fuck-zip-green" alt="Build Status"/>
|
|
47
|
+
|
|
48
|
+
### ZipFly is a library for creating & streaming ZIP64 archives "on the fly"
|
|
49
|
+
|
|
50
|
+
**It lets you create or fetch file content dynamically while the archive is being streamed.**
|
|
51
|
+
|
|
52
|
+
- No temporary files, data is streamed directly
|
|
53
|
+
- Ability to calculate archive size before streaming even begins
|
|
54
|
+
- Supports the `deflate` compression method
|
|
55
|
+
- Low memory usage: data is streamed using Python generators (`yield`)
|
|
56
|
+
- Archive structure is created on the fly, and all data can be created during stream
|
|
57
|
+
- Files included into archive can be generated on the fly using Python generators
|
|
58
|
+
- **Independent of the goofy 🤮🤮 python's standard ZipFile implementation**
|
|
59
|
+
- No dependencies other than `aiofiles`
|
|
60
|
+
- Automatic detection and changing of duplicate names
|
|
61
|
+
- `Zip64` format compatible files
|
|
62
|
+
|
|
63
|
+
|
|
64
|
+
This library is based upon [this library](https://github.com/kbbdy/zipstream) <sub>_(this library was a piece of work...)_</sub>
|
|
65
|
+
|
|
66
|
+
## Typical Usage
|
|
67
|
+
|
|
68
|
+
```py
|
|
69
|
+
from zipFly import ZipFly, LocalFile, consts
|
|
70
|
+
# compression_method is optional, defaults to consts.NO_COMPRESSION
|
|
71
|
+
file1 = LocalFile(file_path='files/lqbfa61deebf1.mp4', compression_method=consts.NO_COMPRESSION) # or consts.COMPRESSION_DEFLATE
|
|
72
|
+
file2 = LocalFile(file_path='public/2ae9dcd01a3aa.mp4', name="files/my_file2.mp4") # override the file name
|
|
73
|
+
file3 = LocalFile(file_path='files/4shaw1dax4da.mp4', name="my_file3.mp4") # you control the directory path by specifying it in name
|
|
74
|
+
|
|
75
|
+
files = [file1, file2, file3]
|
|
76
|
+
|
|
77
|
+
zipFly = ZipFly(files)
|
|
78
|
+
|
|
79
|
+
# save to file, or do something else with the stream() generator
|
|
80
|
+
with open("out/file.zip", 'wb') as f_out:
|
|
81
|
+
for chunk in zipFly.stream():
|
|
82
|
+
f_out.write(chunk)
|
|
83
|
+
```
|
|
84
|
+
|
|
85
|
+
### Supports dynamically created files
|
|
86
|
+
```py
|
|
87
|
+
from zipFly import ZipFly, GenFile, LocalFile, consts
|
|
88
|
+
|
|
89
|
+
|
|
90
|
+
def file_generator():
|
|
91
|
+
yield b"uga buga"
|
|
92
|
+
yield b"a29jaGFtIGFsdGVybmF0eXdraQ=="
|
|
93
|
+
yield b"2137"
|
|
94
|
+
|
|
95
|
+
# size is optional; it allows the total size of the archive to be calculated before any data is generated
|
|
96
|
+
# modification_time in epoch time, defaults to time.time()
|
|
97
|
+
file1 = GenFile(name="file.txt", generator=file_generator(), modification_time=time.time(), size=size, compression_method=consts.COMPRESSION_DEFLATE)
|
|
98
|
+
file2 = LocalFile(file_path='files/as61aade2ebfd.mp4', compression_method=consts.NO_COMPRESSION) # or consts.COMPRESSION_DEFLATE
|
|
99
|
+
|
|
100
|
+
files = [file1, file2]
|
|
101
|
+
|
|
102
|
+
zipFly = ZipFly(files)
|
|
103
|
+
archive_size = zipFly.calculate_archive_size() # raises ValueError if it can't calculate size
|
|
104
|
+
|
|
105
|
+
# for example you can set as content length in http response
|
|
106
|
+
response['Content-Length'] = archive_size
|
|
107
|
+
|
|
108
|
+
for chunk in zipFly.stream():
|
|
109
|
+
# do something
|
|
110
|
+
|
|
111
|
+
```
|
|
112
|
+
### Other
|
|
113
|
+
I created this library for my I Drive project.
|
|
114
|
+
|
|
115
|
+
If you have a different use case and `LocalFile` and `GenFile` are not enough, you can extend `BaseFile`; everything else should work out of the box.
|
|
116
|
+
|
|
117
|
+
|
|
118
|
+
|
|
119
|
+
### PS
|
|
120
|
+
|
|
121
|
+
I wholeheartedly hope everyone responsible for creating ZIP documentation gets slaughtered in the most gore and painful way 😊 (in game)
|
|
122
|
+
|
|
123
|
+
(pls redo ur [docs](https://pkware.cachefly.net/webdocs/casestudies/APPNOTE.TXT))
|
|
124
|
+
|
|
125
|
+
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
LICENSE
|
|
2
|
+
README.md
|
|
3
|
+
pyproject.toml
|
|
4
|
+
src/zipFly/BaseFile.py
|
|
5
|
+
src/zipFly/Compressor.py
|
|
6
|
+
src/zipFly/GenFile.py
|
|
7
|
+
src/zipFly/LocalFile.py
|
|
8
|
+
src/zipFly/ZipBase.py
|
|
9
|
+
src/zipFly/ZipFly.py
|
|
10
|
+
src/zipFly/__init__.py
|
|
11
|
+
src/zipFly/consts.py
|
|
12
|
+
src/zipFly64.egg-info/PKG-INFO
|
|
13
|
+
src/zipFly64.egg-info/SOURCES.txt
|
|
14
|
+
src/zipFly64.egg-info/dependency_links.txt
|
|
15
|
+
src/zipFly64.egg-info/requires.txt
|
|
16
|
+
src/zipFly64.egg-info/top_level.txt
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
aiofiles>=24.1.0
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
zipFly
|