zipFly64 1.2.0__tar.gz → 1.2.3__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {zipfly64-1.2.0/src/zipFly64.egg-info → zipfly64-1.2.3}/PKG-INFO +36 -14
- {zipfly64-1.2.0 → zipfly64-1.2.3}/README.md +33 -12
- zipfly64-1.2.3/pyproject.toml +116 -0
- {zipfly64-1.2.0 → zipfly64-1.2.3}/src/zipFly/BaseFile.py +13 -13
- {zipfly64-1.2.0 → zipfly64-1.2.3}/src/zipFly/GenFile.py +11 -10
- {zipfly64-1.2.0 → zipfly64-1.2.3}/src/zipFly/LocalFile.py +12 -9
- {zipfly64-1.2.0 → zipfly64-1.2.3}/src/zipFly/ZipBase.py +1 -57
- {zipfly64-1.2.0 → zipfly64-1.2.3}/src/zipFly/ZipFly.py +82 -14
- {zipfly64-1.2.0 → zipfly64-1.2.3/src/zipFly64.egg-info}/PKG-INFO +36 -14
- {zipfly64-1.2.0 → zipfly64-1.2.3}/src/zipFly64.egg-info/SOURCES.txt +4 -1
- zipfly64-1.2.3/tests/test_utils.py +57 -0
- zipfly64-1.2.3/tests/test_zipfly.py +556 -0
- zipfly64-1.2.3/tests/test_zipfly_4GB.py +111 -0
- zipfly64-1.2.0/pyproject.toml +0 -27
- {zipfly64-1.2.0 → zipfly64-1.2.3}/LICENSE +0 -0
- {zipfly64-1.2.0 → zipfly64-1.2.3}/setup.cfg +0 -0
- {zipfly64-1.2.0 → zipfly64-1.2.3}/src/zipFly/Compressor.py +0 -0
- {zipfly64-1.2.0 → zipfly64-1.2.3}/src/zipFly/__init__.py +0 -0
- {zipfly64-1.2.0 → zipfly64-1.2.3}/src/zipFly/consts.py +0 -0
- {zipfly64-1.2.0 → zipfly64-1.2.3}/src/zipFly64.egg-info/dependency_links.txt +0 -0
- {zipfly64-1.2.0 → zipfly64-1.2.3}/src/zipFly64.egg-info/requires.txt +0 -0
- {zipfly64-1.2.0 → zipfly64-1.2.3}/src/zipFly64.egg-info/top_level.txt +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
|
-
Metadata-Version: 2.
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
2
|
Name: zipFly64
|
|
3
|
-
Version: 1.2.
|
|
3
|
+
Version: 1.2.3
|
|
4
4
|
Summary: Stream zip64 archives on the fly.
|
|
5
5
|
Author: Pamparampampam
|
|
6
6
|
License: MIT License
|
|
@@ -33,6 +33,7 @@ Requires-Python: >=3.7
|
|
|
33
33
|
Description-Content-Type: text/markdown
|
|
34
34
|
License-File: LICENSE
|
|
35
35
|
Requires-Dist: aiofiles>=24.1.0
|
|
36
|
+
Dynamic: license-file
|
|
36
37
|
|
|
37
38
|
# ZipFly
|
|
38
39
|
|
|
@@ -64,6 +65,7 @@ Generating ZIPs on-demand in a web server is a typical use case for zipFly.**
|
|
|
64
65
|
- Only 1 dependency
|
|
65
66
|
- Automatic detection and changing of duplicate names
|
|
66
67
|
- `Zip64` format compatible files
|
|
68
|
+
- **21.37%** test coverage
|
|
67
69
|
|
|
68
70
|
|
|
69
71
|
This library is based upon [this library](https://github.com/kbbdy/zipstream) <sub>_(this library was a piece of work...)_</sub>
|
|
@@ -91,8 +93,7 @@ with open("out/file.zip", 'wb') as f_out:
|
|
|
91
93
|
for chunk in zipFly.stream():
|
|
92
94
|
f_out.write(chunk)
|
|
93
95
|
```
|
|
94
|
-
|
|
95
|
-
> You mustn't reuse `ZipFly` instances. They should be re-created everytime you call `stream()` or `async_stream()`
|
|
96
|
+
|
|
96
97
|
|
|
97
98
|
### Supports dynamically created files
|
|
98
99
|
```py
|
|
@@ -106,7 +107,7 @@ def file_generator():
|
|
|
106
107
|
|
|
107
108
|
# size is optional, it allows to calculate the total size of the archive before any data is generated
|
|
108
109
|
# modification_time in epoch time, defaults to time.time()
|
|
109
|
-
file1 = GenFile(name="file.txt", generator=
|
|
110
|
+
file1 = GenFile(name="file.txt", generator=file_generator(), modification_time=time.time(), size=size, compression_method=consts.COMPRESSION_DEFLATE)
|
|
110
111
|
file2 = LocalFile(file_path='files/as61aade2ebfd.mp4', compression_method=consts.NO_COMPRESSION) # or consts.COMPRESSION_DEFLATE
|
|
111
112
|
|
|
112
113
|
files = [file1, file2]
|
|
@@ -126,7 +127,7 @@ for chunk in zipFly.stream():
|
|
|
126
127
|
```py
|
|
127
128
|
import asyncio
|
|
128
129
|
from zipFly import ZipFly, LocalFile, consts, GenFile
|
|
129
|
-
file1 = GenFile(name="file.txt", generator=
|
|
130
|
+
file1 = GenFile(name="file.txt", generator=file_generator())
|
|
130
131
|
file2 = LocalFile(file_path='public/2ae9dcd01a3aa.mp4', name="files/my_file2.mp4")
|
|
131
132
|
|
|
132
133
|
files = [file1, file2]
|
|
@@ -153,12 +154,10 @@ If you use `LocalFile` then it's not a problem as it can very fast go tru the en
|
|
|
153
154
|
|
|
154
155
|
```py
|
|
155
156
|
|
|
156
|
-
file1 = GenFile(name="file.txt", generator=
|
|
157
|
+
file1 = GenFile(name="file.txt", generator=file_generator(), crc=crc)
|
|
157
158
|
file2 = LocalFile(file_path='public/2ae9dcd01a3aa.mp4', name="files/my_file2.mp4")
|
|
158
|
-
|
|
159
|
-
|
|
160
|
-
zipFly1 = ZipFly(files)
|
|
161
|
-
zipFly2 = ZipFly(files)
|
|
159
|
+
files1 = [file1, file2]
|
|
160
|
+
zipFly1 = ZipFly(files1)
|
|
162
161
|
|
|
163
162
|
# Simulating pause/resume
|
|
164
163
|
STOP_BYTE = 300
|
|
@@ -173,7 +172,13 @@ async def async_save_pause():
|
|
|
173
172
|
byte_offset += len(chunk)
|
|
174
173
|
if byte_offset >= STOP_BYTE:
|
|
175
174
|
break
|
|
176
|
-
|
|
175
|
+
|
|
176
|
+
# Later...
|
|
177
|
+
file3 = GenFile(name="file.txt", generator=file_generator(), crc=crc)
|
|
178
|
+
file4 = LocalFile(file_path='public/2ae9dcd01a3aa.mp4', name="files/my_file2.mp4")
|
|
179
|
+
files2 = [file3, file4]
|
|
180
|
+
zipFly2 = ZipFly(files2)
|
|
181
|
+
|
|
177
182
|
async def async_save_resume():
|
|
178
183
|
with open("out/file.zip", 'ab') as f_out: # Append mode
|
|
179
184
|
async for chunk in zipFly2.async_stream(byte_offset=STOP_BYTE):
|
|
@@ -190,16 +195,33 @@ If resume ZipFly instance has diffrent files than pause ZipFly instance there wi
|
|
|
190
195
|
> [!NOTE]
|
|
191
196
|
> For byte offset mode to work you must use `consts.NO_COMPRESSION` and specify `crc` for `GenFile`
|
|
192
197
|
|
|
198
|
+
> [!CAUTION]
|
|
199
|
+
> You mustn't reuse `ZipFly` instances. They should be re-created every time you call `stream()` or `async_stream()`
|
|
200
|
+
|
|
201
|
+
> [!CAUTION]
|
|
202
|
+
> You mustn't reuse `GenFile` instances.
|
|
193
203
|
|
|
194
204
|
### Other
|
|
195
205
|
Python is not optimized for async I/O operations, so to speed up async streaming the chunk_size is changed to 4MB. You can override this by passing `chunk_size` as an argument to `LocalFile`.
|
|
196
206
|
|
|
197
|
-
|
|
198
|
-
I created this library for my I Drive project.
|
|
207
|
+
I created this library for my [iDrive](https://github.com/pam-param-pam/I-Drive) project.
|
|
199
208
|
|
|
200
209
|
If you have a different use case scenario, and LocalFile and GenFile are not enough, you can extend BaseFile and everything else should work out of the box.
|
|
201
210
|
|
|
211
|
+
If you extend `BaseFile` keep in mind that `zipFly` attempts to "deepcopy" files. It will successfully
|
|
212
|
+
deepcopy `LocalFile`, so LocalFile instances can be re-used. However, it will completely skip deep-copying any file
|
|
213
|
+
instance that has a generator.
|
|
214
|
+
|
|
215
|
+
### Testing
|
|
202
216
|
|
|
217
|
+
With [pytest](https://docs.pytest.org/en/stable/) and
|
|
218
|
+
[pytest-asyncio](https://pytest-asyncio.readthedocs.io/en/stable/) installed,
|
|
219
|
+
call `pytest` from the top-level directory (same as this `README.md`)
|
|
220
|
+
to run tests.
|
|
221
|
+
The 4GB tests are slow. If your machine has enough memory (~4GB free) and a fast
|
|
222
|
+
disk/SSD, [pytest-xdist](https://pytest-xdist.readthedocs.io/en/stable/)
|
|
223
|
+
can speed things up by running tests in parallel.
|
|
224
|
+
Use it by calling `pytest -n auto`.
|
|
203
225
|
|
|
204
226
|
### PS
|
|
205
227
|
|
|
@@ -28,6 +28,7 @@ Generating ZIPs on-demand in a web server is a typical use case for zipFly.**
|
|
|
28
28
|
- Only 1 dependency
|
|
29
29
|
- Automatic detection and changing of duplicate names
|
|
30
30
|
- `Zip64` format compatible files
|
|
31
|
+
- **21.37%** test coverage
|
|
31
32
|
|
|
32
33
|
|
|
33
34
|
This library is based upon [this library](https://github.com/kbbdy/zipstream) <sub>_(this library was a piece of work...)_</sub>
|
|
@@ -55,8 +56,7 @@ with open("out/file.zip", 'wb') as f_out:
|
|
|
55
56
|
for chunk in zipFly.stream():
|
|
56
57
|
f_out.write(chunk)
|
|
57
58
|
```
|
|
58
|
-
|
|
59
|
-
> You mustn't reuse `ZipFly` instances. They should be re-created everytime you call `stream()` or `async_stream()`
|
|
59
|
+
|
|
60
60
|
|
|
61
61
|
### Supports dynamically created files
|
|
62
62
|
```py
|
|
@@ -70,7 +70,7 @@ def file_generator():
|
|
|
70
70
|
|
|
71
71
|
# size is optional, it allows to calculate the total size of the archive before any data is generated
|
|
72
72
|
# modification_time in epoch time, defaults to time.time()
|
|
73
|
-
file1 = GenFile(name="file.txt", generator=
|
|
73
|
+
file1 = GenFile(name="file.txt", generator=file_generator(), modification_time=time.time(), size=size, compression_method=consts.COMPRESSION_DEFLATE)
|
|
74
74
|
file2 = LocalFile(file_path='files/as61aade2ebfd.mp4', compression_method=consts.NO_COMPRESSION) # or consts.COMPRESSION_DEFLATE
|
|
75
75
|
|
|
76
76
|
files = [file1, file2]
|
|
@@ -90,7 +90,7 @@ for chunk in zipFly.stream():
|
|
|
90
90
|
```py
|
|
91
91
|
import asyncio
|
|
92
92
|
from zipFly import ZipFly, LocalFile, consts, GenFile
|
|
93
|
-
file1 = GenFile(name="file.txt", generator=
|
|
93
|
+
file1 = GenFile(name="file.txt", generator=file_generator())
|
|
94
94
|
file2 = LocalFile(file_path='public/2ae9dcd01a3aa.mp4', name="files/my_file2.mp4")
|
|
95
95
|
|
|
96
96
|
files = [file1, file2]
|
|
@@ -117,12 +117,10 @@ If you use `LocalFile` then it's not a problem as it can very fast go tru the en
|
|
|
117
117
|
|
|
118
118
|
```py
|
|
119
119
|
|
|
120
|
-
file1 = GenFile(name="file.txt", generator=
|
|
120
|
+
file1 = GenFile(name="file.txt", generator=file_generator(), crc=crc)
|
|
121
121
|
file2 = LocalFile(file_path='public/2ae9dcd01a3aa.mp4', name="files/my_file2.mp4")
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
zipFly1 = ZipFly(files)
|
|
125
|
-
zipFly2 = ZipFly(files)
|
|
122
|
+
files1 = [file1, file2]
|
|
123
|
+
zipFly1 = ZipFly(files1)
|
|
126
124
|
|
|
127
125
|
# Simulating pause/resume
|
|
128
126
|
STOP_BYTE = 300
|
|
@@ -137,7 +135,13 @@ async def async_save_pause():
|
|
|
137
135
|
byte_offset += len(chunk)
|
|
138
136
|
if byte_offset >= STOP_BYTE:
|
|
139
137
|
break
|
|
140
|
-
|
|
138
|
+
|
|
139
|
+
# Later...
|
|
140
|
+
file3 = GenFile(name="file.txt", generator=file_generator(), crc=crc)
|
|
141
|
+
file4 = LocalFile(file_path='public/2ae9dcd01a3aa.mp4', name="files/my_file2.mp4")
|
|
142
|
+
files2 = [file3, file4]
|
|
143
|
+
zipFly2 = ZipFly(files2)
|
|
144
|
+
|
|
141
145
|
async def async_save_resume():
|
|
142
146
|
with open("out/file.zip", 'ab') as f_out: # Append mode
|
|
143
147
|
async for chunk in zipFly2.async_stream(byte_offset=STOP_BYTE):
|
|
@@ -154,16 +158,33 @@ If resume ZipFly instance has diffrent files than pause ZipFly instance there wi
|
|
|
154
158
|
> [!NOTE]
|
|
155
159
|
> For byte offset mode to work you must use `consts.NO_COMPRESSION` and specify `crc` for `GenFile`
|
|
156
160
|
|
|
161
|
+
> [!CAUTION]
|
|
162
|
+
> You mustn't reuse `ZipFly` instances. They should be re-created every time you call `stream()` or `async_stream()`
|
|
163
|
+
|
|
164
|
+
> [!CAUTION]
|
|
165
|
+
> You mustn't reuse `GenFile` instances.
|
|
157
166
|
|
|
158
167
|
### Other
|
|
159
168
|
Python is not optimized for async I/O operations, so to speed up async streaming the chunk_size is changed to 4MB. You can override this by passing `chunk_size` as an argument to `LocalFile`.
|
|
160
169
|
|
|
161
|
-
|
|
162
|
-
I created this library for my I Drive project.
|
|
170
|
+
I created this library for my [iDrive](https://github.com/pam-param-pam/I-Drive) project.
|
|
163
171
|
|
|
164
172
|
If you have a different use case scenario, and LocalFile and GenFile are not enough, you can extend BaseFile and everything else should work out of the box.
|
|
165
173
|
|
|
174
|
+
If you extend `BaseFile` keep in mind that `zipFly` attempts to "deepcopy" files. It will successfully
|
|
175
|
+
deepcopy `LocalFile`, so LocalFile instances can be re-used. However, it will completely skip deep-copying any file
|
|
176
|
+
instance that has a generator.
|
|
177
|
+
|
|
178
|
+
### Testing
|
|
166
179
|
|
|
180
|
+
With [pytest](https://docs.pytest.org/en/stable/) and
|
|
181
|
+
[pytest-asyncio](https://pytest-asyncio.readthedocs.io/en/stable/) installed,
|
|
182
|
+
call `pytest` from the top-level directory (same as this `README.md`)
|
|
183
|
+
to run tests.
|
|
184
|
+
The 4GB tests are slow. If your machine has enough memory (~4GB free) and a fast
|
|
185
|
+
disk/SSD, [pytest-xdist](https://pytest-xdist.readthedocs.io/en/stable/)
|
|
186
|
+
can speed things up by running tests in parallel.
|
|
187
|
+
Use it by calling `pytest -n auto`.
|
|
167
188
|
|
|
168
189
|
### PS
|
|
169
190
|
|
|
@@ -0,0 +1,116 @@
|
|
|
1
|
+
# pyproject.toml
|
|
2
|
+
|
|
3
|
+
[build-system]
|
|
4
|
+
requires = ["setuptools>=61.0.0", "wheel"]
|
|
5
|
+
build-backend = "setuptools.build_meta"
|
|
6
|
+
|
|
7
|
+
[project]
|
|
8
|
+
name = "zipFly64"
|
|
9
|
+
version = "1.2.3"
|
|
10
|
+
description = "Stream zip64 archives on the fly."
|
|
11
|
+
readme = "README.md"
|
|
12
|
+
authors = [{ name = "Pamparampampam" }]
|
|
13
|
+
license = { file = "LICENSE" }
|
|
14
|
+
classifiers = [
|
|
15
|
+
"License :: OSI Approved :: MIT License",
|
|
16
|
+
"Programming Language :: Python",
|
|
17
|
+
"Programming Language :: Python :: 3",
|
|
18
|
+
]
|
|
19
|
+
keywords = ["zip64", "zip", "streaming", "zipfly", "zipfly64"]
|
|
20
|
+
dependencies = [
|
|
21
|
+
"aiofiles >= 24.1.0",
|
|
22
|
+
]
|
|
23
|
+
requires-python = ">=3.7"
|
|
24
|
+
|
|
25
|
+
[project.urls]
|
|
26
|
+
Github = "https://github.com/pam-param-pam/ZipFly"
|
|
27
|
+
|
|
28
|
+
[tool.pytest.ini_options]
|
|
29
|
+
minversion = "6.0"
|
|
30
|
+
addopts = "-ra -q"
|
|
31
|
+
testpaths = [
|
|
32
|
+
"tests",
|
|
33
|
+
]
|
|
34
|
+
markers = [
|
|
35
|
+
"slow: marks tests as slow (deselect with '-m \"not slow\"')",
|
|
36
|
+
"asyncio",
|
|
37
|
+
]
|
|
38
|
+
|
|
39
|
+
[tool.ruff]
|
|
40
|
+
# Exclude a variety of commonly ignored directories.
|
|
41
|
+
exclude = [
|
|
42
|
+
".bzr",
|
|
43
|
+
".direnv",
|
|
44
|
+
".eggs",
|
|
45
|
+
".git",
|
|
46
|
+
".git-rewrite",
|
|
47
|
+
".hg",
|
|
48
|
+
".ipynb_checkpoints",
|
|
49
|
+
".mypy_cache",
|
|
50
|
+
".nox",
|
|
51
|
+
".pants.d",
|
|
52
|
+
".pyenv",
|
|
53
|
+
".pytest_cache",
|
|
54
|
+
".pytype",
|
|
55
|
+
".ruff_cache",
|
|
56
|
+
".svn",
|
|
57
|
+
".tox",
|
|
58
|
+
".venv",
|
|
59
|
+
".vscode",
|
|
60
|
+
"__pypackages__",
|
|
61
|
+
"_build",
|
|
62
|
+
"buck-out",
|
|
63
|
+
"build",
|
|
64
|
+
"dist",
|
|
65
|
+
"node_modules",
|
|
66
|
+
"site-packages",
|
|
67
|
+
"venv",
|
|
68
|
+
]
|
|
69
|
+
|
|
70
|
+
line-length = 190
|
|
71
|
+
indent-width = 4
|
|
72
|
+
|
|
73
|
+
# Assume Python 3.9
|
|
74
|
+
target-version = "py39"
|
|
75
|
+
|
|
76
|
+
[tool.ruff.lint]
|
|
77
|
+
# Enable Pyflakes (`F`) and a subset of the pycodestyle (`E`) codes by default.
|
|
78
|
+
# Unlike Flake8, Ruff doesn't enable pycodestyle warnings (`W`) or
|
|
79
|
+
# McCabe complexity (`C901`) by default.
|
|
80
|
+
select = ["E4", "E7", "E9", "F" ,"ALL"]
|
|
81
|
+
ignore = []
|
|
82
|
+
# Allow fix for all enabled rules (when `--fix` is provided).
|
|
83
|
+
fixable = ["ALL"]
|
|
84
|
+
# Disable fix for unused imports (`F401`).
|
|
85
|
+
unfixable = ["F401"]
|
|
86
|
+
|
|
87
|
+
# Allow unused variables when underscore-prefixed.
|
|
88
|
+
dummy-variable-rgx = "^(_+|(_+[a-zA-Z0-9_]*[a-zA-Z0-9]+?))$"
|
|
89
|
+
|
|
90
|
+
[tool.ruff.format]
|
|
91
|
+
# Like Black, use double quotes for strings.
|
|
92
|
+
quote-style = "double"
|
|
93
|
+
|
|
94
|
+
# Like Black, indent with spaces, rather than tabs.
|
|
95
|
+
indent-style = "space"
|
|
96
|
+
|
|
97
|
+
# Like Black, respect magic trailing commas.
|
|
98
|
+
skip-magic-trailing-comma = false
|
|
99
|
+
|
|
100
|
+
# Like Black, automatically detect the appropriate line ending.
|
|
101
|
+
line-ending = "auto"
|
|
102
|
+
|
|
103
|
+
# Enable auto-formatting of code examples in docstrings. Markdown,
|
|
104
|
+
# reStructuredText code/literal blocks and doctests are all supported.
|
|
105
|
+
#
|
|
106
|
+
# This is currently disabled by default, but it is planned for this
|
|
107
|
+
# to be opt-out in the future.
|
|
108
|
+
docstring-code-format = false
|
|
109
|
+
|
|
110
|
+
# Set the line length limit used when formatting code snippets in
|
|
111
|
+
# docstrings.
|
|
112
|
+
#
|
|
113
|
+
# This only has an effect when the `docstring-code-format` setting is
|
|
114
|
+
# enabled.
|
|
115
|
+
docstring-code-line-length = "dynamic"
|
|
116
|
+
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
import time
|
|
2
2
|
from abc import ABC, abstractmethod
|
|
3
|
-
from
|
|
3
|
+
from collections.abc import AsyncGenerator, Generator
|
|
4
4
|
|
|
5
5
|
from . import consts
|
|
6
6
|
from .Compressor import Compressor
|
|
@@ -18,10 +18,13 @@ class BaseFile(ABC):
|
|
|
18
18
|
def __str__(self):
|
|
19
19
|
return f"FILE[{self.name}]"
|
|
20
20
|
|
|
21
|
-
def
|
|
21
|
+
def _check_if_used(self):
|
|
22
22
|
if self.__used:
|
|
23
|
-
raise
|
|
23
|
+
raise RuntimeError("Do not re-use file instances. Recreate it.")
|
|
24
24
|
self.__used = True
|
|
25
|
+
|
|
26
|
+
def generate_processed_file_data(self) -> Generator[bytes, None, None]:
|
|
27
|
+
self._check_if_used()
|
|
25
28
|
compressor = Compressor(self)
|
|
26
29
|
|
|
27
30
|
"""
|
|
@@ -31,15 +34,12 @@ class BaseFile(ABC):
|
|
|
31
34
|
chunk = compressor.process(chunk)
|
|
32
35
|
if len(chunk) > 0:
|
|
33
36
|
yield chunk
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
+
chunk = compressor.tail()
|
|
38
|
+
if len(chunk) > 0:
|
|
39
|
+
yield chunk
|
|
37
40
|
|
|
38
41
|
async def async_generate_processed_file_data(self) -> AsyncGenerator[bytes, None]:
|
|
39
|
-
|
|
40
|
-
raise KeyError("ERROR: This file has already been used for streaming")
|
|
41
|
-
self.__used = True
|
|
42
|
-
|
|
42
|
+
self._check_if_used()
|
|
43
43
|
compressor = Compressor(self)
|
|
44
44
|
|
|
45
45
|
"""
|
|
@@ -49,9 +49,9 @@ class BaseFile(ABC):
|
|
|
49
49
|
chunk = compressor.process(chunk)
|
|
50
50
|
if len(chunk) > 0:
|
|
51
51
|
yield chunk
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
52
|
+
chunk = compressor.tail()
|
|
53
|
+
if len(chunk) > 0:
|
|
54
|
+
yield chunk
|
|
55
55
|
|
|
56
56
|
def get_mod_time(self) -> int:
|
|
57
57
|
# Extract hours, minutes, and seconds from the modification time
|
|
@@ -1,21 +1,22 @@
|
|
|
1
1
|
import time
|
|
2
|
-
from typing import Generator, AsyncGenerator,
|
|
2
|
+
from typing import Generator, AsyncGenerator, Union
|
|
3
|
+
|
|
4
|
+
from . import consts
|
|
3
5
|
from .BaseFile import BaseFile
|
|
4
6
|
|
|
5
7
|
|
|
6
8
|
class GenFile(BaseFile):
|
|
7
|
-
|
|
8
|
-
def __init__(self, name: str, generator:
|
|
9
|
+
"""DO NOT REUSE GenFile instances!"""
|
|
10
|
+
def __init__(self, name: str, generator: Union[Generator[bytes, None, None], AsyncGenerator[bytes, None]], compression_method: int = consts.NO_COMPRESSION, modification_time: float = None, size: int = None, crc: int = None):
|
|
9
11
|
super().__init__(compression_method)
|
|
10
12
|
self._name = name
|
|
11
|
-
self.
|
|
13
|
+
self._generator = generator
|
|
12
14
|
self._size = size
|
|
13
|
-
self.
|
|
15
|
+
self._overriden_crc = crc # used in byte offset mode
|
|
14
16
|
self._modification_time = modification_time if modification_time else time.time()
|
|
15
17
|
|
|
16
18
|
def _get_generator(self):
|
|
17
|
-
|
|
18
|
-
return self._generator_func()
|
|
19
|
+
return self._generator
|
|
19
20
|
|
|
20
21
|
def _generate_file_data(self) -> Generator[bytes, None, None]:
|
|
21
22
|
generator = self._get_generator()
|
|
@@ -40,7 +41,7 @@ class GenFile(BaseFile):
|
|
|
40
41
|
def size(self) -> int:
|
|
41
42
|
if self._size is not None:
|
|
42
43
|
return self._size
|
|
43
|
-
raise
|
|
44
|
+
raise RuntimeError("Archive size not known before streaming. Probably GenFile() is missing size attribute.")
|
|
44
45
|
|
|
45
46
|
@property
|
|
46
47
|
def modification_time(self) -> float:
|
|
@@ -50,6 +51,6 @@ class GenFile(BaseFile):
|
|
|
50
51
|
self._name = new_name
|
|
51
52
|
|
|
52
53
|
def calculate_crc(self) -> int:
|
|
53
|
-
if self.
|
|
54
|
-
return self.
|
|
54
|
+
if self._overriden_crc:
|
|
55
|
+
return self._overriden_crc
|
|
55
56
|
raise ValueError("Crc must be explicitly set to allow for byte offset streaming!")
|
|
@@ -1,20 +1,23 @@
|
|
|
1
1
|
import os
|
|
2
|
-
import time
|
|
3
2
|
import zlib
|
|
4
|
-
from
|
|
5
|
-
from
|
|
6
|
-
import binascii
|
|
3
|
+
from pathlib import Path
|
|
4
|
+
from typing import Generator, AsyncGenerator, Union
|
|
7
5
|
|
|
8
6
|
import aiofiles
|
|
9
7
|
|
|
10
|
-
|
|
8
|
+
from . import consts
|
|
9
|
+
from .BaseFile import BaseFile
|
|
11
10
|
|
|
12
|
-
|
|
13
|
-
|
|
11
|
+
|
|
12
|
+
class LocalFile(BaseFile):
|
|
13
|
+
def __init__(self, file_path: Union[str, Path], name: str = None, compression_method: int = consts.NO_COMPRESSION, chunk_size=None):
|
|
14
|
+
file_path = Path(file_path)
|
|
15
|
+
if not file_path.is_file():
|
|
14
16
|
raise ValueError(f"{file_path} is not a correct file path.")
|
|
15
|
-
|
|
17
|
+
|
|
18
|
+
self._file_path = str(file_path)
|
|
16
19
|
self.chunk_size = chunk_size
|
|
17
|
-
self._name = name if name else
|
|
20
|
+
self._name = name if name else self._file_path
|
|
18
21
|
super().__init__(compression_method)
|
|
19
22
|
|
|
20
23
|
async def _async_generate_file_data(self) -> AsyncGenerator[bytes, None]:
|
|
@@ -1,6 +1,3 @@
|
|
|
1
|
-
import copy
|
|
2
|
-
import types
|
|
3
|
-
from collections import defaultdict
|
|
4
1
|
from typing import List
|
|
5
2
|
|
|
6
3
|
from . import consts
|
|
@@ -63,63 +60,10 @@ I hope, that i made it a bit more clear to anyone reading, including future me.
|
|
|
63
60
|
"""
|
|
64
61
|
|
|
65
62
|
|
|
66
|
-
def process_file_names(files) -> list[BaseFile]:
|
|
67
|
-
name_counts = defaultdict(int)
|
|
68
|
-
for file in files:
|
|
69
|
-
# Split the name into base and extension
|
|
70
|
-
base, ext = file.name.rsplit('.', 1) if '.' in file.name else (file.name, '')
|
|
71
|
-
|
|
72
|
-
# Increment the count for this base name
|
|
73
|
-
name_counts[base] += 1
|
|
74
|
-
|
|
75
|
-
# Append the count to the base name if it's not the first occurrence
|
|
76
|
-
if name_counts[base] > 1:
|
|
77
|
-
new_base = f"{base} ({name_counts[base] - 1})"
|
|
78
|
-
else:
|
|
79
|
-
new_base = base
|
|
80
|
-
|
|
81
|
-
# Reassemble the filename
|
|
82
|
-
file.set_file_name(f"{new_base}.{ext}" if ext else new_base)
|
|
83
|
-
|
|
84
|
-
return files
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
def deepcopy_skip_generators(obj_list):
|
|
88
|
-
"""
|
|
89
|
-
Deep copies a list of objects while skipping generator attributes.
|
|
90
|
-
"""
|
|
91
|
-
|
|
92
|
-
def custom_copy(obj, memo=None):
|
|
93
|
-
if memo is None:
|
|
94
|
-
memo = {}
|
|
95
|
-
|
|
96
|
-
if isinstance(obj, dict):
|
|
97
|
-
return {k: custom_copy(v, memo) for k, v in obj.items()}
|
|
98
|
-
elif isinstance(obj, (list, tuple, set)):
|
|
99
|
-
return type(obj)(custom_copy(item, memo) for item in obj)
|
|
100
|
-
elif isinstance(obj, (int, float, str, bool, type(None))): # Immutable types
|
|
101
|
-
return obj
|
|
102
|
-
elif isinstance(obj, (types.GeneratorType, types.AsyncGeneratorType)): # Skip generators
|
|
103
|
-
return obj
|
|
104
|
-
elif hasattr(obj, '__dict__'): # Handle custom objects
|
|
105
|
-
new_obj = copy.copy(obj) # Shallow copy first
|
|
106
|
-
for key, value in obj.__dict__.items():
|
|
107
|
-
setattr(new_obj, key, custom_copy(value, memo))
|
|
108
|
-
return new_obj
|
|
109
|
-
else:
|
|
110
|
-
return copy.deepcopy(obj, memo) # Default deep copy
|
|
111
|
-
|
|
112
|
-
return [custom_copy(obj) for obj in obj_list]
|
|
113
|
-
|
|
114
|
-
|
|
115
63
|
class ZipBase:
|
|
116
|
-
|
|
117
64
|
def __init__(self, files: List[BaseFile]):
|
|
118
65
|
self.__version_to_extract = 45
|
|
119
|
-
|
|
120
|
-
# process file names to make sure there are no duplicates
|
|
121
|
-
processed_files = process_file_names(deepcopy_skip_generators(files))
|
|
122
|
-
self.files = processed_files
|
|
66
|
+
self.files = files
|
|
123
67
|
|
|
124
68
|
self.__offset = 0 # Tracks the current offset within the ZIP archive
|
|
125
69
|
self._cdir_size = 0
|