zipFly64 1.2.0__tar.gz → 1.2.3__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
- Metadata-Version: 2.2
1
+ Metadata-Version: 2.4
2
2
  Name: zipFly64
3
- Version: 1.2.0
3
+ Version: 1.2.3
4
4
  Summary: Stream zip64 archives on the fly.
5
5
  Author: Pamparampampam
6
6
  License: MIT License
@@ -33,6 +33,7 @@ Requires-Python: >=3.7
33
33
  Description-Content-Type: text/markdown
34
34
  License-File: LICENSE
35
35
  Requires-Dist: aiofiles>=24.1.0
36
+ Dynamic: license-file
36
37
 
37
38
  # ZipFly
38
39
 
@@ -64,6 +65,7 @@ Generating ZIPs on-demand in a web server is a typical use case for zipFly.**
64
65
  - Only 1 dependency
65
66
  - Automatic detection and changing of duplicate names
66
67
  - `Zip64` format compatible files
68
+ - **21.37%** test coverage
67
69
 
68
70
 
69
71
  This library is based upon [this library](https://github.com/kbbdy/zipstream) <sub>_(this library was a piece of work...)_</sub>
@@ -91,8 +93,7 @@ with open("out/file.zip", 'wb') as f_out:
91
93
  for chunk in zipFly.stream():
92
94
  f_out.write(chunk)
93
95
  ```
94
- > [!CAUTION]
95
- > You mustn't reuse `ZipFly` instances. They should be re-created everytime you call `stream()` or `async_stream()`
96
+
96
97
 
97
98
  ### Supports dynamically created files
98
99
  ```py
@@ -106,7 +107,7 @@ def file_generator():
106
107
 
107
108
  # size is optional, it allows to calculate the total size of the archive before any data is generated
108
109
  # modification_time in epoch time, defaults to time.time()
109
- file1 = GenFile(name="file.txt", generator=lambda: file_generator(), modification_time=time.time(), size=size, compression_method=consts.COMPRESSION_DEFLATE)
110
+ file1 = GenFile(name="file.txt", generator=file_generator(), modification_time=time.time(), size=size, compression_method=consts.COMPRESSION_DEFLATE)
110
111
  file2 = LocalFile(file_path='files/as61aade2ebfd.mp4', compression_method=consts.NO_COMPRESSION) # or consts.COMPRESSION_DEFLATE
111
112
 
112
113
  files = [file1, file2]
@@ -126,7 +127,7 @@ for chunk in zipFly.stream():
126
127
  ```py
127
128
  import asyncio
128
129
  from zipFly import ZipFly, LocalFile, consts, GenFile
129
- file1 = GenFile(name="file.txt", generator=lambda: file_generator())
130
+ file1 = GenFile(name="file.txt", generator=file_generator())
130
131
  file2 = LocalFile(file_path='public/2ae9dcd01a3aa.mp4', name="files/my_file2.mp4")
131
132
 
132
133
  files = [file1, file2]
@@ -153,12 +154,10 @@ If you use `LocalFile` then it's not a problem as it can very fast go tru the en
153
154
 
154
155
  ```py
155
156
 
156
- file1 = GenFile(name="file.txt", generator=lambda: file_generator(), crc=crc)
157
+ file1 = GenFile(name="file.txt", generator=file_generator(), crc=crc)
157
158
  file2 = LocalFile(file_path='public/2ae9dcd01a3aa.mp4', name="files/my_file2.mp4")
158
- files = [file1, file2]
159
-
160
- zipFly1 = ZipFly(files)
161
- zipFly2 = ZipFly(files)
159
+ files1 = [file1, file2]
160
+ zipFly1 = ZipFly(files1)
162
161
 
163
162
  # Simulating pause/resume
164
163
  STOP_BYTE = 300
@@ -173,7 +172,13 @@ async def async_save_pause():
173
172
  byte_offset += len(chunk)
174
173
  if byte_offset >= STOP_BYTE:
175
174
  break
176
-
175
+
176
+ # Later...
177
+ file3 = GenFile(name="file.txt", generator=file_generator(), crc=crc)
178
+ file4 = LocalFile(file_path='public/2ae9dcd01a3aa.mp4', name="files/my_file2.mp4")
179
+ files2 = [file3, file4]
180
+ zipFly2 = ZipFly(files2)
181
+
177
182
  async def async_save_resume():
178
183
  with open("out/file.zip", 'ab') as f_out: # Append mode
179
184
  async for chunk in zipFly2.async_stream(byte_offset=STOP_BYTE):
@@ -190,16 +195,33 @@ If resume ZipFly instance has diffrent files than pause ZipFly instance there wi
190
195
  > [!NOTE]
191
196
  > For byte offset mode to work you must use `consts.NO_COMPRESSION` and specify `crc` for `GenFile`
192
197
 
198
+ > [!CAUTION]
199
+ > You mustn't reuse `ZipFly` instances. They should be re-created every time you call `stream()` or `async_stream()`
200
+
201
+ > [!CAUTION]
202
+ > You mustn't reuse `GenFile` instances.
193
203
 
194
204
  ### Other
195
205
  Python is not optimized for async I/O operations, so to speed up async streaming the chunk_size is changed to 4MB; you can override this by passing `chunk_size` as an argument to `LocalFile`.
196
206
 
197
-
198
- I created this library for my I Drive project.
207
+ I created this library for my [iDrive](https://github.com/pam-param-pam/I-Drive) project.
199
208
 
200
209
  If you have a different use case scenario, and LocalFile and GenFile are not enough, you can extend BaseFile and everything else should work out of the box.
201
210
 
211
+ If you extend `BaseFile` keep in mind that `zipFly` attempts to "deepcopy" files. It will successfully
212
+ deepcopy `LocalFile`, so LocalFile instances can be re-used. However, it will completely skip deep-copying any file
213
+ instance that has a generator.
214
+
215
+ ### Testing
202
216
 
217
+ With [pytest](https://docs.pytest.org/en/stable/) and
218
+ [pytest-asyncio](https://pytest-asyncio.readthedocs.io/en/stable/) installed,
219
+ call `pytest` from the top-level directory (same as this `README.md`)
220
+ to run tests.
221
+ The 4GB tests are slow. If your machine has enough memory (~4GB free) and a fast
222
+ disk/SSD, [pytest-xdist](https://pytest-xdist.readthedocs.io/en/stable/)
223
+ can speed things up by running tests in parallel.
224
+ Use it by calling `pytest -n auto`.
203
225
 
204
226
  ### PS
205
227
 
@@ -28,6 +28,7 @@ Generating ZIPs on-demand in a web server is a typical use case for zipFly.**
28
28
  - Only 1 dependency
29
29
  - Automatic detection and changing of duplicate names
30
30
  - `Zip64` format compatible files
31
+ - **21.37%** test coverage
31
32
 
32
33
 
33
34
  This library is based upon [this library](https://github.com/kbbdy/zipstream) <sub>_(this library was a piece of work...)_</sub>
@@ -55,8 +56,7 @@ with open("out/file.zip", 'wb') as f_out:
55
56
  for chunk in zipFly.stream():
56
57
  f_out.write(chunk)
57
58
  ```
58
- > [!CAUTION]
59
- > You mustn't reuse `ZipFly` instances. They should be re-created everytime you call `stream()` or `async_stream()`
59
+
60
60
 
61
61
  ### Supports dynamically created files
62
62
  ```py
@@ -70,7 +70,7 @@ def file_generator():
70
70
 
71
71
  # size is optional, it allows to calculate the total size of the archive before any data is generated
72
72
  # modification_time in epoch time, defaults to time.time()
73
- file1 = GenFile(name="file.txt", generator=lambda: file_generator(), modification_time=time.time(), size=size, compression_method=consts.COMPRESSION_DEFLATE)
73
+ file1 = GenFile(name="file.txt", generator=file_generator(), modification_time=time.time(), size=size, compression_method=consts.COMPRESSION_DEFLATE)
74
74
  file2 = LocalFile(file_path='files/as61aade2ebfd.mp4', compression_method=consts.NO_COMPRESSION) # or consts.COMPRESSION_DEFLATE
75
75
 
76
76
  files = [file1, file2]
@@ -90,7 +90,7 @@ for chunk in zipFly.stream():
90
90
  ```py
91
91
  import asyncio
92
92
  from zipFly import ZipFly, LocalFile, consts, GenFile
93
- file1 = GenFile(name="file.txt", generator=lambda: file_generator())
93
+ file1 = GenFile(name="file.txt", generator=file_generator())
94
94
  file2 = LocalFile(file_path='public/2ae9dcd01a3aa.mp4', name="files/my_file2.mp4")
95
95
 
96
96
  files = [file1, file2]
@@ -117,12 +117,10 @@ If you use `LocalFile` then it's not a problem as it can very fast go tru the en
117
117
 
118
118
  ```py
119
119
 
120
- file1 = GenFile(name="file.txt", generator=lambda: file_generator(), crc=crc)
120
+ file1 = GenFile(name="file.txt", generator=file_generator(), crc=crc)
121
121
  file2 = LocalFile(file_path='public/2ae9dcd01a3aa.mp4', name="files/my_file2.mp4")
122
- files = [file1, file2]
123
-
124
- zipFly1 = ZipFly(files)
125
- zipFly2 = ZipFly(files)
122
+ files1 = [file1, file2]
123
+ zipFly1 = ZipFly(files1)
126
124
 
127
125
  # Simulating pause/resume
128
126
  STOP_BYTE = 300
@@ -137,7 +135,13 @@ async def async_save_pause():
137
135
  byte_offset += len(chunk)
138
136
  if byte_offset >= STOP_BYTE:
139
137
  break
140
-
138
+
139
+ # Later...
140
+ file3 = GenFile(name="file.txt", generator=file_generator(), crc=crc)
141
+ file4 = LocalFile(file_path='public/2ae9dcd01a3aa.mp4', name="files/my_file2.mp4")
142
+ files2 = [file3, file4]
143
+ zipFly2 = ZipFly(files2)
144
+
141
145
  async def async_save_resume():
142
146
  with open("out/file.zip", 'ab') as f_out: # Append mode
143
147
  async for chunk in zipFly2.async_stream(byte_offset=STOP_BYTE):
@@ -154,16 +158,33 @@ If resume ZipFly instance has diffrent files than pause ZipFly instance there wi
154
158
  > [!NOTE]
155
159
  > For byte offset mode to work you must use `consts.NO_COMPRESSION` and specify `crc` for `GenFile`
156
160
 
161
+ > [!CAUTION]
162
+ > You mustn't reuse `ZipFly` instances. They should be re-created every time you call `stream()` or `async_stream()`
163
+
164
+ > [!CAUTION]
165
+ > You mustn't reuse `GenFile` instances.
157
166
 
158
167
  ### Other
159
168
  Python is not optimized for async I/O operations, so to speed up async streaming the chunk_size is changed to 4MB; you can override this by passing `chunk_size` as an argument to `LocalFile`.
160
169
 
161
-
162
- I created this library for my I Drive project.
170
+ I created this library for my [iDrive](https://github.com/pam-param-pam/I-Drive) project.
163
171
 
164
172
  If you have a different use case scenario, and LocalFile and GenFile are not enough, you can extend BaseFile and everything else should work out of the box.
165
173
 
174
+ If you extend `BaseFile` keep in mind that `zipFly` attempts to "deepcopy" files. It will successfully
175
+ deepcopy `LocalFile`, so LocalFile instances can be re-used. However, it will completely skip deep-copying any file
176
+ instance that has a generator.
177
+
178
+ ### Testing
166
179
 
180
+ With [pytest](https://docs.pytest.org/en/stable/) and
181
+ [pytest-asyncio](https://pytest-asyncio.readthedocs.io/en/stable/) installed,
182
+ call `pytest` from the top-level directory (same as this `README.md`)
183
+ to run tests.
184
+ The 4GB tests are slow. If your machine has enough memory (~4GB free) and a fast
185
+ disk/SSD, [pytest-xdist](https://pytest-xdist.readthedocs.io/en/stable/)
186
+ can speed things up by running tests in parallel.
187
+ Use it by calling `pytest -n auto`.
167
188
 
168
189
  ### PS
169
190
 
@@ -0,0 +1,116 @@
1
+ # pyproject.toml
2
+
3
+ [build-system]
4
+ requires = ["setuptools>=61.0.0", "wheel"]
5
+ build-backend = "setuptools.build_meta"
6
+
7
+ [project]
8
+ name = "zipFly64"
9
+ version = "1.2.3"
10
+ description = "Stream zip64 archives on the fly."
11
+ readme = "README.md"
12
+ authors = [{ name = "Pamparampampam" }]
13
+ license = { file = "LICENSE" }
14
+ classifiers = [
15
+ "License :: OSI Approved :: MIT License",
16
+ "Programming Language :: Python",
17
+ "Programming Language :: Python :: 3",
18
+ ]
19
+ keywords = ["zip64", "zip", "streaming", "zipfly", "zipfly64"]
20
+ dependencies = [
21
+ "aiofiles >= 24.1.0",
22
+ ]
23
+ requires-python = ">=3.7"
24
+
25
+ [project.urls]
26
+ Github = "https://github.com/pam-param-pam/ZipFly"
27
+
28
+ [tool.pytest.ini_options]
29
+ minversion = "6.0"
30
+ addopts = "-ra -q"
31
+ testpaths = [
32
+ "tests",
33
+ ]
34
+ markers = [
35
+ "slow: marks tests as slow (deselect with '-m \"not slow\"')",
36
+ "asyncio",
37
+ ]
38
+
39
+ [tool.ruff]
40
+ # Exclude a variety of commonly ignored directories.
41
+ exclude = [
42
+ ".bzr",
43
+ ".direnv",
44
+ ".eggs",
45
+ ".git",
46
+ ".git-rewrite",
47
+ ".hg",
48
+ ".ipynb_checkpoints",
49
+ ".mypy_cache",
50
+ ".nox",
51
+ ".pants.d",
52
+ ".pyenv",
53
+ ".pytest_cache",
54
+ ".pytype",
55
+ ".ruff_cache",
56
+ ".svn",
57
+ ".tox",
58
+ ".venv",
59
+ ".vscode",
60
+ "__pypackages__",
61
+ "_build",
62
+ "buck-out",
63
+ "build",
64
+ "dist",
65
+ "node_modules",
66
+ "site-packages",
67
+ "venv",
68
+ ]
69
+
70
+ line-length = 190
71
+ indent-width = 4
72
+
73
+ # Assume Python 3.9
74
+ target-version = "py39"
75
+
76
+ [tool.ruff.lint]
77
+ # Enable Pyflakes (`F`) and a subset of the pycodestyle (`E`) codes by default.
78
+ # Unlike Flake8, Ruff doesn't enable pycodestyle warnings (`W`) or
79
+ # McCabe complexity (`C901`) by default.
80
+ select = ["E4", "E7", "E9", "F" ,"ALL"]
81
+ ignore = []
82
+ # Allow fix for all enabled rules (when `--fix`) is provided.
83
+ fixable = ["ALL"]
84
+ # Disable fix for unused imports (`F401`).
85
+ unfixable = ["F401"]
86
+
87
+ # Allow unused variables when underscore-prefixed.
88
+ dummy-variable-rgx = "^(_+|(_+[a-zA-Z0-9_]*[a-zA-Z0-9]+?))$"
89
+
90
+ [tool.ruff.format]
91
+ # Like Black, use double quotes for strings.
92
+ quote-style = "double"
93
+
94
+ # Like Black, indent with spaces, rather than tabs.
95
+ indent-style = "space"
96
+
97
+ # Like Black, respect magic trailing commas.
98
+ skip-magic-trailing-comma = false
99
+
100
+ # Like Black, automatically detect the appropriate line ending.
101
+ line-ending = "auto"
102
+
103
+ # Enable auto-formatting of code examples in docstrings. Markdown,
104
+ # reStructuredText code/literal blocks and doctests are all supported.
105
+ #
106
+ # This is currently disabled by default, but it is planned for this
107
+ # to be opt-out in the future.
108
+ docstring-code-format = false
109
+
110
+ # Set the line length limit used when formatting code snippets in
111
+ # docstrings.
112
+ #
113
+ # This only has an effect when the `docstring-code-format` setting is
114
+ # enabled.
115
+ docstring-code-line-length = "dynamic"
116
+
@@ -1,6 +1,6 @@
1
1
  import time
2
2
  from abc import ABC, abstractmethod
3
- from typing import Generator, AsyncGenerator
3
+ from collections.abc import AsyncGenerator, Generator
4
4
 
5
5
  from . import consts
6
6
  from .Compressor import Compressor
@@ -18,10 +18,13 @@ class BaseFile(ABC):
18
18
  def __str__(self):
19
19
  return f"FILE[{self.name}]"
20
20
 
21
- def generate_processed_file_data(self) -> Generator[bytes, None, None]:
21
+ def _check_if_used(self):
22
22
  if self.__used:
23
- raise KeyError("ERROR: This file has already been used for streaming")
23
+ raise RuntimeError("Do not re-use file instances. Recreate it.")
24
24
  self.__used = True
25
+
26
+ def generate_processed_file_data(self) -> Generator[bytes, None, None]:
27
+ self._check_if_used()
25
28
  compressor = Compressor(self)
26
29
 
27
30
  """
@@ -31,15 +34,12 @@ class BaseFile(ABC):
31
34
  chunk = compressor.process(chunk)
32
35
  if len(chunk) > 0:
33
36
  yield chunk
34
- chunk = compressor.tail()
35
- if len(chunk) > 0:
36
- yield chunk
37
+ chunk = compressor.tail()
38
+ if len(chunk) > 0:
39
+ yield chunk
37
40
 
38
41
  async def async_generate_processed_file_data(self) -> AsyncGenerator[bytes, None]:
39
- if self.__used:
40
- raise KeyError("ERROR: This file has already been used for streaming")
41
- self.__used = True
42
-
42
+ self._check_if_used()
43
43
  compressor = Compressor(self)
44
44
 
45
45
  """
@@ -49,9 +49,9 @@ class BaseFile(ABC):
49
49
  chunk = compressor.process(chunk)
50
50
  if len(chunk) > 0:
51
51
  yield chunk
52
- chunk = compressor.tail()
53
- if len(chunk) > 0:
54
- yield chunk
52
+ chunk = compressor.tail()
53
+ if len(chunk) > 0:
54
+ yield chunk
55
55
 
56
56
  def get_mod_time(self) -> int:
57
57
  # Extract hours, minutes, and seconds from the modification time
@@ -1,21 +1,22 @@
1
1
  import time
2
- from typing import Generator, AsyncGenerator, Callable
2
+ from typing import Generator, AsyncGenerator, Union
3
+
4
+ from . import consts
3
5
  from .BaseFile import BaseFile
4
6
 
5
7
 
6
8
  class GenFile(BaseFile):
7
-
8
- def __init__(self, name: str, generator: Callable, compression_method: int = None, modification_time: float = None, size: int = None, crc: int = None):
9
+ """DO NOT REUSE GenFile instances!"""
10
+ def __init__(self, name: str, generator: Union[Generator[bytes, None, None], AsyncGenerator[bytes, None]], compression_method: int = consts.NO_COMPRESSION, modification_time: float = None, size: int = None, crc: int = None):
9
11
  super().__init__(compression_method)
10
12
  self._name = name
11
- self._generator_func = generator
13
+ self._generator = generator
12
14
  self._size = size
13
- self._crc = crc
15
+ self._overriden_crc = crc # used in byte offset mode
14
16
  self._modification_time = modification_time if modification_time else time.time()
15
17
 
16
18
  def _get_generator(self):
17
- """Return a new generator instance every time this is called."""
18
- return self._generator_func()
19
+ return self._generator
19
20
 
20
21
  def _generate_file_data(self) -> Generator[bytes, None, None]:
21
22
  generator = self._get_generator()
@@ -40,7 +41,7 @@ class GenFile(BaseFile):
40
41
  def size(self) -> int:
41
42
  if self._size is not None:
42
43
  return self._size
43
- raise ValueError("Archive size not known before streaming. Probably GenFile() is missing size attribute.")
44
+ raise RuntimeError("Archive size not known before streaming. Probably GenFile() is missing size attribute.")
44
45
 
45
46
  @property
46
47
  def modification_time(self) -> float:
@@ -50,6 +51,6 @@ class GenFile(BaseFile):
50
51
  self._name = new_name
51
52
 
52
53
  def calculate_crc(self) -> int:
53
- if self._crc:
54
- return self._crc
54
+ if self._overriden_crc:
55
+ return self._overriden_crc
55
56
  raise ValueError("Crc must be explicitly set to allow for byte offset streaming!")
@@ -1,20 +1,23 @@
1
1
  import os
2
- import time
3
2
  import zlib
4
- from typing import Generator, AsyncGenerator
5
- from .BaseFile import BaseFile
6
- import binascii
3
+ from pathlib import Path
4
+ from typing import Generator, AsyncGenerator, Union
7
5
 
8
6
  import aiofiles
9
7
 
10
- class LocalFile(BaseFile):
8
+ from . import consts
9
+ from .BaseFile import BaseFile
11
10
 
12
- def __init__(self, file_path: str, name: str = None, compression_method: int = None, chunk_size=None):
13
- if not os.path.isfile(file_path):
11
+
12
+ class LocalFile(BaseFile):
13
+ def __init__(self, file_path: Union[str, Path], name: str = None, compression_method: int = consts.NO_COMPRESSION, chunk_size=None):
14
+ file_path = Path(file_path)
15
+ if not file_path.is_file():
14
16
  raise ValueError(f"{file_path} is not a correct file path.")
15
- self._file_path = file_path
17
+
18
+ self._file_path = str(file_path)
16
19
  self.chunk_size = chunk_size
17
- self._name = name if name else file_path
20
+ self._name = name if name else self._file_path
18
21
  super().__init__(compression_method)
19
22
 
20
23
  async def _async_generate_file_data(self) -> AsyncGenerator[bytes, None]:
@@ -1,6 +1,3 @@
1
- import copy
2
- import types
3
- from collections import defaultdict
4
1
  from typing import List
5
2
 
6
3
  from . import consts
@@ -63,63 +60,10 @@ I hope, that i made it a bit more clear to anyone reading, including future me.
63
60
  """
64
61
 
65
62
 
66
- def process_file_names(files) -> list[BaseFile]:
67
- name_counts = defaultdict(int)
68
- for file in files:
69
- # Split the name into base and extension
70
- base, ext = file.name.rsplit('.', 1) if '.' in file.name else (file.name, '')
71
-
72
- # Increment the count for this base name
73
- name_counts[base] += 1
74
-
75
- # Append the count to the base name if it's not the first occurrence
76
- if name_counts[base] > 1:
77
- new_base = f"{base} ({name_counts[base] - 1})"
78
- else:
79
- new_base = base
80
-
81
- # Reassemble the filename
82
- file.set_file_name(f"{new_base}.{ext}" if ext else new_base)
83
-
84
- return files
85
-
86
-
87
- def deepcopy_skip_generators(obj_list):
88
- """
89
- Deep copies a list of objects while skipping generator attributes.
90
- """
91
-
92
- def custom_copy(obj, memo=None):
93
- if memo is None:
94
- memo = {}
95
-
96
- if isinstance(obj, dict):
97
- return {k: custom_copy(v, memo) for k, v in obj.items()}
98
- elif isinstance(obj, (list, tuple, set)):
99
- return type(obj)(custom_copy(item, memo) for item in obj)
100
- elif isinstance(obj, (int, float, str, bool, type(None))): # Immutable types
101
- return obj
102
- elif isinstance(obj, (types.GeneratorType, types.AsyncGeneratorType)): # Skip generators
103
- return obj
104
- elif hasattr(obj, '__dict__'): # Handle custom objects
105
- new_obj = copy.copy(obj) # Shallow copy first
106
- for key, value in obj.__dict__.items():
107
- setattr(new_obj, key, custom_copy(value, memo))
108
- return new_obj
109
- else:
110
- return copy.deepcopy(obj, memo) # Default deep copy
111
-
112
- return [custom_copy(obj) for obj in obj_list]
113
-
114
-
115
63
  class ZipBase:
116
-
117
64
  def __init__(self, files: List[BaseFile]):
118
65
  self.__version_to_extract = 45
119
-
120
- # process file names to make sure there are no duplicates
121
- processed_files = process_file_names(deepcopy_skip_generators(files))
122
- self.files = processed_files
66
+ self.files = files
123
67
 
124
68
  self.__offset = 0 # Tracks the current offset within the ZIP archive
125
69
  self._cdir_size = 0