streamshatter 1.0.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,3 @@
1
+ __pycache__
2
+ dist
3
+ cache
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2025 Thomas Xin
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,40 @@
1
+ Metadata-Version: 2.4
2
+ Name: streamshatter
3
+ Version: 1.0.0
4
+ Summary: Multiplexed chunked file downloader
5
+ Author-email: Thomas Xin <thomasxin@gmail.com>
6
+ License: MIT
7
+ License-File: LICENSE
8
+ Classifier: Programming Language :: Python :: 3
9
+ Requires-Python: >=3.8
10
+ Requires-Dist: niquests>=3.15.2
11
+ Requires-Dist: urllib3-future>=2.13.908
12
+ Description-Content-Type: text/markdown
13
+
14
+ # StreamShatter
15
+ Originally a very basic script for downloading files from servers with inconsistent connections, this project has been revisited and modernised to use https://github.com/jawah/niquests for multiplexing performance, both for private testing of niquests' stability, and for general improvements in functionality for those who still have use for such a tool.
16
+
17
+ StreamShatter takes advantage of the `Range` HTTP header to dynamically allocate multiple chunks, starting with one streaming request and gradually bisecting it while bandwidth permits, all without restarting the download. This allows for single, large file downloads from hosts that, whether intentionally or unintentionally, have degraded throughput. The individual chunks also serve as checkpoints if and when connections break.
18
+
19
+ # Installation
20
+ - Install [python](https://www.python.org) and [pip](https://pip.pypa.io/en/stable/)
21
+ - Install StreamShatter as a package:
22
+ `pip install streamshatter`
23
+
24
+ ## Usage
25
+ ```ini
26
+ usage: streamshatter [-h] [-V] [-c CACHE_FOLDER] [-l LIMIT] url [filename]
27
+
28
+ Multiplexed chunked file downloader
29
+
30
+ positional arguments:
31
+ url Target URL
32
+ filename Output filename
33
+
34
+ options:
35
+ -h, --help show this help message and exit
36
+ -V, --version show program's version number and exit
37
+ -c, --cache-folder CACHE_FOLDER
38
+ Folder to store temporary files
39
+ -l, --limit LIMIT Limits the amount of chunks to download
40
+ ```
@@ -0,0 +1,27 @@
1
+ # StreamShatter
2
+ Originally a very basic script for downloading files from servers with inconsistent connections, this project has been revisited and modernised to use https://github.com/jawah/niquests for multiplexing performance, both for private testing of niquests' stability, and for general improvements in functionality for those who still have use for such a tool.
3
+
4
+ StreamShatter takes advantage of the `Range` HTTP header to dynamically allocate multiple chunks, starting with one streaming request and gradually bisecting it while bandwidth permits, all without restarting the download. This allows for single, large file downloads from hosts that, whether intentionally or unintentionally, have degraded throughput. The individual chunks also serve as checkpoints if and when connections break.
5
+
6
+ # Installation
7
+ - Install [python](https://www.python.org) and [pip](https://pip.pypa.io/en/stable/)
8
+ - Install StreamShatter as a package:
9
+ `pip install streamshatter`
10
+
11
+ ## Usage
12
+ ```ini
13
+ usage: streamshatter [-h] [-V] [-c CACHE_FOLDER] [-l LIMIT] url [filename]
14
+
15
+ Multiplexed chunked file downloader
16
+
17
+ positional arguments:
18
+ url Target URL
19
+ filename Output filename
20
+
21
+ options:
22
+ -h, --help show this help message and exit
23
+ -V, --version show program's version number and exit
24
+ -c, --cache-folder CACHE_FOLDER
25
+ Folder to store temporary files
26
+ -l, --limit LIMIT Limits the amount of chunks to download
27
+ ```
@@ -0,0 +1,20 @@
1
+ [build-system]
2
+ requires = ["hatchling"]
3
+ build-backend = "hatchling.build"
4
+
5
+ [project]
6
+ name = "streamshatter"
7
+ version = "1.0.0"
8
+ dependencies = [
9
+ "niquests>=3.15.2",
10
+ "urllib3-future>=2.13.908",
11
+ ]
12
+ description = "Multiplexed chunked file downloader"
13
+ readme = "README.md"
14
+ requires-python = ">=3.8"
15
+ authors = [{ name = "Thomas Xin", email = "thomasxin@gmail.com" }]
16
+ license = { text = "MIT" }
17
+ classifiers = ["Programming Language :: Python :: 3"]
18
+
19
+ [project.scripts]
20
+ streamshatter = "streamshatter:main"
@@ -0,0 +1,257 @@
1
+ import asyncio
2
+ import base64
3
+ import hashlib
4
+ import json
5
+ from math import ceil, isfinite
6
+ import os
7
+ import random
8
+ import shutil
9
+ import time
10
+ from urllib.parse import quote_plus
11
+ import niquests
12
+
13
# HTTP session shared by every request issued from this module.
session = niquests.AsyncSession()
# A chunk is only split while more than this many bytes (1 MiB) remain in it.
chunk_size = 1 << 20
# Number of bytes requested from a response stream per read (64 KiB).
base_chunk = 1 << 16
# Progress-bar cells, indexed from "no progress" to "complete": ANSI 256-colour
# code 16, then codes 232-255 in order, then code 15.
COLOURS = [f"\x1b[38;5;{code}m█" for code in (16, *range(232, 256), 15)]
19
+
20
def shash(s):
    """Return the URL-safe base64 SHA-256 digest of *s* without padding.

    Bytes-like input (``bytes``, ``bytearray``, ``memoryview``) is hashed
    as-is; anything else is converted with ``str()`` and encoded as UTF-8.
    The result is always 43 ASCII characters.
    """
    # isinstance() instead of an exact type check, so bytes subclasses and
    # other bytes-like objects hash their content rather than their repr().
    if not isinstance(s, (bytes, bytearray, memoryview)):
        s = str(s).encode("utf-8")
    digest = hashlib.sha256(s).digest()
    return base64.urlsafe_b64encode(digest).rstrip(b"=").decode("ascii")
21
def uhash(s):
    """Return a short identifier for the URL string *s*.

    The result is whichever is shorter: the ``shash`` digest of *s*, or *s*
    with a leading ``https://`` removed and percent-encoded via
    ``quote_plus``. On a tie the digest is returned.
    """
    prefix = "https://"
    # str.removeprefix() only exists on Python 3.9+, while the package
    # declares support for 3.8, so strip the scheme by hand.
    stripped = s[len(prefix):] if s.startswith(prefix) else s
    return min([shash(s), quote_plus(stripped)], key=len)
22
def header():
    """Build a fresh set of randomised HTTP request headers.

    Returns a dict with a browser-like ``User-Agent`` whose minor version is
    random, a ``DNT`` flag, and an ``X-Forwarded-For`` made of four random
    octets.
    """
    minor = random.randint(1, 9)
    octets = [str(random.randint(0, 255)) for _ in range(4)]
    result = {}
    result["User-Agent"] = f"Mozilla/5.{minor} (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.88 Safari/537.36"
    result["DNT"] = "1"
    result["X-Forwarded-For"] = ".".join(octets)
    return result
28
def nth_file(tag, chunk=0):
    """Return the cache file name for chunk number *chunk* of *tag*.

    The chunk index is written as the fewest big-endian bytes that hold it
    (none at all for 0), URL-safe base64 encoded with padding stripped, and
    joined to *tag* with a ``~``.
    """
    width = ceil(chunk.bit_length() / 8)
    raw = chunk.to_bytes(width, "big")
    encoded = base64.urlsafe_b64encode(raw).rstrip(b"==")
    return encoded.decode("ascii") + "~" + tag
30
def box(i):
    """Return the coloured block character representing progress ratio *i*.

    Negative ratios yield a red block; otherwise the ratio is capped at 1 and
    scaled onto the ``COLOURS`` list.
    """
    if i < 0:
        return "\x1b[38;5;196m█"
    last_index = len(COLOURS) - 1
    capped = min(1, i)
    return COLOURS[round(capped * last_index)]
34
def time_disp(s, rounded=True):
    """Format a duration of *s* seconds as ``[[D:]H:]M:SS``.

    Args:
        s: Duration in seconds. Non-finite values are returned via ``str()``.
        rounded: When true (the default), round to whole seconds first.
            When false, a float keeps its fractional seconds (limited to
            microseconds so float noise is not printed).

    Returns:
        The leading field is unpadded and every following field is padded to
        two digits, e.g. ``0:05``, ``1:01:01`` or ``1:01:01:01``. Negative
        durations are prefixed with ``-``.
    """
    if not isfinite(s):
        return str(s)
    if rounded:
        s = round(s)
    elif isinstance(s, float):
        s = round(s, 6)
    # Format the magnitude and re-attach the sign, rather than letting the
    # modulo arithmetic wrap negative values into unrelated positive fields.
    sign = "-" if s < 0 else ""
    s = abs(s)
    minutes_total, seconds = divmod(s, 60)
    if isinstance(seconds, float):
        seconds = round(seconds, 6)
    sec_text = str(seconds)
    # Pad by value, not by string length, so "5.5" becomes "05.5".
    if seconds < 10:
        sec_text = "0" + sec_text
    hours_total, minutes = divmod(int(minutes_total), 60)
    days, hours = divmod(hours_total, 24)
    if days:
        head = f"{days}:{hours:02d}:{minutes:02d}"
    elif hours:
        head = f"{hours}:{minutes:02d}"
    else:
        head = str(minutes)
    return f"{sign}{head}:{sec_text}"
57
def calc_bps(bps):
    """Format a bit rate with a decimal SI suffix.

    The value is divided by 1000 until its magnitude is below 1000, rounded
    to four decimal places, and printed without a fractional part when it is
    whole (``"1.5 kbps"``, ``"8 Mbps"``). Returns ``"ERR"`` when the value
    does not fit any suffix up to ``Ybps`` (including infinities and NaN).
    """
    # int.is_integer() only exists from Python 3.12; normalising to float
    # keeps integer input working on the older versions the package supports.
    bps = float(bps)
    for suffix in ("bps", "kbps", "Mbps", "Gbps", "Tbps", "Pbps", "Ebps", "Zbps", "Ybps"):
        bps = round(bps, 4)
        # Compare the magnitude so negative rates are scaled like positive ones.
        if abs(bps) < 1000:
            if bps.is_integer():
                bps = int(bps)
            return f"{bps} {suffix}"
        bps /= 1000
    return "ERR"
66
def sample(arr, n):
    """Downsample the numeric sequence *arr* to at most *n* values.

    While more than ``2 * n`` values remain, adjacent pairs are averaged.
    If more than *n* values are still left, *n* evenly spaced points are
    linearly interpolated. A sequence that already fits is returned as-is.
    The input is never modified. Returns ``[]`` when *n* is not positive.
    """
    if n <= 0:
        # Nothing can be sampled; also prevents the halving loop from spinning.
        return []
    while len(arr) > n * 2:
        halved = [(a + b) / 2 for a, b in zip(arr[::2], arr[1::2])]
        if len(arr) % 2:
            # zip() stops at the shorter slice; keep the unpaired last value.
            halved.append(arr[-1])
        arr = halved
    if len(arr) > n:
        indices = [x * len(arr) / n for x in range(n)]
        # Pad a copy (not the caller's list) so index + 1 is always valid.
        padded = list(arr)
        padded.append(padded[-1])
        return [padded[int(i)] * (1 - i % 1) + padded[int(i) + 1] * (i % 1) for i in indices]
    return arr
74
def update_progress(ctx, force=False, use_original_timestamp=False):
    """Redraw the progress line and decide whether a chunk may be split.

    Args:
        ctx: Shared download state. Reads ``chunkinfo``, ``start``, ``size``,
            ``forkable`` and ``limit``; reads and writes ``last``,
            ``last_split`` and ``last_bps``.
        force: Redraw even if the previous redraw was under 0.1 s ago.
        use_original_timestamp: Report the average rate over the whole
            download instead of the sum of the per-chunk rates.

    Returns:
        ``True`` when the caller may split its chunk, ``False`` when a split
        was due but four or more chunks have no progress, and ``None`` when
        the call was throttled or no split is due.
    """
    ct = time.perf_counter()
    if not force and ct - ctx["last"] < 0.1:
        return
    # Record this redraw; without it the throttle above never takes effect.
    ctx["last"] = ct
    maxbar = 64
    chunks = ctx["chunkinfo"]
    samples = []
    for chunk in chunks:
        done, total = chunk[-1], chunk[1]
        if total > 0:
            samples.append(done / total)
        elif done < 0:
            # Negative progress marks a failed attempt; keep it negative.
            samples.append(done)
        else:
            # No usable total: an empty chunk is complete, an unknown one is not.
            samples.append(1 if total == 0 else 0)
    bar = sample(samples, maxbar)
    s = "".join(map(box, bar))
    dt = ct - ctx["start"]
    timer = time_disp(dt)
    progress = sum(chunk[-1] for chunk in chunks)
    size = ctx["size"]
    if size > 0:
        percentage = round(progress / size * 100, 4)
        if percentage.is_integer():
            percentage = int(percentage)
    elif size == 0:
        percentage = 100
    else:
        # Total size unknown, so no percentage can be computed.
        percentage = "?"
    if use_original_timestamp:
        total_bytes = size if size > 0 else max(progress, 0)
        # Guard against a zero elapsed time.
        bps = total_bytes * 8 / max(0.001, dt)
    else:
        bps = float(sum(chunk[3] / max(0.001, ct - chunk[2]) for chunk in chunks)) * 8
    bpst = calc_bps(bps)
    s2 = f" {timer} ({percentage}%, {bpst})"
    chars = len(bar) + len(s2)
    s += "\x1b[38;5;7m" + s2
    # Pad to a fixed width so a shorter line fully overwrites the previous one.
    s += " " * (100 - chars)
    print(s, end="\r")
    if ctx["forkable"] and len(chunks) < ctx["limit"] and ct - ctx["last_split"] > 1 and bps > ctx["last_bps"]:
        ctx["last_bps"] = bps
        ctx["last_split"] = time.perf_counter()
        # Allow no more than 4 stalled/errored requests at a time
        return sum(chunk[-1] <= 0 for chunk in chunks) < 4
    elif ct - ctx["last_split"] > 5:
        # No split for 5 s: refresh the baseline rate that splits must beat.
        ctx["last_bps"] = bps
        ctx["last_split"] = time.perf_counter()
105
+
106
async def write_request(ctx, chunk, resp, url, method, headers, data, filename):
    """Download one chunk into its own cache file, retrying until it succeeds.

    Args:
        ctx: Shared download state; this chunk is appended to
            ``ctx["chunkinfo"]`` and split-off tasks to ``ctx["workers"]``.
        chunk: Mutable list ``[start offset, size, timestamp, bytes since
            timestamp, stream start time, progress]``. A negative size means
            the length is unknown.
        resp: An already-open streaming response for the first attempt, or
            ``None`` to issue the request here.
        url, method, headers, data: Request parameters used for every
            (re)request of this chunk.
        filename: Unused here; forwarded unchanged to split-off requests.

    Returns:
        Path of the completed cache file.
    """
    file = os.path.join(ctx["cache_folder"], nth_file(uhash(url), len(ctx["chunkinfo"])))
    ctx["chunkinfo"].append(chunk)
    attempts = 0
    fn = file + "~"
    with open(fn, "wb+") as f:
        while True:
            # Each failed attempt allows the next one 5 more seconds.
            timeout = (attempts + 1) * 5
            try:
                if resp is None:
                    resp = await asyncio.wait_for(
                        session.request(method, url, headers=headers, data=data, stream=True, timeout=timeout),
                        timeout=timeout + 1,
                    )
                    resp.raise_for_status()
                    if "Range" in headers:
                        # Explicit check instead of assert, which is stripped under -O.
                        wanted = headers["Range"].split("=", 1)[-1]
                        served = (resp.headers.get("content-range") or "").split("/", 1)[0].split(None, 1)
                        if not served or served[-1] != wanted:
                            raise ValueError("Server failed to serve range header as specified!")
                chunk[-2] = time.perf_counter()
                it = await asyncio.wait_for(resp.iter_content(base_chunk), timeout=timeout)
                size = chunk[1]
                try:
                    while True:
                        # Named `block`, not `data`: reusing `data` would replace the
                        # request body sent by retries and by split-off requests.
                        block = await asyncio.wait_for(it.__anext__(), timeout=timeout)
                        f.write(block)
                        # max() discards the negative marker left by a failed attempt.
                        received = max(len(block), chunk[-1] + len(block))
                        # With an unknown size there is nothing to clamp against.
                        chunk[-1] = min(received, size) if size >= 0 else received
                        chunk[3] += len(block)
                        split = update_progress(ctx)
                        if chunk[-1] == size:
                            break
                        if split and chunk[-1] + chunk_size < size:
                            # Hand the second half of what is left to a new request.
                            start = chunk[0]
                            offset = round((chunk[-1] + size) / 2)
                            # Same six-slot layout as the first chunk, so chunk[-2]
                            # does not alias the byte counter in chunk[3].
                            chunk2 = [start + offset, size - offset, time.perf_counter(), 0, 0, 0]
                            rheaders = headers.copy()
                            rheaders["Range"] = f"bytes={start + offset}-{start + size - 1}"
                            fut = asyncio.create_task(write_request(ctx, chunk2, None, resp.url, method, rheaders, data, filename))
                            fut.start = chunk2[0]
                            ctx["workers"].append(fut)
                            ctx["workers"].sort(key=lambda fut: fut.start)
                            size = chunk[1] = offset
                            chunk[2] = time.perf_counter()
                            chunk[3] = len(block)
                except (StopIteration, StopAsyncIteration):
                    pass
                f.flush()
                written = f.tell()
                if size < 0:
                    # Length was unknown: everything the server sent is the body.
                    size = chunk[1] = written
                    if ctx["size"] < 0:
                        ctx["size"] = written
                elif written < size:
                    # truncate() would otherwise pad the file with zero bytes
                    # and hide an incomplete download.
                    raise EOFError(f"Stream ended after {written} of {size} bytes")
                # Drop bytes received beyond this chunk's (possibly reduced) size.
                f.truncate(size)
            except (TimeoutError, asyncio.TimeoutError):
                # Timeouts are expected on slow hosts; fall through to the retry.
                pass
            except Exception as ex:
                print(repr(ex))
            else:
                chunk[-1] = size
                chunk[2] = time.perf_counter()
                chunk[3] = base_chunk
                break
            finally:
                # Identity check: a response with an error status is falsy and
                # would otherwise be left open.
                if resp is not None:
                    try:
                        await asyncio.wait_for(resp.close(), timeout=timeout)
                    except Exception:
                        # Best effort: the response is discarded either way.
                        pass
                    resp = None
            # The attempt failed: discard partial data and flag the chunk as stalled.
            f.seek(0)
            f.truncate(0)
            chunk[-1] = -0.01
            chunk[2] = time.perf_counter()
            chunk[3] = 0
            update_progress(ctx, force=True)
            await asyncio.sleep((attempts + random.random()) ** 2 + 1)
            attempts += 1
            # Later requests use a fresh session; the previous one is not closed here.
            globals()["session"] = niquests.AsyncSession()
    os.replace(fn, file)
    return file
178
+
179
async def parallel_request(url, method="get", headers=None, data=None, filename=None, cache_folder="", limit=1024):
    """Download *url* to a file, splitting it into concurrent ranged requests.

    Args:
        url: Target URL.
        method: HTTP method.
        headers: Extra request headers, merged over the generated defaults.
        data: Optional request body.
        filename: Output path. When empty, the name comes from the
            ``attachment-filename`` response header or the last URL path
            segment, reduced to a bare file name.
        cache_folder: Directory for the per-chunk temporary files.
        limit: Maximum number of chunks; 1 or less disables splitting.

    The output is assembled in ``filename + "~"`` and moved into place once
    complete. Cache files that were read are deleted afterwards.
    """
    t = time.perf_counter()
    head = header()
    # None instead of a shared mutable default dict.
    head.update(headers or {})
    resp = await session.request(method, url, headers=head, data=data, stream=True)
    resp.raise_for_status()
    if not filename:
        candidate = resp.headers.get("attachment-filename") or url.rstrip("/").rsplit("/", 1)[-1].split("?", 1)[0]
        # Server- and URL-derived names are untrusted: keep only the final
        # path component so the output cannot land outside the working directory.
        candidate = os.path.basename(candidate.replace("\\", "/"))
        filename = candidate if candidate not in ("", ".", "..") else "download"
    try:
        size = int(resp.headers.get("content-length") or resp.headers["content-range"].rsplit("/", 1)[-1])
    except (KeyError, ValueError):
        # Length not advertised; -1 marks it as unknown.
        size = -1
    chunk = [0, size, time.perf_counter(), 0, 0, 0]
    single = limit <= 1 or size <= 0 or "bytes" not in resp.headers.get("accept-ranges", "").casefold()
    ctx = dict(
        url=url,
        start=t,
        last=0,
        size=size,
        last_bps=0,
        last_split=0,
        cache_folder=cache_folder,
        limit=limit,
        forkable=not single,
        chunkinfo=[],
        workers=[],
    )
    # Pass the merged headers so retries and split-off requests send the same
    # headers as this first request.
    fut = asyncio.create_task(write_request(ctx, chunk, resp, url, method, head, data, filename))
    fut.start = 0
    ctx["workers"].append(fut)
    removes = []
    try:
        fn = filename + "~"
        with open(fn, "wb") as f:
            # Workers are kept sorted by start offset, so consuming them from
            # the front concatenates the chunks in file order.
            while ctx["workers"]:
                file = await ctx["workers"].pop(0)
                removes.append(file)
                with open(file, "rb") as g:
                    shutil.copyfileobj(g, f)
        os.replace(fn, filename)
        update_progress(ctx, force=True, use_original_timestamp=True)
    finally:
        for file in removes:
            try:
                os.remove(file)
            except FileNotFoundError:
                pass
223
+
224
+
225
# Resolve the version of the installed "streamshatter" distribution, falling
# back to a placeholder when the lookup is not possible.
try:
    from importlib.metadata import version
except Exception:
    __version__ = "0.0.0-unknown"
else:
    try:
        __version__ = version("streamshatter")
    except Exception:
        __version__ = "0.0.0-unknown"
230
+
231
def main():
    """Command-line entry point: parse the arguments and run the download.

    Exits with an argparse usage error when ``--headers`` is not a JSON
    object.
    """
    import argparse
    parser = argparse.ArgumentParser(
        prog="streamshatter",
        description="Multiplexed chunked file downloader",
    )
    parser.add_argument("-V", '--version', action='version', version=f'%(prog)s {__version__}')
    parser.add_argument("-H", '--headers', help="HTTP headers, interpreted as JSON", required=False, default="{}")
    # dirname(abspath(...)) also works when __file__ has no directory part,
    # where splitting on "/" would return the file name itself.
    parser.add_argument("-c", '--cache-folder', help="Folder to store temporary files", required=False, default=os.path.join(os.path.dirname(os.path.abspath(__file__)), "cache"))
    parser.add_argument("-l", '--limit', help="Limits the amount of chunks to download", type=int, required=False, default=1024)
    parser.add_argument("url", help="Target URL")
    parser.add_argument("filename", help="Output filename", nargs="?", default="")
    args = parser.parse_args()
    try:
        headers = json.loads(args.headers)
    except ValueError as ex:
        # json.JSONDecodeError is a ValueError; report it as a usage error.
        parser.error(f"--headers is not valid JSON: {ex}")
    if not isinstance(headers, dict):
        parser.error("--headers must be a JSON object")
    # Creates missing parent directories and tolerates an existing folder.
    os.makedirs(args.cache_folder, exist_ok=True)
    if os.name == "nt":
        # Presumably enables ANSI escape handling in the Windows console —
        # TODO confirm.
        os.system("color")
    asyncio.run(parallel_request(
        url=args.url,
        filename=args.filename,
        headers=headers,
        cache_folder=args.cache_folder,
        limit=args.limit,
    ))


if __name__ == "__main__":
    main()