streamshatter 1.0.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- streamshatter-1.0.0/.gitignore +3 -0
- streamshatter-1.0.0/LICENSE +21 -0
- streamshatter-1.0.0/PKG-INFO +40 -0
- streamshatter-1.0.0/README.md +27 -0
- streamshatter-1.0.0/pyproject.toml +20 -0
- streamshatter-1.0.0/streamshatter.py +257 -0
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2025 Thomas Xin
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
|
@@ -0,0 +1,40 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: streamshatter
|
|
3
|
+
Version: 1.0.0
|
|
4
|
+
Summary: Multiplexed chunked file downloader
|
|
5
|
+
Author-email: Thomas Xin <thomasxin@gmail.com>
|
|
6
|
+
License: MIT
|
|
7
|
+
License-File: LICENSE
|
|
8
|
+
Classifier: Programming Language :: Python :: 3
|
|
9
|
+
Requires-Python: >=3.8
|
|
10
|
+
Requires-Dist: niquests>=3.15.2
|
|
11
|
+
Requires-Dist: urllib3-future>=2.13.908
|
|
12
|
+
Description-Content-Type: text/markdown
|
|
13
|
+
|
|
14
|
+
# StreamShatter
|
|
15
|
+
Originally a very basic script for downloading files from servers with inconsistent connections, this project has been revisited and modernised to use https://github.com/jawah/niquests for multiplexing performance, both for private testing of niquests' stability, and for general improvements in functionality for those who still have use for such a tool.
|
|
16
|
+
|
|
17
|
+
StreamShatter takes advantage of the `Range` HTTP header to dynamically allocate multiple chunks, by starting with one streaming request and gradually bisecting it while bandwidth permits, all without restarting the download. This allows for single, large file downloads from hosts that, whether intentionally or unintentionally, have degraded throughputs. The individual chunks also serve as checkpoints for if/when connections are broken.
|
|
18
|
+
|
|
19
|
+
# Installation
|
|
20
|
+
- Install [python](https://www.python.org) and [pip](https://pip.pypa.io/en/stable/)
|
|
21
|
+
- Install StreamShatter as a package:
|
|
22
|
+
`pip install streamshatter`
|
|
23
|
+
|
|
24
|
+
## Usage
|
|
25
|
+
```ini
|
|
26
|
+
usage: streamshatter [-h] [-V] [-H HEADERS] [-c CACHE_FOLDER] [-l LIMIT] url [filename]
|
|
27
|
+
|
|
28
|
+
Multiplexed chunked file downloader
|
|
29
|
+
|
|
30
|
+
positional arguments:
|
|
31
|
+
url Target URL
|
|
32
|
+
filename Output filename
|
|
33
|
+
|
|
34
|
+
options:
|
|
35
|
+
-h, --help show this help message and exit
|
|
36
|
+
-V, --version show program's version number and exit
|
|
37
|
+
-c, --cache-folder CACHE_FOLDER
|
|
38
|
+
Folder to store temporary files
|
|
39
|
+
-l, --limit LIMIT Limits the amount of chunks to download
|
|
40
|
+
```
|
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
# StreamShatter
|
|
2
|
+
Originally a very basic script for downloading files from servers with inconsistent connections, this project has been revisited and modernised to use https://github.com/jawah/niquests for multiplexing performance, both for private testing of niquests' stability, and for general improvements in functionality for those who still have use for such a tool.
|
|
3
|
+
|
|
4
|
+
StreamShatter takes advantage of the `Range` HTTP header to dynamically allocate multiple chunks, by starting with one streaming request and gradually bisecting it while bandwidth permits, all without restarting the download. This allows for single, large file downloads from hosts that, whether intentionally or unintentionally, have degraded throughputs. The individual chunks also serve as checkpoints for if/when connections are broken.
|
|
5
|
+
|
|
6
|
+
# Installation
|
|
7
|
+
- Install [python](https://www.python.org) and [pip](https://pip.pypa.io/en/stable/)
|
|
8
|
+
- Install StreamShatter as a package:
|
|
9
|
+
`pip install streamshatter`
|
|
10
|
+
|
|
11
|
+
## Usage
|
|
12
|
+
```ini
|
|
13
|
+
usage: streamshatter [-h] [-V] [-H HEADERS] [-c CACHE_FOLDER] [-l LIMIT] url [filename]
|
|
14
|
+
|
|
15
|
+
Multiplexed chunked file downloader
|
|
16
|
+
|
|
17
|
+
positional arguments:
|
|
18
|
+
url Target URL
|
|
19
|
+
filename Output filename
|
|
20
|
+
|
|
21
|
+
options:
|
|
22
|
+
-h, --help show this help message and exit
|
|
23
|
+
-V, --version show program's version number and exit
|
|
24
|
+
-c, --cache-folder CACHE_FOLDER
|
|
25
|
+
Folder to store temporary files
|
|
26
|
+
-l, --limit LIMIT Limits the amount of chunks to download
|
|
27
|
+
```
|
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
[build-system]
|
|
2
|
+
requires = ["hatchling"]
|
|
3
|
+
build-backend = "hatchling.build"
|
|
4
|
+
|
|
5
|
+
[project]
|
|
6
|
+
name = "streamshatter"
|
|
7
|
+
version = "1.0.0"
|
|
8
|
+
dependencies = [
|
|
9
|
+
"niquests>=3.15.2",
|
|
10
|
+
"urllib3-future>=2.13.908",
|
|
11
|
+
]
|
|
12
|
+
description = "Multiplexed chunked file downloader"
|
|
13
|
+
readme = "README.md"
|
|
14
|
+
requires-python = ">=3.8"
|
|
15
|
+
authors = [{ name = "Thomas Xin", email = "thomasxin@gmail.com" }]
|
|
16
|
+
license = { text = "MIT" }
|
|
17
|
+
classifiers = ["Programming Language :: Python :: 3"]
|
|
18
|
+
|
|
19
|
+
[project.scripts]
|
|
20
|
+
streamshatter = "streamshatter:main"
|
|
@@ -0,0 +1,257 @@
|
|
|
1
|
+
import asyncio
|
|
2
|
+
import base64
|
|
3
|
+
import hashlib
|
|
4
|
+
import json
|
|
5
|
+
from math import ceil, isfinite
|
|
6
|
+
import os
|
|
7
|
+
import random
|
|
8
|
+
import shutil
|
|
9
|
+
import time
|
|
10
|
+
from urllib.parse import quote_plus
|
|
11
|
+
import niquests
|
|
12
|
+
|
|
13
|
+
# Module-wide HTTP session shared by the request coroutines in this file.
session = niquests.AsyncSession()
# A chunk is only bisected while more than this many bytes remain in it.
chunk_size = 1 << 20
# Number of bytes requested per read from a streamed response body.
base_chunk = 1 << 16
# Progress-bar palette, darkest to brightest: ANSI 256-colour code 16, the
# codes 232-255 in ascending order, then code 15.
COLOURS = [
    "\x1b[38;5;16m█",
    *(f"\x1b[38;5;{code}m█" for code in range(232, 256)),
    "\x1b[38;5;15m█",
]
|
|
19
|
+
|
|
20
|
+
def shash(s):
    """Return the unpadded URL-safe base64 SHA-256 digest of *s*.

    Exact ``bytes`` objects are hashed as-is; any other value is converted
    with ``str()`` and encoded as UTF-8 before hashing.
    """
    payload = s if type(s) is bytes else str(s).encode("utf-8")
    digest = hashlib.sha256(payload).digest()
    encoded = base64.urlsafe_b64encode(digest)
    return encoded.rstrip(b"==").decode("ascii")
|
|
21
|
+
def uhash(s):
    """Return a short, filesystem-friendly identifier for the URL *s*.

    Two candidates are produced: the ``shash`` digest of *s*, and *s* with a
    leading ``https://`` removed and then percent-encoded by ``quote_plus``.
    The shorter candidate wins; on a tie the digest is returned because it is
    listed first.
    """
    # str.removeprefix only exists on Python 3.9+, while the package metadata
    # declares support for 3.8, so the prefix is stripped by hand.
    prefix = "https://"
    stripped = s[len(prefix):] if s.startswith(prefix) else s
    return min([shash(s), quote_plus(stripped)], key=len)
|
|
22
|
+
def header():
    """Build a fresh set of randomised default request headers.

    Returns a new dict on every call containing a Chrome-like ``User-Agent``
    with a random ``Mozilla/5.x`` minor version, ``DNT: 1``, and an
    ``X-Forwarded-For`` made of four random octets.
    """
    minor = random.randint(1, 9)
    octets = [str(random.randint(0, 255)) for _ in range(4)]
    return {
        "User-Agent": f"Mozilla/5.{minor} (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.88 Safari/537.36",
        "DNT": "1",
        "X-Forwarded-For": ".".join(octets),
    }
|
|
28
|
+
def nth_file(tag, chunk=0):
    """Return the cache file name for chunk number *chunk* of download *tag*.

    The chunk number is written as its shortest big-endian byte string,
    URL-safe base64 encoded without padding, and joined to *tag* with ``~``.
    Chunk 0 encodes to an empty prefix, giving ``"~" + tag``.
    """
    width = ceil(chunk.bit_length() / 8)
    raw = chunk.to_bytes(width, "big")
    prefix = base64.urlsafe_b64encode(raw).rstrip(b"==").decode("ascii")
    return prefix + "~" + tag
|
|
30
|
+
def box(i):
    """Return one coloured block character for the completion ratio *i*.

    Negative ratios yield the fixed colour-196 block. Otherwise the ratio is
    capped at 1 and mapped linearly onto the ``COLOURS`` palette.
    """
    if i < 0:
        return "\x1b[38;5;196m█"
    top = len(COLOURS) - 1
    level = min(1, i)
    return COLOURS[round(level * top)]
|
|
34
|
+
def time_disp(s, rounded=True):
    """Format a duration of *s* seconds as ``m:ss``, ``h:mm:ss`` or ``d:hh:mm:ss``.

    Non-finite values are returned via ``str()``. When *rounded* is true the
    value is rounded to whole seconds first. Durations under a minute are
    shown as ``0:ss``; each larger unit appears only once it is reached, and
    a field is zero-padded to two digits only when a larger unit precedes it.
    """
    if not isfinite(s):
        return str(s)
    if rounded:
        s = round(s)
    seconds = str(s % 60).rjust(2, "0")
    if not s >= 60:
        return "0:" + seconds

    show_hours = s >= 3600
    minutes = str((s // 60) % 60)
    if show_hours:
        minutes = minutes.rjust(2, "0")
    text = minutes + ":" + seconds
    if not show_hours:
        return text

    show_days = s >= 86400
    hours = str((s // 3600) % 24)
    if show_days:
        hours = hours.rjust(2, "0")
    text = hours + ":" + text
    if show_days:
        text = str(s // 86400) + ":" + text
    return text
|
|
57
|
+
def calc_bps(bps):
    """Format a bit rate with a decimal (powers of 1000) unit suffix.

    The value is rounded to four decimal places at each step and divided by
    1000 until it drops below 1000. Whole values are printed without a
    fractional part. Returns ``"ERR"`` when no suffix up to ``Ybps`` fits.
    """
    # int.is_integer() only exists on Python 3.12+; normalising to float keeps
    # integer inputs (for example a plain 0) working on older interpreters.
    bps = float(bps)
    for suffix in ("bps", "kbps", "Mbps", "Gbps", "Tbps", "Pbps", "Ebps", "Zbps", "Ybps"):
        bps = round(bps, 4)
        if bps < 1000:
            if bps.is_integer():
                bps = int(bps)
            return f"{bps} {suffix}"
        bps /= 1000
    return "ERR"
|
|
66
|
+
def sample(arr, n):
    """Shrink the numeric list *arr* to at most *n* values.

    While the list is more than twice as long as *n*, adjacent pairs are
    averaged (a trailing unpaired element is dropped). If it is still longer
    than *n*, exactly *n* values are taken at evenly spaced fractional
    positions using linear interpolation. A list that already fits is
    returned unchanged. The caller's list is never modified.
    """
    if n <= 0:
        # Nothing can be displayed; also avoids the endless halving loop a
        # negative n would otherwise cause.
        return []
    while len(arr) > n * 2:
        arr = [(a + b) / 2 for a, b in zip(arr[::2], arr[1::2])]
    count = len(arr)
    if count <= n:
        return arr
    # Work on a padded copy so interpolating at the last position has a right
    # neighbour, without appending to the list the caller passed in.
    padded = [*arr, arr[-1]]
    result = []
    for x in range(n):
        pos = x * count / n
        idx = int(pos)
        frac = pos % 1
        result.append(padded[idx] * (1 - frac) + padded[idx + 1] * frac)
    return result
|
|
74
|
+
def update_progress(ctx, force=False, use_original_timestamp=False):
    """Redraw the one-line progress bar and decide whether a chunk may fork.

    Args:
        ctx: Shared download state. Reads ``chunkinfo``, ``start``, ``size``,
            ``forkable`` and ``limit``; reads and writes ``last``,
            ``last_split`` and ``last_bps``. Each ``chunkinfo`` entry is a
            list whose index 1 is the chunk length, index 2 a
            ``perf_counter`` timestamp, index 3 the bytes received since that
            timestamp, and whose last element is the bytes completed
            (negative while the chunk is waiting to retry).
        force: Draw even if the previous draw was under 0.1 s ago.
        use_original_timestamp: Report the average rate over the whole
            download instead of the sum of the per-chunk rates.

    Returns:
        ``True`` when the caller may split its chunk, ``False`` when a split
        was due but four or more chunks have no progress, and ``None``
        otherwise (including throttled calls).
    """
    ct = time.perf_counter()
    if not force and ct - ctx["last"] < 0.1:
        return
    # Record the draw time; without this the throttle above never engages and
    # the bar is reprinted for every block received.
    ctx["last"] = ct
    maxbar = 64
    chunks = ctx["chunkinfo"]
    # A zero-length chunk has nothing left to fetch, so it counts as complete
    # (or as errored when flagged negative) instead of dividing by zero.
    samples = [
        chunk[-1] / chunk[1] if chunk[1] else (-1 if chunk[-1] < 0 else 1)
        for chunk in chunks
    ]
    s = "".join(map(box, sample(samples, maxbar)))
    dt = ct - ctx["start"]
    timer = time_disp(dt)
    progress = sum(chunk[-1] for chunk in chunks)
    total = ctx["size"]
    percentage = round(progress / total * 100, 4) if total else 100.0
    if percentage.is_integer():
        percentage = int(percentage)
    if use_original_timestamp:
        bps = total * 8 / max(0.001, dt)
    else:
        bps = sum((chunk[3] / max(0.001, ct - chunk[2]) for chunk in chunks), 0.0) * 8
    bpst = calc_bps(bps)
    s2 = f" {timer} ({percentage}%, {bpst})"
    chars = min(maxbar, len(chunks)) + len(s2)
    s += "\x1b[38;5;7m" + s2
    # Pad to a fixed width so a shorter line overwrites the previous one.
    s += " " * (100 - chars)
    print(s, end="\r")
    if ctx["forkable"] and len(chunks) < ctx["limit"] and ct - ctx["last_split"] > 1 and bps > ctx["last_bps"]:
        ctx["last_bps"] = bps
        ctx["last_split"] = time.perf_counter()
        # Allow no more than 4 stalled/errored requests at a time
        return sum(chunk[-1] <= 0 for chunk in chunks) < 4
    elif ct - ctx["last_split"] > 5:
        # No split for five seconds: refresh the baseline rate.
        ctx["last_bps"] = bps
        ctx["last_split"] = time.perf_counter()
|
|
105
|
+
|
|
106
|
+
async def write_request(ctx, chunk, resp, url, method, headers, data, filename):
    """Download one chunk into a cache file, retrying until it is complete.

    While streaming, the chunk may be bisected: the upper half is handed to a
    new ``write_request`` task carrying a ``Range`` header, and this call
    keeps the lower half.

    Args:
        ctx: Shared download state (``cache_folder``, ``chunkinfo``,
            ``workers``, ``size`` plus whatever ``update_progress`` uses).
        chunk: Mutable list describing this chunk: index 0 is the start
            offset, index 1 the length (negative when unknown), index 2 a
            ``perf_counter`` timestamp, index 3 the bytes received since that
            timestamp, and the last element the bytes completed (set to
            ``-0.01`` while waiting to retry).
        resp: An already-open streaming response to consume first, or a falsy
            value to issue the request here.
        url: Request URL; also determines the cache file name.
        method: HTTP method.
        headers: Request headers; a ``Range`` entry is checked against the
            server's ``Content-Range``.
        data: Request body, forwarded unchanged to retries and forks.
        filename: Not used here; forwarded to forked tasks.

    Returns:
        Path of the finished cache file.
    """
    global session
    file = os.path.join(ctx["cache_folder"], nth_file(uhash(url), len(ctx["chunkinfo"])))
    ctx["chunkinfo"].append(chunk)
    attempts = 0
    fn = file + "~"
    with open(fn, "wb+") as f:
        while True:
            timeout = (attempts + 1) * 5
            try:
                if not resp:
                    resp = await asyncio.wait_for(
                        session.request(method, url, headers=headers, data=data, stream=True, timeout=timeout),
                        timeout=timeout + 1,
                    )
                    resp.raise_for_status()
                    if "Range" in headers:
                        served = resp.headers["content-range"].split("/", 1)[0].split(None, 1)[-1]
                        wanted = headers["Range"].split("=", 1)[-1]
                        # Explicit check rather than assert, so it still runs
                        # under ``python -O``.
                        if served != wanted:
                            raise RuntimeError("Server failed to serve range header as specified!")
                    # Restart the rate window now that the connection is up.
                    # The original wrote to chunk[-2], which is the byte
                    # counter on five-element fork chunks.
                    chunk[2] = time.perf_counter()
                it = await asyncio.wait_for(resp.iter_content(base_chunk), timeout=timeout)
                size = chunk[1]
                # A negative size means the server gave no usable length.
                unbounded = size < 0
                try:
                    while True:
                        # Kept separate from ``data`` so the request body is
                        # not replaced by downloaded bytes on retry or fork.
                        block = await asyncio.wait_for(it.__anext__(), timeout=timeout)
                        f.write(block)
                        received = max(len(block), chunk[-1] + len(block))
                        chunk[-1] = received if unbounded else min(received, size)
                        chunk[3] += len(block)
                        split = update_progress(ctx)
                        if chunk[-1] == size:
                            break
                        if split and chunk[-1] + chunk_size < size:
                            # Hand the upper half of what remains to a new task.
                            start = chunk[0]
                            offset = round((chunk[-1] + size) / 2)
                            chunk2 = [start + offset, size - offset, time.perf_counter(), 0, 0]
                            rheaders = headers.copy()
                            rheaders["Range"] = f"bytes={start + offset}-{start + size - 1}"
                            fut = asyncio.create_task(write_request(ctx, chunk2, None, resp.url, method, rheaders, data, filename))
                            fut.start = chunk2[0]
                            ctx["workers"].append(fut)
                            ctx["workers"].sort(key=lambda fut: fut.start)
                            size = chunk[1] = offset
                            chunk[2] = time.perf_counter()
                            chunk[3] = len(block)
                except (StopIteration, StopAsyncIteration):
                    pass
                f.flush()
                written = f.tell()
                if unbounded:
                    # The length is only known once the stream has ended.
                    size = written
                    chunk[1] = ctx["size"] = size
                elif written < size:
                    # truncate() below would pad a short file up to ``size``,
                    # hiding a prematurely ended stream; retry instead.
                    raise RuntimeError(f"Stream ended after {written} of {size} bytes")
                # Drop any bytes read past the (possibly shrunk) chunk end.
                f.truncate(size)
                f.seek(0, os.SEEK_END)
                if f.tell() != size:
                    raise RuntimeError(f"Chunk file holds {f.tell()} bytes, expected {size}")
            except (TimeoutError, asyncio.TimeoutError):
                pass
            except Exception as ex:
                print(repr(ex))
            else:
                chunk[-1] = size
                chunk[2] = time.perf_counter()
                chunk[3] = base_chunk
                break
            finally:
                if resp:
                    try:
                        await asyncio.wait_for(resp.close(), timeout=timeout)
                    except Exception:
                        # Best effort: a failed close must not mask the outcome.
                        pass
                    resp = None
            # Failed attempt: discard partial data, flag the chunk as errored
            # for the progress bar, back off, then retry on a new session.
            f.seek(0)
            f.truncate(0)
            chunk[-1] = -0.01
            chunk[2] = time.perf_counter()
            chunk[3] = 0
            update_progress(ctx, force=True)
            await asyncio.sleep((attempts + random.random()) ** 2 + 1)
            attempts += 1
            # NOTE(review): the previous session is not closed here; other
            # workers may still be using it.
            session = niquests.AsyncSession()
    os.replace(fn, file)
    return file
|
|
178
|
+
|
|
179
|
+
async def parallel_request(url, method="get", headers=None, data=None, filename=None, cache_folder="", limit=1024):
    """Download *url* to a file, splitting the transfer into parallel chunks.

    One streaming request is opened first. Its response headers supply the
    total size and show whether byte ranges are supported. Chunk tasks write
    their parts into *cache_folder*; the parts are concatenated in offset
    order into ``filename + "~"``, which is renamed to *filename* at the end.

    Args:
        url: Target URL.
        method: HTTP method.
        headers: Extra request headers, layered over the defaults from
            ``header()``. ``None`` means no extra headers.
        data: Optional request body.
        filename: Output path. Falls back to the ``attachment-filename``
            response header, then to the last path segment of *url*.
        cache_folder: Directory for temporary chunk files.
        limit: Maximum number of chunks; 1 or less disables splitting.
    """
    t = time.perf_counter()
    head = header()
    head.update(headers or {})
    resp = await session.request(method, url, headers=head, data=data, stream=True)
    resp.raise_for_status()
    filename = filename or resp.headers.get("attachment-filename") or url.rstrip("/").rsplit("/", 1)[-1].split("?", 1)[0]
    try:
        size = int(resp.headers.get("content-length") or resp.headers["content-range"].rsplit("/", 1)[-1])
    except (KeyError, ValueError):
        # No usable length advertised.
        size = -1
    chunk = [0, size, time.perf_counter(), 0, 0, 0]
    single = limit <= 1 or size <= 0 or "bytes" not in resp.headers.get("accept-ranges", "").casefold()
    ctx = dict(
        url=url,
        start=t,
        last=0,
        size=size,
        last_bps=0,
        last_split=0,
        cache_folder=cache_folder,
        limit=limit,
        forkable=not single,
        chunkinfo=[],
        workers=[],
    )
    # Pass the merged headers so retries and forked range requests present
    # the same User-Agent and defaults as the initial request.
    fut = asyncio.create_task(write_request(ctx, chunk, resp, url, method, head, data, filename))
    fut.start = 0
    ctx["workers"].append(fut)
    removes = []
    try:
        fn = filename + "~"
        with open(fn, "ab") as f:
            f.truncate(0)
            # Workers are kept sorted by start offset, so awaiting the head
            # of the list yields the parts in file order even while running
            # workers add new ones.
            while ctx["workers"]:
                file = await ctx["workers"].pop(0)
                with open(file, "rb") as g:
                    shutil.copyfileobj(g, f)
                removes.append(file)
        os.replace(fn, filename)
        update_progress(ctx, force=True, use_original_timestamp=True)
    finally:
        # Only parts already merged into the output are deleted.
        for file in removes:
            os.remove(file)
|
|
223
|
+
|
|
224
|
+
|
|
225
|
+
# Installed distribution version, with a placeholder used whenever the
# package metadata cannot be imported or looked up.
__version__ = "0.0.0-unknown"
try:
    from importlib.metadata import version
    __version__ = version("streamshatter")
except Exception:
    pass
|
|
230
|
+
|
|
231
|
+
def main(argv=None):
    """Command-line entry point: parse arguments and run the download.

    Args:
        argv: Argument list to parse; ``None`` (the default, used by the
            console script) makes argparse read ``sys.argv[1:]``.
    """
    import argparse
    parser = argparse.ArgumentParser(
        prog="streamshatter",
        description="Multiplexed chunked file downloader",
    )
    # Resolve the module directory via os.path so a relative or bare __file__
    # still yields a real directory.
    default_cache = os.path.join(os.path.dirname(os.path.abspath(__file__)), "cache")
    parser.add_argument("-V", "--version", action="version", version=f"%(prog)s {__version__}")
    parser.add_argument("-H", "--headers", help="HTTP headers, interpreted as JSON", required=False, default="{}")
    parser.add_argument("-c", "--cache-folder", help="Folder to store temporary files", required=False, default=default_cache)
    parser.add_argument("-l", "--limit", help="Limits the amount of chunks to download", type=int, required=False, default=1024)
    parser.add_argument("url", help="Target URL")
    parser.add_argument("filename", help="Output filename", nargs="?", default="")
    args = parser.parse_args(argv)
    # Report malformed --headers as a usage error instead of a traceback.
    try:
        headers = json.loads(args.headers)
    except ValueError as ex:
        parser.error(f"--headers is not valid JSON: {ex}")
    if not isinstance(headers, dict):
        parser.error("--headers must be a JSON object")
    # makedirs handles nested paths and tolerates an existing folder.
    os.makedirs(args.cache_folder, exist_ok=True)
    if os.name == "nt":
        # Running "color" makes the Windows console honour ANSI escapes.
        os.system("color")
    asyncio.run(parallel_request(
        url=args.url,
        filename=args.filename,
        headers=headers,
        cache_folder=args.cache_folder,
        limit=args.limit,
    ))


if __name__ == "__main__":
    main()
|