warp-beacon 2.6.86__tar.gz → 2.6.88__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {warp_beacon-2.6.86/warp_beacon.egg-info → warp_beacon-2.6.88}/PKG-INFO +2 -1
- {warp_beacon-2.6.86 → warp_beacon-2.6.88}/pyproject.toml +2 -1
- warp_beacon-2.6.88/warp_beacon/__version__.py +2 -0
- {warp_beacon-2.6.86 → warp_beacon-2.6.88}/warp_beacon/scheduler/instagram_human.py +14 -12
- {warp_beacon-2.6.86 → warp_beacon-2.6.88}/warp_beacon/scheduler/scheduler.py +1 -1
- {warp_beacon-2.6.86 → warp_beacon-2.6.88}/warp_beacon/scraper/__init__.py +2 -2
- {warp_beacon-2.6.86 → warp_beacon-2.6.88}/warp_beacon/scraper/instagram/instagram.py +3 -5
- {warp_beacon-2.6.86 → warp_beacon-2.6.88}/warp_beacon/scraper/instagram/wb_instagrapi.py +9 -10
- warp_beacon-2.6.88/warp_beacon/scraper/utils.py +20 -0
- {warp_beacon-2.6.86 → warp_beacon-2.6.88}/warp_beacon/scraper/youtube/abstract.py +43 -2
- {warp_beacon-2.6.86 → warp_beacon-2.6.88/warp_beacon.egg-info}/PKG-INFO +2 -1
- {warp_beacon-2.6.86 → warp_beacon-2.6.88}/warp_beacon.egg-info/requires.txt +1 -0
- warp_beacon-2.6.86/warp_beacon/__version__.py +0 -2
- warp_beacon-2.6.86/warp_beacon/scraper/utils.py +0 -4
- {warp_beacon-2.6.86 → warp_beacon-2.6.88}/LICENSE +0 -0
- {warp_beacon-2.6.86 → warp_beacon-2.6.88}/MANIFEST.in +0 -0
- {warp_beacon-2.6.86 → warp_beacon-2.6.88}/README.md +0 -0
- {warp_beacon-2.6.86 → warp_beacon-2.6.88}/assets/placeholder.gif +0 -0
- {warp_beacon-2.6.86 → warp_beacon-2.6.88}/etc/.gitignore +0 -0
- {warp_beacon-2.6.86 → warp_beacon-2.6.88}/etc/accounts.json +0 -0
- {warp_beacon-2.6.86 → warp_beacon-2.6.88}/etc/proxies.json +0 -0
- {warp_beacon-2.6.86 → warp_beacon-2.6.88}/etc/warp_beacon.conf +0 -0
- {warp_beacon-2.6.86 → warp_beacon-2.6.88}/etc/warp_beacon.service +0 -0
- {warp_beacon-2.6.86 → warp_beacon-2.6.88}/setup.cfg +0 -0
- {warp_beacon-2.6.86 → warp_beacon-2.6.88}/setup.py +0 -0
- {warp_beacon-2.6.86 → warp_beacon-2.6.88}/warp_beacon/__init__.py +0 -0
- {warp_beacon-2.6.86 → warp_beacon-2.6.88}/warp_beacon/compress/__init__.py +0 -0
- {warp_beacon-2.6.86 → warp_beacon-2.6.88}/warp_beacon/compress/video.py +0 -0
- {warp_beacon-2.6.86 → warp_beacon-2.6.88}/warp_beacon/jobs/__init__.py +0 -0
- {warp_beacon-2.6.86 → warp_beacon-2.6.88}/warp_beacon/jobs/abstract.py +0 -0
- {warp_beacon-2.6.86 → warp_beacon-2.6.88}/warp_beacon/jobs/download_job.py +0 -0
- {warp_beacon-2.6.86 → warp_beacon-2.6.88}/warp_beacon/jobs/types.py +0 -0
- {warp_beacon-2.6.86 → warp_beacon-2.6.88}/warp_beacon/jobs/upload_job.py +0 -0
- {warp_beacon-2.6.86 → warp_beacon-2.6.88}/warp_beacon/mediainfo/__init__.py +0 -0
- {warp_beacon-2.6.86 → warp_beacon-2.6.88}/warp_beacon/mediainfo/abstract.py +0 -0
- {warp_beacon-2.6.86 → warp_beacon-2.6.88}/warp_beacon/mediainfo/audio.py +0 -0
- {warp_beacon-2.6.86 → warp_beacon-2.6.88}/warp_beacon/mediainfo/silencer.py +0 -0
- {warp_beacon-2.6.86 → warp_beacon-2.6.88}/warp_beacon/mediainfo/video.py +0 -0
- {warp_beacon-2.6.86 → warp_beacon-2.6.88}/warp_beacon/scheduler/__init__.py +0 -0
- {warp_beacon-2.6.86 → warp_beacon-2.6.88}/warp_beacon/scraper/abstract.py +0 -0
- {warp_beacon-2.6.86 → warp_beacon-2.6.88}/warp_beacon/scraper/account_selector.py +0 -0
- {warp_beacon-2.6.86 → warp_beacon-2.6.88}/warp_beacon/scraper/exceptions.py +0 -0
- {warp_beacon-2.6.86 → warp_beacon-2.6.88}/warp_beacon/scraper/fail_handler.py +0 -0
- {warp_beacon-2.6.86 → warp_beacon-2.6.88}/warp_beacon/scraper/instagram/__init__.py +0 -0
- {warp_beacon-2.6.86 → warp_beacon-2.6.88}/warp_beacon/scraper/instagram/captcha.py +0 -0
- {warp_beacon-2.6.86 → warp_beacon-2.6.88}/warp_beacon/scraper/link_resolver.py +0 -0
- {warp_beacon-2.6.86 → warp_beacon-2.6.88}/warp_beacon/scraper/youtube/__init__.py +0 -0
- {warp_beacon-2.6.86 → warp_beacon-2.6.88}/warp_beacon/scraper/youtube/music.py +0 -0
- {warp_beacon-2.6.86 → warp_beacon-2.6.88}/warp_beacon/scraper/youtube/shorts.py +0 -0
- {warp_beacon-2.6.86 → warp_beacon-2.6.88}/warp_beacon/scraper/youtube/youtube.py +0 -0
- {warp_beacon-2.6.86 → warp_beacon-2.6.88}/warp_beacon/storage/__init__.py +0 -0
- {warp_beacon-2.6.86 → warp_beacon-2.6.88}/warp_beacon/storage/mongo.py +0 -0
- {warp_beacon-2.6.86 → warp_beacon-2.6.88}/warp_beacon/telegram/__init__.py +0 -0
- {warp_beacon-2.6.86 → warp_beacon-2.6.88}/warp_beacon/telegram/bot.py +0 -0
- {warp_beacon-2.6.86 → warp_beacon-2.6.88}/warp_beacon/telegram/caption_shortener.py +0 -0
- {warp_beacon-2.6.86 → warp_beacon-2.6.88}/warp_beacon/telegram/download_status.py +0 -0
- {warp_beacon-2.6.86 → warp_beacon-2.6.88}/warp_beacon/telegram/edit_message.py +0 -0
- {warp_beacon-2.6.86 → warp_beacon-2.6.88}/warp_beacon/telegram/handlers.py +0 -0
- {warp_beacon-2.6.86 → warp_beacon-2.6.88}/warp_beacon/telegram/placeholder_message.py +0 -0
- {warp_beacon-2.6.86 → warp_beacon-2.6.88}/warp_beacon/telegram/progress_bar.py +0 -0
- {warp_beacon-2.6.86 → warp_beacon-2.6.88}/warp_beacon/telegram/progress_file_reader.py +0 -0
- {warp_beacon-2.6.86 → warp_beacon-2.6.88}/warp_beacon/telegram/types.py +0 -0
- {warp_beacon-2.6.86 → warp_beacon-2.6.88}/warp_beacon/telegram/utils.py +0 -0
- {warp_beacon-2.6.86 → warp_beacon-2.6.88}/warp_beacon/uploader/__init__.py +0 -0
- {warp_beacon-2.6.86 → warp_beacon-2.6.88}/warp_beacon/warp_beacon.py +0 -0
- {warp_beacon-2.6.86 → warp_beacon-2.6.88}/warp_beacon/yt_auth.py +0 -0
- {warp_beacon-2.6.86 → warp_beacon-2.6.88}/warp_beacon.egg-info/SOURCES.txt +0 -0
- {warp_beacon-2.6.86 → warp_beacon-2.6.88}/warp_beacon.egg-info/dependency_links.txt +0 -0
- {warp_beacon-2.6.86 → warp_beacon-2.6.88}/warp_beacon.egg-info/entry_points.txt +0 -0
- {warp_beacon-2.6.86 → warp_beacon-2.6.88}/warp_beacon.egg-info/top_level.txt +0 -0
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.4
|
2
2
|
Name: warp_beacon
|
3
|
-
Version: 2.6.86
|
3
|
+
Version: 2.6.88
|
4
4
|
Summary: Telegram bot for expanding external media links
|
5
5
|
Home-page: https://github.com/sb0y/warp_beacon
|
6
6
|
Author: Andrey Bagrintsev
|
@@ -241,6 +241,7 @@ Requires-Dist: yt_dlp
|
|
241
241
|
Requires-Dist: pydub
|
242
242
|
Requires-Dist: SpeechRecognition
|
243
243
|
Requires-Dist: playwright
|
244
|
+
Requires-Dist: fake-useragent
|
244
245
|
Dynamic: author
|
245
246
|
Dynamic: home-page
|
246
247
|
Dynamic: license-file
|
@@ -17,7 +17,9 @@ class InstagramHuman(object):
|
|
17
17
|
self.operations_count = 0
|
18
18
|
|
19
19
|
def watch_content(self, media: list) -> None:
|
20
|
-
|
20
|
+
if not media:
|
21
|
+
return
|
22
|
+
for m in media[:random.randint(1, len(media))]:
|
21
23
|
try:
|
22
24
|
logging.info("Wathing content with pk '%s'", str(m.pk))
|
23
25
|
content = self.scrapler.cl.media_info_v1(m.pk)
|
@@ -30,7 +32,7 @@ class InstagramHuman(object):
|
|
30
32
|
|
31
33
|
def scroll_content(self, last_pk: int) -> None:
|
32
34
|
timeline_initialized = False
|
33
|
-
if random.random() > 0.
|
35
|
+
if random.random() > 0.5:
|
34
36
|
timeline_initialized = True
|
35
37
|
self.scrapler.timeline_cursor = self.scrapler.download_hndlr(self.scrapler.cl.get_timeline_feed, reason="cold_start_fetch")
|
36
38
|
logging.info("Starting to watch related reels with media_pk '%d'", last_pk)
|
@@ -38,7 +40,7 @@ class InstagramHuman(object):
|
|
38
40
|
self.operations_count += 1
|
39
41
|
self.watch_content(media)
|
40
42
|
|
41
|
-
if random.random() > 0.
|
43
|
+
if random.random() > 0.7:
|
42
44
|
time.sleep(random.uniform(2, 20))
|
43
45
|
if not timeline_initialized:
|
44
46
|
self.scrapler.timeline_cursor = self.scrapler.download_hndlr(self.scrapler.cl.get_timeline_feed, reason="cold_start_fetch")
|
@@ -75,7 +77,7 @@ class InstagramHuman(object):
|
|
75
77
|
self.scrapler.download_hndlr(self.scrapler.cl.notification_like_and_comment_on_photo_user_tagged, "everyone")
|
76
78
|
self.operations_count += 1
|
77
79
|
self.random_pause()
|
78
|
-
if random.random() > 0.
|
80
|
+
if random.random() > 0.2:
|
79
81
|
logging.info("Simulation updating reels tray feed ...")
|
80
82
|
self.scrapler.download_hndlr(self.scrapler.cl.get_reels_tray_feed, "pull_to_refresh")
|
81
83
|
self.operations_count += 1
|
@@ -98,7 +100,7 @@ class InstagramHuman(object):
|
|
98
100
|
self.random_pause()
|
99
101
|
if random.random() > 0.4:
|
100
102
|
logging.info("Watching reels ...")
|
101
|
-
reels = self.scrapler.download_hndlr(self.scrapler.cl.reels)
|
103
|
+
reels = self.scrapler.download_hndlr(self.scrapler.cl.reels, amount=random.randint(4, 15))
|
102
104
|
self.operations_count += 1
|
103
105
|
self.watch_content(reels)
|
104
106
|
self.random_pause()
|
@@ -126,11 +128,11 @@ class InstagramHuman(object):
|
|
126
128
|
self.random_pause()
|
127
129
|
if random.random() > 0.4:
|
128
130
|
logging.info("Watching reels ...")
|
129
|
-
reels = self.scrapler.download_hndlr(self.scrapler.cl.reels)
|
131
|
+
reels = self.scrapler.download_hndlr(self.scrapler.cl.reels, amount=random.randint(4, 15))
|
130
132
|
self.operations_count += 1
|
131
133
|
self.watch_content(reels)
|
132
134
|
self.random_pause()
|
133
|
-
if random.random() > 0.
|
135
|
+
if random.random() > 0.4:
|
134
136
|
logging.info("Simulation profile view ...")
|
135
137
|
self.profile_view()
|
136
138
|
self.random_pause()
|
@@ -145,9 +147,9 @@ class InstagramHuman(object):
|
|
145
147
|
self.scrapler.download_hndlr(self.scrapler.cl.direct_active_presence)
|
146
148
|
self.operations_count += 1
|
147
149
|
self.random_pause(short=True)
|
148
|
-
if random.random() > 0.
|
150
|
+
if random.random() > 0.5:
|
149
151
|
logging.info("Watching reels ...")
|
150
|
-
reels = self.scrapler.download_hndlr(self.scrapler.cl.reels)
|
152
|
+
reels = self.scrapler.download_hndlr(self.scrapler.cl.reels, amount=random.randint(4, 15))
|
151
153
|
self.operations_count += 1
|
152
154
|
self.watch_content(reels)
|
153
155
|
self.random_pause()
|
@@ -179,14 +181,14 @@ class InstagramHuman(object):
|
|
179
181
|
#self.scrapler.download_hndlr(self.scrapler.cl.user_info, target_user_id)
|
180
182
|
self.scrapler.download_hndlr(self.scrapler.cl.user_info_v1, target_user_id)
|
181
183
|
self.operations_count += 1
|
182
|
-
|
184
|
+
self.random_pause()
|
183
185
|
elif isinstance(random_friend, str):
|
184
186
|
target_user_id = self.scrapler.download_hndlr(self.scrapler.cl.user_id_from_username, random_friend)
|
185
187
|
logging.info("user_info with target_user_id = '%s' ...", target_user_id)
|
186
188
|
#self.scrapler.download_hndlr(self.scrapler.cl.user_info, target_user_id)
|
187
189
|
self.scrapler.download_hndlr(self.scrapler.cl.user_info_v1, target_user_id)
|
188
190
|
self.operations_count += 1
|
189
|
-
|
191
|
+
self.random_pause()
|
190
192
|
|
191
193
|
if random.random() > 0.5:
|
192
194
|
logging.info("Checking direct ...")
|
@@ -202,7 +204,7 @@ class InstagramHuman(object):
|
|
202
204
|
|
203
205
|
if random.random() > 0.5:
|
204
206
|
logging.info("user_medias with target_user_id = '%s' ...", target_user_id)
|
205
|
-
self.scrapler.download_hndlr(self.scrapler.cl.user_medias_v1, target_user_id, amount=random.randint(1,
|
207
|
+
self.scrapler.download_hndlr(self.scrapler.cl.user_medias_v1, target_user_id, amount=random.randint(1, 10))
|
206
208
|
self.operations_count += 1
|
207
209
|
self.random_pause()
|
208
210
|
except Exception as e:
|
@@ -126,7 +126,7 @@ class IGScheduler(object):
|
|
126
126
|
|
127
127
|
def handle_time_planning(self) -> None:
|
128
128
|
if int(self.state.get("remaining", 0)) <= 0:
|
129
|
-
self.state["remaining"] = randrange(
|
129
|
+
self.state["remaining"] = randrange(5292, 12253)
|
130
130
|
|
131
131
|
def do_work(self) -> None:
|
132
132
|
logging.info("Scheduler thread started ...")
|
@@ -130,7 +130,7 @@ class AsyncDownloader(object):
|
|
130
130
|
proxy = selector.get_current_proxy()
|
131
131
|
if job.job_origin is Origin.INSTAGRAM:
|
132
132
|
from warp_beacon.scraper.instagram.instagram import InstagramScraper
|
133
|
-
if not job.scroll_content and selector.get_ig_request_count() >= int(os.environ.get("IG_REQUESTS_PER_ACCOUNT", default="
|
133
|
+
if not job.scroll_content and selector.get_ig_request_count() >= int(os.environ.get("IG_REQUESTS_PER_ACCOUNT", default="20")):
|
134
134
|
logging.info("The account request limit has been reached. Selecting the next account.")
|
135
135
|
selector.reset_ig_request_count()
|
136
136
|
selector.next()
|
@@ -164,7 +164,7 @@ class AsyncDownloader(object):
|
|
164
164
|
break
|
165
165
|
if job.session_validation and job.job_origin in (Origin.INSTAGRAM, Origin.YOUTUBE):
|
166
166
|
if job.job_origin is Origin.INSTAGRAM:
|
167
|
-
if selector.get_ig_request_count() >= int(os.environ.get("IG_REQUESTS_PER_ACCOUNT", default="
|
167
|
+
if selector.get_ig_request_count() >= int(os.environ.get("IG_REQUESTS_PER_ACCOUNT", default="20")):
|
168
168
|
logging.info("The account request limit has been reached. Selecting the next account.")
|
169
169
|
selector.reset_ig_request_count()
|
170
170
|
selector.next()
|
@@ -31,6 +31,7 @@ from warp_beacon.jobs.download_job import DownloadJob
|
|
31
31
|
from warp_beacon.telegram.utils import Utils
|
32
32
|
from warp_beacon.scraper.instagram.wb_instagrapi import WBClient
|
33
33
|
from warp_beacon.telegram.types import ReportType
|
34
|
+
from warp_beacon.scraper.utils import ScraperUtils
|
34
35
|
|
35
36
|
INST_SESSION_FILE_TPL = "/var/warp_beacon/inst_session_account_%d.json"
|
36
37
|
|
@@ -61,10 +62,7 @@ class InstagramScraper(ScraperAbstract):
|
|
61
62
|
"Accept": "*/*",
|
62
63
|
"Accept-Encoding": "gzip, deflate, br",
|
63
64
|
"Accept-Language": "en-US,en;q=0.9",
|
64
|
-
"User-Agent": (
|
65
|
-
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 "
|
66
|
-
"(KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36"
|
67
|
-
)
|
65
|
+
"User-Agent": ScraperUtils.get_ua()
|
68
66
|
})
|
69
67
|
self.cl.set_progress_callback(self.download_progress)
|
70
68
|
|
@@ -96,7 +94,7 @@ class InstagramScraper(ScraperAbstract):
|
|
96
94
|
"uuid": uuids.get("uuid", self.cl.generate_uuid()),
|
97
95
|
"client_session_id": self.client_session_id,
|
98
96
|
"advertising_id": uuids.get("advertising_id", self.cl.generate_uuid()),
|
99
|
-
"device_id": uuids.get("device_id", self.cl.
|
97
|
+
"device_id": uuids.get("device_id", self.cl.generate_android_device_id())
|
100
98
|
})
|
101
99
|
|
102
100
|
def safe_write_session(self) -> None:
|
@@ -7,6 +7,8 @@ import requests
|
|
7
7
|
from instagrapi import Client
|
8
8
|
from instagrapi.exceptions import VideoNotDownload
|
9
9
|
|
10
|
+
from warp_beacon.scraper.utils import ScraperUtils
|
11
|
+
|
10
12
|
class WBClient(Client):
|
11
13
|
"""
|
12
14
|
patched instagrapi
|
@@ -17,10 +19,7 @@ class WBClient(Client):
|
|
17
19
|
self.session = requests.Session()
|
18
20
|
# may be I should remove '"Sec-Fetch-*", "Upgrade-Insecure-Requests", "DNT"' ?
|
19
21
|
self.session.headers.update({
|
20
|
-
"User-Agent": (
|
21
|
-
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 "
|
22
|
-
"(KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36"
|
23
|
-
),
|
22
|
+
"User-Agent": ScraperUtils.get_ua(),
|
24
23
|
"Accept": (
|
25
24
|
"text/html,application/xhtml+xml,application/xml;"
|
26
25
|
"q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8"
|
@@ -29,12 +28,12 @@ class WBClient(Client):
|
|
29
28
|
"Accept-Encoding": "gzip, deflate, br",
|
30
29
|
"Referer": "https://www.instagram.com/",
|
31
30
|
"Connection": "keep-alive",
|
32
|
-
"Sec-Fetch-Site": "same-origin",
|
33
|
-
"Sec-Fetch-Mode": "navigate",
|
34
|
-
"Sec-Fetch-User": "?1",
|
35
|
-
"Sec-Fetch-Dest": "document",
|
36
|
-
"Upgrade-Insecure-Requests": "1",
|
37
|
-
"DNT": "1",
|
31
|
+
#"Sec-Fetch-Site": "same-origin",
|
32
|
+
#"Sec-Fetch-Mode": "navigate",
|
33
|
+
#"Sec-Fetch-User": "?1",
|
34
|
+
#"Sec-Fetch-Dest": "document",
|
35
|
+
#"Upgrade-Insecure-Requests": "1",
|
36
|
+
#"DNT": "1",
|
38
37
|
})
|
39
38
|
self.essential_params = {"oe", "oh", "_nc_ht", "_nc_cat", "_nc_oc", "_nc_ohc", "_nc_gid"}
|
40
39
|
|
@@ -0,0 +1,20 @@
|
|
1
|
+
import logging
|
2
|
+
|
3
|
+
from fake_useragent import UserAgent
|
4
|
+
|
5
|
+
class ScraperUtils(object):
|
6
|
+
@staticmethod
|
7
|
+
def get_ua_dict(browsers: list = ['Facebook', 'Android'], platforms: list = ['mobile', 'tablet'], os: list =['Android', 'iOS']) -> dict:
|
8
|
+
random_client = None
|
9
|
+
try:
|
10
|
+
ua = UserAgent(browsers=browsers, platforms=platforms, os=os)
|
11
|
+
random_client = ua.getRandom
|
12
|
+
logging.info("Select random UA: %s", random_client)
|
13
|
+
except Exception as e:
|
14
|
+
logging.warning("Exception occurrd while generating random client UA!", exc_info=e)
|
15
|
+
random_client = {'useragent': 'Mozilla/5.0 (Linux; Android 14; SM-S911B Build/UP1A.231005.007; wv) AppleWebKit/537.36 (KHTML, like Gecko) Version/4.0 Chrome/133.0.6943.117 Mobile Safari/537.36 [FB_IAB/FB4A;FBAV/502.0.0.66.79;IABMV/1;]', 'percent': 0.017937771404345798, 'type': 'mobile', 'device_brand': 'Samsung', 'browser': 'Facebook', 'browser_version': '502.0.0', 'browser_version_major_minor': 502.0, 'os': 'Android', 'os_version': '14', 'platform': 'Linux aarch64'}
|
16
|
+
return random_client
|
17
|
+
|
18
|
+
@staticmethod
|
19
|
+
def get_ua(browsers: list = ['Facebook', 'Android'], platforms: list = ['mobile', 'tablet'], os: list =['Android', 'iOS']) -> str:
|
20
|
+
return ScraperUtils.get_ua_dict(browsers=browsers, platforms=platforms, os=os)["useragent"]
|
@@ -29,6 +29,7 @@ from warp_beacon.scraper.abstract import ScraperAbstract
|
|
29
29
|
from warp_beacon.scraper.exceptions import (BadProxy, TimeOut, Unavailable,
|
30
30
|
extract_exception_message)
|
31
31
|
from warp_beacon.yt_auth import YtAuth
|
32
|
+
from warp_beacon.scraper.utils import ScraperUtils
|
32
33
|
|
33
34
|
class YoutubeAbstract(ScraperAbstract):
|
34
35
|
DOWNLOAD_DIR = "/tmp"
|
@@ -226,7 +227,10 @@ class YoutubeAbstract(ScraperAbstract):
|
|
226
227
|
return ret_val
|
227
228
|
|
228
229
|
def yt_on_progress(self, stream: Stream, chunk: bytes, bytes_remaining: int) -> None:
|
229
|
-
total_size = stream.filesize or stream.filesize_approx
|
230
|
+
total_size = int(stream.filesize or stream.filesize_approx or 0)
|
231
|
+
if not total_size:
|
232
|
+
logging.warning("[Download worker]: total_size is '%d'", total_size)
|
233
|
+
return
|
230
234
|
bytes_downloaded = total_size - bytes_remaining
|
231
235
|
percentage_of_completion = bytes_downloaded / (total_size or 1) * 100
|
232
236
|
if total_size == 0 or percentage_of_completion >= self._download_progress_threshold:
|
@@ -286,6 +290,28 @@ class YoutubeAbstract(ScraperAbstract):
|
|
286
290
|
yt_opts["proxies"] = self.build_proxies(proxy_dsn)
|
287
291
|
return YouTube(**yt_opts)
|
288
292
|
|
293
|
+
def yt_dlp_on_progress(self, params: dict) -> None:
|
294
|
+
if params.get("status", "") == "downloading":
|
295
|
+
total_size = int(params.get("total_bytes") or params.get("total_bytes_estimate") or 0)
|
296
|
+
if not total_size or total_size < 0:
|
297
|
+
logging.warning("[Download worker][yt_dlp]: total_size is '%d'", total_size)
|
298
|
+
return
|
299
|
+
bytes_downloaded = int(params.get("downloaded_bytes", 0))
|
300
|
+
percentage_of_completion = bytes_downloaded / (total_size or 1) * 100
|
301
|
+
if total_size == 0 or percentage_of_completion >= self._download_progress_threshold:
|
302
|
+
msg = {
|
303
|
+
"action": "report_download_status",
|
304
|
+
"current": bytes_downloaded,
|
305
|
+
"total": total_size,
|
306
|
+
"message_id": self.job.placeholder_message_id,
|
307
|
+
"chat_id": self.job.chat_id,
|
308
|
+
"completed": percentage_of_completion >= 100
|
309
|
+
}
|
310
|
+
self.status_pipe.send(msg)
|
311
|
+
logging.debug("[Download worker][yt_dlp] Downloaded %d%%", percentage_of_completion)
|
312
|
+
if total_size > 0:
|
313
|
+
self._download_progress_threshold += 20
|
314
|
+
|
289
315
|
def build_yt_dlp(self, timeout: int = 60) -> yt_dlp.YoutubeDL:
|
290
316
|
auth_data = {}
|
291
317
|
with open(self.YT_SESSION_FILE % self.account_index, 'r', encoding="utf-8") as f:
|
@@ -297,9 +323,24 @@ class YoutubeAbstract(ScraperAbstract):
|
|
297
323
|
'format': 'bestvideo+bestaudio/best',
|
298
324
|
'merge_output_format': 'mp4',
|
299
325
|
'noplaylist': True,
|
300
|
-
'
|
326
|
+
'progress_hooks': [self.yt_dlp_on_progress],
|
327
|
+
'http_headers': {
|
328
|
+
"Accept-Language": "en-US,en;q=0.9",
|
329
|
+
'User-Agent': ScraperUtils.get_ua(
|
330
|
+
browsers=["Google", "Chrome", "Firefox"],
|
331
|
+
os=["Windows", "Linux", "Ubuntu", "Chrome OS", "Mac OS X"],
|
332
|
+
platforms=["desktop"]
|
333
|
+
)
|
334
|
+
}
|
301
335
|
}
|
302
336
|
|
337
|
+
if auth_data and auth_data.get("access_token", None):
|
338
|
+
ydl_opts["http_headers"]["Authorization"] = f'Bearer {auth_data["access_token"]}'
|
339
|
+
|
340
|
+
yt_dlp_cookies_file = os.environ.get("YT_DLP_COOKIES_FILE", default="/var/warp_beacon/yt_dlp_cookies.txt")
|
341
|
+
if yt_dlp_cookies_file and os.path.exists(yt_dlp_cookies_file):
|
342
|
+
ydl_opts['cookiefile'] = yt_dlp_cookies_file
|
343
|
+
|
303
344
|
if self.proxy:
|
304
345
|
proxy_dsn = self.proxy.get("dsn", "")
|
305
346
|
logging.info("Using proxy DSN '%s'", proxy_dsn)
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.4
|
2
2
|
Name: warp_beacon
|
3
|
-
Version: 2.6.86
|
3
|
+
Version: 2.6.88
|
4
4
|
Summary: Telegram bot for expanding external media links
|
5
5
|
Home-page: https://github.com/sb0y/warp_beacon
|
6
6
|
Author: Andrey Bagrintsev
|
@@ -241,6 +241,7 @@ Requires-Dist: yt_dlp
|
|
241
241
|
Requires-Dist: pydub
|
242
242
|
Requires-Dist: SpeechRecognition
|
243
243
|
Requires-Dist: playwright
|
244
|
+
Requires-Dist: fake-useragent
|
244
245
|
Dynamic: author
|
245
246
|
Dynamic: home-page
|
246
247
|
Dynamic: license-file
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|