warp-beacon 2.1.8.tar.gz → 2.1.10.tar.gz
This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the package versions as they appear in their respective public registries.
- {warp_beacon-2.1.8/warp_beacon.egg-info → warp_beacon-2.1.10}/PKG-INFO +1 -1
- warp_beacon-2.1.10/warp_beacon/__version__.py +2 -0
- {warp_beacon-2.1.8 → warp_beacon-2.1.10}/warp_beacon/scheduler/scheduler.py +3 -1
- warp_beacon-2.1.10/warp_beacon/scraper/__init__.py +354 -0
- {warp_beacon-2.1.8 → warp_beacon-2.1.10}/warp_beacon/telegram/bot.py +1 -1
- {warp_beacon-2.1.8 → warp_beacon-2.1.10/warp_beacon.egg-info}/PKG-INFO +1 -1
- warp_beacon-2.1.8/warp_beacon/__version__.py +0 -2
- warp_beacon-2.1.8/warp_beacon/scraper/__init__.py +0 -352
- {warp_beacon-2.1.8 → warp_beacon-2.1.10}/LICENSE +0 -0
- {warp_beacon-2.1.8 → warp_beacon-2.1.10}/MANIFEST.in +0 -0
- {warp_beacon-2.1.8 → warp_beacon-2.1.10}/README.md +0 -0
- {warp_beacon-2.1.8 → warp_beacon-2.1.10}/assets/placeholder.gif +0 -0
- {warp_beacon-2.1.8 → warp_beacon-2.1.10}/etc/.gitignore +0 -0
- {warp_beacon-2.1.8 → warp_beacon-2.1.10}/etc/accounts.json +0 -0
- {warp_beacon-2.1.8 → warp_beacon-2.1.10}/etc/warp_beacon.conf +0 -0
- {warp_beacon-2.1.8 → warp_beacon-2.1.10}/etc/warp_beacon.service +0 -0
- {warp_beacon-2.1.8 → warp_beacon-2.1.10}/pyproject.toml +0 -0
- {warp_beacon-2.1.8 → warp_beacon-2.1.10}/setup.cfg +0 -0
- {warp_beacon-2.1.8 → warp_beacon-2.1.10}/setup.py +0 -0
- {warp_beacon-2.1.8 → warp_beacon-2.1.10}/warp_beacon/__init__.py +0 -0
- {warp_beacon-2.1.8 → warp_beacon-2.1.10}/warp_beacon/compress/__init__.py +0 -0
- {warp_beacon-2.1.8 → warp_beacon-2.1.10}/warp_beacon/compress/video.py +0 -0
- {warp_beacon-2.1.8 → warp_beacon-2.1.10}/warp_beacon/jobs/__init__.py +0 -0
- {warp_beacon-2.1.8 → warp_beacon-2.1.10}/warp_beacon/jobs/abstract.py +0 -0
- {warp_beacon-2.1.8 → warp_beacon-2.1.10}/warp_beacon/jobs/download_job.py +0 -0
- {warp_beacon-2.1.8 → warp_beacon-2.1.10}/warp_beacon/jobs/types.py +0 -0
- {warp_beacon-2.1.8 → warp_beacon-2.1.10}/warp_beacon/jobs/upload_job.py +0 -0
- {warp_beacon-2.1.8 → warp_beacon-2.1.10}/warp_beacon/mediainfo/__init__.py +0 -0
- {warp_beacon-2.1.8 → warp_beacon-2.1.10}/warp_beacon/mediainfo/abstract.py +0 -0
- {warp_beacon-2.1.8 → warp_beacon-2.1.10}/warp_beacon/mediainfo/audio.py +0 -0
- {warp_beacon-2.1.8 → warp_beacon-2.1.10}/warp_beacon/mediainfo/silencer.py +0 -0
- {warp_beacon-2.1.8 → warp_beacon-2.1.10}/warp_beacon/mediainfo/video.py +0 -0
- {warp_beacon-2.1.8 → warp_beacon-2.1.10}/warp_beacon/scheduler/__init__.py +0 -0
- {warp_beacon-2.1.8 → warp_beacon-2.1.10}/warp_beacon/scraper/abstract.py +0 -0
- {warp_beacon-2.1.8 → warp_beacon-2.1.10}/warp_beacon/scraper/account_selector.py +0 -0
- {warp_beacon-2.1.8 → warp_beacon-2.1.10}/warp_beacon/scraper/exceptions.py +0 -0
- {warp_beacon-2.1.8 → warp_beacon-2.1.10}/warp_beacon/scraper/instagram/__init__.py +0 -0
- {warp_beacon-2.1.8 → warp_beacon-2.1.10}/warp_beacon/scraper/instagram/instagram.py +0 -0
- {warp_beacon-2.1.8 → warp_beacon-2.1.10}/warp_beacon/scraper/youtube/__init__.py +0 -0
- {warp_beacon-2.1.8 → warp_beacon-2.1.10}/warp_beacon/scraper/youtube/abstract.py +0 -0
- {warp_beacon-2.1.8 → warp_beacon-2.1.10}/warp_beacon/scraper/youtube/music.py +0 -0
- {warp_beacon-2.1.8 → warp_beacon-2.1.10}/warp_beacon/scraper/youtube/shorts.py +0 -0
- {warp_beacon-2.1.8 → warp_beacon-2.1.10}/warp_beacon/scraper/youtube/youtube.py +0 -0
- {warp_beacon-2.1.8 → warp_beacon-2.1.10}/warp_beacon/storage/__init__.py +0 -0
- {warp_beacon-2.1.8 → warp_beacon-2.1.10}/warp_beacon/telegram/__init__.py +0 -0
- {warp_beacon-2.1.8 → warp_beacon-2.1.10}/warp_beacon/telegram/handlers.py +0 -0
- {warp_beacon-2.1.8 → warp_beacon-2.1.10}/warp_beacon/telegram/placeholder_message.py +0 -0
- {warp_beacon-2.1.8 → warp_beacon-2.1.10}/warp_beacon/telegram/utils.py +0 -0
- {warp_beacon-2.1.8 → warp_beacon-2.1.10}/warp_beacon/uploader/__init__.py +0 -0
- {warp_beacon-2.1.8 → warp_beacon-2.1.10}/warp_beacon/warp_beacon.py +0 -0
- {warp_beacon-2.1.8 → warp_beacon-2.1.10}/warp_beacon.egg-info/SOURCES.txt +0 -0
- {warp_beacon-2.1.8 → warp_beacon-2.1.10}/warp_beacon.egg-info/dependency_links.txt +0 -0
- {warp_beacon-2.1.8 → warp_beacon-2.1.10}/warp_beacon.egg-info/entry_points.txt +0 -0
- {warp_beacon-2.1.8 → warp_beacon-2.1.10}/warp_beacon.egg-info/requires.txt +0 -0
- {warp_beacon-2.1.8 → warp_beacon-2.1.10}/warp_beacon.egg-info/top_level.txt +0 -0
{warp_beacon-2.1.8 → warp_beacon-2.1.10}/warp_beacon/scheduler/scheduler.py

@@ -1,5 +1,6 @@
 import threading
 
+from warp_beacon.jobs import Origin
 import warp_beacon
 
 import logging
@@ -42,7 +43,8 @@ class IGScheduler(object):
 	def validate_ig_session(self) -> bool:
 		try:
 			self.downloader.queue_task(warp_beacon.jobs.download_job.DownloadJob.build(
-				session_validation=True
+				session_validation=True,
+				job_origin=Origin.INSTAGRAM
 			))
 		except Exception as e:
 			logging.warning("An error occurred while validating instagram session!")
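Note: the scheduler change above tags the periodic session-validation job with an explicit origin, so the reworked download worker (see warp_beacon/scraper/__init__.py below) does not discard it as an unknown-origin task. A minimal usage sketch, assuming only what the diff shows (DownloadJob.build accepting these keyword arguments) plus an AsyncDownloader instance named downloader being available:

from warp_beacon.jobs import Origin
from warp_beacon.jobs.download_job import DownloadJob

# Build a session-validation job that carries its origin, as IGScheduler now does.
job = DownloadJob.build(
	session_validation=True,
	job_origin=Origin.INSTAGRAM
)
downloader.queue_task(job)  # AsyncDownloader routes the job by its job_origin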
warp_beacon-2.1.10/warp_beacon/scraper/__init__.py (new file)

@@ -0,0 +1,354 @@
+import os
+
+from typing import Optional
+import multiprocessing
+from queue import Empty
+
+from warp_beacon.scraper.exceptions import NotFound, UnknownError, TimeOut, Unavailable, FileTooBig, YotubeLiveError, YotubeAgeRestrictedError, IGRateLimitOccurred, CaptchaIssue, AllAccountsFailed
+from warp_beacon.mediainfo.video import VideoInfo
+from warp_beacon.mediainfo.audio import AudioInfo
+from warp_beacon.mediainfo.silencer import Silencer
+from warp_beacon.compress.video import VideoCompress
+from warp_beacon.uploader import AsyncUploader
+from warp_beacon.jobs import Origin
+from warp_beacon.jobs.download_job import DownloadJob
+from warp_beacon.jobs.upload_job import UploadJob
+from warp_beacon.jobs.types import JobType
+from warp_beacon.scraper.account_selector import AccountSelector
+from warp_beacon.scheduler.scheduler import IGScheduler
+
+import logging
+
+ACC_FILE = os.environ.get("SERVICE_ACCOUNTS_FILE", default="/var/warp_beacon/accounts.json")
+
+class AsyncDownloader(object):
+	__JOE_BIDEN_WAKEUP = None
+	workers = []
+	allow_loop = None
+	job_queue = multiprocessing.Queue()
+	uploader = None
+	workers_count = 0
+	auth_event = multiprocessing.Event()
+	acc_selector = None
+	scheduler = None
+
+	def __init__(self, uploader: AsyncUploader, workers_count: int) -> None:
+		self.allow_loop = multiprocessing.Value('i', 1)
+		self.uploader = uploader
+		self.workers_count = workers_count
+		self.acc_selector = AccountSelector(ACC_FILE)
+		self.scheduler = IGScheduler(self)
+		self.scheduler.start()
+
+	def __del__(self) -> None:
+		self.stop_all()
+
+	def start(self) -> None:
+		for _ in range(self.workers_count):
+			proc = multiprocessing.Process(target=self.do_work)
+			self.workers.append(proc)
+			proc.start()
+
+	def get_media_info(self, path: str, fr_media_info: dict={}, media_type: JobType = JobType.VIDEO) -> Optional[dict]:
+		media_info = None
+		try:
+			if path:
+				if media_type == JobType.VIDEO:
+					video_info = VideoInfo(path)
+					media_info = video_info.get_finfo(tuple(fr_media_info.keys()))
+					media_info.update(fr_media_info)
+					if not media_info.get("thumb", None):
+						media_info["thumb"] = video_info.generate_thumbnail()
+					media_info["has_sound"] = video_info.has_sound()
+				elif media_type == JobType.AUDIO:
+					audio_info = AudioInfo(path)
+					media_info = audio_info.get_finfo(tuple(fr_media_info.keys()))
+		except Exception as e:
+			logging.error("Failed to process media info!")
+			logging.exception(e)
+
+		return media_info
+
+	def try_next_account(self, job: DownloadJob, report_error: str = None) -> None:
+		logging.warning("Switching account!")
+		if job.account_switches > self.acc_selector.count_service_accounts(job.job_origin):
+			raise AllAccountsFailed("All config accounts failed!")
+		if report_error:
+			self.acc_selector.bump_acc_fail("rate_limits")
+		self.acc_selector.next()
+		cur_acc = self.acc_selector.get_current()
+		logging.info("Current account: '%s'", str(cur_acc))
+		job.account_switches += 1
+
+	def do_work(self) -> None:
+		logging.info("download worker started")
+		while self.allow_loop.value == 1:
+			try:
+				job = None
+				try:
+					job = self.job_queue.get()
+					if job is self.__JOE_BIDEN_WAKEUP:
+						continue
+					actor = None
+					try:
+						items = []
+						if job.job_origin is Origin.UNKNOWN:
+							logging.warning("Unknown task origin! Skipping.")
+							continue
+						if not job.in_process:
+							actor = None
+							self.acc_selector.set_module(job.job_origin)
+							if job.job_origin is Origin.INSTAGRAM:
+								from warp_beacon.scraper.instagram.instagram import InstagramScraper
+								actor = InstagramScraper(self.acc_selector.get_current())
+							elif job.job_origin is Origin.YT_SHORTS:
+								from warp_beacon.scraper.youtube.shorts import YoutubeShortsScraper
+								actor = YoutubeShortsScraper(self.acc_selector.get_current())
+							elif job.job_origin is Origin.YT_MUSIC:
+								from warp_beacon.scraper.youtube.music import YoutubeMusicScraper
+								actor = YoutubeMusicScraper(self.acc_selector.get_current())
+							elif job.job_origin is Origin.YOUTUBE:
+								from warp_beacon.scraper.youtube.youtube import YoutubeScraper
+								actor = YoutubeScraper(self.acc_selector.get_current())
+							actor.send_message_to_admin_func = self.send_message_to_admin
+							actor.auth_event = self.auth_event
+							while True:
+								try:
+									if job.session_validation:
+										logging.info("Validating '%s' session ...", job.job_origin.value)
+										actor.validate_session()
+										logging.info("done")
+									else:
+										logging.info("Downloading URL '%s'", job.url)
+										items = actor.download(job.url)
+									break
+								except NotFound as e:
+									logging.warning("Not found error occurred!")
+									logging.exception(e)
+									self.uploader.queue_task(job.to_upload_job(
+										job_failed=True,
+										job_failed_msg="Unable to access to media under this URL. Seems like the media is private.")
+									)
+									self.send_message_to_admin(
+										f"Task {job.job_id} failed. URL: '{job.url}'. Reason: 'NotFound'."
+									)
+									break
+								except Unavailable as e:
+									logging.warning("Not found or unavailable error occurred!")
+									logging.exception(e)
+									if job.unvailable_error_count > self.acc_selector.count_service_accounts(job.job_origin):
+										self.uploader.queue_task(job.to_upload_job(
+											job_failed=True,
+											job_failed_msg="Video is unvailable for all your service accounts.")
+										)
+										break
+									job.unvailable_error_count += 1
+									logging.info("Trying to switch account")
+									self.acc_selector.next()
+									self.job_queue.put(job)
+									break
+								except TimeOut as e:
+									logging.warning("Timeout error occurred!")
+									logging.exception(e)
+									self.uploader.queue_task(job.to_upload_job(
+										job_failed=True,
+										job_failed_msg="Failed to download content due timeout error. Please check you Internet connection, retry amount or request timeout bot configuration settings.")
+									)
+									self.send_message_to_admin(
+										f"Task {job.job_id} failed. URL: '{job.url}'. Reason: 'TimeOut'."
+									)
+									break
+								except FileTooBig as e:
+									logging.warning("Telegram limits exceeded :(")
+									logging.exception(e)
+									self.uploader.queue_task(job.to_upload_job(
+										job_failed=True,
+										job_failed_msg="Unfortunately this file has exceeded the Telegram limits. A file cannot be larger than 2 gigabytes.")
+									)
+									self.send_message_to_admin(
+										f"Task {job.job_id} failed. URL: '{job.url}'. Reason: 'FileTooBig'."
+									)
+									break
+								except IGRateLimitOccurred as e:
+									logging.warning("IG ratelimit occurred :(")
+									logging.exception(e)
+									self.try_next_account(job, report_error="rate_limits")
+									self.job_queue.put(job)
+									break
+								except CaptchaIssue as e:
+									logging.warning("Challange occurred!")
+									logging.exception(e)
+									self.try_next_account(job)
+									self.job_queue.put(job)
+									break
+								except YotubeLiveError as e:
+									logging.warning("Youtube Live videos are not supported. Skipping.")
+									logging.exception(e)
+									self.uploader.queue_task(job.to_upload_job(
+										job_failed=True,
+										job_failed_msg="Youtube Live videos are not supported. Please wait until the live broadcast ends.")
+									)
+									break
+								except YotubeAgeRestrictedError as e:
+									logging.error("Youtube Age Restricted error")
+									logging.exception(e)
+									self.uploader.queue_task(job.to_upload_job(
+										job_failed=True,
+										job_failed_msg="Youtube Age Restricted error. Check your bot Youtube account settings.")
+									)
+									self.send_message_to_admin(
+										f"Task {job.job_id} failed. URL: '{job.url}'. Reason: 'YotubeAgeRestrictedError'."
+									)
+									break
+								except AllAccountsFailed as e:
+									logging.error("All accounts failed!")
+									logging.exception(e)
+									self.uploader.queue_task(job.to_upload_job(
+										job_failed=True,
+										job_failed_msg="All bot accounts failed to download content. Bot administrator noticed about the issue.")
+									)
+									self.send_message_to_admin(
+										f"Task {job.job_id} failed. URL: '{job.url}'. Reason: 'AllAccountsFailed'."
+									)
+									break
+								except (UnknownError, Exception) as e:
+									logging.warning("UnknownError occurred!")
+									logging.exception(e)
+									exception_msg = ""
+									if hasattr(e, "message"):
+										exception_msg = e.message
+									else:
+										exception_msg = str(e)
+									if "geoblock_required" in exception_msg:
+										if job.geoblock_error_count > self.acc_selector.count_service_accounts(job.job_origin):
+											self.uploader.queue_task(job.to_upload_job(
+												job_failed=True,
+												job_failed_msg="This content does not accessible for all yout bot accounts. Seems like author blocked certain regions.")
+											)
+											self.send_message_to_admin(
+												f"Task {job.job_id} failed. URL: '{job.url}'. Reason: 'geoblock_required'."
+											)
+											break
+										job.geoblock_error_count += 1
+										logging.info("Trying to switch account")
+										self.acc_selector.next()
+										self.job_queue.put(job)
+										break
+									self.uploader.queue_task(job.to_upload_job(
+										job_failed=True,
+										job_failed_msg="WOW, unknown error occured! Please [create issue](https://github.com/sb0y/warp_beacon/issues) with service logs.")
+									)
+									self.send_message_to_admin(
+										f"Task {job.job_id} failed. URL: '{job.url}'. Reason: 'UnknownError'."
+										f"Exception:\n```\n{exception_msg}\n```"
+									)
+									break
+
+							if items:
+								for item in items:
+									media_info = {"filesize": 0}
+									if item["media_type"] == JobType.VIDEO:
+										media_info = self.get_media_info(item["local_media_path"], item.get("media_info", {}), JobType.VIDEO)
+										logging.info("Final media info: %s", media_info)
+										if media_info["filesize"] > 2e+9:
+											logging.info("Filesize is '%d' MiB", round(media_info["filesize"] / 1024 / 1024))
+											logging.info("Detected big file. Starting compressing with ffmpeg ...")
+											self.uploader.queue_task(job.to_upload_job(
+												job_warning=True,
+												job_warning_msg="Downloaded file size is bigger than Telegram limits! Performing video compression. This may take a while.")
+											)
+											ffmpeg = VideoCompress(file_path=item["local_media_path"])
+											new_filepath = ffmpeg.generate_filepath(base_filepath=item["local_media_path"])
+											if ffmpeg.compress_to(new_filepath, target_size=2000 * 1000):
+												logging.info("Successfully compressed file '%s'", new_filepath)
+												os.unlink(item["local_media_path"])
+												item["local_media_path"] = new_filepath
+												item["local_compressed_media_path"] = new_filepath
+												media_info["filesize"] = VideoInfo.get_filesize(new_filepath)
+												logging.info("New file size of compressed file is '%.3f'", media_info["filesize"])
+										if not media_info["has_sound"]:
+											item["media_type"] = JobType.ANIMATION
+									elif item["media_type"] == JobType.AUDIO:
+										media_info = self.get_media_info(item["local_media_path"], item.get("media_info", {}), JobType.AUDIO)
+										media_info["performer"] = item.get("performer", None)
+										media_info["thumb"] = item.get("thumb", None)
+										logging.info("Final media info: %s", media_info)
+									elif item["media_type"] == JobType.COLLECTION:
+										for chunk in item["items"]:
+											for v in chunk:
+												if v["media_type"] == JobType.VIDEO:
+													col_media_info = self.get_media_info(v["local_media_path"], v["media_info"])
+													media_info["filesize"] += int(col_media_info.get("filesize", 0))
+													v["media_info"] = col_media_info
+													if not v["media_info"]["has_sound"]:
+														silencer = Silencer(v["local_media_path"])
+														silent_video_path = silencer.add_silent_audio()
+														os.unlink(v["local_media_path"])
+														v["local_media_path"] = silent_video_path
+														v["media_info"].update(silencer.get_finfo())
+														v["media_info"]["has_sound"] = True
+
+									job_args = {"media_type": item["media_type"], "media_info": media_info}
+									if item["media_type"] == JobType.COLLECTION:
+										job_args["media_collection"] = item["items"]
+										if item.get("save_items", None) is not None:
+											job_args["save_items"] = item.get("save_items", False)
+									else:
+										job_args["local_media_path"] = item["local_media_path"]
+										if item.get("local_compressed_media_path", None):
+											job_args["local_media_path"] = item.get("local_compressed_media_path", None)
+
+									job_args["canonical_name"] = item.get("canonical_name", "")
+
+									logging.debug("local_media_path: '%s'", job_args.get("local_media_path", ""))
+									logging.debug("media_collection: '%s'", str(job_args.get("media_collection", {})))
+									#logging.info(job_args)
+									upload_job = job.to_upload_job(**job_args)
+									if upload_job.is_empty():
+										logging.info("Upload job is empty. Nothing to do here!")
+										self.uploader.queue_task(job.to_upload_job(
+											job_failed=True,
+											job_failed_msg="Seems like this link doesn't contains any media.")
+										)
+									else:
+										self.uploader.queue_task(upload_job)
+						else:
+							logging.info("Job already in work in parallel worker. Redirecting job to upload worker.")
+							self.uploader.queue_task(job.to_upload_job())
+					except Exception as e:
+						logging.error("Error inside download worker!")
+						logging.exception(e)
+						self.notify_task_failed(job)
+				except Empty:
+					pass
+			except Exception as e:
+				logging.error("Exception occurred inside worker!")
+				logging.exception(e)
+
+		logging.info("Process done")
+
+	def stop_all(self) -> None:
+		self.allow_loop.value = 0
+		self.scheduler.stop()
+		for proc in self.workers:
+			if proc.is_alive():
+				logging.info("stopping process #%d", proc.pid)
+				self.job_queue.put_nowait(self.__JOE_BIDEN_WAKEUP)
+				proc.join()
+				#proc.terminate()
+				logging.info("process #%d stopped", proc.pid)
+		self.workers.clear()
+
+	def queue_task(self, job: DownloadJob) -> str:
+		self.job_queue.put_nowait(job)
+		return str(job.job_id)
+
+	def notify_task_failed(self, job: DownloadJob) -> None:
+		self.uploader.queue_task(job.to_upload_job(job_failed=True))
+
+	def send_message_to_admin(self, text: str, yt_auth: bool = False) -> None:
+		self.uploader.queue_task(UploadJob.build(
+			is_message_to_admin=True,
+			message_text=text,
+			yt_auth=yt_auth
+		))
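Note: the new warp_beacon/scraper/__init__.py keeps the AsyncDownloader worker pool: multiprocessing workers pull DownloadJob items from a shared queue, pick a scraper class from job.job_origin, download the media (or validate a session), and hand the result to AsyncUploader; a sentinel value (__JOE_BIDEN_WAKEUP) unblocks workers on shutdown. A hedged wiring sketch, not taken from the package (AsyncUploader's constructor and any DownloadJob.build keywords beyond those visible in this diff are assumptions):

import os

from warp_beacon.scraper import AsyncDownloader
from warp_beacon.uploader import AsyncUploader
from warp_beacon.jobs import Origin
from warp_beacon.jobs.download_job import DownloadJob

uploader = AsyncUploader()  # assumed constructor; the real signature is not part of this diff
downloader = AsyncDownloader(uploader=uploader, workers_count=os.cpu_count() or 2)
downloader.start()

# Queue a download; a worker selects the scraper from job_origin and uploads the result.
job_id = downloader.queue_task(DownloadJob.build(
	url="https://www.instagram.com/reel/XXXXXXXX/",  # placeholder URL
	job_origin=Origin.INSTAGRAM
))

downloader.stop_all()  # pushes the wakeup sentinel and joins the worker processes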
{warp_beacon-2.1.8 → warp_beacon-2.1.10}/warp_beacon/telegram/bot.py

@@ -36,7 +36,7 @@ class Bot(object):
 			format="%(asctime)s - %(name)s - %(levelname)s - %(message)s", level=logging.INFO
 		)
 
-		logging.info(
+		logging.info("Starting Warp Beacon version '%s' ...", __version__)
 
 		workers_amount = min(32, os.cpu_count() + 4)
 
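Note: the bot.py change logs the startup version through logging's lazy %-style arguments, so string formatting is deferred to the logging framework and only happens if the record is actually emitted. A small, generic illustration (not package code; __version__ here is a stand-in for warp_beacon.__version__):

import logging

__version__ = "2.1.10"  # stand-in value
logging.basicConfig(level=logging.INFO)

# Lazy formatting: '%s' is substituted by the logging framework at emit time.
logging.info("Starting Warp Beacon version '%s' ...", __version__)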
warp_beacon-2.1.8/warp_beacon/scraper/__init__.py (removed)

@@ -1,352 +0,0 @@
-import os
-
-from typing import Optional
-import multiprocessing
-from queue import Empty
-
-from warp_beacon.scraper.exceptions import NotFound, UnknownError, TimeOut, Unavailable, FileTooBig, YotubeLiveError, YotubeAgeRestrictedError, IGRateLimitOccurred, CaptchaIssue, AllAccountsFailed
-from warp_beacon.mediainfo.video import VideoInfo
-from warp_beacon.mediainfo.audio import AudioInfo
-from warp_beacon.mediainfo.silencer import Silencer
-from warp_beacon.compress.video import VideoCompress
-from warp_beacon.uploader import AsyncUploader
-from warp_beacon.jobs import Origin
-from warp_beacon.jobs.download_job import DownloadJob
-from warp_beacon.jobs.upload_job import UploadJob
-from warp_beacon.jobs.types import JobType
-from warp_beacon.scraper.account_selector import AccountSelector
-from warp_beacon.scheduler.scheduler import IGScheduler
-
-import logging
-
-ACC_FILE = os.environ.get("SERVICE_ACCOUNTS_FILE", default="/var/warp_beacon/accounts.json")
-
-class AsyncDownloader(object):
-	__JOE_BIDEN_WAKEUP = None
-	workers = []
-	allow_loop = None
-	job_queue = multiprocessing.Queue()
-	uploader = None
-	workers_count = 0
-	auth_event = multiprocessing.Event()
-	acc_selector = None
-	scheduler = None
-
-	def __init__(self, uploader: AsyncUploader, workers_count: int) -> None:
-		self.allow_loop = multiprocessing.Value('i', 1)
-		self.uploader = uploader
-		self.workers_count = workers_count
-		self.acc_selector = AccountSelector(ACC_FILE)
-		self.scheduler = IGScheduler(self)
-		self.scheduler.start()
-
-	def __del__(self) -> None:
-		self.stop_all()
-
-	def start(self) -> None:
-		for _ in range(self.workers_count):
-			proc = multiprocessing.Process(target=self.do_work)
-			self.workers.append(proc)
-			proc.start()
-
-	def get_media_info(self, path: str, fr_media_info: dict={}, media_type: JobType = JobType.VIDEO) -> Optional[dict]:
-		media_info = None
-		try:
-			if path:
-				if media_type == JobType.VIDEO:
-					video_info = VideoInfo(path)
-					media_info = video_info.get_finfo(tuple(fr_media_info.keys()))
-					media_info.update(fr_media_info)
-					if not media_info.get("thumb", None):
-						media_info["thumb"] = video_info.generate_thumbnail()
-					media_info["has_sound"] = video_info.has_sound()
-				elif media_type == JobType.AUDIO:
-					audio_info = AudioInfo(path)
-					media_info = audio_info.get_finfo(tuple(fr_media_info.keys()))
-		except Exception as e:
-			logging.error("Failed to process media info!")
-			logging.exception(e)
-
-		return media_info
-
-	def try_next_account(self, job: DownloadJob, report_error: str = None) -> None:
-		logging.warning("Switching account!")
-		if job.account_switches > self.acc_selector.count_service_accounts(job.job_origin):
-			raise AllAccountsFailed("All config accounts failed!")
-		if report_error:
-			self.acc_selector.bump_acc_fail("rate_limits")
-		self.acc_selector.next()
-		cur_acc = self.acc_selector.get_current()
-		logging.info("Current account: '%s'", str(cur_acc))
-		job.account_switches += 1
-
-	def do_work(self) -> None:
-		logging.info("download worker started")
-		while self.allow_loop.value == 1:
-			try:
-				job = None
-				try:
-					job = self.job_queue.get()
-					if job is self.__JOE_BIDEN_WAKEUP:
-						continue
-					actor = None
-					try:
-						items = []
-						if job.job_origin is not Origin.UNKNOWN:
-							if not job.in_process:
-								actor = None
-								self.acc_selector.set_module(job.job_origin)
-								if job.job_origin is Origin.INSTAGRAM:
-									from warp_beacon.scraper.instagram.instagram import InstagramScraper
-									actor = InstagramScraper(self.acc_selector.get_current())
-								elif job.job_origin is Origin.YT_SHORTS:
-									from warp_beacon.scraper.youtube.shorts import YoutubeShortsScraper
-									actor = YoutubeShortsScraper(self.acc_selector.get_current())
-								elif job.job_origin is Origin.YT_MUSIC:
-									from warp_beacon.scraper.youtube.music import YoutubeMusicScraper
-									actor = YoutubeMusicScraper(self.acc_selector.get_current())
-								elif job.job_origin is Origin.YOUTUBE:
-									from warp_beacon.scraper.youtube.youtube import YoutubeScraper
-									actor = YoutubeScraper(self.acc_selector.get_current())
-								actor.send_message_to_admin_func = self.send_message_to_admin
-								actor.auth_event = self.auth_event
-								while True:
-									try:
-										if job.session_validation:
-											logging.info("Validating '%s' session ...", job.origin.value)
-											actor.validate_session()
-											logging.info("done")
-										else:
-											logging.info("Downloading URL '%s'", job.url)
-											items = actor.download(job.url)
-										break
-									except NotFound as e:
-										logging.warning("Not found error occurred!")
-										logging.exception(e)
-										self.uploader.queue_task(job.to_upload_job(
-											job_failed=True,
-											job_failed_msg="Unable to access to media under this URL. Seems like the media is private.")
-										)
-										self.send_message_to_admin(
-											f"Task {job.job_id} failed. URL: '{job.url}'. Reason: 'NotFound'."
-										)
-										break
-									except Unavailable as e:
-										logging.warning("Not found or unavailable error occurred!")
-										logging.exception(e)
-										if job.unvailable_error_count > self.acc_selector.count_service_accounts(job.job_origin):
-											self.uploader.queue_task(job.to_upload_job(
-												job_failed=True,
-												job_failed_msg="Video is unvailable for all your service accounts.")
-											)
-											break
-										job.unvailable_error_count += 1
-										logging.info("Trying to switch account")
-										self.acc_selector.next()
-										self.job_queue.put(job)
-										break
-									except TimeOut as e:
-										logging.warning("Timeout error occurred!")
-										logging.exception(e)
-										self.uploader.queue_task(job.to_upload_job(
-											job_failed=True,
-											job_failed_msg="Failed to download content due timeout error. Please check you Internet connection, retry amount or request timeout bot configuration settings.")
-										)
-										self.send_message_to_admin(
-											f"Task {job.job_id} failed. URL: '{job.url}'. Reason: 'TimeOut'."
-										)
-										break
-									except FileTooBig as e:
-										logging.warning("Telegram limits exceeded :(")
-										logging.exception(e)
-										self.uploader.queue_task(job.to_upload_job(
-											job_failed=True,
-											job_failed_msg="Unfortunately this file has exceeded the Telegram limits. A file cannot be larger than 2 gigabytes.")
-										)
-										self.send_message_to_admin(
-											f"Task {job.job_id} failed. URL: '{job.url}'. Reason: 'FileTooBig'."
-										)
-										break
-									except IGRateLimitOccurred as e:
-										logging.warning("IG ratelimit occurred :(")
-										logging.exception(e)
-										self.try_next_account(job, report_error="rate_limits")
-										self.job_queue.put(job)
-										break
-									except CaptchaIssue as e:
-										logging.warning("Challange occurred!")
-										logging.exception(e)
-										self.try_next_account(job)
-										self.job_queue.put(job)
-										break
-									except YotubeLiveError as e:
-										logging.warning("Youtube Live videos are not supported. Skipping.")
-										logging.exception(e)
-										self.uploader.queue_task(job.to_upload_job(
-											job_failed=True,
-											job_failed_msg="Youtube Live videos are not supported. Please wait until the live broadcast ends.")
-										)
-										break
-									except YotubeAgeRestrictedError as e:
-										logging.error("Youtube Age Restricted error")
-										logging.exception(e)
-										self.uploader.queue_task(job.to_upload_job(
-											job_failed=True,
-											job_failed_msg="Youtube Age Restricted error. Check your bot Youtube account settings.")
-										)
-										self.send_message_to_admin(
-											f"Task {job.job_id} failed. URL: '{job.url}'. Reason: 'YotubeAgeRestrictedError'."
-										)
-										break
-									except AllAccountsFailed as e:
-										logging.error("All accounts failed!")
-										logging.exception(e)
-										self.uploader.queue_task(job.to_upload_job(
-											job_failed=True,
-											job_failed_msg="All bot accounts failed to download content. Bot administrator noticed about the issue.")
-										)
-										self.send_message_to_admin(
-											f"Task {job.job_id} failed. URL: '{job.url}'. Reason: 'AllAccountsFailed'."
-										)
-										break
-									except (UnknownError, Exception) as e:
-										logging.warning("UnknownError occurred!")
-										logging.exception(e)
-										exception_msg = ""
-										if hasattr(e, "message"):
-											exception_msg = e.message
-										else:
-											exception_msg = str(e)
-										if "geoblock_required" in exception_msg:
-											if job.geoblock_error_count > self.acc_selector.count_service_accounts(job.job_origin):
-												self.uploader.queue_task(job.to_upload_job(
-													job_failed=True,
-													job_failed_msg="This content does not accessible for all yout bot accounts. Seems like author blocked certain regions.")
-												)
-												self.send_message_to_admin(
-													f"Task {job.job_id} failed. URL: '{job.url}'. Reason: 'geoblock_required'."
-												)
-												break
-											job.geoblock_error_count += 1
-											logging.info("Trying to switch account")
-											self.acc_selector.next()
-											self.job_queue.put(job)
-											break
-										self.uploader.queue_task(job.to_upload_job(
-											job_failed=True,
-											job_failed_msg="WOW, unknown error occured! Please [create issue](https://github.com/sb0y/warp_beacon/issues) with service logs.")
-										)
-										self.send_message_to_admin(
-											f"Task {job.job_id} failed. URL: '{job.url}'. Reason: 'UnknownError'."
-											f"Exception:\n```\n{exception_msg}\n```"
-										)
-										break
-
-								if items:
-									for item in items:
-										media_info = {"filesize": 0}
-										if item["media_type"] == JobType.VIDEO:
-											media_info = self.get_media_info(item["local_media_path"], item.get("media_info", {}), JobType.VIDEO)
-											logging.info("Final media info: %s", media_info)
-											if media_info["filesize"] > 2e+9:
-												logging.info("Filesize is '%d' MiB", round(media_info["filesize"] / 1024 / 1024))
-												logging.info("Detected big file. Starting compressing with ffmpeg ...")
-												self.uploader.queue_task(job.to_upload_job(
-													job_warning=True,
-													job_warning_msg="Downloaded file size is bigger than Telegram limits! Performing video compression. This may take a while.")
-												)
-												ffmpeg = VideoCompress(file_path=item["local_media_path"])
-												new_filepath = ffmpeg.generate_filepath(base_filepath=item["local_media_path"])
-												if ffmpeg.compress_to(new_filepath, target_size=2000 * 1000):
-													logging.info("Successfully compressed file '%s'", new_filepath)
-													os.unlink(item["local_media_path"])
-													item["local_media_path"] = new_filepath
-													item["local_compressed_media_path"] = new_filepath
-													media_info["filesize"] = VideoInfo.get_filesize(new_filepath)
-													logging.info("New file size of compressed file is '%.3f'", media_info["filesize"])
-											if not media_info["has_sound"]:
-												item["media_type"] = JobType.ANIMATION
-										elif item["media_type"] == JobType.AUDIO:
-											media_info = self.get_media_info(item["local_media_path"], item.get("media_info", {}), JobType.AUDIO)
-											media_info["performer"] = item.get("performer", None)
-											media_info["thumb"] = item.get("thumb", None)
-											logging.info("Final media info: %s", media_info)
-										elif item["media_type"] == JobType.COLLECTION:
-											for chunk in item["items"]:
-												for v in chunk:
-													if v["media_type"] == JobType.VIDEO:
-														col_media_info = self.get_media_info(v["local_media_path"], v["media_info"])
-														media_info["filesize"] += int(col_media_info.get("filesize", 0))
-														v["media_info"] = col_media_info
-														if not v["media_info"]["has_sound"]:
-															silencer = Silencer(v["local_media_path"])
-															silent_video_path = silencer.add_silent_audio()
-															os.unlink(v["local_media_path"])
-															v["local_media_path"] = silent_video_path
-															v["media_info"].update(silencer.get_finfo())
-															v["media_info"]["has_sound"] = True
-
-										job_args = {"media_type": item["media_type"], "media_info": media_info}
-										if item["media_type"] == JobType.COLLECTION:
-											job_args["media_collection"] = item["items"]
-											if item.get("save_items", None) is not None:
-												job_args["save_items"] = item.get("save_items", False)
-										else:
-											job_args["local_media_path"] = item["local_media_path"]
-											if item.get("local_compressed_media_path", None):
-												job_args["local_media_path"] = item.get("local_compressed_media_path", None)
-
-										job_args["canonical_name"] = item.get("canonical_name", "")
-
-										logging.debug("local_media_path: '%s'", job_args.get("local_media_path", ""))
-										logging.debug("media_collection: '%s'", str(job_args.get("media_collection", {})))
-										#logging.info(job_args)
-										upload_job = job.to_upload_job(**job_args)
-										if upload_job.is_empty():
-											logging.info("Upload job is empty. Nothing to do here!")
-											self.uploader.queue_task(job.to_upload_job(
-												job_failed=True,
-												job_failed_msg="Seems like this link doesn't contains any media.")
-											)
-										else:
-											self.uploader.queue_task(upload_job)
-							else:
-								logging.info("Job already in work in parallel worker. Redirecting job to upload worker.")
-								self.uploader.queue_task(job.to_upload_job())
-					except Exception as e:
-						logging.error("Error inside download worker!")
-						logging.exception(e)
-						self.notify_task_failed(job)
-				except Empty:
-					pass
-			except Exception as e:
-				logging.error("Exception occurred inside worker!")
-				logging.exception(e)
-
-		logging.info("Process done")
-
-	def stop_all(self) -> None:
-		self.allow_loop.value = 0
-		self.scheduler.stop()
-		for proc in self.workers:
-			if proc.is_alive():
-				logging.info("stopping process #%d", proc.pid)
-				self.job_queue.put_nowait(self.__JOE_BIDEN_WAKEUP)
-				proc.join()
-				#proc.terminate()
-				logging.info("process #%d stopped", proc.pid)
-		self.workers.clear()
-
-	def queue_task(self, job: DownloadJob) -> str:
-		self.job_queue.put_nowait(job)
-		return str(job.job_id)
-
-	def notify_task_failed(self, job: DownloadJob) -> None:
-		self.uploader.queue_task(job.to_upload_job(job_failed=True))
-
-	def send_message_to_admin(self, text: str, yt_auth: bool = False) -> None:
-		self.uploader.queue_task(UploadJob.build(
-			is_message_to_admin=True,
-			message_text=text,
-			yt_auth=yt_auth
-		))
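Note: compared side by side, the removed 2.1.8 worker nested all processing under "if job.job_origin is not Origin.UNKNOWN:" and silently fell through for unknown origins, while the 2.1.10 version above logs a warning and skips such jobs with an early continue, which also flattens the rest of the loop body by one level; 2.1.10 additionally reads the origin from job.job_origin (rather than job.origin) during session validation. A self-contained toy illustration of the new guard (stand-in Origin enum, not package code):

import enum
import logging

class Origin(enum.Enum):  # stand-in for warp_beacon.jobs.Origin
	UNKNOWN = "unknown"
	INSTAGRAM = "instagram"

logging.basicConfig(level=logging.INFO)

for job_origin in (Origin.UNKNOWN, Origin.INSTAGRAM):
	if job_origin is Origin.UNKNOWN:
		logging.warning("Unknown task origin! Skipping.")
		continue
	logging.info("Processing '%s' job", job_origin.value)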