warp-beacon 2.1.8__tar.gz → 2.1.10__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (55) hide show
  1. {warp_beacon-2.1.8/warp_beacon.egg-info → warp_beacon-2.1.10}/PKG-INFO +1 -1
  2. warp_beacon-2.1.10/warp_beacon/__version__.py +2 -0
  3. {warp_beacon-2.1.8 → warp_beacon-2.1.10}/warp_beacon/scheduler/scheduler.py +3 -1
  4. warp_beacon-2.1.10/warp_beacon/scraper/__init__.py +354 -0
  5. {warp_beacon-2.1.8 → warp_beacon-2.1.10}/warp_beacon/telegram/bot.py +1 -1
  6. {warp_beacon-2.1.8 → warp_beacon-2.1.10/warp_beacon.egg-info}/PKG-INFO +1 -1
  7. warp_beacon-2.1.8/warp_beacon/__version__.py +0 -2
  8. warp_beacon-2.1.8/warp_beacon/scraper/__init__.py +0 -352
  9. {warp_beacon-2.1.8 → warp_beacon-2.1.10}/LICENSE +0 -0
  10. {warp_beacon-2.1.8 → warp_beacon-2.1.10}/MANIFEST.in +0 -0
  11. {warp_beacon-2.1.8 → warp_beacon-2.1.10}/README.md +0 -0
  12. {warp_beacon-2.1.8 → warp_beacon-2.1.10}/assets/placeholder.gif +0 -0
  13. {warp_beacon-2.1.8 → warp_beacon-2.1.10}/etc/.gitignore +0 -0
  14. {warp_beacon-2.1.8 → warp_beacon-2.1.10}/etc/accounts.json +0 -0
  15. {warp_beacon-2.1.8 → warp_beacon-2.1.10}/etc/warp_beacon.conf +0 -0
  16. {warp_beacon-2.1.8 → warp_beacon-2.1.10}/etc/warp_beacon.service +0 -0
  17. {warp_beacon-2.1.8 → warp_beacon-2.1.10}/pyproject.toml +0 -0
  18. {warp_beacon-2.1.8 → warp_beacon-2.1.10}/setup.cfg +0 -0
  19. {warp_beacon-2.1.8 → warp_beacon-2.1.10}/setup.py +0 -0
  20. {warp_beacon-2.1.8 → warp_beacon-2.1.10}/warp_beacon/__init__.py +0 -0
  21. {warp_beacon-2.1.8 → warp_beacon-2.1.10}/warp_beacon/compress/__init__.py +0 -0
  22. {warp_beacon-2.1.8 → warp_beacon-2.1.10}/warp_beacon/compress/video.py +0 -0
  23. {warp_beacon-2.1.8 → warp_beacon-2.1.10}/warp_beacon/jobs/__init__.py +0 -0
  24. {warp_beacon-2.1.8 → warp_beacon-2.1.10}/warp_beacon/jobs/abstract.py +0 -0
  25. {warp_beacon-2.1.8 → warp_beacon-2.1.10}/warp_beacon/jobs/download_job.py +0 -0
  26. {warp_beacon-2.1.8 → warp_beacon-2.1.10}/warp_beacon/jobs/types.py +0 -0
  27. {warp_beacon-2.1.8 → warp_beacon-2.1.10}/warp_beacon/jobs/upload_job.py +0 -0
  28. {warp_beacon-2.1.8 → warp_beacon-2.1.10}/warp_beacon/mediainfo/__init__.py +0 -0
  29. {warp_beacon-2.1.8 → warp_beacon-2.1.10}/warp_beacon/mediainfo/abstract.py +0 -0
  30. {warp_beacon-2.1.8 → warp_beacon-2.1.10}/warp_beacon/mediainfo/audio.py +0 -0
  31. {warp_beacon-2.1.8 → warp_beacon-2.1.10}/warp_beacon/mediainfo/silencer.py +0 -0
  32. {warp_beacon-2.1.8 → warp_beacon-2.1.10}/warp_beacon/mediainfo/video.py +0 -0
  33. {warp_beacon-2.1.8 → warp_beacon-2.1.10}/warp_beacon/scheduler/__init__.py +0 -0
  34. {warp_beacon-2.1.8 → warp_beacon-2.1.10}/warp_beacon/scraper/abstract.py +0 -0
  35. {warp_beacon-2.1.8 → warp_beacon-2.1.10}/warp_beacon/scraper/account_selector.py +0 -0
  36. {warp_beacon-2.1.8 → warp_beacon-2.1.10}/warp_beacon/scraper/exceptions.py +0 -0
  37. {warp_beacon-2.1.8 → warp_beacon-2.1.10}/warp_beacon/scraper/instagram/__init__.py +0 -0
  38. {warp_beacon-2.1.8 → warp_beacon-2.1.10}/warp_beacon/scraper/instagram/instagram.py +0 -0
  39. {warp_beacon-2.1.8 → warp_beacon-2.1.10}/warp_beacon/scraper/youtube/__init__.py +0 -0
  40. {warp_beacon-2.1.8 → warp_beacon-2.1.10}/warp_beacon/scraper/youtube/abstract.py +0 -0
  41. {warp_beacon-2.1.8 → warp_beacon-2.1.10}/warp_beacon/scraper/youtube/music.py +0 -0
  42. {warp_beacon-2.1.8 → warp_beacon-2.1.10}/warp_beacon/scraper/youtube/shorts.py +0 -0
  43. {warp_beacon-2.1.8 → warp_beacon-2.1.10}/warp_beacon/scraper/youtube/youtube.py +0 -0
  44. {warp_beacon-2.1.8 → warp_beacon-2.1.10}/warp_beacon/storage/__init__.py +0 -0
  45. {warp_beacon-2.1.8 → warp_beacon-2.1.10}/warp_beacon/telegram/__init__.py +0 -0
  46. {warp_beacon-2.1.8 → warp_beacon-2.1.10}/warp_beacon/telegram/handlers.py +0 -0
  47. {warp_beacon-2.1.8 → warp_beacon-2.1.10}/warp_beacon/telegram/placeholder_message.py +0 -0
  48. {warp_beacon-2.1.8 → warp_beacon-2.1.10}/warp_beacon/telegram/utils.py +0 -0
  49. {warp_beacon-2.1.8 → warp_beacon-2.1.10}/warp_beacon/uploader/__init__.py +0 -0
  50. {warp_beacon-2.1.8 → warp_beacon-2.1.10}/warp_beacon/warp_beacon.py +0 -0
  51. {warp_beacon-2.1.8 → warp_beacon-2.1.10}/warp_beacon.egg-info/SOURCES.txt +0 -0
  52. {warp_beacon-2.1.8 → warp_beacon-2.1.10}/warp_beacon.egg-info/dependency_links.txt +0 -0
  53. {warp_beacon-2.1.8 → warp_beacon-2.1.10}/warp_beacon.egg-info/entry_points.txt +0 -0
  54. {warp_beacon-2.1.8 → warp_beacon-2.1.10}/warp_beacon.egg-info/requires.txt +0 -0
  55. {warp_beacon-2.1.8 → warp_beacon-2.1.10}/warp_beacon.egg-info/top_level.txt +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: warp_beacon
3
- Version: 2.1.8
3
+ Version: 2.1.10
4
4
  Summary: Telegram bot for expanding external media links
5
5
  Home-page: https://github.com/sb0y/warp_beacon
6
6
  Author: Andrey Bagrintsev
@@ -0,0 +1,2 @@
1
+ __version__ = "2.1.10"
2
+
@@ -1,5 +1,6 @@
1
1
  import threading
2
2
 
3
+ from warp_beacon.jobs import Origin
3
4
  import warp_beacon
4
5
 
5
6
  import logging
@@ -42,7 +43,8 @@ class IGScheduler(object):
42
43
  def validate_ig_session(self) -> bool:
43
44
  try:
44
45
  self.downloader.queue_task(warp_beacon.jobs.download_job.DownloadJob.build(
45
- session_validation=True
46
+ session_validation=True,
47
+ job_origin=Origin.INSTAGRAM
46
48
  ))
47
49
  except Exception as e:
48
50
  logging.warning("An error occurred while validating instagram session!")
@@ -0,0 +1,354 @@
1
+ import os
2
+
3
+ from typing import Optional
4
+ import multiprocessing
5
+ from queue import Empty
6
+
7
+ from warp_beacon.scraper.exceptions import NotFound, UnknownError, TimeOut, Unavailable, FileTooBig, YotubeLiveError, YotubeAgeRestrictedError, IGRateLimitOccurred, CaptchaIssue, AllAccountsFailed
8
+ from warp_beacon.mediainfo.video import VideoInfo
9
+ from warp_beacon.mediainfo.audio import AudioInfo
10
+ from warp_beacon.mediainfo.silencer import Silencer
11
+ from warp_beacon.compress.video import VideoCompress
12
+ from warp_beacon.uploader import AsyncUploader
13
+ from warp_beacon.jobs import Origin
14
+ from warp_beacon.jobs.download_job import DownloadJob
15
+ from warp_beacon.jobs.upload_job import UploadJob
16
+ from warp_beacon.jobs.types import JobType
17
+ from warp_beacon.scraper.account_selector import AccountSelector
18
+ from warp_beacon.scheduler.scheduler import IGScheduler
19
+
20
+ import logging
21
+
22
+ ACC_FILE = os.environ.get("SERVICE_ACCOUNTS_FILE", default="/var/warp_beacon/accounts.json")
23
+
24
+ class AsyncDownloader(object):
25
+ __JOE_BIDEN_WAKEUP = None
26
+ workers = []
27
+ allow_loop = None
28
+ job_queue = multiprocessing.Queue()
29
+ uploader = None
30
+ workers_count = 0
31
+ auth_event = multiprocessing.Event()
32
+ acc_selector = None
33
+ scheduler = None
34
+
35
+ def __init__(self, uploader: AsyncUploader, workers_count: int) -> None:
36
+ self.allow_loop = multiprocessing.Value('i', 1)
37
+ self.uploader = uploader
38
+ self.workers_count = workers_count
39
+ self.acc_selector = AccountSelector(ACC_FILE)
40
+ self.scheduler = IGScheduler(self)
41
+ self.scheduler.start()
42
+
43
+ def __del__(self) -> None:
44
+ self.stop_all()
45
+
46
+ def start(self) -> None:
47
+ for _ in range(self.workers_count):
48
+ proc = multiprocessing.Process(target=self.do_work)
49
+ self.workers.append(proc)
50
+ proc.start()
51
+
52
+ def get_media_info(self, path: str, fr_media_info: dict={}, media_type: JobType = JobType.VIDEO) -> Optional[dict]:
53
+ media_info = None
54
+ try:
55
+ if path:
56
+ if media_type == JobType.VIDEO:
57
+ video_info = VideoInfo(path)
58
+ media_info = video_info.get_finfo(tuple(fr_media_info.keys()))
59
+ media_info.update(fr_media_info)
60
+ if not media_info.get("thumb", None):
61
+ media_info["thumb"] = video_info.generate_thumbnail()
62
+ media_info["has_sound"] = video_info.has_sound()
63
+ elif media_type == JobType.AUDIO:
64
+ audio_info = AudioInfo(path)
65
+ media_info = audio_info.get_finfo(tuple(fr_media_info.keys()))
66
+ except Exception as e:
67
+ logging.error("Failed to process media info!")
68
+ logging.exception(e)
69
+
70
+ return media_info
71
+
72
+ def try_next_account(self, job: DownloadJob, report_error: str = None) -> None:
73
+ logging.warning("Switching account!")
74
+ if job.account_switches > self.acc_selector.count_service_accounts(job.job_origin):
75
+ raise AllAccountsFailed("All config accounts failed!")
76
+ if report_error:
77
+ self.acc_selector.bump_acc_fail("rate_limits")
78
+ self.acc_selector.next()
79
+ cur_acc = self.acc_selector.get_current()
80
+ logging.info("Current account: '%s'", str(cur_acc))
81
+ job.account_switches += 1
82
+
83
+ def do_work(self) -> None:
84
+ logging.info("download worker started")
85
+ while self.allow_loop.value == 1:
86
+ try:
87
+ job = None
88
+ try:
89
+ job = self.job_queue.get()
90
+ if job is self.__JOE_BIDEN_WAKEUP:
91
+ continue
92
+ actor = None
93
+ try:
94
+ items = []
95
+ if job.job_origin is Origin.UNKNOWN:
96
+ logging.warning("Unknown task origin! Skipping.")
97
+ continue
98
+ if not job.in_process:
99
+ actor = None
100
+ self.acc_selector.set_module(job.job_origin)
101
+ if job.job_origin is Origin.INSTAGRAM:
102
+ from warp_beacon.scraper.instagram.instagram import InstagramScraper
103
+ actor = InstagramScraper(self.acc_selector.get_current())
104
+ elif job.job_origin is Origin.YT_SHORTS:
105
+ from warp_beacon.scraper.youtube.shorts import YoutubeShortsScraper
106
+ actor = YoutubeShortsScraper(self.acc_selector.get_current())
107
+ elif job.job_origin is Origin.YT_MUSIC:
108
+ from warp_beacon.scraper.youtube.music import YoutubeMusicScraper
109
+ actor = YoutubeMusicScraper(self.acc_selector.get_current())
110
+ elif job.job_origin is Origin.YOUTUBE:
111
+ from warp_beacon.scraper.youtube.youtube import YoutubeScraper
112
+ actor = YoutubeScraper(self.acc_selector.get_current())
113
+ actor.send_message_to_admin_func = self.send_message_to_admin
114
+ actor.auth_event = self.auth_event
115
+ while True:
116
+ try:
117
+ if job.session_validation:
118
+ logging.info("Validating '%s' session ...", job.job_origin.value)
119
+ actor.validate_session()
120
+ logging.info("done")
121
+ else:
122
+ logging.info("Downloading URL '%s'", job.url)
123
+ items = actor.download(job.url)
124
+ break
125
+ except NotFound as e:
126
+ logging.warning("Not found error occurred!")
127
+ logging.exception(e)
128
+ self.uploader.queue_task(job.to_upload_job(
129
+ job_failed=True,
130
+ job_failed_msg="Unable to access to media under this URL. Seems like the media is private.")
131
+ )
132
+ self.send_message_to_admin(
133
+ f"Task {job.job_id} failed. URL: '{job.url}'. Reason: 'NotFound'."
134
+ )
135
+ break
136
+ except Unavailable as e:
137
+ logging.warning("Not found or unavailable error occurred!")
138
+ logging.exception(e)
139
+ if job.unvailable_error_count > self.acc_selector.count_service_accounts(job.job_origin):
140
+ self.uploader.queue_task(job.to_upload_job(
141
+ job_failed=True,
142
+ job_failed_msg="Video is unvailable for all your service accounts.")
143
+ )
144
+ break
145
+ job.unvailable_error_count += 1
146
+ logging.info("Trying to switch account")
147
+ self.acc_selector.next()
148
+ self.job_queue.put(job)
149
+ break
150
+ except TimeOut as e:
151
+ logging.warning("Timeout error occurred!")
152
+ logging.exception(e)
153
+ self.uploader.queue_task(job.to_upload_job(
154
+ job_failed=True,
155
+ job_failed_msg="Failed to download content due timeout error. Please check you Internet connection, retry amount or request timeout bot configuration settings.")
156
+ )
157
+ self.send_message_to_admin(
158
+ f"Task {job.job_id} failed. URL: '{job.url}'. Reason: 'TimeOut'."
159
+ )
160
+ break
161
+ except FileTooBig as e:
162
+ logging.warning("Telegram limits exceeded :(")
163
+ logging.exception(e)
164
+ self.uploader.queue_task(job.to_upload_job(
165
+ job_failed=True,
166
+ job_failed_msg="Unfortunately this file has exceeded the Telegram limits. A file cannot be larger than 2 gigabytes.")
167
+ )
168
+ self.send_message_to_admin(
169
+ f"Task {job.job_id} failed. URL: '{job.url}'. Reason: 'FileTooBig'."
170
+ )
171
+ break
172
+ except IGRateLimitOccurred as e:
173
+ logging.warning("IG ratelimit occurred :(")
174
+ logging.exception(e)
175
+ self.try_next_account(job, report_error="rate_limits")
176
+ self.job_queue.put(job)
177
+ break
178
+ except CaptchaIssue as e:
179
+ logging.warning("Challange occurred!")
180
+ logging.exception(e)
181
+ self.try_next_account(job)
182
+ self.job_queue.put(job)
183
+ break
184
+ except YotubeLiveError as e:
185
+ logging.warning("Youtube Live videos are not supported. Skipping.")
186
+ logging.exception(e)
187
+ self.uploader.queue_task(job.to_upload_job(
188
+ job_failed=True,
189
+ job_failed_msg="Youtube Live videos are not supported. Please wait until the live broadcast ends.")
190
+ )
191
+ break
192
+ except YotubeAgeRestrictedError as e:
193
+ logging.error("Youtube Age Restricted error")
194
+ logging.exception(e)
195
+ self.uploader.queue_task(job.to_upload_job(
196
+ job_failed=True,
197
+ job_failed_msg="Youtube Age Restricted error. Check your bot Youtube account settings.")
198
+ )
199
+ self.send_message_to_admin(
200
+ f"Task {job.job_id} failed. URL: '{job.url}'. Reason: 'YotubeAgeRestrictedError'."
201
+ )
202
+ break
203
+ except AllAccountsFailed as e:
204
+ logging.error("All accounts failed!")
205
+ logging.exception(e)
206
+ self.uploader.queue_task(job.to_upload_job(
207
+ job_failed=True,
208
+ job_failed_msg="All bot accounts failed to download content. Bot administrator noticed about the issue.")
209
+ )
210
+ self.send_message_to_admin(
211
+ f"Task {job.job_id} failed. URL: '{job.url}'. Reason: 'AllAccountsFailed'."
212
+ )
213
+ break
214
+ except (UnknownError, Exception) as e:
215
+ logging.warning("UnknownError occurred!")
216
+ logging.exception(e)
217
+ exception_msg = ""
218
+ if hasattr(e, "message"):
219
+ exception_msg = e.message
220
+ else:
221
+ exception_msg = str(e)
222
+ if "geoblock_required" in exception_msg:
223
+ if job.geoblock_error_count > self.acc_selector.count_service_accounts(job.job_origin):
224
+ self.uploader.queue_task(job.to_upload_job(
225
+ job_failed=True,
226
+ job_failed_msg="This content does not accessible for all yout bot accounts. Seems like author blocked certain regions.")
227
+ )
228
+ self.send_message_to_admin(
229
+ f"Task {job.job_id} failed. URL: '{job.url}'. Reason: 'geoblock_required'."
230
+ )
231
+ break
232
+ job.geoblock_error_count += 1
233
+ logging.info("Trying to switch account")
234
+ self.acc_selector.next()
235
+ self.job_queue.put(job)
236
+ break
237
+ self.uploader.queue_task(job.to_upload_job(
238
+ job_failed=True,
239
+ job_failed_msg="WOW, unknown error occured! Please [create issue](https://github.com/sb0y/warp_beacon/issues) with service logs.")
240
+ )
241
+ self.send_message_to_admin(
242
+ f"Task {job.job_id} failed. URL: '{job.url}'. Reason: 'UnknownError'."
243
+ f"Exception:\n```\n{exception_msg}\n```"
244
+ )
245
+ break
246
+
247
+ if items:
248
+ for item in items:
249
+ media_info = {"filesize": 0}
250
+ if item["media_type"] == JobType.VIDEO:
251
+ media_info = self.get_media_info(item["local_media_path"], item.get("media_info", {}), JobType.VIDEO)
252
+ logging.info("Final media info: %s", media_info)
253
+ if media_info["filesize"] > 2e+9:
254
+ logging.info("Filesize is '%d' MiB", round(media_info["filesize"] / 1024 / 1024))
255
+ logging.info("Detected big file. Starting compressing with ffmpeg ...")
256
+ self.uploader.queue_task(job.to_upload_job(
257
+ job_warning=True,
258
+ job_warning_msg="Downloaded file size is bigger than Telegram limits! Performing video compression. This may take a while.")
259
+ )
260
+ ffmpeg = VideoCompress(file_path=item["local_media_path"])
261
+ new_filepath = ffmpeg.generate_filepath(base_filepath=item["local_media_path"])
262
+ if ffmpeg.compress_to(new_filepath, target_size=2000 * 1000):
263
+ logging.info("Successfully compressed file '%s'", new_filepath)
264
+ os.unlink(item["local_media_path"])
265
+ item["local_media_path"] = new_filepath
266
+ item["local_compressed_media_path"] = new_filepath
267
+ media_info["filesize"] = VideoInfo.get_filesize(new_filepath)
268
+ logging.info("New file size of compressed file is '%.3f'", media_info["filesize"])
269
+ if not media_info["has_sound"]:
270
+ item["media_type"] = JobType.ANIMATION
271
+ elif item["media_type"] == JobType.AUDIO:
272
+ media_info = self.get_media_info(item["local_media_path"], item.get("media_info", {}), JobType.AUDIO)
273
+ media_info["performer"] = item.get("performer", None)
274
+ media_info["thumb"] = item.get("thumb", None)
275
+ logging.info("Final media info: %s", media_info)
276
+ elif item["media_type"] == JobType.COLLECTION:
277
+ for chunk in item["items"]:
278
+ for v in chunk:
279
+ if v["media_type"] == JobType.VIDEO:
280
+ col_media_info = self.get_media_info(v["local_media_path"], v["media_info"])
281
+ media_info["filesize"] += int(col_media_info.get("filesize", 0))
282
+ v["media_info"] = col_media_info
283
+ if not v["media_info"]["has_sound"]:
284
+ silencer = Silencer(v["local_media_path"])
285
+ silent_video_path = silencer.add_silent_audio()
286
+ os.unlink(v["local_media_path"])
287
+ v["local_media_path"] = silent_video_path
288
+ v["media_info"].update(silencer.get_finfo())
289
+ v["media_info"]["has_sound"] = True
290
+
291
+ job_args = {"media_type": item["media_type"], "media_info": media_info}
292
+ if item["media_type"] == JobType.COLLECTION:
293
+ job_args["media_collection"] = item["items"]
294
+ if item.get("save_items", None) is not None:
295
+ job_args["save_items"] = item.get("save_items", False)
296
+ else:
297
+ job_args["local_media_path"] = item["local_media_path"]
298
+ if item.get("local_compressed_media_path", None):
299
+ job_args["local_media_path"] = item.get("local_compressed_media_path", None)
300
+
301
+ job_args["canonical_name"] = item.get("canonical_name", "")
302
+
303
+ logging.debug("local_media_path: '%s'", job_args.get("local_media_path", ""))
304
+ logging.debug("media_collection: '%s'", str(job_args.get("media_collection", {})))
305
+ #logging.info(job_args)
306
+ upload_job = job.to_upload_job(**job_args)
307
+ if upload_job.is_empty():
308
+ logging.info("Upload job is empty. Nothing to do here!")
309
+ self.uploader.queue_task(job.to_upload_job(
310
+ job_failed=True,
311
+ job_failed_msg="Seems like this link doesn't contains any media.")
312
+ )
313
+ else:
314
+ self.uploader.queue_task(upload_job)
315
+ else:
316
+ logging.info("Job already in work in parallel worker. Redirecting job to upload worker.")
317
+ self.uploader.queue_task(job.to_upload_job())
318
+ except Exception as e:
319
+ logging.error("Error inside download worker!")
320
+ logging.exception(e)
321
+ self.notify_task_failed(job)
322
+ except Empty:
323
+ pass
324
+ except Exception as e:
325
+ logging.error("Exception occurred inside worker!")
326
+ logging.exception(e)
327
+
328
+ logging.info("Process done")
329
+
330
+ def stop_all(self) -> None:
331
+ self.allow_loop.value = 0
332
+ self.scheduler.stop()
333
+ for proc in self.workers:
334
+ if proc.is_alive():
335
+ logging.info("stopping process #%d", proc.pid)
336
+ self.job_queue.put_nowait(self.__JOE_BIDEN_WAKEUP)
337
+ proc.join()
338
+ #proc.terminate()
339
+ logging.info("process #%d stopped", proc.pid)
340
+ self.workers.clear()
341
+
342
+ def queue_task(self, job: DownloadJob) -> str:
343
+ self.job_queue.put_nowait(job)
344
+ return str(job.job_id)
345
+
346
+ def notify_task_failed(self, job: DownloadJob) -> None:
347
+ self.uploader.queue_task(job.to_upload_job(job_failed=True))
348
+
349
+ def send_message_to_admin(self, text: str, yt_auth: bool = False) -> None:
350
+ self.uploader.queue_task(UploadJob.build(
351
+ is_message_to_admin=True,
352
+ message_text=text,
353
+ yt_auth=yt_auth
354
+ ))
@@ -36,7 +36,7 @@ class Bot(object):
36
36
  format="%(asctime)s - %(name)s - %(levelname)s - %(message)s", level=logging.INFO
37
37
  )
38
38
 
39
- logging.info(f"Starting Warp Beacon version '{__version__}' ...")
39
+ logging.info("Starting Warp Beacon version '%s' ...", __version__)
40
40
 
41
41
  workers_amount = min(32, os.cpu_count() + 4)
42
42
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: warp_beacon
3
- Version: 2.1.8
3
+ Version: 2.1.10
4
4
  Summary: Telegram bot for expanding external media links
5
5
  Home-page: https://github.com/sb0y/warp_beacon
6
6
  Author: Andrey Bagrintsev
@@ -1,2 +0,0 @@
1
- __version__ = "2.1.8"
2
-
@@ -1,352 +0,0 @@
1
- import os
2
-
3
- from typing import Optional
4
- import multiprocessing
5
- from queue import Empty
6
-
7
- from warp_beacon.scraper.exceptions import NotFound, UnknownError, TimeOut, Unavailable, FileTooBig, YotubeLiveError, YotubeAgeRestrictedError, IGRateLimitOccurred, CaptchaIssue, AllAccountsFailed
8
- from warp_beacon.mediainfo.video import VideoInfo
9
- from warp_beacon.mediainfo.audio import AudioInfo
10
- from warp_beacon.mediainfo.silencer import Silencer
11
- from warp_beacon.compress.video import VideoCompress
12
- from warp_beacon.uploader import AsyncUploader
13
- from warp_beacon.jobs import Origin
14
- from warp_beacon.jobs.download_job import DownloadJob
15
- from warp_beacon.jobs.upload_job import UploadJob
16
- from warp_beacon.jobs.types import JobType
17
- from warp_beacon.scraper.account_selector import AccountSelector
18
- from warp_beacon.scheduler.scheduler import IGScheduler
19
-
20
- import logging
21
-
22
- ACC_FILE = os.environ.get("SERVICE_ACCOUNTS_FILE", default="/var/warp_beacon/accounts.json")
23
-
24
- class AsyncDownloader(object):
25
- __JOE_BIDEN_WAKEUP = None
26
- workers = []
27
- allow_loop = None
28
- job_queue = multiprocessing.Queue()
29
- uploader = None
30
- workers_count = 0
31
- auth_event = multiprocessing.Event()
32
- acc_selector = None
33
- scheduler = None
34
-
35
- def __init__(self, uploader: AsyncUploader, workers_count: int) -> None:
36
- self.allow_loop = multiprocessing.Value('i', 1)
37
- self.uploader = uploader
38
- self.workers_count = workers_count
39
- self.acc_selector = AccountSelector(ACC_FILE)
40
- self.scheduler = IGScheduler(self)
41
- self.scheduler.start()
42
-
43
- def __del__(self) -> None:
44
- self.stop_all()
45
-
46
- def start(self) -> None:
47
- for _ in range(self.workers_count):
48
- proc = multiprocessing.Process(target=self.do_work)
49
- self.workers.append(proc)
50
- proc.start()
51
-
52
- def get_media_info(self, path: str, fr_media_info: dict={}, media_type: JobType = JobType.VIDEO) -> Optional[dict]:
53
- media_info = None
54
- try:
55
- if path:
56
- if media_type == JobType.VIDEO:
57
- video_info = VideoInfo(path)
58
- media_info = video_info.get_finfo(tuple(fr_media_info.keys()))
59
- media_info.update(fr_media_info)
60
- if not media_info.get("thumb", None):
61
- media_info["thumb"] = video_info.generate_thumbnail()
62
- media_info["has_sound"] = video_info.has_sound()
63
- elif media_type == JobType.AUDIO:
64
- audio_info = AudioInfo(path)
65
- media_info = audio_info.get_finfo(tuple(fr_media_info.keys()))
66
- except Exception as e:
67
- logging.error("Failed to process media info!")
68
- logging.exception(e)
69
-
70
- return media_info
71
-
72
- def try_next_account(self, job: DownloadJob, report_error: str = None) -> None:
73
- logging.warning("Switching account!")
74
- if job.account_switches > self.acc_selector.count_service_accounts(job.job_origin):
75
- raise AllAccountsFailed("All config accounts failed!")
76
- if report_error:
77
- self.acc_selector.bump_acc_fail("rate_limits")
78
- self.acc_selector.next()
79
- cur_acc = self.acc_selector.get_current()
80
- logging.info("Current account: '%s'", str(cur_acc))
81
- job.account_switches += 1
82
-
83
- def do_work(self) -> None:
84
- logging.info("download worker started")
85
- while self.allow_loop.value == 1:
86
- try:
87
- job = None
88
- try:
89
- job = self.job_queue.get()
90
- if job is self.__JOE_BIDEN_WAKEUP:
91
- continue
92
- actor = None
93
- try:
94
- items = []
95
- if job.job_origin is not Origin.UNKNOWN:
96
- if not job.in_process:
97
- actor = None
98
- self.acc_selector.set_module(job.job_origin)
99
- if job.job_origin is Origin.INSTAGRAM:
100
- from warp_beacon.scraper.instagram.instagram import InstagramScraper
101
- actor = InstagramScraper(self.acc_selector.get_current())
102
- elif job.job_origin is Origin.YT_SHORTS:
103
- from warp_beacon.scraper.youtube.shorts import YoutubeShortsScraper
104
- actor = YoutubeShortsScraper(self.acc_selector.get_current())
105
- elif job.job_origin is Origin.YT_MUSIC:
106
- from warp_beacon.scraper.youtube.music import YoutubeMusicScraper
107
- actor = YoutubeMusicScraper(self.acc_selector.get_current())
108
- elif job.job_origin is Origin.YOUTUBE:
109
- from warp_beacon.scraper.youtube.youtube import YoutubeScraper
110
- actor = YoutubeScraper(self.acc_selector.get_current())
111
- actor.send_message_to_admin_func = self.send_message_to_admin
112
- actor.auth_event = self.auth_event
113
- while True:
114
- try:
115
- if job.session_validation:
116
- logging.info("Validating '%s' session ...", job.origin.value)
117
- actor.validate_session()
118
- logging.info("done")
119
- else:
120
- logging.info("Downloading URL '%s'", job.url)
121
- items = actor.download(job.url)
122
- break
123
- except NotFound as e:
124
- logging.warning("Not found error occurred!")
125
- logging.exception(e)
126
- self.uploader.queue_task(job.to_upload_job(
127
- job_failed=True,
128
- job_failed_msg="Unable to access to media under this URL. Seems like the media is private.")
129
- )
130
- self.send_message_to_admin(
131
- f"Task {job.job_id} failed. URL: '{job.url}'. Reason: 'NotFound'."
132
- )
133
- break
134
- except Unavailable as e:
135
- logging.warning("Not found or unavailable error occurred!")
136
- logging.exception(e)
137
- if job.unvailable_error_count > self.acc_selector.count_service_accounts(job.job_origin):
138
- self.uploader.queue_task(job.to_upload_job(
139
- job_failed=True,
140
- job_failed_msg="Video is unvailable for all your service accounts.")
141
- )
142
- break
143
- job.unvailable_error_count += 1
144
- logging.info("Trying to switch account")
145
- self.acc_selector.next()
146
- self.job_queue.put(job)
147
- break
148
- except TimeOut as e:
149
- logging.warning("Timeout error occurred!")
150
- logging.exception(e)
151
- self.uploader.queue_task(job.to_upload_job(
152
- job_failed=True,
153
- job_failed_msg="Failed to download content due timeout error. Please check you Internet connection, retry amount or request timeout bot configuration settings.")
154
- )
155
- self.send_message_to_admin(
156
- f"Task {job.job_id} failed. URL: '{job.url}'. Reason: 'TimeOut'."
157
- )
158
- break
159
- except FileTooBig as e:
160
- logging.warning("Telegram limits exceeded :(")
161
- logging.exception(e)
162
- self.uploader.queue_task(job.to_upload_job(
163
- job_failed=True,
164
- job_failed_msg="Unfortunately this file has exceeded the Telegram limits. A file cannot be larger than 2 gigabytes.")
165
- )
166
- self.send_message_to_admin(
167
- f"Task {job.job_id} failed. URL: '{job.url}'. Reason: 'FileTooBig'."
168
- )
169
- break
170
- except IGRateLimitOccurred as e:
171
- logging.warning("IG ratelimit occurred :(")
172
- logging.exception(e)
173
- self.try_next_account(job, report_error="rate_limits")
174
- self.job_queue.put(job)
175
- break
176
- except CaptchaIssue as e:
177
- logging.warning("Challange occurred!")
178
- logging.exception(e)
179
- self.try_next_account(job)
180
- self.job_queue.put(job)
181
- break
182
- except YotubeLiveError as e:
183
- logging.warning("Youtube Live videos are not supported. Skipping.")
184
- logging.exception(e)
185
- self.uploader.queue_task(job.to_upload_job(
186
- job_failed=True,
187
- job_failed_msg="Youtube Live videos are not supported. Please wait until the live broadcast ends.")
188
- )
189
- break
190
- except YotubeAgeRestrictedError as e:
191
- logging.error("Youtube Age Restricted error")
192
- logging.exception(e)
193
- self.uploader.queue_task(job.to_upload_job(
194
- job_failed=True,
195
- job_failed_msg="Youtube Age Restricted error. Check your bot Youtube account settings.")
196
- )
197
- self.send_message_to_admin(
198
- f"Task {job.job_id} failed. URL: '{job.url}'. Reason: 'YotubeAgeRestrictedError'."
199
- )
200
- break
201
- except AllAccountsFailed as e:
202
- logging.error("All accounts failed!")
203
- logging.exception(e)
204
- self.uploader.queue_task(job.to_upload_job(
205
- job_failed=True,
206
- job_failed_msg="All bot accounts failed to download content. Bot administrator noticed about the issue.")
207
- )
208
- self.send_message_to_admin(
209
- f"Task {job.job_id} failed. URL: '{job.url}'. Reason: 'AllAccountsFailed'."
210
- )
211
- break
212
- except (UnknownError, Exception) as e:
213
- logging.warning("UnknownError occurred!")
214
- logging.exception(e)
215
- exception_msg = ""
216
- if hasattr(e, "message"):
217
- exception_msg = e.message
218
- else:
219
- exception_msg = str(e)
220
- if "geoblock_required" in exception_msg:
221
- if job.geoblock_error_count > self.acc_selector.count_service_accounts(job.job_origin):
222
- self.uploader.queue_task(job.to_upload_job(
223
- job_failed=True,
224
- job_failed_msg="This content does not accessible for all yout bot accounts. Seems like author blocked certain regions.")
225
- )
226
- self.send_message_to_admin(
227
- f"Task {job.job_id} failed. URL: '{job.url}'. Reason: 'geoblock_required'."
228
- )
229
- break
230
- job.geoblock_error_count += 1
231
- logging.info("Trying to switch account")
232
- self.acc_selector.next()
233
- self.job_queue.put(job)
234
- break
235
- self.uploader.queue_task(job.to_upload_job(
236
- job_failed=True,
237
- job_failed_msg="WOW, unknown error occured! Please [create issue](https://github.com/sb0y/warp_beacon/issues) with service logs.")
238
- )
239
- self.send_message_to_admin(
240
- f"Task {job.job_id} failed. URL: '{job.url}'. Reason: 'UnknownError'."
241
- f"Exception:\n```\n{exception_msg}\n```"
242
- )
243
- break
244
-
245
- if items:
246
- for item in items:
247
- media_info = {"filesize": 0}
248
- if item["media_type"] == JobType.VIDEO:
249
- media_info = self.get_media_info(item["local_media_path"], item.get("media_info", {}), JobType.VIDEO)
250
- logging.info("Final media info: %s", media_info)
251
- if media_info["filesize"] > 2e+9:
252
- logging.info("Filesize is '%d' MiB", round(media_info["filesize"] / 1024 / 1024))
253
- logging.info("Detected big file. Starting compressing with ffmpeg ...")
254
- self.uploader.queue_task(job.to_upload_job(
255
- job_warning=True,
256
- job_warning_msg="Downloaded file size is bigger than Telegram limits! Performing video compression. This may take a while.")
257
- )
258
- ffmpeg = VideoCompress(file_path=item["local_media_path"])
259
- new_filepath = ffmpeg.generate_filepath(base_filepath=item["local_media_path"])
260
- if ffmpeg.compress_to(new_filepath, target_size=2000 * 1000):
261
- logging.info("Successfully compressed file '%s'", new_filepath)
262
- os.unlink(item["local_media_path"])
263
- item["local_media_path"] = new_filepath
264
- item["local_compressed_media_path"] = new_filepath
265
- media_info["filesize"] = VideoInfo.get_filesize(new_filepath)
266
- logging.info("New file size of compressed file is '%.3f'", media_info["filesize"])
267
- if not media_info["has_sound"]:
268
- item["media_type"] = JobType.ANIMATION
269
- elif item["media_type"] == JobType.AUDIO:
270
- media_info = self.get_media_info(item["local_media_path"], item.get("media_info", {}), JobType.AUDIO)
271
- media_info["performer"] = item.get("performer", None)
272
- media_info["thumb"] = item.get("thumb", None)
273
- logging.info("Final media info: %s", media_info)
274
- elif item["media_type"] == JobType.COLLECTION:
275
- for chunk in item["items"]:
276
- for v in chunk:
277
- if v["media_type"] == JobType.VIDEO:
278
- col_media_info = self.get_media_info(v["local_media_path"], v["media_info"])
279
- media_info["filesize"] += int(col_media_info.get("filesize", 0))
280
- v["media_info"] = col_media_info
281
- if not v["media_info"]["has_sound"]:
282
- silencer = Silencer(v["local_media_path"])
283
- silent_video_path = silencer.add_silent_audio()
284
- os.unlink(v["local_media_path"])
285
- v["local_media_path"] = silent_video_path
286
- v["media_info"].update(silencer.get_finfo())
287
- v["media_info"]["has_sound"] = True
288
-
289
- job_args = {"media_type": item["media_type"], "media_info": media_info}
290
- if item["media_type"] == JobType.COLLECTION:
291
- job_args["media_collection"] = item["items"]
292
- if item.get("save_items", None) is not None:
293
- job_args["save_items"] = item.get("save_items", False)
294
- else:
295
- job_args["local_media_path"] = item["local_media_path"]
296
- if item.get("local_compressed_media_path", None):
297
- job_args["local_media_path"] = item.get("local_compressed_media_path", None)
298
-
299
- job_args["canonical_name"] = item.get("canonical_name", "")
300
-
301
- logging.debug("local_media_path: '%s'", job_args.get("local_media_path", ""))
302
- logging.debug("media_collection: '%s'", str(job_args.get("media_collection", {})))
303
- #logging.info(job_args)
304
- upload_job = job.to_upload_job(**job_args)
305
- if upload_job.is_empty():
306
- logging.info("Upload job is empty. Nothing to do here!")
307
- self.uploader.queue_task(job.to_upload_job(
308
- job_failed=True,
309
- job_failed_msg="Seems like this link doesn't contains any media.")
310
- )
311
- else:
312
- self.uploader.queue_task(upload_job)
313
- else:
314
- logging.info("Job already in work in parallel worker. Redirecting job to upload worker.")
315
- self.uploader.queue_task(job.to_upload_job())
316
- except Exception as e:
317
- logging.error("Error inside download worker!")
318
- logging.exception(e)
319
- self.notify_task_failed(job)
320
- except Empty:
321
- pass
322
- except Exception as e:
323
- logging.error("Exception occurred inside worker!")
324
- logging.exception(e)
325
-
326
- logging.info("Process done")
327
-
328
- def stop_all(self) -> None:
329
- self.allow_loop.value = 0
330
- self.scheduler.stop()
331
- for proc in self.workers:
332
- if proc.is_alive():
333
- logging.info("stopping process #%d", proc.pid)
334
- self.job_queue.put_nowait(self.__JOE_BIDEN_WAKEUP)
335
- proc.join()
336
- #proc.terminate()
337
- logging.info("process #%d stopped", proc.pid)
338
- self.workers.clear()
339
-
340
- def queue_task(self, job: DownloadJob) -> str:
341
- self.job_queue.put_nowait(job)
342
- return str(job.job_id)
343
-
344
- def notify_task_failed(self, job: DownloadJob) -> None:
345
- self.uploader.queue_task(job.to_upload_job(job_failed=True))
346
-
347
- def send_message_to_admin(self, text: str, yt_auth: bool = False) -> None:
348
- self.uploader.queue_task(UploadJob.build(
349
- is_message_to_admin=True,
350
- message_text=text,
351
- yt_auth=yt_auth
352
- ))
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes