warp-beacon 1.0.8__py3-none-any.whl → 1.1.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- warp_beacon/__version__.py +1 -1
- warp_beacon/jobs/__init__.py +6 -0
- warp_beacon/jobs/abstract.py +4 -0
- warp_beacon/mediainfo/video.py +23 -16
- warp_beacon/{scrapler → scraper}/__init__.py +34 -20
- warp_beacon/{scrapler → scraper}/abstract.py +5 -3
- warp_beacon/scraper/exceptions.py +42 -0
- warp_beacon/{scrapler → scraper}/instagram.py +22 -14
- warp_beacon/scraper/youtube/__init__.py +0 -0
- warp_beacon/scraper/youtube/shorts.py +84 -0
- warp_beacon/uploader/__init__.py +7 -2
- warp_beacon/warp_beacon.py +45 -29
- {warp_beacon-1.0.8.dist-info → warp_beacon-1.1.1.dist-info}/METADATA +2 -1
- warp_beacon-1.1.1.dist-info/RECORD +28 -0
- {warp_beacon-1.0.8.dist-info → warp_beacon-1.1.1.dist-info}/WHEEL +1 -1
- {warp_beacon-1.0.8.dist-info → warp_beacon-1.1.1.dist-info}/top_level.txt +6 -3
- warp_beacon-1.0.8.dist-info/RECORD +0 -25
- {warp_beacon-1.0.8.dist-info → warp_beacon-1.1.1.dist-info}/LICENSE +0 -0
- {warp_beacon-1.0.8.dist-info → warp_beacon-1.1.1.dist-info}/entry_points.txt +0 -0
warp_beacon/__version__.py
CHANGED
@@ -1,2 +1,2 @@
-__version__ = "1.0.8"
+__version__ = "1.1.1"
warp_beacon/jobs/__init__.py
CHANGED
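The viewer renders no hunk for this file, but the six added lines are evidently the `Origin` enum that the rest of this diff consumes (`Origin.UNKNOWN`, `Origin.INSTAGRAM`, `Origin.YT_SHORTS`, and `origin.value` in `warp_beacon.py`). A minimal sketch of what they plausibly contain; the member names are confirmed by those call sites, the string values are an assumption:

```python
from enum import Enum

class Origin(Enum):
    # member names confirmed by call sites elsewhere in this diff; values assumed
    INSTAGRAM = "instagram"
    YT_SHORTS = "yt_shorts"
    UNKNOWN = "unknown"
```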
warp_beacon/jobs/abstract.py
CHANGED
@@ -3,6 +3,8 @@ from typing import TypedDict
 from typing_extensions import Unpack
 import uuid
 
+from warp_beacon.jobs import Origin
+
 class JobSettings(TypedDict):
     job_id: uuid.UUID
     message_id: int
@@ -22,6 +24,7 @@ class JobSettings(TypedDict):
     effective_url: str
     save_items: bool
     media_collection: list
+    job_origin: Origin
 
 class AbstractJob(ABC):
     job_id: uuid.UUID = None
@@ -42,6 +45,7 @@ class AbstractJob(ABC):
     effective_url: str = ""
     save_items: bool = False
     media_collection: list = []
+    job_origin: Origin = Origin.UNKNOWN
 
     def __init__(self, **kwargs: Unpack[JobSettings]) -> None:
         if kwargs:
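Since `AbstractJob.__init__` already accepts `Unpack[JobSettings]` kwargs, the new `job_origin` field rides along with no extra plumbing. A hedged sketch of how a concrete job would carry it (`DownloadJob` is real per the imports elsewhere in this diff; the exact constructor fields are inferred from `JobSettings`):

```python
from warp_beacon.jobs import Origin
from warp_beacon.jobs.download_job import DownloadJob

# job_origin defaults to Origin.UNKNOWN when omitted; setting it lets the
# downloader pick the matching scraper (see scraper/__init__.py below)
job = DownloadJob(url="https://www.instagram.com/reel/xyz/", job_origin=Origin.INSTAGRAM)
assert job.job_origin is Origin.INSTAGRAM
```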
warp_beacon/mediainfo/video.py
CHANGED
@@ -10,24 +10,31 @@ class VideoInfo(object):
     width = 0
     height = 0
     duration = 0.0
-    ffmpeg = None
     filename = ""
+    container = None
 
     def __init__(self, filename: str) -> None:
         self.filename = filename
-        …
-        …
+        self.container = av.open(file=self.filename, mode='r')
+
+        if self.container:
+            stream = self.container.streams.video[0]
             time_base = stream.time_base
             self.duration = float(stream.duration * time_base)
             framerate = stream.average_rate
             frame_container_pts = round((1 / framerate) / time_base)
-            …
-            …
+            # !
+            self.container.seek(frame_container_pts, backward=True, stream=stream)
+            #
+            frame = next(self.container.decode(stream))
             self.width = frame.width
             self.height = frame.height
+            # restore original position after previous frame search
+            self.container.seek(0, backward=False, stream=stream)
 
     def __del__(self) -> None:
-        …
+        if self.container:
+            self.container.close()
 
     def get_demensions(self) -> dict:
         return {"width": self.width, "height": self.height}
@@ -37,15 +44,15 @@
 
     @staticmethod
     def get_filesize(filename: str) -> float:
-        return os.stat(filename).st_size
+        return os.stat(filename).st_size
 
     def get_finfo(self, except_info: tuple=()) -> dict:
         res = {}
         res.update(self.get_demensions())
         if "duration" not in except_info:
-            res["duration"] = …
+            res["duration"] = round(self.get_duration())
         if "filesize" not in except_info:
-            res["filesize"] = …
+            res["filesize"] = VideoInfo.get_filesize(self.filename)
         return res
 
     def shrink_image_to_fit(self, image: Image, size: tuple = (320, 320)) -> Image:
@@ -59,17 +66,17 @@
     def generate_thumbnail(self) -> Union[io.BytesIO, None]:
         try:
             image = None
-            …
+            if self.container:
             # Signal that we only want to look at keyframes.
-            stream = container.streams.video[0]
+            stream = self.container.streams.video[0]
             stream.codec_context.skip_frame = "NONKEY"
-            frame_num = …
-            time_base = …
-            framerate = …
+            frame_num = 30
+            time_base = stream.time_base
+            framerate = stream.average_rate
             frame_container_pts = round((frame_num / framerate) / time_base)
 
-            container.seek(frame_container_pts, backward=True, stream=…
-            frame = next(container.decode(stream))
+            self.container.seek(frame_container_pts, backward=True, stream=stream)
+            frame = next(self.container.decode(stream))
 
             image = frame.to_image()
             #image.save(…
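The rewrite keeps one PyAV container open for the object's lifetime: dimensions and duration are probed from the first decodable frame at construction, and the stream is rewound afterwards so `generate_thumbnail()` still starts from the beginning. The same probe-then-rewind pattern in isolation, a minimal sketch (the path is a placeholder):

```python
import av

container = av.open("/tmp/example.mp4", mode="r")      # placeholder path
stream = container.streams.video[0]
frame = next(container.decode(stream))                 # first decodable frame
width, height = frame.width, frame.height
duration = float(stream.duration * stream.time_base)   # stream units -> seconds
container.seek(0, backward=False, stream=stream)       # restore original position
container.close()
print(width, height, round(duration))
```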
warp_beacon/{scrapler → scraper}/__init__.py
CHANGED
@@ -3,12 +3,13 @@ import time
 
 from typing import Optional
 import multiprocessing
-from …
-from instagrapi.exceptions import MediaNotFound, UnknownError, ClientNotFoundError, UserNotFound
+from queue import Empty
 
+from warp_beacon.scraper.exceptions import NotFound, UnknownError, TimeOut, Unavailable
 from warp_beacon.mediainfo.video import VideoInfo
 from warp_beacon.compress.video import VideoCompress
 from warp_beacon.uploader import AsyncUploader
+from warp_beacon.jobs import Origin
 from warp_beacon.jobs.download_job import DownloadJob
 
 import logging
@@ -16,6 +17,7 @@ import logging
 CONST_CPU_COUNT = multiprocessing.cpu_count()
 
 class AsyncDownloader(object):
+    __JOE_BIDEN_WAKEUP = None
     workers = []
     allow_loop = None
     job_queue = multiprocessing.Queue()
@@ -44,7 +46,6 @@
             media_info = video_info.get_finfo(tuple(fr_media_info.keys()))
             media_info.update(fr_media_info)
             media_info["thumb"] = video_info.generate_thumbnail()
-            logging.info("Media file info: %s", media_info)
         except Exception as e:
             logging.error("Failed to process media info!")
             logging.exception(e)
@@ -58,30 +59,41 @@
             job = None
             try:
                 job = self.job_queue.get()
+                if job is self.__JOE_BIDEN_WAKEUP:
+                    continue
                 actor = None
                 try:
                     items = []
-                    if …
+                    if job.job_origin is not Origin.UNKNOWN:
                         if not job.in_process:
-                            …
-                            …
+                            actor = None
+                            if job.job_origin is Origin.INSTAGRAM:
+                                from warp_beacon.scraper.instagram import InstagramScraper
+                                actor = InstagramScraper()
+                            elif job.job_origin is Origin.YT_SHORTS:
+                                from warp_beacon.scraper.youtube.shorts import YoutubeShortsScraper
+                                actor = YoutubeShortsScraper()
                             while True:
                                 try:
                                     logging.info("Downloading URL '%s'", job.url)
                                     items = actor.download(job.url)
                                     break
-                                except …
-                                    logging.…
-                                    logging.exception(e)
-                                    time.sleep(2)
-                                except (MediaNotFound, ClientNotFoundError, UserNotFound) as e:
-                                    logging.warning("Not found error occurred!")
+                                except (NotFound, Unavailable) as e:
+                                    logging.warning("Not found or unavailable error occurred!")
                                     logging.exception(e)
                                     self.uploader.queue_task(job.to_upload_job(
                                         job_failed=True,
                                         job_failed_msg="Unable to access to media under this URL. Seems like the media is private.")
                                     )
                                     break
+                                except TimeOut as e:
+                                    logging.warning("Timeout error occurred!")
+                                    logging.exception(e)
+                                    self.uploader.queue_task(job.to_upload_job(
+                                        job_failed=True,
+                                        job_failed_msg="Failed to download content. Please check you Internet connection or retry amount bot configuration settings.")
+                                    )
+                                    break
                                 except (UnknownError, Exception) as e:
                                     logging.warning("UnknownError occurred!")
                                     logging.exception(e)
@@ -106,8 +118,10 @@
                     for item in items:
                         media_info = {"filesize": 0}
                         if item["media_type"] == "video":
-                            media_info = self.get_media_info(item["local_media_path"], item…
-                            …
+                            media_info = self.get_media_info(item["local_media_path"], item.get("media_info", {}))
+                            logging.info("Final media info: %s", media_info)
+                            if media_info["filesize"] > 52428800:
+                                logging.info("Filesize is '%d' MiB", round(media_info["filesize"] / 1024 / 1024))
                             logging.info("Detected big file. Starting compressing with ffmpeg ...")
                             self.uploader.queue_task(job.to_upload_job(
                                 job_warning=True,
@@ -153,27 +167,27 @@
                     else:
                         logging.info("Job already in work in parallel worker. Redirecting job to upload worker.")
                         self.uploader.queue_task(job.to_upload_job())
-                except HTTPError as e:
-                    logging.error("HTTP error inside download worker!")
-                    logging.exception(e)
                 except Exception as e:
                     logging.error("Error inside download worker!")
                     logging.exception(e)
                     self.notify_task_failed(job)
                     #self.queue_task(url=item["url"], message_id=item["message_id"], item_in_process=item["in_process"], uniq_id=item["uniq_id"])
-            except …
+            except Empty:
                 pass
             except Exception as e:
                 logging.error("Exception occurred inside worker!")
                 logging.exception(e)
 
+        logging.info("Process done")
+
     def stop_all(self) -> None:
         self.allow_loop.value = 0
         for proc in self.workers:
             if proc.is_alive():
                 logging.info("stopping process #%d", proc.pid)
-                …
-                …
+                self.job_queue.put_nowait(self.__JOE_BIDEN_WAKEUP)
+                proc.join()
+                #proc.terminate()
                 logging.info("process #%d stopped", proc.pid)
         self.workers.clear()
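Two changes carry this file: scrapers are now resolved lazily from `job.job_origin`, and shutdown pushes a `None` sentinel (`__JOE_BIDEN_WAKEUP`) into the queue so a worker blocked in `job_queue.get()` wakes up, re-checks `allow_loop`, and can be `join()`ed instead of terminated. The sentinel pattern in isolation, a minimal sketch using only the standard library:

```python
import multiprocessing

SENTINEL = None  # plays the same role as __JOE_BIDEN_WAKEUP above

def worker(queue: multiprocessing.Queue, allow_loop) -> None:
    while allow_loop.value:
        job = queue.get()        # blocks until a job or the sentinel arrives
        if job is SENTINEL:
            continue             # loop around and re-check allow_loop
        print("processing", job)

if __name__ == "__main__":
    allow_loop = multiprocessing.Value('i', 1)
    queue = multiprocessing.Queue()
    proc = multiprocessing.Process(target=worker, args=(queue, allow_loop))
    proc.start()
    queue.put("job-1")
    allow_loop.value = 0         # stop the loop ...
    queue.put_nowait(SENTINEL)   # ... and wake the blocked get()
    proc.join()
```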
warp_beacon/{scrapler → scraper}/abstract.py
CHANGED
@@ -1,6 +1,7 @@
 from abc import ABC, abstractmethod
+from typing import Callable, Union
 
-class ScraplerAbstract(ABC):
+class ScraperAbstract(ABC):
     def __init__(self) -> None:
         pass
 
@@ -8,9 +9,10 @@ class ScraplerAbstract(ABC):
         pass
 
     @abstractmethod
-    def …
+    def download(self, url: str) -> bool:
         raise NotImplementedError
 
     @abstractmethod
-    def …
+    def _download_hndlr(self, func: Callable, *args: tuple[str], **kwargs: dict[str]) -> Union[str, dict]:
         raise NotImplementedError
+
warp_beacon/scraper/exceptions.py
ADDED
@@ -0,0 +1,42 @@
+from typing import Union
+
+class ScraperError(Exception):
+    def __init__(self, *args, **kwargs):
+        args = list(args)
+        if len(args) > 0:
+            self.message = str(args.pop(0))
+        for key in list(kwargs.keys()):
+            setattr(self, key, kwargs.pop(key))
+        if not self.message:
+            self.message = "{title} ({body})".format(
+                title=getattr(self, "reason", "Unknown"),
+                body=getattr(self, "error_type", vars(self)),
+            )
+        super().__init__(self.message, *args, **kwargs)
+        if hasattr(self, "response") and self.response:
+            self.code = self.response.status_code
+
+class TimeOut(ScraperError):
+    pass
+
+class NotFound(ScraperError):
+    pass
+
+class Unavailable(ScraperError):
+    pass
+
+class UnknownError(ScraperError):
+    pass
+
+def extract_exception_message(e: Exception) -> str:
+    msg = ""
+    if hasattr(e, "error_string"):
+        msg = e.error_string
+    elif hasattr(e, "message"):
+        msg = e.message
+    elif hasattr(e, "reason"):
+        msg = e.reason
+    elif hasattr(e, "args"):
+        msg = str(e.args)
+
+    return msg
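`ScraperError` flattens heterogeneous upstream exceptions into one hierarchy: the first positional argument becomes `message`, every kwarg becomes an attribute, and a `response` kwarg additionally populates `code`; `extract_exception_message` then recovers a printable string from whichever attribute the original exception happened to carry. A quick usage sketch (URL and text are placeholders):

```python
from warp_beacon.scraper.exceptions import NotFound, extract_exception_message

try:
    raise NotFound("media was removed", url="https://example.com/x")
except NotFound as e:
    print(e.message)                     # "media was removed"
    print(e.url)                         # kwargs become attributes
    print(extract_exception_message(e))  # falls through to e.message
```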
warp_beacon/{scrapler → scraper}/instagram.py
CHANGED
@@ -1,10 +1,11 @@
 import os
-from pathlib import Path
 import time
-import json
 from typing import Callable, Optional, Union
+from pathlib import Path
+import json
 
 import requests
+from requests.exceptions import ConnectTimeout, HTTPError
 import urllib3
 from urllib.parse import urljoin, urlparse
 import logging
@@ -12,13 +13,14 @@ import logging
 from instagrapi.mixins.story import Story
 from instagrapi.types import Media
 from instagrapi import Client
-from instagrapi.exceptions import LoginRequired, PleaseWaitFewMinutes
+from instagrapi.exceptions import LoginRequired, PleaseWaitFewMinutes, MediaNotFound, ClientNotFoundError, UserNotFound, UnknownError as IGUnknownError
 
-from warp_beacon.…
+from warp_beacon.scraper.exceptions import NotFound, UnknownError, TimeOut, extract_exception_message
+from warp_beacon.scraper.abstract import ScraperAbstract
 
 INST_SESSION_FILE = "/var/warp_beacon/inst_session.json"
 
-class InstagramScrapler(ScraplerAbstract):
+class InstagramScraper(ScraperAbstract):
     cl = None
 
     def __init__(self) -> None:
@@ -85,7 +87,7 @@ class InstagramScrapler(ScraplerAbstract):
         logging.info("media_id is '%s'", media_id)
         return media_id
 
-    def …
+    def _download_hndlr(self, func: Callable, *args: tuple[str], **kwargs: dict[str]) -> Union[str, dict]:
         ret_val = {}
         max_retries = int(os.environ.get("IG_MAX_RETRIES", default=5))
         retries = 0
@@ -96,12 +98,14 @@ class InstagramScrapler(ScraplerAbstract):
             except (requests.exceptions.ConnectionError,
                 requests.exceptions.ReadTimeout,
                 urllib3.exceptions.ReadTimeoutError,
-                urllib3.exceptions.ConnectionError…
+                urllib3.exceptions.ConnectionError,
+                ConnectTimeout,
+                HTTPError) as e:
                 logging.warning("Instagram read timeout! Retrying in 2 seconds ...")
                 logging.info("Your `IG_MAX_RETRIES` values is %d", max_retries)
                 logging.exception(e)
                 if max_retries == retries:
-                    raise e
+                    raise TimeOut(extract_exception_message(e))
                 retries += 1
                 time.sleep(2)
@@ -109,11 +113,11 @@
 
 
     def download_video(self, url: str, media_info: dict) -> dict:
-        path = self.…
-        return {"local_media_path": str(path), "media_type": "video", "media_info": {"duration": media_info.video_duration}}
+        path = self._download_hndlr(self.cl.video_download_by_url, url, folder='/tmp')
+        return {"local_media_path": str(path), "media_type": "video", "media_info": {"duration": round(media_info.video_duration)}}
 
     def download_photo(self, url: str) -> dict:
-        path = self.…
+        path = self._download_hndlr(self.cl.photo_download_by_url, url, folder='/tmp')
         return {"local_media_path": str(path), "media_type": "image"}
 
     def download_story(self, story_info: Story) -> dict:
@@ -127,10 +131,10 @@ class InstagramScrapler(ScraplerAbstract):
         logging.info("Effective story id is '%s'", effective_story_id)
         effective_url = "https://www.instagram.com/stories/%s/%s/" % (story_info.user.username, effective_story_id)
         if story_info.media_type == 1: # photo
-            path = self.…
+            path = self._download_hndlr(self.cl.story_download_by_url, url=story_info.thumbnail_url, folder='/tmp')
             media_type = "image"
         elif story_info.media_type == 2: # video
-            path = self.…
+            path = self._download_hndlr(self.cl.story_download_by_url, url=story_info.video_url, folder='/tmp')
             media_type = "video"
             media_info["duration"] = story_info.video_duration
 
@@ -160,7 +164,7 @@ class InstagramScrapler(ScraplerAbstract):
         try:
             scrap_type, media_id = self.scrap(url)
             if scrap_type == "media":
-                media_info = self.…
+                media_info = self._download_hndlr(self.cl.media_info, media_id)
                 logging.info("media_type is '%d', product_type is '%s'", media_info.media_type, media_info.product_type)
                 if media_info.media_type == 2 and media_info.product_type == "clips": # Reels
                     res.append(self.download_video(url=media_info.video_url, media_info=media_info))
@@ -192,4 +196,8 @@ class InstagramScrapler(ScraplerAbstract):
                     os.unlink(i["local_media_path"])
                 os.unlink(INST_SESSION_FILE)
                 time.sleep(wait_timeout)
+            except (MediaNotFound, ClientNotFoundError, UserNotFound) as e:
+                raise NotFound(extract_exception_message(e))
+            except IGUnknownError as e:
+                raise UnknownError(extract_exception_message(e))
         return res
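`_download_hndlr` becomes the single retry choke point: every instagrapi call is routed through it, transient network errors are retried every 2 seconds up to `IG_MAX_RETRIES`, and only then surface as the scraper-level `TimeOut`. The wrapper shape in isolation, a hedged sketch with stand-in exception types:

```python
import os
import time
from typing import Any, Callable

def download_hndlr(func: Callable[..., Any], *args: Any, **kwargs: Any) -> Any:
    """Retry `func` on transient errors, mirroring _download_hndlr above."""
    max_retries = int(os.environ.get("IG_MAX_RETRIES", default=5))
    retries = 0
    while True:
        try:
            return func(*args, **kwargs)
        except ConnectionError as e:  # stand-in for the requests/urllib3 tuple
            if retries >= max_retries:
                raise TimeoutError(str(e))  # stand-in for the scraper TimeOut
            retries += 1
            time.sleep(2)
```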
warp_beacon/scraper/youtube/__init__.py
File without changes
warp_beacon/scraper/youtube/shorts.py
ADDED
@@ -0,0 +1,84 @@
+import os
+import pathlib
+import time
+
+from typing import Callable, Union
+
+from socket import timeout
+from ssl import SSLError
+from requests.exceptions import RequestException
+from urllib.error import URLError
+from http.client import HTTPException
+
+from pytubefix import YouTube
+from pytubefix.exceptions import VideoUnavailable, VideoPrivate, MaxRetriesExceeded
+
+from warp_beacon.scraper.exceptions import NotFound, UnknownError, TimeOut, extract_exception_message
+from warp_beacon.scraper.abstract import ScraperAbstract
+
+import logging
+
+class YoutubeShortsScraper(ScraperAbstract):
+    def __init__(self) -> None:
+        pass
+
+    def __del__(self) -> None:
+        pass
+
+    def _download_hndlr(self, func: Callable, *args: tuple[str], **kwargs: dict[str]) -> Union[str, dict]:
+        ret_val = ''
+        max_retries = int(os.environ.get("YT_MAX_RETRIES", default=8))
+        pause_secs = int(os.environ.get("YT_PAUSE_BEFORE_RETRY", default=3))
+        retries = 0
+        while max_retries >= retries:
+            try:
+                ret_val = func(*args, **kwargs)
+                break
+            except MaxRetriesExceeded:
+                # do noting, not interested
+                pass
+            except (timeout, SSLError, HTTPException, RequestException, URLError) as e:
+                logging.warning("Youtube read timeout! Retrying in %d seconds ...", pause_secs)
+                logging.info("Your `YT_MAX_RETRIES` values is %d", max_retries)
+                logging.exception(extract_exception_message(e))
+                if max_retries >= retries:
+                    raise TimeOut(extract_exception_message(e))
+                retries += 1
+                time.sleep(pause_secs)
+            except (VideoUnavailable, VideoPrivate) as e:
+                raise Unavailable(extract_exception_message(e))
+
+        return ret_val
+
+    def rename_local_file(self, filename: str) -> str:
+        if not os.path.exists(filename):
+            raise NameError("No file provided")
+        path_info = pathlib.Path(filename)
+        ext = path_info.suffix
+        old_filename = path_info.stem
+        time_name = str(time.time()).replace('.', '_')
+        new_filename = "%s%s" % (time_name, ext)
+        new_filepath = "%s/%s" % (os.path.dirname(filename), new_filename)
+
+        os.rename(filename, new_filepath)
+
+        return new_filepath
+
+    def _download(self, url: str) -> list:
+        res = []
+        timeout = int(os.environ.get("YT_TIMEOUT", default=2))
+        yt = YouTube(url)
+        stream = yt.streams.get_highest_resolution()
+        if stream:
+            local_file = stream.download(
+                output_path="/tmp",
+                max_retries=0,
+                timeout=timeout,
+                skip_existing=False
+            )
+            res.append({"local_media_path": self.rename_local_file(local_file), "media_type": "video"})
+
+        return res
+
+    def download(self, url: str) -> list:
+        return self._download_hndlr(self._download, url)
warp_beacon/uploader/__init__.py
CHANGED
@@ -13,6 +13,7 @@ from typing import Optional, Callable, Coroutine
 from warp_beacon.storage import Storage
 
 class AsyncUploader(object):
+    __JOE_BIDEN_WAKEUP = None
     threads = []
     allow_loop = True
     job_queue = None
@@ -34,8 +35,8 @@
     def start(self) -> None:
         for _ in range(self.pool_size):
             thread = threading.Thread(target=self.do_work)
-            self.threads.append(thread)
             thread.start()
+            self.threads.append(thread)
 
     def add_callback(self, message_id: int, callback: Callable, update: Update, context: ContextTypes.DEFAULT_TYPE) -> None:
         def callback_wrap(*args, **kwargs) -> None:
@@ -55,6 +56,7 @@
         for i in self.threads:
             t_id = i.native_id
             logging.info("Stopping thread #'%s'", t_id)
+            self.job_queue.put(self.__JOE_BIDEN_WAKEUP)
             i.join()
             logging.info("Thread #'%s' stopped", t_id)
         self.threads.clear()
@@ -77,6 +79,8 @@
         try:
             try:
                 job = self.job_queue.get()
+                if job is self.__JOE_BIDEN_WAKEUP:
+                    continue
                 path = ""
                 if job.media_type == "collection":
                     for i in job.media_collection:
@@ -130,4 +134,5 @@
                 pass
             except Exception as e:
                 logging.error("Exception occurred inside upload worker!")
-                logging.exception(e)
+                logging.exception(e)
+        logging.info("Thread done")
warp_beacon/warp_beacon.py
CHANGED
@@ -6,7 +6,6 @@ import signal
 import asyncio
 import time
 from io import BytesIO
-import logging
 
 from urlextract import URLExtract
 
@@ -15,10 +14,14 @@ from telegram import Bot, ForceReply, Update, Chat, error, InputMediaVideo, Inpu…
 from telegram.ext import Application, CommandHandler, ContextTypes, MessageHandler, filters
 from telegram.constants import ParseMode
 
-import warp_beacon.scrapler
+import warp_beacon.scraper
 from warp_beacon.storage import Storage
 from warp_beacon.uploader import AsyncUploader
-from warp_beacon.jobs.download_job import DownloadJob
+from warp_beacon.jobs.download_job import DownloadJob
+from warp_beacon.jobs.upload_job import UploadJob
+from warp_beacon.jobs import Origin
+
+import logging
 
 # Enable logging
 logging.basicConfig(
@@ -361,6 +364,15 @@ async def upload_job(update: Update, context: ContextTypes.DEFAULT_TYPE, job: Up…
 
     return tg_file_ids
 
+def extract_origin(url: str) -> Origin:
+    if "instagram.com/" in url:
+        return Origin.INSTAGRAM
+
+    if "youtube.com/" in url and "shorts/" in url:
+        return Origin.YT_SHORTS
+
+    return Origin.UNKNOWN
+
 async def handler(update: Update, context: ContextTypes.DEFAULT_TYPE) -> None:
     if update.message is None:
         return
@@ -374,8 +386,9 @@ async def handler(update: Update, context: ContextTypes.DEFAULT_TYPE) -> None:
         reply_text = "Your message should contains URLs"
     else:
         for url in urls:
-            …
-            …
+            origin = extract_origin(url)
+            if origin is Origin.UNKNOWN:
+                logging.info("Only Instagram and YouTube Shorts are now supported. Skipping.")
                 continue
             entities, tg_file_ids = [], []
             uniq_id = Storage.compute_uniq(url)
@@ -422,9 +435,9 @@ async def handler(update: Update, context: ContextTypes.DEFAULT_TYPE) -> None:
                 if tg_file_ids:
                     if job.media_type == "collection" and job.save_items:
                         for i in job.media_collection:
-                            storage.add_media(tg_file_ids=[i.tg_file_id], media_url=i.effective_url, media_type=i.media_type, origin=…
+                            storage.add_media(tg_file_ids=[i.tg_file_id], media_url=i.effective_url, media_type=i.media_type, origin=origin.value)
                     else:
-                        storage.add_media(tg_file_ids=[','.join(tg_file_ids)], media_url=job.url, media_type=job.media_type, origin=…
+                        storage.add_media(tg_file_ids=[','.join(tg_file_ids)], media_url=job.url, media_type=job.media_type, origin=origin.value)
             except Exception as e:
                 logging.error("Exception occurred while performing upload callback!")
                 logging.exception(e)
@@ -457,7 +470,8 @@ async def handler(update: Update, context: ContextTypes.DEFAULT_TYPE) -> None:
                 placeholder_message_id=placeholder_message_id,
                 message_id=effective_message_id,
                 in_process=uploader.is_inprocess(uniq_id),
-                uniq_id=uniq_id
+                uniq_id=uniq_id,
+                job_origin = origin
             ))
             uploader.set_inprocess(uniq_id)
         except Exception as e:
@@ -476,25 +490,6 @@ def main() -> None:
     try:
         global uploader, downloader
 
-        loop = asyncio.get_event_loop()
-
-        uploader = AsyncUploader(
-            storage=storage,
-            pool_size=int(os.environ.get("UPLOAD_POOL_SIZE", default=warp_beacon.scrapler.CONST_CPU_COUNT)),
-            loop=loop
-        )
-        downloader = warp_beacon.scrapler.AsyncDownloader(
-            workers_count=int(os.environ.get("WORKERS_POOL_SIZE", default=warp_beacon.scrapler.CONST_CPU_COUNT)),
-            uploader=uploader
-        )
-        downloader.start()
-        uploader.start()
-
-        stop_signals = (signal.SIGINT, signal.SIGTERM, signal.SIGABRT)
-        for sig in stop_signals or []:
-            loop.add_signal_handler(sig, _raise_system_exit)
-            loop.add_signal_handler(sig, _raise_system_exit)
-
         # Create the Application and pass it your bot's token.
         tg_token = os.environ.get("TG_TOKEN", default=None)
         application = Application.builder().token(tg_token).concurrent_updates(True).build()
@@ -508,8 +503,25 @@
     application.add_handler(MessageHandler(filters.TEXT & ~filters.COMMAND, handler))
 
     allow_loop = True
+    loop = None
     while allow_loop:
         try:
+            loop = asyncio.get_event_loop()
+
+            stop_signals = (signal.SIGINT, signal.SIGTERM, signal.SIGABRT)
+            for sig in stop_signals or []:
+                loop.add_signal_handler(sig, _raise_system_exit)
+                loop.add_signal_handler(sig, _raise_system_exit)
+
+            uploader = AsyncUploader(
+                storage=storage,
+                pool_size=int(os.environ.get("UPLOAD_POOL_SIZE", default=warp_beacon.scraper.CONST_CPU_COUNT)),
+                loop=loop
+            )
+            downloader = warp_beacon.scraper.AsyncDownloader(
+                workers_count=int(os.environ.get("WORKERS_POOL_SIZE", default=warp_beacon.scraper.CONST_CPU_COUNT)),
+                uploader=uploader
+            )
             loop.run_until_complete(application.initialize())
             if application.post_init:
                 loop.run_until_complete(application.post_init(application))
@@ -517,10 +529,12 @@
             loop.run_until_complete(application.start())
             while allow_loop:
                 try:
+                    downloader.start()
+                    uploader.start()
                     loop.run_forever()
                 except (KeyboardInterrupt, SystemExit) as e:
                     allow_loop = False
-                    raise
+                    raise
                 except Exception as e:
                     logging.error("Main loop Telegram error!")
                     logging.exception(e)
@@ -546,11 +560,13 @@
             if application.post_shutdown:
                 loop.run_until_complete(application.post_shutdown(application))
         finally:
+            loop.close()
             downloader.stop_all()
             uploader.stop_all()
-            loop.close()
     except Exception as e:
         logging.exception(e)
 
+    logging.info("Warp Beacon terminated.")
+
 if __name__ == "__main__":
     main()
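`extract_origin` routes purely on URL substrings, so only canonical hosts match; per the hunk above:

```python
assert extract_origin("https://www.instagram.com/reel/abc/") is Origin.INSTAGRAM
assert extract_origin("https://www.youtube.com/shorts/abc123") is Origin.YT_SHORTS
assert extract_origin("https://youtu.be/abc123") is Origin.UNKNOWN  # short-link form is skipped
```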
{warp_beacon-1.0.8.dist-info → warp_beacon-1.1.1.dist-info}/METADATA
CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: warp_beacon
-Version: 1.0.8
+Version: 1.1.1
 Summary: Telegram bot for expanding external media links
 Home-page: https://github.com/sb0y/warp_beacon
 Author: Andrey Bagrintsev
@@ -226,6 +226,7 @@ Description-Content-Type: text/markdown
 License-File: LICENSE
 Requires-Dist: ffmpeg-python
 Requires-Dist: python-telegram-bot
+Requires-Dist: pytubefix
 Requires-Dist: av
 Requires-Dist: urlextract
 Requires-Dist: pillow
warp_beacon-1.1.1.dist-info/RECORD
ADDED
@@ -0,0 +1,28 @@
+etc/warp_beacon/warp_beacon.conf,sha256=1gGvh36cnFr0rU4mVomfy66hQz9EvugaNzeH6_tmBM0,266
+lib/systemd/system/warp_beacon.service,sha256=lPmHqLqcI2eIV7nwHS0qcALQrznixqJuwwPfa2mDLUA,372
+var/warp_beacon/placeholder.gif,sha256=cE5CGJVaop4Sx21zx6j4AyoHU0ncmvQuS2o6hJfEH88,6064
+warp_beacon/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+warp_beacon/__version__.py,sha256=Y1WL3jwTYIGv_JONH9hc9ULjDsNL4lbh4ucw3BlkBYs,23
+warp_beacon/warp_beacon.py,sha256=Z3zY7MTMZp3zKRderhNOVe6XvM-93Djo0DIsiaoucmo,20012
+warp_beacon/compress/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+warp_beacon/compress/video.py,sha256=_PDMVYCyzLYxHv1uZmmzGcG_8rjaZr7BTXsXTTy_oS4,2846
+warp_beacon/jobs/__init__.py,sha256=KsSaS0KlCNyffNnWKuvqmdfgyfKyn3niXYyZ38-exQ8,113
+warp_beacon/jobs/abstract.py,sha256=PCr8RXzocKi-sTsi2Y1_spiv6D95G1NlzZ2wD2WJXRc,1760
+warp_beacon/jobs/download_job.py,sha256=wfZrKUerfYIjWkRxPzfl5gwIlcotIMH7OpTUM9ae8NY,736
+warp_beacon/jobs/upload_job.py,sha256=Vaogc4vbpAfyaT4VkIHEPLFRELmM44TDqkmnPYh3Ymc,740
+warp_beacon/mediainfo/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+warp_beacon/mediainfo/video.py,sha256=8h7p4k0w45Vm0lPQNlaQaUjaDTBybX3RcKgL1QQbioA,2638
+warp_beacon/scraper/__init__.py,sha256=c7NySK5Krm-zlWQckFs-uN4fD3J19A0pTS4CByXjmMs,7918
+warp_beacon/scraper/abstract.py,sha256=um4wUthO_7IsoXjKiUTWyBBbKlf-N01aZJK9N2UQI9I,408
+warp_beacon/scraper/exceptions.py,sha256=qra_Jx53RVCnrCKvw2OxvEHl4cXJCrPDa_yef3cvGXM,978
+warp_beacon/scraper/instagram.py,sha256=s7slumqdqVVWQYpnVphx-dOLIAWvgA_UdqkTQVp6GsI,7758
+warp_beacon/scraper/youtube/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+warp_beacon/scraper/youtube/shorts.py,sha256=xAOeHNT3_Ayjjglid00UqGqCgSMfbJua26PNrbbDYUo,2565
+warp_beacon/storage/__init__.py,sha256=NhD3V7UNRiZNf61yQEAjXOfi-tfA2LaJa7a7kvbkmtE,2402
+warp_beacon/uploader/__init__.py,sha256=auD1arKpJdN1eFUbTFoa9Gmv-ZYZNesMoT193__pDz8,4507
+warp_beacon-1.1.1.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
+warp_beacon-1.1.1.dist-info/METADATA,sha256=grW-AA1vEjTZEt30ShVny1vomQY-0M-HGMGuOGOZdCc,18244
+warp_beacon-1.1.1.dist-info/WHEEL,sha256=Wyh-_nZ0DJYolHNn1_hMa4lM7uDedD_RGVwbmTjyItk,91
+warp_beacon-1.1.1.dist-info/entry_points.txt,sha256=eSB61Rb89d56WY0O-vEIQwkn18J-4CMrJcLA_R_8h3g,119
+warp_beacon-1.1.1.dist-info/top_level.txt,sha256=QuN6MynevEblMhPPAVeMrNAkcyqYUpYDholtIRq8-ew,473
+warp_beacon-1.1.1.dist-info/RECORD,,
{warp_beacon-1.0.8.dist-info → warp_beacon-1.1.1.dist-info}/top_level.txt
CHANGED
@@ -8,9 +8,12 @@ warp_beacon/jobs/download_job
 warp_beacon/jobs/upload_job
 warp_beacon/mediainfo
 warp_beacon/mediainfo/video
-warp_beacon/scrapler
-warp_beacon/scrapler/abstract
-warp_beacon/scrapler/instagram
+warp_beacon/scraper
+warp_beacon/scraper/abstract
+warp_beacon/scraper/exceptions
+warp_beacon/scraper/instagram
+warp_beacon/scraper/youtube
+warp_beacon/scraper/youtube/shorts
 warp_beacon/storage
 warp_beacon/uploader
 warp_beacon/warp_beacon
warp_beacon-1.0.8.dist-info/RECORD
REMOVED
@@ -1,25 +0,0 @@
-etc/warp_beacon/warp_beacon.conf,sha256=1gGvh36cnFr0rU4mVomfy66hQz9EvugaNzeH6_tmBM0,266
-lib/systemd/system/warp_beacon.service,sha256=lPmHqLqcI2eIV7nwHS0qcALQrznixqJuwwPfa2mDLUA,372
-var/warp_beacon/placeholder.gif,sha256=cE5CGJVaop4Sx21zx6j4AyoHU0ncmvQuS2o6hJfEH88,6064
-warp_beacon/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-warp_beacon/__version__.py,sha256=jOGC2HGDoU2OhY9xBiOPgX9B0ssXLXx0OeKHzO1DTV8,23
-warp_beacon/warp_beacon.py,sha256=agUu6_Qt1UnGTSqYqXQZtHeeWqRkhlIHt_Repph1G3k,19581
-warp_beacon/compress/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-warp_beacon/compress/video.py,sha256=_PDMVYCyzLYxHv1uZmmzGcG_8rjaZr7BTXsXTTy_oS4,2846
-warp_beacon/jobs/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-warp_beacon/jobs/abstract.py,sha256=-eXvrRgrUBed1z9iYZd0RxZ8fi0Okq3r0i7PDN31FkY,1666
-warp_beacon/jobs/download_job.py,sha256=wfZrKUerfYIjWkRxPzfl5gwIlcotIMH7OpTUM9ae8NY,736
-warp_beacon/jobs/upload_job.py,sha256=Vaogc4vbpAfyaT4VkIHEPLFRELmM44TDqkmnPYh3Ymc,740
-warp_beacon/mediainfo/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-warp_beacon/mediainfo/video.py,sha256=CbaJmAoHAYNBI4bwvsIXYqTRNlFj9OtDhvxLgywMnho,2536
-warp_beacon/scrapler/__init__.py,sha256=J1kRZfkCuTucRnklElXnrMUZUGpg8wIGYWGrfY5a1xc,7335
-warp_beacon/scrapler/abstract.py,sha256=MJxpEovCWDYq2SwbbMsRDfp77WTwvbXXKiQxKWoj0ZQ,304
-warp_beacon/scrapler/instagram.py,sha256=8CF_Zdxn1hStz_PgLxTc0FTt5heI84d-Ks0XzmD7-_o,7248
-warp_beacon/storage/__init__.py,sha256=NhD3V7UNRiZNf61yQEAjXOfi-tfA2LaJa7a7kvbkmtE,2402
-warp_beacon/uploader/__init__.py,sha256=9qQAuYisXiVIjQghxcxpF4WAdW7lm7HmpkOXQjGNJXk,4346
-warp_beacon-1.0.8.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
-warp_beacon-1.0.8.dist-info/METADATA,sha256=Ma5F75k0lR3JAK-aiZFYdZc_u2xO1IplbuWTQwzsKZ8,18219
-warp_beacon-1.0.8.dist-info/WHEEL,sha256=rWxmBtp7hEUqVLOnTaDOPpR-cZpCDkzhhcBce-Zyd5k,91
-warp_beacon-1.0.8.dist-info/entry_points.txt,sha256=eSB61Rb89d56WY0O-vEIQwkn18J-4CMrJcLA_R_8h3g,119
-warp_beacon-1.0.8.dist-info/top_level.txt,sha256=510sqsM4LLO-DC4HbUkwdVKmYY_26lbnvJwSq_RLT00,382
-warp_beacon-1.0.8.dist-info/RECORD,,
{warp_beacon-1.0.8.dist-info → warp_beacon-1.1.1.dist-info}/LICENSE
File without changes
{warp_beacon-1.0.8.dist-info → warp_beacon-1.1.1.dist-info}/entry_points.txt
File without changes