warp-beacon 1.0.8__py3-none-any.whl → 1.1.1__py3-none-any.whl

This diff shows the contents of publicly released package versions as they appear in their respective public registries. It is provided for informational purposes only.
warp_beacon/__version__.py
@@ -1,2 +1,2 @@
- __version__ = "1.0.8"
+ __version__ = "1.1.1"
 
warp_beacon/jobs/__init__.py
@@ -0,0 +1,6 @@
+ from enum import Enum
+
+ class Origin(Enum):
+ 	INSTAGRAM = "instagram"
+ 	YT_SHORTS = "yt_shorts"
+ 	UNKNOWN = "unknown"
warp_beacon/jobs/abstract.py
@@ -3,6 +3,8 @@ from typing import TypedDict
  from typing_extensions import Unpack
  import uuid
 
+ from warp_beacon.jobs import Origin
+
  class JobSettings(TypedDict):
  	job_id: uuid.UUID
  	message_id: int
@@ -22,6 +24,7 @@ class JobSettings(TypedDict):
  	effective_url: str
  	save_items: bool
  	media_collection: list
+ 	job_origin: Origin
 
  class AbstractJob(ABC):
  	job_id: uuid.UUID = None
@@ -42,6 +45,7 @@ class AbstractJob(ABC):
  	effective_url: str = ""
  	save_items: bool = False
  	media_collection: list = []
+ 	job_origin: Origin = Origin.UNKNOWN
 
  	def __init__(self, **kwargs: Unpack[JobSettings]) -> None:
  		if kwargs:
warp_beacon/mediainfo/video.py
@@ -10,24 +10,31 @@ class VideoInfo(object):
  	width = 0
  	height = 0
  	duration = 0.0
- 	ffmpeg = None
  	filename = ""
+ 	container = None
 
  	def __init__(self, filename: str) -> None:
  		self.filename = filename
- 		with av.open(file=self.filename, mode='r') as container:
- 			stream = container.streams.video[0]
+ 		self.container = av.open(file=self.filename, mode='r')
+
+ 		if self.container:
+ 			stream = self.container.streams.video[0]
  			time_base = stream.time_base
  			self.duration = float(stream.duration * time_base)
  			framerate = stream.average_rate
  			frame_container_pts = round((1 / framerate) / time_base)
- 			container.seek(frame_container_pts, backward=True, stream=stream)
- 			frame = next(container.decode(video=0))
+ 			# seek to the first frame's presentation timestamp
+ 			self.container.seek(frame_container_pts, backward=True, stream=stream)
+ 			# decode a single frame to read its dimensions
+ 			frame = next(self.container.decode(stream))
  			self.width = frame.width
  			self.height = frame.height
+ 			# restore original position after previous frame search
+ 			self.container.seek(0, backward=False, stream=stream)
 
  	def __del__(self) -> None:
- 		pass
+ 		if self.container:
+ 			self.container.close()
 
  	def get_demensions(self) -> dict:
  		return {"width": self.width, "height": self.height}
@@ -37,15 +44,15 @@ class VideoInfo(object):
 
  	@staticmethod
  	def get_filesize(filename: str) -> float:
- 		return os.stat(filename).st_size / 1024 / 1024
+ 		return os.stat(filename).st_size
 
  	def get_finfo(self, except_info: tuple=()) -> dict:
  		res = {}
  		res.update(self.get_demensions())
  		if "duration" not in except_info:
- 			res["duration"] = int(self.get_duration())
+ 			res["duration"] = round(self.get_duration())
  		if "filesize" not in except_info:
- 			res["filesize"] = round(VideoInfo.get_filesize(self.filename), 2)
+ 			res["filesize"] = VideoInfo.get_filesize(self.filename)
  		return res
 
  	def shrink_image_to_fit(self, image: Image, size: tuple = (320, 320)) -> Image:
@@ -59,17 +66,17 @@ class VideoInfo(object):
  	def generate_thumbnail(self) -> Union[io.BytesIO, None]:
  		try:
  			image = None
- 			with av.open(file=self.filename, mode='r') as container:
+ 			if self.container:
  				# Signal that we only want to look at keyframes.
- 				stream = container.streams.video[0]
+ 				stream = self.container.streams.video[0]
  				stream.codec_context.skip_frame = "NONKEY"
- 				frame_num = 10
- 				time_base = container.streams.video[0].time_base
- 				framerate = container.streams.video[0].average_rate
+ 				frame_num = 30
+ 				time_base = stream.time_base
+ 				framerate = stream.average_rate
  				frame_container_pts = round((frame_num / framerate) / time_base)
 
- 				container.seek(frame_container_pts, backward=True, stream=container.streams.video[0])
- 				frame = next(container.decode(stream))
+ 				self.container.seek(frame_container_pts, backward=True, stream=stream)
+ 				frame = next(self.container.decode(stream))
 
  				image = frame.to_image()
  				#image.save(
warp_beacon/scrapler/__init__.py → warp_beacon/scraper/__init__.py
@@ -3,12 +3,13 @@ import time
 
  from typing import Optional
  import multiprocessing
- from requests.exceptions import ConnectTimeout, HTTPError
- from instagrapi.exceptions import MediaNotFound, UnknownError, ClientNotFoundError, UserNotFound
+ from queue import Empty
 
+ from warp_beacon.scraper.exceptions import NotFound, UnknownError, TimeOut, Unavailable
  from warp_beacon.mediainfo.video import VideoInfo
  from warp_beacon.compress.video import VideoCompress
  from warp_beacon.uploader import AsyncUploader
+ from warp_beacon.jobs import Origin
  from warp_beacon.jobs.download_job import DownloadJob
 
  import logging
@@ -16,6 +17,7 @@ import logging
  CONST_CPU_COUNT = multiprocessing.cpu_count()
 
  class AsyncDownloader(object):
+ 	__JOE_BIDEN_WAKEUP = None
  	workers = []
  	allow_loop = None
  	job_queue = multiprocessing.Queue()
@@ -44,7 +46,6 @@ class AsyncDownloader(object):
  				media_info = video_info.get_finfo(tuple(fr_media_info.keys()))
  				media_info.update(fr_media_info)
  				media_info["thumb"] = video_info.generate_thumbnail()
- 				logging.info("Media file info: %s", media_info)
  			except Exception as e:
  				logging.error("Failed to process media info!")
  				logging.exception(e)
@@ -58,30 +59,41 @@ class AsyncDownloader(object):
  			job = None
  			try:
  				job = self.job_queue.get()
+ 				if job is self.__JOE_BIDEN_WAKEUP:
+ 					continue
  				actor = None
  				try:
  					items = []
- 					if "instagram.com/" in job.url:
+ 					if job.job_origin is not Origin.UNKNOWN:
  						if not job.in_process:
- 							from warp_beacon.scrapler.instagram import InstagramScrapler
- 							actor = InstagramScrapler()
+ 							actor = None
+ 							if job.job_origin is Origin.INSTAGRAM:
+ 								from warp_beacon.scraper.instagram import InstagramScraper
+ 								actor = InstagramScraper()
+ 							elif job.job_origin is Origin.YT_SHORTS:
+ 								from warp_beacon.scraper.youtube.shorts import YoutubeShortsScraper
+ 								actor = YoutubeShortsScraper()
 							while True:
 								try:
 									logging.info("Downloading URL '%s'", job.url)
 									items = actor.download(job.url)
 									break
- 								except ConnectTimeout as e:
- 									logging.error("ConnectTimeout download error!")
- 									logging.exception(e)
- 									time.sleep(2)
- 								except (MediaNotFound, ClientNotFoundError, UserNotFound) as e:
- 									logging.warning("Not found error occurred!")
+ 								except (NotFound, Unavailable) as e:
+ 									logging.warning("Not found or unavailable error occurred!")
 									logging.exception(e)
 									self.uploader.queue_task(job.to_upload_job(
 										job_failed=True,
 										job_failed_msg="Unable to access to media under this URL. Seems like the media is private.")
 									)
 									break
+ 								except TimeOut as e:
+ 									logging.warning("Timeout error occurred!")
+ 									logging.exception(e)
+ 									self.uploader.queue_task(job.to_upload_job(
+ 										job_failed=True,
+ 										job_failed_msg="Failed to download content. Please check your Internet connection or the retry amount in the bot configuration settings.")
+ 									)
+ 									break
 								except (UnknownError, Exception) as e:
 									logging.warning("UnknownError occurred!")
 									logging.exception(e)
@@ -106,8 +118,10 @@ class AsyncDownloader(object):
 						for item in items:
 							media_info = {"filesize": 0}
 							if item["media_type"] == "video":
- 								media_info = self.get_media_info(item["local_media_path"], item["media_info"])
- 								if media_info["filesize"] > 50.0:
+ 								media_info = self.get_media_info(item["local_media_path"], item.get("media_info", {}))
+ 								logging.info("Final media info: %s", media_info)
+ 								if media_info["filesize"] > 52428800: # 50 MiB in bytes
+ 									logging.info("Filesize is '%d' MiB", round(media_info["filesize"] / 1024 / 1024))
 									logging.info("Detected big file. Starting compressing with ffmpeg ...")
 									self.uploader.queue_task(job.to_upload_job(
 										job_warning=True,
@@ -153,27 +167,27 @@ class AsyncDownloader(object):
 						else:
 							logging.info("Job already in work in parallel worker. Redirecting job to upload worker.")
 							self.uploader.queue_task(job.to_upload_job())
- 				except HTTPError as e:
- 					logging.error("HTTP error inside download worker!")
- 					logging.exception(e)
  				except Exception as e:
  					logging.error("Error inside download worker!")
  					logging.exception(e)
  					self.notify_task_failed(job)
  					#self.queue_task(url=item["url"], message_id=item["message_id"], item_in_process=item["in_process"], uniq_id=item["uniq_id"])
- 			except multiprocessing.Queue.empty:
+ 			except Empty:
  				pass
  			except Exception as e:
  				logging.error("Exception occurred inside worker!")
  				logging.exception(e)
 
+ 		logging.info("Process done")
+
  	def stop_all(self) -> None:
  		self.allow_loop.value = 0
  		for proc in self.workers:
  			if proc.is_alive():
  				logging.info("stopping process #%d", proc.pid)
- 				proc.terminate()
- 				#proc.join()
+ 				self.job_queue.put_nowait(self.__JOE_BIDEN_WAKEUP)
+ 				proc.join()
+ 				#proc.terminate()
  				logging.info("process #%d stopped", proc.pid)
  		self.workers.clear()
 
warp_beacon/scrapler/abstract.py → warp_beacon/scraper/abstract.py
@@ -1,6 +1,7 @@
  from abc import ABC, abstractmethod
+ from typing import Callable, Union
 
- class ScraplerAbstract(ABC):
+ class ScraperAbstract(ABC):
  	def __init__(self) -> None:
  		pass
 
@@ -8,9 +9,10 @@ class ScraplerAbstract(ABC):
  		pass
 
  	@abstractmethod
- 	def scrap(self, url: str) -> str:
+ 	def download(self, url: str) -> bool:
  		raise NotImplementedError
 
  	@abstractmethod
- 	def download(self, url: str) -> bool:
+ 	def _download_hndlr(self, func: Callable, *args: tuple[str], **kwargs: dict[str]) -> Union[str, dict]:
  		raise NotImplementedError
+
warp_beacon/scraper/exceptions.py
@@ -0,0 +1,42 @@
+ from typing import Union
+
+ class ScraperError(Exception):
+ 	def __init__(self, *args, **kwargs):
+ 		args = list(args)
+ 		if len(args) > 0:
+ 			self.message = str(args.pop(0))
+ 		for key in list(kwargs.keys()):
+ 			setattr(self, key, kwargs.pop(key))
+ 		if not self.message:
+ 			self.message = "{title} ({body})".format(
+ 				title=getattr(self, "reason", "Unknown"),
+ 				body=getattr(self, "error_type", vars(self)),
+ 			)
+ 		super().__init__(self.message, *args, **kwargs)
+ 		if hasattr(self, "response") and self.response:
+ 			self.code = self.response.status_code
+
+ class TimeOut(ScraperError):
+ 	pass
+
+ class NotFound(ScraperError):
+ 	pass
+
+ class Unavailable(ScraperError):
+ 	pass
+
+ class UnknownError(ScraperError):
+ 	pass
+
+ def extract_exception_message(e: Exception) -> str:
+ 	msg = ""
+ 	if hasattr(e, "error_string"):
+ 		msg = e.error_string
+ 	elif hasattr(e, "message"):
+ 		msg = e.message
+ 	elif hasattr(e, "reason"):
+ 		msg = e.reason
+ 	elif hasattr(e, "args"):
+ 		msg = str(e.args)
+
+ 	return msg
warp_beacon/scrapler/instagram.py → warp_beacon/scraper/instagram.py
@@ -1,10 +1,11 @@
  import os
- from pathlib import Path
  import time
- import json
  from typing import Callable, Optional, Union
+ from pathlib import Path
+ import json
 
  import requests
+ from requests.exceptions import ConnectTimeout, HTTPError
  import urllib3
  from urllib.parse import urljoin, urlparse
  import logging
@@ -12,13 +13,14 @@ import logging
  from instagrapi.mixins.story import Story
  from instagrapi.types import Media
  from instagrapi import Client
- from instagrapi.exceptions import LoginRequired, PleaseWaitFewMinutes
+ from instagrapi.exceptions import LoginRequired, PleaseWaitFewMinutes, MediaNotFound, ClientNotFoundError, UserNotFound, UnknownError as IGUnknownError
 
- from warp_beacon.scrapler.abstract import ScraplerAbstract
+ from warp_beacon.scraper.exceptions import NotFound, UnknownError, TimeOut, extract_exception_message
+ from warp_beacon.scraper.abstract import ScraperAbstract
 
  INST_SESSION_FILE = "/var/warp_beacon/inst_session.json"
 
- class InstagramScrapler(ScraplerAbstract):
+ class InstagramScraper(ScraperAbstract):
  	cl = None
 
  	def __init__(self) -> None:
@@ -85,7 +87,7 @@ class InstagramScrapler(ScraplerAbstract):
  		logging.info("media_id is '%s'", media_id)
  		return media_id
 
- 	def __download_hndlr(self, func: Callable, *args: tuple[str], **kwargs: dict[str]) -> Union[Path, Media]:
+ 	def _download_hndlr(self, func: Callable, *args: tuple[str], **kwargs: dict[str]) -> Union[str, dict]:
  		ret_val = {}
  		max_retries = int(os.environ.get("IG_MAX_RETRIES", default=5))
  		retries = 0
@@ -96,12 +98,14 @@ class InstagramScrapler(ScraplerAbstract):
  			except (requests.exceptions.ConnectionError,
  				requests.exceptions.ReadTimeout,
  				urllib3.exceptions.ReadTimeoutError,
- 				urllib3.exceptions.ConnectionError) as e:
+ 				urllib3.exceptions.ConnectionError,
+ 				ConnectTimeout,
+ 				HTTPError) as e:
  				logging.warning("Instagram read timeout! Retrying in 2 seconds ...")
  				logging.info("Your `IG_MAX_RETRIES` values is %d", max_retries)
  				logging.exception(e)
  				if max_retries == retries:
- 					raise e
+ 					raise TimeOut(extract_exception_message(e))
  				retries += 1
  				time.sleep(2)
@@ -109,11 +113,11 @@ class InstagramScrapler(ScraplerAbstract):
 
 
  	def download_video(self, url: str, media_info: dict) -> dict:
- 		path = self.__download_hndlr(self.cl.video_download_by_url, url, folder='/tmp')
- 		return {"local_media_path": str(path), "media_type": "video", "media_info": {"duration": media_info.video_duration}}
+ 		path = self._download_hndlr(self.cl.video_download_by_url, url, folder='/tmp')
+ 		return {"local_media_path": str(path), "media_type": "video", "media_info": {"duration": round(media_info.video_duration)}}
 
  	def download_photo(self, url: str) -> dict:
- 		path = self.__download_hndlr(self.cl.photo_download_by_url, url, folder='/tmp')
+ 		path = self._download_hndlr(self.cl.photo_download_by_url, url, folder='/tmp')
  		return {"local_media_path": str(path), "media_type": "image"}
 
  	def download_story(self, story_info: Story) -> dict:
@@ -127,10 +131,10 @@ class InstagramScrapler(ScraplerAbstract):
  		logging.info("Effective story id is '%s'", effective_story_id)
  		effective_url = "https://www.instagram.com/stories/%s/%s/" % (story_info.user.username, effective_story_id)
  		if story_info.media_type == 1: # photo
- 			path = self.__download_hndlr(self.cl.story_download_by_url, url=story_info.thumbnail_url, folder='/tmp')
+ 			path = self._download_hndlr(self.cl.story_download_by_url, url=story_info.thumbnail_url, folder='/tmp')
  			media_type = "image"
  		elif story_info.media_type == 2: # video
- 			path = self.__download_hndlr(self.cl.story_download_by_url, url=story_info.video_url, folder='/tmp')
+ 			path = self._download_hndlr(self.cl.story_download_by_url, url=story_info.video_url, folder='/tmp')
  			media_type = "video"
  			media_info["duration"] = story_info.video_duration
 
@@ -160,7 +164,7 @@ class InstagramScrapler(ScraplerAbstract):
  			try:
  				scrap_type, media_id = self.scrap(url)
  				if scrap_type == "media":
- 					media_info = self.__download_hndlr(self.cl.media_info, media_id)
+ 					media_info = self._download_hndlr(self.cl.media_info, media_id)
  					logging.info("media_type is '%d', product_type is '%s'", media_info.media_type, media_info.product_type)
  					if media_info.media_type == 2 and media_info.product_type == "clips": # Reels
  						res.append(self.download_video(url=media_info.video_url, media_info=media_info))
@@ -192,4 +196,8 @@ class InstagramScrapler(ScraplerAbstract):
 					os.unlink(i["local_media_path"])
  				os.unlink(INST_SESSION_FILE)
  				time.sleep(wait_timeout)
+ 			except (MediaNotFound, ClientNotFoundError, UserNotFound) as e:
+ 				raise NotFound(extract_exception_message(e))
+ 			except IGUnknownError as e:
+ 				raise UnknownError(extract_exception_message(e))
  		return res
File without changes
warp_beacon/scraper/youtube/shorts.py
@@ -0,0 +1,84 @@
+ import os
+ import pathlib
+ import time
+
+ from typing import Callable, Union
+
+ from socket import timeout
+ from ssl import SSLError
+ from requests.exceptions import RequestException
+ from urllib.error import URLError
+ from http.client import HTTPException
+
+ from pytubefix import YouTube
+ from pytubefix.exceptions import VideoUnavailable, VideoPrivate, MaxRetriesExceeded
+
+ from warp_beacon.scraper.exceptions import NotFound, UnknownError, TimeOut, Unavailable, extract_exception_message
+ from warp_beacon.scraper.abstract import ScraperAbstract
+
+ import logging
+
+ class YoutubeShortsScraper(ScraperAbstract):
+ 	def __init__(self) -> None:
+ 		pass
+
+ 	def __del__(self) -> None:
+ 		pass
+
+ 	def _download_hndlr(self, func: Callable, *args: tuple[str], **kwargs: dict[str]) -> Union[str, dict]:
+ 		ret_val = ''
+ 		max_retries = int(os.environ.get("YT_MAX_RETRIES", default=8))
+ 		pause_secs = int(os.environ.get("YT_PAUSE_BEFORE_RETRY", default=3))
+ 		retries = 0
+ 		while max_retries >= retries:
+ 			try:
+ 				ret_val = func(*args, **kwargs)
+ 				break
+ 			except MaxRetriesExceeded:
+ 				# do nothing, not interested
+ 				pass
+ 			except (timeout, SSLError, HTTPException, RequestException, URLError) as e:
+ 				logging.warning("Youtube read timeout! Retrying in %d seconds ...", pause_secs)
+ 				logging.info("Your `YT_MAX_RETRIES` value is %d", max_retries)
+ 				logging.exception(extract_exception_message(e))
+ 				if max_retries == retries:
+ 					raise TimeOut(extract_exception_message(e))
+ 				retries += 1
+ 				time.sleep(pause_secs)
+ 			except (VideoUnavailable, VideoPrivate) as e:
+ 				raise Unavailable(extract_exception_message(e))
+
+ 		return ret_val
+
+ 	def rename_local_file(self, filename: str) -> str:
+ 		if not os.path.exists(filename):
+ 			raise NameError("No file provided")
+ 		path_info = pathlib.Path(filename)
+ 		ext = path_info.suffix
+ 		old_filename = path_info.stem
+ 		time_name = str(time.time()).replace('.', '_')
+ 		new_filename = "%s%s" % (time_name, ext)
+ 		new_filepath = "%s/%s" % (os.path.dirname(filename), new_filename)
+
+ 		os.rename(filename, new_filepath)
+
+ 		return new_filepath
+
+ 	def _download(self, url: str) -> list:
+ 		res = []
+ 		timeout = int(os.environ.get("YT_TIMEOUT", default=2))
+ 		yt = YouTube(url)
+ 		stream = yt.streams.get_highest_resolution()
+ 		if stream:
+ 			local_file = stream.download(
+ 				output_path="/tmp",
+ 				max_retries=0,
+ 				timeout=timeout,
+ 				skip_existing=False
+ 			)
+ 			res.append({"local_media_path": self.rename_local_file(local_file), "media_type": "video"})
+
+ 		return res
+
+ 	def download(self, url: str) -> list:
+ 		return self._download_hndlr(self._download, url)
warp_beacon/uploader/__init__.py
@@ -13,6 +13,7 @@ from typing import Optional, Callable, Coroutine
  from warp_beacon.storage import Storage
 
  class AsyncUploader(object):
+ 	__JOE_BIDEN_WAKEUP = None
  	threads = []
  	allow_loop = True
  	job_queue = None
@@ -34,8 +35,8 @@ class AsyncUploader(object):
  	def start(self) -> None:
  		for _ in range(self.pool_size):
  			thread = threading.Thread(target=self.do_work)
- 			self.threads.append(thread)
  			thread.start()
+ 			self.threads.append(thread)
 
  	def add_callback(self, message_id: int, callback: Callable, update: Update, context: ContextTypes.DEFAULT_TYPE) -> None:
  		def callback_wrap(*args, **kwargs) -> None:
@@ -55,6 +56,7 @@ class AsyncUploader(object):
  		for i in self.threads:
  			t_id = i.native_id
  			logging.info("Stopping thread #'%s'", t_id)
+ 			self.job_queue.put(self.__JOE_BIDEN_WAKEUP)
  			i.join()
  			logging.info("Thread #'%s' stopped", t_id)
  		self.threads.clear()
@@ -77,6 +79,8 @@ class AsyncUploader(object):
  		try:
  			try:
  				job = self.job_queue.get()
+ 				if job is self.__JOE_BIDEN_WAKEUP:
+ 					continue
  				path = ""
  				if job.media_type == "collection":
  					for i in job.media_collection:
@@ -130,4 +134,5 @@
  				pass
  			except Exception as e:
  				logging.error("Exception occurred inside upload worker!")
- 				logging.exception(e)
+ 				logging.exception(e)
+ 		logging.info("Thread done")
warp_beacon/warp_beacon.py
@@ -6,7 +6,6 @@ import signal
  import asyncio
  import time
  from io import BytesIO
- import logging
 
  from urlextract import URLExtract
 
@@ -15,10 +14,14 @@ from telegram import Bot, ForceReply, Update, Chat, error, InputMediaVideo, Inpu
  from telegram.ext import Application, CommandHandler, ContextTypes, MessageHandler, filters
  from telegram.constants import ParseMode
 
- import warp_beacon.scrapler
+ import warp_beacon.scraper
  from warp_beacon.storage import Storage
  from warp_beacon.uploader import AsyncUploader
- from warp_beacon.jobs.download_job import DownloadJob, UploadJob
+ from warp_beacon.jobs.download_job import DownloadJob
+ from warp_beacon.jobs.upload_job import UploadJob
+ from warp_beacon.jobs import Origin
+
+ import logging
 
  # Enable logging
  logging.basicConfig(
@@ -361,6 +364,15 @@ async def upload_job(update: Update, context: ContextTypes.DEFAULT_TYPE, job: Up
 
  	return tg_file_ids
 
+ def extract_origin(url: str) -> Origin:
+ 	if "instagram.com/" in url:
+ 		return Origin.INSTAGRAM
+
+ 	if "youtube.com/" in url and "shorts/" in url:
+ 		return Origin.YT_SHORTS
+
+ 	return Origin.UNKNOWN
+
  async def handler(update: Update, context: ContextTypes.DEFAULT_TYPE) -> None:
  	if update.message is None:
  		return
@@ -374,8 +386,9 @@ async def handler(update: Update, context: ContextTypes.DEFAULT_TYPE) -> None:
  		reply_text = "Your message should contains URLs"
  	else:
  		for url in urls:
- 			if "instagram.com" not in url:
- 				logging.info("Only instagram.com is now supported. Skipping.")
+ 			origin = extract_origin(url)
+ 			if origin is Origin.UNKNOWN:
+ 				logging.info("Only Instagram and YouTube Shorts are now supported. Skipping.")
  				continue
  			entities, tg_file_ids = [], []
  			uniq_id = Storage.compute_uniq(url)
@@ -422,9 +435,9 @@ async def handler(update: Update, context: ContextTypes.DEFAULT_TYPE) -> None:
  				if tg_file_ids:
  					if job.media_type == "collection" and job.save_items:
  						for i in job.media_collection:
- 							storage.add_media(tg_file_ids=[i.tg_file_id], media_url=i.effective_url, media_type=i.media_type, origin="instagram")
+ 							storage.add_media(tg_file_ids=[i.tg_file_id], media_url=i.effective_url, media_type=i.media_type, origin=origin.value)
  					else:
- 						storage.add_media(tg_file_ids=[','.join(tg_file_ids)], media_url=job.url, media_type=job.media_type, origin="instagram")
+ 						storage.add_media(tg_file_ids=[','.join(tg_file_ids)], media_url=job.url, media_type=job.media_type, origin=origin.value)
  			except Exception as e:
  				logging.error("Exception occurred while performing upload callback!")
  				logging.exception(e)
@@ -457,7 +470,8 @@ async def handler(update: Update, context: ContextTypes.DEFAULT_TYPE) -> None:
  				placeholder_message_id=placeholder_message_id,
  				message_id=effective_message_id,
  				in_process=uploader.is_inprocess(uniq_id),
- 				uniq_id=uniq_id
+ 				uniq_id=uniq_id,
+ 				job_origin = origin
  			))
  			uploader.set_inprocess(uniq_id)
  		except Exception as e:
@@ -476,25 +490,6 @@ def main() -> None:
  	try:
  		global uploader, downloader
 
- 		loop = asyncio.get_event_loop()
-
- 		uploader = AsyncUploader(
- 			storage=storage,
- 			pool_size=int(os.environ.get("UPLOAD_POOL_SIZE", default=warp_beacon.scrapler.CONST_CPU_COUNT)),
- 			loop=loop
- 		)
- 		downloader = warp_beacon.scrapler.AsyncDownloader(
- 			workers_count=int(os.environ.get("WORKERS_POOL_SIZE", default=warp_beacon.scrapler.CONST_CPU_COUNT)),
- 			uploader=uploader
- 		)
- 		downloader.start()
- 		uploader.start()
-
- 		stop_signals = (signal.SIGINT, signal.SIGTERM, signal.SIGABRT)
- 		for sig in stop_signals or []:
- 			loop.add_signal_handler(sig, _raise_system_exit)
- 			loop.add_signal_handler(sig, _raise_system_exit)
-
  		# Create the Application and pass it your bot's token.
  		tg_token = os.environ.get("TG_TOKEN", default=None)
  		application = Application.builder().token(tg_token).concurrent_updates(True).build()
@@ -508,8 +503,25 @@ def main() -> None:
  		application.add_handler(MessageHandler(filters.TEXT & ~filters.COMMAND, handler))
 
  		allow_loop = True
+ 		loop = None
  		while allow_loop:
  			try:
+ 				loop = asyncio.get_event_loop()
+
+ 				stop_signals = (signal.SIGINT, signal.SIGTERM, signal.SIGABRT)
+ 				for sig in stop_signals or []:
+ 					loop.add_signal_handler(sig, _raise_system_exit)
+ 					loop.add_signal_handler(sig, _raise_system_exit)
+
+ 				uploader = AsyncUploader(
+ 					storage=storage,
+ 					pool_size=int(os.environ.get("UPLOAD_POOL_SIZE", default=warp_beacon.scraper.CONST_CPU_COUNT)),
+ 					loop=loop
+ 				)
+ 				downloader = warp_beacon.scraper.AsyncDownloader(
+ 					workers_count=int(os.environ.get("WORKERS_POOL_SIZE", default=warp_beacon.scraper.CONST_CPU_COUNT)),
+ 					uploader=uploader
+ 				)
  				loop.run_until_complete(application.initialize())
  				if application.post_init:
  					loop.run_until_complete(application.post_init(application))
@@ -517,10 +529,12 @@ def main() -> None:
  				loop.run_until_complete(application.start())
  				while allow_loop:
  					try:
+ 						downloader.start()
+ 						uploader.start()
  						loop.run_forever()
  					except (KeyboardInterrupt, SystemExit) as e:
  						allow_loop = False
- 						raise e
+ 						raise
  			except Exception as e:
  				logging.error("Main loop Telegram error!")
  				logging.exception(e)
@@ -546,11 +560,13 @@ def main() -> None:
  				if application.post_shutdown:
  					loop.run_until_complete(application.post_shutdown(application))
  			finally:
+ 				loop.close()
  				downloader.stop_all()
  				uploader.stop_all()
- 				loop.close()
  	except Exception as e:
  		logging.exception(e)
 
+ 	logging.info("Warp Beacon terminated.")
+
  if __name__ == "__main__":
  	main()
warp_beacon-1.1.1.dist-info/METADATA
@@ -1,6 +1,6 @@
  Metadata-Version: 2.1
  Name: warp_beacon
- Version: 1.0.8
+ Version: 1.1.1
  Summary: Telegram bot for expanding external media links
  Home-page: https://github.com/sb0y/warp_beacon
  Author: Andrey Bagrintsev
@@ -226,6 +226,7 @@ Description-Content-Type: text/markdown
  License-File: LICENSE
  Requires-Dist: ffmpeg-python
  Requires-Dist: python-telegram-bot
+ Requires-Dist: pytubefix
  Requires-Dist: av
  Requires-Dist: urlextract
  Requires-Dist: pillow
warp_beacon-1.1.1.dist-info/RECORD
@@ -0,0 +1,28 @@
+ etc/warp_beacon/warp_beacon.conf,sha256=1gGvh36cnFr0rU4mVomfy66hQz9EvugaNzeH6_tmBM0,266
+ lib/systemd/system/warp_beacon.service,sha256=lPmHqLqcI2eIV7nwHS0qcALQrznixqJuwwPfa2mDLUA,372
+ var/warp_beacon/placeholder.gif,sha256=cE5CGJVaop4Sx21zx6j4AyoHU0ncmvQuS2o6hJfEH88,6064
+ warp_beacon/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+ warp_beacon/__version__.py,sha256=Y1WL3jwTYIGv_JONH9hc9ULjDsNL4lbh4ucw3BlkBYs,23
+ warp_beacon/warp_beacon.py,sha256=Z3zY7MTMZp3zKRderhNOVe6XvM-93Djo0DIsiaoucmo,20012
+ warp_beacon/compress/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+ warp_beacon/compress/video.py,sha256=_PDMVYCyzLYxHv1uZmmzGcG_8rjaZr7BTXsXTTy_oS4,2846
+ warp_beacon/jobs/__init__.py,sha256=KsSaS0KlCNyffNnWKuvqmdfgyfKyn3niXYyZ38-exQ8,113
+ warp_beacon/jobs/abstract.py,sha256=PCr8RXzocKi-sTsi2Y1_spiv6D95G1NlzZ2wD2WJXRc,1760
+ warp_beacon/jobs/download_job.py,sha256=wfZrKUerfYIjWkRxPzfl5gwIlcotIMH7OpTUM9ae8NY,736
+ warp_beacon/jobs/upload_job.py,sha256=Vaogc4vbpAfyaT4VkIHEPLFRELmM44TDqkmnPYh3Ymc,740
+ warp_beacon/mediainfo/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+ warp_beacon/mediainfo/video.py,sha256=8h7p4k0w45Vm0lPQNlaQaUjaDTBybX3RcKgL1QQbioA,2638
+ warp_beacon/scraper/__init__.py,sha256=c7NySK5Krm-zlWQckFs-uN4fD3J19A0pTS4CByXjmMs,7918
+ warp_beacon/scraper/abstract.py,sha256=um4wUthO_7IsoXjKiUTWyBBbKlf-N01aZJK9N2UQI9I,408
+ warp_beacon/scraper/exceptions.py,sha256=qra_Jx53RVCnrCKvw2OxvEHl4cXJCrPDa_yef3cvGXM,978
+ warp_beacon/scraper/instagram.py,sha256=s7slumqdqVVWQYpnVphx-dOLIAWvgA_UdqkTQVp6GsI,7758
+ warp_beacon/scraper/youtube/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+ warp_beacon/scraper/youtube/shorts.py,sha256=xAOeHNT3_Ayjjglid00UqGqCgSMfbJua26PNrbbDYUo,2565
+ warp_beacon/storage/__init__.py,sha256=NhD3V7UNRiZNf61yQEAjXOfi-tfA2LaJa7a7kvbkmtE,2402
+ warp_beacon/uploader/__init__.py,sha256=auD1arKpJdN1eFUbTFoa9Gmv-ZYZNesMoT193__pDz8,4507
+ warp_beacon-1.1.1.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
+ warp_beacon-1.1.1.dist-info/METADATA,sha256=grW-AA1vEjTZEt30ShVny1vomQY-0M-HGMGuOGOZdCc,18244
+ warp_beacon-1.1.1.dist-info/WHEEL,sha256=Wyh-_nZ0DJYolHNn1_hMa4lM7uDedD_RGVwbmTjyItk,91
+ warp_beacon-1.1.1.dist-info/entry_points.txt,sha256=eSB61Rb89d56WY0O-vEIQwkn18J-4CMrJcLA_R_8h3g,119
+ warp_beacon-1.1.1.dist-info/top_level.txt,sha256=QuN6MynevEblMhPPAVeMrNAkcyqYUpYDholtIRq8-ew,473
+ warp_beacon-1.1.1.dist-info/RECORD,,
warp_beacon-1.1.1.dist-info/WHEEL
@@ -1,5 +1,5 @@
  Wheel-Version: 1.0
- Generator: setuptools (71.0.4)
+ Generator: setuptools (71.1.0)
  Root-Is-Purelib: true
  Tag: py3-none-any
 
warp_beacon-1.1.1.dist-info/top_level.txt
@@ -8,9 +8,12 @@ warp_beacon/jobs/download_job
  warp_beacon/jobs/upload_job
  warp_beacon/mediainfo
  warp_beacon/mediainfo/video
- warp_beacon/scrapler
- warp_beacon/scrapler/abstract
- warp_beacon/scrapler/instagram
+ warp_beacon/scraper
+ warp_beacon/scraper/abstract
+ warp_beacon/scraper/exceptions
+ warp_beacon/scraper/instagram
+ warp_beacon/scraper/youtube
+ warp_beacon/scraper/youtube/shorts
  warp_beacon/storage
  warp_beacon/uploader
  warp_beacon/warp_beacon
warp_beacon-1.0.8.dist-info/RECORD
@@ -1,25 +0,0 @@
- etc/warp_beacon/warp_beacon.conf,sha256=1gGvh36cnFr0rU4mVomfy66hQz9EvugaNzeH6_tmBM0,266
- lib/systemd/system/warp_beacon.service,sha256=lPmHqLqcI2eIV7nwHS0qcALQrznixqJuwwPfa2mDLUA,372
- var/warp_beacon/placeholder.gif,sha256=cE5CGJVaop4Sx21zx6j4AyoHU0ncmvQuS2o6hJfEH88,6064
- warp_beacon/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
- warp_beacon/__version__.py,sha256=jOGC2HGDoU2OhY9xBiOPgX9B0ssXLXx0OeKHzO1DTV8,23
- warp_beacon/warp_beacon.py,sha256=agUu6_Qt1UnGTSqYqXQZtHeeWqRkhlIHt_Repph1G3k,19581
- warp_beacon/compress/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
- warp_beacon/compress/video.py,sha256=_PDMVYCyzLYxHv1uZmmzGcG_8rjaZr7BTXsXTTy_oS4,2846
- warp_beacon/jobs/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
- warp_beacon/jobs/abstract.py,sha256=-eXvrRgrUBed1z9iYZd0RxZ8fi0Okq3r0i7PDN31FkY,1666
- warp_beacon/jobs/download_job.py,sha256=wfZrKUerfYIjWkRxPzfl5gwIlcotIMH7OpTUM9ae8NY,736
- warp_beacon/jobs/upload_job.py,sha256=Vaogc4vbpAfyaT4VkIHEPLFRELmM44TDqkmnPYh3Ymc,740
- warp_beacon/mediainfo/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
- warp_beacon/mediainfo/video.py,sha256=CbaJmAoHAYNBI4bwvsIXYqTRNlFj9OtDhvxLgywMnho,2536
- warp_beacon/scrapler/__init__.py,sha256=J1kRZfkCuTucRnklElXnrMUZUGpg8wIGYWGrfY5a1xc,7335
- warp_beacon/scrapler/abstract.py,sha256=MJxpEovCWDYq2SwbbMsRDfp77WTwvbXXKiQxKWoj0ZQ,304
- warp_beacon/scrapler/instagram.py,sha256=8CF_Zdxn1hStz_PgLxTc0FTt5heI84d-Ks0XzmD7-_o,7248
- warp_beacon/storage/__init__.py,sha256=NhD3V7UNRiZNf61yQEAjXOfi-tfA2LaJa7a7kvbkmtE,2402
- warp_beacon/uploader/__init__.py,sha256=9qQAuYisXiVIjQghxcxpF4WAdW7lm7HmpkOXQjGNJXk,4346
- warp_beacon-1.0.8.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
- warp_beacon-1.0.8.dist-info/METADATA,sha256=Ma5F75k0lR3JAK-aiZFYdZc_u2xO1IplbuWTQwzsKZ8,18219
- warp_beacon-1.0.8.dist-info/WHEEL,sha256=rWxmBtp7hEUqVLOnTaDOPpR-cZpCDkzhhcBce-Zyd5k,91
- warp_beacon-1.0.8.dist-info/entry_points.txt,sha256=eSB61Rb89d56WY0O-vEIQwkn18J-4CMrJcLA_R_8h3g,119
- warp_beacon-1.0.8.dist-info/top_level.txt,sha256=510sqsM4LLO-DC4HbUkwdVKmYY_26lbnvJwSq_RLT00,382
- warp_beacon-1.0.8.dist-info/RECORD,,
- warp_beacon-1.0.8.dist-info/RECORD,,